feat(llm): add LLM_JSON_MODE to support runtimes without response_format
OpenAI-compatible runtimes differ in how they handle `response_format`:
OpenAI, Qwen/DashScope, and Ollama accept
`{"type": "json_object"}`, while other local runtimes such as LM Studio and
the llama.cpp server reject it with HTTP 400 and accept only `json_schema`
or `text`. This prevented MiroFish from running against fully local
stacks.
Introduce `LLM_JSON_MODE` (default `json_object`) so users can opt out
of strict JSON response mode by setting `LLM_JSON_MODE=none`. The
existing prompt-based JSON + markdown-tolerant parsing already handles
the unstructured response path robustly, so `none` is viable for any
OpenAI-compatible endpoint.
Applied at all three call sites that send `response_format`:
- utils/llm_client.py (chat_json helper)
- services/oasis_profile_generator.py (persona synthesis)
- services/simulation_config_generator.py (time/event/agent config)
Documented in .env.example with guidance on when to pick each value.
This commit is contained in:
parent
fa0f6519b1
commit
af75f626fd
|
|
@ -5,6 +5,12 @@ LLM_API_KEY=your_api_key_here
|
|||
LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
|
||||
LLM_MODEL_NAME=qwen-plus
|
||||
|
||||
# LLM JSON output mode (optional, default: json_object)
|
||||
# json_object — OpenAI, Ollama, Qwen Cloud, Anthropic via an OpenAI-compatible proxy
|
||||
# none — LM Studio, llama.cpp server, or any runtime that rejects the
|
||||
# `response_format` parameter. Relies on prompt-based JSON + robust parsing.
|
||||
# LLM_JSON_MODE=json_object
|
||||
|
||||
# ===== ZEP记忆图谱配置 =====
|
||||
# 每月免费额度即可支撑简单使用:https://app.getzep.com/
|
||||
ZEP_API_KEY=your_zep_api_key_here
|
||||
|
|
|
|||
|
|
@ -31,6 +31,10 @@ class Config:
|
|||
LLM_API_KEY = os.environ.get('LLM_API_KEY')
|
||||
LLM_BASE_URL = os.environ.get('LLM_BASE_URL', 'https://api.openai.com/v1')
|
||||
LLM_MODEL_NAME = os.environ.get('LLM_MODEL_NAME', 'gpt-4o-mini')
|
||||
# JSON output mode for LLM calls. Options:
|
||||
# "json_object" — OpenAI-compatible default (Ollama, Qwen Cloud, Anthropic via proxy)
|
||||
# "none" — runtimes that reject response_format (LM Studio, llama.cpp server)
|
||||
LLM_JSON_MODE = os.environ.get('LLM_JSON_MODE', 'json_object').lower()
|
||||
|
||||
# Zep配置
|
||||
ZEP_API_KEY = os.environ.get('ZEP_API_KEY')
|
||||
|
|
|
|||
|
|
@ -527,16 +527,18 @@ class OasisProfileGenerator:
|
|||
|
||||
for attempt in range(max_attempts):
|
||||
try:
|
||||
response = self.client.chat.completions.create(
|
||||
model=self.model_name,
|
||||
messages=[
|
||||
create_kwargs = {
|
||||
"model": self.model_name,
|
||||
"messages": [
|
||||
{"role": "system", "content": self._get_system_prompt(is_individual)},
|
||||
{"role": "user", "content": prompt}
|
||||
],
|
||||
response_format={"type": "json_object"},
|
||||
temperature=0.7 - (attempt * 0.1) # 每次重试降低温度
|
||||
"temperature": 0.7 - (attempt * 0.1) # 每次重试降低温度
|
||||
# 不设置max_tokens,让LLM自由发挥
|
||||
)
|
||||
}
|
||||
if Config.LLM_JSON_MODE == "json_object":
|
||||
create_kwargs["response_format"] = {"type": "json_object"}
|
||||
response = self.client.chat.completions.create(**create_kwargs)
|
||||
|
||||
content = response.choices[0].message.content
|
||||
|
||||
|
|
|
|||
|
|
@ -440,16 +440,18 @@ class SimulationConfigGenerator:
|
|||
|
||||
for attempt in range(max_attempts):
|
||||
try:
|
||||
response = self.client.chat.completions.create(
|
||||
model=self.model_name,
|
||||
messages=[
|
||||
create_kwargs = {
|
||||
"model": self.model_name,
|
||||
"messages": [
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": prompt}
|
||||
],
|
||||
response_format={"type": "json_object"},
|
||||
temperature=0.7 - (attempt * 0.1) # 每次重试降低温度
|
||||
"temperature": 0.7 - (attempt * 0.1) # 每次重试降低温度
|
||||
# 不设置max_tokens,让LLM自由发挥
|
||||
)
|
||||
}
|
||||
if Config.LLM_JSON_MODE == "json_object":
|
||||
create_kwargs["response_format"] = {"type": "json_object"}
|
||||
response = self.client.chat.completions.create(**create_kwargs)
|
||||
|
||||
content = response.choices[0].message.content
|
||||
finish_reason = response.choices[0].finish_reason
|
||||
|
|
|
|||
|
|
@ -88,7 +88,7 @@ class LLMClient:
|
|||
messages=messages,
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
response_format={"type": "json_object"}
|
||||
response_format={"type": "json_object"} if Config.LLM_JSON_MODE == "json_object" else None
|
||||
)
|
||||
# 清理markdown代码块标记
|
||||
cleaned_response = response.strip()
|
||||
|
|
|
|||
Loading…
Reference in New Issue