feat(llm): add LLM_JSON_MODE to support runtimes without response_format

OpenAI-compatible runtimes differ in how they handle `response_format`:
hosted APIs and some local runtimes (OpenAI, Qwen/DashScope, Ollama)
accept `{"type": "json_object"}`, while other local runtimes like LM
Studio and llama.cpp server reject it with HTTP 400, only accepting
`json_schema` or `text`. This prevented MiroFish from running against
fully-local stacks.

Introduce `LLM_JSON_MODE` (default `json_object`) so users can opt out
of strict JSON response mode by setting `LLM_JSON_MODE=none`. The
existing prompt-based JSON + markdown-tolerant parsing already handles
the unstructured response path robustly, so `none` is viable for any
OpenAI-compatible endpoint.

Applied at all three call sites that send `response_format`:
- utils/llm_client.py (chat_json helper)
- services/oasis_profile_generator.py (persona synthesis)
- services/simulation_config_generator.py (time/event/agent config)

Documented in .env.example with guidance on when to pick each value.
This commit is contained in:
Pedro Renan 2026-04-24 21:46:12 +01:00
parent fa0f6519b1
commit af75f626fd
5 changed files with 27 additions and 13 deletions

View File

@ -5,6 +5,12 @@ LLM_API_KEY=your_api_key_here
LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
LLM_MODEL_NAME=qwen-plus
# LLM JSON output mode (optional, default: json_object)
# json_object — OpenAI, Ollama, Qwen Cloud, Anthropic via an OpenAI-compatible proxy
# none — LM Studio, llama.cpp server, or any runtime that rejects the
# `response_format` parameter. Relies on prompt-based JSON + robust parsing.
# LLM_JSON_MODE=json_object
# ===== ZEP记忆图谱配置 =====
# 每月免费额度即可支撑简单使用：https://app.getzep.com/
ZEP_API_KEY=your_zep_api_key_here

View File

@ -31,6 +31,10 @@ class Config:
LLM_API_KEY = os.environ.get('LLM_API_KEY')
LLM_BASE_URL = os.environ.get('LLM_BASE_URL', 'https://api.openai.com/v1')
LLM_MODEL_NAME = os.environ.get('LLM_MODEL_NAME', 'gpt-4o-mini')
# JSON output mode for LLM calls. Options:
# "json_object" — OpenAI-compatible default (Ollama, Qwen Cloud, Anthropic via proxy)
# "none" — runtimes that reject response_format (LM Studio, llama.cpp server)
LLM_JSON_MODE = os.environ.get('LLM_JSON_MODE', 'json_object').lower()
# Zep配置
ZEP_API_KEY = os.environ.get('ZEP_API_KEY')

View File

@ -527,16 +527,18 @@ class OasisProfileGenerator:
for attempt in range(max_attempts):
try:
response = self.client.chat.completions.create(
model=self.model_name,
messages=[
create_kwargs = {
"model": self.model_name,
"messages": [
{"role": "system", "content": self._get_system_prompt(is_individual)},
{"role": "user", "content": prompt}
],
response_format={"type": "json_object"},
temperature=0.7 - (attempt * 0.1) # 每次重试降低温度
"temperature": 0.7 - (attempt * 0.1) # 每次重试降低温度
# 不设置max_tokens让LLM自由发挥
)
}
if Config.LLM_JSON_MODE == "json_object":
create_kwargs["response_format"] = {"type": "json_object"}
response = self.client.chat.completions.create(**create_kwargs)
content = response.choices[0].message.content

View File

@ -440,16 +440,18 @@ class SimulationConfigGenerator:
for attempt in range(max_attempts):
try:
response = self.client.chat.completions.create(
model=self.model_name,
messages=[
create_kwargs = {
"model": self.model_name,
"messages": [
{"role": "system", "content": system_prompt},
{"role": "user", "content": prompt}
],
response_format={"type": "json_object"},
temperature=0.7 - (attempt * 0.1) # 每次重试降低温度
"temperature": 0.7 - (attempt * 0.1) # 每次重试降低温度
# 不设置max_tokens让LLM自由发挥
)
}
if Config.LLM_JSON_MODE == "json_object":
create_kwargs["response_format"] = {"type": "json_object"}
response = self.client.chat.completions.create(**create_kwargs)
content = response.choices[0].message.content
finish_reason = response.choices[0].finish_reason

View File

@ -88,7 +88,7 @@ class LLMClient:
messages=messages,
temperature=temperature,
max_tokens=max_tokens,
response_format={"type": "json_object"}
response_format={"type": "json_object"} if Config.LLM_JSON_MODE == "json_object" else None
)
# 清理markdown代码块标记
cleaned_response = response.strip()