From af75f626fd3139a89e79bcf76859884477da1870 Mon Sep 17 00:00:00 2001 From: Pedro Renan Date: Fri, 24 Apr 2026 21:46:12 +0100 Subject: [PATCH] feat(llm): add LLM_JSON_MODE to support runtimes without response_format OpenAI-compatible runtimes differ in how they handle `response_format`: OpenAI, Qwen/Dashscope and Ollama accept `{"type": "json_object"}`, while other runtimes such as LM Studio and llama.cpp server reject it with HTTP 400, only accepting `json_schema` or `text`. This prevented MiroFish from running against fully-local stacks. Introduce `LLM_JSON_MODE` (default `json_object`) so users can opt out of strict JSON response mode by setting `LLM_JSON_MODE=none`. The existing prompt-based JSON + markdown-tolerant parsing already handles the unstructured response path robustly, so `none` is viable for any OpenAI-compatible endpoint. Applied at all three call sites that send `response_format`: - utils/llm_client.py (chat_json helper) - services/oasis_profile_generator.py (persona synthesis) - services/simulation_config_generator.py (time/event/agent config) Documented in .env.example with guidance on when to pick each value. --- .env.example | 6 ++++++ backend/app/config.py | 4 ++++ backend/app/services/oasis_profile_generator.py | 14 ++++++++------ .../app/services/simulation_config_generator.py | 14 ++++++++------ backend/app/utils/llm_client.py | 2 +- 5 files changed, 27 insertions(+), 13 deletions(-) diff --git a/.env.example b/.env.example index 78a3b72c..49a60525 100644 --- a/.env.example +++ b/.env.example @@ -5,6 +5,12 @@ LLM_API_KEY=your_api_key_here LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1 LLM_MODEL_NAME=qwen-plus +# LLM JSON output mode (optional, default: json_object) +# json_object — OpenAI, Ollama, Qwen Cloud, Anthropic via an OpenAI-compatible proxy +# none — LM Studio, llama.cpp server, or any runtime that rejects the +# `response_format` parameter. Relies on prompt-based JSON + robust parsing. 
+# LLM_JSON_MODE=json_object + # ===== ZEP记忆图谱配置 ===== # 每月免费额度即可支撑简单使用:https://app.getzep.com/ ZEP_API_KEY=your_zep_api_key_here diff --git a/backend/app/config.py b/backend/app/config.py index 953dfa50..d28ea904 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -31,6 +31,10 @@ class Config: LLM_API_KEY = os.environ.get('LLM_API_KEY') LLM_BASE_URL = os.environ.get('LLM_BASE_URL', 'https://api.openai.com/v1') LLM_MODEL_NAME = os.environ.get('LLM_MODEL_NAME', 'gpt-4o-mini') + # JSON output mode for LLM calls. Options: + # "json_object" — OpenAI-compatible default (Ollama, Qwen Cloud, Anthropic via proxy) + # "none" — runtimes that reject response_format (LM Studio, llama.cpp server) + LLM_JSON_MODE = os.environ.get('LLM_JSON_MODE', 'json_object').lower() # Zep配置 ZEP_API_KEY = os.environ.get('ZEP_API_KEY') diff --git a/backend/app/services/oasis_profile_generator.py b/backend/app/services/oasis_profile_generator.py index 7704a627..10fbe8ee 100644 --- a/backend/app/services/oasis_profile_generator.py +++ b/backend/app/services/oasis_profile_generator.py @@ -527,16 +527,18 @@ class OasisProfileGenerator: for attempt in range(max_attempts): try: - response = self.client.chat.completions.create( - model=self.model_name, - messages=[ + create_kwargs = { + "model": self.model_name, + "messages": [ {"role": "system", "content": self._get_system_prompt(is_individual)}, {"role": "user", "content": prompt} ], - response_format={"type": "json_object"}, - temperature=0.7 - (attempt * 0.1) # 每次重试降低温度 + "temperature": 0.7 - (attempt * 0.1) # 每次重试降低温度 # 不设置max_tokens,让LLM自由发挥 - ) + } + if Config.LLM_JSON_MODE == "json_object": + create_kwargs["response_format"] = {"type": "json_object"} + response = self.client.chat.completions.create(**create_kwargs) content = response.choices[0].message.content diff --git a/backend/app/services/simulation_config_generator.py b/backend/app/services/simulation_config_generator.py index cb77f6b6..9be15449 100644 --- 
a/backend/app/services/simulation_config_generator.py +++ b/backend/app/services/simulation_config_generator.py @@ -440,16 +440,18 @@ class SimulationConfigGenerator: for attempt in range(max_attempts): try: - response = self.client.chat.completions.create( - model=self.model_name, - messages=[ + create_kwargs = { + "model": self.model_name, + "messages": [ {"role": "system", "content": system_prompt}, {"role": "user", "content": prompt} ], - response_format={"type": "json_object"}, - temperature=0.7 - (attempt * 0.1) # 每次重试降低温度 + "temperature": 0.7 - (attempt * 0.1) # 每次重试降低温度 # 不设置max_tokens,让LLM自由发挥 - ) + } + if Config.LLM_JSON_MODE == "json_object": + create_kwargs["response_format"] = {"type": "json_object"} + response = self.client.chat.completions.create(**create_kwargs) content = response.choices[0].message.content finish_reason = response.choices[0].finish_reason diff --git a/backend/app/utils/llm_client.py b/backend/app/utils/llm_client.py index 6c1a81f4..a136e04c 100644 --- a/backend/app/utils/llm_client.py +++ b/backend/app/utils/llm_client.py @@ -88,7 +88,7 @@ class LLMClient: messages=messages, temperature=temperature, max_tokens=max_tokens, - response_format={"type": "json_object"} + response_format={"type": "json_object"} if Config.LLM_JSON_MODE == "json_object" else None ) # 清理markdown代码块标记 cleaned_response = response.strip()