From 0a3272197b124d7d9a6ac7a6d08038e63338a162 Mon Sep 17 00:00:00 2001 From: Md_Mushfiqur Rahim <20mahin20201@gmail.com> Date: Wed, 27 May 2026 02:36:58 +0000 Subject: [PATCH] fix: remove response_format=json_object from chat_json, increase ontology max_tokens Bug 1: chat_json() was passing response_format={'type': 'json_object'} to the LLM, which enforces JSON grammar from token 0. Reasoning models (Qwen3, DeepSeek-R1, etc.) generate think-tagged reasoning blocks before JSON output, causing garbled results. The fix removes the response_format parameter since the system prompt already requests JSON output and the existing cleanup handles any remaining reasoning tags. Bug 2: ontology_generator hardcoded max_tokens=4096, causing truncation for models with larger context windows. Increased to 16384 to accommodate reasoning model outputs. Fixes #642 --- backend/app/services/ontology_generator.py | 2 +- backend/app/utils/llm_client.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/backend/app/services/ontology_generator.py b/backend/app/services/ontology_generator.py index 01a3d799..3a8f2be6 100644 --- a/backend/app/services/ontology_generator.py +++ b/backend/app/services/ontology_generator.py @@ -217,7 +217,7 @@ class OntologyGenerator: result = self.llm_client.chat_json( messages=messages, temperature=0.3, - max_tokens=4096 + max_tokens=16384 ) # 验证和后处理 diff --git a/backend/app/utils/llm_client.py b/backend/app/utils/llm_client.py index 6c1a81f4..b4c6e2f2 100644 --- a/backend/app/utils/llm_client.py +++ b/backend/app/utils/llm_client.py @@ -88,7 +88,6 @@ class LLMClient: messages=messages, temperature=temperature, max_tokens=max_tokens, - response_format={"type": "json_object"} ) # 清理markdown代码块标记 cleaned_response = response.strip()