From 0a3272197b124d7d9a6ac7a6d08038e63338a162 Mon Sep 17 00:00:00 2001
From: Md_Mushfiqur Rahim <20mahin20201@gmail.com>
Date: Wed, 27 May 2026 02:36:58 +0000
Subject: [PATCH] fix: remove response_format=json_object from chat_json,
increase ontology max_tokens
Bug 1: chat_json() was passing response_format={'type': 'json_object'}
to the LLM, which enforces JSON grammar from token 0. Reasoning
models (Qwen3, DeepSeek-R1, etc.) generate <think>...</think> blocks
before JSON output, causing garbled results. The fix removes the
response_format parameter since the system prompt already requests
JSON output and the existing cleanup handles any remaining
<think> tags.
Bug 2: ontology_generator hardcoded max_tokens=4096, causing
truncation for models with larger context windows. Increased to
16384 to accommodate reasoning model outputs.
Fixes #642
---
backend/app/services/ontology_generator.py | 2 +-
backend/app/utils/llm_client.py | 1 -
2 files changed, 1 insertion(+), 2 deletions(-)
diff --git a/backend/app/services/ontology_generator.py b/backend/app/services/ontology_generator.py
index 01a3d799..3a8f2be6 100644
--- a/backend/app/services/ontology_generator.py
+++ b/backend/app/services/ontology_generator.py
@@ -217,7 +217,7 @@ class OntologyGenerator:
result = self.llm_client.chat_json(
messages=messages,
temperature=0.3,
- max_tokens=4096
+ max_tokens=16384
)
# 验证和后处理
diff --git a/backend/app/utils/llm_client.py b/backend/app/utils/llm_client.py
index 6c1a81f4..b4c6e2f2 100644
--- a/backend/app/utils/llm_client.py
+++ b/backend/app/utils/llm_client.py
@@ -88,7 +88,6 @@ class LLMClient:
messages=messages,
temperature=temperature,
max_tokens=max_tokens,
- response_format={"type": "json_object"}
)
# 清理markdown代码块标记
cleaned_response = response.strip()