From 0a3272197b124d7d9a6ac7a6d08038e63338a162 Mon Sep 17 00:00:00 2001
From: Md_Mushfiqur Rahim <20mahin20201@gmail.com>
Date: Wed, 27 May 2026 02:36:58 +0000
Subject: [PATCH] fix: remove response_format=json_object from chat_json,
 increase ontology max_tokens

Bug 1: chat_json() was passing response_format={'type': 'json_object'}
to the LLM, which constrains decoding to JSON grammar from the first
token. Reasoning models (Qwen3, DeepSeek-R1, etc.) emit a
<think>...</think> block before the JSON output, so the constraint
produced garbled results. The fix removes the response_format
parameter: the system prompt already requests JSON output, and the
existing <think> cleanup handles any remaining tags.

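Bug 2: ontology_generator hardcoded max_tokens=4096, which truncated
long responses even on models with larger context windows. Reasoning
models in particular spend part of that output budget on <think>
content before the JSON. Increased the limit to 16384 to accommodate
reasoning model outputs.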

Fixes #642
---
 backend/app/services/ontology_generator.py | 2 +-
 backend/app/utils/llm_client.py            | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)
diff --git a/backend/app/services/ontology_generator.py b/backend/app/services/ontology_generator.py
index 01a3d799..3a8f2be6 100644
--- a/backend/app/services/ontology_generator.py
+++ b/backend/app/services/ontology_generator.py
@@ -217,7 +217,7 @@ class OntologyGenerator:
         result = self.llm_client.chat_json(
             messages=messages,
             temperature=0.3,
-            max_tokens=4096
+            max_tokens=16384
         )
         
         # 验证和后处理
diff --git a/backend/app/utils/llm_client.py b/backend/app/utils/llm_client.py
index 6c1a81f4..b4c6e2f2 100644
--- a/backend/app/utils/llm_client.py
+++ b/backend/app/utils/llm_client.py
@@ -88,7 +88,6 @@ class LLMClient:
             messages=messages,
             temperature=temperature,
             max_tokens=max_tokens,
-            response_format={"type": "json_object"}
         )
         # 清理markdown代码块标记
         cleaned_response = response.strip()