fix: robust LLM JSON parsing + sanitize report sections + chat history dedup

Closes #624, #622, #601, #599, #577

## LLM JSON parsing (#624 / #622 / #601)
- New `_parse_llm_json()` in llm_client.py with 5-stage fallback:
  1. Strip markdown fences (existing)
  2. Strict json.loads (fast path)
  3. json.JSONDecoder.raw_decode (handles trailing prose after JSON)
  4. Balanced-brace extraction (leading prose + embedded JSON)
  5. Strip control chars + retry
- Replaces the strict json.loads call in chat_json(), which failed whenever
  an LLM appended text after the JSON (common with qwen-plus, ollama and
  gemma, even with response_format=json_object).
- Logs which fallback was used so problematic LLMs are visible.
- 8 unit-test cases covering each strategy.

## Report section tool_call leak (#599)
- New `_sanitize_section_content()` in report_agent.py detects when a
  section's "final answer" is actually an unexecuted tool_call JSON
  (e.g. `{"name":"quick_search","parameters":{...}}`) and replaces it
  with a clear fallback message instead of writing the raw artifact to
  the report.
- Applied at all 3 places where final_answer is returned in
  write_section(): the Final Answer path, the no-prefix fallback, and
  the force-finalize path.

## Chat history duplicate user message (#577)
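- In report_agent.py chat(), defensively filter and dedupe chat_history:
  - Only keep {role, content} from history items whose role is "user" or
    "assistant" and whose content is non-empty
  - Skip user entries whose content equals the current message (compared
    after stripping surrounding whitespace)
- This prevents the LLM from seeing a duplicate trailing user message and
  echoing back the previous answer.
- Added a debug log with the size of the constructed messages array
  (message count, history length, current message length) for diagnostics.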
This commit is contained in:
rqd6f4g6zn-bit 2026-05-17 08:23:16 +02:00
parent fa0f6519b1
commit c2d533d933
2 changed files with 173 additions and 17 deletions

View File

@ -1064,6 +1064,42 @@ class ReportAgent:
# 合法的工具名称集合,用于裸 JSON 兜底解析时校验 # 合法的工具名称集合,用于裸 JSON 兜底解析时校验
VALID_TOOL_NAMES = {"insight_forge", "panorama_search", "quick_search", "interview_agents"} VALID_TOOL_NAMES = {"insight_forge", "panorama_search", "quick_search", "interview_agents"}
def _sanitize_section_content(self, content: str, section_title: str = "") -> str:
    """
    Clean an LLM output before it is written to the report as section content.

    Inline tool-call markup (``<tool_call>...</tool_call>`` and
    ``[TOOL_CALL]...)``) is removed first. If what remains is nothing but a
    single unexecuted tool call for one of ``self.VALID_TOOL_NAMES``
    (e.g. ``{"name":"quick_search","parameters":{...}}``), either as bare JSON
    or wrapped in one markdown code fence, it would be useless in the final
    report and is replaced by a fallback notice
    (see github.com/666ghj/MiroFish#599).

    Args:
        content: Raw text the LLM produced for the section.
        section_title: Section title, used in the log entry and in the
            "no content" placeholder.

    Returns:
        ``content`` unchanged when it is empty/falsy; a placeholder when only
        tool-call markup was present; a fallback notice when the content is a
        pure tool-call leak; otherwise the stripped text with tool-call
        markup removed.
    """
    if not content:
        return content

    cleaned = re.sub(r'<tool_call>.*?</tool_call>', '', content, flags=re.DOTALL)
    cleaned = re.sub(r'\[TOOL_CALL\].*?\)', '', cleaned)
    cleaned = cleaned.strip()
    if not cleaned:
        return f"_(Keine Inhalte verfügbar für: {section_title})_" if section_title else "_(Keine Inhalte)_"

    # LLMs frequently wrap the leaked call in a markdown fence. Unwrap a single
    # fence that spans the whole content, for detection only: the text that is
    # returned to the caller is never altered by this step.
    candidate = cleaned
    fenced = re.fullmatch(r'```[A-Za-z0-9_-]*\s*(.*?)\s*```', cleaned, flags=re.DOTALL)
    if fenced:
        candidate = fenced.group(1)

    # Detect: is the entire content one single tool-call JSON object?
    try:
        parsed = json.loads(candidate)
    except (json.JSONDecodeError, TypeError):
        return cleaned
    if not isinstance(parsed, dict):
        return cleaned

    tool_name = parsed.get("name") or parsed.get("tool")
    # Only a string can name a tool; anything else is ordinary JSON content.
    if not isinstance(tool_name, str) or tool_name not in self.VALID_TOOL_NAMES:
        return cleaned

    logger.warning(
        "Section '%s' content is raw tool_call (tool=%s) — replaced with fallback",
        section_title, tool_name
    )
    return (
        f"_(Hinweis: Für diesen Abschnitt konnte das Tool `{tool_name}` "
        f"innerhalb der Iterations-Limits nicht ausgeführt werden. "
        f"Bitte Report neu generieren oder Modell-Konfiguration prüfen.)_"
    )
def _parse_tool_calls(self, response: str) -> List[Dict[str, Any]]: def _parse_tool_calls(self, response: str) -> List[Dict[str, Any]]:
""" """
从LLM响应中解析工具调用 从LLM响应中解析工具调用
@ -1390,6 +1426,7 @@ class ReportAgent:
# 正常结束 # 正常结束
final_answer = response.split("Final Answer:")[-1].strip() final_answer = response.split("Final Answer:")[-1].strip()
final_answer = self._sanitize_section_content(final_answer, section.title)
logger.info(t('report.sectionGenDone', title=section.title, count=tool_calls_count)) logger.info(t('report.sectionGenDone', title=section.title, count=tool_calls_count))
if self.report_logger: if self.report_logger:
@ -1488,7 +1525,7 @@ class ReportAgent:
# 工具调用已足够LLM 输出了内容但没带 "Final Answer:" 前缀 # 工具调用已足够LLM 输出了内容但没带 "Final Answer:" 前缀
# 直接将这段内容作为最终答案,不再空转 # 直接将这段内容作为最终答案,不再空转
logger.info(t('report.sectionNoPrefix', title=section.title, count=tool_calls_count)) logger.info(t('report.sectionNoPrefix', title=section.title, count=tool_calls_count))
final_answer = response.strip() final_answer = self._sanitize_section_content(response.strip(), section.title)
if self.report_logger: if self.report_logger:
self.report_logger.log_section_content( self.report_logger.log_section_content(
@ -1517,6 +1554,8 @@ class ReportAgent:
final_answer = response.split("Final Answer:")[-1].strip() final_answer = response.split("Final Answer:")[-1].strip()
else: else:
final_answer = response final_answer = response
# Sanitize: kein raw tool_call JSON als Content schreiben (Bug #599)
final_answer = self._sanitize_section_content(final_answer, section.title)
# 记录章节内容生成完成日志 # 记录章节内容生成完成日志
if self.report_logger: if self.report_logger:
@ -1809,16 +1848,33 @@ class ReportAgent:
# 构建消息 # 构建消息
messages = [{"role": "system", "content": system_prompt}] messages = [{"role": "system", "content": system_prompt}]
# 添加历史对话 # 添加历史对话— defensiv: nur {role, content}, keine Duplikate der aktuellen Frage
for h in chat_history[-10:]: # 限制历史长度 # Fix für github.com/666ghj/MiroFish#577 (Chat wiederholt erste Antwort):
messages.append(h) # Wenn das Frontend versehentlich die aktuelle User-Nachricht im chat_history mitschickt,
# würde der LLM die Frage als "schon gestellt" sehen und die alte Antwort wiederholen.
for h in chat_history[-10:]:
if not isinstance(h, dict):
continue
role = h.get("role")
content = h.get("content")
if role not in ("user", "assistant") or not content:
continue
# Skip falls dies bereits die aktuelle User-Frage ist
if role == "user" and content.strip() == message.strip():
continue
messages.append({"role": role, "content": content})
# 添加用户消息 # 添加用户消息
messages.append({ messages.append({
"role": "user", "role": "user",
"content": message "content": message
}) })
logger.debug(
"report_agent.chat: total_messages=%d history_len=%d current_msg_len=%d",
len(messages), len(chat_history), len(message)
)
# ReACT循环简化版 # ReACT循环简化版
tool_calls_made = [] tool_calls_made = []

View File

@ -5,11 +5,120 @@ LLM客户端封装
import json import json
import re import re
import logging
from typing import Optional, Dict, Any, List from typing import Optional, Dict, Any, List
from openai import OpenAI from openai import OpenAI
from ..config import Config from ..config import Config
logger = logging.getLogger(__name__)

# Control characters stripped by the last-resort pass. Tab, LF and CR are kept
# on purpose: they are legal JSON whitespace between tokens.
_JSON_CONTROL_CHARS = re.compile(r'[\x00-\x08\x0b\x0c\x0e-\x1f]')


def _balanced_object_spans(text: str):
    """Yield ``(start, end)`` for each top-level balanced ``{...}`` span in *text*.

    Braces inside double-quoted strings are ignored and backslash escapes are
    honoured. Scanning stops at the first span whose braces never balance
    (e.g. a truncated response), so nested fragments of a broken document are
    never offered as candidates.
    """
    start = text.find('{')
    while start >= 0:
        depth = 0
        in_string = False
        escaped = False
        end = -1
        for idx in range(start, len(text)):
            ch = text[idx]
            if escaped:
                escaped = False
            elif in_string:
                if ch == '\\':
                    escaped = True
                elif ch == '"':
                    in_string = False
            elif ch == '"':
                in_string = True
            elif ch == '{':
                depth += 1
            elif ch == '}':
                depth -= 1
                if depth == 0:
                    end = idx + 1
                    break
        if end < 0:
            return
        yield start, end
        start = text.find('{', end)


def _decode_embedded_json(text: str, decoder: json.JSONDecoder) -> Optional[Dict[str, Any]]:
    """Recover a JSON object from *text* that is surrounded by other text.

    First tries to decode a JSON prefix (trailing prose is ignored), then
    tries each top-level balanced ``{...}`` span in order. Returns ``None``
    when nothing could be decoded.
    """
    # Prefix decode: handles "<json> some trailing explanation".
    try:
        obj, end_idx = decoder.raw_decode(text)
    except json.JSONDecodeError:
        pass
    else:
        if isinstance(obj, (dict, list)):
            trailing = text[end_idx:].strip()
            if trailing:
                logger.warning(
                    "LLM appended trailing text after JSON (%d chars), ignored. Preview: %s",
                    len(trailing), trailing[:120]
                )
            # A top-level list is wrapped so callers always receive a dict.
            return obj if isinstance(obj, dict) else {"items": obj}

    # Embedded object: handles leading prose. A candidate that does not parse
    # (e.g. a "{key: value}" placeholder in the prose) is skipped instead of
    # aborting the search.
    for start, end in _balanced_object_spans(text):
        try:
            result = decoder.decode(text[start:end])
        except json.JSONDecodeError:
            continue
        logger.warning(
            "Extracted JSON from messy LLM output (%d chars before, %d after)",
            start, len(text) - end
        )
        return result
    return None


def _parse_llm_json(response: str) -> Dict[str, Any]:
    """
    Robust JSON parser for LLM outputs.

    LLMs (especially qwen, gemma and ollama-served models) often append
    trailing text after the JSON, even with response_format=json_object, and
    frequently wrap the JSON in ```json ... ``` markdown fences.

    Strategy:
    1. Remove a leading/trailing markdown fence (language tag is matched
       case-insensitively).
    2. json.loads (strict, fast path). A document that parses here is returned
       as-is, whatever its top-level type.
    3. raw_decode (parses the JSON prefix, ignores trailing text); a top-level
       list is wrapped as {"items": [...]}.
    4. Balanced-brace extraction: each top-level {...} span is tried in order.
    5. Last resort: strip control characters, allow raw newlines/tabs inside
       strings (strict=False), then repeat the whole-document, prefix and
       balanced-brace attempts.

    Args:
        response: Raw text returned by the LLM.

    Returns:
        The parsed JSON value (normally a dict).

    Raises:
        ValueError: If the response is empty or no strategy succeeds. In the
            latter case the message carries a snippet of the response and the
            original json.JSONDecodeError is chained as ``__cause__``.

    Fixes:
    - github.com/666ghj/MiroFish#624 ("Unexpected non-whitespace character after JSON at position N")
    - github.com/666ghj/MiroFish#622 (duplicate)
    - github.com/666ghj/MiroFish#601 (500 error on ontology/generate with qwen-plus/ollama)
    """
    if not response or not response.strip():
        raise ValueError("LLM lieferte leere Antwort")

    # 1. Strip markdown fences
    cleaned = response.strip()
    cleaned = re.sub(r'^```(?:json)?\s*\n?', '', cleaned, flags=re.IGNORECASE)
    cleaned = re.sub(r'\n?```\s*$', '', cleaned)
    cleaned = cleaned.strip()

    # 2. Fast path: the whole text is valid JSON
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError as exc:
        first_error = exc

    # 3. + 4. JSON surrounded by prose
    recovered = _decode_embedded_json(cleaned, json.JSONDecoder())
    if recovered is not None:
        return recovered

    # 5. Last resort: drop stray control characters and accept raw control
    #    characters (newline, tab, ...) inside string values.
    sanitized = _JSON_CONTROL_CHARS.sub('', cleaned)
    tolerant = json.JSONDecoder(strict=False)
    try:
        parsed = tolerant.decode(sanitized)
    except json.JSONDecodeError:
        recovered = _decode_embedded_json(sanitized, tolerant)
        if recovered is not None:
            return recovered
    else:
        logger.warning(
            "Parsed LLM JSON only after removing/tolerating control characters"
        )
        return parsed

    # Every strategy failed: raise a descriptive error
    snippet = cleaned[:200] + ('...' if len(cleaned) > 200 else '')
    raise ValueError(
        f"LLM返回的JSON格式无效 (alle Parse-Strategien fehlgeschlagen): "
        f"first_error={first_error.msg} at pos {first_error.pos}. "
        f"Response-Preview: {snippet}"
    ) from first_error
class LLMClient: class LLMClient:
"""LLM客户端""" """LLM客户端"""
@ -90,14 +199,5 @@ class LLMClient:
max_tokens=max_tokens, max_tokens=max_tokens,
response_format={"type": "json_object"} response_format={"type": "json_object"}
) )
# 清理markdown代码块标记 return _parse_llm_json(response)
cleaned_response = response.strip()
cleaned_response = re.sub(r'^```(?:json)?\s*\n?', '', cleaned_response, flags=re.IGNORECASE)
cleaned_response = re.sub(r'\n?```\s*$', '', cleaned_response)
cleaned_response = cleaned_response.strip()
try:
return json.loads(cleaned_response)
except json.JSONDecodeError:
raise ValueError(f"LLM返回的JSON格式无效: {cleaned_response}")