fix: strip fabricated tool_result blocks to prevent LLM hallucination

When the LLM generates a <tool_call> block followed by a self-generated
<tool_result> block in the same response, the fake result must be stripped
before appending to message history. The real tool result will be injected
separately by the system.

This fixes a React Hallucination bug where models could fabricate tool
results that didn't actually come from tool invocations.
This commit is contained in:
MiroFish Contributor 2026-04-21 00:02:58 +08:00
parent fa0f6519b1
commit 3ea9d10d25
1 changed files with 35 additions and 6 deletions

View File

@ -664,6 +664,12 @@ SECTION_SYSTEM_PROMPT_TEMPLATE = """\
- 不要添加模拟中不存在的信息
- 如果某方面信息不足如实说明
5. 禁止捏造数据
- 禁止捏造用户名引用统计数字或互动数据
- 禁止在回复中包含 <tool_result> 只有系统会提供工具结果
- 只能引用真实出现在工具结果中的实体引用和数据
- 如果工具结果中没有相关内容应如实说明而非编造
格式规范 - 极其重要
@ -1134,6 +1140,25 @@ class ReportAgent:
desc_parts.append(f" 参数: {params_desc}")
return "\n".join(desc_parts)
@staticmethod
def _strip_fake_tool_results(response: str) -> str:
"""Strip any <tool_result> blocks the LLM fabricated in its response.
When the LLM generates a <tool_call> block and then continues to generate
a <tool_result> block in the same response, we must strip the fake result
before appending to message history. The real tool result will be injected
separately by the system.
"""
import re
cleaned = re.sub(
r'<tool_result>.*?</tool_result>',
'',
response,
flags=re.DOTALL,
)
cleaned = re.sub(r'\n{3,}', '\n\n', cleaned)
return cleaned.strip()
def plan_outline(
self,
progress_callback: Optional[Callable] = None
@ -1335,7 +1360,8 @@ class ReportAgent:
if conflict_retries <= 2:
# 前两次:丢弃本次响应,要求 LLM 重新回复
messages.append({"role": "assistant", "content": response})
cleaned_response = ReportAgent._strip_fake_tool_results(response)
messages.append({"role": "assistant", "content": cleaned_response})
messages.append({
"role": "user",
"content": (
@ -1375,7 +1401,8 @@ class ReportAgent:
if has_final_answer:
# 工具调用次数不足,拒绝并要求继续调工具
if tool_calls_count < min_tool_calls:
messages.append({"role": "assistant", "content": response})
cleaned_response = ReportAgent._strip_fake_tool_results(response)
messages.append({"role": "assistant", "content": cleaned_response})
unused_tools = all_tools - used_tools
unused_hint = f"(这些工具还未使用,推荐用一下他们: {', '.join(unused_tools)}" if unused_tools else ""
messages.append({
@ -1451,9 +1478,10 @@ class ReportAgent:
unused_tools = all_tools - used_tools
unused_hint = ""
if unused_tools and tool_calls_count < self.MAX_TOOL_CALLS_PER_SECTION:
unused_hint = REACT_UNUSED_TOOLS_HINT.format(unused_list="".join(unused_tools))
unlock_hint = REACT_UNUSED_TOOLS_HINT.format(unused_list="".join(unused_tools))
messages.append({"role": "assistant", "content": response})
cleaned_response = ReportAgent._strip_fake_tool_results(response)
messages.append({"role": "assistant", "content": cleaned_response})
messages.append({
"role": "user",
"content": REACT_OBSERVATION_TEMPLATE.format(
@ -1857,7 +1885,8 @@ class ReportAgent:
tool_calls_made.append(call)
# 将结果添加到消息
messages.append({"role": "assistant", "content": response})
cleaned_response = ReportAgent._strip_fake_tool_results(response)
messages.append({"role": "assistant", "content": cleaned_response})
observation = "\n".join([f"[{r['tool']}结果]\n{r['result']}" for r in tool_results])
messages.append({
"role": "user",