fix: strip hallucinated <tool_result> blocks in ReACT loop

Prevents LLM-fabricated tool results from being appended to conversation
history. Adds _sanitize_tool_results() and applies it to both
_generate_section_react() and chat() code paths.

System prompt is hardened with an explicit warning against fabricating
<tool_result> content.

Closes #529
This commit is contained in:
Md_Mushfiqur Rahim 2026-05-28 00:52:25 +00:00
parent 96096ea0ff
commit 11c525a1ac
1 changed files with 20 additions and 2 deletions

View File

@ -732,6 +732,7 @@ SECTION_SYSTEM_PROMPT_TEMPLATE = """\
严格禁止
- 禁止在一次回复中同时包含工具调用和 Final Answer
- 禁止自行编写 <tool_result> 内容。工具结果会由系统真实执行后返回；如果你在回复中包含 <tool_result>（无论是否包含内容），系统将自动将其从对话历史中删除，不会被视为真实工具执行结果。
- 禁止自己编造工具返回结果（Observation），所有工具结果由系统注入
- 每次回复最多调用一个工具
@ -1218,6 +1219,14 @@ class ReportAgent:
]
)
@staticmethod
def _sanitize_tool_results(response: str) -> str:
    """Remove LLM-fabricated ``<tool_result>`` blocks from a response.

    Every ``<tool_result>...</tool_result>`` span is deleted so that
    fabricated content cannot pollute the message history. A warning is
    logged when at least one block was removed.

    Args:
        response: Raw text returned by the LLM. A falsy value (``None``
            or an empty string) is returned unchanged, so callers do not
            need to guard the call themselves.

    Returns:
        The response with all ``<tool_result>`` blocks removed, or the
        input itself when it is falsy.
    """
    # re.sub() raises TypeError on None; pass falsy input straight through.
    if not response:
        return response

    # Non-greedy match keeps separate blocks separate; DOTALL lets a block
    # span multiple lines. subn() also reports how many blocks were removed.
    cleaned, removed = re.subn(
        r'<tool_result>.*?</tool_result>', '', response, flags=re.DOTALL
    )
    if removed:
        logger.warning("Stripped hallucinated <tool_result> block(s) from LLM response")
    return cleaned
def _generate_section_react(
self,
section: ReportSection,
@ -1308,6 +1317,10 @@ class ReportAgent:
max_tokens=4096
)
# 清洗:移除 LLM 自行编造的 <tool_result> 块(防止虚假内容污染消息历史)
if response:
response = self._sanitize_tool_results(response)
# 检查 LLM 返回是否为 None（API 异常或内容为空）
if response is None:
logger.warning(t('report.sectionIterNone', title=section.title, iteration=iteration + 1))
@ -1829,9 +1842,12 @@ class ReportAgent:
messages=messages,
temperature=0.5
)
if response:
response = self._sanitize_tool_results(response)
# 解析工具调用
tool_calls = self._parse_tool_calls(response)
tool_calls = self._parse_tool_calls(response)
if not tool_calls:
# 没有工具调用,直接返回响应
@ -1869,6 +1885,8 @@ class ReportAgent:
messages=messages,
temperature=0.5
)
if final_response:
final_response = self._sanitize_tool_results(final_response)
# 清理响应
clean_response = re.sub(r'<tool_call>.*?</tool_call>', '', final_response, flags=re.DOTALL)