MicroFish/backend/app/services/report_agent.py

2584 lines
101 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Report Agent Service
Implements ReACT-pattern simulation report generation using Zep.
Features:
1. Generates reports based on simulation requirements and Zep graph data
2. First plans the table of contents, then generates content section by section
3. Each section uses multi-round ReACT thinking and reflection
4. Supports user conversation in which the agent autonomously calls retrieval tools
"""
import os
import json
import time
import re
from typing import Dict, Any, List, Optional, Callable
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from ..config import Config
from ..utils.llm_client import LLMClient
from ..utils.logger import get_logger
from ..utils.locale import get_language_instruction, t
from .zep_tools import (
ZepToolsService,
SearchResult,
InsightForgeResult,
PanoramaResult,
InterviewResult
)
# Module-level logger for the report agent. The name 'mirofish.report_agent'
# is one of the logger names ReportConsoleLogger attaches its per-report file
# handler to (assuming get_logger returns the standard `logging` logger
# registered under this name — TODO confirm against utils.logger).
logger = get_logger('mirofish.report_agent')
class ReportLogger:
    """
    Structured step-by-step log writer for the Report Agent.

    Appends to ``agent_log.jsonl`` inside the report folder. Every line is one
    self-contained JSON object carrying a timestamp, the seconds elapsed since
    this logger was created, the report ID, an action type, a stage, optional
    section information and a ``details`` dictionary.
    """

    def __init__(self, report_id: str):
        """
        Prepare the logger for one report.

        Args:
            report_id: Report ID; selects the folder the log file lives in
        """
        self.report_id = report_id
        self.log_file_path = os.path.join(
            Config.UPLOAD_FOLDER, 'reports', report_id, 'agent_log.jsonl'
        )
        # Reference point for the "elapsed_seconds" field of every entry
        self.start_time = datetime.now()
        self._ensure_log_file()

    def _ensure_log_file(self):
        """Create the log file's parent directory when it does not exist yet"""
        os.makedirs(os.path.dirname(self.log_file_path), exist_ok=True)

    def _get_elapsed_time(self) -> float:
        """Seconds that have passed since this logger was constructed"""
        delta = datetime.now() - self.start_time
        return delta.total_seconds()

    def log(
        self,
        action: str,
        stage: str,
        details: Dict[str, Any],
        section_title: str = None,
        section_index: int = None
    ):
        """
        Append one entry to the JSONL log.

        Args:
            action: Action type, e.g. 'start', 'tool_call', 'llm_response', 'section_complete'
            stage: Current stage, e.g. 'planning', 'generating', 'completed'
            details: Dictionary of detailed content (written as-is, not truncated)
            section_title: Title of the section being worked on (optional)
            section_index: Index of the section being worked on (optional)
        """
        # Keyword order here is the key order of the emitted JSON object
        entry = dict(
            timestamp=datetime.now().isoformat(),
            elapsed_seconds=round(self._get_elapsed_time(), 2),
            report_id=self.report_id,
            action=action,
            stage=stage,
            section_title=section_title,
            section_index=section_index,
            details=details,
        )
        # One JSON document per line; non-ASCII text is written unescaped
        with open(self.log_file_path, 'a', encoding='utf-8') as sink:
            sink.write(json.dumps(entry, ensure_ascii=False) + '\n')

    def _log_generating(
        self,
        action: str,
        section_title: str,
        section_index: int,
        details: Dict[str, Any]
    ):
        """Write a section-scoped entry whose stage is always 'generating'"""
        self.log(
            action=action,
            stage="generating",
            section_title=section_title,
            section_index=section_index,
            details=details
        )

    def log_start(self, simulation_id: str, graph_id: str, simulation_requirement: str):
        """Log that report generation has started"""
        payload = {
            "simulation_id": simulation_id,
            "graph_id": graph_id,
            "simulation_requirement": simulation_requirement,
            "message": t('report.taskStarted')
        }
        self.log(action="report_start", stage="pending", details=payload)

    def log_planning_start(self):
        """Log that outline planning has started"""
        payload = {"message": t('report.planningStart')}
        self.log(action="planning_start", stage="planning", details=payload)

    def log_planning_context(self, context: Dict[str, Any]):
        """Log the context information gathered for planning"""
        payload = {
            "message": t('report.fetchSimContext'),
            "context": context
        }
        self.log(action="planning_context", stage="planning", details=payload)

    def log_planning_complete(self, outline_dict: Dict[str, Any]):
        """Log that outline planning has finished, together with the outline"""
        payload = {
            "message": t('report.planningComplete'),
            "outline": outline_dict
        }
        self.log(action="planning_complete", stage="planning", details=payload)

    def log_section_start(self, section_title: str, section_index: int):
        """Log that generation of a section has started"""
        payload = {"message": t('report.sectionStart', title=section_title)}
        self._log_generating("section_start", section_title, section_index, payload)

    def log_react_thought(self, section_title: str, section_index: int, iteration: int, thought: str):
        """Log one ReACT thinking step"""
        payload = {
            "iteration": iteration,
            "thought": thought,
            "message": t('report.reactThought', iteration=iteration)
        }
        self._log_generating("react_thought", section_title, section_index, payload)

    def log_tool_call(
        self,
        section_title: str,
        section_index: int,
        tool_name: str,
        parameters: Dict[str, Any],
        iteration: int
    ):
        """Log that a tool is being called, with its parameters"""
        payload = {
            "iteration": iteration,
            "tool_name": tool_name,
            "parameters": parameters,
            "message": t('report.toolCall', toolName=tool_name)
        }
        self._log_generating("tool_call", section_title, section_index, payload)

    def log_tool_result(
        self,
        section_title: str,
        section_index: int,
        tool_name: str,
        result: str,
        iteration: int
    ):
        """Log what a tool returned (the complete text, never truncated)"""
        payload = {
            "iteration": iteration,
            "tool_name": tool_name,
            "result": result,  # complete result text
            "result_length": len(result),
            "message": t('report.toolResult', toolName=tool_name)
        }
        self._log_generating("tool_result", section_title, section_index, payload)

    def log_llm_response(
        self,
        section_title: str,
        section_index: int,
        response: str,
        iteration: int,
        has_tool_calls: bool,
        has_final_answer: bool
    ):
        """Log an LLM reply (the complete text, never truncated)"""
        payload = {
            "iteration": iteration,
            "response": response,  # complete response text
            "response_length": len(response),
            "has_tool_calls": has_tool_calls,
            "has_final_answer": has_final_answer,
            "message": t('report.llmResponse', hasToolCalls=has_tool_calls, hasFinalAnswer=has_final_answer)
        }
        self._log_generating("llm_response", section_title, section_index, payload)

    def log_section_content(
        self,
        section_title: str,
        section_index: int,
        content: str,
        tool_calls_count: int
    ):
        """Log the generated section content (content only; this does not mark the whole section as done)"""
        payload = {
            "content": content,  # complete content text
            "content_length": len(content),
            "tool_calls_count": tool_calls_count,
            "message": t('report.sectionContentDone', title=section_title)
        }
        self._log_generating("section_content", section_title, section_index, payload)

    def log_section_full_complete(
        self,
        section_title: str,
        section_index: int,
        full_content: str
    ):
        """
        Log that a section is completely finished.

        The frontend should watch for this entry ("section_complete") to decide
        whether a section is truly done and to pick up its full content.
        """
        payload = {
            "content": full_content,
            "content_length": len(full_content),
            "message": t('report.sectionComplete', title=section_title)
        }
        self._log_generating("section_complete", section_title, section_index, payload)

    def log_report_complete(self, total_sections: int, total_time_seconds: float):
        """Log that the whole report has been generated"""
        payload = {
            "total_sections": total_sections,
            "total_time_seconds": round(total_time_seconds, 2),
            "message": t('report.reportComplete')
        }
        self.log(action="report_complete", stage="completed", details=payload)

    def log_error(self, error_message: str, stage: str, section_title: str = None):
        """Log an error that occurred in the given stage"""
        payload = {
            "error": error_message,
            "message": t('report.errorOccurred', error=error_message)
        }
        self.log(
            action="error",
            stage=stage,
            section_title=section_title,
            section_index=None,
            details=payload
        )
class ReportConsoleLogger:
    """
    Console log recorder for the Report Agent.

    Mirrors console-style log messages (INFO and above) into a
    ``console_log.txt`` file inside the report folder by attaching a
    ``logging.FileHandler`` to the report-related loggers. Unlike
    agent_log.jsonl, this file uses a plain-text format.

    Call :meth:`close` when the report is finished so the handler is detached
    and the file is released; ``__del__`` does this as a last resort.
    """

    # Loggers whose records are mirrored into the file. Kept in one place so
    # attaching and detaching always operate on the same set of loggers.
    _TARGET_LOGGER_NAMES = (
        'mirofish.report_agent',
        'mirofish.zep_tools',
    )

    def __init__(self, report_id: str):
        """
        Initialise the console log recorder.

        Args:
            report_id: Report ID used to determine the log file path
        """
        # Assigned before anything that can raise, so close()/__del__ always
        # find the attribute even when construction fails half-way.
        self._file_handler = None
        self.report_id = report_id
        self.log_file_path = os.path.join(
            Config.UPLOAD_FOLDER, 'reports', report_id, 'console_log.txt'
        )
        self._ensure_log_file()
        self._setup_file_handler()

    def _ensure_log_file(self):
        """Ensure the directory containing the log file exists"""
        os.makedirs(os.path.dirname(self.log_file_path), exist_ok=True)

    def _setup_file_handler(self):
        """Create the file handler and attach it to the report-related loggers"""
        import logging

        # Release a previously installed handler first; otherwise it would
        # stay attached to the loggers with no reference left to remove it.
        self.close()

        handler = logging.FileHandler(
            self.log_file_path,
            mode='a',
            encoding='utf-8'
        )
        handler.setLevel(logging.INFO)
        # Use the same concise format as the console
        handler.setFormatter(logging.Formatter(
            '[%(asctime)s] %(levelname)s: %(message)s',
            datefmt='%H:%M:%S'
        ))
        self._file_handler = handler

        for logger_name in self._TARGET_LOGGER_NAMES:
            target_logger = logging.getLogger(logger_name)
            # Avoid attaching the same handler twice
            if handler not in target_logger.handlers:
                target_logger.addHandler(handler)

    def close(self):
        """
        Detach the file handler from the loggers and close it.

        Safe to call repeatedly, and safe on an instance whose construction
        did not get as far as creating the handler.
        """
        import logging

        handler = getattr(self, '_file_handler', None)
        if handler is None:
            return
        # Drop the reference first so a second close() is a no-op even if
        # detaching below fails.
        self._file_handler = None
        try:
            for logger_name in self._TARGET_LOGGER_NAMES:
                target_logger = logging.getLogger(logger_name)
                if handler in target_logger.handlers:
                    target_logger.removeHandler(handler)
        finally:
            handler.close()

    def __del__(self):
        """Ensure the file handler is closed on destruction"""
        try:
            self.close()
        except Exception:
            # A finalizer may run while the interpreter is shutting down and
            # modules are already torn down; there is nothing useful left to
            # do, and raising from __del__ only produces noise.
            pass
class ReportStatus(str, Enum):
    """
    Lifecycle state of a report.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value.
    """

    PENDING = "pending"        # created, work not started yet
    PLANNING = "planning"      # outline is being planned
    GENERATING = "generating"  # sections are being written
    COMPLETED = "completed"    # report finished
    FAILED = "failed"          # generation failed
@dataclass
class ReportSection:
    """A single titled section of a report together with its body text"""
    title: str
    content: str = ""

    def to_dict(self) -> Dict[str, Any]:
        """Return the section as a plain dict with "title" and "content" keys"""
        return dict(title=self.title, content=self.content)

    def to_markdown(self, level: int = 2) -> str:
        """
        Render the section as Markdown.

        Args:
            level: Heading depth, i.e. how many '#' characters precede the title

        Returns:
            The heading followed by a blank line, then (only when the section
            has content) the content followed by a blank line.
        """
        pieces = ["{} {}\n\n".format('#' * level, self.title)]
        if self.content:
            pieces.append("{}\n\n".format(self.content))
        return "".join(pieces)
@dataclass
class ReportOutline:
    """Outline of a report: its title, a summary and the ordered sections"""
    title: str
    summary: str
    sections: List[ReportSection]

    def to_dict(self) -> Dict[str, Any]:
        """Return the outline as a plain dict; each section is serialised via its own to_dict()"""
        return dict(
            title=self.title,
            summary=self.summary,
            sections=[section.to_dict() for section in self.sections],
        )

    def to_markdown(self) -> str:
        """
        Render the outline as Markdown.

        The title becomes a level-1 heading, the summary a block quote, and
        every section is appended using its default heading level.
        """
        header = f"# {self.title}\n\n" + f"> {self.summary}\n\n"
        body = "".join(section.to_markdown() for section in self.sections)
        return header + body
@dataclass
class Report:
    """A complete report: identifiers, status, outline, rendered Markdown and bookkeeping fields"""
    report_id: str
    simulation_id: str
    graph_id: str
    simulation_requirement: str
    status: ReportStatus
    outline: Optional[ReportOutline] = None
    markdown_content: str = ""
    created_at: str = ""
    completed_at: str = ""
    error: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """
        Return the report as a plain dict.

        The status is emitted as its string value and the outline as a nested
        dict, or None when no outline has been set.
        """
        if self.outline:
            outline_payload = self.outline.to_dict()
        else:
            outline_payload = None

        return dict(
            report_id=self.report_id,
            simulation_id=self.simulation_id,
            graph_id=self.graph_id,
            simulation_requirement=self.simulation_requirement,
            status=self.status.value,
            outline=outline_payload,
            markdown_content=self.markdown_content,
            created_at=self.created_at,
            completed_at=self.completed_at,
            error=self.error,
        )
# ═══════════════════════════════════════════════════════════════
# Prompt template constants
# ═══════════════════════════════════════════════════════════════
# ── Tool descriptions ──
# LLM-facing description of the "insight_forge" tool; used as that tool's
# "description" entry in ReportAgent._define_tools(). The backslash after the
# opening quotes keeps the text from starting with an empty line.
TOOL_DESC_INSIGHT_FORGE = """\
[Deep Insight Retrieval - Powerful Retrieval Tool]
This is our powerful retrieval function, designed specifically for in-depth analysis. It will:
1. Automatically decompose your question into multiple sub-questions
2. Retrieve information from the simulation graph across multiple dimensions
3. Integrate results from semantic search, entity analysis, and relationship-chain tracing
4. Return the most comprehensive and in-depth retrieved content
[Use cases]
- Need to analyse a topic in depth
- Need to understand multiple aspects of an event
- Need rich material to support a report section
[Returns]
- Verbatim relevant facts (can be quoted directly)
- Core entity insights
- Relationship-chain analysis"""
# LLM-facing description of the "panorama_search" tool; used as that tool's
# "description" entry in ReportAgent._define_tools().
TOOL_DESC_PANORAMA_SEARCH = """\
[Panorama Search - Get the Full Picture]
This tool is used to obtain a complete overview of the simulation results, and is
especially suited to understanding how events evolved. It will:
1. Retrieve all relevant nodes and relationships
2. Distinguish between currently valid facts and historical/expired facts
3. Help you understand how public opinion has evolved
[Use cases]
- Need to understand the complete development trajectory of an event
- Need to compare public-opinion changes across different stages
- Need comprehensive entity and relationship information
[Returns]
- Currently valid facts (latest simulation results)
- Historical/expired facts (evolution record)
- All entities involved"""
# LLM-facing description of the "quick_search" tool; used as that tool's
# "description" entry in ReportAgent._define_tools().
TOOL_DESC_QUICK_SEARCH = """\
[Quick Search - Fast Retrieval]
A lightweight, fast retrieval tool suited to simple, direct information queries.
[Use cases]
- Need to quickly look up a specific piece of information
- Need to verify a fact
- Simple information retrieval
[Returns]
- A list of facts most relevant to the query"""
# LLM-facing description of the "interview_agents" tool; used as that tool's
# "description" entry in ReportAgent._define_tools(). The text itself states
# the precondition that the OASIS simulation environment must be running.
TOOL_DESC_INTERVIEW_AGENTS = """\
[In-Depth Interview - Real Agent Interviews (Dual Platform)]
Calls the OASIS simulation environment's interview API to conduct real interviews with
currently running simulation agents!
This is not LLM simulation — it calls the real interview endpoint to obtain raw answers
from simulation agents.
Interviews are conducted simultaneously on both Twitter and Reddit by default, providing
more comprehensive perspectives.
Workflow:
1. Automatically reads persona files to learn about all simulation agents
2. Intelligently selects agents most relevant to the interview topic (e.g. students, media, officials)
3. Automatically generates interview questions
4. Calls the /api/simulation/interview/batch endpoint to conduct real interviews on both platforms
5. Integrates all interview results to provide multi-perspective analysis
[Use cases]
- Need to understand how different roles view an event (what do students think? media? officials?)
- Need to collect opinions and positions from multiple parties
- Need real answers from simulation agents (from the OASIS simulation environment)
- Want to make the report more vivid by including "interview transcripts"
[Returns]
- Identity information of interviewed agents
- Each agent's interview responses on Twitter and Reddit
- Key quotes (can be cited directly)
- Interview summary and comparison of viewpoints
[Important] The OASIS simulation environment must be running to use this feature!"""
# ── Outline planning prompt ──
# Prompt for the outline-planning step (presumably sent as the system message —
# confirm at the call site). It asks the model to return a JSON outline with a
# title, a summary and 2-5 sections.
# NOTE: the JSON example below uses single, unescaped braces, so this string
# must be used verbatim; passing it through str.format() would fail on them.
PLAN_SYSTEM_PROMPT = """\
You are an expert writer of "Future Prediction Reports" with a "god's-eye view" of the simulated world — you can observe the behaviour, statements, and interactions of every Agent in the simulation.
[Core concept]
We have built a simulated world and injected a specific "simulation requirement" into it as a variable. The outcome of the simulated world's evolution is a prediction of what may happen in the future. What you are observing is not "experimental data" but a "rehearsal of the future".
[Your task]
Write a "Future Prediction Report" that answers:
1. Under the conditions we set, what happened in the future?
2. How did the various types of Agents (population groups) react and act?
3. What future trends and risks worth attention did this simulation reveal?
[Report positioning]
- ✅ This is a simulation-based future prediction report that reveals "if this happens, what will the future look like"
- ✅ Focus on predicted outcomes: how events unfold, group reactions, emergent phenomena, potential risks
- ✅ The words and actions of Agents in the simulated world are predictions of future human behaviour
- ❌ Not an analysis of the current state of the real world
- ❌ Not a generic public-opinion overview
[Section count limit]
- Minimum 2 sections, maximum 5 sections
- No sub-sections needed; write complete content directly within each section
- Content should be concise and focused on the core prediction findings
- You design the section structure yourself based on the prediction results
Output the report outline in JSON format as follows:
{
"title": "Report title",
"summary": "Report summary (one sentence summarising the core prediction findings)",
"sections": [
{
"title": "Section title",
"description": "Description of the section content"
}
]
}
Note: the sections array must have a minimum of 2 and a maximum of 5 elements!"""
# User-side prompt for the outline-planning step. A str.format-style template
# with these placeholders: simulation_requirement, total_nodes, total_edges,
# entity_types, total_entities, related_facts_json.
PLAN_USER_PROMPT_TEMPLATE = """\
[Prediction Scenario Setup]
The variable we injected into the simulated world (simulation requirement): {simulation_requirement}
[Scale of the Simulated World]
- Number of entities in the simulation: {total_nodes}
- Number of relationships between entities: {total_edges}
- Distribution of entity types: {entity_types}
- Number of active agents: {total_entities}
[Sample of Future Facts Predicted by the Simulation]
{related_facts_json}
Please examine this rehearsal of the future from a "god's-eye view":
1. Under the conditions we set, what state did the future settle into?
2. How did the various population groups (Agents) react and act?
3. What future trends worth attention did this simulation reveal?
Based on the prediction results, design the most appropriate report section structure.
[Reminder] Report section count: minimum 2, maximum 5; content should be concise and focused on core prediction findings."""
# ── Section generation prompt ──
# System prompt for writing one report section. A str.format-style template
# with these placeholders: report_title, report_summary,
# simulation_requirement, section_title, tools_description.
# Literal braces in the <tool_call> example are doubled ({{ }}) so that
# format() emits them as single braces.
# The tool-call budget is stated twice (rule 1 and the tools header); both
# must read "3 to 5" to stay in line with MAX_TOOL_CALLS_PER_SECTION = 5.
SECTION_SYSTEM_PROMPT_TEMPLATE = """\
You are an expert writer of "Future Prediction Reports", currently writing one section of a report.
Report title: {report_title}
Report summary: {report_summary}
Prediction scenario (simulation requirement): {simulation_requirement}
Section to write now: {section_title}
═══════════════════════════════════════════════════════════════
[Core concept]
═══════════════════════════════════════════════════════════════
The simulated world is a rehearsal of the future. We injected specific conditions
(the simulation requirement) into the simulated world; the behaviour and interactions
of the Agents in the simulation are predictions of future human behaviour.
Your task is to:
- Reveal what happened in the future under the conditions set
- Predict how the various population groups (Agents) reacted and acted
- Identify future trends, risks, and opportunities worth attention
❌ Do not write this as an analysis of the current state of the real world
✅ Focus on "what will happen in the future" — the simulation results ARE the predicted future
═══════════════════════════════════════════════════════════════
[Most important rules — must be followed]
═══════════════════════════════════════════════════════════════
1. [You must call tools to observe the simulated world]
- You are observing the rehearsal of the future from a "god's-eye view"
- All content must come from events that occurred and Agent statements in the simulated world
- You are prohibited from using your own knowledge to write the report content
- Call tools at least 3 times (and no more than 5 times) per section to observe the simulated world, which represents the future
2. [You must quote Agents' original statements and actions]
- Agent statements and behaviour are predictions of future human behaviour
- Present these predictions in the report using quotation format, for example:
> "A certain population group would say: verbatim content..."
- These quotations are the core evidence of the simulation's predictions
3. [Language consistency — quoted content must be translated to the report language]
- Tool return values may contain expressions in a language different from the report language
- The entire report must be written in the language specified by the user
- When quoting tool return values in other languages, you must translate them into the report language before writing them in
- Keep the original meaning intact; ensure the phrasing is natural and fluent
- This rule applies to both the main body text and blockquote (> format) content
4. [Faithfully present prediction results]
- Report content must reflect the simulation results that represent the future
- Do not add information that does not exist in the simulation
- If information on some aspect is insufficient, state that honestly
═══════════════════════════════════════════════════════════════
[⚠️ Formatting rules — extremely important!]
═══════════════════════════════════════════════════════════════
[One section = the minimum content unit]
- Each section is the smallest division of the report
- ❌ No Markdown headings (#, ##, ###, #### etc.) are permitted anywhere inside a section
- ❌ Do not add the section's main heading at the top of the content
- ✅ The section heading is added automatically by the system; you only need to write the plain body content
- ✅ Use **bold**, paragraph breaks, block quotes, and lists to organise content — but no headings
[Correct example]
```
This section analyses the public-opinion dissemination dynamics of the event. Through in-depth analysis of the simulation data, we found...
**Initial Ignition Stage**
Platform X served as the primary venue, taking on the core function of first publishing the information:
> "Platform X contributed 68% of the initial volume..."
**Emotional Amplification Stage**
Video platforms further amplified the impact of the event:
- Strong visual impact
- High emotional resonance
```
[Incorrect example]
```
## Executive Summary ← Wrong! Do not add any headings
### I. Initial Stage ← Wrong! Do not use ### for sub-sections
#### 1.1 Detailed Analysis ← Wrong! Do not use #### for finer divisions
This section analyses...
```
═══════════════════════════════════════════════════════════════
[Available retrieval tools] (call 3-5 times per section)
═══════════════════════════════════════════════════════════════
{tools_description}
[Tool usage advice — mix different tools; do not use only one]
- insight_forge: Deep insight analysis; automatically decomposes questions and retrieves facts and relationships across multiple dimensions
- panorama_search: Wide-angle panorama search; understand the full picture of an event, its timeline, and how it evolved
- quick_search: Quickly verify a specific piece of information
- interview_agents: Interview simulation agents to obtain first-person perspectives and real reactions from different roles
═══════════════════════════════════════════════════════════════
[Workflow]
═══════════════════════════════════════════════════════════════
In each reply you may do only one of the following two things (not both simultaneously):
Option A — Call a tool:
Output your thoughts, then call one tool using the following format:
<tool_call>
{{"name": "tool_name", "parameters": {{"param_name": "param_value"}}}}
</tool_call>
The system will execute the tool and return the result to you. You must not and cannot write the tool's return result yourself.
Option B — Output the final content:
Once you have obtained sufficient information through tools, output the section content starting with "Final Answer:".
⚠️ Strictly prohibited:
- Including both a tool call and a Final Answer in a single reply
- Fabricating tool return results (Observations); all tool results are injected by the system
- Calling more than one tool per reply
═══════════════════════════════════════════════════════════════
[Section content requirements]
═══════════════════════════════════════════════════════════════
1. Content must be based on simulation data retrieved by tools
2. Quote original text extensively to demonstrate simulation results
3. Use Markdown formatting (but headings are prohibited):
- Use **bold text** to mark key points (instead of sub-headings)
- Use lists (- or 1. 2. 3.) to organise points
- Use blank lines to separate different paragraphs
- ❌ Any heading syntax (#, ##, ###, #### etc.) is prohibited
4. [Quotation format rules — must stand alone as a paragraph]
Quotations must be their own paragraph with one blank line before and after; they cannot be embedded in a paragraph:
✅ Correct format:
```
The institution's response was considered to lack substance.
> "The institution's response pattern appeared rigid and slow-moving in the fast-changing social media environment."
This evaluation reflects the widespread public dissatisfaction.
```
❌ Incorrect format:
```
The institution's response was considered to lack substance. > "The institution's response pattern..." This evaluation reflects...
```
5. Maintain logical coherence with other sections
6. [Avoid repetition] Carefully read the already-completed section content below; do not repeat the same information
7. [Emphasis again] Do not add any headings! Use **bold** instead of sub-section headings"""
# User-side prompt that starts the writing of one section. A str.format-style
# template with two placeholders: previous_content (text of the sections
# already written) and section_title (used twice).
SECTION_USER_PROMPT_TEMPLATE = """\
Already-completed section content (please read carefully to avoid repetition):
{previous_content}
═══════════════════════════════════════════════════════════════
[Current task] Write section: {section_title}
═══════════════════════════════════════════════════════════════
[Important reminders]
1. Read the already-completed sections above carefully and avoid repeating the same content!
2. You must call tools to retrieve simulation data before starting
3. Mix different tools; do not use only one
4. Report content must come from retrieval results; do not use your own knowledge
[⚠️ Formatting warning — must be followed]
- ❌ Do not write any headings (#, ##, ###, #### are all prohibited)
- ❌ Do not write "{section_title}" as the opening line
- ✅ The section heading is added automatically by the system
- ✅ Write the body text directly; use **bold** instead of sub-section headings
Please begin:
1. First, think (Thought) about what information this section needs
2. Then call a tool (Action) to retrieve simulation data
3. Once you have gathered enough information, output Final Answer (plain body text, no headings of any kind)"""
# ── ReACT loop message templates ──
# Message that carries a tool result back to the model inside the ReACT loop.
# A str.format-style template with these placeholders: tool_name, result,
# tool_calls_count, max_tool_calls, used_tools_str, unused_hint.
# unused_hint is appended directly after the closing parenthesis, so it has to
# bring its own leading separator (REACT_UNUSED_TOOLS_HINT starts with "\n").
REACT_OBSERVATION_TEMPLATE = """\
Observation (retrieval result):
═══ Tool {tool_name} returned ═══
{result}
═══════════════════════════════════════════════════════════════
Tools called: {tool_calls_count}/{max_tool_calls} (used: {used_tools_str}){unused_hint}
- If the information is sufficient: output the section content starting with "Final Answer:" (must quote the original text above)
- If more information is needed: call one tool to continue retrieval
═══════════════════════════════════════════════════════════════"""
# Short corrective messages for the ReACT loop. Those containing {…} fields
# are str.format-style templates; adjacent string literals inside the
# parentheses are concatenated into a single string by Python.

# Placeholders: tool_calls_count, min_tool_calls, unused_hint.
# Wording targets a reply that tried to give a Final Answer too early.
REACT_INSUFFICIENT_TOOLS_MSG = (
"[Note] You have only called {tool_calls_count} tool(s), but at least {min_tool_calls} are required. "
"Please call more tools to retrieve simulation data before outputting Final Answer. {unused_hint}"
)
# Placeholders: tool_calls_count, min_tool_calls, unused_hint.
# Alternative wording of the same reminder that does not mention Final Answer.
REACT_INSUFFICIENT_TOOLS_MSG_ALT = (
"You have only called {tool_calls_count} tool(s) so far; at least {min_tool_calls} are required. "
"Please call a tool to retrieve simulation data. {unused_hint}"
)
# Placeholders: tool_calls_count, max_tool_calls.
REACT_TOOL_LIMIT_MSG = (
"The tool call limit has been reached ({tool_calls_count}/{max_tool_calls}); no more tools can be called. "
'Please immediately output the section content starting with "Final Answer:" based on the information already retrieved.'
)
# Placeholder: unused_list. Begins with a newline so it can be appended
# directly to the end of another line.
REACT_UNUSED_TOOLS_HINT = "\n💡 You have not yet used: {unused_list} — consider trying different tools to gather multi-perspective information"
# Plain string without placeholders.
REACT_FORCE_FINAL_MSG = "The tool call limit has been reached. Please output Final Answer: directly and generate the section content."
# ── Chat prompt ──
# System prompt for the chat mode, where the user asks questions about an
# already generated report. A str.format-style template with these
# placeholders: simulation_requirement, report_content, tools_description.
# Literal braces in the <tool_call> example are doubled ({{ }}) so that
# format() emits them as single braces.
# The stated tool budget ("1-2") must stay in line with
# MAX_TOOL_CALLS_PER_CHAT = 2.
CHAT_SYSTEM_PROMPT_TEMPLATE = """\
You are a concise and efficient simulation prediction assistant.
[Background]
Prediction conditions: {simulation_requirement}
[Already-generated analysis report]
{report_content}
[Rules]
1. Prioritise answering questions based on the report content above
2. Answer questions directly; avoid lengthy reasoning
3. Only call tools to retrieve more data when the report content is insufficient to answer
4. Answers should be concise, clear, and well-organised
[Available tools] (use only when needed; call at most 1-2 times)
{tools_description}
[Tool call format]
<tool_call>
{{"name": "tool_name", "parameters": {{"param_name": "param_value"}}}}
</tool_call>
[Answer style]
- Concise and direct; avoid long-winded explanations
- Use the > format to quote key content
- Lead with the conclusion, then explain the reasoning"""
# Plain string (no placeholders) that begins with a blank line; judging by the
# name it is appended after tool output in chat mode — confirm at the call site.
CHAT_OBSERVATION_SUFFIX = "\n\nPlease answer the question concisely."
# ═══════════════════════════════════════════════════════════════
# ReportAgent main class
# ═══════════════════════════════════════════════════════════════
class ReportAgent:
"""
Report Agent — Simulation report generation agent.
Uses the ReACT (Reasoning + Acting) pattern:
1. Planning stage: analyse simulation requirements and plan the report structure
2. Generation stage: generate content section by section; each section may call tools
multiple times to retrieve information
3. Reflection stage: check content completeness and accuracy
"""
# Maximum tool calls per section
MAX_TOOL_CALLS_PER_SECTION = 5
# Maximum reflection rounds
MAX_REFLECTION_ROUNDS = 3
# Maximum tool calls per chat turn
MAX_TOOL_CALLS_PER_CHAT = 2
def __init__(
    self,
    graph_id: str,
    simulation_id: str,
    simulation_requirement: str,
    llm_client: Optional[LLMClient] = None,
    zep_tools: Optional[ZepToolsService] = None
):
    """
    Set up a Report Agent for one simulation.

    Args:
        graph_id: Graph ID
        simulation_id: Simulation ID
        simulation_requirement: Simulation requirement description
        llm_client: LLM client; a default LLMClient is created when omitted
        zep_tools: Zep tools service; a default ZepToolsService is created when omitted
    """
    self.graph_id = graph_id
    self.simulation_id = simulation_id
    self.simulation_requirement = simulation_requirement

    # Use the injected collaborators when given (and truthy), otherwise
    # fall back to default-constructed ones
    self.llm = llm_client if llm_client else LLMClient()
    self.zep_tools = zep_tools if zep_tools else ZepToolsService()

    # Catalogue of callable tools, keyed by tool name
    self.tools = self._define_tools()

    # Both recorders stay unset here; they are initialised in generate_report
    self.report_logger: Optional[ReportLogger] = None    # structured JSONL log
    self.console_logger: Optional[ReportConsoleLogger] = None  # plain-text console log

    ready_message = t('report.agentInitDone', graphId=graph_id, simulationId=simulation_id)
    logger.info(ready_message)
def _define_tools(self) -> Dict[str, Dict[str, Any]]:
    """
    Build the catalogue of tools available to the agent.

    Returns:
        Mapping of tool name to a spec dict with the keys "name",
        "description" and "parameters" (parameter name -> explanation text).
    """
    def make_spec(tool_name: str, description: str, **parameters: str) -> Dict[str, Any]:
        # Keyword order is preserved, so parameters appear in the order listed
        return {
            "name": tool_name,
            "description": description,
            "parameters": parameters
        }

    specs = [
        make_spec(
            "insight_forge",
            TOOL_DESC_INSIGHT_FORGE,
            query="The question or topic you want to analyse in depth",
            report_context="Context of the current report section (optional; helps generate more precise sub-questions)"
        ),
        make_spec(
            "panorama_search",
            TOOL_DESC_PANORAMA_SEARCH,
            query="Search query for relevance ranking",
            include_expired="Whether to include expired/historical content (default True)"
        ),
        make_spec(
            "quick_search",
            TOOL_DESC_QUICK_SEARCH,
            query="Search query string",
            limit="Number of results to return (optional, default 10)"
        ),
        make_spec(
            "interview_agents",
            TOOL_DESC_INTERVIEW_AGENTS,
            interview_topic="Interview topic or requirement description",
            max_agents="Maximum number of agents to interview (optional, default 5, max 10)"
        ),
    ]
    # Key each spec by its own name, keeping the declaration order
    return {spec["name"]: spec for spec in specs}
def _execute_tool(self, tool_name: str, parameters: Dict[str, Any], report_context: str = "") -> str:
    """
    Execute a tool call and return its result as text.

    Dispatches on ``tool_name`` to the matching ``self.zep_tools`` method.
    Legacy tool names are still accepted; some are redirected to their
    replacements. Any exception raised while running a tool is logged and
    reported back as a "Tool execution failed: ..." string instead of
    propagating, so the caller can feed the message back to the LLM.

    Args:
        tool_name: Tool name
        parameters: Tool parameters (``None`` is treated as "no parameters")
        report_context: Report context (used by InsightForge when the call
            itself does not supply ``report_context``)
    Returns:
        Tool execution result (text format)
    """
    logger.info(t('report.executingTool', toolName=tool_name, params=parameters))

    # LLM-produced calls may carry `"parameters": null`; treat that as empty
    # rather than failing every tool with an AttributeError.
    if parameters is None:
        parameters = {}

    def _as_int(value: Any, default: int) -> int:
        """Coerce an LLM-supplied number (int, float, numeric string, null) to int."""
        if value is None:
            return default
        try:
            return int(value)
        except (TypeError, ValueError):
            return default

    try:
        if tool_name == "insight_forge":
            query = parameters.get("query", "")
            ctx = parameters.get("report_context", "") or report_context
            result = self.zep_tools.insight_forge(
                graph_id=self.graph_id,
                query=query,
                simulation_requirement=self.simulation_requirement,
                report_context=ctx
            )
            return result.to_text()
        elif tool_name == "panorama_search":
            # Panorama search - get the full picture
            query = parameters.get("query", "")
            include_expired = parameters.get("include_expired", True)
            if isinstance(include_expired, str):
                include_expired = include_expired.strip().lower() in ('true', '1', 'yes')
            result = self.zep_tools.panorama_search(
                graph_id=self.graph_id,
                query=query,
                include_expired=include_expired
            )
            return result.to_text()
        elif tool_name == "quick_search":
            # Quick search - fast retrieval
            query = parameters.get("query", "")
            # A non-positive or unparsable limit makes no sense; fall back / clamp.
            limit = max(1, _as_int(parameters.get("limit", 10), 10))
            result = self.zep_tools.quick_search(
                graph_id=self.graph_id,
                query=query,
                limit=limit
            )
            return result.to_text()
        elif tool_name == "interview_agents":
            # In-depth interview - calls the real OASIS interview API to get simulation agent responses (dual platform)
            interview_topic = parameters.get("interview_topic", parameters.get("query", ""))
            # Documented range is 1..10 with a default of 5.
            max_agents = max(1, min(_as_int(parameters.get("max_agents", 5), 5), 10))
            result = self.zep_tools.interview_agents(
                simulation_id=self.simulation_id,
                interview_requirement=interview_topic,
                simulation_requirement=self.simulation_requirement,
                max_agents=max_agents
            )
            return result.to_text()
        # ========== Legacy tools for backward compatibility (internally redirect to new tools) ==========
        elif tool_name == "search_graph":
            # Redirect to quick_search
            logger.info(t('report.redirectToQuickSearch'))
            return self._execute_tool("quick_search", parameters, report_context)
        elif tool_name == "get_graph_statistics":
            result = self.zep_tools.get_graph_statistics(self.graph_id)
            return json.dumps(result, ensure_ascii=False, indent=2)
        elif tool_name == "get_entity_summary":
            entity_name = parameters.get("entity_name", "")
            result = self.zep_tools.get_entity_summary(
                graph_id=self.graph_id,
                entity_name=entity_name
            )
            return json.dumps(result, ensure_ascii=False, indent=2)
        elif tool_name == "get_simulation_context":
            # Redirect to insight_forge as it is more powerful
            logger.info(t('report.redirectToInsightForge'))
            query = parameters.get("query", self.simulation_requirement)
            return self._execute_tool("insight_forge", {"query": query}, report_context)
        elif tool_name == "get_entities_by_type":
            entity_type = parameters.get("entity_type", "")
            nodes = self.zep_tools.get_entities_by_type(
                graph_id=self.graph_id,
                entity_type=entity_type
            )
            result = [n.to_dict() for n in nodes]
            return json.dumps(result, ensure_ascii=False, indent=2)
        else:
            # List every first-class tool so the LLM can recover on its next turn.
            return (
                f"Unknown tool: {tool_name}. Please use one of: "
                "insight_forge, panorama_search, quick_search, interview_agents"
            )
    except Exception as e:
        # Best-effort boundary: the error text is returned as the tool observation.
        logger.error(t('report.toolExecFailed', toolName=tool_name, error=str(e)))
        return f"Tool execution failed: {str(e)}"
# Names of the first-class tools. _is_valid_tool_call checks parsed tool-call
# JSON against this set, which is how the bare-JSON fallback in
# _parse_tool_calls avoids treating arbitrary JSON in body text as a tool call.
VALID_TOOL_NAMES = {"insight_forge", "panorama_search", "quick_search", "interview_agents"}
def _parse_tool_calls(self, response: str) -> List[Dict[str, Any]]:
"""
Parse tool calls from an LLM response.
Supported formats (in priority order):
1. <tool_call>{"name": "tool_name", "parameters": {...}}</tool_call>
2. Bare JSON (the entire response or a single line is a tool call JSON object)
"""
tool_calls = []
# Format 1: XML-style (standard format)
xml_pattern = r'<tool_call>\s*(\{.*?\})\s*</tool_call>'
for match in re.finditer(xml_pattern, response, re.DOTALL):
try:
call_data = json.loads(match.group(1))
tool_calls.append(call_data)
except json.JSONDecodeError:
pass
if tool_calls:
return tool_calls
# Format 2: fallback — LLM outputs bare JSON directly (without <tool_call> tags)
# Only attempted when Format 1 did not match, to avoid false positives in body text
stripped = response.strip()
if stripped.startswith('{') and stripped.endswith('}'):
try:
call_data = json.loads(stripped)
if self._is_valid_tool_call(call_data):
tool_calls.append(call_data)
return tool_calls
except json.JSONDecodeError:
pass
# Response may contain reasoning text + bare JSON; try to extract the last JSON object
json_pattern = r'(\{"(?:name|tool)"\s*:.*?\})\s*$'
match = re.search(json_pattern, stripped, re.DOTALL)
if match:
try:
call_data = json.loads(match.group(1))
if self._is_valid_tool_call(call_data):
tool_calls.append(call_data)
except json.JSONDecodeError:
pass
return tool_calls
def _is_valid_tool_call(self, data: dict) -> bool:
"""Validate whether the parsed JSON is a valid tool call"""
# Supports both {"name": ..., "parameters": ...} and {"tool": ..., "params": ...} key forms
tool_name = data.get("name") or data.get("tool")
if tool_name and tool_name in self.VALID_TOOL_NAMES:
# Normalise keys to name / parameters
if "tool" in data:
data["name"] = data.pop("tool")
if "params" in data and "parameters" not in data:
data["parameters"] = data.pop("params")
return True
return False
def _get_tools_description(self) -> str:
"""Generate tool description text"""
desc_parts = ["Available tools:"]
for name, tool in self.tools.items():
params_desc = ", ".join([f"{k}: {v}" for k, v in tool["parameters"].items()])
desc_parts.append(f"- {name}: {tool['description']}")
if params_desc:
desc_parts.append(f" Parameters: {params_desc}")
return "\n".join(desc_parts)
def plan_outline(
    self,
    progress_callback: Optional[Callable] = None
) -> ReportOutline:
    """
    Plan the report outline.

    Fetches the simulation context from Zep, then asks the LLM (JSON mode) to
    propose a title, summary and list of sections. If the LLM call fails, or
    its answer contains no usable section titles, a default three-section
    outline is returned instead so report generation can continue.

    Args:
        progress_callback: Progress callback function, called as
            (stage, percent, message)
    Returns:
        ReportOutline: Report outline
    """
    logger.info(t('report.startPlanningOutline'))
    if progress_callback:
        progress_callback("planning", 0, t('progress.analyzingRequirements'))

    # Obtain the simulation context first
    context = self.zep_tools.get_simulation_context(
        graph_id=self.graph_id,
        simulation_requirement=self.simulation_requirement
    )
    if progress_callback:
        progress_callback("planning", 30, t('progress.generatingOutline'))

    system_prompt = f"{PLAN_SYSTEM_PROMPT}\n\n{get_language_instruction()}"
    graph_stats = context.get('graph_statistics', {})
    user_prompt = PLAN_USER_PROMPT_TEMPLATE.format(
        simulation_requirement=self.simulation_requirement,
        total_nodes=graph_stats.get('total_nodes', 0),
        total_edges=graph_stats.get('total_edges', 0),
        entity_types=list(graph_stats.get('entity_types', {}).keys()),
        total_entities=context.get('total_entities', 0),
        # Only the first 10 facts are included to keep the prompt short.
        related_facts_json=json.dumps(context.get('related_facts', [])[:10], ensure_ascii=False, indent=2),
    )

    try:
        response = self.llm.chat_json(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=0.3
        )
        if progress_callback:
            progress_callback("planning", 80, t('progress.parsingOutline'))

        # Parse the outline, ignoring entries that are not objects or have no title.
        sections = []
        for section_data in response.get("sections") or []:
            if not isinstance(section_data, dict):
                continue
            title = section_data.get("title")
            if not isinstance(title, str) or not title.strip():
                continue
            sections.append(ReportSection(title=title.strip(), content=""))

        if not sections:
            # An outline without sections would yield an empty "completed"
            # report; route it through the default-outline fallback below.
            raise ValueError("LLM outline contained no usable sections")

        outline = ReportOutline(
            title=response.get("title") or "Simulation Analysis Report",
            summary=response.get("summary") or "",
            sections=sections
        )
        if progress_callback:
            progress_callback("planning", 100, t('progress.outlinePlanComplete'))
        logger.info(t('report.outlinePlanDone', count=len(sections)))
        return outline
    except Exception as e:
        logger.error(t('report.outlinePlanFailed', error=str(e)))
        # Return a default outline (3 sections, as fallback)
        return ReportOutline(
            title="Future Forecast Report",
            summary="Future trends and risk analysis based on simulation forecasts",
            sections=[
                ReportSection(title="Predicted Scenario and Core Findings"),
                ReportSection(title="Population Behaviour Prediction Analysis"),
                ReportSection(title="Trend Outlook and Risk Warnings")
            ]
        )
def _generate_section_react(
    self,
    section: ReportSection,
    outline: ReportOutline,
    previous_sections: List[str],
    progress_callback: Optional[Callable] = None,
    section_index: int = 0
) -> str:
    """
    Generate a single section using the ReACT pattern.

    ReACT loop:
    1. Thought — analyse what information is needed
    2. Action — call a tool to retrieve information
    3. Observation — analyse the tool's return value
    4. Repeat until enough information is gathered or the maximum count is reached
    5. Final Answer — generate the section content

    A Final Answer is only accepted after at least ``min_tool_calls`` tool
    calls. At most one tool call is executed per LLM reply. If the loop runs
    out of iterations, one extra LLM call is made to force a final answer.

    Args:
        section: The section to generate
        outline: The full outline
        previous_sections: Content of previously generated sections (for coherence)
        progress_callback: Progress callback, called as (stage, percent, message)
        section_index: Section index (used for logging)
    Returns:
        Section content (Markdown format)
    """
    logger.info(t('report.reactGenerateSection', title=section.title))
    # Log section start
    if self.report_logger:
        self.report_logger.log_section_start(section.title, section_index)

    system_prompt = SECTION_SYSTEM_PROMPT_TEMPLATE.format(
        report_title=outline.title,
        report_summary=outline.summary,
        simulation_requirement=self.simulation_requirement,
        section_title=section.title,
        tools_description=self._get_tools_description(),
    )
    system_prompt = f"{system_prompt}\n\n{get_language_instruction()}"

    # Build the user prompt — each already-completed section is capped at 4000 characters
    if previous_sections:
        previous_content = "\n\n---\n\n".join(
            sec[:4000] + "..." if len(sec) > 4000 else sec
            for sec in previous_sections
        )
    else:
        previous_content = "(This is the first section)"
    user_prompt = SECTION_USER_PROMPT_TEMPLATE.format(
        previous_content=previous_content,
        section_title=section.title,
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]

    # ReACT loop state
    tool_calls_count = 0
    max_iterations = 5  # maximum iterations
    min_tool_calls = 3  # minimum tool calls required
    conflict_retries = 0  # conflict count (tool call + Final Answer in same reply)
    used_tools = set()  # tracks tool names that have been called
    all_tools = set(self.VALID_TOOL_NAMES)
    # Report context used by InsightForge for sub-question generation
    report_context = f"Section title: {section.title}\nSimulation requirement: {self.simulation_requirement}"

    def _insufficient_hint() -> str:
        """Hint listing not-yet-used tools; sorted so prompts are deterministic."""
        unused = sorted(all_tools - used_tools)
        if not unused:
            return ""
        return f"(These tools have not been used yet, consider trying them: {', '.join(unused)})"

    def _finish(content: str) -> str:
        """Record the final section content in the structured log and return it."""
        if self.report_logger:
            self.report_logger.log_section_content(
                section_title=section.title,
                section_index=section_index,
                content=content,
                tool_calls_count=tool_calls_count
            )
        return content

    for iteration in range(max_iterations):
        if progress_callback:
            progress_callback(
                "generating",
                int((iteration / max_iterations) * 100),
                t('progress.deepSearchAndWrite', current=tool_calls_count, max=self.MAX_TOOL_CALLS_PER_SECTION)
            )

        # Call the LLM
        response = self.llm.chat(
            messages=messages,
            temperature=0.5,
            max_tokens=4096
        )

        # Check whether the LLM returned None (API error or empty content)
        if response is None:
            logger.warning(t('report.sectionIterNone', title=section.title, iteration=iteration + 1))
            # If iterations remain, append a message and retry
            if iteration < max_iterations - 1:
                messages.append({"role": "assistant", "content": "(empty response)"})
                messages.append({"role": "user", "content": "Please continue generating content."})
                continue
            # Last iteration also returned None; break out of the loop and force a conclusion
            break

        logger.debug(f"LLM response: {response[:200]}...")

        # Parse once and reuse the result
        tool_calls = self._parse_tool_calls(response)
        has_tool_calls = bool(tool_calls)
        has_final_answer = "Final Answer:" in response

        # ── Conflict handling: LLM output both a tool call and a Final Answer ──
        if has_tool_calls and has_final_answer:
            conflict_retries += 1
            logger.warning(
                t('report.sectionConflict', title=section.title, iteration=iteration+1, conflictCount=conflict_retries)
            )
            if conflict_retries <= 2:
                # First two occurrences: discard this response and ask the LLM to reply again
                messages.append({"role": "assistant", "content": response})
                messages.append({
                    "role": "user",
                    "content": (
                        "[Format error] Your reply contained both a tool call and a Final Answer, which is not allowed.\n"
                        "Each reply may do only one of the following two things:\n"
                        "- Call a tool (output a <tool_call> block; do not write Final Answer)\n"
                        "- Output the final content (begin with 'Final Answer:'; do not include <tool_call>)\n"
                        "Please reply again and do only one of the two."
                    ),
                })
                continue
            else:
                # Third occurrence: downgrade — truncate to the first tool call and force execution
                logger.warning(
                    t('report.sectionConflictDowngrade', title=section.title, conflictCount=conflict_retries)
                )
                first_tool_end = response.find('</tool_call>')
                if first_tool_end != -1:
                    response = response[:first_tool_end + len('</tool_call>')]
                    tool_calls = self._parse_tool_calls(response)
                    has_tool_calls = bool(tool_calls)
                has_final_answer = False
                conflict_retries = 0

        # Log the LLM response
        if self.report_logger:
            self.report_logger.log_llm_response(
                section_title=section.title,
                section_index=section_index,
                response=response,
                iteration=iteration + 1,
                has_tool_calls=has_tool_calls,
                has_final_answer=has_final_answer
            )

        # ── Case 1: LLM output a Final Answer ──
        if has_final_answer:
            # Insufficient tool calls — reject and require more tool usage
            if tool_calls_count < min_tool_calls:
                messages.append({"role": "assistant", "content": response})
                messages.append({
                    "role": "user",
                    "content": REACT_INSUFFICIENT_TOOLS_MSG.format(
                        tool_calls_count=tool_calls_count,
                        min_tool_calls=min_tool_calls,
                        unused_hint=_insufficient_hint(),
                    ),
                })
                continue
            # Normal completion
            final_answer = response.split("Final Answer:")[-1].strip()
            logger.info(t('report.sectionGenDone', title=section.title, count=tool_calls_count))
            return _finish(final_answer)

        # ── Case 2: LLM attempted a tool call ──
        if has_tool_calls:
            # Tool quota exhausted → notify clearly and require Final Answer output
            if tool_calls_count >= self.MAX_TOOL_CALLS_PER_SECTION:
                messages.append({"role": "assistant", "content": response})
                messages.append({
                    "role": "user",
                    "content": REACT_TOOL_LIMIT_MSG.format(
                        tool_calls_count=tool_calls_count,
                        max_tool_calls=self.MAX_TOOL_CALLS_PER_SECTION,
                    ),
                })
                continue

            # Execute only the first tool call
            call = tool_calls[0]
            # Tagged payloads are not guaranteed to carry a "name" key. Read it
            # defensively so a malformed call becomes an "Unknown tool"
            # observation for the LLM instead of a KeyError that aborts the
            # whole report.
            tool_name = str(call.get("name") or call.get("tool") or "")
            tool_params = call.get("parameters") or call.get("params") or {}
            if len(tool_calls) > 1:
                logger.info(t('report.multiToolOnlyFirst', total=len(tool_calls), toolName=tool_name))
            if self.report_logger:
                self.report_logger.log_tool_call(
                    section_title=section.title,
                    section_index=section_index,
                    tool_name=tool_name,
                    parameters=tool_params,
                    iteration=iteration + 1
                )
            result = self._execute_tool(
                tool_name,
                tool_params,
                report_context=report_context
            )
            if self.report_logger:
                self.report_logger.log_tool_result(
                    section_title=section.title,
                    section_index=section_index,
                    tool_name=tool_name,
                    result=result,
                    iteration=iteration + 1
                )
            tool_calls_count += 1
            used_tools.add(tool_name)

            # Build the unused-tools hint
            unused_tools = sorted(all_tools - used_tools)
            unused_hint = ""
            if unused_tools and tool_calls_count < self.MAX_TOOL_CALLS_PER_SECTION:
                unused_hint = REACT_UNUSED_TOOLS_HINT.format(unused_list=", ".join(unused_tools))
            messages.append({"role": "assistant", "content": response})
            messages.append({
                "role": "user",
                "content": REACT_OBSERVATION_TEMPLATE.format(
                    tool_name=tool_name,
                    result=result,
                    tool_calls_count=tool_calls_count,
                    max_tool_calls=self.MAX_TOOL_CALLS_PER_SECTION,
                    used_tools_str=", ".join(sorted(used_tools)),
                    unused_hint=unused_hint,
                ),
            })
            continue

        # ── Case 3: Neither a tool call nor a Final Answer ──
        messages.append({"role": "assistant", "content": response})
        if tool_calls_count < min_tool_calls:
            # Insufficient tool calls — recommend unused tools
            messages.append({
                "role": "user",
                "content": REACT_INSUFFICIENT_TOOLS_MSG_ALT.format(
                    tool_calls_count=tool_calls_count,
                    min_tool_calls=min_tool_calls,
                    unused_hint=_insufficient_hint(),
                ),
            })
            continue
        # Enough tool calls have been made; the LLM output content without the "Final Answer:" prefix.
        # Use this content as the final answer directly without further looping.
        logger.info(t('report.sectionNoPrefix', title=section.title, count=tool_calls_count))
        return _finish(response.strip())

    # Maximum iterations reached; force content generation
    logger.warning(t('report.sectionMaxIter', title=section.title))
    messages.append({"role": "user", "content": REACT_FORCE_FINAL_MSG})
    response = self.llm.chat(
        messages=messages,
        temperature=0.5,
        max_tokens=4096
    )
    # Check whether LLM returned None during forced conclusion
    if response is None:
        logger.error(t('report.sectionForceFailed', title=section.title))
        final_answer = t('report.sectionGenFailedContent')
    elif "Final Answer:" in response:
        final_answer = response.split("Final Answer:")[-1].strip()
    else:
        # Strip for consistency with the other completion paths.
        final_answer = response.strip()
    # Log section content generation completion
    return _finish(final_answer)
def generate_report(
    self,
    progress_callback: Optional[Callable[[str, int, str], None]] = None,
    report_id: Optional[str] = None
) -> Report:
    """
    Generate the complete report (real-time section-by-section output).

    Each section is saved to the folder immediately upon completion;
    there is no need to wait for the entire report to finish.

    File structure:
    reports/{report_id}/
        meta.json - Report metadata
        outline.json - Report outline
        progress.json - Generation progress
        section_01.md - Section 1
        section_02.md - Section 2
        ...
        full_report.md - Complete report

    Any exception during generation is caught: the report is marked FAILED,
    the error is recorded, and the (failed) report object is still returned.
    The console log recorder is closed on every exit path.

    Args:
        progress_callback: Progress callback function (stage, progress, message)
        report_id: Report ID (optional; auto-generated if not provided)
    Returns:
        Report: The complete report (status COMPLETED or FAILED)
    """
    import uuid

    # Auto-generate report_id if not provided
    if not report_id:
        report_id = f"report_{uuid.uuid4().hex[:12]}"
    start_time = datetime.now()
    report = Report(
        report_id=report_id,
        simulation_id=self.simulation_id,
        graph_id=self.graph_id,
        simulation_requirement=self.simulation_requirement,
        status=ReportStatus.PENDING,
        created_at=datetime.now().isoformat()
    )
    # List of completed section titles (for progress tracking)
    completed_section_titles = []

    try:
        # Initialise: create the report folder and save the initial state
        ReportManager._ensure_report_folder(report_id)
        # Initialise the structured log recorder (agent_log.jsonl)
        self.report_logger = ReportLogger(report_id)
        self.report_logger.log_start(
            simulation_id=self.simulation_id,
            graph_id=self.graph_id,
            simulation_requirement=self.simulation_requirement
        )
        # Initialise the console log recorder (console_log.txt)
        self.console_logger = ReportConsoleLogger(report_id)
        ReportManager.update_progress(
            report_id, "pending", 0, t('progress.initReport'),
            completed_sections=[]
        )
        ReportManager.save_report(report)

        # Stage 1: Plan the outline
        report.status = ReportStatus.PLANNING
        ReportManager.update_progress(
            report_id, "planning", 5, t('progress.startPlanningOutline'),
            completed_sections=[]
        )
        # Log planning start
        self.report_logger.log_planning_start()
        if progress_callback:
            progress_callback("planning", 0, t('progress.startPlanningOutline'))
        # Planning progress (0-100) is scaled into the first 20% of the overall bar.
        outline = self.plan_outline(
            progress_callback=lambda stage, prog, msg:
                progress_callback(stage, prog // 5, msg) if progress_callback else None
        )
        report.outline = outline
        # Log planning completion
        self.report_logger.log_planning_complete(outline.to_dict())
        # Save the outline to file
        ReportManager.save_outline(report_id, outline)
        ReportManager.update_progress(
            report_id, "planning", 15, t('progress.outlineDone', count=len(outline.sections)),
            completed_sections=[]
        )
        ReportManager.save_report(report)
        logger.info(t('report.outlineSavedToFile', reportId=report_id))

        # Stage 2: Generate section by section (save as each section is completed)
        report.status = ReportStatus.GENERATING
        total_sections = len(outline.sections)
        generated_sections = []  # saved content used for context
        for i, section in enumerate(outline.sections):
            section_num = i + 1
            # Sections share the 20%..90% band of the overall progress bar.
            base_progress = 20 + int((i / total_sections) * 70)
            # Update progress
            ReportManager.update_progress(
                report_id, "generating", base_progress,
                t('progress.generatingSection', title=section.title, current=section_num, total=total_sections),
                current_section=section.title,
                completed_sections=completed_section_titles
            )
            if progress_callback:
                progress_callback(
                    "generating",
                    base_progress,
                    t('progress.generatingSection', title=section.title, current=section_num, total=total_sections)
                )
            # Generate the main section content. The lambda is only invoked
            # during this call, so it sees this iteration's base_progress.
            section_content = self._generate_section_react(
                section=section,
                outline=outline,
                previous_sections=generated_sections,
                progress_callback=lambda stage, prog, msg:
                    progress_callback(
                        stage,
                        base_progress + int(prog * 0.7 / total_sections),
                        msg
                    ) if progress_callback else None,
                section_index=section_num
            )
            section.content = section_content
            full_section_content = f"## {section.title}\n\n{section_content}"
            generated_sections.append(full_section_content)
            # Save the section
            ReportManager.save_section(report_id, section_num, section)
            completed_section_titles.append(section.title)
            # Log section completion
            if self.report_logger:
                self.report_logger.log_section_full_complete(
                    section_title=section.title,
                    section_index=section_num,
                    full_content=full_section_content.strip()
                )
            logger.info(t('report.sectionSaved', reportId=report_id, sectionNum=f"{section_num:02d}"))
            # Update progress
            ReportManager.update_progress(
                report_id, "generating",
                base_progress + int(70 / total_sections),
                t('progress.sectionDone', title=section.title),
                current_section=None,
                completed_sections=completed_section_titles
            )

        # Stage 3: Assemble the complete report
        if progress_callback:
            progress_callback("generating", 95, t('progress.assemblingReport'))
        ReportManager.update_progress(
            report_id, "generating", 95, t('progress.assemblingReport'),
            completed_sections=completed_section_titles
        )
        # Use ReportManager to assemble the complete report
        report.markdown_content = ReportManager.assemble_full_report(report_id, outline)
        report.status = ReportStatus.COMPLETED
        report.completed_at = datetime.now().isoformat()
        # Calculate total elapsed time
        total_time_seconds = (datetime.now() - start_time).total_seconds()
        # Log report completion
        if self.report_logger:
            self.report_logger.log_report_complete(
                total_sections=total_sections,
                total_time_seconds=total_time_seconds
            )
        # Save the final report
        ReportManager.save_report(report)
        ReportManager.update_progress(
            report_id, "completed", 100, t('progress.reportComplete'),
            completed_sections=completed_section_titles
        )
        if progress_callback:
            progress_callback("completed", 100, t('progress.reportComplete'))
        logger.info(t('report.reportGenDone', reportId=report_id))
        return report
    except Exception as e:
        logger.error(t('report.reportGenFailed', error=str(e)))
        report.status = ReportStatus.FAILED
        report.error = str(e)
        # Log the error
        if self.report_logger:
            self.report_logger.log_error(str(e), "failed")
        # Save the failed state (best effort: a second failure here must not
        # mask the original error, but it should at least be visible in the log)
        try:
            ReportManager.save_report(report)
            ReportManager.update_progress(
                report_id, "failed", -1, t('progress.reportFailed', error=str(e)),
                completed_sections=completed_section_titles
            )
        except Exception as save_err:
            logger.warning(f"Could not persist failed report state: {save_err}")
        return report
    finally:
        # Close the console log recorder on every exit path, including when
        # the failure handler above itself raises.
        if self.console_logger:
            try:
                self.console_logger.close()
            except Exception as close_err:
                logger.warning(f"Could not close console logger: {close_err}")
            self.console_logger = None
def chat(
    self,
    message: str,
    chat_history: Optional[List[Dict[str, str]]] = None
) -> Dict[str, Any]:
    """
    Chat with the Report Agent.

    During the conversation the Agent may autonomously call retrieval tools to
    answer questions. At most one tool call is executed per round, for up to
    two rounds, after which a final reply is requested. An empty/None LLM
    reply is treated as empty text rather than raising.

    Args:
        message: User message
        chat_history: Conversation history (only the last 10 entries are used)
    Returns:
        {
            "response": "Agent reply",
            "tool_calls": [list of tools called],
            "sources": [information sources]
        }
    """
    logger.info(t('report.agentChat', message=message[:50]))
    chat_history = chat_history or []

    # Retrieve the already-generated report content
    report_content = ""
    try:
        report = ReportManager.get_report_by_simulation(self.simulation_id)
        if report and report.markdown_content:
            # Limit report length to avoid overly long context
            report_content = report.markdown_content[:15000]
            if len(report.markdown_content) > 15000:
                report_content += "\n\n... [Report content truncated] ..."
    except Exception as e:
        logger.warning(t('report.fetchReportFailed', error=e))

    system_prompt = CHAT_SYSTEM_PROMPT_TEMPLATE.format(
        simulation_requirement=self.simulation_requirement,
        report_content=report_content if report_content else "(No report available yet)",
        tools_description=self._get_tools_description(),
    )
    system_prompt = f"{system_prompt}\n\n{get_language_instruction()}"

    # Build the messages list: system prompt, recent history, then the new user message
    messages = [{"role": "system", "content": system_prompt}]
    messages.extend(chat_history[-10:])  # limit history length
    messages.append({
        "role": "user",
        "content": message
    })

    tool_calls_made = []

    def _source_of(tool_call: Dict[str, Any]) -> str:
        """Query text behind a tool call ('' if the call carried none)."""
        params = tool_call.get("parameters")
        if not isinstance(params, dict):
            return ""
        # interview_agents carries its query under "interview_topic".
        return params.get("query") or params.get("interview_topic") or ""

    def _build_reply(text: Optional[str]) -> Dict[str, Any]:
        """Strip leftover tool-call markup from text and package the result."""
        cleaned = re.sub(r'<tool_call>.*?</tool_call>', '', text or "", flags=re.DOTALL)
        cleaned = re.sub(r'\[TOOL_CALL\].*?\)', '', cleaned)
        return {
            "response": cleaned.strip(),
            "tool_calls": tool_calls_made,
            "sources": [_source_of(tc) for tc in tool_calls_made]
        }

    # ReACT loop (simplified)
    max_iterations = 2  # reduced number of iterations
    for _ in range(max_iterations):
        # The LLM client may return None (API error / empty content).
        response = self.llm.chat(
            messages=messages,
            temperature=0.5
        ) or ""
        # Parse tool calls
        tool_calls = self._parse_tool_calls(response)
        if not tool_calls:
            # No tool calls — return the response directly
            return _build_reply(response)

        # Execute tool calls (with count limit)
        tool_results = []
        for call in tool_calls[:1]:  # at most 1 tool call per round
            if len(tool_calls_made) >= self.MAX_TOOL_CALLS_PER_CHAT:
                break
            # Read the call defensively: a payload without "name" or with null
            # parameters becomes a tool-error observation, not an exception.
            tool_name = str(call.get("name") or call.get("tool") or "")
            result = self._execute_tool(tool_name, call.get("parameters") or {})
            tool_results.append({
                "tool": tool_name,
                "result": result[:1500]  # limit result length
            })
            tool_calls_made.append(call)

        # Append the results to the messages
        messages.append({"role": "assistant", "content": response})
        observation = "\n".join([f"[{r['tool']} result]\n{r['result']}" for r in tool_results])
        messages.append({
            "role": "user",
            "content": observation + CHAT_OBSERVATION_SUFFIX
        })

    # Maximum iterations reached; get the final response
    final_response = self.llm.chat(
        messages=messages,
        temperature=0.5
    )
    return _build_reply(final_response)
class ReportManager:
"""
Report Manager.
Responsible for the persistent storage and retrieval of reports.
File structure (section-by-section output):
reports/
{report_id}/
meta.json - Report metadata and status
outline.json - Report outline
progress.json - Generation progress
section_01.md - Section 1
section_02.md - Section 2
...
full_report.md - Complete report
"""
# Report storage directory
REPORTS_DIR = os.path.join(Config.UPLOAD_FOLDER, 'reports')
@classmethod
def _ensure_reports_dir(cls):
"""Ensure the reports root directory exists"""
os.makedirs(cls.REPORTS_DIR, exist_ok=True)
@classmethod
def _get_report_folder(cls, report_id: str) -> str:
"""Get the report folder path"""
return os.path.join(cls.REPORTS_DIR, report_id)
@classmethod
def _ensure_report_folder(cls, report_id: str) -> str:
"""Ensure the report folder exists and return its path"""
folder = cls._get_report_folder(report_id)
os.makedirs(folder, exist_ok=True)
return folder
@classmethod
def _get_report_path(cls, report_id: str) -> str:
"""Get the path to the report metadata file"""
return os.path.join(cls._get_report_folder(report_id), "meta.json")
@classmethod
def _get_report_markdown_path(cls, report_id: str) -> str:
"""Get the path to the full report Markdown file"""
return os.path.join(cls._get_report_folder(report_id), "full_report.md")
@classmethod
def _get_outline_path(cls, report_id: str) -> str:
"""Get the path to the outline file"""
return os.path.join(cls._get_report_folder(report_id), "outline.json")
@classmethod
def _get_progress_path(cls, report_id: str) -> str:
"""Get the path to the progress file"""
return os.path.join(cls._get_report_folder(report_id), "progress.json")
@classmethod
def _get_section_path(cls, report_id: str, section_index: int) -> str:
"""Get the path to a section Markdown file"""
return os.path.join(cls._get_report_folder(report_id), f"section_{section_index:02d}.md")
@classmethod
def _get_agent_log_path(cls, report_id: str) -> str:
"""Get the path to the Agent log file"""
return os.path.join(cls._get_report_folder(report_id), "agent_log.jsonl")
@classmethod
def _get_console_log_path(cls, report_id: str) -> str:
"""Get the path to the console log file"""
return os.path.join(cls._get_report_folder(report_id), "console_log.txt")
@classmethod
def get_console_log(cls, report_id: str, from_line: int = 0) -> Dict[str, Any]:
    """
    Read the console log of a report, optionally starting at a line offset.

    This is the plain-text console output (INFO, WARNING, etc.) written during
    report generation — not the structured log kept in agent_log.jsonl.

    Args:
        report_id: Report ID
        from_line: Zero-based index of the first line to return (0 = whole file)
    Returns:
        {
            "logs": [log lines from from_line onward, without trailing newlines],
            "total_lines": number of lines in the file,
            "from_line": the requested start (0 when the file does not exist),
            "has_more": always False, since the file is read to its end
        }
    """
    path = cls._get_console_log_path(report_id)
    if not os.path.exists(path):
        return {"logs": [], "total_lines": 0, "from_line": 0, "has_more": False}

    selected = []
    line_count = 0
    with open(path, 'r', encoding='utf-8') as handle:
        for position, raw in enumerate(handle):
            line_count = position + 1
            if position < from_line:
                continue
            # Keep the line as written, minus its line terminator.
            selected.append(raw.rstrip('\n\r'))

    payload = {"logs": selected, "total_lines": line_count}
    payload["from_line"] = from_line
    payload["has_more"] = False  # read through to the end
    return payload
@classmethod
def get_console_log_stream(cls, report_id: str) -> List[str]:
    """
    Return every console log line of a report in one call.

    Args:
        report_id: Report ID
    Returns:
        List of log lines (the "logs" entry of get_console_log from line 0)
    """
    return cls.get_console_log(report_id, from_line=0)["logs"]
@classmethod
def get_agent_log(cls, report_id: str, from_line: int = 0) -> Dict[str, Any]:
    """
    Read the structured Agent log (agent_log.jsonl) of a report.

    Args:
        report_id: Report ID
        from_line: Zero-based index of the first line to return (0 = whole file)
    Returns:
        {
            "logs": [parsed JSON entries from from_line onward],
            "total_lines": number of lines in the file (parseable or not),
            "from_line": the requested start (0 when the file does not exist),
            "has_more": always False, since the file is read to its end
        }
    """
    path = cls._get_agent_log_path(report_id)
    if not os.path.exists(path):
        return {"logs": [], "total_lines": 0, "from_line": 0, "has_more": False}

    entries = []
    line_count = 0
    with open(path, 'r', encoding='utf-8') as handle:
        for position, raw in enumerate(handle):
            line_count = position + 1
            if position < from_line:
                continue
            try:
                parsed = json.loads(raw.strip())
            except json.JSONDecodeError:
                # Lines that are not valid JSON are counted but not returned.
                continue
            entries.append(parsed)

    payload = {"logs": entries, "total_lines": line_count}
    payload["from_line"] = from_line
    payload["has_more"] = False  # read through to the end
    return payload
@classmethod
def get_agent_log_stream(cls, report_id: str) -> List[Dict[str, Any]]:
    """
    Return every Agent log entry of a report in a single call.

    Thin wrapper around ``get_agent_log``: it always reads from the first
    line and hands back only the ``"logs"`` entry of the result.

    Args:
        report_id: Report ID

    Returns:
        List of log entries
    """
    return cls.get_agent_log(report_id, from_line=0)["logs"]
@classmethod
def save_outline(cls, report_id: str, outline: ReportOutline) -> None:
    """
    Persist the report outline as JSON.

    Called immediately after the planning stage completes.

    Args:
        report_id: Report ID
        outline: Outline to serialise via its ``to_dict()`` method
    """
    cls._ensure_report_folder(report_id)

    outline_file = cls._get_outline_path(report_id)
    with open(outline_file, 'w', encoding='utf-8') as handle:
        json.dump(outline.to_dict(), handle, ensure_ascii=False, indent=2)

    logger.info(t('report.outlineSaved', reportId=report_id))
@classmethod
def save_section(
    cls,
    report_id: str,
    section_index: int,
    section: ReportSection
) -> str:
    """
    Write one section to its own Markdown file.

    Called immediately after each section is generated to enable
    section-by-section output. The file always starts with a ``##`` heading
    carrying the section title; the cleaned body follows when it is non-empty.

    Args:
        report_id: Report ID
        section_index: Section index (starting from 1)
        section: Section object

    Returns:
        Path of the saved file
    """
    cls._ensure_report_folder(report_id)

    # Strip duplicate headings from the generated text before the system
    # heading is prepended.
    body = cls._clean_section_content(section.content, section.title)

    pieces = [f"## {section.title}\n\n"]
    if body:
        pieces.append(f"{body}\n\n")

    file_suffix = f"section_{section_index:02d}.md"
    file_path = os.path.join(cls._get_report_folder(report_id), file_suffix)
    with open(file_path, 'w', encoding='utf-8') as handle:
        handle.write("".join(pieces))

    logger.info(t('report.sectionFileSaved', reportId=report_id, fileSuffix=file_suffix))
    return file_path
@classmethod
def _clean_section_content(cls, content: str, section_title: str) -> str:
    """
    Normalise the Markdown body of a single section.

    1. Drop a heading that repeats the section title (compared with and
       without spaces) when it occurs within the first 5 lines, together
       with the blank line that directly follows it.
    2. Turn every other Markdown heading, whatever its level, into bold
       text followed by a blank line; the section heading itself is added
       by the system.
    3. Strip leading blank lines and leading horizontal rules.

    Args:
        content: Raw content
        section_title: Section title

    Returns:
        Cleaned content (falsy input is returned unchanged)
    """
    if not content:
        return content

    heading_pattern = re.compile(r'^(#{1,6})\s+(.+)$')
    kept = []
    drop_blank_after_title = False

    for position, raw_line in enumerate(content.strip().split('\n')):
        text = raw_line.strip()
        found = heading_pattern.match(text)

        if found:
            heading_text = found.group(2).strip()
            if position < 5 and (
                heading_text == section_title
                or heading_text.replace(' ', '') == section_title.replace(' ', '')
            ):
                # Duplicate of the section title: drop it and remember to
                # drop the blank line that follows.
                drop_blank_after_title = True
            else:
                kept.extend((f"**{heading_text}**", ""))
            continue

        if drop_blank_after_title and text == '':
            drop_blank_after_title = False
            continue

        drop_blank_after_title = False
        kept.append(raw_line)

    # Advance past leading blank lines, then past each leading horizontal
    # rule together with the blank lines right after it.
    start = 0
    total = len(kept)
    while start < total and kept[start].strip() == '':
        start += 1
    while start < total and kept[start].strip() in ('---', '***', '___'):
        start += 1
        while start < total and kept[start].strip() == '':
            start += 1

    return '\n'.join(kept[start:])
@classmethod
def update_progress(
    cls,
    report_id: str,
    status: str,
    progress: int,
    message: str,
    current_section: str = None,
    completed_sections: List[str] = None
) -> None:
    """
    Write the current generation progress to the report's progress file.

    The frontend can read progress.json to obtain real-time progress. Each
    call overwrites the previous snapshot and stamps it with the current
    local time.

    Args:
        report_id: Report ID
        status: Status value to record
        progress: Progress value to record
        message: Progress message to record
        current_section: Section being generated, if any
        completed_sections: Sections finished so far (stored as [] when
            omitted or empty)
    """
    cls._ensure_report_folder(report_id)

    finished = completed_sections if completed_sections else []
    snapshot = {
        "status": status,
        "progress": progress,
        "message": message,
        "current_section": current_section,
        "completed_sections": finished,
        "updated_at": datetime.now().isoformat()
    }

    progress_file = cls._get_progress_path(report_id)
    with open(progress_file, 'w', encoding='utf-8') as handle:
        json.dump(snapshot, handle, ensure_ascii=False, indent=2)
@classmethod
def get_progress(cls, report_id: str) -> Optional[Dict[str, Any]]:
    """
    Load the stored generation progress of a report.

    Args:
        report_id: Report ID

    Returns:
        The parsed content of the progress file, or None when the file
        does not exist
    """
    progress_file = cls._get_progress_path(report_id)
    if os.path.exists(progress_file):
        with open(progress_file, 'r', encoding='utf-8') as handle:
            return json.load(handle)
    return None
@classmethod
def get_generated_sections(cls, report_id: str) -> List[Dict[str, Any]]:
    """
    Get the list of already-generated sections.

    Scans the report folder for ``section_*.md`` files and returns them
    ordered by their numeric section index. Files whose name carries no
    integer index (for example a stray ``section_notes.md``) are ignored
    instead of aborting the whole listing.

    Args:
        report_id: Report ID

    Returns:
        One dict per section file with the keys ``"filename"``,
        ``"section_index"`` and ``"content"``; an empty list when the
        report folder does not exist
    """
    folder = cls._get_report_folder(report_id)
    if not os.path.isdir(folder):
        return []

    indexed = []
    for filename in os.listdir(folder):
        if not (filename.startswith('section_') and filename.endswith('.md')):
            continue
        # The index is the token right after the "section_" prefix.
        token = filename.replace('.md', '').split('_')[1]
        try:
            section_index = int(token)
        except ValueError:
            # Not one of the section files this class writes; leave it alone.
            continue
        indexed.append((section_index, filename))

    # Sort numerically: plain filename order would put section_100.md
    # before section_11.md once a report exceeds 99 sections.
    sections = []
    for section_index, filename in sorted(indexed):
        with open(os.path.join(folder, filename), 'r', encoding='utf-8') as f:
            content = f.read()
        sections.append({
            "filename": filename,
            "section_index": section_index,
            "content": content
        })
    return sections
@classmethod
def assemble_full_report(cls, report_id: str, outline: ReportOutline) -> str:
    """
    Assemble the complete report.

    Builds the report header from the outline, appends the saved section
    files in order, runs the heading clean-up pass over the result and
    writes it to the report's Markdown file.

    Args:
        report_id: Report ID
        outline: Outline providing the report title and summary

    Returns:
        The final Markdown text that was written to disk
    """
    # The other writers in this class (save_outline, save_section,
    # update_progress, save_report) call this before opening a file in the
    # report folder; do the same here so assembling a report with no saved
    # sections does not depend on an earlier call having prepared the folder.
    cls._ensure_report_folder(report_id)

    # Report header: main title, summary as a block quote, horizontal rule.
    header = f"# {outline.title}\n\n> {outline.summary}\n\n---\n\n"

    # Section files already carry their own "##" heading and trailing blank
    # line, so they can be concatenated as-is.
    body = "".join(
        section_info["content"]
        for section_info in cls.get_generated_sections(report_id)
    )

    # Post-processing: clean up heading issues across the full report.
    md_content = cls._post_process_report(header + body, outline)

    full_path = cls._get_report_markdown_path(report_id)
    with open(full_path, 'w', encoding='utf-8') as f:
        f.write(md_content)

    logger.info(t('report.fullReportAssembled', reportId=report_id))
    return md_content
@classmethod
def _post_process_report(cls, content: str, outline: ReportOutline) -> str:
    """
    Post-process the assembled report text.

    1. Drop a heading whose text already appears as a heading within the
       previous 5 emitted lines, along with the blank lines after it.
    2. Keep the report main title (``#``) and the outline's section titles
       (``##``); a section title written with ``#`` is corrected to ``##``.
       Every other heading, at any level, becomes bold text followed by a
       blank line.
    3. Drop a ``---`` rule that directly follows a kept heading and cap
       runs of blank lines at two.

    Args:
        content: Raw report content
        outline: Report outline

    Returns:
        Processed content
    """
    heading_re = re.compile(r'^(#{1,6})\s+(.+)$')
    known_sections = {section.title for section in outline.sections}

    def seen_recently(candidate, emitted_lines):
        # Only the last five emitted lines are inspected.
        for earlier in emitted_lines[-5:]:
            earlier_heading = heading_re.match(earlier.strip())
            if earlier_heading and earlier_heading.group(2).strip() == candidate:
                return True
        return False

    emitted = []
    after_heading = False    # True while the last emitted line is a kept heading
    swallow_blanks = False   # True right after a duplicate heading was dropped

    for raw in content.split('\n'):
        text = raw.strip()

        if swallow_blanks:
            if text == '':
                continue
            swallow_blanks = False

        found = heading_re.match(text)
        if found:
            depth = len(found.group(1))
            title = found.group(2).strip()

            if seen_recently(title, emitted):
                # Dropping a duplicate leaves after_heading untouched.
                swallow_blanks = True
                continue

            if depth == 1 and title == outline.title:
                # The report main title is kept verbatim.
                emitted.append(raw)
                after_heading = True
            elif depth == 1 and title in known_sections:
                # Section title incorrectly used #; correct to ##.
                emitted.append(f"## {title}")
                after_heading = True
            elif depth == 2 and (title in known_sections or title == outline.title):
                # Proper section heading, kept verbatim.
                emitted.append(raw)
                after_heading = True
            else:
                # Any other heading is demoted to bold text.
                emitted.extend((f"**{title}**", ""))
                after_heading = False
            continue

        if after_heading and text == '---':
            # A rule right under a heading is dropped; the heading still
            # counts as the previous line.
            continue

        if after_heading and text == '':
            # After a heading, keep only one blank line.
            if emitted and emitted[-1].strip() != '':
                emitted.append(raw)
            after_heading = False
            continue

        emitted.append(raw)
        after_heading = False

    # Clean up consecutive blank lines (retain at most 2).
    result = []
    blank_run = 0
    for entry in emitted:
        if entry.strip() == '':
            blank_run += 1
            if blank_run > 2:
                continue
        else:
            blank_run = 0
        result.append(entry)

    return '\n'.join(result)
@classmethod
def save_report(cls, report: Report) -> None:
    """
    Persist a report: metadata JSON, outline and full Markdown.

    The outline and the Markdown file are only written when the report
    actually carries them.

    Args:
        report: Report to store
    """
    report_id = report.report_id
    cls._ensure_report_folder(report_id)

    # Metadata JSON
    with open(cls._get_report_path(report_id), 'w', encoding='utf-8') as meta_file:
        json.dump(report.to_dict(), meta_file, ensure_ascii=False, indent=2)

    # Outline
    if report.outline:
        cls.save_outline(report_id, report.outline)

    # Complete Markdown report
    markdown = report.markdown_content
    if markdown:
        with open(cls._get_report_markdown_path(report_id), 'w', encoding='utf-8') as md_file:
            md_file.write(markdown)

    logger.info(t('report.reportSaved', reportId=report_id))
@classmethod
def get_report(cls, report_id: str) -> Optional[Report]:
    """
    Load a report from disk.

    The metadata file inside the report folder is preferred; when it is
    absent, a legacy ``<report_id>.json`` directly inside the reports
    directory is used instead. If the stored metadata has no Markdown
    content, the report's Markdown file is read as a fallback.

    Args:
        report_id: Report ID

    Returns:
        The reconstructed Report, or None when no metadata file exists
    """
    meta_path = cls._get_report_path(report_id)
    if not os.path.exists(meta_path):
        # Backward compatibility: file stored directly in the reports directory.
        legacy_path = os.path.join(cls.REPORTS_DIR, f"{report_id}.json")
        if not os.path.exists(legacy_path):
            return None
        meta_path = legacy_path

    with open(meta_path, 'r', encoding='utf-8') as handle:
        data = json.load(handle)

    # Rebuild the outline, sections first.
    outline = None
    outline_data = data.get('outline')
    if outline_data:
        rebuilt_sections = [
            ReportSection(
                title=raw_section['title'],
                content=raw_section.get('content', '')
            )
            for raw_section in outline_data.get('sections', [])
        ]
        outline = ReportOutline(
            title=outline_data['title'],
            summary=outline_data['summary'],
            sections=rebuilt_sections
        )

    # If markdown_content is empty, try reading the Markdown file instead.
    markdown_content = data.get('markdown_content', '')
    if not markdown_content:
        markdown_file = cls._get_report_markdown_path(report_id)
        if os.path.exists(markdown_file):
            with open(markdown_file, 'r', encoding='utf-8') as handle:
                markdown_content = handle.read()

    return Report(
        report_id=data['report_id'],
        simulation_id=data['simulation_id'],
        graph_id=data['graph_id'],
        simulation_requirement=data['simulation_requirement'],
        status=ReportStatus(data['status']),
        outline=outline,
        markdown_content=markdown_content,
        created_at=data.get('created_at', ''),
        completed_at=data.get('completed_at', ''),
        error=data.get('error')
    )
@classmethod
def get_report_by_simulation(cls, simulation_id: str) -> Optional[Report]:
    """
    Find a report that belongs to the given simulation.

    Scans the reports directory and returns the first match encountered
    in directory-listing order.

    Args:
        simulation_id: Simulation ID to look for

    Returns:
        The matching Report, or None when there is none
    """
    cls._ensure_reports_dir()

    for entry in os.listdir(cls.REPORTS_DIR):
        if os.path.isdir(os.path.join(cls.REPORTS_DIR, entry)):
            # New format: one folder per report, named after the report ID.
            candidate_id = entry
        elif entry.endswith('.json'):
            # Backward compatibility: <report_id>.json directly in the directory.
            candidate_id = entry[:-5]
        else:
            continue

        report = cls.get_report(candidate_id)
        if report and report.simulation_id == simulation_id:
            return report

    return None
@classmethod
def list_reports(cls, simulation_id: Optional[str] = None, limit: int = 50) -> List[Report]:
    """
    List stored reports, newest first.

    Args:
        simulation_id: When given, only reports of this simulation are returned
        limit: Maximum number of reports to return

    Returns:
        Reports sorted by ``created_at`` in descending order, truncated to
        ``limit`` entries
    """
    cls._ensure_reports_dir()

    matched = []
    for entry in os.listdir(cls.REPORTS_DIR):
        if os.path.isdir(os.path.join(cls.REPORTS_DIR, entry)):
            # New format: one folder per report, named after the report ID.
            candidate_id = entry
        elif entry.endswith('.json'):
            # Backward compatibility: <report_id>.json directly in the directory.
            candidate_id = entry[:-5]
        else:
            continue

        report = cls.get_report(candidate_id)
        if report and (simulation_id is None or report.simulation_id == simulation_id):
            matched.append(report)

    # Sort by creation time, descending.
    newest_first = sorted(matched, key=lambda r: r.created_at, reverse=True)
    return newest_first[:limit]
@classmethod
def delete_report(cls, report_id: str) -> bool:
    """
    Delete a report (the entire folder).

    Falls back to removing the legacy ``<report_id>.json`` /
    ``<report_id>.md`` files when the report has no folder of its own.

    Args:
        report_id: Report ID; must be a single, non-empty path component

    Returns:
        True when something was deleted; False when nothing matched or
        when ``report_id`` is not a plain name (empty, ``.``, ``..`` or
        containing a path separator), in which case nothing is touched
    """
    import shutil

    # A report ID is used verbatim to build filesystem paths below. An ID
    # such as "../x" would make the legacy branch remove files outside the
    # reports directory, and an empty / "." / ".." ID could make the folder
    # branch resolve to the reports directory itself or its parent
    # (NOTE(review): assumes _get_report_folder joins the ID onto the
    # reports directory - confirm). Refuse anything that is not a plain name.
    if (
        not report_id
        or report_id in ('.', '..')
        or os.path.basename(report_id) != report_id
    ):
        return False

    folder_path = cls._get_report_folder(report_id)

    # New format: delete the entire folder.
    if os.path.isdir(folder_path):
        shutil.rmtree(folder_path)
        logger.info(t('report.reportFolderDeleted', reportId=report_id))
        return True

    # Backward compatibility: delete individual files.
    deleted = False
    for legacy_path in (
        os.path.join(cls.REPORTS_DIR, f"{report_id}.json"),
        os.path.join(cls.REPORTS_DIR, f"{report_id}.md"),
    ):
        if os.path.exists(legacy_path):
            os.remove(legacy_path)
            deleted = True
    return deleted