# File-viewer header captured during extraction: 2584 lines, 101 KiB, Python
"""
|
||
Report Agent Service
|
||
Implements ReACT-pattern simulation report generation using Zep.
|
||
|
||
Features:
|
||
1. Generates reports based on simulation requirements and Zep graph data
|
||
2. First plans the table of contents, then generates content section by section
|
||
3. Each section uses multi-round ReACT thinking and reflection
|
||
4. Supports user conversation in which the agent autonomously calls retrieval tools
|
||
"""
|
||
|
||
import os
|
||
import json
|
||
import time
|
||
import re
|
||
from typing import Dict, Any, List, Optional, Callable
|
||
from dataclasses import dataclass, field
|
||
from datetime import datetime
|
||
from enum import Enum
|
||
|
||
from ..config import Config
|
||
from ..utils.llm_client import LLMClient
|
||
from ..utils.logger import get_logger
|
||
from ..utils.locale import get_language_instruction, t
|
||
from .zep_tools import (
|
||
ZepToolsService,
|
||
SearchResult,
|
||
InsightForgeResult,
|
||
PanoramaResult,
|
||
InterviewResult
|
||
)
|
||
|
||
# Module-level logger. ReportConsoleLogger attaches its file handler to this
# same logger name ('mirofish.report_agent'), so messages logged here are also
# written to the per-report console_log.txt while a handler is attached.
logger = get_logger('mirofish.report_agent')
|
||
|
||
|
||
class ReportLogger:
    """
    Detailed log recorder for the Report Agent.

    Generates an agent_log.jsonl file inside the report folder, recording each
    step in detail. Every line is a complete JSON object containing a timestamp,
    action type, and detailed content.
    """

    def __init__(self, report_id: str):
        """
        Initialise the log recorder.

        Args:
            report_id: Report ID used to determine the log file path
        """
        self.report_id = report_id
        self.log_file_path = os.path.join(
            Config.UPLOAD_FOLDER, 'reports', report_id, 'agent_log.jsonl'
        )
        self.start_time = datetime.now()
        self._ensure_log_file()

    def _ensure_log_file(self):
        """Ensure the directory containing the log file exists"""
        log_dir = os.path.dirname(self.log_file_path)
        os.makedirs(log_dir, exist_ok=True)

    def _get_elapsed_time(self) -> float:
        """Return elapsed time in seconds since the logger was created"""
        return (datetime.now() - self.start_time).total_seconds()

    def log(
        self,
        action: str,
        stage: str,
        details: Dict[str, Any],
        section_title: Optional[str] = None,
        section_index: Optional[int] = None
    ):
        """
        Record a single log entry.

        Args:
            action: Action type, e.g. 'start', 'tool_call', 'llm_response', 'section_complete'
            stage: Current stage, e.g. 'planning', 'generating', 'completed'
            details: Dictionary of detailed content (not truncated)
            section_title: Current section title (optional)
            section_index: Current section index (optional)
        """
        # Read the clock once so "timestamp" and "elapsed_seconds" describe
        # the same instant.
        now = datetime.now()
        log_entry = {
            "timestamp": now.isoformat(),
            "elapsed_seconds": round((now - self.start_time).total_seconds(), 2),
            "report_id": self.report_id,
            "action": action,
            "stage": stage,
            "section_title": section_title,
            "section_index": section_index,
            "details": details
        }

        # Append to the JSONL file (one JSON object per line)
        with open(self.log_file_path, 'a', encoding='utf-8') as f:
            f.write(json.dumps(log_entry, ensure_ascii=False) + '\n')

    def log_start(self, simulation_id: str, graph_id: str, simulation_requirement: str):
        """Record the start of report generation"""
        self.log(
            action="report_start",
            stage="pending",
            details={
                "simulation_id": simulation_id,
                "graph_id": graph_id,
                "simulation_requirement": simulation_requirement,
                "message": t('report.taskStarted')
            }
        )

    def log_planning_start(self):
        """Record the start of outline planning"""
        self.log(
            action="planning_start",
            stage="planning",
            details={"message": t('report.planningStart')}
        )

    def log_planning_context(self, context: Dict[str, Any]):
        """Record context information obtained during planning"""
        self.log(
            action="planning_context",
            stage="planning",
            details={
                "message": t('report.fetchSimContext'),
                "context": context
            }
        )

    def log_planning_complete(self, outline_dict: Dict[str, Any]):
        """Record the completion of outline planning"""
        self.log(
            action="planning_complete",
            stage="planning",
            details={
                "message": t('report.planningComplete'),
                "outline": outline_dict
            }
        )

    def log_section_start(self, section_title: str, section_index: int):
        """Record the start of section generation"""
        self.log(
            action="section_start",
            stage="generating",
            section_title=section_title,
            section_index=section_index,
            details={"message": t('report.sectionStart', title=section_title)}
        )

    def log_react_thought(self, section_title: str, section_index: int, iteration: int, thought: str):
        """Record a ReACT thinking step"""
        self.log(
            action="react_thought",
            stage="generating",
            section_title=section_title,
            section_index=section_index,
            details={
                "iteration": iteration,
                "thought": thought,
                "message": t('report.reactThought', iteration=iteration)
            }
        )

    def log_tool_call(
        self,
        section_title: str,
        section_index: int,
        tool_name: str,
        parameters: Dict[str, Any],
        iteration: int
    ):
        """Record a tool call"""
        self.log(
            action="tool_call",
            stage="generating",
            section_title=section_title,
            section_index=section_index,
            details={
                "iteration": iteration,
                "tool_name": tool_name,
                "parameters": parameters,
                "message": t('report.toolCall', toolName=tool_name)
            }
        )

    def log_tool_result(
        self,
        section_title: str,
        section_index: int,
        tool_name: str,
        result: str,
        iteration: int
    ):
        """Record a tool call result (full content, no truncation)"""
        self.log(
            action="tool_result",
            stage="generating",
            section_title=section_title,
            section_index=section_index,
            details={
                "iteration": iteration,
                "tool_name": tool_name,
                "result": result,  # full result, not truncated
                "result_length": len(result),
                "message": t('report.toolResult', toolName=tool_name)
            }
        )

    def log_llm_response(
        self,
        section_title: str,
        section_index: int,
        response: str,
        iteration: int,
        has_tool_calls: bool,
        has_final_answer: bool
    ):
        """Record an LLM response (full content, no truncation)"""
        self.log(
            action="llm_response",
            stage="generating",
            section_title=section_title,
            section_index=section_index,
            details={
                "iteration": iteration,
                "response": response,  # full response, not truncated
                "response_length": len(response),
                "has_tool_calls": has_tool_calls,
                "has_final_answer": has_final_answer,
                "message": t('report.llmResponse', hasToolCalls=has_tool_calls, hasFinalAnswer=has_final_answer)
            }
        )

    def log_section_content(
        self,
        section_title: str,
        section_index: int,
        content: str,
        tool_calls_count: int
    ):
        """Record section content generation completion (records content only; does not indicate the whole section is done)"""
        self.log(
            action="section_content",
            stage="generating",
            section_title=section_title,
            section_index=section_index,
            details={
                "content": content,  # full content, not truncated
                "content_length": len(content),
                "tool_calls_count": tool_calls_count,
                "message": t('report.sectionContentDone', title=section_title)
            }
        )

    def log_section_full_complete(
        self,
        section_title: str,
        section_index: int,
        full_content: str
    ):
        """
        Record section generation completion.

        The frontend should listen for this log entry to determine whether a section
        is truly complete and to retrieve the full content.
        """
        self.log(
            action="section_complete",
            stage="generating",
            section_title=section_title,
            section_index=section_index,
            details={
                "content": full_content,
                "content_length": len(full_content),
                "message": t('report.sectionComplete', title=section_title)
            }
        )

    def log_report_complete(self, total_sections: int, total_time_seconds: float):
        """Record the completion of report generation"""
        self.log(
            action="report_complete",
            stage="completed",
            details={
                "total_sections": total_sections,
                "total_time_seconds": round(total_time_seconds, 2),
                "message": t('report.reportComplete')
            }
        )

    def log_error(self, error_message: str, stage: str, section_title: Optional[str] = None):
        """Record an error"""
        self.log(
            action="error",
            stage=stage,
            section_title=section_title,
            section_index=None,
            details={
                "error": error_message,
                "message": t('report.errorOccurred', error=error_message)
            }
        )
|
||
|
||
|
||
class ReportConsoleLogger:
    """
    Console log recorder for the Report Agent.

    Writes console-style log messages (INFO, WARNING, etc.) to a console_log.txt
    file inside the report folder. Unlike agent_log.jsonl, this file uses a
    plain-text format.
    """

    # Names of the loggers whose records are mirrored into console_log.txt.
    # Shared by _setup_file_handler() and close() so the two cannot drift apart.
    _ATTACHED_LOGGER_NAMES = (
        'mirofish.report_agent',
        'mirofish.zep_tools',
    )

    def __init__(self, report_id: str):
        """
        Initialise the console log recorder.

        Args:
            report_id: Report ID used to determine the log file path
        """
        # Assigned first: if anything below raises, __del__ -> close() must
        # still find this attribute instead of failing with AttributeError.
        self._file_handler = None
        self.report_id = report_id
        self.log_file_path = os.path.join(
            Config.UPLOAD_FOLDER, 'reports', report_id, 'console_log.txt'
        )
        self._ensure_log_file()
        self._setup_file_handler()

    def _ensure_log_file(self):
        """Ensure the directory containing the log file exists"""
        log_dir = os.path.dirname(self.log_file_path)
        os.makedirs(log_dir, exist_ok=True)

    def _setup_file_handler(self):
        """Set up a file handler to write log messages to disk"""
        import logging

        # Create the file handler (append mode, INFO and above)
        handler = logging.FileHandler(
            self.log_file_path,
            mode='a',
            encoding='utf-8'
        )
        handler.setLevel(logging.INFO)

        # Use the same concise format as the console
        formatter = logging.Formatter(
            '[%(asctime)s] %(levelname)s: %(message)s',
            datefmt='%H:%M:%S'
        )
        handler.setFormatter(formatter)
        self._file_handler = handler

        # Attach to the report_agent-related loggers
        for logger_name in self._ATTACHED_LOGGER_NAMES:
            target_logger = logging.getLogger(logger_name)
            # Avoid attaching the same handler twice
            if handler not in target_logger.handlers:
                target_logger.addHandler(handler)

    def close(self):
        """
        Close the file handler and detach it from loggers.

        Safe to call more than once, and safe on an instance whose __init__
        did not run to completion.
        """
        handler = getattr(self, '_file_handler', None)
        if handler is None:
            return

        import logging

        for logger_name in self._ATTACHED_LOGGER_NAMES:
            target_logger = logging.getLogger(logger_name)
            if handler in target_logger.handlers:
                target_logger.removeHandler(handler)

        handler.close()
        self._file_handler = None

    def __del__(self):
        """Ensure the file handler is closed on destruction"""
        try:
            self.close()
        except Exception:
            # Finalizers can run during interpreter shutdown, when imports and
            # module globals may already be unavailable. An exception raised
            # here cannot be handled by any caller, so it is dropped.
            pass
|
||
|
||
|
||
class ReportStatus(str, Enum):
    """Report status (string-valued, so members compare equal to their plain string value)"""
    PENDING = "pending"
    PLANNING = "planning"
    GENERATING = "generating"
    COMPLETED = "completed"
    FAILED = "failed"
|
||
|
||
|
||
@dataclass
class ReportSection:
    """Report section"""
    title: str
    content: str = ""

    def to_dict(self) -> Dict[str, Any]:
        """Return the section as a plain dictionary with 'title' and 'content' keys."""
        return {
            "title": self.title,
            "content": self.content
        }

    def to_markdown(self, level: int = 2) -> str:
        """
        Convert to Markdown format.

        Args:
            level: Number of '#' characters used for the heading

        Returns:
            The heading followed by a blank line and, when the section has
            content, the content followed by a blank line.
        """
        md = f"{'#' * level} {self.title}\n\n"
        if self.content:
            md += f"{self.content}\n\n"
        return md
|
||
|
||
|
||
@dataclass
class ReportOutline:
    """Report outline"""
    title: str
    summary: str
    sections: List[ReportSection]

    def to_dict(self) -> Dict[str, Any]:
        """Return the outline as a plain dictionary; each section is converted with its own to_dict()."""
        return {
            "title": self.title,
            "summary": self.summary,
            "sections": [s.to_dict() for s in self.sections]
        }

    def to_markdown(self) -> str:
        """
        Convert to Markdown format.

        Emits the title as a level-1 heading, the summary as a block quote,
        then each section's own Markdown in order.
        """
        parts = [f"# {self.title}\n\n", f"> {self.summary}\n\n"]
        parts.extend(section.to_markdown() for section in self.sections)
        return "".join(parts)
|
||
|
||
|
||
@dataclass
class Report:
    """Complete report"""
    report_id: str
    simulation_id: str
    graph_id: str
    simulation_requirement: str
    status: ReportStatus
    outline: Optional[ReportOutline] = None
    markdown_content: str = ""
    created_at: str = ""
    completed_at: str = ""
    error: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """
        Return the report as a plain dictionary.

        The status is emitted as its string value and the outline is emitted
        through its own to_dict() (or None when no outline is set).
        """
        return {
            "report_id": self.report_id,
            "simulation_id": self.simulation_id,
            "graph_id": self.graph_id,
            "simulation_requirement": self.simulation_requirement,
            # Dataclasses do not enforce annotations, so tolerate a plain
            # string status instead of failing on a missing `.value`.
            "status": getattr(self.status, "value", self.status),
            "outline": self.outline.to_dict() if self.outline is not None else None,
            "markdown_content": self.markdown_content,
            "created_at": self.created_at,
            "completed_at": self.completed_at,
            "error": self.error
        }
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════════
|
||
# Prompt template constants
|
||
# ═══════════════════════════════════════════════════════════════
|
||
|
||
# ── Tool descriptions ──
# Plain-text descriptions of each retrieval tool. The leading backslash after
# the opening quotes suppresses the first newline of each literal.

TOOL_DESC_INSIGHT_FORGE = """\
[Deep Insight Retrieval - Powerful Retrieval Tool]
This is our powerful retrieval function, designed specifically for in-depth analysis. It will:
1. Automatically decompose your question into multiple sub-questions
2. Retrieve information from the simulation graph across multiple dimensions
3. Integrate results from semantic search, entity analysis, and relationship-chain tracing
4. Return the most comprehensive and in-depth retrieved content

[Use cases]
- Need to analyse a topic in depth
- Need to understand multiple aspects of an event
- Need rich material to support a report section

[Returns]
- Verbatim relevant facts (can be quoted directly)
- Core entity insights
- Relationship-chain analysis"""

TOOL_DESC_PANORAMA_SEARCH = """\
[Panorama Search - Get the Full Picture]
This tool is used to obtain a complete overview of the simulation results, and is
especially suited to understanding how events evolved. It will:
1. Retrieve all relevant nodes and relationships
2. Distinguish between currently valid facts and historical/expired facts
3. Help you understand how public opinion has evolved

[Use cases]
- Need to understand the complete development trajectory of an event
- Need to compare public-opinion changes across different stages
- Need comprehensive entity and relationship information

[Returns]
- Currently valid facts (latest simulation results)
- Historical/expired facts (evolution record)
- All entities involved"""

TOOL_DESC_QUICK_SEARCH = """\
[Quick Search - Fast Retrieval]
A lightweight, fast retrieval tool suited to simple, direct information queries.

[Use cases]
- Need to quickly look up a specific piece of information
- Need to verify a fact
- Simple information retrieval

[Returns]
- A list of facts most relevant to the query"""

TOOL_DESC_INTERVIEW_AGENTS = """\
[In-Depth Interview - Real Agent Interviews (Dual Platform)]
Calls the OASIS simulation environment's interview API to conduct real interviews with
currently running simulation agents!
This is not LLM simulation — it calls the real interview endpoint to obtain raw answers
from simulation agents.
Interviews are conducted simultaneously on both Twitter and Reddit by default, providing
more comprehensive perspectives.

Workflow:
1. Automatically reads persona files to learn about all simulation agents
2. Intelligently selects agents most relevant to the interview topic (e.g. students, media, officials)
3. Automatically generates interview questions
4. Calls the /api/simulation/interview/batch endpoint to conduct real interviews on both platforms
5. Integrates all interview results to provide multi-perspective analysis

[Use cases]
- Need to understand how different roles view an event (what do students think? media? officials?)
- Need to collect opinions and positions from multiple parties
- Need real answers from simulation agents (from the OASIS simulation environment)
- Want to make the report more vivid by including "interview transcripts"

[Returns]
- Identity information of interviewed agents
- Each agent's interview responses on Twitter and Reddit
- Key quotes (can be cited directly)
- Interview summary and comparison of viewpoints

[Important] The OASIS simulation environment must be running to use this feature!"""
|
||
|
||
# ── Outline planning prompt ──
# PLAN_SYSTEM_PROMPT contains literal single braces (a JSON example), so it
# must not be passed through str.format(). PLAN_USER_PROMPT_TEMPLATE is a
# str.format() template.

PLAN_SYSTEM_PROMPT = """\
You are an expert writer of "Future Prediction Reports" with a "god's-eye view" of the simulated world — you can observe the behaviour, statements, and interactions of every Agent in the simulation.

[Core concept]
We have built a simulated world and injected a specific "simulation requirement" into it as a variable. The outcome of the simulated world's evolution is a prediction of what may happen in the future. What you are observing is not "experimental data" but a "rehearsal of the future".

[Your task]
Write a "Future Prediction Report" that answers:
1. Under the conditions we set, what happened in the future?
2. How did the various types of Agents (population groups) react and act?
3. What future trends and risks worth attention did this simulation reveal?

[Report positioning]
- ✅ This is a simulation-based future prediction report that reveals "if this happens, what will the future look like"
- ✅ Focus on predicted outcomes: how events unfold, group reactions, emergent phenomena, potential risks
- ✅ The words and actions of Agents in the simulated world are predictions of future human behaviour
- ❌ Not an analysis of the current state of the real world
- ❌ Not a generic public-opinion overview

[Section count limit]
- Minimum 2 sections, maximum 5 sections
- No sub-sections needed; write complete content directly within each section
- Content should be concise and focused on the core prediction findings
- You design the section structure yourself based on the prediction results

Output the report outline in JSON format as follows:
{
"title": "Report title",
"summary": "Report summary (one sentence summarising the core prediction findings)",
"sections": [
{
"title": "Section title",
"description": "Description of the section content"
}
]
}

Note: the sections array must have a minimum of 2 and a maximum of 5 elements!"""

PLAN_USER_PROMPT_TEMPLATE = """\
[Prediction Scenario Setup]
The variable we injected into the simulated world (simulation requirement): {simulation_requirement}

[Scale of the Simulated World]
- Number of entities in the simulation: {total_nodes}
- Number of relationships between entities: {total_edges}
- Distribution of entity types: {entity_types}
- Number of active agents: {total_entities}

[Sample of Future Facts Predicted by the Simulation]
{related_facts_json}

Please examine this rehearsal of the future from a "god's-eye view":
1. Under the conditions we set, what state did the future settle into?
2. How did the various population groups (Agents) react and act?
3. What future trends worth attention did this simulation reveal?

Based on the prediction results, design the most appropriate report section structure.

[Reminder] Report section count: minimum 2, maximum 5; content should be concise and focused on core prediction findings."""
|
||
|
||
# ── Section generation prompt ──
# Both constants are str.format() templates; literal braces in the tool-call
# example are escaped as {{ and }}.

SECTION_SYSTEM_PROMPT_TEMPLATE = """\
You are an expert writer of "Future Prediction Reports", currently writing one section of a report.

Report title: {report_title}
Report summary: {report_summary}
Prediction scenario (simulation requirement): {simulation_requirement}

Section to write now: {section_title}

═══════════════════════════════════════════════════════════════
[Core concept]
═══════════════════════════════════════════════════════════════

The simulated world is a rehearsal of the future. We injected specific conditions
(the simulation requirement) into the simulated world; the behaviour and interactions
of the Agents in the simulation are predictions of future human behaviour.

Your task is to:
- Reveal what happened in the future under the conditions set
- Predict how the various population groups (Agents) reacted and acted
- Identify future trends, risks, and opportunities worth attention

❌ Do not write this as an analysis of the current state of the real world
✅ Focus on "what will happen in the future" — the simulation results ARE the predicted future

═══════════════════════════════════════════════════════════════
[Most important rules — must be followed]
═══════════════════════════════════════════════════════════════

1. [You must call tools to observe the simulated world]
- You are observing the rehearsal of the future from a "god's-eye view"
- All content must come from events that occurred and Agent statements in the simulated world
- You are prohibited from using your own knowledge to write the report content
- Call tools at least 3 times (and no more than 5 times) per section to observe the simulated world, which represents the future

2. [You must quote Agents' original statements and actions]
- Agent statements and behaviour are predictions of future human behaviour
- Present these predictions in the report using quotation format, for example:
> "A certain population group would say: verbatim content..."
- These quotations are the core evidence of the simulation's predictions

3. [Language consistency — quoted content must be translated to the report language]
- Tool return values may contain expressions in a language different from the report language
- The entire report must be written in the language specified by the user
- When quoting tool return values in other languages, you must translate them into the report language before writing them in
- Keep the original meaning intact; ensure the phrasing is natural and fluent
- This rule applies to both the main body text and blockquote (> format) content

4. [Faithfully present prediction results]
- Report content must reflect the simulation results that represent the future
- Do not add information that does not exist in the simulation
- If information on some aspect is insufficient, state that honestly

═══════════════════════════════════════════════════════════════
[⚠️ Formatting rules — extremely important!]
═══════════════════════════════════════════════════════════════

[One section = the minimum content unit]
- Each section is the smallest division of the report
- ❌ No Markdown headings (#, ##, ###, #### etc.) are permitted anywhere inside a section
- ❌ Do not add the section's main heading at the top of the content
- ✅ The section heading is added automatically by the system; you only need to write the plain body content
- ✅ Use **bold**, paragraph breaks, block quotes, and lists to organise content — but no headings

[Correct example]
```
This section analyses the public-opinion dissemination dynamics of the event. Through in-depth analysis of the simulation data, we found...

**Initial Ignition Stage**

Platform X served as the primary venue, taking on the core function of first publishing the information:

> "Platform X contributed 68% of the initial volume..."

**Emotional Amplification Stage**

Video platforms further amplified the impact of the event:

- Strong visual impact
- High emotional resonance
```

[Incorrect example]
```
## Executive Summary ← Wrong! Do not add any headings
### I. Initial Stage ← Wrong! Do not use ### for sub-sections
#### 1.1 Detailed Analysis ← Wrong! Do not use #### for finer divisions

This section analyses...
```

═══════════════════════════════════════════════════════════════
[Available retrieval tools] (call 3–5 times per section)
═══════════════════════════════════════════════════════════════

{tools_description}

[Tool usage advice — mix different tools; do not use only one]
- insight_forge: Deep insight analysis; automatically decomposes questions and retrieves facts and relationships across multiple dimensions
- panorama_search: Wide-angle panorama search; understand the full picture of an event, its timeline, and how it evolved
- quick_search: Quickly verify a specific piece of information
- interview_agents: Interview simulation agents to obtain first-person perspectives and real reactions from different roles

═══════════════════════════════════════════════════════════════
[Workflow]
═══════════════════════════════════════════════════════════════

In each reply you may do only one of the following two things (not both simultaneously):

Option A — Call a tool:
Output your thoughts, then call one tool using the following format:
<tool_call>
{{"name": "tool_name", "parameters": {{"param_name": "param_value"}}}}
</tool_call>
The system will execute the tool and return the result to you. You must not and cannot write the tool's return result yourself.

Option B — Output the final content:
Once you have obtained sufficient information through tools, output the section content starting with "Final Answer:".

⚠️ Strictly prohibited:
- Including both a tool call and a Final Answer in a single reply
- Fabricating tool return results (Observations); all tool results are injected by the system
- Calling more than one tool per reply

═══════════════════════════════════════════════════════════════
[Section content requirements]
═══════════════════════════════════════════════════════════════

1. Content must be based on simulation data retrieved by tools
2. Quote original text extensively to demonstrate simulation results
3. Use Markdown formatting (but headings are prohibited):
- Use **bold text** to mark key points (instead of sub-headings)
- Use lists (- or 1. 2. 3.) to organise points
- Use blank lines to separate different paragraphs
- ❌ Any heading syntax (#, ##, ###, #### etc.) is prohibited
4. [Quotation format rules — must stand alone as a paragraph]
Quotations must be their own paragraph with one blank line before and after; they cannot be embedded in a paragraph:

✅ Correct format:
```
The institution's response was considered to lack substance.

> "The institution's response pattern appeared rigid and slow-moving in the fast-changing social media environment."

This evaluation reflects the widespread public dissatisfaction.
```

❌ Incorrect format:
```
The institution's response was considered to lack substance. > "The institution's response pattern..." This evaluation reflects...
```
5. Maintain logical coherence with other sections
6. [Avoid repetition] Carefully read the already-completed section content below; do not repeat the same information
7. [Emphasis again] Do not add any headings! Use **bold** instead of sub-section headings"""

SECTION_USER_PROMPT_TEMPLATE = """\
Already-completed section content (please read carefully to avoid repetition):
{previous_content}

═══════════════════════════════════════════════════════════════
[Current task] Write section: {section_title}
═══════════════════════════════════════════════════════════════

[Important reminders]
1. Read the already-completed sections above carefully and avoid repeating the same content!
2. You must call tools to retrieve simulation data before starting
3. Mix different tools; do not use only one
4. Report content must come from retrieval results; do not use your own knowledge

[⚠️ Formatting warning — must be followed]
- ❌ Do not write any headings (#, ##, ###, #### are all prohibited)
- ❌ Do not write "{section_title}" as the opening line
- ✅ The section heading is added automatically by the system
- ✅ Write the body text directly; use **bold** instead of sub-section headings

Please begin:
1. First, think (Thought) about what information this section needs
2. Then call a tool (Action) to retrieve simulation data
3. Once you have gathered enough information, output Final Answer (plain body text, no headings of any kind)"""
|
||
|
||
# ── ReACT loop message templates ──
# All constants with {placeholders} are str.format() templates.

REACT_OBSERVATION_TEMPLATE = """\
Observation (retrieval result):

═══ Tool {tool_name} returned ═══
{result}

═══════════════════════════════════════════════════════════════
Tools called: {tool_calls_count}/{max_tool_calls} (used: {used_tools_str}){unused_hint}
- If the information is sufficient: output the section content starting with "Final Answer:" (must quote the original text above)
- If more information is needed: call one tool to continue retrieval
═══════════════════════════════════════════════════════════════"""

REACT_INSUFFICIENT_TOOLS_MSG = (
    "[Note] You have only called {tool_calls_count} tool(s), but at least {min_tool_calls} are required. "
    "Please call more tools to retrieve simulation data before outputting Final Answer. {unused_hint}"
)

REACT_INSUFFICIENT_TOOLS_MSG_ALT = (
    "You have only called {tool_calls_count} tool(s) so far; at least {min_tool_calls} are required. "
    "Please call a tool to retrieve simulation data. {unused_hint}"
)

REACT_TOOL_LIMIT_MSG = (
    "The tool call limit has been reached ({tool_calls_count}/{max_tool_calls}); no more tools can be called. "
    'Please immediately output the section content starting with "Final Answer:" based on the information already retrieved.'
)

REACT_UNUSED_TOOLS_HINT = "\n💡 You have not yet used: {unused_list} — consider trying different tools to gather multi-perspective information"

REACT_FORCE_FINAL_MSG = "The tool call limit has been reached. Please output Final Answer: directly and generate the section content."
|
||
|
||
# ── Chat prompt ──
# CHAT_SYSTEM_PROMPT_TEMPLATE is a str.format() template; literal braces in the
# tool-call example are escaped as {{ and }}.

CHAT_SYSTEM_PROMPT_TEMPLATE = """\
You are a concise and efficient simulation prediction assistant.

[Background]
Prediction conditions: {simulation_requirement}

[Already-generated analysis report]
{report_content}

[Rules]
1. Prioritise answering questions based on the report content above
2. Answer questions directly; avoid lengthy reasoning
3. Only call tools to retrieve more data when the report content is insufficient to answer
4. Answers should be concise, clear, and well-organised

[Available tools] (use only when needed; call at most 1–2 times)
{tools_description}

[Tool call format]
<tool_call>
{{"name": "tool_name", "parameters": {{"param_name": "param_value"}}}}
</tool_call>

[Answer style]
- Concise and direct; avoid long-winded explanations
- Use the > format to quote key content
- Lead with the conclusion, then explain the reasoning"""

CHAT_OBSERVATION_SUFFIX = "\n\nPlease answer the question concisely."
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════════
|
||
# ReportAgent main class
|
||
# ═══════════════════════════════════════════════════════════════
|
||
|
||
|
||
class ReportAgent:
|
||
"""
|
||
Report Agent — Simulation report generation agent.
|
||
|
||
Uses the ReACT (Reasoning + Acting) pattern:
|
||
1. Planning stage: analyse simulation requirements and plan the report structure
|
||
2. Generation stage: generate content section by section; each section may call tools
|
||
multiple times to retrieve information
|
||
3. Reflection stage: check content completeness and accuracy
|
||
"""
|
||
|
||
# Maximum tool calls per section. Once this many tools have been executed for
# a section, further tool requests are answered with REACT_TOOL_LIMIT_MSG
# instead of being run.
MAX_TOOL_CALLS_PER_SECTION = 5

# Maximum reflection rounds.
# NOTE(review): not referenced in the portion of the file reviewed here —
# confirm it is still used.
MAX_REFLECTION_ROUNDS = 3

# Maximum tool calls per chat turn.
# NOTE(review): not referenced in the portion of the file reviewed here —
# presumably enforced further down in chat(); confirm.
MAX_TOOL_CALLS_PER_CHAT = 2
|
||
|
||
def __init__(
    self,
    graph_id: str,
    simulation_id: str,
    simulation_requirement: str,
    llm_client: Optional[LLMClient] = None,
    zep_tools: Optional[ZepToolsService] = None
):
    """
    Bind a Report Agent to one graph and one simulation.

    Args:
        graph_id: Graph ID
        simulation_id: Simulation ID
        simulation_requirement: Simulation requirement description
        llm_client: LLM client to use; a default LLMClient is created when
            the argument is missing or falsy
        zep_tools: Zep tools service to use; a default ZepToolsService is
            created when the argument is missing or falsy
    """
    # Identity of the simulation this agent reports on
    self.graph_id = graph_id
    self.simulation_id = simulation_id
    self.simulation_requirement = simulation_requirement

    # Collaborators: injected instances win, otherwise build the defaults
    self.llm = llm_client if llm_client else LLMClient()
    self.zep_tools = zep_tools if zep_tools else ZepToolsService()

    # Tool catalogue shown to the LLM
    self.tools = self._define_tools()

    # Both recorders stay unset until generate_report creates them
    self.report_logger: Optional[ReportLogger] = None  # structured log
    self.console_logger: Optional[ReportConsoleLogger] = None  # console log

    logger.info(t('report.agentInitDone', graphId=graph_id, simulationId=simulation_id))
|
||
|
||
def _define_tools(self) -> Dict[str, Dict[str, Any]]:
    """
    Build the catalogue of tools offered to the LLM.

    Returns:
        Mapping of tool name to a spec dict with the keys "name",
        "description" and "parameters" (parameter name -> help text), in
        the order the tools are listed below.
    """
    # (tool name, description, parameter help) — one row per tool
    catalogue = (
        (
            "insight_forge",
            TOOL_DESC_INSIGHT_FORGE,
            {
                "query": "The question or topic you want to analyse in depth",
                "report_context": "Context of the current report section (optional; helps generate more precise sub-questions)",
            },
        ),
        (
            "panorama_search",
            TOOL_DESC_PANORAMA_SEARCH,
            {
                "query": "Search query for relevance ranking",
                "include_expired": "Whether to include expired/historical content (default True)",
            },
        ),
        (
            "quick_search",
            TOOL_DESC_QUICK_SEARCH,
            {
                "query": "Search query string",
                "limit": "Number of results to return (optional, default 10)",
            },
        ),
        (
            "interview_agents",
            TOOL_DESC_INTERVIEW_AGENTS,
            {
                "interview_topic": "Interview topic or requirement description",
                "max_agents": "Maximum number of agents to interview (optional, default 5, max 10)",
            },
        ),
    )

    registry: Dict[str, Dict[str, Any]] = {}
    for tool_name, tool_description, tool_parameters in catalogue:
        registry[tool_name] = {
            "name": tool_name,
            "description": tool_description,
            "parameters": tool_parameters,
        }
    return registry
|
||
|
||
def _execute_tool(self, tool_name: str, parameters: Dict[str, Any], report_context: str = "") -> str:
    """
    Execute one tool call and return its result as text.

    This method does not raise for tool failures: any exception from argument
    handling or from the underlying service is logged and converted into a
    "Tool execution failed: ..." string, which the caller can hand back to
    the LLM as an observation.

    Args:
        tool_name: Tool name; either a current tool or one of the legacy
            names kept for backward compatibility
        parameters: Tool parameters as parsed from the LLM output; None is
            treated as an empty dict, any other non-dict value is reported
            as a failure
        report_context: Report context (used by InsightForge when the call
            itself does not carry one)

    Returns:
        Tool execution result (text format)
    """
    logger.info(t('report.executingTool', toolName=tool_name, params=parameters))

    def _to_int(raw: Any, default: int) -> int:
        """Coerce an LLM-supplied number (int, float or numeric string) to int."""
        try:
            # Strings go through float() so that "10.0" is accepted as well
            return int(float(raw)) if isinstance(raw, str) else int(raw)
        except (TypeError, ValueError, OverflowError):
            # Unusable value (None, "ten", inf, ...): keep the documented default
            return default

    try:
        # LLM output may carry `"parameters": null` or a non-object value
        if parameters is None:
            parameters = {}
        elif not isinstance(parameters, dict):
            raise TypeError(
                f"parameters must be a JSON object, got {type(parameters).__name__}"
            )

        if tool_name == "insight_forge":
            query = parameters.get("query", "")
            # A context supplied in the call wins over the section-level one
            ctx = parameters.get("report_context", "") or report_context
            result = self.zep_tools.insight_forge(
                graph_id=self.graph_id,
                query=query,
                simulation_requirement=self.simulation_requirement,
                report_context=ctx
            )
            return result.to_text()

        elif tool_name == "panorama_search":
            # Panorama search - get the full picture
            query = parameters.get("query", "")
            include_expired = parameters.get("include_expired", True)
            if isinstance(include_expired, str):
                # The flag may arrive as text; accept the usual truthy spellings
                include_expired = include_expired.lower() in ['true', '1', 'yes']
            result = self.zep_tools.panorama_search(
                graph_id=self.graph_id,
                query=query,
                include_expired=include_expired
            )
            return result.to_text()

        elif tool_name == "quick_search":
            # Quick search - fast retrieval
            query = parameters.get("query", "")
            limit = _to_int(parameters.get("limit", 10), 10)
            result = self.zep_tools.quick_search(
                graph_id=self.graph_id,
                query=query,
                limit=limit
            )
            return result.to_text()

        elif tool_name == "interview_agents":
            # In-depth interview - calls the real OASIS interview API to get
            # simulation agent responses (dual platform)
            interview_topic = parameters.get("interview_topic", parameters.get("query", ""))
            # Keep the agent count inside the advertised 1..10 range
            max_agents = max(1, min(_to_int(parameters.get("max_agents", 5), 5), 10))
            result = self.zep_tools.interview_agents(
                simulation_id=self.simulation_id,
                interview_requirement=interview_topic,
                simulation_requirement=self.simulation_requirement,
                max_agents=max_agents
            )
            return result.to_text()

        # ========== Legacy tools for backward compatibility (internally redirect to new tools) ==========

        elif tool_name == "search_graph":
            # Redirect to quick_search
            logger.info(t('report.redirectToQuickSearch'))
            return self._execute_tool("quick_search", parameters, report_context)

        elif tool_name == "get_graph_statistics":
            result = self.zep_tools.get_graph_statistics(self.graph_id)
            return json.dumps(result, ensure_ascii=False, indent=2)

        elif tool_name == "get_entity_summary":
            entity_name = parameters.get("entity_name", "")
            result = self.zep_tools.get_entity_summary(
                graph_id=self.graph_id,
                entity_name=entity_name
            )
            return json.dumps(result, ensure_ascii=False, indent=2)

        elif tool_name == "get_simulation_context":
            # Redirect to insight_forge as it is more powerful
            logger.info(t('report.redirectToInsightForge'))
            query = parameters.get("query", self.simulation_requirement)
            return self._execute_tool("insight_forge", {"query": query}, report_context)

        elif tool_name == "get_entities_by_type":
            entity_type = parameters.get("entity_type", "")
            nodes = self.zep_tools.get_entities_by_type(
                graph_id=self.graph_id,
                entity_type=entity_type
            )
            result = [n.to_dict() for n in nodes]
            return json.dumps(result, ensure_ascii=False, indent=2)

        else:
            # List every tool this method actually supports, interview_agents included
            return (
                f"Unknown tool: {tool_name}. Please use one of: "
                "insight_forge, panorama_search, quick_search, interview_agents"
            )

    except Exception as e:
        # Boundary handler: a failed tool must not abort report generation
        logger.error(t('report.toolExecFailed', toolName=tool_name, error=str(e)))
        return f"Tool execution failed: {str(e)}"
|
||
|
||
# Valid tool names; used when validating the bare-JSON fallback parse.
# Only the four current tools are listed; the legacy names that _execute_tool
# still redirects (e.g. "search_graph") are not accepted on that path.
VALID_TOOL_NAMES = {"insight_forge", "panorama_search", "quick_search", "interview_agents"}
|
||
|
||
def _parse_tool_calls(self, response: str) -> List[Dict[str, Any]]:
|
||
"""
|
||
Parse tool calls from an LLM response.
|
||
|
||
Supported formats (in priority order):
|
||
1. <tool_call>{"name": "tool_name", "parameters": {...}}</tool_call>
|
||
2. Bare JSON (the entire response or a single line is a tool call JSON object)
|
||
"""
|
||
tool_calls = []
|
||
|
||
# Format 1: XML-style (standard format)
|
||
xml_pattern = r'<tool_call>\s*(\{.*?\})\s*</tool_call>'
|
||
for match in re.finditer(xml_pattern, response, re.DOTALL):
|
||
try:
|
||
call_data = json.loads(match.group(1))
|
||
tool_calls.append(call_data)
|
||
except json.JSONDecodeError:
|
||
pass
|
||
|
||
if tool_calls:
|
||
return tool_calls
|
||
|
||
# Format 2: fallback — LLM outputs bare JSON directly (without <tool_call> tags)
|
||
# Only attempted when Format 1 did not match, to avoid false positives in body text
|
||
stripped = response.strip()
|
||
if stripped.startswith('{') and stripped.endswith('}'):
|
||
try:
|
||
call_data = json.loads(stripped)
|
||
if self._is_valid_tool_call(call_data):
|
||
tool_calls.append(call_data)
|
||
return tool_calls
|
||
except json.JSONDecodeError:
|
||
pass
|
||
|
||
# Response may contain reasoning text + bare JSON; try to extract the last JSON object
|
||
json_pattern = r'(\{"(?:name|tool)"\s*:.*?\})\s*$'
|
||
match = re.search(json_pattern, stripped, re.DOTALL)
|
||
if match:
|
||
try:
|
||
call_data = json.loads(match.group(1))
|
||
if self._is_valid_tool_call(call_data):
|
||
tool_calls.append(call_data)
|
||
except json.JSONDecodeError:
|
||
pass
|
||
|
||
return tool_calls
|
||
|
||
def _is_valid_tool_call(self, data: dict) -> bool:
|
||
"""Validate whether the parsed JSON is a valid tool call"""
|
||
# Supports both {"name": ..., "parameters": ...} and {"tool": ..., "params": ...} key forms
|
||
tool_name = data.get("name") or data.get("tool")
|
||
if tool_name and tool_name in self.VALID_TOOL_NAMES:
|
||
# Normalise keys to name / parameters
|
||
if "tool" in data:
|
||
data["name"] = data.pop("tool")
|
||
if "params" in data and "parameters" not in data:
|
||
data["parameters"] = data.pop("params")
|
||
return True
|
||
return False
|
||
|
||
def _get_tools_description(self) -> str:
|
||
"""Generate tool description text"""
|
||
desc_parts = ["Available tools:"]
|
||
for name, tool in self.tools.items():
|
||
params_desc = ", ".join([f"{k}: {v}" for k, v in tool["parameters"].items()])
|
||
desc_parts.append(f"- {name}: {tool['description']}")
|
||
if params_desc:
|
||
desc_parts.append(f" Parameters: {params_desc}")
|
||
return "\n".join(desc_parts)
|
||
|
||
def plan_outline(
    self,
    progress_callback: Optional[Callable] = None
) -> ReportOutline:
    """
    Plan the report outline.

    Fetches the simulation context, asks the LLM for a JSON outline and turns
    it into a ReportOutline. If the LLM call or the parsing fails, a fixed
    three-section outline is returned instead of raising.

    Args:
        progress_callback: Progress callback function, invoked as
            (stage, percent, message) with stage "planning"

    Returns:
        ReportOutline: Report outline
    """

    def notify(percent: int, message_key: str) -> None:
        # The message is looked up only when somebody is listening
        if progress_callback:
            progress_callback("planning", percent, t(message_key))

    logger.info(t('report.startPlanningOutline'))
    notify(0, 'progress.analyzingRequirements')

    # Obtain the simulation context first (failures here propagate to the caller)
    context = self.zep_tools.get_simulation_context(
        graph_id=self.graph_id,
        simulation_requirement=self.simulation_requirement
    )
    notify(30, 'progress.generatingOutline')

    system_prompt = f"{PLAN_SYSTEM_PROMPT}\n\n{get_language_instruction()}"

    graph_stats = context.get('graph_statistics', {})
    # Only the first ten related facts are shown to the LLM
    facts_preview = context.get('related_facts', [])[:10]
    user_prompt = PLAN_USER_PROMPT_TEMPLATE.format(
        simulation_requirement=self.simulation_requirement,
        total_nodes=graph_stats.get('total_nodes', 0),
        total_edges=graph_stats.get('total_edges', 0),
        entity_types=list(graph_stats.get('entity_types', {}).keys()),
        total_entities=context.get('total_entities', 0),
        related_facts_json=json.dumps(facts_preview, ensure_ascii=False, indent=2),
    )

    try:
        llm_result = self.llm.chat_json(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=0.3
        )
        notify(80, 'progress.parsingOutline')

        # Parse the outline: one empty-bodied section per planned title
        planned_sections = [
            ReportSection(title=entry.get("title", ""), content="")
            for entry in llm_result.get("sections", [])
        ]
        planned_outline = ReportOutline(
            title=llm_result.get("title", "Simulation Analysis Report"),
            summary=llm_result.get("summary", ""),
            sections=planned_sections
        )
        notify(100, 'progress.outlinePlanComplete')

        logger.info(t('report.outlinePlanDone', count=len(planned_sections)))
        return planned_outline

    except Exception as exc:
        logger.error(t('report.outlinePlanFailed', error=str(exc)))
        # Return a default outline (3 sections, as fallback)
        fallback_titles = (
            "Predicted Scenario and Core Findings",
            "Population Behaviour Prediction Analysis",
            "Trend Outlook and Risk Warnings",
        )
        return ReportOutline(
            title="Future Forecast Report",
            summary="Future trends and risk analysis based on simulation forecasts",
            sections=[ReportSection(title=heading) for heading in fallback_titles]
        )
|
||
|
||
def _generate_section_react(
    self,
    section: ReportSection,
    outline: ReportOutline,
    previous_sections: List[str],
    progress_callback: Optional[Callable] = None,
    section_index: int = 0
) -> str:
    """
    Generate a single section using the ReACT pattern.

    ReACT loop:
    1. Thought — analyse what information is needed
    2. Action — call a tool to retrieve information
    3. Observation — analyse the tool's return value
    4. Repeat until enough information is gathered or the maximum count is reached
    5. Final Answer — generate the section content

    Each loop iteration makes one LLM call and then handles the reply in one
    of four ways:
    - reply has both a tool call and "Final Answer:": asked to retry (twice),
      then downgraded to the first tool call;
    - reply has "Final Answer:": accepted once at least ``min_tool_calls``
      tools have run, otherwise rejected with a request for more tool use;
    - reply has a tool call: only the first call is executed and its result
      is fed back as the next user turn, unless the per-section quota
      (MAX_TOOL_CALLS_PER_SECTION) is already used up;
    - reply has neither: accepted verbatim as the section text once enough
      tools have run, otherwise the model is asked to call tools.
    If the loop ends without an accepted answer, one more LLM call is made
    with REACT_FORCE_FINAL_MSG.

    Args:
        section: The section to generate
        outline: The full outline
        previous_sections: Content of previously generated sections (for coherence)
        progress_callback: Progress callback, invoked as (stage, percent, message)
        section_index: Section index (used for logging)

    Returns:
        Section content (Markdown format)
    """
    logger.info(t('report.reactGenerateSection', title=section.title))

    # Log section start
    if self.report_logger:
        self.report_logger.log_section_start(section.title, section_index)

    system_prompt = SECTION_SYSTEM_PROMPT_TEMPLATE.format(
        report_title=outline.title,
        report_summary=outline.summary,
        simulation_requirement=self.simulation_requirement,
        section_title=section.title,
        tools_description=self._get_tools_description(),
    )
    system_prompt = f"{system_prompt}\n\n{get_language_instruction()}"

    # Build the user prompt — each already-completed section is capped at 4000 characters
    if previous_sections:
        previous_parts = []
        for sec in previous_sections:
            # Each section is limited to 4000 characters; "..." marks a cut
            truncated = sec[:4000] + "..." if len(sec) > 4000 else sec
            previous_parts.append(truncated)
        previous_content = "\n\n---\n\n".join(previous_parts)
    else:
        previous_content = "(This is the first section)"

    user_prompt = SECTION_USER_PROMPT_TEMPLATE.format(
        previous_content=previous_content,
        section_title=section.title,
    )

    # Conversation transcript; grows by one assistant + one user turn per iteration
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]

    # ReACT loop
    tool_calls_count = 0
    max_iterations = 5  # maximum iterations (LLM calls inside the loop)
    min_tool_calls = 3  # minimum tool calls required before an answer is accepted
    # Conflict count (tool call + Final Answer in same reply). It is only
    # reset after a downgrade, not by a conflict-free reply in between.
    conflict_retries = 0
    used_tools = set()  # tracks tool names that have been called
    all_tools = {"insight_forge", "panorama_search", "quick_search", "interview_agents"}

    # Report context used by InsightForge for sub-question generation
    report_context = f"Section title: {section.title}\nSimulation requirement: {self.simulation_requirement}"

    for iteration in range(max_iterations):
        if progress_callback:
            # Percent is derived from the iteration number, not from tool usage
            progress_callback(
                "generating",
                int((iteration / max_iterations) * 100),
                t('progress.deepSearchAndWrite', current=tool_calls_count, max=self.MAX_TOOL_CALLS_PER_SECTION)
            )

        # Call the LLM
        response = self.llm.chat(
            messages=messages,
            temperature=0.5,
            max_tokens=4096
        )

        # Check whether the LLM returned None (API error or empty content)
        if response is None:
            logger.warning(t('report.sectionIterNone', title=section.title, iteration=iteration + 1))
            # If iterations remain, append a message and retry
            if iteration < max_iterations - 1:
                messages.append({"role": "assistant", "content": "(empty response)"})
                messages.append({"role": "user", "content": "Please continue generating content."})
                continue
            # Last iteration also returned None; break out of the loop and force a conclusion
            break

        logger.debug(f"LLM response: {response[:200]}...")

        # Parse once and reuse the result
        tool_calls = self._parse_tool_calls(response)
        has_tool_calls = bool(tool_calls)
        has_final_answer = "Final Answer:" in response

        # ── Conflict handling: LLM output both a tool call and a Final Answer ──
        if has_tool_calls and has_final_answer:
            conflict_retries += 1
            logger.warning(
                t('report.sectionConflict', title=section.title, iteration=iteration+1, conflictCount=conflict_retries)
            )

            if conflict_retries <= 2:
                # First two occurrences: discard this response and ask the LLM to reply again
                # (the retry still consumes one loop iteration)
                messages.append({"role": "assistant", "content": response})
                messages.append({
                    "role": "user",
                    "content": (
                        "[Format error] Your reply contained both a tool call and a Final Answer, which is not allowed.\n"
                        "Each reply may do only one of the following two things:\n"
                        "- Call a tool (output a <tool_call> block; do not write Final Answer)\n"
                        "- Output the final content (begin with 'Final Answer:'; do not include <tool_call>)\n"
                        "Please reply again and do only one of the two."
                    ),
                })
                continue
            else:
                # Third occurrence: downgrade — truncate to the first tool call and force execution
                logger.warning(
                    t('report.sectionConflictDowngrade', title=section.title, conflictCount=conflict_retries)
                )
                first_tool_end = response.find('</tool_call>')
                if first_tool_end != -1:
                    # Keep everything up to and including the first closing tag, then re-parse
                    response = response[:first_tool_end + len('</tool_call>')]
                    tool_calls = self._parse_tool_calls(response)
                    has_tool_calls = bool(tool_calls)
                has_final_answer = False
                conflict_retries = 0

        # Log the LLM response
        if self.report_logger:
            self.report_logger.log_llm_response(
                section_title=section.title,
                section_index=section_index,
                response=response,
                iteration=iteration + 1,
                has_tool_calls=has_tool_calls,
                has_final_answer=has_final_answer
            )

        # ── Case 1: LLM output a Final Answer ──
        if has_final_answer:
            # Insufficient tool calls — reject and require more tool usage
            if tool_calls_count < min_tool_calls:
                messages.append({"role": "assistant", "content": response})
                unused_tools = all_tools - used_tools
                unused_hint = f"(These tools have not been used yet, consider trying them: {', '.join(unused_tools)})" if unused_tools else ""
                messages.append({
                    "role": "user",
                    "content": REACT_INSUFFICIENT_TOOLS_MSG.format(
                        tool_calls_count=tool_calls_count,
                        min_tool_calls=min_tool_calls,
                        unused_hint=unused_hint,
                    ),
                })
                continue

            # Normal completion: keep only the text after the last "Final Answer:" marker
            final_answer = response.split("Final Answer:")[-1].strip()
            logger.info(t('report.sectionGenDone', title=section.title, count=tool_calls_count))

            if self.report_logger:
                self.report_logger.log_section_content(
                    section_title=section.title,
                    section_index=section_index,
                    content=final_answer,
                    tool_calls_count=tool_calls_count
                )
            return final_answer

        # ── Case 2: LLM attempted a tool call ──
        if has_tool_calls:
            # Tool quota exhausted → notify clearly and require Final Answer output
            if tool_calls_count >= self.MAX_TOOL_CALLS_PER_SECTION:
                messages.append({"role": "assistant", "content": response})
                messages.append({
                    "role": "user",
                    "content": REACT_TOOL_LIMIT_MSG.format(
                        tool_calls_count=tool_calls_count,
                        max_tool_calls=self.MAX_TOOL_CALLS_PER_SECTION,
                    ),
                })
                continue

            # Execute only the first tool call; any others in the same reply are ignored.
            # The lookups below assume the parsed call carries a "name" key — a
            # call without one raises KeyError here.
            call = tool_calls[0]
            if len(tool_calls) > 1:
                logger.info(t('report.multiToolOnlyFirst', total=len(tool_calls), toolName=call['name']))

            if self.report_logger:
                self.report_logger.log_tool_call(
                    section_title=section.title,
                    section_index=section_index,
                    tool_name=call["name"],
                    parameters=call.get("parameters", {}),
                    iteration=iteration + 1
                )

            result = self._execute_tool(
                call["name"],
                call.get("parameters", {}),
                report_context=report_context
            )

            if self.report_logger:
                self.report_logger.log_tool_result(
                    section_title=section.title,
                    section_index=section_index,
                    tool_name=call["name"],
                    result=result,
                    iteration=iteration + 1
                )

            # The call counts towards the quota whatever text the tool returned
            tool_calls_count += 1
            used_tools.add(call['name'])

            # Build the unused-tools hint (omitted once the quota is used up)
            unused_tools = all_tools - used_tools
            unused_hint = ""
            if unused_tools and tool_calls_count < self.MAX_TOOL_CALLS_PER_SECTION:
                unused_hint = REACT_UNUSED_TOOLS_HINT.format(unused_list=", ".join(unused_tools))

            messages.append({"role": "assistant", "content": response})
            messages.append({
                "role": "user",
                "content": REACT_OBSERVATION_TEMPLATE.format(
                    tool_name=call["name"],
                    result=result,
                    tool_calls_count=tool_calls_count,
                    max_tool_calls=self.MAX_TOOL_CALLS_PER_SECTION,
                    # used_tools is a set, so the order of names in this listing is not fixed
                    used_tools_str=", ".join(used_tools),
                    unused_hint=unused_hint,
                ),
            })
            continue

        # ── Case 3: Neither a tool call nor a Final Answer ──
        messages.append({"role": "assistant", "content": response})

        if tool_calls_count < min_tool_calls:
            # Insufficient tool calls — recommend unused tools
            unused_tools = all_tools - used_tools
            unused_hint = f"(These tools have not been used yet, consider trying them: {', '.join(unused_tools)})" if unused_tools else ""

            messages.append({
                "role": "user",
                "content": REACT_INSUFFICIENT_TOOLS_MSG_ALT.format(
                    tool_calls_count=tool_calls_count,
                    min_tool_calls=min_tool_calls,
                    unused_hint=unused_hint,
                ),
            })
            continue

        # Enough tool calls have been made; the LLM output content without the "Final Answer:" prefix.
        # Use this content as the final answer directly without further looping.
        logger.info(t('report.sectionNoPrefix', title=section.title, count=tool_calls_count))
        final_answer = response.strip()

        if self.report_logger:
            self.report_logger.log_section_content(
                section_title=section.title,
                section_index=section_index,
                content=final_answer,
                tool_calls_count=tool_calls_count
            )
        return final_answer

    # Maximum iterations reached (or the last reply was None); force content generation
    logger.warning(t('report.sectionMaxIter', title=section.title))
    messages.append({"role": "user", "content": REACT_FORCE_FINAL_MSG})

    response = self.llm.chat(
        messages=messages,
        temperature=0.5,
        max_tokens=4096
    )

    # Check whether LLM returned None during forced conclusion
    if response is None:
        logger.error(t('report.sectionForceFailed', title=section.title))
        # A translated placeholder text becomes the section body
        final_answer = t('report.sectionGenFailedContent')
    elif "Final Answer:" in response:
        final_answer = response.split("Final Answer:")[-1].strip()
    else:
        # No marker: the reply is used as-is (not stripped, unlike the other paths)
        final_answer = response

    # Log section content generation completion
    if self.report_logger:
        self.report_logger.log_section_content(
            section_title=section.title,
            section_index=section_index,
            content=final_answer,
            tool_calls_count=tool_calls_count
        )

    return final_answer
|
||
|
||
def generate_report(
    self,
    progress_callback: Optional[Callable[[str, int, str], None]] = None,
    report_id: Optional[str] = None
) -> Report:
    """
    Generate the complete report (real-time section-by-section output).

    Each section is saved to the folder immediately upon completion;
    there is no need to wait for the entire report to finish.

    File structure:
    reports/{report_id}/
        meta.json - Report metadata
        outline.json - Report outline
        progress.json - Generation progress
        section_01.md - Section 1
        section_02.md - Section 2
        ...
        full_report.md - Complete report

    Progress values reported by this method: outline planning is scaled into
    0-20 for the callback (5 and 15 are written to the progress file),
    section generation starts at 20 and spans 70 points split evenly across
    the sections, assembly is 95 and completion is 100.

    Failures do not propagate: any exception raised inside the pipeline is
    logged, the report is marked FAILED with the error text, a best-effort
    attempt is made to persist that state, and the report object is returned.

    Args:
        progress_callback: Progress callback function (stage, progress, message)
        report_id: Report ID (optional; auto-generated if not provided)

    Returns:
        Report: The complete report (status COMPLETED or FAILED)
    """
    import uuid

    # Auto-generate report_id if not provided
    if not report_id:
        report_id = f"report_{uuid.uuid4().hex[:12]}"
    start_time = datetime.now()

    report = Report(
        report_id=report_id,
        simulation_id=self.simulation_id,
        graph_id=self.graph_id,
        simulation_requirement=self.simulation_requirement,
        status=ReportStatus.PENDING,
        created_at=datetime.now().isoformat()
    )

    # List of completed section titles (for progress tracking)
    completed_section_titles = []

    try:
        # Initialise: create the report folder and save the initial state
        ReportManager._ensure_report_folder(report_id)

        # Initialise the structured log recorder (agent_log.jsonl)
        self.report_logger = ReportLogger(report_id)
        self.report_logger.log_start(
            simulation_id=self.simulation_id,
            graph_id=self.graph_id,
            simulation_requirement=self.simulation_requirement
        )

        # Initialise the console log recorder (console_log.txt)
        self.console_logger = ReportConsoleLogger(report_id)

        ReportManager.update_progress(
            report_id, "pending", 0, t('progress.initReport'),
            completed_sections=[]
        )
        ReportManager.save_report(report)

        # Stage 1: Plan the outline
        report.status = ReportStatus.PLANNING
        ReportManager.update_progress(
            report_id, "planning", 5, t('progress.startPlanningOutline'),
            completed_sections=[]
        )

        # Log planning start
        self.report_logger.log_planning_start()

        if progress_callback:
            progress_callback("planning", 0, t('progress.startPlanningOutline'))

        # plan_outline reports 0-100; dividing by 5 maps that onto 0-20 overall
        outline = self.plan_outline(
            progress_callback=lambda stage, prog, msg:
            progress_callback(stage, prog // 5, msg) if progress_callback else None
        )
        report.outline = outline

        # Log planning completion
        self.report_logger.log_planning_complete(outline.to_dict())

        # Save the outline to file
        ReportManager.save_outline(report_id, outline)
        ReportManager.update_progress(
            report_id, "planning", 15, t('progress.outlineDone', count=len(outline.sections)),
            completed_sections=[]
        )
        ReportManager.save_report(report)

        logger.info(t('report.outlineSavedToFile', reportId=report_id))

        # Stage 2: Generate section by section (save as each section is completed)
        report.status = ReportStatus.GENERATING

        total_sections = len(outline.sections)
        generated_sections = []  # saved content used for context

        # With an empty outline the loop body never runs, so the divisions by
        # total_sections below are never evaluated with zero
        for i, section in enumerate(outline.sections):
            section_num = i + 1
            base_progress = 20 + int((i / total_sections) * 70)

            # Update progress
            ReportManager.update_progress(
                report_id, "generating", base_progress,
                t('progress.generatingSection', title=section.title, current=section_num, total=total_sections),
                current_section=section.title,
                completed_sections=completed_section_titles
            )

            if progress_callback:
                progress_callback(
                    "generating",
                    base_progress,
                    t('progress.generatingSection', title=section.title, current=section_num, total=total_sections)
                )

            # Generate the main section content. The lambda rescales the
            # section-local 0-100 progress into this section's share of the
            # 70-point generation range; it is only invoked during this call,
            # so capturing the loop variable base_progress is safe.
            section_content = self._generate_section_react(
                section=section,
                outline=outline,
                previous_sections=generated_sections,
                progress_callback=lambda stage, prog, msg:
                progress_callback(
                    stage,
                    base_progress + int(prog * 0.7 / total_sections),
                    msg
                ) if progress_callback else None,
                section_index=section_num
            )

            section.content = section_content
            generated_sections.append(f"## {section.title}\n\n{section_content}")

            # Save the section
            ReportManager.save_section(report_id, section_num, section)
            completed_section_titles.append(section.title)

            # Log section completion
            full_section_content = f"## {section.title}\n\n{section_content}"

            if self.report_logger:
                self.report_logger.log_section_full_complete(
                    section_title=section.title,
                    section_index=section_num,
                    full_content=full_section_content.strip()
                )

            logger.info(t('report.sectionSaved', reportId=report_id, sectionNum=f"{section_num:02d}"))

            # Update progress
            ReportManager.update_progress(
                report_id, "generating",
                base_progress + int(70 / total_sections),
                t('progress.sectionDone', title=section.title),
                current_section=None,
                completed_sections=completed_section_titles
            )

        # Stage 3: Assemble the complete report
        if progress_callback:
            progress_callback("generating", 95, t('progress.assemblingReport'))

        ReportManager.update_progress(
            report_id, "generating", 95, t('progress.assemblingReport'),
            completed_sections=completed_section_titles
        )

        # Use ReportManager to assemble the complete report
        report.markdown_content = ReportManager.assemble_full_report(report_id, outline)
        report.status = ReportStatus.COMPLETED
        report.completed_at = datetime.now().isoformat()

        # Calculate total elapsed time
        total_time_seconds = (datetime.now() - start_time).total_seconds()

        # Log report completion
        if self.report_logger:
            self.report_logger.log_report_complete(
                total_sections=total_sections,
                total_time_seconds=total_time_seconds
            )

        # Save the final report
        ReportManager.save_report(report)
        ReportManager.update_progress(
            report_id, "completed", 100, t('progress.reportComplete'),
            completed_sections=completed_section_titles
        )

        if progress_callback:
            progress_callback("completed", 100, t('progress.reportComplete'))

        logger.info(t('report.reportGenDone', reportId=report_id))

        # Close the console log recorder
        if self.console_logger:
            self.console_logger.close()
            self.console_logger = None

        return report

    except Exception as e:
        # Boundary handler: record the failure on the report instead of raising
        logger.error(t('report.reportGenFailed', error=str(e)))
        report.status = ReportStatus.FAILED
        report.error = str(e)

        # Log the error
        if self.report_logger:
            self.report_logger.log_error(str(e), "failed")

        # Save the failed state
        try:
            ReportManager.save_report(report)
            ReportManager.update_progress(
                report_id, "failed", -1, t('progress.reportFailed', error=str(e)),
                completed_sections=completed_section_titles
            )
        except Exception:
            pass  # Ignore errors when saving the failed state

        # Close the console log recorder
        if self.console_logger:
            self.console_logger.close()
            self.console_logger = None

        return report
|
||
|
||
def chat(
    self,
    message: str,
    chat_history: List[Dict[str, str]] = None
) -> Dict[str, Any]:
    """
    Answer a user message, optionally consulting retrieval tools first.

    The system prompt is built from the simulation requirement, the stored
    report for this simulation (first 15000 characters) and the tool
    descriptions. The model is then queried for at most two tool-using
    rounds; each round executes at most one parsed tool call and feeds the
    truncated result back as an observation. If both rounds asked for a
    tool, one more completion produces the final answer.

    Args:
        message: User message
        chat_history: Conversation history; only the last 10 entries are forwarded

    Returns:
        {
            "response": reply text with tool-call markup stripped,
            "tool_calls": [tool calls that were executed],
            "sources": ["query" parameter of each executed call, "" when absent]
        }
    """
    logger.info(t('report.agentChat', message=message[:50]))

    history = chat_history or []

    def _build_reply(raw_text, executed_calls):
        # Strip both tool-call notations before handing the text to the user.
        without_tags = re.sub(r'<tool_call>.*?</tool_call>', '', raw_text, flags=re.DOTALL)
        without_inline = re.sub(r'\[TOOL_CALL\].*?\)', '', without_tags)
        return {
            "response": without_inline.strip(),
            "tool_calls": executed_calls,
            "sources": [tc.get("parameters", {}).get("query", "") for tc in executed_calls]
        }

    # Load the already-generated report (best effort) and cap its length
    report_excerpt = ""
    try:
        stored_report = ReportManager.get_report_by_simulation(self.simulation_id)
        if stored_report and stored_report.markdown_content:
            report_excerpt = stored_report.markdown_content[:15000]
            if len(stored_report.markdown_content) > 15000:
                report_excerpt += "\n\n... [Report content truncated] ..."
    except Exception as e:
        logger.warning(t('report.fetchReportFailed', error=e))

    base_prompt = CHAT_SYSTEM_PROMPT_TEMPLATE.format(
        simulation_requirement=self.simulation_requirement,
        report_content=report_excerpt or "(No report available yet)",
        tools_description=self._get_tools_description(),
    )
    system_prompt = f"{base_prompt}\n\n{get_language_instruction()}"

    # system prompt + recent history + the new user message
    messages = [{"role": "system", "content": system_prompt}]
    messages.extend(history[-10:])
    messages.append({"role": "user", "content": message})

    # Simplified ReACT loop
    executed_calls = []
    rounds_allowed = 2

    for _ in range(rounds_allowed):
        response = self.llm.chat(
            messages=messages,
            temperature=0.5
        )

        requested_calls = self._parse_tool_calls(response)
        if not requested_calls:
            # The model answered directly
            return _build_reply(response, executed_calls)

        # Run at most one tool call per round, within the per-chat budget
        observation_parts = []
        for call in requested_calls[:1]:
            if len(executed_calls) >= self.MAX_TOOL_CALLS_PER_CHAT:
                break
            result = self._execute_tool(call["name"], call.get("parameters", {}))
            # Result text is capped so the observation stays short
            observation_parts.append(f"[{call['name']} result]\n{result[:1500]}")
            executed_calls.append(call)

        messages.append({"role": "assistant", "content": response})
        messages.append({
            "role": "user",
            "content": "\n".join(observation_parts) + CHAT_OBSERVATION_SUFFIX
        })

    # Round budget exhausted: ask once more for the final answer
    final_response = self.llm.chat(
        messages=messages,
        temperature=0.5
    )
    return _build_reply(final_response, executed_calls)
|
||
|
||
|
||
class ReportManager:
|
||
"""
|
||
Report Manager.
|
||
|
||
Responsible for the persistent storage and retrieval of reports.
|
||
|
||
File structure (section-by-section output):
|
||
reports/
|
||
{report_id}/
|
||
meta.json - Report metadata and status
|
||
outline.json - Report outline
|
||
progress.json - Generation progress
|
||
section_01.md - Section 1
|
||
section_02.md - Section 2
|
||
...
|
||
full_report.md - Complete report
|
||
"""
|
||
|
||
# Report storage directory
|
||
REPORTS_DIR = os.path.join(Config.UPLOAD_FOLDER, 'reports')
|
||
|
||
@classmethod
|
||
def _ensure_reports_dir(cls):
|
||
"""Ensure the reports root directory exists"""
|
||
os.makedirs(cls.REPORTS_DIR, exist_ok=True)
|
||
|
||
@classmethod
|
||
def _get_report_folder(cls, report_id: str) -> str:
|
||
"""Get the report folder path"""
|
||
return os.path.join(cls.REPORTS_DIR, report_id)
|
||
|
||
@classmethod
|
||
def _ensure_report_folder(cls, report_id: str) -> str:
|
||
"""Ensure the report folder exists and return its path"""
|
||
folder = cls._get_report_folder(report_id)
|
||
os.makedirs(folder, exist_ok=True)
|
||
return folder
|
||
|
||
@classmethod
|
||
def _get_report_path(cls, report_id: str) -> str:
|
||
"""Get the path to the report metadata file"""
|
||
return os.path.join(cls._get_report_folder(report_id), "meta.json")
|
||
|
||
@classmethod
|
||
def _get_report_markdown_path(cls, report_id: str) -> str:
|
||
"""Get the path to the full report Markdown file"""
|
||
return os.path.join(cls._get_report_folder(report_id), "full_report.md")
|
||
|
||
@classmethod
|
||
def _get_outline_path(cls, report_id: str) -> str:
|
||
"""Get the path to the outline file"""
|
||
return os.path.join(cls._get_report_folder(report_id), "outline.json")
|
||
|
||
@classmethod
|
||
def _get_progress_path(cls, report_id: str) -> str:
|
||
"""Get the path to the progress file"""
|
||
return os.path.join(cls._get_report_folder(report_id), "progress.json")
|
||
|
||
@classmethod
|
||
def _get_section_path(cls, report_id: str, section_index: int) -> str:
|
||
"""Get the path to a section Markdown file"""
|
||
return os.path.join(cls._get_report_folder(report_id), f"section_{section_index:02d}.md")
|
||
|
||
@classmethod
|
||
def _get_agent_log_path(cls, report_id: str) -> str:
|
||
"""Get the path to the Agent log file"""
|
||
return os.path.join(cls._get_report_folder(report_id), "agent_log.jsonl")
|
||
|
||
@classmethod
|
||
def _get_console_log_path(cls, report_id: str) -> str:
|
||
"""Get the path to the console log file"""
|
||
return os.path.join(cls._get_report_folder(report_id), "console_log.txt")
|
||
|
||
@classmethod
|
||
def get_console_log(cls, report_id: str, from_line: int = 0) -> Dict[str, Any]:
|
||
"""
|
||
Retrieve console log content.
|
||
|
||
This is the console output log (INFO, WARNING, etc.) produced during report
|
||
generation — distinct from the structured log in agent_log.jsonl.
|
||
|
||
Args:
|
||
report_id: Report ID
|
||
from_line: Line number to start reading from (for incremental retrieval; 0 = from the beginning)
|
||
|
||
Returns:
|
||
{
|
||
"logs": [list of log lines],
|
||
"total_lines": total line count,
|
||
"from_line": starting line number,
|
||
"has_more": whether more logs are available
|
||
}
|
||
"""
|
||
log_path = cls._get_console_log_path(report_id)
|
||
|
||
if not os.path.exists(log_path):
|
||
return {
|
||
"logs": [],
|
||
"total_lines": 0,
|
||
"from_line": 0,
|
||
"has_more": False
|
||
}
|
||
|
||
logs = []
|
||
total_lines = 0
|
||
|
||
with open(log_path, 'r', encoding='utf-8') as f:
|
||
for i, line in enumerate(f):
|
||
total_lines = i + 1
|
||
if i >= from_line:
|
||
# Keep the original log line, stripping the trailing newline
|
||
logs.append(line.rstrip('\n\r'))
|
||
|
||
return {
|
||
"logs": logs,
|
||
"total_lines": total_lines,
|
||
"from_line": from_line,
|
||
"has_more": False # read through to the end
|
||
}
|
||
|
||
@classmethod
|
||
def get_console_log_stream(cls, report_id: str) -> List[str]:
|
||
"""
|
||
Retrieve the complete console log (all at once).
|
||
|
||
Args:
|
||
report_id: Report ID
|
||
|
||
Returns:
|
||
List of log lines
|
||
"""
|
||
result = cls.get_console_log(report_id, from_line=0)
|
||
return result["logs"]
|
||
|
||
@classmethod
|
||
def get_agent_log(cls, report_id: str, from_line: int = 0) -> Dict[str, Any]:
|
||
"""
|
||
Retrieve Agent log content.
|
||
|
||
Args:
|
||
report_id: Report ID
|
||
from_line: Line number to start reading from (for incremental retrieval; 0 = from the beginning)
|
||
|
||
Returns:
|
||
{
|
||
"logs": [list of log entries],
|
||
"total_lines": total line count,
|
||
"from_line": starting line number,
|
||
"has_more": whether more logs are available
|
||
}
|
||
"""
|
||
log_path = cls._get_agent_log_path(report_id)
|
||
|
||
if not os.path.exists(log_path):
|
||
return {
|
||
"logs": [],
|
||
"total_lines": 0,
|
||
"from_line": 0,
|
||
"has_more": False
|
||
}
|
||
|
||
logs = []
|
||
total_lines = 0
|
||
|
||
with open(log_path, 'r', encoding='utf-8') as f:
|
||
for i, line in enumerate(f):
|
||
total_lines = i + 1
|
||
if i >= from_line:
|
||
try:
|
||
log_entry = json.loads(line.strip())
|
||
logs.append(log_entry)
|
||
except json.JSONDecodeError:
|
||
# Skip lines that fail to parse
|
||
continue
|
||
|
||
return {
|
||
"logs": logs,
|
||
"total_lines": total_lines,
|
||
"from_line": from_line,
|
||
"has_more": False # read through to the end
|
||
}
|
||
|
||
@classmethod
|
||
def get_agent_log_stream(cls, report_id: str) -> List[Dict[str, Any]]:
|
||
"""
|
||
Retrieve the complete Agent log (all at once).
|
||
|
||
Args:
|
||
report_id: Report ID
|
||
|
||
Returns:
|
||
List of log entries
|
||
"""
|
||
result = cls.get_agent_log(report_id, from_line=0)
|
||
return result["logs"]
|
||
|
||
@classmethod
|
||
def save_outline(cls, report_id: str, outline: ReportOutline) -> None:
    """
    Persist the report outline as outline.json in the report folder.

    Creates the report folder when needed, writes outline.to_dict() as
    indented UTF-8 JSON and logs the save.
    """
    cls._ensure_report_folder(report_id)
    outline_file = cls._get_outline_path(report_id)

    with open(outline_file, 'w', encoding='utf-8') as handle:
        json.dump(outline.to_dict(), handle, ensure_ascii=False, indent=2)

    logger.info(t('report.outlineSaved', reportId=report_id))
|
||
|
||
@classmethod
|
||
def save_section(
    cls,
    report_id: str,
    section_index: int,
    section: ReportSection
) -> str:
    """
    Write one generated section to its own Markdown file.

    The file starts with a "## <title>" heading followed by the section
    content after it has been passed through _clean_section_content; the
    content part is omitted when the cleaned content is empty.

    Args:
        report_id: Report ID
        section_index: Section index (starting from 1)
        section: Section object

    Returns:
        Path of the saved file
    """
    cls._ensure_report_folder(report_id)

    # Heading is added here, so any duplicate heading is cleaned out of the body
    body = cls._clean_section_content(section.content, section.title)
    pieces = [f"## {section.title}\n\n"]
    if body:
        pieces.append(f"{body}\n\n")

    file_suffix = f"section_{section_index:02d}.md"
    target_path = os.path.join(cls._get_report_folder(report_id), file_suffix)
    with open(target_path, 'w', encoding='utf-8') as handle:
        handle.write("".join(pieces))

    logger.info(t('report.sectionFileSaved', reportId=report_id, fileSuffix=file_suffix))
    return target_path
|
||
|
||
@classmethod
|
||
def _clean_section_content(cls, content: str, section_title: str) -> str:
|
||
"""
|
||
Clean section content.
|
||
|
||
1. Remove any Markdown heading at the start of the content that duplicates the section title.
|
||
2. Convert all headings at level ### and below to bold text.
|
||
|
||
Args:
|
||
content: Raw content
|
||
section_title: Section title
|
||
|
||
Returns:
|
||
Cleaned content
|
||
"""
|
||
import re
|
||
|
||
if not content:
|
||
return content
|
||
|
||
content = content.strip()
|
||
lines = content.split('\n')
|
||
cleaned_lines = []
|
||
skip_next_empty = False
|
||
|
||
for i, line in enumerate(lines):
|
||
stripped = line.strip()
|
||
|
||
# Check whether this is a Markdown heading line
|
||
heading_match = re.match(r'^(#{1,6})\s+(.+)$', stripped)
|
||
|
||
if heading_match:
|
||
level = len(heading_match.group(1))
|
||
title_text = heading_match.group(2).strip()
|
||
|
||
# Check for a duplicate of the section title (within the first 5 lines)
|
||
if i < 5:
|
||
if title_text == section_title or title_text.replace(' ', '') == section_title.replace(' ', ''):
|
||
skip_next_empty = True
|
||
continue
|
||
|
||
# Convert all heading levels (#, ##, ###, #### etc.) to bold text,
|
||
# since section headings are added by the system and content should contain none.
|
||
cleaned_lines.append(f"**{title_text}**")
|
||
cleaned_lines.append("") # add blank line
|
||
continue
|
||
|
||
# If the previous line was a skipped heading and this line is blank, skip it too
|
||
if skip_next_empty and stripped == '':
|
||
skip_next_empty = False
|
||
continue
|
||
|
||
skip_next_empty = False
|
||
cleaned_lines.append(line)
|
||
|
||
# Remove leading blank lines
|
||
while cleaned_lines and cleaned_lines[0].strip() == '':
|
||
cleaned_lines.pop(0)
|
||
|
||
# Remove leading horizontal rules
|
||
while cleaned_lines and cleaned_lines[0].strip() in ['---', '***', '___']:
|
||
cleaned_lines.pop(0)
|
||
# Also remove any blank lines immediately after the horizontal rule
|
||
while cleaned_lines and cleaned_lines[0].strip() == '':
|
||
cleaned_lines.pop(0)
|
||
|
||
return '\n'.join(cleaned_lines)
|
||
|
||
@classmethod
|
||
def update_progress(
|
||
cls,
|
||
report_id: str,
|
||
status: str,
|
||
progress: int,
|
||
message: str,
|
||
current_section: str = None,
|
||
completed_sections: List[str] = None
|
||
) -> None:
|
||
"""
|
||
Update the report generation progress.
|
||
|
||
The frontend can read progress.json to obtain real-time progress.
|
||
"""
|
||
cls._ensure_report_folder(report_id)
|
||
|
||
progress_data = {
|
||
"status": status,
|
||
"progress": progress,
|
||
"message": message,
|
||
"current_section": current_section,
|
||
"completed_sections": completed_sections or [],
|
||
"updated_at": datetime.now().isoformat()
|
||
}
|
||
|
||
with open(cls._get_progress_path(report_id), 'w', encoding='utf-8') as f:
|
||
json.dump(progress_data, f, ensure_ascii=False, indent=2)
|
||
|
||
@classmethod
|
||
def get_progress(cls, report_id: str) -> Optional[Dict[str, Any]]:
|
||
"""Get the report generation progress"""
|
||
path = cls._get_progress_path(report_id)
|
||
|
||
if not os.path.exists(path):
|
||
return None
|
||
|
||
with open(path, 'r', encoding='utf-8') as f:
|
||
return json.load(f)
|
||
|
||
@classmethod
|
||
def get_generated_sections(cls, report_id: str) -> List[Dict[str, Any]]:
|
||
"""
|
||
Get the list of already-generated sections.
|
||
|
||
Returns information about all saved section files.
|
||
"""
|
||
folder = cls._get_report_folder(report_id)
|
||
|
||
if not os.path.exists(folder):
|
||
return []
|
||
|
||
sections = []
|
||
for filename in sorted(os.listdir(folder)):
|
||
if filename.startswith('section_') and filename.endswith('.md'):
|
||
file_path = os.path.join(folder, filename)
|
||
with open(file_path, 'r', encoding='utf-8') as f:
|
||
content = f.read()
|
||
|
||
# Parse the section index from the filename
|
||
parts = filename.replace('.md', '').split('_')
|
||
section_index = int(parts[1])
|
||
|
||
sections.append({
|
||
"filename": filename,
|
||
"section_index": section_index,
|
||
"content": content
|
||
})
|
||
|
||
return sections
|
||
|
||
@classmethod
|
||
def assemble_full_report(cls, report_id: str, outline: ReportOutline) -> str:
    """
    Build full_report.md from the saved section files.

    The report starts with the outline title as a "#" heading, the outline
    summary as a block quote and a horizontal rule, followed by the
    contents of every file returned by get_generated_sections. The
    combined text is run through _post_process_report, written to
    full_report.md and returned.
    """
    header = f"# {outline.title}\n\n" + f"> {outline.summary}\n\n" + "---\n\n"

    # Section files are concatenated in the order get_generated_sections yields them
    section_texts = "".join(
        section_info["content"] for section_info in cls.get_generated_sections(report_id)
    )

    # Heading clean-up over the whole document
    report_text = cls._post_process_report(header + section_texts, outline)

    with open(cls._get_report_markdown_path(report_id), 'w', encoding='utf-8') as handle:
        handle.write(report_text)

    logger.info(t('report.fullReportAssembled', reportId=report_id))
    return report_text
|
||
|
||
@classmethod
|
||
def _post_process_report(cls, content: str, outline: ReportOutline) -> str:
|
||
"""
|
||
Post-process report content.
|
||
|
||
1. Remove duplicate headings.
|
||
2. Retain the report main title (#) and section titles (##); remove other heading
|
||
levels (###, #### etc.).
|
||
3. Clean up excessive blank lines and horizontal rules.
|
||
|
||
Args:
|
||
content: Raw report content
|
||
outline: Report outline
|
||
|
||
Returns:
|
||
Processed content
|
||
"""
|
||
import re
|
||
|
||
lines = content.split('\n')
|
||
processed_lines = []
|
||
prev_was_heading = False
|
||
|
||
# Collect all section titles from the outline
|
||
section_titles = set()
|
||
for section in outline.sections:
|
||
section_titles.add(section.title)
|
||
|
||
i = 0
|
||
while i < len(lines):
|
||
line = lines[i]
|
||
stripped = line.strip()
|
||
|
||
# Check whether this is a heading line
|
||
heading_match = re.match(r'^(#{1,6})\s+(.+)$', stripped)
|
||
|
||
if heading_match:
|
||
level = len(heading_match.group(1))
|
||
title = heading_match.group(2).strip()
|
||
|
||
# Check for a duplicate heading (same text appearing within the previous 5 lines)
|
||
is_duplicate = False
|
||
for j in range(max(0, len(processed_lines) - 5), len(processed_lines)):
|
||
prev_line = processed_lines[j].strip()
|
||
prev_match = re.match(r'^(#{1,6})\s+(.+)$', prev_line)
|
||
if prev_match:
|
||
prev_title = prev_match.group(2).strip()
|
||
if prev_title == title:
|
||
is_duplicate = True
|
||
break
|
||
|
||
if is_duplicate:
|
||
# Skip the duplicate heading and any blank lines that follow it
|
||
i += 1
|
||
while i < len(lines) and lines[i].strip() == '':
|
||
i += 1
|
||
continue
|
||
|
||
# Heading level handling:
|
||
# - # (level=1) — keep only the report main title
|
||
# - ## (level=2) — keep section titles
|
||
# - ### and below (level>=3) — convert to bold text
|
||
|
||
if level == 1:
|
||
if title == outline.title:
|
||
# Keep the report main title
|
||
processed_lines.append(line)
|
||
prev_was_heading = True
|
||
elif title in section_titles:
|
||
# Section title incorrectly used #; correct to ##
|
||
processed_lines.append(f"## {title}")
|
||
prev_was_heading = True
|
||
else:
|
||
# Other level-1 headings become bold text
|
||
processed_lines.append(f"**{title}**")
|
||
processed_lines.append("")
|
||
prev_was_heading = False
|
||
elif level == 2:
|
||
if title in section_titles or title == outline.title:
|
||
# Keep section titles
|
||
processed_lines.append(line)
|
||
prev_was_heading = True
|
||
else:
|
||
# Non-section level-2 headings become bold text
|
||
processed_lines.append(f"**{title}**")
|
||
processed_lines.append("")
|
||
prev_was_heading = False
|
||
else:
|
||
# Headings at level ### and below are converted to bold text
|
||
processed_lines.append(f"**{title}**")
|
||
processed_lines.append("")
|
||
prev_was_heading = False
|
||
|
||
i += 1
|
||
continue
|
||
|
||
elif stripped == '---' and prev_was_heading:
|
||
# Skip horizontal rules that immediately follow a heading
|
||
i += 1
|
||
continue
|
||
|
||
elif stripped == '' and prev_was_heading:
|
||
# After a heading, keep only one blank line
|
||
if processed_lines and processed_lines[-1].strip() != '':
|
||
processed_lines.append(line)
|
||
prev_was_heading = False
|
||
|
||
else:
|
||
processed_lines.append(line)
|
||
prev_was_heading = False
|
||
|
||
i += 1
|
||
|
||
# Clean up consecutive blank lines (retain at most 2)
|
||
result_lines = []
|
||
empty_count = 0
|
||
for line in processed_lines:
|
||
if line.strip() == '':
|
||
empty_count += 1
|
||
if empty_count <= 2:
|
||
result_lines.append(line)
|
||
else:
|
||
empty_count = 0
|
||
result_lines.append(line)
|
||
|
||
return '\n'.join(result_lines)
|
||
|
||
@classmethod
|
||
def save_report(cls, report: Report) -> None:
    """
    Persist a report to its folder.

    Always writes meta.json from report.to_dict(). The outline is saved via
    save_outline when the report has one, and full_report.md is written
    when markdown_content is non-empty.
    """
    report_id = report.report_id
    cls._ensure_report_folder(report_id)

    # Metadata
    with open(cls._get_report_path(report_id), 'w', encoding='utf-8') as meta_file:
        json.dump(report.to_dict(), meta_file, ensure_ascii=False, indent=2)

    # Outline (optional)
    if report.outline:
        cls.save_outline(report_id, report.outline)

    # Full Markdown text (optional)
    if report.markdown_content:
        with open(cls._get_report_markdown_path(report_id), 'w', encoding='utf-8') as md_file:
            md_file.write(report.markdown_content)

    logger.info(t('report.reportSaved', reportId=report_id))
|
||
|
||
@classmethod
|
||
def get_report(cls, report_id: str) -> Optional[Report]:
|
||
"""Get a report"""
|
||
path = cls._get_report_path(report_id)
|
||
|
||
if not os.path.exists(path):
|
||
# Backward compatibility: check for files stored directly in the reports directory
|
||
old_path = os.path.join(cls.REPORTS_DIR, f"{report_id}.json")
|
||
if os.path.exists(old_path):
|
||
path = old_path
|
||
else:
|
||
return None
|
||
|
||
with open(path, 'r', encoding='utf-8') as f:
|
||
data = json.load(f)
|
||
|
||
# Reconstruct the Report object
|
||
outline = None
|
||
if data.get('outline'):
|
||
outline_data = data['outline']
|
||
sections = []
|
||
for s in outline_data.get('sections', []):
|
||
sections.append(ReportSection(
|
||
title=s['title'],
|
||
content=s.get('content', '')
|
||
))
|
||
outline = ReportOutline(
|
||
title=outline_data['title'],
|
||
summary=outline_data['summary'],
|
||
sections=sections
|
||
)
|
||
|
||
# If markdown_content is empty, try reading from full_report.md
|
||
markdown_content = data.get('markdown_content', '')
|
||
if not markdown_content:
|
||
full_report_path = cls._get_report_markdown_path(report_id)
|
||
if os.path.exists(full_report_path):
|
||
with open(full_report_path, 'r', encoding='utf-8') as f:
|
||
markdown_content = f.read()
|
||
|
||
return Report(
|
||
report_id=data['report_id'],
|
||
simulation_id=data['simulation_id'],
|
||
graph_id=data['graph_id'],
|
||
simulation_requirement=data['simulation_requirement'],
|
||
status=ReportStatus(data['status']),
|
||
outline=outline,
|
||
markdown_content=markdown_content,
|
||
created_at=data.get('created_at', ''),
|
||
completed_at=data.get('completed_at', ''),
|
||
error=data.get('error')
|
||
)
|
||
|
||
@classmethod
|
||
def get_report_by_simulation(cls, simulation_id: str) -> Optional[Report]:
|
||
"""Get a report by simulation ID"""
|
||
cls._ensure_reports_dir()
|
||
|
||
for item in os.listdir(cls.REPORTS_DIR):
|
||
item_path = os.path.join(cls.REPORTS_DIR, item)
|
||
# New format: folder
|
||
if os.path.isdir(item_path):
|
||
report = cls.get_report(item)
|
||
if report and report.simulation_id == simulation_id:
|
||
return report
|
||
# Backward compatibility: JSON file
|
||
elif item.endswith('.json'):
|
||
report_id = item[:-5]
|
||
report = cls.get_report(report_id)
|
||
if report and report.simulation_id == simulation_id:
|
||
return report
|
||
|
||
return None
|
||
|
||
@classmethod
|
||
def list_reports(cls, simulation_id: Optional[str] = None, limit: int = 50) -> List[Report]:
|
||
"""List reports"""
|
||
cls._ensure_reports_dir()
|
||
|
||
reports = []
|
||
for item in os.listdir(cls.REPORTS_DIR):
|
||
item_path = os.path.join(cls.REPORTS_DIR, item)
|
||
# New format: folder
|
||
if os.path.isdir(item_path):
|
||
report = cls.get_report(item)
|
||
if report:
|
||
if simulation_id is None or report.simulation_id == simulation_id:
|
||
reports.append(report)
|
||
# Backward compatibility: JSON file
|
||
elif item.endswith('.json'):
|
||
report_id = item[:-5]
|
||
report = cls.get_report(report_id)
|
||
if report:
|
||
if simulation_id is None or report.simulation_id == simulation_id:
|
||
reports.append(report)
|
||
|
||
# Sort by creation time, descending
|
||
reports.sort(key=lambda r: r.created_at, reverse=True)
|
||
|
||
return reports[:limit]
|
||
|
||
@classmethod
|
||
def delete_report(cls, report_id: str) -> bool:
|
||
"""Delete a report (the entire folder)"""
|
||
import shutil
|
||
|
||
folder_path = cls._get_report_folder(report_id)
|
||
|
||
# New format: delete the entire folder
|
||
if os.path.exists(folder_path) and os.path.isdir(folder_path):
|
||
shutil.rmtree(folder_path)
|
||
logger.info(t('report.reportFolderDeleted', reportId=report_id))
|
||
return True
|
||
|
||
# Backward compatibility: delete individual files
|
||
deleted = False
|
||
old_json_path = os.path.join(cls.REPORTS_DIR, f"{report_id}.json")
|
||
old_md_path = os.path.join(cls.REPORTS_DIR, f"{report_id}.md")
|
||
|
||
if os.path.exists(old_json_path):
|
||
os.remove(old_json_path)
|
||
deleted = True
|
||
if os.path.exists(old_md_path):
|
||
os.remove(old_md_path)
|
||
deleted = True
|
||
|
||
return deleted
|