# MicroFish/backend/app/services/simulation_config_generator.py
#
# 995 lines
# 41 KiB
# Python

"""
Intelligent simulation-configuration generator.
Uses an LLM to derive detailed simulation parameters from the simulation
requirement, document content, and knowledge-graph information, fully
automating parameter setup without manual intervention.
Employs a step-wise generation strategy to avoid failures caused by
producing too much content in a single call:
1. Generate time configuration
2. Generate event configuration
3. Generate agent configurations in batches
4. Generate platform configuration
"""
import json
import math
from typing import Dict, Any, List, Optional, Callable
from dataclasses import dataclass, field, asdict
from datetime import datetime
from openai import OpenAI
from ..config import Config
from ..utils.logger import get_logger
from ..utils.locale import get_language_instruction, t
from .zep_entity_reader import EntityNode, ZepEntityReader
# Module-level logger, obtained from the project's get_logger helper under the name 'mirofish.simulation_config'.
logger = get_logger('mirofish.simulation_config')
# Hour-of-day activity profile for an audience on Beijing time (UTC+8).
# Every hour 0-23 falls into exactly one of the five hour buckets, and each
# bucket has a matching entry in "activity_multipliers".
CHINA_TIMEZONE_CONFIG = dict(
    # 00:00-05:59 - overnight, almost nobody online.
    dead_hours=list(range(0, 6)),
    # 06:00-08:59 - people are waking up.
    morning_hours=list(range(6, 9)),
    # 09:00-18:59 - working day.
    work_hours=list(range(9, 19)),
    # 19:00-22:59 - evening peak, the busiest period.
    peak_hours=list(range(19, 23)),
    # 23:00-23:59 - activity tails off.
    night_hours=[23],
    # Multiplier applied to activity for each bucket.
    activity_multipliers=dict(
        dead=0.05,     # overnight
        morning=0.4,   # morning ramp-up
        work=0.7,      # moderate daytime activity
        peak=1.5,      # evening peak
        night=0.5,     # late-evening decline
    ),
)
@dataclass
class AgentActivityConfig:
    """Per-agent behaviour settings consumed by the simulation.

    The first four fields identify the agent and the graph entity it was
    derived from; the remaining fields all have defaults.
    """

    # --- identity ---
    agent_id: int
    entity_uuid: str
    entity_name: str
    entity_type: str

    # --- how much the agent does ---
    # Overall activity level, 0.0-1.0.
    activity_level: float = 0.5
    # Expected number of posts / comments per hour.
    posts_per_hour: float = 1.0
    comments_per_hour: float = 2.0

    # --- when the agent does it ---
    # Hours of the day (0-23) in which the agent is active; default 08-22.
    active_hours: List[int] = field(
        default_factory=lambda: [hour for hour in range(8, 23)]
    )
    # Bounds on the reaction latency to hot events, in simulated minutes.
    response_delay_min: int = 5
    response_delay_max: int = 60

    # --- attitude ---
    # Sentiment bias from -1.0 (negative) to 1.0 (positive).
    sentiment_bias: float = 0.0
    # Attitude toward the topic: supportive, opposing, neutral or observer.
    stance: str = "neutral"

    # --- reach ---
    # Weight for the probability that other agents see this agent's posts.
    influence_weight: float = 1.0
@dataclass
class TimeSimulationConfig:
    """Clock and daily-rhythm settings (defaults follow a Chinese daily rhythm)."""

    # Total simulated duration in hours; the default of 72 is three days.
    total_simulation_hours: int = 72
    # Simulated minutes covered by one round; 60 means one hour per round.
    minutes_per_round: int = 60

    # Lower / upper bound on the number of agents activated per hour.
    agents_per_hour_min: int = 5
    agents_per_hour_max: int = 20

    # Evening peak, 19:00-22:59.
    peak_hours: List[int] = field(default_factory=lambda: list(range(19, 23)))
    peak_activity_multiplier: float = 1.5

    # Overnight trough, 00:00-05:59, where activity is very low.
    off_peak_hours: List[int] = field(default_factory=lambda: list(range(0, 6)))
    off_peak_activity_multiplier: float = 0.05

    # Morning ramp-up, 06:00-08:59.
    morning_hours: List[int] = field(default_factory=lambda: list(range(6, 9)))
    morning_activity_multiplier: float = 0.4

    # Working day, 09:00-18:59.
    work_hours: List[int] = field(default_factory=lambda: list(range(9, 19)))
    work_activity_multiplier: float = 0.7
@dataclass
class EventConfig:
    """Events and topics that seed and steer the simulation."""

    # Posts published when the simulation starts (each one is a dict).
    initial_posts: List[Dict[str, Any]] = field(default_factory=lambda: [])
    # Events fired at specific times during the run (each one is a dict).
    scheduled_events: List[Dict[str, Any]] = field(default_factory=lambda: [])
    # Keywords of the hot topics.
    hot_topics: List[str] = field(default_factory=lambda: [])
    # Free-text description of the direction public opinion is guided in.
    narrative_direction: str = ""
@dataclass
class PlatformConfig:
    """Settings for one social platform taking part in the simulation."""

    # Platform name: "twitter" or "reddit".
    platform: str

    # Weights of the recommendation algorithm.
    recency_weight: float = 0.4      # how much recency counts
    popularity_weight: float = 0.3   # how much popularity counts
    relevance_weight: float = 0.3    # how much relevance counts

    # Number of interactions needed before a post starts spreading virally.
    viral_threshold: int = 10

    # How strongly similar viewpoints cluster together.
    echo_chamber_strength: float = 0.5
@dataclass
class SimulationParameters:
    """The full parameter set produced for one simulation run."""

    # Identifiers and the requirement text the configuration was built for.
    simulation_id: str
    project_id: str
    graph_id: str
    simulation_requirement: str

    # Clock / daily-rhythm settings.
    time_config: TimeSimulationConfig = field(default_factory=TimeSimulationConfig)
    # One entry per simulated agent.
    agent_configs: List[AgentActivityConfig] = field(default_factory=list)
    # Initial posts, scheduled events, hot topics and narrative direction.
    event_config: EventConfig = field(default_factory=EventConfig)

    # Per-platform settings; None when no config was provided for the platform.
    twitter_config: Optional[PlatformConfig] = None
    reddit_config: Optional[PlatformConfig] = None

    # LLM model name and base URL recorded alongside the configuration.
    llm_model: str = ""
    llm_base_url: str = ""

    # Creation timestamp (ISO-8601, from datetime.now()) and the rationale text.
    generated_at: str = field(default_factory=lambda: datetime.now().isoformat())
    generation_reasoning: str = ""

    def to_dict(self) -> Dict[str, Any]:
        """Convert the parameters, including nested dataclasses, to plain dicts."""

        def dump_platform(platform_cfg: Optional[PlatformConfig]) -> Optional[Dict[str, Any]]:
            # A missing platform is serialised as None rather than omitted.
            return asdict(platform_cfg) if platform_cfg else None

        payload: Dict[str, Any] = {
            "simulation_id": self.simulation_id,
            "project_id": self.project_id,
            "graph_id": self.graph_id,
            "simulation_requirement": self.simulation_requirement,
        }
        payload["time_config"] = asdict(self.time_config)
        payload["agent_configs"] = [asdict(agent) for agent in self.agent_configs]
        payload["event_config"] = asdict(self.event_config)
        payload["twitter_config"] = dump_platform(self.twitter_config)
        payload["reddit_config"] = dump_platform(self.reddit_config)
        payload["llm_model"] = self.llm_model
        payload["llm_base_url"] = self.llm_base_url
        payload["generated_at"] = self.generated_at
        payload["generation_reasoning"] = self.generation_reasoning
        return payload

    def to_json(self, indent: int = 2) -> str:
        """Serialise to_dict() as JSON, keeping non-ASCII characters unescaped."""
        as_dict = self.to_dict()
        return json.dumps(as_dict, ensure_ascii=False, indent=indent)
class SimulationConfigGenerator:
    """
    Intelligent simulation-configuration generator.

    Uses an LLM to analyse the simulation requirement, document content,
    and graph entity information to automatically derive the simulation
    parameter configuration.

    Step-wise generation strategy:
    1. Generate time and event configurations (lightweight).
    2. Generate agent configurations in batches (AGENTS_PER_BATCH per batch).
    3. Generate platform configuration.

    Every LLM-backed step has a non-LLM fallback, so a failed call degrades
    the result instead of aborting the generation.
    """

    # Maximum context length (characters).
    MAX_CONTEXT_LENGTH = 50000
    # Number of agents generated per batch.
    AGENTS_PER_BATCH = 15
    # Per-step context truncation lengths (characters).
    TIME_CONFIG_CONTEXT_LENGTH = 10000   # Time configuration.
    EVENT_CONFIG_CONTEXT_LENGTH = 8000   # Event configuration.
    ENTITY_SUMMARY_LENGTH = 300          # Entity summary.
    AGENT_SUMMARY_LENGTH = 300           # Entity summary used in agent configs.
    ENTITIES_PER_TYPE_DISPLAY = 20       # Number of entities displayed per type.

    def __init__(
        self,
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
        model_name: Optional[str] = None
    ):
        """Create the generator and its OpenAI client.

        Args:
            api_key: LLM API key; falls back to Config.LLM_API_KEY.
            base_url: LLM endpoint; falls back to Config.LLM_BASE_URL.
            model_name: Model identifier; falls back to Config.LLM_MODEL_NAME.

        Raises:
            ValueError: If no API key is available from either source.
        """
        self.api_key = api_key or Config.LLM_API_KEY
        self.base_url = base_url or Config.LLM_BASE_URL
        self.model_name = model_name or Config.LLM_MODEL_NAME
        if not self.api_key:
            raise ValueError("LLM_API_KEY 未配置")
        self.client = OpenAI(
            api_key=self.api_key,
            base_url=self.base_url
        )

    def generate_config(
        self,
        simulation_id: str,
        project_id: str,
        graph_id: str,
        simulation_requirement: str,
        document_text: str,
        entities: "List[EntityNode]",
        enable_twitter: bool = True,
        enable_reddit: bool = True,
        progress_callback: Optional[Callable[[int, int, str], None]] = None,
    ) -> "SimulationParameters":
        """Intelligently generate a complete simulation configuration (step-wise).

        Args:
            simulation_id: Simulation ID.
            project_id: Project ID.
            graph_id: Graph ID.
            simulation_requirement: Description of the simulation requirement.
            document_text: Original document content.
            entities: Filtered list of entities.
            enable_twitter: Whether to enable Twitter.
            enable_reddit: Whether to enable Reddit.
            progress_callback: Progress callback (current_step, total_steps, message).

        Returns:
            SimulationParameters: The complete simulation parameters.
        """
        logger.info(t("log.simulation_config.m001", simulation_id=simulation_id, len=len(entities)))

        # Time config + event config + N agent batches + platform config.
        num_entities = len(entities)
        num_batches = math.ceil(num_entities / self.AGENTS_PER_BATCH)
        total_steps = 3 + num_batches

        def report_progress(step: int, message: str):
            if progress_callback:
                progress_callback(step, total_steps, message)
            logger.info(f"[{step}/{total_steps}] {message}")

        # Shared context used by the time and event prompts.
        context = self._build_context(
            simulation_requirement=simulation_requirement,
            document_text=document_text,
            entities=entities
        )
        reasoning_parts = []

        # ========== Step 1: generate time configuration ==========
        report_progress(1, t('progress.generatingTimeConfig'))
        time_config_result = self._generate_time_config(context, num_entities)
        time_config = self._parse_time_config(time_config_result, num_entities)
        reasoning_parts.append(f"{t('progress.timeConfigLabel')}: {time_config_result.get('reasoning', t('common.success'))}")

        # ========== Step 2: generate event configuration ==========
        report_progress(2, t('progress.generatingEventConfig'))
        event_config_result = self._generate_event_config(context, simulation_requirement, entities)
        event_config = self._parse_event_config(event_config_result)
        reasoning_parts.append(f"{t('progress.eventConfigLabel')}: {event_config_result.get('reasoning', t('common.success'))}")

        # ========== Steps 3-N: generate agent configurations in batches ==========
        all_agent_configs = []
        for batch_idx in range(num_batches):
            start_idx = batch_idx * self.AGENTS_PER_BATCH
            end_idx = min(start_idx + self.AGENTS_PER_BATCH, num_entities)
            batch_entities = entities[start_idx:end_idx]
            report_progress(
                3 + batch_idx,
                t('progress.generatingAgentConfig', start=start_idx + 1, end=end_idx, total=num_entities)
            )
            batch_configs = self._generate_agent_configs_batch(
                context=context,
                entities=batch_entities,
                start_idx=start_idx,
                simulation_requirement=simulation_requirement
            )
            all_agent_configs.extend(batch_configs)
        reasoning_parts.append(t('progress.agentConfigResult', count=len(all_agent_configs)))

        # ========== Assign poster agents to initial posts ==========
        logger.info(t("log.simulation_config.m002"))
        event_config = self._assign_initial_post_agents(event_config, all_agent_configs)
        assigned_count = len([p for p in event_config.initial_posts if p.get("poster_agent_id") is not None])
        reasoning_parts.append(t('progress.postAssignResult', count=assigned_count))

        # ========== Final step: generate platform configuration ==========
        # Platform settings are fixed presets; no LLM call is made here.
        report_progress(total_steps, t('progress.generatingPlatformConfig'))
        twitter_config = None
        reddit_config = None
        if enable_twitter:
            twitter_config = PlatformConfig(
                platform="twitter",
                recency_weight=0.4,
                popularity_weight=0.3,
                relevance_weight=0.3,
                viral_threshold=10,
                echo_chamber_strength=0.5
            )
        if enable_reddit:
            reddit_config = PlatformConfig(
                platform="reddit",
                recency_weight=0.3,
                popularity_weight=0.4,
                relevance_weight=0.3,
                viral_threshold=15,
                echo_chamber_strength=0.6
            )

        # Build final parameters.
        params = SimulationParameters(
            simulation_id=simulation_id,
            project_id=project_id,
            graph_id=graph_id,
            simulation_requirement=simulation_requirement,
            time_config=time_config,
            agent_configs=all_agent_configs,
            event_config=event_config,
            twitter_config=twitter_config,
            reddit_config=reddit_config,
            llm_model=self.model_name,
            llm_base_url=self.base_url,
            generation_reasoning=" | ".join(reasoning_parts)
        )
        logger.info(t("log.simulation_config.m003", len=len(params.agent_configs)))
        return params

    def _build_context(
        self,
        simulation_requirement: str,
        document_text: str,
        entities: "List[EntityNode]"
    ) -> str:
        """Build the LLM context: requirement, entity summary, then document text.

        The document text is cut so that the three sections together stay
        roughly within MAX_CONTEXT_LENGTH; it is dropped entirely when the
        first two sections already use up the budget.
        """
        entity_summary = self._summarize_entities(entities)
        context_parts = [
            f"## Simulation Requirement\n{simulation_requirement}",
            f"\n## Entities ({len(entities)})\n{entity_summary}",
        ]
        current_length = sum(len(p) for p in context_parts)
        # Reserve 500 characters of headroom for section headers and the truncation marker.
        remaining_length = self.MAX_CONTEXT_LENGTH - current_length - 500
        if remaining_length > 0 and document_text:
            doc_text = document_text[:remaining_length]
            if len(document_text) > remaining_length:
                doc_text += "\n...(document truncated)"
            context_parts.append(f"\n## Source Document Content\n{doc_text}")
        return "\n".join(context_parts)

    def _summarize_entities(self, entities: "List[EntityNode]") -> str:
        """Render the entities as a markdown list grouped by entity type.

        At most ENTITIES_PER_TYPE_DISPLAY entities are listed per type and each
        summary is cut to ENTITY_SUMMARY_LENGTH characters. A missing (None)
        summary is rendered as an empty string.
        """
        # Group by type, keeping first-seen order of the types.
        by_type: Dict[str, list] = {}
        for entity in entities:
            by_type.setdefault(entity.get_entity_type() or "Unknown", []).append(entity)

        display_count = self.ENTITIES_PER_TYPE_DISPLAY
        summary_len = self.ENTITY_SUMMARY_LENGTH
        lines = []
        for entity_type, type_entities in by_type.items():
            lines.append(f"\n### {entity_type} ({len(type_entities)})")
            for entity in type_entities[:display_count]:
                summary = entity.summary or ""
                if len(summary) > summary_len:
                    summary = summary[:summary_len] + "..."
                lines.append(f"- {entity.name}: {summary}")
            if len(type_entities) > display_count:
                lines.append(f" ... and {len(type_entities) - display_count} more")
        return "\n".join(lines)

    def _call_llm_with_retry(self, prompt: str, system_prompt: str) -> Dict[str, Any]:
        """Call the chat-completions API and return the parsed JSON reply.

        Up to three attempts are made, lowering the temperature by 0.1 each
        time. A reply that fails to parse is passed through the JSON repair
        helpers before the attempt counts as failed.

        Raises:
            Exception: The last error seen once all attempts are exhausted.
        """
        import time

        max_attempts = 3
        last_error = None
        for attempt in range(max_attempts):
            try:
                response = self.client.chat.completions.create(
                    model=self.model_name,
                    messages=[
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": prompt}
                    ],
                    response_format={"type": "json_object"},
                    temperature=0.7 - (attempt * 0.1)  # Lower temperature on each retry.
                    # max_tokens is intentionally unset so the LLM can use its full budget.
                )
                choice = response.choices[0]
                content = choice.message.content
                # A 'length' finish means the output was cut off mid-JSON.
                if choice.finish_reason == 'length':
                    logger.warning(t("log.simulation_config.m004", attempt=attempt + 1))
                    content = self._fix_truncated_json(content)
                try:
                    return json.loads(content)
                except json.JSONDecodeError as e:
                    logger.warning(t("log.simulation_config.m005", attempt=attempt + 1, str=str(e)[:80]))
                    fixed = self._try_fix_config_json(content)
                    if fixed:
                        return fixed
                    last_error = e
            except Exception as e:
                logger.warning(t("log.simulation_config.m006", attempt=attempt + 1, str=str(e)[:80]))
                last_error = e
                # Linear back-off, but only when another attempt will follow;
                # sleeping right before raising just delays the fallback.
                if attempt < max_attempts - 1:
                    time.sleep(2 * (attempt + 1))
        raise last_error or Exception("LLM调用失败")

    def _fix_truncated_json(self, content: str) -> str:
        """Best-effort completion of a JSON text that was cut off.

        Scans the text while tracking string state and the stack of open
        containers, then terminates an unfinished string, drops a dangling
        trailing comma (or fills a dangling ``:`` with ``null``) and closes
        the open containers innermost-first. The result is not guaranteed to
        be valid JSON.
        """
        content = content.strip()
        closers = []          # Closing characters still owed, outermost first.
        in_string = False
        escaped = False
        for ch in content:
            if in_string:
                if escaped:
                    escaped = False
                elif ch == '\\':
                    escaped = True
                elif ch == '"':
                    in_string = False
            elif ch == '"':
                in_string = True
            elif ch == '{':
                closers.append('}')
            elif ch == '[':
                closers.append(']')
            elif ch in '}]' and closers and closers[-1] == ch:
                closers.pop()

        if in_string:
            if escaped:
                # A lone trailing backslash would escape the quote we add.
                content = content[:-1]
            content += '"'
        elif closers:
            if content.endswith(','):
                content = content[:-1]
            elif content.endswith(':'):
                content += ' null'
        return content + ''.join(reversed(closers))

    def _try_fix_config_json(self, content: str) -> Optional[Dict[str, Any]]:
        """Try to recover a JSON object from a malformed LLM reply.

        Returns the parsed object, or None when no repair strategy works.
        """
        import re

        # Repair truncation first, then take the outermost {...} span.
        content = self._fix_truncated_json(content)
        json_match = re.search(r'\{[\s\S]*\}', content)
        if not json_match:
            return None
        json_str = json_match.group()

        def fix_string(match):
            # Raw line breaks are illegal inside JSON strings; collapse every
            # whitespace run within a string literal into a single space.
            return re.sub(r'\s+', ' ', match.group(0))

        json_str = re.sub(r'"[^"\\]*(?:\\.[^"\\]*)*"', fix_string, json_str)
        try:
            return json.loads(json_str)
        except (ValueError, RecursionError):
            pass

        # Last resort: strip all control characters and try again.
        json_str = re.sub(r'[\x00-\x1f\x7f-\x9f]', ' ', json_str)
        json_str = re.sub(r'\s+', ' ', json_str)
        try:
            return json.loads(json_str)
        except (ValueError, RecursionError):
            return None

    def _generate_time_config(self, context: str, num_entities: int) -> Dict[str, Any]:
        """Ask the LLM for the time configuration.

        Falls back to _get_default_time_config() when the LLM call fails.
        """
        # Use the configured context truncation length.
        context_truncated = context[:self.TIME_CONFIG_CONTEXT_LENGTH]
        # Upper bound quoted in the prompt (90% of the agent count, at least 1).
        max_agents_allowed = max(1, int(num_entities * 0.9))
        prompt = f"""Based on the simulation requirement below, generate a time-simulation configuration.
{context_truncated}
## Task
Produce a time-configuration JSON.
### Guiding principles (illustrative only — adapt to the specific event and audience):
- Infer the timezone and daily rhythm of the target audience from the simulation scenario. The following are example values for the UTC+8 timezone.
- 00:00-05:00: almost no activity (activity multiplier 0.05).
- 06:00-08:00: gradually waking up (activity multiplier 0.4).
- 09:00-18:00: working hours, moderate activity (activity multiplier 0.7).
- 19:00-22:00: evening peak (activity multiplier 1.5).
- After 23:00: activity declines (activity multiplier 0.5).
- General rule of thumb: low overnight, ramping up in the morning, moderate during working hours, peaking in the evening.
- **Important**: the example values above are only a reference. Tailor the schedule to the nature of the event and the audience's habits.
- For example: a student-heavy audience may peak from 21:00-23:00; news outlets may stay active all day; official agencies are only active during working hours.
- For example: a breaking-news topic may keep discussion going late at night, in which case off_peak_hours can be shortened.
### Return strict JSON (no markdown)
Example:
{{
"total_simulation_hours": 72,
"minutes_per_round": 60,
"agents_per_hour_min": 5,
"agents_per_hour_max": 50,
"peak_hours": [19, 20, 21, 22],
"off_peak_hours": [0, 1, 2, 3, 4, 5],
"morning_hours": [6, 7, 8],
"work_hours": [9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
"reasoning": "Time configuration rationale for this event"
}}
Field guide:
- total_simulation_hours (int): total simulated duration, 24-168 hours; short for breaking events, long for sustained topics.
- minutes_per_round (int): minutes per round, 30-120; recommended 60.
- agents_per_hour_min (int): minimum number of agents activated per hour (allowed range: 1-{max_agents_allowed}).
- agents_per_hour_max (int): maximum number of agents activated per hour (allowed range: 1-{max_agents_allowed}).
- peak_hours (int array): peak hours, adjusted to the audience.
- off_peak_hours (int array): off-peak hours, typically overnight.
- morning_hours (int array): morning hours.
- work_hours (int array): working hours.
- reasoning (string): brief explanation of why this configuration was chosen."""
        system_prompt = "You are a social-media simulation expert. Return plain JSON. The time configuration should match the daily rhythm of the simulation's target audience."
        system_prompt = f"{system_prompt}\n\n{get_language_instruction()}"
        try:
            return self._call_llm_with_retry(prompt, system_prompt)
        except Exception as e:
            logger.warning(t("log.simulation_config.m007", e=e))
            return self._get_default_time_config(num_entities)

    def _get_default_time_config(self, num_entities: int) -> Dict[str, Any]:
        """Return the default time configuration (Chinese daily rhythm)."""
        return {
            "total_simulation_hours": 72,
            "minutes_per_round": 60,  # 1 hour per round to speed up the simulated clock.
            "agents_per_hour_min": max(1, num_entities // 15),
            "agents_per_hour_max": max(5, num_entities // 5),
            "peak_hours": [19, 20, 21, 22],
            "off_peak_hours": [0, 1, 2, 3, 4, 5],
            "morning_hours": [6, 7, 8],
            "work_hours": [9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
            "reasoning": "Default circadian-pattern config (1h per round)"
        }

    @staticmethod
    def _coerce_int(value: Any, default: int) -> int:
        """Return value as an int, or default when it is missing or not numeric."""
        if value is None or isinstance(value, bool):
            return default
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            return default

    def _parse_time_config(self, result: Dict[str, Any], num_entities: int) -> "TimeSimulationConfig":
        """Turn the raw time-configuration dict into a TimeSimulationConfig.

        Numeric fields come from the LLM and may be null or strings, so they
        are coerced to int with the usual defaults. agents_per_hour values
        above the total agent count are scaled back, and min is kept below max.
        The activity multipliers are always the fixed preset values.
        """
        agents_per_hour_min = self._coerce_int(
            result.get("agents_per_hour_min"), max(1, num_entities // 15)
        )
        agents_per_hour_max = self._coerce_int(
            result.get("agents_per_hour_max"), max(5, num_entities // 5)
        )

        # Validate and correct: values must not exceed the total agent count.
        if agents_per_hour_min > num_entities:
            logger.warning(t("log.simulation_config.m008", agents_per_hour_min=agents_per_hour_min, num_entities=num_entities))
            agents_per_hour_min = max(1, num_entities // 10)
        if agents_per_hour_max > num_entities:
            logger.warning(t("log.simulation_config.m009", agents_per_hour_max=agents_per_hour_max, num_entities=num_entities))
            agents_per_hour_max = max(agents_per_hour_min + 1, num_entities // 2)
        # Ensure min < max.
        if agents_per_hour_min >= agents_per_hour_max:
            agents_per_hour_min = max(1, agents_per_hour_max // 2)
            logger.warning(t("log.simulation_config.m010", agents_per_hour_min=agents_per_hour_min))

        return TimeSimulationConfig(
            total_simulation_hours=self._coerce_int(result.get("total_simulation_hours"), 72),
            minutes_per_round=self._coerce_int(result.get("minutes_per_round"), 60),  # Default: 1 simulated hour per round.
            agents_per_hour_min=agents_per_hour_min,
            agents_per_hour_max=agents_per_hour_max,
            peak_hours=result.get("peak_hours", [19, 20, 21, 22]),
            off_peak_hours=result.get("off_peak_hours", [0, 1, 2, 3, 4, 5]),
            off_peak_activity_multiplier=0.05,  # Overnight: almost no one online.
            morning_hours=result.get("morning_hours", [6, 7, 8]),
            morning_activity_multiplier=0.4,
            work_hours=result.get("work_hours", list(range(9, 19))),
            work_activity_multiplier=0.7,
            peak_activity_multiplier=1.5
        )

    def _generate_event_config(
        self,
        context: str,
        simulation_requirement: str,
        entities: "List[EntityNode]"
    ) -> Dict[str, Any]:
        """Ask the LLM for hot topics, narrative direction and initial posts.

        Returns an empty event configuration when the LLM call fails.
        """
        # Up to three example entity names per type, shown to the LLM so that
        # it picks poster_type values that actually exist.
        type_examples: Dict[str, List[str]] = {}
        for e in entities:
            names = type_examples.setdefault(e.get_entity_type() or "Unknown", [])
            if len(names) < 3:
                names.append(e.name)
        type_info = "\n".join(
            f"- {etype}: {', '.join(examples)}"
            for etype, examples in type_examples.items()
        )
        # Use the configured context truncation length.
        context_truncated = context[:self.EVENT_CONFIG_CONTEXT_LENGTH]
        prompt = f"""Based on the simulation requirement below, generate an event configuration.
Simulation requirement: {simulation_requirement}
{context_truncated}
## Available entity types and examples
{type_info}
## Task
Produce an event-configuration JSON:
- Extract hot-topic keywords.
- Describe the direction in which public opinion is expected to evolve.
- Design the initial posts. **Every post must specify a poster_type (the type of the entity that publishes it).**
**Important**: poster_type MUST be one of the values listed in "Available entity types" above so each initial post can be assigned to an appropriate agent.
For example: official statements should be published by Official/University, news by MediaOutlet, student opinions by Student.
Return strict JSON (no markdown):
{{
"hot_topics": ["keyword 1", "keyword 2", ...],
"narrative_direction": "<description of how public opinion is expected to evolve>",
"initial_posts": [
{{"content": "post content", "poster_type": "entity type (must be one of the available types)"}},
...
],
"reasoning": "<brief rationale>"
}}"""
        system_prompt = "You are a public-opinion analyst. Return plain JSON. Note that poster_type must exactly match one of the available entity types."
        system_prompt = f"{system_prompt}\n\n{get_language_instruction()}\nIMPORTANT: The 'poster_type' field value MUST be in English PascalCase exactly matching the available entity types. Only 'content', 'narrative_direction', 'hot_topics' and 'reasoning' fields should use the specified language."
        try:
            return self._call_llm_with_retry(prompt, system_prompt)
        except Exception as e:
            logger.warning(t("log.simulation_config.m011", e=e))
            return {
                "hot_topics": [],
                "narrative_direction": "",
                "initial_posts": [],
                "reasoning": "Used default config"
            }

    def _parse_event_config(self, result: Dict[str, Any]) -> "EventConfig":
        """Build an EventConfig from the raw result.

        Missing or null fields become empty values; scheduled_events is
        always left empty.
        """
        return EventConfig(
            initial_posts=result.get("initial_posts") or [],
            scheduled_events=[],
            hot_topics=result.get("hot_topics") or [],
            narrative_direction=result.get("narrative_direction") or ""
        )

    def _assign_initial_post_agents(
        self,
        event_config: "EventConfig",
        agent_configs: "List[AgentActivityConfig]"
    ) -> "EventConfig":
        """Assign a poster agent to each initial post.

        Resolution order per post: an agent whose entity type equals the
        post's poster_type (case-insensitive), then an agent of an aliased
        type, then the agent with the highest influence_weight, and finally
        agent id 0 when there are no agents at all. Agents of one type are
        used round-robin. event_config is updated in place and returned; each
        rewritten post keeps only content, poster_type and poster_agent_id.
        """
        if not event_config.initial_posts:
            return event_config

        # Index agents by lower-cased entity type.
        agents_by_type: Dict[str, List[AgentActivityConfig]] = {}
        for agent in agent_configs:
            agents_by_type.setdefault(agent.entity_type.lower(), []).append(agent)

        # Type alias map (handles the different formats the LLM might emit).
        type_aliases = {
            "official": ["official", "university", "governmentagency", "government"],
            "university": ["university", "official"],
            "mediaoutlet": ["mediaoutlet", "media"],
            "student": ["student", "person"],
            "professor": ["professor", "expert", "teacher"],
            "alumni": ["alumni", "person"],
            "organization": ["organization", "ngo", "company", "group"],
            "person": ["person", "student", "alumni"],
        }

        # Next round-robin position per type, so posts spread across agents.
        used_indices: Dict[str, int] = {}

        def take_next(type_key: str) -> int:
            pool = agents_by_type[type_key]
            idx = used_indices.get(type_key, 0) % len(pool)
            used_indices[type_key] = idx + 1
            return pool[idx].agent_id

        updated_posts = []
        for post in event_config.initial_posts:
            # The LLM may emit null or a non-string here; treat it as "no type".
            raw_type = post.get("poster_type")
            poster_type = raw_type.lower() if isinstance(raw_type, str) else ""
            content = post.get("content", "")

            matched_agent_id = None
            if poster_type in agents_by_type:
                # 1. Direct match.
                matched_agent_id = take_next(poster_type)
            else:
                # 2. Match via aliases.
                for alias_key, aliases in type_aliases.items():
                    if poster_type not in aliases and alias_key != poster_type:
                        continue
                    for alias in aliases:
                        if alias in agents_by_type:
                            matched_agent_id = take_next(alias)
                            break
                    if matched_agent_id is not None:
                        break

            # 3. If still unresolved, fall back to the most influential agent.
            if matched_agent_id is None:
                logger.warning(t("log.simulation_config.m012", poster_type=poster_type))
                if agent_configs:
                    # sorted() is stable, so ties keep their original order.
                    sorted_agents = sorted(agent_configs, key=lambda a: a.influence_weight, reverse=True)
                    matched_agent_id = sorted_agents[0].agent_id
                else:
                    matched_agent_id = 0

            updated_posts.append({
                "content": content,
                "poster_type": raw_type if raw_type is not None else "Unknown",
                "poster_agent_id": matched_agent_id
            })
            logger.info(t("log.simulation_config.m013", poster_type=poster_type, matched_agent_id=matched_agent_id))

        event_config.initial_posts = updated_posts
        return event_config

    def _generate_agent_configs_batch(
        self,
        context: str,
        entities: "List[EntityNode]",
        start_idx: int,
        simulation_requirement: str
    ) -> "List[AgentActivityConfig]":
        """Generate the activity configuration for one batch of entities.

        Agent ids are start_idx, start_idx + 1, ... in entity order. Entities
        for which the LLM returned no usable entry get a rule-based
        configuration instead. The context argument is accepted for interface
        compatibility and is not used in the prompt.
        """
        # Build entity information (using the configured summary length).
        summary_len = self.AGENT_SUMMARY_LENGTH
        entity_list = [
            {
                "agent_id": start_idx + i,
                "entity_name": e.name,
                "entity_type": e.get_entity_type() or "Unknown",
                "summary": e.summary[:summary_len] if e.summary else ""
            }
            for i, e in enumerate(entities)
        ]
        prompt = f"""Based on the information below, generate a social-media activity configuration for each entity.
Simulation requirement: {simulation_requirement}
## Entity list
```json
{json.dumps(entity_list, ensure_ascii=False, indent=2)}
```
## Task
Generate an activity configuration for each entity. Notes:
- **Times must match the daily rhythm of the target audience** — the following are reference values for the UTC+8 timezone; adapt them to the simulation scenario.
- **Officials** (University/GovernmentAgency): low activity (0.1-0.3), active during working hours (9-17), slow response (60-240 minutes), high influence (2.5-3.0).
- **Media** (MediaOutlet): medium activity (0.4-0.6), active throughout the day (8-23), fast response (5-30 minutes), high influence (2.0-2.5).
- **Individuals** (Student/Person/Alumni): high activity (0.6-0.9), mainly active in the evening (18-23), fast response (1-15 minutes), low influence (0.8-1.2).
- **Public figures / experts**: medium activity (0.4-0.6), mid-to-high influence (1.5-2.0).
Return strict JSON (no markdown):
{{
"agent_configs": [
{{
"agent_id": <must match the input>,
"activity_level": <0.0-1.0>,
"posts_per_hour": <posting frequency>,
"comments_per_hour": <commenting frequency>,
"active_hours": [<list of active hours, matching the audience's daily rhythm>],
"response_delay_min": <minimum response delay in minutes>,
"response_delay_max": <maximum response delay in minutes>,
"sentiment_bias": <-1.0 to 1.0>,
"stance": "<supportive/opposing/neutral/observer>",
"influence_weight": <influence weight>
}},
...
]
}}"""
        system_prompt = "You are a social-media behaviour analyst. Return plain JSON. The configuration should match the daily rhythm of the simulation's target audience."
        system_prompt = f"{system_prompt}\n\n{get_language_instruction()}\nIMPORTANT: The 'stance' field value MUST be one of the English strings: 'supportive', 'opposing', 'neutral', 'observer'. All JSON field names and numeric values must remain unchanged. Only natural language text fields should use the specified language."

        # Index the LLM entries by integer agent id. Malformed entries are
        # skipped one by one so a single bad item cannot void the whole batch.
        llm_configs: Dict[int, Dict[str, Any]] = {}
        try:
            result = self._call_llm_with_retry(prompt, system_prompt)
            for item in result.get("agent_configs") or []:
                if not isinstance(item, dict):
                    continue
                try:
                    llm_configs[int(item["agent_id"])] = item
                except (KeyError, TypeError, ValueError):
                    continue
        except Exception as e:
            logger.warning(t("log.simulation_config.m014", e=e))

        # Build AgentActivityConfig objects.
        configs = []
        for i, entity in enumerate(entities):
            agent_id = start_idx + i
            # If the LLM did not produce a config, fall back to rule-based generation.
            cfg = llm_configs.get(agent_id) or self._generate_agent_config_by_rule(entity)
            configs.append(AgentActivityConfig(
                agent_id=agent_id,
                entity_uuid=entity.uuid,
                entity_name=entity.name,
                entity_type=entity.get_entity_type() or "Unknown",
                activity_level=cfg.get("activity_level", 0.5),
                posts_per_hour=cfg.get("posts_per_hour", 0.5),
                comments_per_hour=cfg.get("comments_per_hour", 1.0),
                active_hours=cfg.get("active_hours", list(range(9, 23))),
                response_delay_min=cfg.get("response_delay_min", 5),
                response_delay_max=cfg.get("response_delay_max", 60),
                sentiment_bias=cfg.get("sentiment_bias", 0.0),
                stance=cfg.get("stance", "neutral"),
                influence_weight=cfg.get("influence_weight", 1.0)
            ))
        return configs

    def _generate_agent_config_by_rule(self, entity: "EntityNode") -> Dict[str, Any]:
        """Rule-based configuration for one agent (Chinese daily rhythm).

        The preset is chosen from the lower-cased entity type; any type not
        listed below gets the general-public preset.
        """
        entity_type = (entity.get_entity_type() or "Unknown").lower()
        if entity_type in ["university", "governmentagency", "ngo"]:
            # Official institutions: active during working hours, low frequency, high influence.
            return {
                "activity_level": 0.2,
                "posts_per_hour": 0.1,
                "comments_per_hour": 0.05,
                "active_hours": list(range(9, 18)),  # 09:00-17:59
                "response_delay_min": 60,
                "response_delay_max": 240,
                "sentiment_bias": 0.0,
                "stance": "neutral",
                "influence_weight": 3.0
            }
        elif entity_type in ["mediaoutlet"]:
            # Media: active throughout the day, medium frequency, high influence.
            return {
                "activity_level": 0.5,
                "posts_per_hour": 0.8,
                "comments_per_hour": 0.3,
                "active_hours": list(range(7, 24)),  # 07:00-23:59
                "response_delay_min": 5,
                "response_delay_max": 30,
                "sentiment_bias": 0.0,
                "stance": "observer",
                "influence_weight": 2.5
            }
        elif entity_type in ["professor", "expert", "official"]:
            # Experts / professors / officials: active during work and evening, medium frequency.
            return {
                "activity_level": 0.4,
                "posts_per_hour": 0.3,
                "comments_per_hour": 0.5,
                "active_hours": list(range(8, 22)),  # 08:00-21:59
                "response_delay_min": 15,
                "response_delay_max": 90,
                "sentiment_bias": 0.0,
                "stance": "neutral",
                "influence_weight": 2.0
            }
        elif entity_type in ["student"]:
            # Students: mostly evening, high frequency.
            return {
                "activity_level": 0.8,
                "posts_per_hour": 0.6,
                "comments_per_hour": 1.5,
                "active_hours": [8, 9, 10, 11, 12, 13, 18, 19, 20, 21, 22, 23],  # Morning + evening.
                "response_delay_min": 1,
                "response_delay_max": 15,
                "sentiment_bias": 0.0,
                "stance": "neutral",
                "influence_weight": 0.8
            }
        elif entity_type in ["alumni"]:
            # Alumni: mostly evening.
            return {
                "activity_level": 0.6,
                "posts_per_hour": 0.4,
                "comments_per_hour": 0.8,
                "active_hours": [12, 13, 19, 20, 21, 22, 23],  # Lunch break + evening.
                "response_delay_min": 5,
                "response_delay_max": 30,
                "sentiment_bias": 0.0,
                "stance": "neutral",
                "influence_weight": 1.0
            }
        else:
            # General public: evening peak.
            return {
                "activity_level": 0.7,
                "posts_per_hour": 0.5,
                "comments_per_hour": 1.2,
                "active_hours": [9, 10, 11, 12, 13, 18, 19, 20, 21, 22, 23],  # Daytime + evening.
                "response_delay_min": 2,
                "response_delay_max": 20,
                "sentiment_bias": 0.0,
                "stance": "neutral",
                "influence_weight": 1.0
            }