""" OASIS simulation manager Manages parallel simulation on both Twitter and Reddit platforms. Uses preset scripts with LLM-generated configuration parameters. """ import os import json import shutil from typing import Dict, Any, List, Optional from dataclasses import dataclass, field from datetime import datetime from enum import Enum from ..config import Config from ..utils.logger import get_logger from .zep_entity_reader import ZepEntityReader, FilteredEntities from .oasis_profile_generator import OasisProfileGenerator, OasisAgentProfile from .simulation_config_generator import SimulationConfigGenerator, SimulationParameters from ..utils.locale import t logger = get_logger('mirofish.simulation') class SimulationStatus(str, Enum): """Simulation status""" CREATED = "created" PREPARING = "preparing" READY = "ready" RUNNING = "running" PAUSED = "paused" STOPPED = "stopped" # Simulation manually stopped COMPLETED = "completed" # Simulation naturally completed FAILED = "failed" class PlatformType(str, Enum): """Platform type""" TWITTER = "twitter" REDDIT = "reddit" @dataclass class SimulationState: """Simulation state""" simulation_id: str project_id: str graph_id: str # Platform enable flags enable_twitter: bool = True enable_reddit: bool = True # Status status: SimulationStatus = SimulationStatus.CREATED # Preparation phase data entities_count: int = 0 profiles_count: int = 0 entity_types: List[str] = field(default_factory=list) # Config generation info config_generated: bool = False config_reasoning: str = "" # Runtime data current_round: int = 0 twitter_status: str = "not_started" reddit_status: str = "not_started" # Timestamps created_at: str = field(default_factory=lambda: datetime.now().isoformat()) updated_at: str = field(default_factory=lambda: datetime.now().isoformat()) # Error message error: Optional[str] = None def to_dict(self) -> Dict[str, Any]: """Full state dictionary (internal use)""" return { "simulation_id": self.simulation_id, "project_id": self.project_id, "graph_id": self.graph_id, "enable_twitter": self.enable_twitter, "enable_reddit": self.enable_reddit, "status": self.status.value, "entities_count": self.entities_count, "profiles_count": self.profiles_count, "entity_types": self.entity_types, "config_generated": self.config_generated, "config_reasoning": self.config_reasoning, "current_round": self.current_round, "twitter_status": self.twitter_status, "reddit_status": self.reddit_status, "created_at": self.created_at, "updated_at": self.updated_at, "error": self.error, } def to_simple_dict(self) -> Dict[str, Any]: """Simplified state dictionary (used for API responses)""" return { "simulation_id": self.simulation_id, "project_id": self.project_id, "graph_id": self.graph_id, "status": self.status.value, "entities_count": self.entities_count, "profiles_count": self.profiles_count, "entity_types": self.entity_types, "config_generated": self.config_generated, "error": self.error, } class SimulationManager: """ Simulation manager Core functions: 1. Read and filter entities from the Zep graph 2. Generate OASIS Agent Profiles 3. Use LLM to intelligently generate simulation configuration parameters 4. Prepare all files required by the preset scripts """ # Simulation data storage directory SIMULATION_DATA_DIR = os.path.join( os.path.dirname(__file__), '../../uploads/simulations' ) def __init__(self): # Ensure directory exists os.makedirs(self.SIMULATION_DATA_DIR, exist_ok=True) # In-memory simulation state cache self._simulations: Dict[str, SimulationState] = {} def _get_simulation_dir(self, simulation_id: str) -> str: """Get the simulation data directory""" sim_dir = os.path.join(self.SIMULATION_DATA_DIR, simulation_id) os.makedirs(sim_dir, exist_ok=True) return sim_dir def _save_simulation_state(self, state: SimulationState): """Save simulation state to file""" sim_dir = self._get_simulation_dir(state.simulation_id) state_file = os.path.join(sim_dir, "state.json") state.updated_at = datetime.now().isoformat() with open(state_file, 'w', encoding='utf-8') as f: json.dump(state.to_dict(), f, ensure_ascii=False, indent=2) self._simulations[state.simulation_id] = state def _load_simulation_state(self, simulation_id: str) -> Optional[SimulationState]: """Load simulation state from file""" if simulation_id in self._simulations: return self._simulations[simulation_id] sim_dir = self._get_simulation_dir(simulation_id) state_file = os.path.join(sim_dir, "state.json") if not os.path.exists(state_file): return None with open(state_file, 'r', encoding='utf-8') as f: data = json.load(f) state = SimulationState( simulation_id=simulation_id, project_id=data.get("project_id", ""), graph_id=data.get("graph_id", ""), enable_twitter=data.get("enable_twitter", True), enable_reddit=data.get("enable_reddit", True), status=SimulationStatus(data.get("status", "created")), entities_count=data.get("entities_count", 0), profiles_count=data.get("profiles_count", 0), entity_types=data.get("entity_types", []), config_generated=data.get("config_generated", False), config_reasoning=data.get("config_reasoning", ""), current_round=data.get("current_round", 0), twitter_status=data.get("twitter_status", "not_started"), reddit_status=data.get("reddit_status", "not_started"), created_at=data.get("created_at", datetime.now().isoformat()), updated_at=data.get("updated_at", datetime.now().isoformat()), error=data.get("error"), ) self._simulations[simulation_id] = state return state def create_simulation( self, project_id: str, graph_id: str, enable_twitter: bool = True, enable_reddit: bool = True, ) -> SimulationState: """ Create a new simulation. Args: project_id: project ID graph_id: Zep graph ID enable_twitter: whether to enable Twitter simulation enable_reddit: whether to enable Reddit simulation Returns: SimulationState """ import uuid simulation_id = f"sim_{uuid.uuid4().hex[:12]}" state = SimulationState( simulation_id=simulation_id, project_id=project_id, graph_id=graph_id, enable_twitter=enable_twitter, enable_reddit=enable_reddit, status=SimulationStatus.CREATED, ) self._save_simulation_state(state) logger.info(f"Simulation created: {simulation_id}, project={project_id}, graph={graph_id}") return state def prepare_simulation( self, simulation_id: str, simulation_requirement: str, document_text: str, defined_entity_types: Optional[List[str]] = None, use_llm_for_profiles: bool = True, progress_callback: Optional[callable] = None, parallel_profile_count: int = 3 ) -> SimulationState: """ Prepare the simulation environment (fully automated). Steps: 1. Read and filter entities from the Zep graph 2. Generate an OASIS Agent Profile for each entity (optional LLM enhancement, supports parallelism) 3. Use LLM to intelligently generate simulation configuration parameters (time, activity level, posting frequency, etc.) 4. Save configuration files and profile files 5. Copy preset scripts to the simulation directory Args: simulation_id: simulation ID simulation_requirement: simulation requirement description (used for LLM config generation) document_text: original document content (used for LLM background understanding) defined_entity_types: predefined entity types (optional) use_llm_for_profiles: whether to use LLM to generate detailed personas progress_callback: progress callback function (stage, progress, message) parallel_profile_count: number of profiles to generate in parallel, default 3 Returns: SimulationState """ state = self._load_simulation_state(simulation_id) if not state: raise ValueError(f"Simulation not found: {simulation_id}") try: state.status = SimulationStatus.PREPARING self._save_simulation_state(state) sim_dir = self._get_simulation_dir(simulation_id) # ========== Stage 1: Read and filter entities ========== if progress_callback: progress_callback("reading", 0, t('progress.connectingZepGraph')) reader = ZepEntityReader() if progress_callback: progress_callback("reading", 30, t('progress.readingNodeData')) filtered = reader.filter_defined_entities( graph_id=state.graph_id, defined_entity_types=defined_entity_types, enrich_with_edges=True ) state.entities_count = filtered.filtered_count state.entity_types = list(filtered.entity_types) if progress_callback: progress_callback( "reading", 100, t('progress.readingComplete', count=filtered.filtered_count), current=filtered.filtered_count, total=filtered.filtered_count ) if filtered.filtered_count == 0: state.status = SimulationStatus.FAILED state.error = "No qualifying entities found. Please check that the graph was built correctly." self._save_simulation_state(state) return state # ========== Stage 2: Generate Agent Profiles ========== total_entities = len(filtered.entities) if progress_callback: progress_callback( "generating_profiles", 0, t('progress.startGenerating'), current=0, total=total_entities ) # Pass graph_id to enable Zep retrieval for richer context generator = OasisProfileGenerator(graph_id=state.graph_id) def profile_progress(current, total, msg): if progress_callback: progress_callback( "generating_profiles", int(current / total * 100), msg, current=current, total=total, item_name=msg ) # Set real-time save path (prefer Reddit JSON format) realtime_output_path = None realtime_platform = "reddit" if state.enable_reddit: realtime_output_path = os.path.join(sim_dir, "reddit_profiles.json") realtime_platform = "reddit" elif state.enable_twitter: realtime_output_path = os.path.join(sim_dir, "twitter_profiles.csv") realtime_platform = "twitter" profiles = generator.generate_profiles_from_entities( entities=filtered.entities, use_llm=use_llm_for_profiles, progress_callback=profile_progress, graph_id=state.graph_id, # Pass graph_id for Zep retrieval parallel_count=parallel_profile_count, # Parallel generation count realtime_output_path=realtime_output_path, # Real-time save path output_platform=realtime_platform # Output format ) state.profiles_count = len(profiles) # Save profile files (note: Twitter uses CSV format, Reddit uses JSON format) # Reddit has already been saved incrementally during generation; save once more to ensure completeness if progress_callback: progress_callback( "generating_profiles", 95, t('progress.savingProfiles'), current=total_entities, total=total_entities ) if state.enable_reddit: generator.save_profiles( profiles=profiles, file_path=os.path.join(sim_dir, "reddit_profiles.json"), platform="reddit" ) if state.enable_twitter: # Twitter uses CSV format — this is a requirement of OASIS generator.save_profiles( profiles=profiles, file_path=os.path.join(sim_dir, "twitter_profiles.csv"), platform="twitter" ) if progress_callback: progress_callback( "generating_profiles", 100, t('progress.profilesComplete', count=len(profiles)), current=len(profiles), total=len(profiles) ) # ========== Stage 3: LLM intelligent simulation configuration generation ========== if progress_callback: progress_callback( "generating_config", 0, t('progress.analyzingRequirements'), current=0, total=3 ) config_generator = SimulationConfigGenerator() if progress_callback: progress_callback( "generating_config", 30, t('progress.callingLLMConfig'), current=1, total=3 ) sim_params = config_generator.generate_config( simulation_id=simulation_id, project_id=state.project_id, graph_id=state.graph_id, simulation_requirement=simulation_requirement, document_text=document_text, entities=filtered.entities, enable_twitter=state.enable_twitter, enable_reddit=state.enable_reddit ) if progress_callback: progress_callback( "generating_config", 70, t('progress.savingConfigFiles'), current=2, total=3 ) # Save configuration file config_path = os.path.join(sim_dir, "simulation_config.json") with open(config_path, 'w', encoding='utf-8') as f: f.write(sim_params.to_json()) state.config_generated = True state.config_reasoning = sim_params.generation_reasoning if progress_callback: progress_callback( "generating_config", 100, t('progress.configComplete'), current=3, total=3 ) # Note: run scripts remain in backend/scripts/; they are not copied to the simulation directory. # When starting a simulation, simulation_runner runs scripts from the scripts/ directory. # Update status state.status = SimulationStatus.READY self._save_simulation_state(state) logger.info(f"Simulation preparation complete: {simulation_id}, " f"entities={state.entities_count}, profiles={state.profiles_count}") return state except Exception as e: logger.error(f"Simulation preparation failed: {simulation_id}, error={str(e)}") import traceback logger.error(traceback.format_exc()) state.status = SimulationStatus.FAILED state.error = str(e) self._save_simulation_state(state) raise def get_simulation(self, simulation_id: str) -> Optional[SimulationState]: """Get simulation state""" return self._load_simulation_state(simulation_id) def list_simulations(self, project_id: Optional[str] = None) -> List[SimulationState]: """List all simulations""" simulations = [] if os.path.exists(self.SIMULATION_DATA_DIR): for sim_id in os.listdir(self.SIMULATION_DATA_DIR): # Skip hidden files (e.g. .DS_Store) and non-directory entries sim_path = os.path.join(self.SIMULATION_DATA_DIR, sim_id) if sim_id.startswith('.') or not os.path.isdir(sim_path): continue state = self._load_simulation_state(sim_id) if state: if project_id is None or state.project_id == project_id: simulations.append(state) return simulations def get_profiles(self, simulation_id: str, platform: str = "reddit") -> List[Dict[str, Any]]: """Get agent profiles for a simulation""" state = self._load_simulation_state(simulation_id) if not state: raise ValueError(f"Simulation not found: {simulation_id}") sim_dir = self._get_simulation_dir(simulation_id) profile_path = os.path.join(sim_dir, f"{platform}_profiles.json") if not os.path.exists(profile_path): return [] with open(profile_path, 'r', encoding='utf-8') as f: return json.load(f) def get_simulation_config(self, simulation_id: str) -> Optional[Dict[str, Any]]: """Get simulation configuration""" sim_dir = self._get_simulation_dir(simulation_id) config_path = os.path.join(sim_dir, "simulation_config.json") if not os.path.exists(config_path): return None with open(config_path, 'r', encoding='utf-8') as f: return json.load(f) def get_run_instructions(self, simulation_id: str) -> Dict[str, str]: """Get run instructions""" sim_dir = self._get_simulation_dir(simulation_id) config_path = os.path.join(sim_dir, "simulation_config.json") scripts_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../scripts')) return { "simulation_dir": sim_dir, "scripts_dir": scripts_dir, "config_file": config_path, "commands": { "twitter": f"python {scripts_dir}/run_twitter_simulation.py --config {config_path}", "reddit": f"python {scripts_dir}/run_reddit_simulation.py --config {config_path}", "parallel": f"python {scripts_dir}/run_parallel_simulation.py --config {config_path}", }, "instructions": ( f"1. Activate conda environment: conda activate MiroFish\n" f"2. Run simulation (scripts located at {scripts_dir}):\n" f" - Twitter only: python {scripts_dir}/run_twitter_simulation.py --config {config_path}\n" f" - Reddit only: python {scripts_dir}/run_reddit_simulation.py --config {config_path}\n" f" - Both platforms in parallel: python {scripts_dir}/run_parallel_simulation.py --config {config_path}" ) }