MicroFish/backend/app/services/private_impact_runner.py

904 lines
34 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Private Impact Runner
Orchestrates the Private Impact simulation via subprocess, monitors
private/actions.jsonl for real-time state updates, and exposes the
interface used by the Flask /api/private-impact blueprint.
Equivalent of simulation_runner.py for the Private Impact mode.
Key differences from SimulationRunner:
- Single platform: "private" (no Twitter/Reddit split)
- Action log: {sim_dir}/private/actions.jsonl
- Config file: private_simulation_config.json
- Script: backend/scripts/run_private_simulation.py
- Time unit: simulated days (not hours)
- Cleanup removes private_simulation.db + private/ directory
Note on SimulationLogManager.get_private_logger():
SimulationLogManager (backend/scripts/action_logger.py) does NOT currently
expose get_private_logger(). run_private_simulation.py falls back directly
to PlatformActionLogger("private", simulation_dir). This method must be added
to action_logger.py in a future prompt — see CONTEXT.md.
"""
import json
import os
import shutil
import signal
import subprocess
import sys
import threading
import time
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional
from ..utils.logger import get_logger
from ..utils.locale import get_locale, set_locale
from .zep_graph_memory_updater import ZepGraphMemoryManager
logger = get_logger('mirofish.private_impact_runner')
IS_WINDOWS = sys.platform == 'win32'
# ── Enums ─────────────────────────────────────────────────────────────────────
class PrivateRunnerStatus(str, Enum):
"""Run state of the Private Impact simulation subprocess."""
IDLE = "idle"
STARTING = "starting"
RUNNING = "running"
STOPPING = "stopping"
STOPPED = "stopped"
COMPLETED = "completed"
FAILED = "failed"
# ── Dataclasses ───────────────────────────────────────────────────────────────
@dataclass
class PrivateAgentAction:
"""
Single relational action record parsed from private/actions.jsonl.
Equivalent of AgentAction for the private simulation mode.
No platform split — all actions are platform="private".
"""
round_num: int
timestamp: str
agent_id: int
agent_name: str
action_type: str # REACT_PRIVATELY | CONFRONT | COALITION_BUILD |
# SILENT_LEAVE | VOCAL_SUPPORT | DO_NOTHING
action_args: Dict[str, Any] = field(default_factory=dict)
result: Optional[str] = None
success: bool = True
def to_dict(self) -> Dict[str, Any]:
return {
"round_num": self.round_num,
"timestamp": self.timestamp,
"platform": "private",
"agent_id": self.agent_id,
"agent_name": self.agent_name,
"action_type": self.action_type,
"action_args": self.action_args,
"result": self.result,
"success": self.success,
}
@dataclass
class PrivateSimulationRunState:
"""
Real-time run state for a Private Impact simulation.
Equivalent of SimulationRunState for the private mode.
Uses private_* field names — no twitter_* / reddit_* split.
"""
simulation_id: str
runner_status: PrivateRunnerStatus = PrivateRunnerStatus.IDLE
# Progress
private_current_round: int = 0
private_total_rounds: int = 0
private_simulated_days: int = 0
private_total_days: int = 0
# Platform state (single: private)
private_running: bool = False
private_actions_count: int = 0
private_completed: bool = False
# Error
private_error: Optional[str] = None
# Recent actions for frontend live display
recent_actions: List[PrivateAgentAction] = field(default_factory=list)
max_recent_actions: int = 50
# Timestamps
started_at: Optional[str] = None
updated_at: str = field(default_factory=lambda: datetime.now().isoformat())
completed_at: Optional[str] = None
# Subprocess PID
process_pid: Optional[int] = None
def add_action(self, action: PrivateAgentAction) -> None:
"""Prepend action to recent_actions and increment actions counter."""
self.recent_actions.insert(0, action)
if len(self.recent_actions) > self.max_recent_actions:
self.recent_actions = self.recent_actions[:self.max_recent_actions]
self.private_actions_count += 1
self.updated_at = datetime.now().isoformat()
def to_dict(self) -> Dict[str, Any]:
total = max(self.private_total_rounds, 1)
return {
"simulation_id": self.simulation_id,
"runner_status": self.runner_status.value,
"private_current_round": self.private_current_round,
"private_total_rounds": self.private_total_rounds,
"private_simulated_days": self.private_simulated_days,
"private_total_days": self.private_total_days,
"progress_percent": round(self.private_current_round / total * 100, 1),
"private_running": self.private_running,
"private_actions_count": self.private_actions_count,
"private_completed": self.private_completed,
"private_error": self.private_error,
"started_at": self.started_at,
"updated_at": self.updated_at,
"completed_at": self.completed_at,
"process_pid": self.process_pid,
}
def to_detail_dict(self) -> Dict[str, Any]:
"""Extended dict including recent actions."""
result = self.to_dict()
result["recent_actions"] = [a.to_dict() for a in self.recent_actions]
return result
# ── PrivateImpactRunner ────────────────────────────────────────────────────────
class PrivateImpactRunner:
"""
Orchestrates Private Impact simulations.
Equivalent of SimulationRunner for the private relational mode.
Launches run_private_simulation.py as a subprocess, monitors
private/actions.jsonl for state updates, and exposes the interface
consumed by the Flask /api/private-impact blueprint.
Directory layout (under RUN_STATE_DIR/{simulation_id}/):
private_simulation_config.json — PrivateSimulationParameters.to_dict()
private/actions.jsonl — relational action log
simulation.log — subprocess stdout + stderr
run_state.json — persisted PrivateSimulationRunState
"""
RUN_STATE_DIR = os.path.join(
os.path.dirname(__file__),
'../../uploads/simulations'
)
SCRIPTS_DIR = os.path.join(
os.path.dirname(__file__),
'../../scripts'
)
CONFIG_FILENAME = "private_simulation_config.json"
SCRIPT_NAME = "run_private_simulation.py"
# Class-level in-memory state (same pattern as SimulationRunner)
_run_states: Dict[str, PrivateSimulationRunState] = {}
_processes: Dict[str, subprocess.Popen] = {}
_monitor_threads: Dict[str, threading.Thread] = {}
_stdout_files: Dict[str, Any] = {}
_graph_memory_enabled: Dict[str, bool] = {}
# ── Public API ─────────────────────────────────────────────────────────────
@classmethod
def get_status(cls, simulation_id: str) -> Optional[PrivateSimulationRunState]:
"""
Return the current run state for a simulation.
Checks in-memory cache first, then falls back to disk
(same pattern as SimulationRunner.get_run_state).
Args:
simulation_id: Simulation identifier.
Returns:
PrivateSimulationRunState or None if not found.
"""
if simulation_id in cls._run_states:
return cls._run_states[simulation_id]
return cls._load_run_state(simulation_id)
@classmethod
def start_simulation(
cls,
simulation_id: str,
max_rounds: Optional[int] = None,
enable_graph_memory_update: bool = False,
graph_id: Optional[str] = None,
) -> PrivateSimulationRunState:
"""
Launch the private impact simulation subprocess.
Same mechanics as SimulationRunner.start_simulation (L.387399):
- Reads private_simulation_config.json from the simulation directory
- Spawns run_private_simulation.py with start_new_session=True
- Redirects stdout/stderr to simulation.log
- Launches a background monitor thread
Args:
simulation_id: Unique simulation identifier.
max_rounds: Optional upper bound on simulation rounds.
enable_graph_memory_update: Push activity updates to Zep graph.
graph_id: Required when enable_graph_memory_update=True.
Returns:
PrivateSimulationRunState with status=STARTING.
Raises:
ValueError: If already running, config missing, or graph_id absent.
"""
existing = cls.get_status(simulation_id)
if existing and existing.runner_status in (
PrivateRunnerStatus.RUNNING, PrivateRunnerStatus.STARTING
):
raise ValueError(f"Private simulation already running: {simulation_id}")
sim_dir = os.path.join(cls.RUN_STATE_DIR, simulation_id)
config_path = os.path.join(sim_dir, cls.CONFIG_FILENAME)
if not os.path.exists(config_path):
raise ValueError(
f"Private simulation config not found: {config_path}. "
"Call /prepare first to generate the config."
)
with open(config_path, 'r', encoding='utf-8') as f:
config = json.load(f)
time_cfg = config.get("time_config", {})
total_days = time_cfg.get("total_simulation_days", 30)
rounds_per_day = time_cfg.get("rounds_per_day", 3)
total_rounds = total_days * rounds_per_day
if max_rounds is not None and max_rounds > 0:
total_rounds = min(total_rounds, max_rounds)
logger.info(
f"[PRIVATE] Rounds capped to {total_rounds} "
f"(max_rounds={max_rounds})"
)
state = PrivateSimulationRunState(
simulation_id=simulation_id,
runner_status=PrivateRunnerStatus.STARTING,
private_total_rounds=total_rounds,
private_total_days=total_days,
private_running=True,
started_at=datetime.now().isoformat(),
)
cls._save_run_state(state)
# Optional Zep graph memory update
if enable_graph_memory_update:
if not graph_id:
raise ValueError(
"graph_id is required when enable_graph_memory_update=True"
)
try:
ZepGraphMemoryManager.create_updater(simulation_id, graph_id)
cls._graph_memory_enabled[simulation_id] = True
logger.info(
f"[PRIVATE] Graph memory update enabled: "
f"simulation_id={simulation_id}, graph_id={graph_id}"
)
except Exception as e:
logger.error(f"[PRIVATE] Failed to create graph memory updater: {e}")
cls._graph_memory_enabled[simulation_id] = False
else:
cls._graph_memory_enabled[simulation_id] = False
script_path = os.path.join(cls.SCRIPTS_DIR, cls.SCRIPT_NAME)
if not os.path.exists(script_path):
raise ValueError(f"Script not found: {script_path}")
try:
cmd = [sys.executable, script_path, "--config", config_path]
if max_rounds is not None and max_rounds > 0:
cmd.extend(["--max-rounds", str(max_rounds)])
main_log_path = os.path.join(sim_dir, "simulation.log")
main_log_file = open(main_log_path, 'w', encoding='utf-8')
env = os.environ.copy()
env['PYTHONUTF8'] = '1'
env['PYTHONIOENCODING'] = 'utf-8'
process = subprocess.Popen(
cmd,
cwd=sim_dir,
stdout=main_log_file,
stderr=subprocess.STDOUT,
text=True,
encoding='utf-8',
bufsize=1,
env=env,
start_new_session=True,
)
cls._stdout_files[simulation_id] = main_log_file
state.process_pid = process.pid
state.runner_status = PrivateRunnerStatus.RUNNING
cls._processes[simulation_id] = process
cls._save_run_state(state)
current_locale = get_locale()
monitor_thread = threading.Thread(
target=cls._monitor_simulation,
args=(simulation_id, current_locale),
daemon=True,
)
monitor_thread.start()
cls._monitor_threads[simulation_id] = monitor_thread
logger.info(
f"[PRIVATE] Simulation started: {simulation_id}, "
f"pid={process.pid}, total_rounds={total_rounds}, "
f"total_days={total_days}"
)
except Exception as e:
state.runner_status = PrivateRunnerStatus.FAILED
state.private_error = str(e)
state.private_running = False
cls._save_run_state(state)
raise
return state
@classmethod
def stop_simulation(cls, simulation_id: str) -> PrivateSimulationRunState:
"""
Stop a running private simulation with a clean SIGTERM.
Same mechanics as SimulationRunner.stop_simulation.
Args:
simulation_id: Simulation identifier.
Returns:
Updated PrivateSimulationRunState with status=STOPPED.
Raises:
ValueError: If simulation does not exist or is not running.
"""
state = cls.get_status(simulation_id)
if not state:
raise ValueError(f"Private simulation not found: {simulation_id}")
if state.runner_status not in (
PrivateRunnerStatus.RUNNING, PrivateRunnerStatus.STARTING
):
raise ValueError(
f"Private simulation is not running: "
f"{simulation_id}, status={state.runner_status}"
)
state.runner_status = PrivateRunnerStatus.STOPPING
cls._save_run_state(state)
process = cls._processes.get(simulation_id)
if process and process.poll() is None:
try:
cls._terminate_process(process, simulation_id)
except ProcessLookupError:
pass
except Exception as e:
logger.error(f"[PRIVATE] Terminate failed: {simulation_id}, {e}")
try:
process.terminate()
process.wait(timeout=5)
except Exception:
process.kill()
state.runner_status = PrivateRunnerStatus.STOPPED
state.private_running = False
state.completed_at = datetime.now().isoformat()
cls._save_run_state(state)
if cls._graph_memory_enabled.get(simulation_id, False):
try:
ZepGraphMemoryManager.stop_updater(simulation_id)
except Exception as e:
logger.error(f"[PRIVATE] Failed to stop graph updater: {e}")
cls._graph_memory_enabled.pop(simulation_id, None)
logger.info(f"[PRIVATE] Simulation stopped: {simulation_id}")
return state
@classmethod
def get_all_actions(
cls,
simulation_id: str,
agent_id: Optional[int] = None,
round_num: Optional[int] = None,
) -> List[PrivateAgentAction]:
"""
Read the complete private/actions.jsonl action history.
Args:
simulation_id: Simulation identifier.
agent_id: Optional filter by agent ID.
round_num: Optional filter by round number.
Returns:
List of PrivateAgentAction sorted by timestamp descending.
"""
sim_dir = os.path.join(cls.RUN_STATE_DIR, simulation_id)
log_path = os.path.join(sim_dir, "private", "actions.jsonl")
actions = cls._read_actions_from_file(
log_path, agent_id=agent_id, round_num=round_num
)
actions.sort(key=lambda a: a.timestamp, reverse=True)
return actions
@classmethod
def cleanup(cls, simulation_id: str) -> Dict[str, Any]:
"""
Remove private simulation artifacts to allow a fresh restart.
Deletes:
- run_state.json
- simulation.log
- private_simulation.db
- private/ directory (contains actions.jsonl)
Does NOT delete: private_simulation_config.json, profile files.
Args:
simulation_id: Simulation identifier.
Returns:
Dict with keys: success (bool), cleaned_files (list), errors (list|None).
"""
sim_dir = os.path.join(cls.RUN_STATE_DIR, simulation_id)
if not os.path.exists(sim_dir):
return {"success": True, "cleaned_files": [], "errors": None}
cleaned: List[str] = []
errors: List[str] = []
for filename in ("run_state.json", "simulation.log", "private_simulation.db"):
path = os.path.join(sim_dir, filename)
if os.path.exists(path):
try:
os.remove(path)
cleaned.append(filename)
except Exception as e:
errors.append(f"Failed to delete {filename}: {e}")
private_dir = os.path.join(sim_dir, "private")
if os.path.exists(private_dir):
try:
shutil.rmtree(private_dir)
cleaned.append("private/")
except Exception as e:
errors.append(f"Failed to delete private/: {e}")
cls._run_states.pop(simulation_id, None)
logger.info(
f"[PRIVATE] Cleanup done: {simulation_id}, removed={cleaned}"
)
return {
"success": len(errors) == 0,
"cleaned_files": cleaned,
"errors": errors or None,
}
# ── Internal: monitor thread ───────────────────────────────────────────────
@classmethod
def _monitor_simulation(cls, simulation_id: str, locale: str = 'en') -> None:
"""
Background thread: poll private/actions.jsonl until subprocess exits.
Same pattern as SimulationRunner._monitor_simulation (L.482581):
- Loops while process is alive, reading new log lines every 2 s
- Performs a final read after process exit
- Sets COMPLETED or FAILED based on exit code
- Stops graph memory updater in finally block
Args:
simulation_id: Simulation identifier.
locale: Locale inherited from the calling thread.
"""
set_locale(locale)
sim_dir = os.path.join(cls.RUN_STATE_DIR, simulation_id)
private_log = os.path.join(sim_dir, "private", "actions.jsonl")
process = cls._processes.get(simulation_id)
state = cls.get_status(simulation_id)
if not process or not state:
return
log_position = 0
try:
while process.poll() is None:
if os.path.exists(private_log):
log_position = cls._read_action_log(
private_log, log_position, state
)
cls._save_run_state(state)
time.sleep(2)
# Final read after process exits
if os.path.exists(private_log):
cls._read_action_log(private_log, log_position, state)
exit_code = process.returncode
if exit_code == 0:
state.runner_status = PrivateRunnerStatus.COMPLETED
state.completed_at = datetime.now().isoformat()
logger.info(f"[PRIVATE] Simulation completed: {simulation_id}")
else:
state.runner_status = PrivateRunnerStatus.FAILED
main_log = os.path.join(sim_dir, "simulation.log")
error_tail = ""
try:
if os.path.exists(main_log):
with open(main_log, 'r', encoding='utf-8') as f:
error_tail = f.read()[-2000:]
except Exception:
pass
state.private_error = (
f"Process exited with code {exit_code}. "
f"Last log output: {error_tail}"
)
logger.error(
f"[PRIVATE] Simulation failed: {simulation_id}, "
f"exit_code={exit_code}"
)
state.private_running = False
cls._save_run_state(state)
except Exception as e:
logger.error(f"[PRIVATE] Monitor thread error: {simulation_id}, {e}")
state.runner_status = PrivateRunnerStatus.FAILED
state.private_error = str(e)
cls._save_run_state(state)
finally:
if cls._graph_memory_enabled.get(simulation_id, False):
try:
ZepGraphMemoryManager.stop_updater(simulation_id)
logger.info(
f"[PRIVATE] Graph memory updater stopped: {simulation_id}"
)
except Exception as e:
logger.error(
f"[PRIVATE] Failed to stop graph updater: {e}"
)
cls._graph_memory_enabled.pop(simulation_id, None)
cls._processes.pop(simulation_id, None)
if simulation_id in cls._stdout_files:
try:
cls._stdout_files[simulation_id].close()
except Exception:
pass
cls._stdout_files.pop(simulation_id, None)
# ── Internal: log reader ───────────────────────────────────────────────────
@classmethod
def _read_action_log(
cls,
log_path: str,
position: int,
state: PrivateSimulationRunState,
) -> int:
"""
Incremental read of private/actions.jsonl from a byte offset.
Same pattern as SimulationRunner._read_action_log (L.683684):
- Seeks to last read position, reads new lines only
- Calls ZepGraphMemoryUpdater.add_activity_from_dict(data, "private")
- Handles round_end and simulation_end event entries
Args:
log_path: Absolute path to private/actions.jsonl.
position: Byte offset of the previous read.
state: Mutable run state to update in place.
Returns:
New byte offset after reading.
"""
graph_memory_enabled = cls._graph_memory_enabled.get(
state.simulation_id, False
)
graph_updater = None
if graph_memory_enabled:
graph_updater = ZepGraphMemoryManager.get_updater(state.simulation_id)
try:
with open(log_path, 'r', encoding='utf-8') as f:
f.seek(position)
for line in f:
line = line.strip()
if not line:
continue
try:
data = json.loads(line)
# Structured event entries (no agent_id)
if "event_type" in data:
event_type = data["event_type"]
if event_type == "simulation_end":
state.private_completed = True
state.private_running = False
state.runner_status = PrivateRunnerStatus.COMPLETED
state.completed_at = datetime.now().isoformat()
logger.info(
f"[PRIVATE] simulation_end received: "
f"{state.simulation_id}, "
f"total_rounds={data.get('total_rounds')}, "
f"total_actions={data.get('total_actions')}"
)
elif event_type == "round_end":
round_num = data.get("round", 0)
if round_num > state.private_current_round:
state.private_current_round = round_num
# simulated_day may be written by run_private_simulation.py
simulated_day = data.get("simulated_day", 0)
if simulated_day > state.private_simulated_days:
state.private_simulated_days = simulated_day
continue
# Skip non-agent entries
if "agent_id" not in data:
continue
action = PrivateAgentAction(
round_num=data.get("round", 0),
timestamp=data.get(
"timestamp", datetime.now().isoformat()
),
agent_id=data.get("agent_id", 0),
agent_name=data.get("agent_name", ""),
action_type=data.get("action_type", ""),
action_args=data.get("action_args", {}),
result=data.get("result"),
success=data.get("success", True),
)
state.add_action(action)
if action.round_num > state.private_current_round:
state.private_current_round = action.round_num
# Push to Zep graph memory with platform="private"
if graph_updater:
graph_updater.add_activity_from_dict(data, "private")
except json.JSONDecodeError:
pass
return f.tell()
except Exception as e:
logger.warning(
f"[PRIVATE] Failed to read action log: {log_path}, {e}"
)
return position
# ── Internal: persistence ─────────────────────────────────────────────────
@classmethod
def _save_run_state(cls, state: PrivateSimulationRunState) -> None:
"""Persist run state to run_state.json and update in-memory cache."""
sim_dir = os.path.join(cls.RUN_STATE_DIR, state.simulation_id)
os.makedirs(sim_dir, exist_ok=True)
state_file = os.path.join(sim_dir, "run_state.json")
with open(state_file, 'w', encoding='utf-8') as f:
json.dump(state.to_detail_dict(), f, ensure_ascii=False, indent=2)
cls._run_states[state.simulation_id] = state
@classmethod
def _load_run_state(
cls, simulation_id: str
) -> Optional[PrivateSimulationRunState]:
"""
Load run state from disk.
Same pattern as SimulationRunner._load_run_state.
Args:
simulation_id: Simulation identifier.
Returns:
PrivateSimulationRunState or None on failure / missing file.
"""
state_file = os.path.join(
cls.RUN_STATE_DIR, simulation_id, "run_state.json"
)
if not os.path.exists(state_file):
return None
try:
with open(state_file, 'r', encoding='utf-8') as f:
data = json.load(f)
state = PrivateSimulationRunState(
simulation_id=simulation_id,
runner_status=PrivateRunnerStatus(
data.get("runner_status", "idle")
),
private_current_round=data.get("private_current_round", 0),
private_total_rounds=data.get("private_total_rounds", 0),
private_simulated_days=data.get("private_simulated_days", 0),
private_total_days=data.get("private_total_days", 0),
private_running=data.get("private_running", False),
private_actions_count=data.get("private_actions_count", 0),
private_completed=data.get("private_completed", False),
private_error=data.get("private_error"),
started_at=data.get("started_at"),
updated_at=data.get("updated_at", datetime.now().isoformat()),
completed_at=data.get("completed_at"),
process_pid=data.get("process_pid"),
)
for a in data.get("recent_actions", []):
state.recent_actions.append(PrivateAgentAction(
round_num=a.get("round_num", 0),
timestamp=a.get("timestamp", ""),
agent_id=a.get("agent_id", 0),
agent_name=a.get("agent_name", ""),
action_type=a.get("action_type", ""),
action_args=a.get("action_args", {}),
result=a.get("result"),
success=a.get("success", True),
))
cls._run_states[simulation_id] = state
return state
except Exception as e:
logger.error(
f"[PRIVATE] Failed to load run state: {simulation_id}, {e}"
)
return None
@classmethod
def _read_actions_from_file(
cls,
file_path: str,
agent_id: Optional[int] = None,
round_num: Optional[int] = None,
) -> List[PrivateAgentAction]:
"""
Read all agent actions from a JSONL file with optional filters.
Args:
file_path: Path to actions.jsonl.
agent_id: Optional filter by agent ID.
round_num: Optional filter by round number.
Returns:
List of PrivateAgentAction instances.
"""
if not os.path.exists(file_path):
return []
actions: List[PrivateAgentAction] = []
with open(file_path, 'r', encoding='utf-8') as f:
for line in f:
line = line.strip()
if not line:
continue
try:
data = json.loads(line)
if "event_type" in data:
continue
if "agent_id" not in data:
continue
if agent_id is not None and data.get("agent_id") != agent_id:
continue
if round_num is not None and data.get("round") != round_num:
continue
actions.append(PrivateAgentAction(
round_num=data.get("round", 0),
timestamp=data.get("timestamp", ""),
agent_id=data.get("agent_id", 0),
agent_name=data.get("agent_name", ""),
action_type=data.get("action_type", ""),
action_args=data.get("action_args", {}),
result=data.get("result"),
success=data.get("success", True),
))
except json.JSONDecodeError:
continue
return actions
# ── Internal: process management ──────────────────────────────────────────
@classmethod
def _terminate_process(
cls,
process: subprocess.Popen,
simulation_id: str,
timeout: int = 10,
) -> None:
"""
Terminate subprocess and its children cross-platform.
Same implementation as SimulationRunner._terminate_process:
- Windows: taskkill /PID /T, then /F if unresponsive
- Unix: SIGTERM to process group, SIGKILL on timeout
Args:
process: Subprocess to terminate.
simulation_id: Simulation ID for logging.
timeout: Seconds to wait before force-killing.
"""
if IS_WINDOWS:
logger.info(
f"[PRIVATE] Terminating process tree (Windows): "
f"simulation={simulation_id}, pid={process.pid}"
)
try:
subprocess.run(
['taskkill', '/PID', str(process.pid), '/T'],
capture_output=True, timeout=5
)
try:
process.wait(timeout=timeout)
except subprocess.TimeoutExpired:
logger.warning(
f"[PRIVATE] Process unresponsive, force killing: "
f"{simulation_id}"
)
subprocess.run(
['taskkill', '/F', '/PID', str(process.pid), '/T'],
capture_output=True, timeout=5
)
process.wait(timeout=5)
except Exception as e:
logger.warning(f"[PRIVATE] taskkill failed, falling back: {e}")
process.terminate()
try:
process.wait(timeout=5)
except subprocess.TimeoutExpired:
process.kill()
else:
pgid = os.getpgid(process.pid)
logger.info(
f"[PRIVATE] Terminating process group (Unix): "
f"simulation={simulation_id}, pgid={pgid}"
)
os.killpg(pgid, signal.SIGTERM)
try:
process.wait(timeout=timeout)
except subprocess.TimeoutExpired:
logger.warning(
f"[PRIVATE] Process group unresponsive, force killing: "
f"{simulation_id}"
)
os.killpg(pgid, signal.SIGKILL)
process.wait(timeout=5)