Merge 895a5fbaee into 96096ea0ff
This commit is contained in:
commit
609ea6c5bd
|
|
@ -48,6 +48,20 @@ def create_app(config_class=Config):
|
|||
if should_log_startup:
|
||||
logger.info("已注册模拟进程清理函数")
|
||||
|
||||
# Install interview lifecycle hooks on the SimulationManager class.
|
||||
# Hooks are stored on the class itself (not on a particular instance), so
|
||||
# any fresh `SimulationManager()` constructed later (e.g. per request in
|
||||
# the Flask API) will see them. We still bridge `_notify_on_completed`
|
||||
# into SimulationRunner via a transient instance so the runner's monitor
|
||||
# thread fires the completed hooks when a simulation process exits.
|
||||
from .services.simulation_manager import SimulationManager
|
||||
from .services.interviews.lifecycle import install_hooks
|
||||
|
||||
install_hooks(SimulationManager)
|
||||
SimulationRunner.register_on_completed(SimulationManager()._notify_on_completed)
|
||||
if should_log_startup:
|
||||
logger.info("已安装面试生命周期钩子")
|
||||
|
||||
# 请求日志中间件
|
||||
@app.before_request
|
||||
def log_request():
|
||||
|
|
@ -63,10 +77,8 @@ def create_app(config_class=Config):
|
|||
return response
|
||||
|
||||
# 注册蓝图
|
||||
from .api import graph_bp, simulation_bp, report_bp
|
||||
app.register_blueprint(graph_bp, url_prefix='/api/graph')
|
||||
app.register_blueprint(simulation_bp, url_prefix='/api/simulation')
|
||||
app.register_blueprint(report_bp, url_prefix='/api/report')
|
||||
from .api import register_blueprints
|
||||
register_blueprints(app)
|
||||
|
||||
# 健康检查
|
||||
@app.route('/health')
|
||||
|
|
|
|||
|
|
@ -2,13 +2,22 @@
|
|||
API路由模块
|
||||
"""
|
||||
|
||||
from flask import Blueprint
|
||||
from flask import Blueprint, Flask
|
||||
|
||||
graph_bp = Blueprint('graph', __name__)
|
||||
simulation_bp = Blueprint('simulation', __name__)
|
||||
report_bp = Blueprint('report', __name__)
|
||||
interview_bp = Blueprint('interview', __name__)
|
||||
|
||||
from . import graph # noqa: E402, F401
|
||||
from . import simulation # noqa: E402, F401
|
||||
from . import report # noqa: E402, F401
|
||||
from . import interview # noqa: E402, F401
|
||||
|
||||
|
||||
def register_blueprints(app: Flask) -> None:
    """Attach every API blueprint to *app* under its ``/api/...`` URL prefix.

    Blueprints are registered in the order graph, simulation, report,
    interview.
    """
    prefixed_blueprints = (
        (graph_bp, '/api/graph'),
        (simulation_bp, '/api/simulation'),
        (report_bp, '/api/report'),
        (interview_bp, '/api/interview'),
    )
    for blueprint, prefix in prefixed_blueprints:
        app.register_blueprint(blueprint, url_prefix=prefix)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,225 @@
|
|||
from __future__ import annotations
|
||||
import threading
|
||||
import traceback
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from flask import Blueprint, jsonify, request, send_file
|
||||
from app.config import Config
|
||||
from app.models.interview import SubagentKind, InterviewPhase
|
||||
from app.services.interviews.adapters import FileSystemPersonaProvider, ZepMemoryProvider
|
||||
from app.services.interviews.zep_writer import InterviewZepWriter
|
||||
from app.services.interview_orchestrator import InterviewOrchestrator
|
||||
from app.services.interview_synthesizer import InterviewSynthesizer
|
||||
from app.services.interviews.storage import InterviewStore
|
||||
from app.utils.llm_client import LLMClient
|
||||
from app.utils.logger import get_logger
|
||||
|
||||
from . import interview_bp
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class _NullUpdater:
|
||||
"""No-op stand-in for ``ZepGraphMemoryUpdater`` used when Zep is unavailable.
|
||||
|
||||
Exposes ``add_text_episode`` so ``InterviewZepWriter._emit`` succeeds silently —
|
||||
the interview pipeline still produces local artefacts; Zep just isn't updated.
|
||||
"""
|
||||
|
||||
def add_text_episode(self, graph_id, text): # noqa: ARG002 - matches real API
|
||||
return None
|
||||
|
||||
|
||||
class _NullMemory:
    """Memory provider stand-in whose digest is always flagged unavailable."""

    def get_digest(self, agent_id, max_chars=2000):  # noqa: ARG002 - matches Protocol
        """Return a placeholder digest; both arguments are ignored."""
        from app.services.interviews.base import MemoryDigest

        placeholder = MemoryDigest(text="[memory unavailable]", available=False)
        return placeholder
|
||||
|
||||
_TASKS: dict[str, dict] = {}
|
||||
_LOCK = threading.Lock()
|
||||
|
||||
INSTRUMENT_DIR = Path(__file__).resolve().parents[2] / "scripts" / "instruments"
|
||||
|
||||
|
||||
def _uploads_root() -> Path:
    """Return the uploads root: ``Config.UPLOADS_DIR``, or ``"uploads"`` when unset."""
    configured = getattr(Config, "UPLOADS_DIR", "uploads")
    return Path(configured)
|
||||
|
||||
|
||||
def _load_graph_id(sim_id: str) -> str:
    """Look up the Zep ``graph_id`` recorded for simulation *sim_id*.

    The value is read from the state returned by
    ``SimulationManager().get_simulation(sim_id)``.

    Returns:
        The graph id, or ``""`` when the simulation has no state, the state
        has an empty ``graph_id``, or the lookup raised. Callers should treat
        ``""`` as "Zep unavailable" and use the null memory/writer path.
    """
    graph_id = ""
    try:
        # Imported lazily inside the guarded region so an import failure is
        # handled like any other lookup failure.
        from app.services.simulation_manager import SimulationManager

        state = SimulationManager().get_simulation(sim_id)
        if state and state.graph_id:
            graph_id = state.graph_id
    except Exception as e:  # pragma: no cover - defensive
        logger.warning(f"_load_graph_id({sim_id}) failed: {e!r}")
    return graph_id
|
||||
|
||||
|
||||
def _build_orchestrator(sim_id: str) -> InterviewOrchestrator:
    """Assemble an ``InterviewOrchestrator`` for simulation *sim_id*.

    Persona files are looked up under ``<uploads>/simulations/<sim_id>/``.
    Zep-backed memory and writer are used when a graph id is found and the
    Zep components initialise; otherwise null stand-ins are used so the
    interview pipeline still runs and only produces local artefacts.
    """
    sim_dir = _uploads_root() / "simulations" / sim_id
    reddit_file = sim_dir / "reddit_profiles.json"
    twitter_file = sim_dir / "twitter_profiles.csv"
    personas = FileSystemPersonaProvider(
        reddit_path=reddit_file if reddit_file.exists() else None,
        twitter_path=twitter_file if twitter_file.exists() else None,
    )
    # agent_id -> Zep entity uuid, derived from the persisted profile files.
    agent_to_entity = personas.agent_to_entity()

    # The graph id comes from the simulation's persisted state, NOT from a
    # ``graph_id.txt`` (nothing in the codebase writes such a file).
    graph_id = _load_graph_id(sim_id)

    def _null_backends():
        """Memory/writer pair that never talks to Zep."""
        return (
            _NullMemory(),
            InterviewZepWriter(memory_updater=_NullUpdater(), graph_id=""),
        )

    if graph_id:
        try:
            from app.services.zep_entity_reader import ZepEntityReader
            from app.services.zep_graph_memory_updater import ZepGraphMemoryUpdater

            reader = ZepEntityReader()
            updater = ZepGraphMemoryUpdater(graph_id=graph_id)
            memory = ZepMemoryProvider(
                reader, graph_id=graph_id, agent_to_entity=agent_to_entity
            )
            zep_writer = InterviewZepWriter(memory_updater=updater, graph_id=graph_id)
            if not agent_to_entity:
                logger.warning(
                    f"interview: empty agent_to_entity map for sim {sim_id} — "
                    "memory digests will be unavailable. Check that profile files "
                    "include `source_entity_uuid`."
                )
        except Exception as e:
            logger.warning(
                f"interview: Zep init failed for sim {sim_id} ({e!r}); "
                "falling back to null memory/writer"
            )
            memory, zep_writer = _null_backends()
    else:
        logger.warning(
            f"interview: no graph_id for sim {sim_id} — Zep memory/writer disabled "
            "(simulation state missing or graph_id empty)"
        )
        memory, zep_writer = _null_backends()

    llm = LLMClient(
        api_key=Config.LLM_API_KEY,
        base_url=Config.LLM_BASE_URL,
        model=Config.LLM_MODEL_NAME,
    )
    return InterviewOrchestrator(
        llm=llm,
        memory=memory,
        personas=personas,
        instrument_dir=INSTRUMENT_DIR,
        store_root=_uploads_root(),
        sim_id=sim_id,
        zep_writer=zep_writer,
        max_workers=Config.INTERVIEW_MAX_WORKERS,
        language=Config.INTERVIEW_DEFAULT_LANGUAGE,
    )
|
||||
|
||||
|
||||
def _run_task(task_id: str, fn) -> None:
    """Execute ``fn(task_id)`` and record the outcome in ``_TASKS``.

    The entry is reset to ``"running"`` first. On success it becomes
    ``"completed"`` with the return value stored under ``"result"``; on any
    exception it becomes ``"failed"`` with ``"error"`` (the exception's repr)
    and ``"traceback"`` filled in. Exceptions never propagate to the caller.
    """
    with _LOCK:
        _TASKS[task_id] = {"status": "running", "progress": {}, "result": None, "error": None}
    try:
        outcome = fn(task_id)
        with _LOCK:
            entry = _TASKS[task_id]
            entry["status"] = "completed"
            entry["result"] = outcome
    except Exception as exc:
        with _LOCK:
            entry = _TASKS[task_id]
            entry["status"] = "failed"
            entry["error"] = repr(exc)
            entry["traceback"] = traceback.format_exc()
|
||||
|
||||
|
||||
def _start_task(fn) -> str:
    """Register a new task as ``"queued"`` and run *fn* on a daemon thread.

    Returns:
        The 12-hex-character task id under which the task is tracked.
    """
    task_id = uuid.uuid4().hex[:12]
    queued_state = {"status": "queued", "progress": {}, "result": None, "error": None}
    with _LOCK:
        _TASKS[task_id] = queued_state
    worker = threading.Thread(target=_run_task, args=(task_id, fn), daemon=True)
    worker.start()
    return task_id
|
||||
|
||||
|
||||
def _envelope(data=None, error=None, status: int = 200):
    """Build the JSON response envelope used by the interview endpoints.

    Returns:
        ``(response, status)`` where the body has ``success`` (true iff
        *error* is ``None``), ``data`` (``{}`` when *data* is falsy) and
        ``error``.
    """
    payload = {
        "success": error is None,
        "data": data if data else {},
        "error": error,
    }
    return jsonify(payload), status
|
||||
|
||||
|
||||
@interview_bp.route("/<sim_id>/pre", methods=["POST"])
def post_pre(sim_id: str):
    """Start the pre-phase interview run in the background; respond with its task id."""
    orchestrator = _build_orchestrator(sim_id)

    def job(tid):
        # The task id is not needed by the orchestrator.
        return orchestrator.run_pre()

    return _envelope({"task_id": _start_task(job)})
|
||||
|
||||
|
||||
@interview_bp.route("/<sim_id>/post", methods=["POST"])
def post_post(sim_id: str):
    """Start the post-phase run plus synthesis in the background; respond with its task id."""
    orchestrator = _build_orchestrator(sim_id)

    def job(tid):
        summary = orchestrator.run_post()
        report = InterviewSynthesizer(store=orchestrator.store).run()
        summary["synthesis"] = report[:1000]  # short preview
        return summary

    return _envelope({"task_id": _start_task(job)})
|
||||
|
||||
|
||||
@interview_bp.route("/<sim_id>/rerun", methods=["POST"])
def post_rerun(sim_id: str):
    """Re-run one subagent, named by the ``subagent`` field of the JSON body.

    Responds 400 when the field is missing or not a ``SubagentKind`` value;
    otherwise starts the re-run in the background and returns its task id.
    """
    payload = request.get_json(silent=True) or {}
    requested = payload.get("subagent")
    try:
        subagent = SubagentKind(requested)
    except ValueError:
        return _envelope(error=f"unknown subagent {requested!r}", status=400)

    orchestrator = _build_orchestrator(sim_id)

    def job(tid):
        return orchestrator.rerun(subagent)

    return _envelope({"task_id": _start_task(job)})
|
||||
|
||||
|
||||
@interview_bp.route("/<sim_id>/status", methods=["GET"])
def get_status(sim_id: str):
    """Report the state of the background task named by ``?task_id=``.

    Responds 404 when the id is missing or unknown; otherwise returns the
    task's ``status``, ``progress``, ``result`` and ``error``. *sim_id* is
    part of the URL but is not used for the lookup.
    """
    task_id = request.args.get("task_id")
    with _LOCK:
        task = _TASKS.get(task_id)
        # Copy while holding the lock: the worker thread updates this dict in
        # place field by field, so reading it after release could pair
        # status="completed" with a not-yet-written result.
        snapshot = dict(task) if task is not None else None
    if snapshot is None:
        return _envelope(error="unknown task_id", status=404)
    return _envelope({
        "status": snapshot["status"],
        "progress": snapshot.get("progress", {}),
        "result": snapshot.get("result"),
        "error": snapshot.get("error"),
    })
|
||||
|
||||
|
||||
@interview_bp.route("/<sim_id>/results/<subagent>", methods=["GET"])
def get_results(sim_id: str, subagent: str):
    """Return the latest aggregate for one subagent of simulation *sim_id*.

    Responds 400 for an unknown subagent name and 404 when there is no run
    yet or the run has no ``aggregate.json``. On success the payload holds
    the parsed aggregate and the run directory path.
    """
    import json

    try:
        kind = SubagentKind(subagent)
    except ValueError:
        return _envelope(error=f"unknown subagent {subagent!r}", status=400)

    store = InterviewStore(root=_uploads_root(), sim_id=sim_id)
    # Always T1, for every subagent including the longitudinal one. The
    # previous conditional selected T1 in both branches; it is collapsed here
    # so it no longer suggests a T0 lookup.
    # NOTE(review): the orchestrator appears to write aggregate.json only in
    # its post-phase (T1) runs — confirm before ever serving T0 here.
    run = store.latest_run(InterviewPhase.T1, kind)
    if run is None:
        return _envelope(error="no results yet", status=404)

    aggregate_file = run / "aggregate.json"
    if not aggregate_file.exists():
        return _envelope(error="aggregate missing", status=404)

    return _envelope({
        "aggregate": json.loads(aggregate_file.read_text(encoding="utf-8")),
        "run_dir": str(run),
    })
|
||||
|
||||
|
||||
@interview_bp.route("/<sim_id>/results/synthesis", methods=["GET"])
def get_synthesis(sim_id: str):
    """Return the synthesis report as markdown, generating it first if absent."""
    store = InterviewStore(root=_uploads_root(), sim_id=sim_id)
    report_path = store.base / "synthesis" / "report.md"
    if not report_path.exists():
        # The synthesizer writes synthesis/report.md as part of run().
        InterviewSynthesizer(store=store).run()
    markdown = report_path.read_text(encoding="utf-8")
    return _envelope({"report_markdown": markdown})
|
||||
|
||||
|
||||
@interview_bp.route("/<sim_id>/export.csv", methods=["GET"])
def get_export_csv(sim_id: str):
    """Send the tidy CSV of all responses as a download, generating it first if absent."""
    store = InterviewStore(root=_uploads_root(), sim_id=sim_id)
    csv_path = store.base / "synthesis" / "exports" / "all_responses.csv"
    if not csv_path.exists():
        # The synthesizer writes the export alongside the report.
        synthesizer = InterviewSynthesizer(store=store)
        synthesizer.run()
    return send_file(
        csv_path,
        mimetype="text/csv",
        as_attachment=True,
        download_name=f"{sim_id}_interviews.csv",
    )
|
||||
|
|
@ -39,6 +39,8 @@ class Config:
|
|||
MAX_CONTENT_LENGTH = 50 * 1024 * 1024 # 50MB
|
||||
UPLOAD_FOLDER = os.path.join(os.path.dirname(__file__), '../uploads')
|
||||
ALLOWED_EXTENSIONS = {'pdf', 'md', 'txt', 'markdown'}
|
||||
# Root directory for simulation uploads (used by the interview subsystem)
|
||||
UPLOADS_DIR = os.environ.get("UPLOADS_DIR", os.path.join(os.path.dirname(__file__), '../uploads'))
|
||||
|
||||
# 文本处理配置
|
||||
DEFAULT_CHUNK_SIZE = 500 # 默认切块大小
|
||||
|
|
@ -63,6 +65,12 @@ class Config:
|
|||
REPORT_AGENT_MAX_REFLECTION_ROUNDS = int(os.environ.get('REPORT_AGENT_MAX_REFLECTION_ROUNDS', '2'))
|
||||
REPORT_AGENT_TEMPERATURE = float(os.environ.get('REPORT_AGENT_TEMPERATURE', '0.5'))
|
||||
|
||||
# Interview subsystem
|
||||
INTERVIEW_MAX_TOKENS_PER_RUN = int(os.environ.get("INTERVIEW_MAX_TOKENS_PER_RUN", 15_000_000))
|
||||
INTERVIEW_MAX_WORKERS = int(os.environ.get("INTERVIEW_MAX_WORKERS", 8))
|
||||
INTERVIEW_DEFAULT_LANGUAGE = os.environ.get("INTERVIEW_DEFAULT_LANGUAGE", "de")
|
||||
LLM_STUB_MODE = os.environ.get("LLM_STUB_MODE", "false").lower() == "true"
|
||||
|
||||
@classmethod
|
||||
def validate(cls) -> list[str]:
|
||||
"""验证必要配置"""
|
||||
|
|
|
|||
|
|
@ -0,0 +1,99 @@
|
|||
from __future__ import annotations
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
from pydantic import BaseModel, Field, field_validator, model_validator
|
||||
|
||||
class InterviewPhase(str, Enum):
    """Interview wave identifier; each member is also a plain ``str``."""

    # First wave.
    T0 = "T0"
    # Second wave.
    T1 = "T1"
|
||||
|
||||
class SubagentKind(str, Enum):
    """The interview subagents; each member is also a plain ``str``."""

    LONGITUDINAL = "longitudinal"
    DIVERSITY = "diversity"
    DELPHI = "delphi"
    SCENARIO = "scenario"
|
||||
|
||||
class LikertItem(BaseModel):
    """A single Likert item with wording under the ``de`` and ``en`` keys."""

    item_id: str
    de: str
    en: str
    # Number of scale points; only the odd sizes 3, 5 and 7 are accepted.
    scale: int = Field(ge=3, le=7)
    family: Optional[str] = None
    reverse_coded: bool = False

    @field_validator("scale")
    @classmethod
    def odd_scale(cls, v: int) -> int:
        """Reject even scale sizes that the ``ge``/``le`` bounds let through."""
        allowed_sizes = (3, 5, 7)
        if v in allowed_sizes:
            return v
        raise ValueError("scale must be 3, 5, or 7")
|
||||
|
||||
class LikertInstrument(BaseModel):
    """A named, versioned list of Likert items with unique ``item_id`` values."""

    name: str
    version: str = "1.0"
    language_default: str = "de"
    items: list[LikertItem]

    @model_validator(mode="after")
    def unique_item_ids(self) -> "LikertInstrument":
        """Fail validation when two items share an ``item_id``."""
        seen: set = set()
        for item in self.items:
            if item.item_id in seen:
                raise ValueError("duplicate item_id in instrument")
            seen.add(item.item_id)
        return self
|
||||
|
||||
class LikertResponse(BaseModel):
    """One agent's answers to a Likert instrument in a given phase."""

    agent_id: int
    phase: InterviewPhase
    # item_id -> chosen scale point (validated to 1..5 below).
    responses: dict[str, int]
    # item_id -> confidence (validated to 0..1 below).
    confidence: dict[str, float] = Field(default_factory=dict)
    open_comment: Optional[str] = None
    memory_available: bool = True
    failed_items: list[str] = Field(default_factory=list)

    @model_validator(mode="after")
    def values_in_range(self) -> "LikertResponse":
        """Check responses lie in 1..5 and confidences in 0..1.

        Responses are checked first, so a bad response is reported before a
        bad confidence.
        """
        # NOTE(review): the range is fixed at 1..5 while ``LikertItem.scale``
        # allows 3 and 7 as well — confirm that only 5-point items are ever
        # answered through this model.
        for item_id, answer in self.responses.items():
            if not (1 <= answer <= 5):
                raise ValueError(f"response {item_id}={answer} out of 1..5 range")
        for item_id, level in self.confidence.items():
            # Kept as a negated chained comparison so NaN is rejected too.
            if not (0.0 <= level <= 1.0):
                raise ValueError(f"confidence {item_id}={level} out of 0..1 range")
        return self
|
||||
|
||||
class QSortStatement(BaseModel):
    """A single Q-sort statement with wording under the ``de`` and ``en`` keys."""

    statement_id: str
    de: str
    en: str
|
||||
|
||||
class QSortInstrument(BaseModel):
    """A named, versioned Q-sort: its statements and the bucket-size distribution."""

    name: str
    version: str = "1.0"
    statements: list[QSortStatement]
    # Bucket sizes, e.g. [2,3,4,6,4,3,2] for buckets -3..+3.
    distribution: list[int]
|
||||
|
||||
class QSortResponse(BaseModel):
    """One agent's Q-sort placements plus its Likert axis ratings."""

    agent_id: int
    # statement_id -> bucket (-3..+3)
    placements: dict[str, int]
    # axis_id -> 1..7
    likert_axes: dict[str, int]
|
||||
|
||||
class DelphiOpenResponse(BaseModel):
    """One agent's free-text Delphi answers; ``round`` defaults to 1."""

    agent_id: int
    round: int = 1
    # question_id -> free text
    answers: dict[str, str]
|
||||
|
||||
class DelphiRatingResponse(BaseModel):
    """One agent's per-theme Delphi ratings for a given round."""

    agent_id: int
    round: int
    # theme_id -> {importance, plausibility}
    ratings: dict[str, dict[str, int]]
    justification: Optional[str] = None
|
||||
|
||||
class ScenarioRating(BaseModel):
    """Ratings of one scenario: four 1..7 scores plus a free-text reaction."""

    desirability: int = Field(ge=1, le=7)
    plausibility: int = Field(ge=1, le=7)
    impact_on_my_group: int = Field(ge=1, le=7)
    fairness: int = Field(ge=1, le=7)
    if_woke_up_response: str
|
||||
|
||||
class ScenarioResponse(BaseModel):
    """One agent's ratings across scenarios."""

    agent_id: int
    # scenario_id -> rating
    ratings: dict[str, ScenarioRating]
|
||||
|
|
@ -0,0 +1,222 @@
|
|||
from __future__ import annotations
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from pathlib import Path
|
||||
from typing import Protocol
|
||||
from app.models.interview import (
|
||||
InterviewPhase, SubagentKind, LikertResponse, QSortResponse,
|
||||
DelphiOpenResponse, DelphiRatingResponse, ScenarioResponse,
|
||||
)
|
||||
from app.services.interviews.base import PersonaRecord, SchemaValidationFailure
|
||||
from app.services.interviews.longitudinal import LongitudinalSubagent, run_aggregate as longitudinal_aggregate
|
||||
from app.services.interviews.diversity import DiversitySubagent, run_typology
|
||||
from app.services.interviews.delphi import (
|
||||
DelphiSubagent, extract_themes, convergence_metrics, group_stats_from_r2,
|
||||
)
|
||||
from app.services.interviews.scenario import ScenarioSubagent, polarity_matrix
|
||||
from app.services.interviews.storage import InterviewStore
|
||||
from app.services.interviews.instrument_loader import freeze_snapshot
|
||||
|
||||
|
||||
class PersonaProvider(Protocol):
    """Structural type for the persona source used by ``InterviewOrchestrator``."""

    def all(self) -> "list[PersonaRecord]":
        """Return the personas to interview."""
        ...
|
||||
|
||||
|
||||
class InterviewOrchestrator:
    """Run the interview subagents for one simulation and persist their output.

    ``run_pre`` administers the longitudinal instrument at T0. ``run_post``
    administers the longitudinal, diversity and scenario instruments in
    parallel at T1 and then the three Delphi rounds. ``rerun`` repeats a
    single T1 subagent. Results go to an ``InterviewStore``; Zep writes are
    best-effort and never abort a run.
    """

    # Instrument names; each maps to ``<instrument_dir>/<name>_v1.yaml``.
    _INSTRUMENT_KINDS = ("longitudinal", "diversity", "delphi", "scenario")

    def __init__(
        self, llm, memory, personas: "PersonaProvider",
        instrument_dir: Path, store_root: Path, sim_id: str,
        zep_writer, max_workers: int = 8, language: str = "de",
    ):
        """Store the collaborators and snapshot the instruments in use.

        Args:
            llm: LLM client handed to every subagent.
            memory: Memory provider handed to every subagent.
            personas: Source of the personas to interview.
            instrument_dir: Directory holding the ``*_v1.yaml`` instruments.
            store_root: Root directory passed to ``InterviewStore``.
            sim_id: Simulation whose interviews are being run.
            zep_writer: Writer used for best-effort Zep updates.
            max_workers: Thread count for per-agent fan-out.
            language: Language passed to every subagent.
        """
        self.llm = llm
        self.memory = memory
        self.personas = personas
        self.instrument_dir = Path(instrument_dir)
        self.store = InterviewStore(root=store_root, sim_id=sim_id)
        self.zep_writer = zep_writer
        self.max_workers = max_workers
        self.language = language
        # Freeze snapshot once per orchestrator lifetime
        freeze_snapshot(
            instruments={
                kind: self._instrument_path(kind) for kind in self._INSTRUMENT_KINDS
            },
            out_path=self.store.base / "instruments_used.json",
        )

    # --- Small shared helpers ---
    def _instrument_path(self, kind: str) -> Path:
        """Return the path of the v1 instrument file for *kind*."""
        return self.instrument_dir / f"{kind}_v1.yaml"

    @staticmethod
    def _index_personas(personas) -> dict:
        """Map ``agent_id`` to persona; the first persona wins on duplicates."""
        index: dict = {}
        for persona in personas:
            index.setdefault(persona.agent_id, persona)
        return index

    @staticmethod
    def _log_zep_failure(what: str, exc: Exception) -> None:
        """Log a failed best-effort Zep write instead of dropping it silently."""
        import logging

        logging.getLogger(__name__).warning("interview: Zep %s failed: %r", what, exc)

    def _write_zep_per_agent(self, kind, responses, personas) -> None:
        """Push each response to Zep; write failures are logged, never raised.

        A response whose ``agent_id`` matches no persona raises ``KeyError``.
        """
        by_id = self._index_personas(personas)
        for response in responses:
            persona = by_id[response.agent_id]
            try:
                self.zep_writer.write_per_agent(kind, response, persona.name)
            except Exception as exc:
                self._log_zep_failure(f"per-agent write (agent {response.agent_id})", exc)

    # --- Generic per-agent runner ---
    def _fan_out(self, run_dir, agent_fn, personas, audit_label):
        """Call *agent_fn* for every persona on a thread pool.

        Each result is appended to the run's responses. A persona whose call
        or whose persistence fails is audited and reported as failed.

        Returns:
            ``(ok, failed)``: the persisted results in completion order, and
            the ``agent_id`` of every persona that failed.
        """
        ok: list = []
        failed: list[int] = []
        with ThreadPoolExecutor(max_workers=self.max_workers) as pool:
            futures = {pool.submit(agent_fn, p): p for p in personas}
            for fut in as_completed(futures):
                p = futures[fut]
                try:
                    out = fut.result()
                    # Persist before counting, so that a storage error leaves
                    # the agent in `failed` only, not in both lists.
                    self.store.append_response(run_dir, out)
                    ok.append(out)
                except SchemaValidationFailure as e:
                    failed.append(p.agent_id)
                    self.store.audit(run_dir, agent_id=p.agent_id,
                                     event="schema_validation_failure",
                                     detail={"label": audit_label, "attempts": e.attempts})
                except Exception as e:
                    failed.append(p.agent_id)
                    self.store.audit(run_dir, agent_id=p.agent_id,
                                     event="agent_failed", detail=f"{audit_label}: {e!r}")
        return ok, failed

    # --- Pre-phase (T0) ---
    def run_pre(self) -> dict:
        """Administer the longitudinal instrument at T0 to every persona."""
        sub = LongitudinalSubagent(self.llm, self.memory,
                                   self._instrument_path("longitudinal"),
                                   language=self.language)
        run_dir = self.store.start_run(InterviewPhase.T0, SubagentKind.LONGITUDINAL)
        # Load the personas once and reuse them for the Zep write-back.
        personas = self.personas.all()
        ok, failed = self._fan_out(
            run_dir, lambda p: sub.administer(p, phase=InterviewPhase.T0),
            personas, audit_label="longitudinal_T0",
        )
        self._write_zep_per_agent(SubagentKind.LONGITUDINAL, ok, personas)
        self.store.mark_latest(run_dir)
        return {"longitudinal": {"n_responded": len(ok), "n_failed": len(failed),
                                 "run_dir": str(run_dir)}}

    # --- Post-phase (T1) ---
    def run_post(self) -> dict:
        """Run all four T1 subagents; a failing subagent yields ``{"error": ...}``."""
        personas = self.personas.all()
        out: dict = {}
        with ThreadPoolExecutor(max_workers=4) as pool:
            futures = {
                "longitudinal": pool.submit(self._post_longitudinal, personas),
                "diversity": pool.submit(self._post_diversity, personas),
                "scenario": pool.submit(self._post_scenario, personas),
            }
            for name, fut in futures.items():
                try:
                    out[name] = fut.result()
                except Exception as e:
                    out[name] = {"error": repr(e)}
        # Delphi runs sequentially (R1 → R2 → R3) and uses the LLM for theme extraction
        try:
            out["delphi"] = self._post_delphi(personas)
        except Exception as e:
            out["delphi"] = {"error": repr(e)}
        return out

    def _post_longitudinal(self, personas) -> dict:
        """Administer the longitudinal instrument at T1 and aggregate it against T0."""
        sub = LongitudinalSubagent(self.llm, self.memory,
                                   self._instrument_path("longitudinal"),
                                   language=self.language)
        run_dir = self.store.start_run(InterviewPhase.T1, SubagentKind.LONGITUDINAL)
        ok, failed = self._fan_out(
            run_dir, lambda p: sub.administer(p, phase=InterviewPhase.T1),
            personas, audit_label="longitudinal_T1",
        )
        # Aggregate using T0 + T1
        t0_path = self.store.latest_run(InterviewPhase.T0, SubagentKind.LONGITUDINAL)
        t0_raw = self.store.read_responses(t0_path) if t0_path else []
        t0 = [LikertResponse(**d) for d in t0_raw]
        agg = longitudinal_aggregate(t0, ok)
        self.store.write_aggregate(run_dir, agg)
        self._write_zep_per_agent(SubagentKind.LONGITUDINAL, ok, personas)
        try:
            self.zep_writer.write_aggregate(SubagentKind.LONGITUDINAL,
                                            f"n_paired={agg['n_paired']}")
        except Exception as exc:
            self._log_zep_failure("longitudinal aggregate write", exc)
        self.store.mark_latest(run_dir)
        return {"n_responded": len(ok), "n_failed": len(failed), "run_dir": str(run_dir)}

    def _post_diversity(self, personas) -> dict:
        """Administer the diversity instrument and store the resulting typology."""
        sub = DiversitySubagent(self.llm, self.memory,
                                self._instrument_path("diversity"),
                                language=self.language)
        run_dir = self.store.start_run(InterviewPhase.T1, SubagentKind.DIVERSITY)
        ok, failed = self._fan_out(
            run_dir, lambda p: sub.administer(p), personas, audit_label="diversity",
        )
        typology = run_typology(ok)
        self.store.write_named(run_dir, "typology.json", typology)
        self.store.write_aggregate(run_dir, {"n": len(ok), "n_failed": len(failed),
                                             "clusters": typology["clusters"]})
        self._write_zep_per_agent(SubagentKind.DIVERSITY, ok, personas)
        self.store.mark_latest(run_dir)
        return {"n_responded": len(ok), "n_failed": len(failed), "run_dir": str(run_dir)}

    def _post_scenario(self, personas) -> dict:
        """Administer the scenario instrument and store the polarity matrix."""
        sub = ScenarioSubagent(self.llm, self.memory,
                               self._instrument_path("scenario"),
                               language=self.language)
        run_dir = self.store.start_run(InterviewPhase.T1, SubagentKind.SCENARIO)
        ok, failed = self._fan_out(
            run_dir, lambda p: sub.administer(p), personas, audit_label="scenario",
        )
        matrix = polarity_matrix(ok)
        self.store.write_named(run_dir, "polarity_matrix.json", matrix)
        self.store.write_aggregate(run_dir, {"n": len(ok), "n_failed": len(failed),
                                             "polarity": matrix})
        self._write_zep_per_agent(SubagentKind.SCENARIO, ok, personas)
        self.store.mark_latest(run_dir)
        return {"n_responded": len(ok), "n_failed": len(failed), "run_dir": str(run_dir)}

    def _post_delphi(self, personas) -> dict:
        """Run the three Delphi rounds; each round only includes the previous round's responders."""
        sub = DelphiSubagent(self.llm, self.memory,
                             self._instrument_path("delphi"),
                             language=self.language)
        run_dir = self.store.start_run(InterviewPhase.T1, SubagentKind.DELPHI)
        # Round 1
        r1_ok, r1_failed = self._fan_out(
            run_dir, lambda p: sub.administer_round1(p), personas, audit_label="delphi_r1",
        )
        # Also record the R1 responses in a dedicated per-round file
        for r in r1_ok:
            self.store.append_jsonl(run_dir, "round1_themes.jsonl", r)
        # Extract themes from R1
        themes = extract_themes(r1_ok, llm=self.llm)
        self.store.write_named(run_dir, "themes.json", {"themes": themes})
        # Round 2 (responder-id set built once, not once per persona)
        r1_ids = {r.agent_id for r in r1_ok}
        r2_ok, r2_failed = self._fan_out(
            run_dir, lambda p: sub.administer_round2(p, themes),
            [p for p in personas if p.agent_id in r1_ids],
            audit_label="delphi_r2",
        )
        for r in r2_ok:
            self.store.append_jsonl(run_dir, "round2_ratings.jsonl", r)
        gstats = group_stats_from_r2(r2_ok)
        # Round 3
        r2_by = {r.agent_id: r for r in r2_ok}
        r3_personas = [p for p in personas if p.agent_id in r2_by]

        def r3_call(p):
            return sub.administer_round3(p, themes, gstats, r2_by[p.agent_id])

        r3_ok, r3_failed = self._fan_out(run_dir, r3_call, r3_personas, audit_label="delphi_r3")
        for r in r3_ok:
            self.store.append_jsonl(run_dir, "round3_revisions.jsonl", r)
        # Convergence
        conv = convergence_metrics(r2_ok, r3_ok)
        self.store.write_named(run_dir, "convergence.json", conv)
        self.store.write_aggregate(run_dir, {
            "n_r1": len(r1_ok), "n_r2": len(r2_ok), "n_r3": len(r3_ok),
            "n_failed_r1": len(r1_failed), "n_failed_r2": len(r2_failed), "n_failed_r3": len(r3_failed),
            "themes": themes,
        })
        self._write_zep_per_agent(SubagentKind.DELPHI, r3_ok, personas)
        self.store.mark_latest(run_dir)
        return {"n_r1": len(r1_ok), "n_r2": len(r2_ok), "n_r3": len(r3_ok),
                "run_dir": str(run_dir)}

    # --- Re-run a single subagent ---
    def rerun(self, subagent: "SubagentKind") -> dict:
        """Re-run one T1 subagent and return its summary keyed by subagent name.

        Raises:
            ValueError: if *subagent* is not one of the four known kinds.
        """
        personas = self.personas.all()
        if subagent == SubagentKind.LONGITUDINAL:
            return {"longitudinal": self._post_longitudinal(personas)}
        if subagent == SubagentKind.DIVERSITY:
            return {"diversity": self._post_diversity(personas)}
        if subagent == SubagentKind.SCENARIO:
            return {"scenario": self._post_scenario(personas)}
        if subagent == SubagentKind.DELPHI:
            return {"delphi": self._post_delphi(personas)}
        raise ValueError(f"unknown subagent {subagent}")
|
||||
|
|
@ -0,0 +1,160 @@
|
|||
from __future__ import annotations
|
||||
import csv
|
||||
import json
|
||||
from pathlib import Path
|
||||
from app.models.interview import InterviewPhase, SubagentKind
|
||||
from app.services.interviews.storage import InterviewStore
|
||||
|
||||
|
||||
class InterviewSynthesizer:
|
||||
def __init__(self, store: InterviewStore):
|
||||
self.store = store
|
||||
|
||||
def _maybe(self, phase: InterviewPhase, sub: SubagentKind) -> dict | None:
|
||||
run = self.store.latest_run(phase, sub)
|
||||
if run is None:
|
||||
return None
|
||||
agg = run / "aggregate.json"
|
||||
if not agg.exists():
|
||||
return None
|
||||
return {"run_dir": str(run), "aggregate": json.loads(agg.read_text(encoding="utf-8"))}
|
||||
|
||||
def _instrument_hashes(self) -> dict:
|
||||
snap = self.store.base / "instruments_used.json"
|
||||
if not snap.exists():
|
||||
return {}
|
||||
try:
|
||||
data = json.loads(snap.read_text(encoding="utf-8"))
|
||||
except Exception:
|
||||
return {}
|
||||
return {k: v.get("hash") for k, v in data.items()}
|
||||
|
||||
def _limitations_text(self, present: dict[str, bool]) -> str:
|
||||
lines = [
|
||||
"## Limitations",
|
||||
"- **Simulated, not real stakeholders.** Responses reflect how the seed-document discourse "
|
||||
"and the LLM jointly encode each stakeholder type, not what an actual fisher or NGO "
|
||||
"staffer would say. The instrument measures the *model of the stakeholder*, not the stakeholder.",
|
||||
"- **Memory digest is lossy.** Each agent's experience of OASIS is summarised to bounded length; "
|
||||
"agents do not have full episodic recall.",
|
||||
"- **LLM acquiescence and centrality bias.** Likert scales with LLM respondents skew toward 3–4 "
|
||||
"of 5; check per-item distribution shape before drawing conclusions.",
|
||||
"- **N is what it is.** `n_responded` and `n_failed` are printed verbatim per subagent; no smoothing.",
|
||||
"- **Instrument provenance.** Hashes of frozen instruments are listed below; an identical run "
|
||||
"is reproducible from these snapshots.",
|
||||
]
|
||||
for k, ok in present.items():
|
||||
if not ok:
|
||||
lines.append(f"- *{k}* subagent results are missing for this run.")
|
||||
return "\n".join(lines)
|
||||
|
||||
def run(self) -> str:
|
||||
sections: list[str] = []
|
||||
sections.append("# Stakeholder Interview Synthesis\n")
|
||||
|
||||
long_t0 = self._maybe(InterviewPhase.T0, SubagentKind.LONGITUDINAL)
|
||||
long_t1 = self._maybe(InterviewPhase.T1, SubagentKind.LONGITUDINAL)
|
||||
if long_t1:
|
||||
agg = long_t1["aggregate"]
|
||||
sections.append("## Longitudinal opinion drift (T0 → T1)")
|
||||
sections.append(f"- N paired: {agg.get('n_paired', 'NA')}")
|
||||
per_item = agg.get("per_item", {})
|
||||
top = sorted(per_item.items(),
|
||||
key=lambda kv: abs(kv[1].get("mean_delta") or 0), reverse=True)[:5]
|
||||
sections.append("- Largest mean shifts:")
|
||||
for k, v in top:
|
||||
sections.append(f" - `{k}`: Δ̄ = {v.get('mean_delta'):+0.2f} (n={v.get('n')})")
|
||||
|
||||
diversity = self._maybe(InterviewPhase.T1, SubagentKind.DIVERSITY)
|
||||
if diversity:
|
||||
clusters = diversity["aggregate"].get("clusters", [])
|
||||
sections.append("## Stakeholder typology")
|
||||
sections.append(f"- N agents: {diversity['aggregate'].get('n', 'NA')}")
|
||||
sections.append(f"- Clusters: {len(clusters)}")
|
||||
for c in clusters:
|
||||
sections.append(f" - cluster {c['cluster_id']}: n={c['n']}, "
|
||||
f"top loadings = {list(c['top_loadings'].keys())[:5]}")
|
||||
|
||||
delphi = self._maybe(InterviewPhase.T1, SubagentKind.DELPHI)
|
||||
if delphi:
|
||||
agg = delphi["aggregate"]
|
||||
sections.append("## Delphi consensus")
|
||||
sections.append(f"- Rounds completed: R1={agg.get('n_r1')}, R2={agg.get('n_r2')}, R3={agg.get('n_r3')}")
|
||||
themes = agg.get("themes", [])
|
||||
sections.append(f"- Themes: {[t.get('label') for t in themes]}")
|
||||
|
||||
scenario = self._maybe(InterviewPhase.T1, SubagentKind.SCENARIO)
|
||||
if scenario:
|
||||
pol = scenario["aggregate"].get("polarity", {})
|
||||
sections.append("## Scenario evaluation")
|
||||
for sid in sorted(pol):
|
||||
v = pol[sid]
|
||||
if v.get("n", 0) == 0:
|
||||
continue
|
||||
sections.append(
|
||||
f"- **{sid}**: n={v['n']}, desirability {v['mean_desirability']:.2f}, "
|
||||
f"plausibility {v['mean_plausibility']:.2f}, impact {v['mean_impact']:.2f}, "
|
||||
f"fairness {v['mean_fairness']:.2f}")
|
||||
|
||||
sections.append("")
|
||||
sections.append(self._limitations_text({
|
||||
"longitudinal": bool(long_t1),
|
||||
"diversity": bool(diversity),
|
||||
"delphi": bool(delphi),
|
||||
"scenario": bool(scenario),
|
||||
}))
|
||||
sections.append("")
|
||||
sections.append("### Instrument provenance")
|
||||
for name, h in self._instrument_hashes().items():
|
||||
sections.append(f"- `{name}`: hash `{h}`")
|
||||
|
||||
report = "\n\n".join(sections)
|
||||
out_dir = self.store.base / "synthesis"
|
||||
out_dir.mkdir(parents=True, exist_ok=True)
|
||||
(out_dir / "report.md").write_text(report, encoding="utf-8")
|
||||
self._write_tidy_csv(out_dir / "exports" / "all_responses.csv")
|
||||
return report
|
||||
|
||||
def _write_tidy_csv(self, csv_path: Path) -> None:
    """Export all stored responses as one long-format CSV file.

    For each phase (T0, T1) and each ``SubagentKind`` the store's latest run
    is looked up; every record read from that run's response files is
    flattened by ``_flatten`` and the resulting rows are collected.  Parent
    directories of ``csv_path`` are created if needed.  When nothing was
    collected only a header line is written.
    """
    csv_path.parent.mkdir(parents=True, exist_ok=True)

    # File names read from every run.
    response_files = (
        "responses.jsonl",
        "round1_themes.jsonl",
        "round2_ratings.jsonl",
        "round3_revisions.jsonl",
    )

    collected: list[dict] = []
    for phase in (InterviewPhase.T0, InterviewPhase.T1):
        for kind in SubagentKind:
            latest = self.store.latest_run(phase, kind)
            if latest is None:
                continue
            for file_name in response_files:
                for record in self.store.read_responses(latest, file_name):
                    collected.extend(
                        self._flatten(record, phase=phase.value, subagent=kind.value)
                    )

    if not collected:
        csv_path.write_text("phase,subagent,agent_id,key,value\n", encoding="utf-8")
        return

    # Columns are the union of all row keys, in alphabetical order.
    columns = sorted({column for row in collected for column in row})
    with csv_path.open("w", encoding="utf-8", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=columns)
        writer.writeheader()
        writer.writerows(collected)
|
||||
|
||||
def _flatten(self, rec: dict, *, phase: str, subagent: str) -> list[dict]:
    """Turn one response record into long-format rows.

    Every field except ``agent_id`` yields rows of the shape
    ``{"phase", "subagent", "agent_id", "key", "value"}``.  Dict values are
    unfolded up to two levels deep, joining the path with dots
    (``field.sub`` / ``field.sub.leaf``); any other value is emitted as-is
    under the field name.
    """
    agent = rec.get("agent_id")

    def make_row(path: str, value) -> dict:
        return {"phase": phase, "subagent": subagent, "agent_id": agent,
                "key": path, "value": value}

    rows: list[dict] = []
    for field, payload in rec.items():
        if field == "agent_id":
            continue
        if not isinstance(payload, dict):
            rows.append(make_row(field, payload))
            continue
        for sub_key, sub_val in payload.items():
            if isinstance(sub_val, dict):
                rows.extend(
                    make_row(f"{field}.{sub_key}.{leaf}", leaf_val)
                    for leaf, leaf_val in sub_val.items()
                )
            else:
                rows.append(make_row(f"{field}.{sub_key}", sub_val))
    return rows
|
||||
|
|
@ -0,0 +1,133 @@
|
|||
from __future__ import annotations
|
||||
import csv
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from app.services.interviews.base import PersonaRecord, MemoryDigest
|
||||
|
||||
|
||||
class FileSystemPersonaProvider:
    """Read OASIS agent profiles from ``reddit_profiles.json`` and/or ``twitter_profiles.csv``.

    If both files are present, agents from the Reddit JSON take precedence and
    agents that only appear in the Twitter CSV are appended.
    """

    def __init__(self, reddit_path: Optional[Path], twitter_path: Optional[Path]):
        self.reddit_path = Path(reddit_path) if reddit_path else None
        self.twitter_path = Path(twitter_path) if twitter_path else None

    @staticmethod
    def _to_record(row: dict, agent_id: int) -> PersonaRecord:
        """Build a ``PersonaRecord`` from one profile row (JSON object or CSV row)."""
        return PersonaRecord(
            agent_id=agent_id,
            name=str(row.get("name") or row.get("user_name") or f"agent_{row.get('user_id')}"),
            persona=str(row.get("persona") or row.get("bio") or ""),
            profession=row.get("profession"),
            bio=row.get("bio"),
        )

    def _load_reddit(self) -> list[PersonaRecord]:
        """Load personas from the Reddit JSON file; ``[]`` if it is not configured or absent.

        Rows without a ``user_id`` are skipped, matching ``_load_twitter``
        (previously such a row raised ``TypeError`` from ``int(None)``).
        """
        if not self.reddit_path or not self.reddit_path.exists():
            return []
        data = json.loads(self.reddit_path.read_text(encoding="utf-8"))
        records = []
        for row in data:
            uid = row.get("user_id")
            # Explicit None/"" test so that a legitimate agent id of 0 is kept.
            if uid is None or uid == "":
                continue
            records.append(self._to_record(row, int(uid)))
        return records

    def _load_twitter(self) -> list[PersonaRecord]:
        """Load personas from the Twitter CSV file; ``[]`` if it is not configured or absent.

        Rows with an empty ``user_id`` column are skipped.
        """
        if not self.twitter_path or not self.twitter_path.exists():
            return []
        records = []
        with self.twitter_path.open("r", encoding="utf-8", newline="") as f:
            for row in csv.DictReader(f):
                if not row.get("user_id"):
                    continue
                records.append(self._to_record(row, int(row["user_id"])))
        return records

    def all(self) -> list[PersonaRecord]:
        """Return Reddit personas followed by Twitter personas whose id is not in the Reddit set."""
        reddit = self._load_reddit()
        seen = {p.agent_id for p in reddit}
        twitter = [p for p in self._load_twitter() if p.agent_id not in seen]
        return reddit + twitter

    def agent_to_entity(self) -> dict[int, str]:
        """Build the ``{agent_id: zep_entity_uuid}`` map from the persisted profile files.

        Each row's ``source_entity_uuid`` is used.  Reddit takes precedence;
        rows with a missing/blank uuid or an unusable ``user_id`` are skipped
        individually, so one malformed row no longer discards the rows that
        follow it.  Unreadable or unparsable files contribute nothing.

        Returns an empty dict if neither file is present or no row has the field.
        """
        mapping: dict[int, str] = {}

        # Reddit JSON
        if self.reddit_path and self.reddit_path.exists():
            try:
                rows = json.loads(self.reddit_path.read_text(encoding="utf-8"))
            except (OSError, ValueError):
                # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
                rows = []
            if not isinstance(rows, list):
                rows = []
            for row in rows:
                if not isinstance(row, dict):
                    continue
                uid = row.get("user_id")
                uuid_ = row.get("source_entity_uuid")
                if uid is None or not uuid_:
                    continue
                try:
                    mapping[int(uid)] = str(uuid_)
                except (TypeError, ValueError):
                    continue

        # Twitter CSV (only fills agents not already mapped)
        if self.twitter_path and self.twitter_path.exists():
            try:
                with self.twitter_path.open("r", encoding="utf-8", newline="") as f:
                    for row in csv.DictReader(f):
                        uid = row.get("user_id")
                        uuid_ = row.get("source_entity_uuid")
                        if not uid or not uuid_:
                            continue
                        try:
                            uid_int = int(uid)
                        except (TypeError, ValueError):
                            continue
                        if uid_int not in mapping:
                            mapping[uid_int] = str(uuid_)
            except OSError:
                pass

        return mapping
|
||||
|
||||
|
||||
class ZepMemoryProvider:
    """Build a length-bounded memory digest per agent from Zep entity context.

    ``agent_id`` (the OASIS user_id) is mapped to a Zep entity UUID; when no
    mapping exists the agent_id itself, as a string, is used instead.
    """

    def __init__(self, entity_reader, graph_id: str, agent_to_entity: dict[int, str] | None = None):
        self.reader = entity_reader
        self.graph_id = graph_id
        # Private copy so later changes to the caller's dict do not leak in.
        self.map = dict(agent_to_entity) if agent_to_entity else {}

    def get_digest(self, agent_id: int, max_chars: int = 2000) -> MemoryDigest:
        """Return a text digest (name, summary, up to 20 edge facts) for one agent.

        Any exception raised by the reader yields a placeholder digest with
        ``available=False``.  Text longer than ``max_chars`` is cut and
        terminated with an ellipsis character.
        """
        entity_uuid = self.map.get(agent_id) or str(agent_id)
        try:
            ctx = self.reader.get_entity_with_context(self.graph_id, entity_uuid)
        except Exception:
            return MemoryDigest(text=f"[no memory for agent {agent_id}]", available=False)

        headline = (
            ("Name", getattr(ctx, "name", None)),
            ("Summary", getattr(ctx, "summary", None)),
        )
        lines: list[str] = [f"{label}: {value}" for label, value in headline if value]

        related = getattr(ctx, "related_edges", []) or []
        for edge in related[:20]:
            # Edges may be plain dicts or objects exposing a ``fact`` attribute.
            if isinstance(edge, dict):
                fact = edge.get("fact")
            else:
                fact = getattr(edge, "fact", None)
            if fact:
                lines.append(f"- {fact}")

        digest = "\n".join(lines)
        if len(digest) > max_chars:
            digest = digest[: max_chars - 1] + "…"
        if not digest:
            digest = f"[empty memory for agent {agent_id}]"
        return MemoryDigest(text=digest, available=True)
|
||||
|
|
@ -0,0 +1,107 @@
|
|||
from __future__ import annotations
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Callable, Optional, Protocol
|
||||
|
||||
|
||||
@dataclass
class PersonaRecord:
    """Profile of a single agent that is passed to the interview code."""

    # Required identity fields.
    agent_id: int
    name: str
    persona: str
    # Optional profile details; ``None`` when not supplied.
    profession: Optional[str] = None
    bio: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
class MemoryDigest:
    """Text digest of an agent's memory plus a flag telling whether it is usable."""

    text: str
    # Defaults to True; set to False when no memory could be obtained.
    available: bool = True
|
||||
|
||||
|
||||
class MemoryProvider(Protocol):
    """Structural interface for objects that can supply a per-agent memory digest."""

    def get_digest(self, agent_id: int, max_chars: int = 2000) -> MemoryDigest:
        ...
|
||||
|
||||
|
||||
def coerce_int(value: Any) -> Optional[int]:
    """Coerce an LLM-returned Likert value into an ``int``.

    Models often return numeric answers as JSON strings (``"3"`` rather than
    ``3``).  An ``int`` is returned unchanged; a string is accepted when,
    after stripping whitespace, it consists of digits with optional leading
    minus signs and parses via ``int()``.  Everything else — including
    ``bool``, ``float`` and ``None`` — yields ``None``.
    """
    # bool is a subclass of int, so it has to be screened out first;
    # otherwise True/False would pass as 1/0.
    if isinstance(value, bool):
        return None
    if isinstance(value, int):
        return value
    if not isinstance(value, str):
        return None

    text = value.strip()
    if not text or not text.lstrip("-").isdigit():
        return None
    try:
        return int(text)
    except ValueError:
        # e.g. "--3": passes the digit screen but is not a valid int literal.
        return None
|
||||
|
||||
|
||||
class SchemaValidationFailure(ValueError):
    """Signals that an agent's reply still violated the schema after the retry.

    ``agent_id`` identifies the agent and ``attempts`` carries the list of
    attempt records supplied by the raiser.
    """

    def __init__(self, agent_id: int, attempts: list[dict]):
        message = f"agent {agent_id}: schema violation after retry"
        super().__init__(message)
        self.agent_id = agent_id
        self.attempts = attempts
|
||||
|
||||
|
||||
class StakeholderInterviewer:
    """Ask a persona questions through an LLM and return its JSON answer.

    ``llm`` must expose ``chat_json(messages=..., temperature=..., max_tokens=...)``;
    ``memory`` supplies the per-agent digest embedded in the system prompt.
    """

    def __init__(self, llm, memory: MemoryProvider, language: str = "de"):
        self.llm = llm
        self.memory = memory
        self.language = language

    def _system_prompt(self, persona: PersonaRecord, digest: MemoryDigest, schema_hint: str) -> str:
        """Compose the in-character system prompt including memory digest and schema hint."""
        memory_block = digest.text if digest.available else "[no simulation memory available]"
        lang_note = "Antworte ausschließlich auf Deutsch." if self.language == "de" else "Answer in English."
        return (
            f"You are {persona.name}. {persona.persona}\n\n"
            "You are answering a survey about the future of German fisheries. "
            "Answer strictly in character based on your background, values, and what you experienced "
            "during the simulated social media discourse summarised below.\n\n"
            f"--- simulation memory digest ---\n{memory_block}\n--- end ---\n\n"
            f"{lang_note} Return JSON ONLY matching this schema:\n{schema_hint}"
        )

    @staticmethod
    def _render_reply(reply: Any) -> str:
        """Serialise a previous model reply for the retry conversation.

        JSON is used rather than ``str()``: the Python repr of a dict (single
        quotes, ``None``/``True``) is not JSON, and echoing it back while
        asking for valid JSON gives the model a misleading example.  Falls
        back to ``str()`` for values ``json`` cannot serialise.
        """
        import json  # function-scope import: not needed elsewhere in this module

        try:
            return json.dumps(reply, ensure_ascii=False)
        except (TypeError, ValueError):
            return str(reply)

    def ask_in_character(
        self,
        persona: PersonaRecord,
        user_prompt: str,
        schema_hint: str,
        *,
        temperature: float = 0.3,
        max_tokens: Optional[int] = None,
        validate: Optional[Callable[[dict], Optional[dict]]] = None,
    ) -> dict:
        """Query the LLM as ``persona`` and return the (optionally validated) reply.

        Args:
            persona: Agent whose name/persona text and memory digest shape the prompt.
            user_prompt: The question text sent as the user turn.
            schema_hint: Description of the expected JSON, embedded in the prompts.
            temperature: Sampling temperature of the first attempt.
            max_tokens: Passed through to ``chat_json``.
            validate: Callable returning the accepted payload, or ``None`` to
                reject it.  Without a validator the first reply is returned as is.

        Returns:
            The validator's result, or the raw first reply when ``validate`` is ``None``.

        Raises:
            SchemaValidationFailure: If both the first reply and the single
                retry (sent at temperature 0.0) are rejected by ``validate``.
        """
        digest = self.memory.get_digest(persona.agent_id)
        messages = [
            {"role": "system", "content": self._system_prompt(persona, digest, schema_hint)},
            {"role": "user", "content": user_prompt},
        ]
        first = self.llm.chat_json(messages=messages, temperature=temperature, max_tokens=max_tokens)
        if validate is None:
            return first

        validated = validate(first)
        if validated is not None:
            return validated

        # One corrective retry: show the model its own answer and restate the schema.
        messages.append({"role": "assistant", "content": self._render_reply(first)})
        messages.append({"role": "user", "content":
                         "Your previous response did not match the required schema. "
                         f"Return ONLY valid JSON matching: {schema_hint}"})
        second = self.llm.chat_json(messages=messages, temperature=0.0, max_tokens=max_tokens)
        validated = validate(second)
        if validated is None:
            raise SchemaValidationFailure(
                persona.agent_id,
                attempts=[
                    {"attempt": 1, "raw": first, "schema_hint": schema_hint},
                    {"attempt": 2, "raw": second, "schema_hint": schema_hint},
                ],
            )
        return validated
|
||||
|
|
@ -0,0 +1,203 @@
|
|||
from __future__ import annotations
|
||||
import json
|
||||
import statistics
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
import yaml
|
||||
from app.models.interview import (
|
||||
DelphiOpenResponse, DelphiRatingResponse,
|
||||
)
|
||||
from app.services.interviews.base import StakeholderInterviewer, PersonaRecord, coerce_int
|
||||
|
||||
|
||||
class DelphiSubagent:
    """Administer the three Delphi rounds (open answers, ratings, revision) to a persona.

    The instrument is a YAML file whose ``questions`` entries carry a
    ``question_id`` and the ``de`` / ``en`` question texts.
    """

    def __init__(self, llm, memory, instrument_path: Path, language: str = "de"):
        with Path(instrument_path).open("r", encoding="utf-8") as f:
            self.instrument = yaml.safe_load(f)
        self.interviewer = StakeholderInterviewer(llm=llm, memory=memory, language=language)
        self.llm = llm
        self.language = language

    # --- Round 1: open questions ---
    def _r1_schema(self) -> str:
        """JSON schema hint for round 1: one string answer per question id."""
        return json.dumps({
            "answers": {q["question_id"]: "<string>" for q in self.instrument["questions"]}
        }, ensure_ascii=False)

    def _r1_prompt(self) -> str:
        """Round-1 user prompt listing every question in the configured language."""
        lines = ["Bitte beantworten Sie offen:" if self.language == "de" else "Please answer openly:"]
        for q in self.instrument["questions"]:
            txt = q["de"] if self.language == "de" else q["en"]
            lines.append(f"[{q['question_id']}] {txt}")
        return "\n".join(lines)

    def _r1_validate(self, raw: dict) -> Optional[dict]:
        """Accept ``raw`` only if ``answers`` is a dict covering every question id."""
        if not isinstance(raw, dict):
            return None
        ans = raw.get("answers")
        if not isinstance(ans, dict):
            return None
        required = {q["question_id"] for q in self.instrument["questions"]}
        if not required.issubset(ans.keys()):
            return None
        return raw

    def administer_round1(self, persona: PersonaRecord) -> DelphiOpenResponse:
        """Run round 1 for ``persona`` and return the answers coerced to strings."""
        raw = self.interviewer.ask_in_character(
            persona, user_prompt=self._r1_prompt(),
            schema_hint=self._r1_schema(), validate=self._r1_validate,
        )
        return DelphiOpenResponse(agent_id=persona.agent_id, round=1,
                                  answers={k: str(v) for k, v in raw["answers"].items()})

    # --- Round 2: rate themes ---
    def _r2_schema(self, theme_ids: list[str]) -> str:
        """JSON schema hint for the ratings: importance and plausibility per theme id."""
        return json.dumps({
            "ratings": {tid: {"importance": "<int 1-5>", "plausibility": "<int 1-5>"} for tid in theme_ids}
        }, ensure_ascii=False)

    def _r2_prompt(self, themes: list[dict]) -> str:
        """Round-2 user prompt listing each theme's id and label."""
        head = "Bewerten Sie jedes Thema nach Wichtigkeit (1-5) und Plausibilität (1-5):" if self.language == "de" \
            else "Rate each theme on importance (1-5) and plausibility (1-5):"
        body = [f"- [{t['theme_id']}] {t['label']}" for t in themes]
        return head + "\n" + "\n".join(body)

    def _r2_validate(self, theme_ids: list[str]):
        """Return a validator for rating payloads over exactly ``theme_ids``.

        The validator returns ``None`` (reject) unless ``ratings`` is a dict
        keyed by exactly the given theme ids whose entries are dicts with
        ``importance`` and ``plausibility`` coercible to ints in 1..5.  Each
        accepted entry is replaced in place by its coerced two-key form.
        A non-dict ``ratings`` value is rejected rather than raising, so the
        interviewer's retry logic gets a chance to run.
        """
        expected = set(theme_ids)

        def v(raw: dict) -> Optional[dict]:
            if not isinstance(raw, dict):
                return None
            ratings = raw.get("ratings")
            if not isinstance(ratings, dict):
                return None
            if set(ratings.keys()) != expected:
                return None
            # Assigning to existing keys while iterating is safe: the dict's
            # size does not change.
            for tid, r in ratings.items():
                if not isinstance(r, dict):
                    return None
                coerced: dict[str, int] = {}
                for key in ("importance", "plausibility"):
                    iv = coerce_int(r.get(key))
                    if iv is None or not 1 <= iv <= 5:
                        return None
                    coerced[key] = iv
                ratings[tid] = coerced
            return raw

        return v

    def administer_round2(self, persona: PersonaRecord, themes: list[dict]) -> DelphiRatingResponse:
        """Run round 2: have ``persona`` rate every theme."""
        theme_ids = [t["theme_id"] for t in themes]
        raw = self.interviewer.ask_in_character(
            persona, user_prompt=self._r2_prompt(themes),
            schema_hint=self._r2_schema(theme_ids), validate=self._r2_validate(theme_ids),
        )
        return DelphiRatingResponse(agent_id=persona.agent_id, round=2,
                                    ratings={k: dict(v) for k, v in raw["ratings"].items()})

    # --- Round 3: revise after seeing group stats ---
    def administer_round3(
        self, persona: PersonaRecord, themes: list[dict], group_stats: dict, own_r2: DelphiRatingResponse
    ) -> DelphiRatingResponse:
        """Run round 3: show group statistics and the agent's own round-2 ratings, collect revisions.

        ``group_stats`` is read per theme id via the keys ``imp_median``,
        ``imp_iqr``, ``plaus_median`` and ``plaus_iqr``; missing entries are
        rendered as ``None`` in the prompt.  The ratings are validated with
        the same rules as round 2; ``justification`` is passed through
        unvalidated.
        """
        theme_ids = [t["theme_id"] for t in themes]
        head = ("Sie sehen unten die anonymisierten Gruppenwerte (Median, IQR). "
                "Bitte überarbeiten Sie Ihre Bewertungen, wenn Sie möchten, und begründen Sie kurz.") \
            if self.language == "de" else \
            ("Below are the anonymised group values (median, IQR). "
             "Please revise your ratings if you wish and add a short justification.")
        ctx_lines = []
        for t in themes:
            tid = t["theme_id"]
            gs = group_stats.get(tid, {})
            own = own_r2.ratings.get(tid, {})
            ctx_lines.append(
                f"[{tid}] {t['label']} — group importance median={gs.get('imp_median')}, "
                f"IQR={gs.get('imp_iqr')}; plausibility median={gs.get('plaus_median')}, "
                f"IQR={gs.get('plaus_iqr')}. Your R2: imp={own.get('importance')}, plaus={own.get('plausibility')}."
            )
        prompt = head + "\n\n" + "\n".join(ctx_lines)
        schema = json.dumps({
            "ratings": {tid: {"importance": "<int 1-5>", "plausibility": "<int 1-5>"} for tid in theme_ids},
            "justification": "<string>",
        }, ensure_ascii=False)

        # Round 3 accepts exactly the rating shape of round 2, so the same
        # validator is reused instead of keeping a second copy of it.
        raw = self.interviewer.ask_in_character(persona, user_prompt=prompt,
                                                schema_hint=schema,
                                                validate=self._r2_validate(theme_ids))
        return DelphiRatingResponse(
            agent_id=persona.agent_id, round=3,
            ratings={k: dict(v) for k, v in raw["ratings"].items()},
            justification=raw.get("justification"),
        )
|
||||
|
||||
|
||||
def extract_themes(round1: list[DelphiOpenResponse], llm, max_themes: int = 12) -> list[dict]:
    """Ask the LLM to code round-1 answers into a list of themes.

    Args:
        round1: Round-1 responses; each answer is sent to the model tagged
            with its agent id and question id.
        llm: Object exposing ``chat_json(messages=..., temperature=...)``.
        max_themes: Upper bound stated in the prompt and enforced on the result.

    Returns:
        ``[{"theme_id": str, "label": str}, ...]``.  Entries that are not
        dicts or lack a ``label`` are dropped.  ``theme_id`` is always a
        string (a missing/falsy id becomes ``theme_<index>``) and is made
        unique by suffixing repeats.  String ids matter because later rounds
        compare them against the keys of a parsed JSON object, which are
        always strings.
    """
    text_blocks = []
    for r in round1:
        for qid, ans in r.answers.items():
            text_blocks.append(f"[agent {r.agent_id} {qid}] {ans}")
    schema = json.dumps({"themes": [{"theme_id": "<string>", "label": "<short string>"}]}, ensure_ascii=False)
    messages = [
        {"role": "system", "content":
            "You extract distinct thematic codes from open-ended German fisheries survey responses. "
            f"Return JSON ONLY matching: {schema}. Use stable theme_ids of form theme_0, theme_1, …"},
        {"role": "user", "content": "Responses:\n" + "\n".join(text_blocks)
            + f"\n\nReturn up to {max_themes} distinct themes."},
    ]
    raw = llm.chat_json(messages=messages, temperature=0.0)
    themes = raw.get("themes", []) if isinstance(raw, dict) else []
    if not isinstance(themes, list):
        themes = []

    out: list[dict] = []
    seen: set[str] = set()
    for i, t in enumerate(themes):
        if len(out) >= max_themes:
            break  # the model ignored the requested limit
        if not (isinstance(t, dict) and "label" in t):
            continue
        base = str(t.get("theme_id") or f"theme_{i}")
        tid, n = base, 1
        while tid in seen:
            # Duplicate ids would collapse into one key in the rating dicts.
            n += 1
            tid = f"{base}_{n}"
        seen.add(tid)
        out.append({"theme_id": tid, "label": str(t["label"])})
    return out
|
||||
|
||||
|
||||
def _iqr(xs: list[float]) -> float:
    """Spread of ``xs``: the interquartile range, or the full range for tiny samples.

    Returns ``0.0`` for an empty list.  With fewer than four values the result
    is ``max - min``; otherwise it is Q3 - Q1 from ``statistics.quantiles``
    (default method).
    """
    if not xs:
        return 0.0
    ordered = sorted(xs)
    if len(ordered) < 4:
        # Too few points for quartiles: fall back to the overall range.
        return ordered[-1] - ordered[0]
    lower, _median, upper = statistics.quantiles(ordered, n=4)
    return upper - lower
|
||||
|
||||
|
||||
def convergence_metrics(r2: list[DelphiRatingResponse], r3: list[DelphiRatingResponse]) -> dict:
    """Per-theme medians and spread (``_iqr``) of round-2 versus round-3 ratings.

    Responses are indexed by ``agent_id`` within each round (a later response
    of the same agent replaces an earlier one).  For every theme rated in
    either round the result holds the importance medians, the importance and
    plausibility spreads per round, and the round-3-minus-round-2 spread
    deltas.  Medians are ``None`` when a round has no rating for the theme.
    Themes are emitted in sorted order.
    """
    latest_r2 = {resp.agent_id: resp for resp in r2}
    latest_r3 = {resp.agent_id: resp for resp in r3}

    theme_ids: set[str] = set()
    for resp in r2 + r3:
        theme_ids.update(resp.ratings.keys())

    def column(latest: dict, theme: str, field: str) -> list:
        return [resp.ratings[theme][field] for resp in latest.values() if theme in resp.ratings]

    def median_or_none(values: list):
        return statistics.median(values) if values else None

    result: dict[str, dict] = {}
    for theme in sorted(theme_ids):
        imp2 = column(latest_r2, theme, "importance")
        imp3 = column(latest_r3, theme, "importance")
        plaus2 = column(latest_r2, theme, "plausibility")
        plaus3 = column(latest_r3, theme, "plausibility")

        imp_spread2, imp_spread3 = _iqr(imp2), _iqr(imp3)
        plaus_spread2, plaus_spread3 = _iqr(plaus2), _iqr(plaus3)

        result[theme] = {
            "imp_median_r2": median_or_none(imp2),
            "imp_median_r3": median_or_none(imp3),
            "imp_iqr_r2": imp_spread2,
            "imp_iqr_r3": imp_spread3,
            "delta_iqr_importance": imp_spread3 - imp_spread2,
            "plaus_iqr_r2": plaus_spread2,
            "plaus_iqr_r3": plaus_spread3,
            "delta_iqr_plausibility": plaus_spread3 - plaus_spread2,
        }
    return result
|
||||
|
||||
|
||||
def group_stats_from_r2(r2: list[DelphiRatingResponse]) -> dict:
    """Summarise round-2 ratings per theme for the round-3 feedback.

    Returns ``{theme_id: {"imp_median", "imp_iqr", "plaus_median", "plaus_iqr"}}``
    with medians from ``statistics.median`` and spreads from ``_iqr``.
    Themes are processed in sorted order so the resulting dict — and anything
    serialised from it — is deterministic across runs, as in
    ``convergence_metrics``.
    """
    themes: set[str] = set()
    for r in r2:
        themes.update(r.ratings.keys())

    stats: dict[str, dict] = {}
    for t in sorted(themes):
        rated = [r.ratings[t] for r in r2 if t in r.ratings]
        imps = [entry["importance"] for entry in rated]
        plauss = [entry["plausibility"] for entry in rated]
        stats[t] = {
            "imp_median": statistics.median(imps) if imps else None,
            "imp_iqr": _iqr(imps),
            "plaus_median": statistics.median(plauss) if plauss else None,
            "plaus_iqr": _iqr(plauss),
        }
    return stats
|
||||
|
|
@ -0,0 +1,140 @@
|
|||
from __future__ import annotations
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
import numpy as np
|
||||
from sklearn.decomposition import PCA
|
||||
from sklearn.cluster import KMeans
|
||||
import yaml
|
||||
from app.models.interview import QSortResponse
|
||||
from app.services.interviews.base import StakeholderInterviewer, PersonaRecord, coerce_int
|
||||
from app.services.interviews.instrument_loader import InstrumentValidationError
|
||||
|
||||
|
||||
class DiversitySubagent:
    """Administer a forced-distribution Q-sort plus Likert axes to a persona.

    The YAML instrument provides ``statements`` (``statement_id``, ``de``,
    ``en``), a ``distribution`` of per-box counts for the boxes -3..+3, and
    ``likert_axes`` (``axis_id``, ``de``, ``en``).
    """

    def __init__(self, llm, memory, instrument_path: Path, language: str = "de"):
        self.instrument = self._load(Path(instrument_path))
        self.interviewer = StakeholderInterviewer(llm=llm, memory=memory, language=language)
        self.language = language

    def _load(self, path: Path) -> dict:
        """Parse the instrument YAML and check its basic shape.

        Raises:
            InstrumentValidationError: If the document is not a mapping with
                ``statements`` and ``distribution``, or if the distribution
                counts do not add up to the number of statements.
        """
        with path.open("r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
        if not isinstance(data, dict) or "statements" not in data or "distribution" not in data:
            raise InstrumentValidationError(f"invalid diversity instrument: {path}")
        if sum(data["distribution"]) != len(data["statements"]):
            raise InstrumentValidationError("distribution sum must equal number of statements")
        return data

    def _schema_hint(self) -> str:
        """JSON schema hint: one placement per statement and one rating per axis."""
        return json.dumps({
            "placements": {s["statement_id"]: "<int in -3..+3>" for s in self.instrument["statements"]},
            "likert_axes": {a["axis_id"]: "<int 1-7>" for a in self.instrument["likert_axes"]},
        }, ensure_ascii=False)

    def _user_prompt(self) -> str:
        """User prompt explaining the forced distribution and listing statements and axes."""
        dist = self.instrument["distribution"]
        buckets = list(range(-3, 4))
        bucket_desc = ", ".join(f"{b}:{n}" for b, n in zip(buckets, dist))
        lines = [
            ("Ordnen Sie jede Aussage genau einer Box von -3 (lehne stark ab) bis +3 (stimme stark zu) zu. "
             f"Die Verteilung ist erzwungen: {bucket_desc}.") if self.language == "de" else
            ("Place every statement into exactly one box from -3 (strongly disagree) to +3 (strongly agree). "
             f"The distribution is forced: {bucket_desc}."),
            "",
            "Statements:",
        ]
        for s in self.instrument["statements"]:
            txt = s["de"] if self.language == "de" else s["en"]
            lines.append(f"- [{s['statement_id']}] {txt}")
        lines += ["", "Then rate each axis from 1 to 7:"]
        for a in self.instrument["likert_axes"]:
            txt = a["de"] if self.language == "de" else a["en"]
            lines.append(f"- [{a['axis_id']}] {txt}")
        return "\n".join(lines)

    def _validator(self, raw: dict) -> Optional[dict]:
        """Validate and normalise one reply; return ``None`` to reject it.

        A reply is accepted when ``placements`` covers exactly the instrument's
        statements with ints in -3..+3 whose per-box counts equal the forced
        distribution, and every instrument axis has an int rating in 1..7.
        On success ``raw`` is returned with both sections replaced by their
        int-coerced versions (axes not in the instrument are dropped).
        Malformed sections (non-dict) are rejected instead of raising.
        """
        if not isinstance(raw, dict):
            return None
        placements = raw.get("placements", {})
        axes = raw.get("likert_axes", {})
        if not isinstance(placements, dict) or not isinstance(axes, dict):
            return None
        statements = {s["statement_id"] for s in self.instrument["statements"]}
        if set(placements.keys()) != statements:
            return None
        dist = self.instrument["distribution"]
        # Boxes with a required count of 0 are left out: ``got`` only ever
        # contains boxes that were actually used, so keeping the zeros would
        # make every reply to such an instrument compare unequal.
        target = {b: n for b, n in zip(range(-3, 4), dist) if n}
        got: dict[int, int] = {}
        coerced_p: dict[str, int] = {}
        for k, v in placements.items():
            iv = coerce_int(v)
            if iv is None or not -3 <= iv <= 3:
                return None
            coerced_p[k] = iv
            got[iv] = got.get(iv, 0) + 1
        if got != target:
            return None
        coerced_a: dict[str, int] = {}
        for a in self.instrument["likert_axes"]:
            iv = coerce_int(axes.get(a["axis_id"]))
            if iv is None or not 1 <= iv <= 7:
                return None
            coerced_a[a["axis_id"]] = iv
        raw["placements"] = coerced_p
        raw["likert_axes"] = coerced_a
        return raw

    def administer(self, persona: PersonaRecord) -> QSortResponse:
        """Interview ``persona`` and return the validated Q-sort response."""
        raw = self.interviewer.ask_in_character(
            persona,
            user_prompt=self._user_prompt(),
            schema_hint=self._schema_hint(),
            validate=self._validator,
        )
        return QSortResponse(
            agent_id=persona.agent_id,
            placements={k: int(v) for k, v in raw["placements"].items()},
            likert_axes={k: int(v) for k, v in raw["likert_axes"].items()},
        )
|
||||
|
||||
|
||||
def _vectorize(r: QSortResponse, statements: list[str], axes: list[str]) -> np.ndarray:
    """Lay out one response as a float vector: statement placements, then axis ratings.

    Statements missing from the response count as 0 and missing axes as 4.
    """
    values = [r.placements.get(statement_id, 0) for statement_id in statements]
    values.extend(r.likert_axes.get(axis_id, 4) for axis_id in axes)
    return np.array(values, dtype=float)
|
||||
|
||||
|
||||
def run_typology(responses: list[QSortResponse], n_clusters: int = 4) -> dict:
    """Cluster Q-sort responses with k-means and project them with PCA.

    Args:
        responses: Validated responses; the feature space is the sorted union
            of their statement ids followed by the sorted union of axis ids.
        n_clusters: Requested number of clusters, capped at ``len(responses)``.

    Returns:
        ``{"n", "clusters", "pca"}``.  Each cluster lists its ``agent_ids``
        and ``top_loadings`` — the (up to) eight centroid coordinates of
        largest magnitude, keyed by feature name.  ``pca["components"]`` holds
        the per-response projected coordinates (the ``fit_transform`` output),
        aligned with ``pca["agent_ids"]``.  The empty result carries the same
        keys as a populated one, including ``pca["agent_ids"]``.
    """
    if not responses:
        return {
            "n": 0,
            "clusters": [],
            "pca": {"components": [], "explained_variance": [], "agent_ids": []},
        }

    statements = sorted({k for r in responses for k in r.placements})
    axes = sorted({k for r in responses for k in r.likert_axes})
    feature_names = statements + axes  # same column order as _vectorize
    agent_ids = [r.agent_id for r in responses]
    X = np.vstack([_vectorize(r, statements, axes) for r in responses])

    n_clusters = min(n_clusters, len(responses))
    pca = PCA(n_components=min(5, X.shape[1], X.shape[0]))
    pcs = pca.fit_transform(X)
    # Fixed seed keeps cluster assignment reproducible between runs.
    km = KMeans(n_clusters=n_clusters, n_init=10, random_state=0)
    labels = km.fit_predict(X)

    clusters = []
    for c in range(n_clusters):
        members = [aid for aid, label in zip(agent_ids, labels) if label == c]
        centroid = km.cluster_centers_[c]
        strongest = np.argsort(np.abs(centroid))[::-1][:8].tolist()
        clusters.append({
            "cluster_id": int(c),
            "n": len(members),
            "agent_ids": members,
            "top_loadings": {feature_names[i]: float(centroid[i]) for i in strongest},
        })
    return {
        "n": len(responses),
        "clusters": clusters,
        "pca": {
            "components": pcs.tolist(),
            "explained_variance": pca.explained_variance_ratio_.tolist(),
            "agent_ids": agent_ids,
        },
    }
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
from __future__ import annotations
|
||||
import hashlib
|
||||
import json
|
||||
from pathlib import Path
|
||||
import yaml
|
||||
from pydantic import ValidationError
|
||||
from app.models.interview import (
|
||||
LikertInstrument, QSortInstrument,
|
||||
)
|
||||
|
||||
class InstrumentValidationError(ValueError):
    """Raised when an instrument file is missing, unparsable or has an invalid structure."""
|
||||
|
||||
def _parse_yaml(path: Path) -> dict:
    """Load a YAML instrument file and return its top-level mapping.

    Raises:
        InstrumentValidationError: If the file does not exist, cannot be
            parsed as YAML, or its top-level value is not a mapping.
    """
    if not path.exists():
        raise InstrumentValidationError(f"instrument file not found: {path}")

    try:
        with path.open("r", encoding="utf-8") as stream:
            document = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        raise InstrumentValidationError(f"YAML parse error in {path}: {exc}") from exc

    if isinstance(document, dict):
        return document
    raise InstrumentValidationError(f"top-level YAML must be a mapping in {path}")
|
||||
|
||||
def load_likert_instrument(path: Path) -> LikertInstrument:
    """Parse ``path`` and build a ``LikertInstrument`` from its top-level mapping.

    Raises:
        InstrumentValidationError: On any ``_parse_yaml`` failure or when the
            model rejects the data (the pydantic error text is preserved).
    """
    payload = _parse_yaml(Path(path))
    try:
        instrument = LikertInstrument(**payload)
    except ValidationError as exc:
        raise InstrumentValidationError(str(exc)) from exc
    return instrument
|
||||
|
||||
def load_qsort_instrument(path: Path) -> QSortInstrument:
    """Parse ``path`` and build a ``QSortInstrument`` from its top-level mapping.

    Raises:
        InstrumentValidationError: On any ``_parse_yaml`` failure or when the
            model rejects the data (the pydantic error text is preserved).
    """
    payload = _parse_yaml(Path(path))
    try:
        instrument = QSortInstrument(**payload)
    except ValidationError as exc:
        raise InstrumentValidationError(str(exc)) from exc
    return instrument
|
||||
|
||||
def instrument_hash(path: Path) -> str:
    """Return the first 16 hex characters of the SHA-256 digest of the file's bytes."""
    digest = hashlib.sha256(Path(path).read_bytes())
    return digest.hexdigest()[:16]
|
||||
|
||||
def freeze_snapshot(instruments: dict[str, Path], out_path: Path) -> dict:
    """Record path, hash and parsed content of every instrument and persist it as JSON.

    The snapshot maps each instrument name to ``{"path", "hash", "content"}``.
    It is written to ``out_path`` (parent directories are created) as
    indented, non-ASCII-preserving JSON and also returned.
    """
    snapshot: dict = {}
    for name, source in instruments.items():
        snapshot[name] = {
            "path": str(source),
            "hash": instrument_hash(source),
            "content": _parse_yaml(source),
        }

    out_path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(snapshot, ensure_ascii=False, indent=2)
    out_path.write_text(serialized, encoding="utf-8")
    return snapshot
|
||||
|
|
@ -0,0 +1,72 @@
|
|||
"""
|
||||
Interview lifecycle hook installer (Task 20).
|
||||
|
||||
install_hooks(manager) registers two callbacks on a SimulationManager:
|
||||
- on_ready → spawn T0 longitudinal pre-survey in a background thread
|
||||
- on_completed → spawn full post-sim batch + synthesis in a background thread
|
||||
|
||||
Both hooks are best-effort: failures are logged but never propagate to the
|
||||
calling thread.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
|
||||
from app.utils.logger import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def install_hooks(manager) -> None:
    """Register the interview lifecycle callbacks on ``manager``.

    on_ready     → start ``_run_pre`` for the simulation in a daemon thread
    on_completed → start ``_run_post`` for the simulation in a daemon thread

    The simulation id is taken from the first truthy attribute among
    ``simulation_id``, ``sim_id`` and ``id`` of the state object; when none
    is found the hook does nothing.
    """

    def _sim_id_of(state):
        for attr in ("simulation_id", "sim_id", "id"):
            candidate = getattr(state, attr, None)
            if candidate:
                return candidate
        return None

    def _on_ready(state) -> None:
        sim_id = _sim_id_of(state)
        if sim_id:
            threading.Thread(target=_run_pre, args=(sim_id,), daemon=True).start()

    def _on_completed(state) -> None:
        sim_id = _sim_id_of(state)
        if sim_id:
            threading.Thread(target=_run_post, args=(sim_id,), daemon=True).start()

    manager.register_on_ready(_on_ready)
    manager.register_on_completed(_on_completed)
|
||||
|
||||
|
||||
def _run_pre(sim_id: str) -> None:
    """Build the orchestrator for ``sim_id`` and run its pre-survey.

    Best-effort: any exception is logged as a warning and swallowed.
    """
    try:
        # Imported at call time rather than at module import.
        from app.api.interview import _build_orchestrator

        orchestrator = _build_orchestrator(sim_id)
        orchestrator.run_pre()
    except Exception as exc:
        logger.warning(f"auto pre-survey failed for {sim_id}: {exc!r}")
|
||||
|
||||
|
||||
def _run_post(sim_id: str) -> None:
    """Build the orchestrator for ``sim_id``, run the post-survey batch, then the synthesis.

    Best-effort: any exception is logged as a warning and swallowed.
    """
    try:
        # Imported at call time rather than at module import.
        from app.api.interview import _build_orchestrator
        from app.services.interview_synthesizer import InterviewSynthesizer

        orchestrator = _build_orchestrator(sim_id)
        orchestrator.run_post()
        synthesizer = InterviewSynthesizer(store=orchestrator.store)
        synthesizer.run()
    except Exception as exc:
        logger.warning(f"auto post-survey failed for {sim_id}: {exc!r}")
|
||||
|
|
@ -0,0 +1,113 @@
|
|||
from __future__ import annotations
|
||||
import json
|
||||
import math
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from app.models.interview import (
|
||||
LikertInstrument, LikertResponse, InterviewPhase,
|
||||
)
|
||||
from app.services.interviews.base import StakeholderInterviewer, PersonaRecord, coerce_int
|
||||
from app.services.interviews.instrument_loader import load_likert_instrument
|
||||
|
||||
|
||||
class LongitudinalSubagent:
    """Administer the Likert instrument to a persona for a given interview phase."""

    def __init__(self, llm, memory, instrument_path: Path, language: str = "de"):
        self.instrument: LikertInstrument = load_likert_instrument(Path(instrument_path))
        self.interviewer = StakeholderInterviewer(llm=llm, memory=memory, language=language)
        self.language = language

    def _schema_hint(self) -> str:
        """JSON schema hint: a 1-5 response and a 0-1 confidence per item, plus a comment."""
        ids = [i.item_id for i in self.instrument.items]
        return json.dumps({
            "responses": {k: "<int 1-5>" for k in ids},
            "confidence": {k: "<float 0-1>" for k in ids},
            "open_comment": "<string, optional>",
        }, ensure_ascii=False)

    def _user_prompt(self) -> str:
        """User prompt listing every item in the configured language."""
        lines = [
            "Bitte bewerten Sie die folgenden Aussagen auf einer Skala von 1 (lehne stark ab) bis 5 (stimme stark zu)."
            if self.language == "de"
            else "Please rate the following statements on a scale from 1 (strongly disagree) to 5 (strongly agree)."
        ]
        for it in self.instrument.items:
            txt = it.de if self.language == "de" else it.en
            lines.append(f"- [{it.item_id}] {txt}")
        return "\n".join(lines)

    def _validator(self, raw: dict) -> Optional[dict]:
        """Accept ``raw`` if ``responses`` covers all items with ints in 1..5.

        Every entry in ``responses`` (including keys beyond the instrument's
        items) must coerce to an int in range; on success ``raw["responses"]``
        is replaced by the coerced dict.  Returns ``None`` to reject.
        """
        if not isinstance(raw, dict):
            return None
        resp = raw.get("responses")
        if not isinstance(resp, dict):
            return None
        required = {it.item_id for it in self.instrument.items}
        if not required.issubset(resp.keys()):
            return None
        coerced: dict[str, int] = {}
        for k, v in resp.items():
            iv = coerce_int(v)
            if iv is None or not 1 <= iv <= 5:
                return None
            coerced[k] = iv
        raw["responses"] = coerced
        return raw

    @staticmethod
    def _clean_confidence(value: object) -> dict[str, float]:
        """Reduce the optional ``confidence`` section to finite floats.

        ``confidence`` is not covered by ``_validator``, so the model may
        return ``null``, a non-dict, or non-numeric entries.  Those are
        dropped here instead of crashing after an otherwise valid reply.
        """
        if not isinstance(value, dict):
            return {}
        cleaned: dict[str, float] = {}
        for key, item in value.items():
            try:
                number = float(item)
            except (TypeError, ValueError):
                continue
            if math.isfinite(number):
                cleaned[key] = number
        return cleaned

    def administer(self, persona: PersonaRecord, phase: InterviewPhase) -> LikertResponse:
        """Interview ``persona`` and return its validated response tagged with ``phase``."""
        raw = self.interviewer.ask_in_character(
            persona,
            user_prompt=self._user_prompt(),
            schema_hint=self._schema_hint(),
            validate=self._validator,
        )
        return LikertResponse(
            agent_id=persona.agent_id,
            phase=phase,
            responses={k: int(v) for k, v in raw["responses"].items()},
            confidence=self._clean_confidence(raw.get("confidence")),
            open_comment=raw.get("open_comment"),
        )
|
||||
|
||||
|
||||
def run_aggregate(t0: list[LikertResponse], t1: list[LikertResponse]) -> dict:
    """Summarise how Likert ratings moved between two interview waves.

    Agents are paired by ``agent_id``; when an id occurs more than once within
    a wave the last response wins. Deltas are ``t1 - t0`` per item.

    Args:
        t0: Responses from the earlier wave.
        t1: Responses from the later wave.

    Returns:
        A dict with ``n_paired``, ``n_t0_only``, ``n_t1_only``, ``per_item``
        (mean / sample SD / counts of the deltas for every item seen in either
        wave) and ``per_agent`` (summed absolute drift over the items both
        waves answered). Items without any paired data report ``n == 0`` with
        ``None`` statistics, using the same keys as populated items.
    """
    # Materialise once so lists, tuples or other iterables can be mixed freely.
    t0 = list(t0)
    t1 = list(t1)

    by_t0 = {r.agent_id: r for r in t0}
    by_t1 = {r.agent_id: r for r in t1}
    paired = sorted(set(by_t0) & set(by_t1))

    # Every item id seen anywhere, including responses of unpaired agents.
    items: set[str] = set()
    for r in (*t0, *t1):
        items.update(r.responses.keys())

    per_item: dict[str, dict] = {}
    for it in sorted(items):
        deltas = []
        for aid in paired:
            v0 = by_t0[aid].responses.get(it)
            v1 = by_t1[aid].responses.get(it)
            if v0 is None or v1 is None:
                continue
            deltas.append(v1 - v0)
        if not deltas:
            # Same keys as the populated case so consumers can index blindly.
            per_item[it] = {
                "mean_delta": None,
                "sd_delta": None,
                "n": 0,
                "n_positive": 0,
                "n_negative": 0,
            }
            continue
        m = sum(deltas) / len(deltas)
        # Sample variance (n - 1); a single delta yields 0 rather than dividing by 0.
        var = sum((d - m) ** 2 for d in deltas) / max(len(deltas) - 1, 1)
        per_item[it] = {
            "mean_delta": m,
            "sd_delta": math.sqrt(var),
            "n": len(deltas),
            "n_positive": sum(1 for d in deltas if d > 0),
            "n_negative": sum(1 for d in deltas if d < 0),
        }

    per_agent: dict[int, dict] = {}
    for aid in paired:
        r0 = by_t0[aid].responses
        r1 = by_t1[aid].responses
        common = set(r0) & set(r1)
        total = sum(abs(r1[k] - r0[k]) for k in common)
        per_agent[aid] = {"total_abs_drift": total, "n_items": len(common)}

    return {
        "n_paired": len(paired),
        "n_t0_only": len(set(by_t0) - set(by_t1)),
        "n_t1_only": len(set(by_t1) - set(by_t0)),
        "per_item": per_item,
        "per_agent": per_agent,
    }
|
||||
|
|
@ -0,0 +1,82 @@
|
|||
from __future__ import annotations
|
||||
import json
|
||||
import statistics
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
import yaml
|
||||
from app.models.interview import ScenarioRating, ScenarioResponse
|
||||
from app.services.interviews.base import StakeholderInterviewer, PersonaRecord, coerce_int
|
||||
|
||||
class ScenarioSubagent:
    """Ask a persona to rate every scenario of a YAML-defined instrument.

    The instrument is read with ``yaml.safe_load`` and must provide a
    ``scenarios`` list whose entries carry ``scenario_id`` plus the
    ``label_*`` / ``description_*`` texts used by the prompt.
    """

    # Integer-rated dimensions every scenario rating must contain (1..7).
    _RATING_DIMENSIONS = ("desirability", "plausibility", "impact_on_my_group", "fairness")

    def __init__(self, llm, memory, instrument_path: Path, language: str = "de"):
        """Load the instrument file and create the interviewer.

        Args:
            llm: Passed through to ``StakeholderInterviewer``.
            memory: Passed through to ``StakeholderInterviewer``.
            instrument_path: YAML file describing the scenarios.
            language: ``"de"`` selects the German texts; any other value
                selects the English texts.
        """
        with Path(instrument_path).open("r", encoding="utf-8") as f:
            self.instrument = yaml.safe_load(f)
        self.interviewer = StakeholderInterviewer(llm=llm, memory=memory, language=language)
        self.language = language

    def _schema_hint(self) -> str:
        """Return a JSON string sketching the expected per-scenario rating object."""
        sids = [s["scenario_id"] for s in self.instrument["scenarios"]]
        return json.dumps({
            "ratings": {sid: {
                "desirability": "<int 1-7>",
                "plausibility": "<int 1-7>",
                "impact_on_my_group": "<int 1-7>",
                "fairness": "<int 1-7>",
                "if_woke_up_response": "<string>",
            } for sid in sids}
        }, ensure_ascii=False)

    def _user_prompt(self) -> str:
        """Render the instruction followed by one labelled block per scenario."""
        if self.language == "de":
            head = ("Bewerten Sie jedes der folgenden Szenarien auf vier Dimensionen (1-7) "
                    "und beantworten Sie kurz, was Sie tun würden, wenn Sie in dieser Welt aufwachten.")
        else:
            head = ("Rate each of the following scenarios on four dimensions (1-7) "
                    "and briefly answer what you would do if you woke up in this world.")
        blocks = []
        for s in self.instrument["scenarios"]:
            label = s["label_de"] if self.language == "de" else s["label_en"]
            desc = s["description_de"] if self.language == "de" else s["description_en"]
            blocks.append(f"--- {s['scenario_id']}: {label} ---\n{desc}")
        return head + "\n\n" + "\n\n".join(blocks)

    def _validate(self, raw: dict) -> Optional[dict]:
        """Check a reply and normalise its ratings to ints in place.

        Returns:
            ``raw`` (with each dimension coerced to ``int``) when it rates
            exactly the instrument's scenarios with values in 1..7, otherwise
            ``None``. Malformed shapes, including a ``ratings`` value that is
            not a dict, are rejected rather than raising.
        """
        if not isinstance(raw, dict):
            return None
        ratings = raw.get("ratings")
        # A list / null / scalar here used to raise AttributeError on .keys().
        if not isinstance(ratings, dict):
            return None
        sids = {s["scenario_id"] for s in self.instrument["scenarios"]}
        if set(ratings.keys()) != sids:
            return None
        for v in ratings.values():
            if not isinstance(v, dict):
                return None
            for k in self._RATING_DIMENSIONS:
                iv = coerce_int(v.get(k))
                if iv is None or not 1 <= iv <= 7:
                    return None
                v[k] = iv
            # The free-text answer may be absent, but if present it must be a string.
            if not isinstance(v.get("if_woke_up_response", ""), str):
                return None
        return raw

    def administer(self, persona: PersonaRecord) -> ScenarioResponse:
        """Interview ``persona`` and return its ratings as a ``ScenarioResponse``."""
        raw = self.interviewer.ask_in_character(
            persona, user_prompt=self._user_prompt(),
            schema_hint=self._schema_hint(), validate=self._validate,
        )
        ratings = {sid: ScenarioRating(**v) for sid, v in raw["ratings"].items()}
        return ScenarioResponse(agent_id=persona.agent_id, ratings=ratings)
|
||||
|
||||
def polarity_matrix(responses: list[ScenarioResponse]) -> dict:
    """Aggregate scenario ratings across agents into per-scenario statistics.

    Args:
        responses: Scenario responses; each exposes ``ratings`` mapping a
            scenario id to an object with ``desirability``, ``plausibility``,
            ``impact_on_my_group`` and ``fairness``.

    Returns:
        A dict keyed by scenario id (inserted in sorted order) holding the
        number of ratings, the mean of each dimension, and the population
        standard deviation of desirability and plausibility (``0.0`` when only
        one rating exists).
    """
    # Group in one pass. The previous version re-scanned ``responses`` for
    # every scenario, which silently yielded empty rows for one-shot iterables.
    grouped: dict[str, list] = {}
    for response in responses:
        for sid, rating in response.ratings.items():
            grouped.setdefault(sid, []).append(rating)

    matrix: dict[str, dict] = {}
    for sid in sorted(grouped):
        vals = grouped[sid]
        several = len(vals) > 1
        matrix[sid] = {
            "n": len(vals),
            "mean_desirability": statistics.mean(v.desirability for v in vals),
            "mean_plausibility": statistics.mean(v.plausibility for v in vals),
            "mean_impact": statistics.mean(v.impact_on_my_group for v in vals),
            "mean_fairness": statistics.mean(v.fairness for v in vals),
            "sd_desirability": statistics.pstdev([v.desirability for v in vals]) if several else 0.0,
            "sd_plausibility": statistics.pstdev([v.plausibility for v in vals]) if several else 0.0,
        }
    return matrix
|
||||
|
|
@ -0,0 +1,75 @@
|
|||
from __future__ import annotations
|
||||
import json
|
||||
import time
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from pydantic import BaseModel
|
||||
from app.models.interview import InterviewPhase, SubagentKind
|
||||
|
||||
|
||||
class InterviewStore:
    """File-backed storage for the interview runs of one simulation.

    Everything lives under ``<root>/simulations/<sim_id>/interviews``; each run
    gets its own ``<phase>/<subagent>/<run_id>`` directory, and a
    ``latest.json`` pointer beside the run directories names the run most
    recently passed to :meth:`mark_latest`.
    """

    def __init__(self, root: Path, sim_id: str):
        """Create the interviews directory for ``sim_id`` if it is missing."""
        # NOTE(review): sim_id is joined into the path unchecked — confirm
        # callers never pass untrusted values containing path separators.
        self.base = Path(root) / "simulations" / sim_id / "interviews"
        self.base.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _append_line(path: Path, line: str) -> None:
        """Append ``line`` plus a newline to ``path`` as UTF-8."""
        with path.open("a", encoding="utf-8") as f:
            f.write(line + "\n")

    def start_run(self, phase: InterviewPhase, subagent: SubagentKind) -> Path:
        """Create a fresh run directory, write its ``run.json`` and return it.

        The run id is a local timestamp plus six random hex characters.
        """
        run_id = time.strftime("%Y%m%dT%H%M%S") + "-" + uuid.uuid4().hex[:6]
        run_dir = self.base / phase.value / subagent.value / run_id
        run_dir.mkdir(parents=True, exist_ok=True)
        meta = {"run_id": run_id, "phase": phase.value, "subagent": subagent.value,
                "created_at": time.time()}
        (run_dir / "run.json").write_text(json.dumps(meta, indent=2), encoding="utf-8")
        return run_dir

    def append_response(self, run_dir: Path, model: BaseModel) -> None:
        """Append one model, serialised via ``model_dump_json``, to ``responses.jsonl``."""
        self._append_line(run_dir / "responses.jsonl", model.model_dump_json())

    def append_jsonl(self, run_dir: Path, filename: str, payload: dict | BaseModel) -> None:
        """Append ``payload`` as one JSON line to ``filename`` inside ``run_dir``.

        ``BaseModel`` payloads use their own JSON dump; anything else goes
        through ``json.dumps`` with non-ASCII characters preserved.
        """
        if isinstance(payload, BaseModel):
            line = payload.model_dump_json()
        else:
            line = json.dumps(payload, ensure_ascii=False)
        self._append_line(run_dir / filename, line)

    def read_responses(self, run_dir: Path, filename: str = "responses.jsonl") -> list[dict]:
        """Return the parsed JSON lines of ``filename``; ``[]`` if the file is absent.

        Blank lines are skipped.
        """
        path = run_dir / filename
        if not path.exists():
            return []
        return [json.loads(line) for line in path.read_text(encoding="utf-8").splitlines() if line.strip()]

    def write_aggregate(self, run_dir: Path, payload: dict) -> None:
        """Write ``payload`` to ``aggregate.json`` in ``run_dir`` (overwrites)."""
        (run_dir / "aggregate.json").write_text(
            json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")

    def write_named(self, run_dir: Path, name: str, payload: Any) -> None:
        """Write ``payload`` as indented JSON to the file ``name`` in ``run_dir``."""
        (run_dir / name).write_text(
            json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")

    def audit(
        self,
        run_dir: Path,
        agent_id: int | None,
        event: str,
        detail: str | dict = "",
    ) -> None:
        """Append a timestamped audit entry to ``audit.jsonl`` in ``run_dir``.

        Values inside ``detail`` that JSON cannot encode are stringified.
        """
        entry = {"ts": time.time(), "agent_id": agent_id, "event": event, "detail": detail}
        self._append_line(
            run_dir / "audit.jsonl",
            json.dumps(entry, ensure_ascii=False, default=str),
        )

    def mark_latest(self, run_dir: Path) -> None:
        """Point ``latest.json`` (next to ``run_dir``) at ``run_dir``.

        The path is stored relative to the store base with forward slashes so
        the pointer reads the same on every platform.

        Raises:
            ValueError: If ``run_dir`` is not located under the store base.
        """
        pointer = run_dir.parent / "latest.json"
        pointer.write_text(json.dumps({
            "run_dir": run_dir.relative_to(self.base).as_posix(),
        }), encoding="utf-8")

    def latest_run(self, phase: InterviewPhase, subagent: SubagentKind) -> Path | None:
        """Return the run directory recorded by ``mark_latest``, if it still exists.

        Returns ``None`` when no pointer has been written for this
        phase/subagent or the recorded directory is gone.
        """
        pointer = self.base / phase.value / subagent.value / "latest.json"
        if not pointer.exists():
            return None
        # Read with the same encoding mark_latest writes.
        rel = json.loads(pointer.read_text(encoding="utf-8"))["run_dir"]
        path = self.base / rel
        return path if path.exists() else None
|
||||
|
|
@ -0,0 +1,68 @@
|
|||
from __future__ import annotations
|
||||
from typing import Any, Optional
|
||||
from app.models.interview import (
|
||||
LikertResponse, QSortResponse, DelphiRatingResponse, ScenarioResponse, SubagentKind,
|
||||
)
|
||||
|
||||
class InterviewZepWriter:
    """Publish interview results to a Zep graph as short text episodes.

    ``memory_updater`` has to offer ``add_text_episode(graph_id, text)``. Any
    object with that method works, including a no-op shim for tests or stub
    mode; an updater without it makes every write raise ``RuntimeError``.
    """

    def __init__(self, memory_updater, graph_id: str):
        """Remember the updater and the graph every episode is written to."""
        self.graph_id = graph_id
        self.updater = memory_updater

    def _emit(self, text: str) -> None:
        """Send ``text`` to the graph, or raise if the updater lacks the API."""
        if not hasattr(self.updater, "add_text_episode"):
            raise RuntimeError(
                "memory_updater is missing add_text_episode(graph_id, text); "
                "InterviewZepWriter requires the explicit text-episode API."
            )
        self.updater.add_text_episode(self.graph_id, text)

    def _summarize_likert(self, r: LikertResponse) -> str:
        """Describe a Likert response by its mean and its three highest/lowest items."""
        scores = r.responses
        average = sum(scores.values()) / max(len(scores), 1)
        # NOTE(review): the two lists are purely rank-based (top/bottom three),
        # so with few items or uniform ratings they can overlap — confirm that
        # the "agrees"/"disagrees" wording is acceptable downstream.
        highest = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)[:3]
        lowest = sorted(scores.items(), key=lambda pair: pair[1])[:3]
        agree_ids = [item_id for item_id, _ in highest]
        disagree_ids = [item_id for item_id, _ in lowest]
        return (f"mean={average:.2f}; agrees with {agree_ids}; "
                f"disagrees with {disagree_ids}")

    def _summarize_qsort(self, r: QSortResponse) -> str:
        """List the statements placed at +2 or above and at -2 or below."""
        strongly_plus = []
        strongly_minus = []
        for statement_id, slot in r.placements.items():
            if slot >= 2:
                strongly_plus.append(statement_id)
            elif slot <= -2:
                strongly_minus.append(statement_id)
        return f"+strongly:{strongly_plus}; -strongly:{strongly_minus}"

    def _summarize_scenario(self, r: ScenarioResponse) -> str:
        """Join desirability and plausibility of every rated scenario."""
        return "; ".join(
            f"{scenario_id}: des={rating.desirability} plaus={rating.plausibility}"
            for scenario_id, rating in r.ratings.items()
        )

    def write_per_agent(
        self, subagent: SubagentKind, response: Any, agent_name: str,
        phase: Optional[str] = None,
    ) -> None:
        """Write a one-line summary of a single agent's response.

        Args:
            subagent: Kind of interview; its ``value`` appears in the text.
            response: One of the interview response models; anything else is
                summarised by the first 200 characters of ``str(response)``.
            agent_name: Name used to label the episode.
            phase: Explicit phase label. When empty, Likert responses use
                their own phase, Delphi ratings use ``T1/R<round>`` and
                everything else uses ``T1``.
        """
        if isinstance(response, LikertResponse):
            if not phase:
                phase = response.phase.value
            summary = self._summarize_likert(response)
        elif isinstance(response, QSortResponse):
            if not phase:
                phase = "T1"
            summary = self._summarize_qsort(response)
        elif isinstance(response, ScenarioResponse):
            if not phase:
                phase = "T1"
            summary = self._summarize_scenario(response)
        elif isinstance(response, DelphiRatingResponse):
            if not phase:
                phase = f"T1/R{response.round}"
            summary = f"round={response.round}; {len(response.ratings)} themes rated"
        else:
            if not phase:
                phase = "T1"
            summary = str(response)[:200]
        self._emit(f"Agent {agent_name} (interview/{subagent.value}/{phase}): {summary}")

    def write_aggregate(self, subagent: SubagentKind, summary: str) -> None:
        """Write an aggregate summary line for ``subagent``."""
        text = f"Interview aggregate ({subagent.value}): {summary}"
        self._emit(text)
|
||||
|
|
@ -1091,8 +1091,10 @@ class OasisProfileGenerator:
|
|||
with open(file_path, 'w', newline='', encoding='utf-8') as f:
|
||||
writer = csv.writer(f)
|
||||
|
||||
# 写入OASIS要求的表头
|
||||
headers = ['user_id', 'name', 'username', 'user_char', 'description']
|
||||
# 写入表头:OASIS要求的5列 + 额外的source_entity_uuid列(反向链接到Zep实体)。
|
||||
# OASIS按列名读取,额外的列不会影响其行为,但允许下游(面试子系统等)
|
||||
# 重建 agent_id -> Zep entity uuid 的映射。
|
||||
headers = ['user_id', 'name', 'username', 'user_char', 'description', 'source_entity_uuid']
|
||||
writer.writerow(headers)
|
||||
|
||||
# 写入数据行
|
||||
|
|
@ -1112,7 +1114,8 @@ class OasisProfileGenerator:
|
|||
profile.name, # name: 真实姓名
|
||||
profile.user_name, # username: 用户名
|
||||
user_char, # user_char: 完整人设(内部LLM使用)
|
||||
description # description: 简短简介(外部显示)
|
||||
description, # description: 简短简介(外部显示)
|
||||
profile.source_entity_uuid or "", # source_entity_uuid: Zep实体UUID
|
||||
]
|
||||
writer.writerow(row)
|
||||
|
||||
|
|
@ -1184,6 +1187,12 @@ class OasisProfileGenerator:
|
|||
item["profession"] = profile.profession
|
||||
if profile.interested_topics:
|
||||
item["interested_topics"] = profile.interested_topics
|
||||
# source_entity_uuid: 反向链接到Zep实体,下游(面试子系统等)需要此映射以
|
||||
# 在Zep图谱中查找Agent的上下文。仅在存在时写入。
|
||||
if profile.source_entity_uuid:
|
||||
item["source_entity_uuid"] = profile.source_entity_uuid
|
||||
if profile.source_entity_type:
|
||||
item["source_entity_type"] = profile.source_entity_type
|
||||
|
||||
data.append(item)
|
||||
|
||||
|
|
|
|||
|
|
@ -129,6 +129,13 @@ class SimulationManager:
|
|||
'../../uploads/simulations'
|
||||
)
|
||||
|
||||
# Class-level hook registries so callbacks survive across instances.
|
||||
# The Flask API endpoints construct fresh `SimulationManager()` instances per request,
|
||||
# while lifecycle hooks are registered once at app startup — storing the lists on the
|
||||
# instance would silently drop those hooks on every request.
|
||||
_on_ready_hooks: list = []
|
||||
_on_completed_hooks: list = []
|
||||
|
||||
def __init__(self):
|
||||
# 确保目录存在
|
||||
os.makedirs(self.SIMULATION_DATA_DIR, exist_ok=True)
|
||||
|
|
@ -191,6 +198,46 @@ class SimulationManager:
|
|||
self._simulations[simulation_id] = state
|
||||
return state
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Lifecycle hook registration (class-level — see class docstring)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@classmethod
def register_on_ready(cls, fn) -> None:
    """Register a callback invoked when a simulation transitions to READY.

    The registry lives on the class, so hooks registered once at app startup
    stay visible to every ``SimulationManager()`` constructed later (e.g. per
    request in Flask).

    Registration is idempotent: a callable already present is not added
    again. Without this, running the startup code more than once (several
    ``create_app()`` calls in one process) would fire the same hook multiple
    times per transition.

    Args:
        fn: Callable taking the simulation state as its only argument.
    """
    if fn not in cls._on_ready_hooks:
        cls._on_ready_hooks.append(fn)
|
||||
|
||||
@classmethod
def register_on_completed(cls, fn) -> None:
    """Register a callback invoked when a simulation transitions to COMPLETED.

    The registry lives on the class, so hooks registered once at app startup
    stay visible to every ``SimulationManager()`` constructed later (e.g. per
    request in Flask).

    Registration is idempotent: a callable already present is not added
    again, so repeating the startup registration does not make the hook run
    several times per completion.

    Args:
        fn: Callable taking the simulation state as its only argument.
    """
    if fn not in cls._on_completed_hooks:
        cls._on_completed_hooks.append(fn)
|
||||
|
||||
def _notify_on_ready(self, state: "SimulationState") -> None:
|
||||
"""Invoke all on_ready hooks; exceptions are isolated per hook."""
|
||||
for fn in list(type(self)._on_ready_hooks):
|
||||
try:
|
||||
fn(state)
|
||||
except Exception as e:
|
||||
logger.warning(f"on_ready hook failed: {e!r}")
|
||||
|
||||
def _notify_on_completed(self, state: "SimulationState") -> None:
|
||||
"""Invoke all on_completed hooks; exceptions are isolated per hook."""
|
||||
for fn in list(type(self)._on_completed_hooks):
|
||||
try:
|
||||
fn(state)
|
||||
except Exception as e:
|
||||
logger.warning(f"on_completed hook failed: {e!r}")
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def create_simulation(
|
||||
self,
|
||||
project_id: str,
|
||||
|
|
@ -441,6 +488,7 @@ class SimulationManager:
|
|||
# 更新状态
|
||||
state.status = SimulationStatus.READY
|
||||
self._save_simulation_state(state)
|
||||
self._notify_on_ready(state)
|
||||
|
||||
logger.info(f"模拟准备完成: {simulation_id}, "
|
||||
f"entities={state.entities_count}, profiles={state.profiles_count}")
|
||||
|
|
|
|||
|
|
@ -227,6 +227,28 @@ class SimulationRunner:
|
|||
# 图谱记忆更新配置
|
||||
_graph_memory_enabled: Dict[str, bool] = {} # simulation_id -> enabled
|
||||
|
||||
# Completion callbacks registered from outside (e.g. SimulationManager lifecycle hooks).
|
||||
# Each callable receives the SimulationRunState that just transitioned to COMPLETED.
|
||||
_on_completed_callbacks: list = []
|
||||
|
||||
@classmethod
def register_on_completed(cls, fn) -> None:
    """Register a callback invoked when a simulation transitions to COMPLETED.

    The callback receives the SimulationRunState instance. It is called from
    the monitor daemon thread, so keep it short or hand off to another thread.

    Registration is idempotent: a callable that compares equal to one already
    registered is not added again, so repeated startup registration does not
    multiply the calls per completion.

    Args:
        fn: Callable taking the run state as its only argument.
    """
    if fn not in cls._on_completed_callbacks:
        cls._on_completed_callbacks.append(fn)
|
||||
|
||||
@classmethod
|
||||
def _fire_on_completed(cls, state: SimulationRunState) -> None:
|
||||
"""Invoke all registered on_completed callbacks; exceptions are isolated."""
|
||||
for fn in list(cls._on_completed_callbacks):
|
||||
try:
|
||||
fn(state)
|
||||
except Exception as e:
|
||||
logger.warning(f"on_completed callback failed: {e!r}")
|
||||
|
||||
@classmethod
|
||||
def get_run_state(cls, simulation_id: str) -> Optional[SimulationRunState]:
|
||||
"""获取运行状态"""
|
||||
|
|
@ -528,6 +550,7 @@ class SimulationRunner:
|
|||
state.runner_status = RunnerStatus.COMPLETED
|
||||
state.completed_at = datetime.now().isoformat()
|
||||
logger.info(f"模拟完成: {simulation_id}")
|
||||
cls._fire_on_completed(state)
|
||||
else:
|
||||
state.runner_status = RunnerStatus.FAILED
|
||||
# 从主日志文件读取错误信息
|
||||
|
|
@ -638,6 +661,7 @@ class SimulationRunner:
|
|||
state.runner_status = RunnerStatus.COMPLETED
|
||||
state.completed_at = datetime.now().isoformat()
|
||||
logger.info(f"所有平台模拟已完成: {state.simulation_id}")
|
||||
cls._fire_on_completed(state)
|
||||
|
||||
# 更新轮次信息(从 round_end 事件)
|
||||
elif event_type == "round_end":
|
||||
|
|
|
|||
|
|
@ -337,6 +337,44 @@ class ZepGraphMemoryUpdater:
|
|||
self._total_activities += 1
|
||||
logger.debug(f"添加活动到Zep队列: {activity.agent_name} - {activity.action_type}")
|
||||
|
||||
def add_text_episode(self, graph_id: str, text: str) -> None:
    """Write one piece of text straight to the Zep graph.

    The text is sent synchronously and does not pass through the batch queue.
    It is meant for callers such as the interview subsystem
    (InterviewZepWriter) that need an immediate write outside the
    agent-activity pipeline. The batching of ``_send_batch_activities`` is
    skipped, but failed sends are still retried.

    Args:
        graph_id: Target graph id; falls back to ``self.graph_id`` when empty.
        text: Text to send. Empty text is ignored.
    """
    if not text:
        return

    target_graph_id = graph_id or self.graph_id
    if not target_graph_id:
        logger.warning("add_text_episode 调用时未指定graph_id,跳过")
        return

    for attempt in range(self.MAX_RETRIES):
        try:
            self.client.graph.add(
                graph_id=target_graph_id,
                type="text",
                data=text,
            )
            self._total_sent += 1
            self._total_items_sent += 1
            logger.debug(f"add_text_episode 发送成功 (graph={target_graph_id}, len={len(text)})")
            return
        except Exception as exc:
            out_of_retries = not (attempt < self.MAX_RETRIES - 1)
            if out_of_retries:
                # Final attempt failed: record it and give up without raising.
                logger.error(f"add_text_episode 失败,已重试{self.MAX_RETRIES}次: {exc}")
                self._failed_count += 1
            else:
                logger.warning(f"add_text_episode 失败 (尝试 {attempt + 1}/{self.MAX_RETRIES}): {exc}")
                # Linear back-off: wait longer after each failed attempt.
                time.sleep(self.RETRY_DELAY * (attempt + 1))
|
||||
|
||||
def add_activity_from_dict(self, data: Dict[str, Any], platform: str):
|
||||
"""
|
||||
从字典数据添加活动
|
||||
|
|
|
|||
|
|
@ -32,6 +32,82 @@ class LLMClient:
|
|||
base_url=self.base_url
|
||||
)
|
||||
|
||||
def _stub_key(self, messages: list[dict]) -> str:
|
||||
user_msg = next((m["content"] for m in reversed(messages) if m.get("role") == "user"), "")
|
||||
sys_msg = next((m["content"] for m in messages if m.get("role") == "system"), "")
|
||||
# Allow callers to embed an explicit stub_key=... token
|
||||
for chunk in user_msg.split():
|
||||
if chunk.startswith("stub_key="):
|
||||
return chunk[len("stub_key="):]
|
||||
import hashlib
|
||||
return hashlib.sha256((sys_msg + "|" + user_msg).encode("utf-8")).hexdigest()[:12]
|
||||
|
||||
def _stub_response(self, messages: list[dict]) -> str:
|
||||
import json as _json
|
||||
return _json.dumps(self._stub_response_json(messages), ensure_ascii=False)
|
||||
|
||||
def _stub_response_json(self, messages: list[dict]) -> dict:
|
||||
import hashlib, json as _json
|
||||
sys_msg = next((m["content"] for m in messages if m.get("role") == "system"), "")
|
||||
usr_msg = next((m["content"] for m in reversed(messages) if m.get("role") == "user"), "")
|
||||
h = hashlib.sha256((sys_msg + "|" + usr_msg).encode("utf-8")).hexdigest()
|
||||
seed = int(h[:8], 16)
|
||||
rng = (seed % 5) + 1
|
||||
|
||||
# Longitudinal Likert (12 items)
|
||||
if all(tok in usr_msg for tok in ("stk_1", "gov_1", "mkt_1", "clm_1")):
|
||||
ids = ["stk_1","stk_2","stk_3","gov_1","gov_2","gov_3",
|
||||
"mkt_1","mkt_2","mkt_3","clm_1","clm_2","clm_3"]
|
||||
return {"responses": {k: ((seed >> (i*3)) % 5) + 1 for i, k in enumerate(ids)},
|
||||
"confidence": {k: 0.6 for k in ids},
|
||||
"open_comment": f"stub:{h[:8]}"}
|
||||
|
||||
# Diversity Q-sort: 24 statements + 6 axes, forced distribution 2,3,4,6,4,3,2
|
||||
if "st_01" in usr_msg and "ax_pres_extr" in usr_msg:
|
||||
buckets = [-3]*2 + [-2]*3 + [-1]*4 + [0]*6 + [1]*4 + [2]*3 + [3]*2
|
||||
stmts = [f"st_{i+1:02d}" for i in range(24)]
|
||||
# shuffle deterministically
|
||||
order = sorted(range(24), key=lambda i: (h[i % len(h)], i))
|
||||
placements = {stmts[i]: buckets[order.index(i)] for i in range(24)}
|
||||
return {
|
||||
"placements": placements,
|
||||
"likert_axes": {a: ((seed >> (j*3)) % 7) + 1 for j, a in enumerate(
|
||||
["ax_pres_extr","ax_loc_eu","ax_sci_trad",
|
||||
"ax_ind_col","ax_short_long","ax_mkt_reg"])},
|
||||
}
|
||||
|
||||
# Scenario: S1..S4 × 4 dims
|
||||
if all(s in usr_msg for s in ("S1:", "S2:", "S3:", "S4:")):
|
||||
return {"ratings": {sid: {
|
||||
"desirability": ((seed >> (i*3)) % 7) + 1,
|
||||
"plausibility": ((seed >> (i*3+1)) % 7) + 1,
|
||||
"impact_on_my_group": ((seed >> (i*3+2)) % 7) + 1,
|
||||
"fairness": ((seed >> (i*3+4)) % 7) + 1,
|
||||
"if_woke_up_response": f"act-{sid}-{h[:4]}",
|
||||
} for i, sid in enumerate(["S1","S2","S3","S4"])}}
|
||||
|
||||
# Delphi R1: q1..q4 free text
|
||||
if "q1" in usr_msg and "q2" in usr_msg and "Bewerten" not in usr_msg and "Sie sehen" not in usr_msg:
|
||||
return {"answers": {qid: f"stub-themes-{qid}-{h[:4]}" for qid in ("q1","q2","q3","q4")}}
|
||||
|
||||
# Delphi theme extraction (no in-character system prompt)
|
||||
if "extract distinct thematic codes" in sys_msg:
|
||||
return {"themes": [{"theme_id": f"theme_{i}", "label": f"Thema {i}"} for i in range(5)]}
|
||||
|
||||
# Delphi R2 (rate) or R3 (revise)
|
||||
if "Bewerten Sie jedes Thema" in usr_msg or "Sie sehen unten" in usr_msg \
|
||||
or "Rate each theme" in usr_msg or "Below are the anonymised" in usr_msg:
|
||||
theme_ids = [f"theme_{i}" for i in range(5)]
|
||||
out = {"ratings": {tid: {"importance": ((seed >> (i*2)) % 5) + 1,
|
||||
"plausibility": ((seed >> (i*2+1)) % 5) + 1}
|
||||
for i, tid in enumerate(theme_ids)}}
|
||||
if "Sie sehen unten" in usr_msg or "Below are the anonymised" in usr_msg:
|
||||
out["justification"] = "stub-revision"
|
||||
return out
|
||||
|
||||
# Fallback
|
||||
return {"stub_key": h[:12], "value": rng}
|
||||
|
||||
def chat(
|
||||
self,
|
||||
messages: List[Dict[str, str]],
|
||||
|
|
@ -51,6 +127,10 @@ class LLMClient:
|
|||
Returns:
|
||||
模型响应文本
|
||||
"""
|
||||
from app.config import Config
|
||||
if getattr(Config, "LLM_STUB_MODE", False):
|
||||
return self._stub_response(messages)
|
||||
|
||||
kwargs = {
|
||||
"model": self.model,
|
||||
"messages": messages,
|
||||
|
|
@ -84,6 +164,10 @@ class LLMClient:
|
|||
Returns:
|
||||
解析后的JSON对象
|
||||
"""
|
||||
from app.config import Config
|
||||
if getattr(Config, "LLM_STUB_MODE", False):
|
||||
return self._stub_response_json(messages)
|
||||
|
||||
response = self.chat(
|
||||
messages=messages,
|
||||
temperature=temperature,
|
||||
|
|
|
|||
|
|
@ -32,6 +32,11 @@ dependencies = [
|
|||
# 工具库
|
||||
"python-dotenv>=1.0.0",
|
||||
"pydantic>=2.0.0",
|
||||
"PyYAML>=6.0",
|
||||
"scikit-learn>=1.4",
|
||||
"scipy>=1.12",
|
||||
"numpy>=1.26",
|
||||
"pandas>=2.1",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
|
|
|
|||
|
|
@ -0,0 +1,8 @@
|
|||
[pytest]
|
||||
testpaths = tests
|
||||
python_files = test_*.py
|
||||
python_classes = Test*
|
||||
python_functions = test_*
|
||||
addopts = -ra --strict-markers
|
||||
markers =
|
||||
integration: marks integration tests (deselect with -m 'not integration')
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
name: delphi_v1
|
||||
version: "1.0"
|
||||
language_default: de
|
||||
rounds: 3
|
||||
questions:
|
||||
- {question_id: q1, de: "Welche drei Faktoren werden die deutsche Fischerei bis 2040 am stärksten prägen?", en: "Which three factors will most shape German fisheries by 2040?"}
|
||||
- {question_id: q2, de: "Welche Akteurinnen und Akteure sind heute entscheidend, werden aber unterschätzt?", en: "Which actors are decisive today but underestimated?"}
|
||||
- {question_id: q3, de: "Was sollte sich in den nächsten fünf Jahren ändern, damit die Fischerei eine Zukunft hat?", en: "What should change in the next five years for fisheries to have a future?"}
|
||||
- {question_id: q4, de: "Welcher Trend macht Ihnen am meisten Hoffnung – und welcher am meisten Sorge?", en: "Which trend gives you most hope — and which most concern?"}
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
name: diversity_v1
|
||||
version: "1.0"
|
||||
language_default: de
|
||||
distribution: [2, 3, 4, 6, 4, 3, 2] # buckets from -3 to +3, total 24
|
||||
statements:
|
||||
- {statement_id: st_01, de: "Die Ostsee gehört den Fischern, die hier seit Generationen leben.", en: "The Baltic belongs to fishers who have lived here for generations."}
|
||||
- {statement_id: st_02, de: "MSC-Zertifizierung schützt vor allem große Konzerne.", en: "MSC certification mainly protects large corporations."}
|
||||
- {statement_id: st_03, de: "Wissenschaftliche Quoten sind die einzige Grundlage für Politik.", en: "Scientific quotas are the only legitimate basis for policy."}
|
||||
- {statement_id: st_04, de: "Aquakultur kann Ostseefischerei ersetzen.", en: "Aquaculture can replace Baltic fisheries."}
|
||||
- {statement_id: st_05, de: "Sportfischer schaden den Beständen mehr als die Berufsfischer.", en: "Recreational anglers harm stocks more than commercial fishers."}
|
||||
- {statement_id: st_06, de: "Die EU-Fischereipolitik kennt die Ostsee nicht.", en: "EU fisheries policy doesn't understand the Baltic."}
|
||||
- {statement_id: st_07, de: "Großtechnische Fischerei ist effizienter und damit nachhaltiger.", en: "Industrial fisheries are more efficient and therefore more sustainable."}
|
||||
- {statement_id: st_08, de: "Wer Fisch isst, sollte mehr dafür bezahlen.", en: "Those who eat fish should pay more for it."}
|
||||
- {statement_id: st_09, de: "Die Kleinfischerei muss subventioniert werden.", en: "Small-scale fisheries must be subsidised."}
|
||||
- {statement_id: st_10, de: "Marine Schutzgebiete sind reine Symbolpolitik.", en: "Marine protected areas are mere symbolism."}
|
||||
- {statement_id: st_11, de: "Russlands Krieg ändert alles in der Ostsee.", en: "Russia's war changes everything in the Baltic."}
|
||||
- {statement_id: st_12, de: "Nur drastische Reduktion der Fangmengen rettet die Bestände.", en: "Only drastic catch reductions will save the stocks."}
|
||||
- {statement_id: st_13, de: "NGOs übertreiben die Krise systematisch.", en: "NGOs systematically exaggerate the crisis."}
|
||||
- {statement_id: st_14, de: "Klimawandel ist das eigentliche Problem, nicht die Fischerei.", en: "Climate change is the real problem, not fisheries."}
|
||||
- {statement_id: st_15, de: "Tradition zählt mehr als kurzfristige Bestandszahlen.", en: "Tradition matters more than short-term stock numbers."}
|
||||
- {statement_id: st_16, de: "Verbraucher entscheiden über die Zukunft des Fisches.", en: "Consumers decide the future of fish."}
|
||||
- {statement_id: st_17, de: "Ohne Generalstreik der Fischer ändert sich nichts.", en: "Without a fishers' general strike, nothing will change."}
|
||||
- {statement_id: st_18, de: "Die Bundesregierung sollte Kutter aufkaufen und stilllegen.", en: "The federal government should buy out and decommission boats."}
|
||||
- {statement_id: st_19, de: "Die Dorschkrise ist Folge gescheiterter Politik.", en: "The cod crisis is the result of policy failure."}
|
||||
- {statement_id: st_20, de: "Ostsee-Aquakultur ist ökologisch problematisch.", en: "Baltic aquaculture is ecologically problematic."}
|
||||
- {statement_id: st_21, de: "Junge Menschen werden keinen Fischereibetrieb mehr übernehmen.", en: "Young people will no longer take over fishing businesses."}
|
||||
- {statement_id: st_22, de: "Markt regelt sich selbst, auch beim Fisch.", en: "The market regulates itself, also for fish."}
|
||||
- {statement_id: st_23, de: "Lokale Genossenschaften sind die Lösung.", en: "Local cooperatives are the solution."}
|
||||
- {statement_id: st_24, de: "In 20 Jahren gibt es keine deutsche Ostseefischerei mehr.", en: "In 20 years there will be no German Baltic fisheries left."}
|
||||
likert_axes:
|
||||
- {axis_id: ax_pres_extr, scale: 7, de: "Bewahrung (1) vs. Nutzung (7)", en: "Preservation (1) vs. Extraction (7)"}
|
||||
- {axis_id: ax_loc_eu, scale: 7, de: "Lokal (1) vs. EU-zentral (7)", en: "Local (1) vs. EU-central (7)"}
|
||||
- {axis_id: ax_sci_trad, scale: 7, de: "Wissenschaft (1) vs. Tradition (7)", en: "Science-led (1) vs. Tradition-led (7)"}
|
||||
- {axis_id: ax_ind_col, scale: 7, de: "Individuum (1) vs. Kollektiv (7)", en: "Individual (1) vs. Collective (7)"}
|
||||
- {axis_id: ax_short_long,scale: 7, de: "Kurzfristig (1) vs. Langfristig (7)", en: "Short-term (1) vs. Long-term (7)"}
|
||||
- {axis_id: ax_mkt_reg, scale: 7, de: "Markt (1) vs. Regulierung (7)", en: "Market (1) vs. Regulation (7)"}
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
name: longitudinal_v1
|
||||
version: "1.0"
|
||||
language_default: de
|
||||
items:
|
||||
# Stock status & recovery
|
||||
- {item_id: stk_1, family: stocks, scale: 5,
|
||||
de: "Der westliche Dorschbestand wird sich bis 2035 erholen.",
|
||||
en: "The Western Baltic cod stock will recover by 2035."}
|
||||
- {item_id: stk_2, family: stocks, scale: 5,
|
||||
de: "Der Heringsbestand in der westlichen Ostsee ist nicht mehr zu retten.",
|
||||
en: "The Western Baltic herring stock can no longer be saved.",
|
||||
reverse_coded: true}
|
||||
- {item_id: stk_3, family: stocks, scale: 5,
|
||||
de: "Wissenschaftliche Bestandsschätzungen sind generell zuverlässig.",
|
||||
en: "Scientific stock assessments are generally reliable."}
|
||||
# Governance & CFP
|
||||
- {item_id: gov_1, family: governance, scale: 5,
|
||||
de: "Die Gemeinsame Fischereipolitik der EU scheitert beim Schutz der Ostseefische.",
|
||||
en: "The EU Common Fisheries Policy fails to protect Baltic fish.",
|
||||
reverse_coded: true}
|
||||
- {item_id: gov_2, family: governance, scale: 5,
|
||||
de: "Entscheidungen über Fangquoten sollten stärker lokal getroffen werden.",
|
||||
en: "Decisions on catch quotas should be taken more locally."}
|
||||
- {item_id: gov_3, family: governance, scale: 5,
|
||||
de: "Die deutsche Bundesregierung handelt entschlossen bei Fischereifragen.",
|
||||
en: "The German federal government acts decisively on fisheries issues."}
|
||||
# Market & MSC
|
||||
- {item_id: mkt_1, family: market, scale: 5,
|
||||
de: "Nur MSC-zertifizierter Fisch sollte verkauft werden dürfen.",
|
||||
en: "Only MSC-certified fish should be allowed for sale."}
|
||||
- {item_id: mkt_2, family: market, scale: 5,
|
||||
de: "Importierter Fisch verdrängt die deutsche Kleinfischerei.",
|
||||
en: "Imported fish displaces German small-scale fisheries."}
|
||||
- {item_id: mkt_3, family: market, scale: 5,
|
||||
de: "Verbraucher zahlen gerne mehr für nachhaltigen Ostseefisch.",
|
||||
en: "Consumers gladly pay more for sustainable Baltic fish."}
|
||||
# Climate & adaptation
|
||||
- {item_id: clm_1, family: climate, scale: 5,
|
||||
de: "Der Klimawandel macht traditionelle Ostseefischerei unmöglich.",
|
||||
en: "Climate change makes traditional Baltic fisheries impossible.",
|
||||
reverse_coded: true}
|
||||
- {item_id: clm_2, family: climate, scale: 5,
|
||||
de: "Aquakultur ist die Zukunft der deutschen Fischwirtschaft.",
|
||||
en: "Aquaculture is the future of the German fishing industry."}
|
||||
- {item_id: clm_3, family: climate, scale: 5,
|
||||
de: "Die Fischerei muss sich grundlegend an neue Arten anpassen.",
|
||||
en: "Fisheries must fundamentally adapt to new species."}
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
name: scenario_v1
|
||||
version: "1.0"
|
||||
language_default: de
|
||||
scenarios:
|
||||
- scenario_id: S1
|
||||
label_de: "Erholung 2040"
|
||||
label_en: "Recovery 2040"
|
||||
description_de: |
|
||||
Bis 2040 haben sich Dorsch- und Heringsbestände in der westlichen Ostsee
|
||||
deutlich erholt. MSC-Zertifizierung ist branchenweit Standard. Die kleine
|
||||
Küstenfischerei hat sich stabilisiert; die Politik gilt als erfolgreich.
|
||||
description_en: |
|
||||
By 2040, Western Baltic cod and herring stocks have substantially recovered.
|
||||
MSC certification is industry-wide standard. Small-scale coastal fisheries
|
||||
have stabilised; policy is regarded as successful.
|
||||
- scenario_id: S2
|
||||
label_de: "Kollaps 2040"
|
||||
label_en: "Collapse 2040"
|
||||
description_de: |
|
||||
Bis 2040 sind Dorsch- und Heringsbestände zusammengebrochen. Die Flotte
|
||||
ist halbiert, Aquakultur dominiert den Markt, Häfen veröden.
|
||||
description_en: |
|
||||
By 2040, cod and herring stocks have collapsed. The fleet is halved,
|
||||
aquaculture dominates the market, harbour towns decline.
|
||||
- scenario_id: S3
|
||||
label_de: "Festung Europa 2040"
|
||||
label_en: "Fortress Europe 2040"
|
||||
description_de: |
|
||||
Bis 2040 verfolgt die EU eine protektionistische Politik mit hohen Importzöllen,
|
||||
Meeresschutzgebiete bedecken 30% der Ostsee, Sportfischerei ist stark eingeschränkt.
|
||||
description_en: |
|
||||
By 2040, the EU pursues a protectionist policy with high import tariffs,
|
||||
MPAs cover 30% of the Baltic, recreational fishing is strongly curtailed.
|
||||
- scenario_id: S4
|
||||
label_de: "Privatisierung 2040"
|
||||
label_en: "Privatisation 2040"
|
||||
description_de: |
|
||||
Bis 2040 sind Fangrechte als handelbare Quoten (ITQs) etabliert. Die Branche
|
||||
hat sich konsolidiert; nur große, kapitalstarke Unternehmen sind übrig.
|
||||
description_en: |
|
||||
By 2040, fishing rights are tradable quotas (ITQs). The industry has
|
||||
consolidated; only large, well-capitalised firms remain.
|
||||
dimensions:
|
||||
- {dimension_id: desirability, scale: 7,
|
||||
de: "Wie wünschenswert ist dieses Szenario?", en: "How desirable is this scenario?"}
|
||||
- {dimension_id: plausibility, scale: 7,
|
||||
de: "Wie plausibel ist dieses Szenario?", en: "How plausible is this scenario?"}
|
||||
- {dimension_id: impact_on_my_group, scale: 7,
|
||||
de: "Wie stark trifft es Ihre Gruppe?", en: "How strongly does it affect your group?"}
|
||||
- {dimension_id: fairness, scale: 7,
|
||||
de: "Wie fair ist dieses Szenario?", en: "How fair is this scenario?"}
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
import os
|
||||
import sys
|
||||
import pathlib
|
||||
import pytest
|
||||
|
||||
# Directory one level above this file's own folder; placed first on
# sys.path so imports resolve against it.
ROOT = pathlib.Path(__file__).resolve().parents[1]
sys.path.insert(0, str(ROOT))

# Placeholder settings for the test run. ``setdefault`` leaves any value
# that is already present in the environment untouched.
for _env_name, _env_value in (
    ("LLM_API_KEY", "test"),
    ("LLM_BASE_URL", "https://example.invalid"),
    ("LLM_MODEL_NAME", "test-model"),
    ("ZEP_API_KEY", "test"),
):
    os.environ.setdefault(_env_name, _env_value)
del _env_name, _env_value
|
||||
|
||||
@pytest.fixture
def tmp_uploads(tmp_path, monkeypatch):
    """Set ``UPLOADS_DIR`` to the per-test temp directory and return that directory."""
    uploads_dir = tmp_path
    monkeypatch.setenv("UPLOADS_DIR", str(uploads_dir))
    return uploads_dir
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
import json
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
from app.config import Config
|
||||
from app.models.interview import SubagentKind, InterviewPhase
|
||||
from app.services.interviews.adapters import FileSystemPersonaProvider
|
||||
from app.services.interviews.base import MemoryDigest
|
||||
from app.services.interviews.zep_writer import InterviewZepWriter
|
||||
from app.services.interview_orchestrator import InterviewOrchestrator
|
||||
from app.services.interview_synthesizer import InterviewSynthesizer
|
||||
from app.utils.llm_client import LLMClient
|
||||
|
||||
# Module-level mark: every test in this module carries the ``integration`` marker.
pytestmark = pytest.mark.integration
|
||||
|
||||
# ``scripts/instruments`` under the directory two levels above this file's folder.
INST_DIR = Path(__file__).resolve().parents[2] / "scripts" / "instruments"
|
||||
|
||||
class _NullUpdater:
    """Memory-updater stand-in that only records episode texts in ``events``."""

    def __init__(self):
        self.events = []

    def add_text_episode(self, graph_id, text):
        # graph_id is accepted but not stored; only the text is recorded.
        self.events += [text]
|
||||
|
||||
class _StaticMem:
    """Memory-provider stub that returns a fixed, available digest naming the agent."""

    def get_digest(self, agent_id, max_chars=2000):
        # max_chars is accepted for signature compatibility and not applied.
        snippet = f"agent {agent_id} memory snippet"
        return MemoryDigest(text=snippet, available=True)
|
||||
|
||||
@pytest.fixture
def seeded_uploads(tmp_path, monkeypatch):
    """Create ``simulations/intg_sim/reddit_profiles.json`` with five personas.

    Enables LLM stub mode through both the environment and ``Config``.
    Returns ``tmp_path``, which now contains the seeded simulation directory.
    """
    monkeypatch.setenv("LLM_STUB_MODE", "true")
    # Use monkeypatch rather than plain assignment so the class-level flag is
    # restored at teardown and does not leak into unrelated tests.
    monkeypatch.setattr(Config, "LLM_STUB_MODE", True, raising=False)

    sim_dir = tmp_path / "simulations" / "intg_sim"
    sim_dir.mkdir(parents=True)
    profiles = [
        {"user_id": i, "user_name": f"u{i}", "name": f"A{i}",
         "persona": "stakeholder p", "profession": "fisher"}
        for i in range(5)
    ]
    (sim_dir / "reddit_profiles.json").write_text(json.dumps(profiles), encoding="utf-8")
    return tmp_path
|
||||
|
||||
def _make_orch(tmp_path):
    """Build an ``InterviewOrchestrator`` for simulation ``intg_sim`` under *tmp_path*.

    Personas come from the seeded ``reddit_profiles.json``; memory and the
    Zep updater are replaced by the local stubs.
    """
    profiles_path = tmp_path / "simulations" / "intg_sim" / "reddit_profiles.json"
    persona_provider = FileSystemPersonaProvider(
        reddit_path=profiles_path, twitter_path=None,
    )
    llm_client = LLMClient(api_key="x", base_url="x", model="x")
    zep_writer = InterviewZepWriter(memory_updater=_NullUpdater(), graph_id="g")
    return InterviewOrchestrator(
        llm=llm_client,
        memory=_StaticMem(),
        personas=persona_provider,
        instrument_dir=INST_DIR,
        store_root=tmp_path,
        sim_id="intg_sim",
        zep_writer=zep_writer,
        max_workers=2,
        language="de",
    )
|
||||
|
||||
def test_pipeline_runs_pre_then_post_then_synthesis(seeded_uploads):
    """Run the pre phase, the post phase and synthesis, then check the CSV export."""
    orch = _make_orch(seeded_uploads)

    pre = orch.run_pre()
    assert pre["longitudinal"]["n_responded"] >= 1

    post = orch.run_post()
    for subagent in ("longitudinal", "diversity", "scenario", "delphi"):
        assert subagent in post

    report = InterviewSynthesizer(store=orch.store).run()
    assert "Stakeholder Interview Synthesis" in report
    assert "Limitations" in report

    csv_path = orch.store.base / "synthesis" / "exports" / "all_responses.csv"
    assert csv_path.exists()
    # Decode explicitly so the check does not depend on the platform's default
    # locale encoding. NOTE(review): assumes the exporter writes UTF-8 — confirm.
    lines = csv_path.read_text(encoding="utf-8").splitlines()
    assert lines, "all_responses.csv is empty"
    # A substring check already covers the header starting with "agent_id,".
    assert "agent_id" in lines[0]
|
||||
|
||||
def test_idempotent_rerun_creates_new_run_id(seeded_uploads):
    """Re-running the scenario subagent must report a different ``run_dir`` than the post run."""
    orch = _make_orch(seeded_uploads)
    orch.run_pre()
    initial = orch.run_post()
    repeated = orch.rerun(SubagentKind.SCENARIO)
    assert initial["scenario"]["run_dir"] != repeated["scenario"]["run_dir"]
|
||||
|
|
@ -0,0 +1,123 @@
|
|||
import csv
|
||||
import json
|
||||
from pathlib import Path
|
||||
from app.services.interviews.adapters import (
|
||||
FileSystemPersonaProvider, ZepMemoryProvider,
|
||||
)
|
||||
|
||||
def _write_reddit_profiles(tmp_path: Path):
    """Write a two-profile ``reddit_profiles.json`` into *tmp_path* and return its path."""
    fisher = {"user_id": 0, "user_name": "fischer1", "name": "Fischer Müller",
              "persona": "I am a small-scale Baltic fisher.", "profession": "fisher", "bio": ""}
    ngo = {"user_id": 1, "user_name": "ngo1", "name": "Ines NGO",
           "persona": "I work for an environmental NGO.", "profession": "ngo_staff", "bio": ""}
    target = tmp_path / "reddit_profiles.json"
    target.write_text(json.dumps([fisher, ngo]), encoding="utf-8")
    return target
|
||||
|
||||
def test_file_system_persona_provider_reads_reddit_json(tmp_path):
    """Both rows of the reddit JSON are returned as personas, in file order."""
    profiles_path = _write_reddit_profiles(tmp_path)
    loaded = FileSystemPersonaProvider(reddit_path=profiles_path, twitter_path=None).all()
    assert len(loaded) == 2
    first = loaded[0]
    assert first.name == "Fischer Müller"
    assert first.agent_id == 0
|
||||
|
||||
def test_zep_memory_provider_returns_empty_when_unavailable():
    """When the entity reader raises, the digest is flagged unavailable but its text is non-empty."""

    class _BrokenReader:
        def get_entity_with_context(self, *a, **kw):
            raise RuntimeError("offline")

    provider = ZepMemoryProvider(
        entity_reader=_BrokenReader(),
        graph_id="g1",
        agent_to_entity={0: "uuid-zero"},
    )
    digest = provider.get_digest(0)
    assert digest.available is False
    assert digest.text != ""
|
||||
|
||||
def test_zep_memory_provider_truncates_to_max_chars():
    """A very long entity context must yield a digest no longer than ``max_chars``."""

    class _Ctx:
        name = "X"
        summary = "Y"
        related_edges = [{"fact": "very long fact " * 200}]

    class _R:
        def get_entity_with_context(self, *a, **kw):
            return _Ctx()

    provider = ZepMemoryProvider(
        entity_reader=_R(),
        graph_id="g1",
        agent_to_entity={5: "uuid-five"},
    )
    digest = provider.get_digest(5, max_chars=300)
    assert digest.available is True
    assert len(digest.text) <= 300
|
||||
|
||||
|
||||
def test_agent_to_entity_from_reddit_json(tmp_path):
    """C5: ``agent_to_entity()`` rebuilds ``{agent_id: zep_entity_uuid}`` from
    a reddit_profiles.json whose rows carry ``source_entity_uuid``.
    """
    rows = [
        {"user_id": 0, "user_name": "fischer1", "name": "Fischer Müller",
         "persona": "p", "profession": "fisher",
         "source_entity_uuid": "uuid-zero"},
        {"user_id": 1, "user_name": "ngo1", "name": "Ines NGO",
         "persona": "p", "profession": "ngo_staff",
         "source_entity_uuid": "uuid-one"},
        # No uuid on this row, so it must not appear in the map.
        {"user_id": 2, "user_name": "gov1", "name": "Gov Agent",
         "persona": "p", "profession": "official"},
    ]
    profiles_path = tmp_path / "reddit_profiles.json"
    profiles_path.write_text(json.dumps(rows), encoding="utf-8")

    mapping = FileSystemPersonaProvider(
        reddit_path=profiles_path, twitter_path=None,
    ).agent_to_entity()

    assert mapping == {0: "uuid-zero", 1: "uuid-one"}
    # Keys are ints, values are strings.
    assert all(isinstance(agent_id, int) for agent_id in mapping)
    assert all(isinstance(uuid, str) for uuid in mapping.values())
|
||||
|
||||
|
||||
def test_agent_to_entity_empty_when_no_field(tmp_path):
    """C5: with no ``source_entity_uuid`` on any row the map is an empty dict."""
    profiles_path = tmp_path / "reddit_profiles.json"
    only_row = {"user_id": 0, "user_name": "u", "name": "A", "persona": "p"}
    profiles_path.write_text(json.dumps([only_row]), encoding="utf-8")
    provider = FileSystemPersonaProvider(reddit_path=profiles_path, twitter_path=None)
    assert provider.agent_to_entity() == {}
|
||||
|
||||
|
||||
def test_agent_to_entity_falls_back_to_twitter_csv(tmp_path):
    """C5: with only twitter_profiles.csv present, uuids come from its
    ``source_entity_uuid`` column; blank uuids are skipped.
    """
    csv_path = tmp_path / "twitter_profiles.csv"
    table = [
        ["user_id", "name", "username", "user_char", "description", "source_entity_uuid"],
        [0, "A0", "u0", "char", "desc", "uuid-zero"],
        [1, "A1", "u1", "char", "desc", ""],  # skipped (blank uuid)
        [2, "A2", "u2", "char", "desc", "uuid-two"],
    ]
    with csv_path.open("w", newline="", encoding="utf-8") as handle:
        csv.writer(handle).writerows(table)

    provider = FileSystemPersonaProvider(reddit_path=None, twitter_path=csv_path)
    assert provider.agent_to_entity() == {0: "uuid-zero", 2: "uuid-two"}
|
||||
|
||||
|
||||
def test_agent_to_entity_reddit_takes_precedence(tmp_path):
    """C5: with both files present the Reddit JSON wins; the Twitter CSV only
    supplies agents the JSON did not map.
    """
    reddit_rows = [
        {"user_id": 0, "user_name": "u0", "name": "A0", "persona": "p",
         "source_entity_uuid": "reddit-zero"},
    ]
    reddit = tmp_path / "reddit_profiles.json"
    reddit.write_text(json.dumps(reddit_rows), encoding="utf-8")

    twitter_rows = [
        ["user_id", "name", "username", "user_char", "description", "source_entity_uuid"],
        [0, "A0", "u0", "char", "desc", "twitter-zero"],  # ignored
        [1, "A1", "u1", "char", "desc", "twitter-one"],   # used
    ]
    twitter = tmp_path / "twitter_profiles.csv"
    with twitter.open("w", newline="", encoding="utf-8") as handle:
        csv.writer(handle).writerows(twitter_rows)

    provider = FileSystemPersonaProvider(reddit_path=reddit, twitter_path=twitter)
    assert provider.agent_to_entity() == {0: "reddit-zero", 1: "twitter-one"}
|
||||
|
|
@ -0,0 +1,155 @@
|
|||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
import pytest
|
||||
|
||||
@pytest.fixture
def client(tmp_path, monkeypatch):
    """Return a Flask test client backed by a seeded ``sim_test`` simulation.

    Stub mode and the uploads directory are set through both the environment
    and ``Config``. Three minimal profiles are written to
    ``simulations/sim_test/reddit_profiles.json`` under *tmp_path*.
    """
    monkeypatch.setenv("LLM_STUB_MODE", "true")
    monkeypatch.setenv("UPLOADS_DIR", str(tmp_path))
    from app.config import Config
    # Patch via monkeypatch instead of assigning, so the class attributes are
    # restored after the test. Otherwise a deleted temp path would stay in
    # Config.UPLOADS_DIR for every later test in the session.
    monkeypatch.setattr(Config, "LLM_STUB_MODE", True, raising=False)
    monkeypatch.setattr(Config, "UPLOADS_DIR", str(tmp_path), raising=False)

    # Seed a minimal reddit_profiles.json
    sim_dir = tmp_path / "simulations" / "sim_test"
    sim_dir.mkdir(parents=True)
    profiles = [
        {"user_id": i, "user_name": f"u{i}", "name": f"A{i}",
         "persona": "p", "profession": "fisher"}
        for i in range(3)
    ]
    (sim_dir / "reddit_profiles.json").write_text(json.dumps(profiles), encoding="utf-8")

    from flask import Flask
    from app.api import register_blueprints
    app = Flask(__name__)
    register_blueprints(app)
    return app.test_client()
|
||||
|
||||
def test_post_pre_returns_task_id(client):
    """POSTing to the pre endpoint answers 200 with ``success`` true and a ``task_id``."""
    response = client.post("/api/interview/sim_test/pre")
    assert response.status_code == 200
    payload = response.get_json()
    assert payload["success"] is True
    assert "task_id" in payload["data"]
|
||||
|
||||
def test_status_endpoint_returns_progress(client):
    """The status endpoint answers 200 with a ``status`` field for a freshly started task."""
    started = client.post("/api/interview/sim_test/pre")
    task_id = started.get_json()["data"]["task_id"]
    status_response = client.get(f"/api/interview/sim_test/status?task_id={task_id}")
    assert status_response.status_code == 200
    assert "status" in status_response.get_json()["data"]
|
||||
|
||||
def test_unknown_subagent_returns_400(client):
    """A rerun request naming an unknown subagent is rejected with HTTP 400."""
    payload = {"subagent": "nonsense"}
    response = client.post("/api/interview/sim_test/rerun", json=payload)
    assert response.status_code == 400
|
||||
|
||||
|
||||
def test_build_orchestrator_reads_graph_id_from_state(tmp_path, monkeypatch):
    """C1+C2: ``_build_orchestrator`` must resolve the Zep graph_id from
    ``state.json`` (written by ``SimulationManager``), not from the
    nonexistent ``graph_id.txt``. The graph_id then must reach the
    ``InterviewZepWriter`` instead of being silently swallowed.
    """
    monkeypatch.setenv("LLM_STUB_MODE", "true")
    monkeypatch.setenv("UPLOADS_DIR", str(tmp_path))
    monkeypatch.setenv("ZEP_API_KEY", "test-fake-key")
    from app.config import Config
    # monkeypatch.setattr restores the class-level settings at teardown;
    # plain assignment would leak the temp path and fake key into later tests.
    monkeypatch.setattr(Config, "LLM_STUB_MODE", True, raising=False)
    monkeypatch.setattr(Config, "UPLOADS_DIR", str(tmp_path), raising=False)
    monkeypatch.setattr(Config, "ZEP_API_KEY", "test-fake-key", raising=False)

    # SimulationManager's data dir is class-level — point it at tmp_path.
    from app.services.simulation_manager import SimulationManager
    sim_root = tmp_path / "simulations"
    sim_root.mkdir(parents=True, exist_ok=True)
    monkeypatch.setattr(SimulationManager, "SIMULATION_DATA_DIR", str(sim_root))

    sim_id = "sim_graphid"
    sim_dir = sim_root / sim_id
    sim_dir.mkdir(parents=True)
    # Seed a profile file so FileSystemPersonaProvider can work.
    (sim_dir / "reddit_profiles.json").write_text(
        json.dumps([
            {"user_id": 0, "user_name": "u0", "name": "A0",
             "persona": "p", "profession": "fisher",
             "source_entity_uuid": "uuid-zero"},
            {"user_id": 1, "user_name": "u1", "name": "A1",
             "persona": "p", "profession": "fisher",
             "source_entity_uuid": "uuid-one"},
        ]),
        encoding="utf-8",
    )
    # Seed state.json with the graph_id.
    state_doc = {
        "simulation_id": sim_id,
        "project_id": "p",
        "graph_id": "graph-from-state",
        "status": "ready",
        "enable_twitter": False,
        "enable_reddit": True,
    }
    (sim_dir / "state.json").write_text(json.dumps(state_doc), encoding="utf-8")

    # Patch ZepGraphMemoryUpdater + ZepEntityReader so we don't hit the network.
    import app.services.zep_graph_memory_updater as zgmu
    import app.services.zep_entity_reader as zer

    class _FakeUpdater:
        def __init__(self, graph_id, api_key=None):
            self.graph_id = graph_id

        def add_text_episode(self, graph_id, text):
            return None

    class _FakeReader:
        def __init__(self, api_key=None):
            pass

        def get_entity_with_context(self, graph_id, entity_uuid):
            return None

    monkeypatch.setattr(zgmu, "ZepGraphMemoryUpdater", _FakeUpdater)
    monkeypatch.setattr(zer, "ZepEntityReader", _FakeReader)

    from app.api.interview import _build_orchestrator

    orch = _build_orchestrator(sim_id)
    assert orch.zep_writer.graph_id == "graph-from-state"
    # Updater on the writer must be the real (or fake) ZepGraphMemoryUpdater path,
    # NOT the null updater — i.e. its graph_id must match.
    assert getattr(orch.zep_writer.updater, "graph_id", None) == "graph-from-state"

    # ZepMemoryProvider must have received the agent_to_entity map (C5).
    assert hasattr(orch.memory, "map")
    assert orch.memory.map == {0: "uuid-zero", 1: "uuid-one"}
|
||||
|
||||
|
||||
def test_build_orchestrator_falls_back_when_state_missing(tmp_path, monkeypatch):
    """C1+C2: when ``state.json`` is missing, the orchestrator must still be
    constructed with the null updater/memory path (not crash, not silently
    pass a bare ``ZepGraphMemoryUpdater()`` that would error out).
    """
    monkeypatch.setenv("LLM_STUB_MODE", "true")
    monkeypatch.setenv("UPLOADS_DIR", str(tmp_path))
    from app.config import Config
    # Restore the class-level settings at teardown so the temp path does not
    # leak into later tests.
    monkeypatch.setattr(Config, "LLM_STUB_MODE", True, raising=False)
    monkeypatch.setattr(Config, "UPLOADS_DIR", str(tmp_path), raising=False)

    from app.services.simulation_manager import SimulationManager
    sim_root = tmp_path / "simulations"
    sim_root.mkdir(parents=True, exist_ok=True)
    monkeypatch.setattr(SimulationManager, "SIMULATION_DATA_DIR", str(sim_root))

    sim_id = "sim_no_state"
    sim_dir = sim_root / sim_id
    sim_dir.mkdir(parents=True)
    # Only a profile file is seeded; state.json is deliberately absent.
    (sim_dir / "reddit_profiles.json").write_text(
        json.dumps([{"user_id": 0, "user_name": "u0", "name": "A0",
                     "persona": "p", "profession": "fisher"}]),
        encoding="utf-8",
    )

    from app.api.interview import _build_orchestrator

    orch = _build_orchestrator(sim_id)
    assert orch.zep_writer.graph_id == ""
    # Null updater path: writer must still respond to _emit without raising.
    orch.zep_writer._emit("hello")
|
||||
|
|
@ -0,0 +1,96 @@
|
|||
import json
|
||||
import pytest
|
||||
from app.services.interviews.base import (
|
||||
StakeholderInterviewer, MemoryDigest, PersonaRecord, SchemaValidationFailure,
|
||||
coerce_int,
|
||||
)
|
||||
|
||||
|
||||
def test_coerce_int_accepts_real_int():
    """Genuine ints (positive, negative, zero) pass through unchanged."""
    for value in (3, -2, 0):
        assert coerce_int(value) == value
|
||||
|
||||
|
||||
def test_coerce_int_accepts_numeric_strings():
    """Integer-looking strings, including padded and negative ones, convert to ints."""
    cases = (("3", 3), (" 4 ", 4), ("-2", -2))
    for raw, expected in cases:
        assert coerce_int(raw) == expected
|
||||
|
||||
|
||||
def test_coerce_int_rejects_non_numeric():
    """Decimal strings, words, ``None``, lists and floats all yield ``None``."""
    for rejected in ("3.5", "abc", None, [3], 3.5):
        assert coerce_int(rejected) is None
|
||||
|
||||
|
||||
def test_coerce_int_rejects_bool():
    """Booleans must not be coerced to 1/0, although ``bool`` subclasses ``int``."""
    for flag in (True, False):
        assert coerce_int(flag) is None
|
||||
|
||||
|
||||
class _FakeLLM:
    """Scripted LLM double: returns queued responses in order and logs each ``messages`` argument."""

    def __init__(self, responses):
        self.responses = [*responses]  # private copy of the script
        self.calls = []

    def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
        # Record the call first, then consume the oldest queued response.
        self.calls += [messages]
        oldest = self.responses.pop(0)
        return oldest
|
||||
|
||||
class _FakeMemory:
    """Memory stub whose digest text is ``digest-for-<agent_id>`` and is always available."""

    def get_digest(self, agent_id, max_chars=2000):
        digest_text = f"digest-for-{agent_id}"
        return MemoryDigest(text=digest_text, available=True)
|
||||
|
||||
def test_in_character_prompt_includes_persona_and_memory():
    """The first message of the first LLM call contains the persona text and the memory digest."""
    fake_llm = _FakeLLM([{"x": 1}])
    interviewer = StakeholderInterviewer(llm=fake_llm, memory=_FakeMemory())
    fisher = PersonaRecord(agent_id=7, name="A", persona="I am a small-scale Baltic fisher.")
    answer = interviewer.ask_in_character(fisher, user_prompt="Q?", schema_hint="{...}")
    assert answer == {"x": 1}
    first_message = fake_llm.calls[0][0]["content"]
    for fragment in ("small-scale Baltic fisher", "digest-for-7"):
        assert fragment in first_message
|
||||
|
||||
def test_schema_retry_on_first_failure():
    """An invalid first reply triggers one retry; the valid second reply is returned."""
    fake_llm = _FakeLLM([{}, {"responses": {"a": 3}}])
    interviewer = StakeholderInterviewer(llm=fake_llm, memory=_FakeMemory())

    def validator(d):
        if "responses" in d:
            return d
        return None

    persona = PersonaRecord(agent_id=1, name="A", persona="p")
    result = interviewer.ask_in_character(
        persona, user_prompt="Q?", schema_hint="x", validate=validator,
    )
    assert result == {"responses": {"a": 3}}
    assert len(fake_llm.calls) == 2
|
||||
|
||||
def test_two_failures_raise():
    """Two invalid replies in a row make ``ask_in_character`` raise ``ValueError``."""
    interviewer = StakeholderInterviewer(llm=_FakeLLM([{}, {}]), memory=_FakeMemory())
    persona = PersonaRecord(agent_id=1, name="A", persona="p")

    def needs_responses(d):
        return d if "responses" in d else None

    with pytest.raises(ValueError):
        interviewer.ask_in_character(
            persona, user_prompt="Q?", schema_hint="x", validate=needs_responses,
        )
|
||||
|
||||
|
||||
def test_schema_failure_captures_both_raw_attempts():
    """``SchemaValidationFailure`` carries the agent id and both raw replies, numbered 1 and 2."""
    raw_replies = [{"oops": "no responses key"}, {"still": "wrong shape"}]
    interviewer = StakeholderInterviewer(llm=_FakeLLM(raw_replies), memory=_FakeMemory())
    persona = PersonaRecord(agent_id=42, name="A", persona="p")

    def needs_responses(d):
        return d if "responses" in d else None

    with pytest.raises(SchemaValidationFailure) as exc_info:
        interviewer.ask_in_character(
            persona, user_prompt="Q?", schema_hint="x", validate=needs_responses,
        )

    failure = exc_info.value
    assert failure.agent_id == 42
    assert len(failure.attempts) == 2
    first_attempt, second_attempt = failure.attempts[0], failure.attempts[1]
    assert first_attempt["raw"] == raw_replies[0]
    assert second_attempt["raw"] == raw_replies[1]
    assert first_attempt["attempt"] == 1
    assert second_attempt["attempt"] == 2
|
||||
|
|
@ -0,0 +1,84 @@
|
|||
from pathlib import Path
|
||||
from app.services.interviews.base import PersonaRecord, MemoryDigest
|
||||
from app.services.interviews.delphi import (
|
||||
DelphiSubagent, extract_themes, convergence_metrics,
|
||||
)
|
||||
|
||||
# Delphi instrument file: ``scripts/instruments/delphi_v1.yaml`` under the
# directory two levels above this file's folder.
INSTRUMENT = Path(__file__).resolve().parents[2] / "scripts" / "instruments" / "delphi_v1.yaml"
|
||||
|
||||
class _Mem:
    """Memory stub that returns the one-character digest ``"x"``, marked available."""

    def get_digest(self, agent_id, max_chars=2000):
        # Both arguments are ignored; the digest is constant.
        digest = MemoryDigest(text="x", available=True)
        return digest
|
||||
|
||||
class _R1LLM:
    """Round-1 LLM double: always returns the same free-text answers for q1 to q4."""

    def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
        canned_answers = {
            "q1": "Klimawandel, Quoten, Generationswechsel",
            "q2": "MSC, Aquakultur",
            "q3": "Russland, EU-Politik",
            "q4": "Verbraucherpreise",
        }
        return {"answers": canned_answers}
|
||||
|
||||
class _R2LLM:
    """Rating-round LLM double: rates ``theme_0`` to ``theme_4`` with importance 4 and plausibility 3."""

    def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
        ratings = {}
        for i in range(5):
            # A fresh dict per theme, so entries do not share state.
            ratings[f"theme_{i}"] = {"importance": 4, "plausibility": 3}
        return {"ratings": ratings}
|
||||
|
||||
class _ExtractLLM:
    """Theme-extraction LLM double: returns five fixed themes, ``theme_0`` to ``theme_4``."""

    def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
        labels = ("Klimawandel", "Quoten", "MSC", "EU-Politik", "Generationswechsel")
        themes = [
            {"theme_id": f"theme_{idx}", "label": label}
            for idx, label in enumerate(labels)
        ]
        return {"themes": themes}
|
||||
|
||||
def test_delphi_round1_open():
    """Round 1 returns a response tagged ``round == 1`` holding all four answers."""
    subagent = DelphiSubagent(llm=_R1LLM(), memory=_Mem(), instrument_path=INSTRUMENT)
    response = subagent.administer_round1(PersonaRecord(agent_id=2, name="A", persona="p"))
    assert response.round == 1
    assert len(response.answers) == 4
|
||||
|
||||
def test_extract_themes_aggregates():
    """``extract_themes`` returns the five themes from the LLM, each with a ``theme_id``."""
    from app.models.interview import DelphiOpenResponse
    round1 = [
        DelphiOpenResponse(agent_id=i, answers={"q1": "Klimawandel", "q2": "MSC"})
        for i in range(3)
    ]
    themes = extract_themes(round1, llm=_ExtractLLM())
    assert len(themes) == 5
    for theme in themes:
        assert "theme_id" in theme
|
||||
|
||||
def test_convergence_metrics():
    """Metrics across rounds 2 and 3 include theme ``t1`` with a non-None importance IQR delta."""
    from app.models.interview import DelphiRatingResponse

    def _uniform_round(round_no, score):
        # Five agents all giving the same score on both dimensions of t1.
        return [
            DelphiRatingResponse(
                agent_id=i, round=round_no,
                ratings={"t1": {"importance": score, "plausibility": score}},
            )
            for i in range(5)
        ]

    round2 = _uniform_round(2, 3)
    round3 = _uniform_round(3, 4)
    metrics = convergence_metrics(round2, round3)
    assert "t1" in metrics
    assert metrics["t1"]["delta_iqr_importance"] is not None
|
||||
|
||||
|
||||
def test_delphi_r2_accepts_string_ratings():
    """Delphi R2/R3 ratings should accept stringified importance/plausibility ints.

    Uses the module-level imports, ``_Mem`` stub and ``INSTRUMENT`` path,
    not local duplicates of them.
    """

    class _StringLLM:
        # Returns every rating as a string to exercise the int coercion.
        def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
            return {"ratings": {
                "t1": {"importance": "4", "plausibility": "3"},
                "t2": {"importance": "5", "plausibility": "2"},
            }}

    sub = DelphiSubagent(llm=_StringLLM(), memory=_Mem(), instrument_path=INSTRUMENT)
    persona = PersonaRecord(agent_id=1, name="A", persona="p")
    themes = [{"theme_id": "t1", "label": "T1"}, {"theme_id": "t2", "label": "T2"}]
    resp = sub.administer_round2(persona, themes)
    assert resp.ratings["t1"]["importance"] == 4
    assert isinstance(resp.ratings["t1"]["importance"], int)
|
||||
|
|
@ -0,0 +1,78 @@
|
|||
from pathlib import Path
|
||||
import numpy as np
|
||||
from app.services.interviews.base import PersonaRecord, MemoryDigest
|
||||
from app.services.interviews.diversity import (
|
||||
DiversitySubagent, run_typology,
|
||||
)
|
||||
|
||||
class _Mem:
    """Memory stub that returns the constant digest ``"x"``, flagged available."""

    def get_digest(self, agent_id, max_chars=2000):
        # Inputs are ignored on purpose; every agent gets the same digest.
        digest = MemoryDigest(text="x", available=True)
        return digest
|
||||
|
||||
class _CannedLLM:
    """LLM double returning a fixed Q-sort (24 placements) plus six Likert axis scores."""

    def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
        # Bucket counts 2/3/4/6/4/3/2 over -3..3, 24 slots in total;
        # statement st_01 takes the first slot, st_24 the last.
        buckets = [-3]*2 + [-2]*3 + [-1]*4 + [0]*6 + [1]*4 + [2]*3 + [3]*2
        placements = {
            f"st_{position:02d}": bucket
            for position, bucket in enumerate(buckets, start=1)
        }
        axis_scores = {"ax_pres_extr": 5, "ax_loc_eu": 3, "ax_sci_trad": 4,
                       "ax_ind_col": 4, "ax_short_long": 5, "ax_mkt_reg": 3}
        return {"placements": placements, "likert_axes": axis_scores}
|
||||
|
||||
# Diversity instrument file: ``scripts/instruments/diversity_v1.yaml`` under
# the directory two levels above this file's folder.
INSTRUMENT = Path(__file__).resolve().parents[2] / "scripts" / "instruments" / "diversity_v1.yaml"
|
||||
|
||||
def test_diversity_administer():
    """``administer`` returns 24 placements and exactly the six expected Likert axes."""
    expected_axes = {
        "ax_pres_extr", "ax_loc_eu", "ax_sci_trad",
        "ax_ind_col", "ax_short_long", "ax_mkt_reg",
    }
    subagent = DiversitySubagent(llm=_CannedLLM(), memory=_Mem(), instrument_path=INSTRUMENT)
    response = subagent.administer(PersonaRecord(agent_id=1, name="A", persona="p"))
    assert len(response.placements) == 24
    assert set(response.likert_axes.keys()) == expected_axes
|
||||
|
||||
def test_typology_runs_pca_kmeans():
    """``run_typology`` on 20 seeded random Q-sorts yields 3 clusters and at least 2 PCA components."""
    from app.models.interview import QSortResponse
    rng = np.random.default_rng(42)

    def _random_response(aid):
        # Draw order (24 placements, then 6 axes) is kept so the seeded
        # stream produces the same data as before.
        placements = {f"st_{i+1:02d}": int(rng.integers(-3, 4)) for i in range(24)}
        axes = {f"ax_{j}": int(rng.integers(1, 8)) for j in range(6)}
        return QSortResponse(agent_id=aid, placements=placements, likert_axes=axes)

    responses = [_random_response(aid) for aid in range(20)]
    result = run_typology(responses, n_clusters=3)
    assert "clusters" in result
    assert len(result["clusters"]) == 3
    assert "pca" in result
    assert len(result["pca"]["components"]) >= 2
|
||||
|
||||
|
||||
def test_diversity_accepts_string_likert_values():
    """Diversity placements + axes should accept stringified ints.

    Uses the module-level imports, ``_Mem`` stub and ``INSTRUMENT`` path,
    not local duplicates of them.
    """
    # Bucket counts 2/3/4/6/4/3/2 over -3..3, 24 slots in total.
    buckets = [-3]*2 + [-2]*3 + [-1]*4 + [0]*6 + [1]*4 + [2]*3 + [3]*2

    class _StringLLM:
        # Returns every placement and axis score as a string.
        def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
            return {
                "placements": {f"st_{i+1:02d}": str(buckets[i]) for i in range(24)},
                "likert_axes": {a: "4" for a in (
                    "ax_pres_extr", "ax_loc_eu", "ax_sci_trad",
                    "ax_ind_col", "ax_short_long", "ax_mkt_reg")},
            }

    sub = DiversitySubagent(llm=_StringLLM(), memory=_Mem(), instrument_path=INSTRUMENT)
    persona = PersonaRecord(agent_id=7, name="A", persona="p")
    resp = sub.administer(persona)
    assert isinstance(resp.placements["st_01"], int)
    assert isinstance(resp.likert_axes["ax_pres_extr"], int)
    assert resp.likert_axes["ax_pres_extr"] == 4
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
import pytest
|
||||
from app.services.interviews.instrument_loader import (
|
||||
load_likert_instrument, InstrumentValidationError,
|
||||
)
|
||||
|
||||
def _write(tmp_path, text):
    """Write *text* as UTF-8 to ``inst.yaml`` inside *tmp_path* and return that file's path."""
    target = tmp_path / "inst.yaml"
    target.write_text(text, encoding="utf-8")
    return target
|
||||
|
||||
def test_loads_valid_likert(tmp_path):
    """A well-formed single-item instrument loads with its name and item count intact."""
    # The item's fields must be indented under the "- item_id" entry; with every
    # key at column 0 the document is no longer one list item with five fields.
    p = _write(tmp_path, """
name: longitudinal_v1
version: "1.0"
language_default: de
items:
  - item_id: stk_1
    de: "Der westliche Dorschbestand wird sich erholen"
    en: "Western cod stock will recover"
    scale: 5
    family: stocks
""")
    inst = load_likert_instrument(p)
    assert inst.name == "longitudinal_v1"
    assert len(inst.items) == 1
|
||||
|
||||
def test_rejects_duplicate_item_id(tmp_path):
    """Loading an instrument whose two items share an item_id raises InstrumentValidationError."""
    yaml_text = """
name: x
items:
- {item_id: a, de: d, en: e, scale: 5}
- {item_id: a, de: d, en: e, scale: 5}
"""
    instrument_file = _write(tmp_path, yaml_text)
    with pytest.raises(InstrumentValidationError):
        load_likert_instrument(instrument_file)
|
||||
|
||||
def test_rejects_missing_required_field(tmp_path):
    """Loading an item that has no ``en`` text raises InstrumentValidationError."""
    yaml_text = """
name: x
items:
- {item_id: a, de: d, scale: 5}
"""
    instrument_file = _write(tmp_path, yaml_text)
    with pytest.raises(InstrumentValidationError):
        load_likert_instrument(instrument_file)
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
"""
|
||||
Tests for interview lifecycle hook installer (Task 20).
|
||||
"""
|
||||
|
||||
from app.services.interviews.lifecycle import install_hooks
|
||||
|
||||
|
||||
class _StubMgr:
    """Manager stand-in that records every callback handed to its two register methods."""

    def __init__(self):
        self.ready, self.completed = [], []

    def register_on_ready(self, fn):
        """Remember *fn* in the ``ready`` list."""
        self.ready += [fn]

    def register_on_completed(self, fn):
        """Remember *fn* in the ``completed`` list."""
        self.completed += [fn]
|
||||
|
||||
|
||||
def test_install_hooks_registers_two_callables():
    """install_hooks registers exactly one callable for ``ready`` and one for ``completed``."""
    stub = _StubMgr()
    install_hooks(stub)

    assert len(stub.ready) == 1
    assert len(stub.completed) == 1
    ready_hook = stub.ready[0]
    assert callable(ready_hook)
    completed_hook = stub.completed[0]
    assert callable(completed_hook)
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
import json
|
||||
from app.utils.llm_client import LLMClient
|
||||
|
||||
|
||||
def test_stub_mode_returns_deterministic_canned_json(monkeypatch):
    """With stub mode switched on, two identical chat_json calls return the same dict."""
    monkeypatch.setenv("LLM_STUB_MODE", "true")
    from app.config import Config

    # Patch through monkeypatch instead of assigning on the class: a plain
    # ``Config.LLM_STUB_MODE = True`` is never undone and would leave stub mode
    # on for every test that runs afterwards. raising=False keeps the previous
    # behaviour of creating the attribute if Config does not define it.
    monkeypatch.setattr(Config, "LLM_STUB_MODE", True, raising=False)

    client = LLMClient(api_key="x", base_url="x", model="x")
    messages = [
        {"role": "system", "content": "You are persona_42. Return JSON."},
        {"role": "user", "content": "stub_key=longitudinal:item_001"},
    ]
    out1 = client.chat_json(messages=messages, temperature=0.0)
    out2 = client.chat_json(messages=messages, temperature=0.0)
    assert out1 == out2
    assert isinstance(out1, dict)
|
||||
|
|
@ -0,0 +1,91 @@
|
|||
from pathlib import Path
|
||||
import pytest
|
||||
from app.models.interview import InterviewPhase
|
||||
from app.services.interviews.base import PersonaRecord, MemoryDigest
|
||||
from app.services.interviews.longitudinal import LongitudinalSubagent, run_aggregate
|
||||
|
||||
|
||||
class _FakeMem:
    """Memory stub whose digest is always the available text ``"x"``."""

    def get_digest(self, agent_id, max_chars=2000):
        digest = MemoryDigest(text="x", available=True)
        return digest
|
||||
|
||||
|
||||
class _CannedLLM:
    """LLM stand-in that counts its calls in ``n`` and always returns the same answer sheet."""

    def __init__(self):
        self.n = 0

    def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
        """Bump the call counter and return the fixed responses/confidence/open_comment payload."""
        self.n += 1
        # One row per item: (item id, Likert answer, confidence).
        rows = (
            ("stk_1", 4, 0.8), ("stk_2", 3, 0.7), ("stk_3", 5, 0.9),
            ("gov_1", 3, 0.6), ("gov_2", 4, 0.7), ("gov_3", 2, 0.5),
            ("mkt_1", 5, 0.7), ("mkt_2", 3, 0.6), ("mkt_3", 4, 0.8),
            ("clm_1", 2, 0.5), ("clm_2", 4, 0.7), ("clm_3", 5, 0.6),
        )
        return {
            "responses": {item_id: answer for item_id, answer, _ in rows},
            "confidence": {item_id: certainty for item_id, _, certainty in rows},
            "open_comment": "test",
        }
|
||||
|
||||
|
||||
# Location of the longitudinal instrument definition: scripts/instruments/longitudinal_v1.yaml
# under the third ancestor directory of this (resolved) test file.
INSTRUMENT = Path(__file__).resolve().parents[2] / "scripts" / "instruments" / "longitudinal_v1.yaml"
|
||||
|
||||
|
||||
def test_longitudinal_administer_one_agent():
    """A T0 administration returns the agent id, the phase and at least one item per family."""
    subagent = LongitudinalSubagent(
        llm=_CannedLLM(),
        memory=_FakeMem(),
        instrument_path=INSTRUMENT,
    )
    answer = subagent.administer(
        PersonaRecord(agent_id=3, name="A", persona="p"),
        phase=InterviewPhase.T0,
    )
    assert answer.agent_id == 3
    assert answer.phase == InterviewPhase.T0
    must_have = {"stk_1", "gov_1", "mkt_1", "clm_1"}
    assert set(answer.responses.keys()) >= must_have
|
||||
|
||||
|
||||
def test_longitudinal_aggregate_delta():
    """run_aggregate pairs 5 agents and reports per-item mean deltas of 1.0 (stk_1) and 0.0 (gov_1)."""
    from app.models.interview import LikertResponse

    def _wave(phase, stk_value):
        # Five agents with identical answers; only stk_1 differs between waves.
        wave = []
        for agent_id in range(5):
            wave.append(LikertResponse(
                agent_id=agent_id,
                phase=phase,
                responses={"stk_1": stk_value, "gov_1": 4},
                confidence={"stk_1": 0.8, "gov_1": 0.8},
            ))
        return wave

    before = _wave(InterviewPhase.T0, 3)
    after = _wave(InterviewPhase.T1, 4)
    summary = run_aggregate(before, after)

    per_item = summary["per_item"]
    assert per_item["stk_1"]["mean_delta"] == 1.0
    assert per_item["gov_1"]["mean_delta"] == 0.0
    assert summary["n_paired"] == 5
|
||||
|
||||
|
||||
def test_longitudinal_accepts_string_likert_values():
    """Real LLMs sometimes return Likert values as JSON strings ('3' not 3).

    The validator should coerce them rather than fail the agent.
    """

    class _StringLLM:
        def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
            return {
                "responses": {  # all strings, not ints
                    "stk_1": "4", "stk_2": "3", "stk_3": "5",
                    "gov_1": "3", "gov_2": "4", "gov_3": "2",
                    "mkt_1": "5", "mkt_2": "3", "mkt_3": "4",
                    "clm_1": "2", "clm_2": "4", "clm_3": "5",
                },
                "confidence": {},
                "open_comment": "stringified",
            }

    # Reuse this module's imports, its _FakeMem stub and its INSTRUMENT path
    # rather than re-importing and re-declaring identical copies locally.
    sub = LongitudinalSubagent(llm=_StringLLM(), memory=_FakeMem(), instrument_path=INSTRUMENT)
    persona = PersonaRecord(agent_id=99, name="A", persona="p")
    resp = sub.administer(persona, phase=InterviewPhase.T0)
    assert resp.agent_id == 99
    assert resp.responses["stk_1"] == 4
    assert isinstance(resp.responses["stk_1"], int)
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
import pytest
|
||||
from pydantic import ValidationError
|
||||
from app.models.interview import (
|
||||
LikertItem, LikertInstrument, LikertResponse,
|
||||
InterviewPhase, SubagentKind,
|
||||
)
|
||||
|
||||
def test_likert_item_requires_de_and_en():
    """An item with both texts is accepted; leaving out either ``de`` or ``en`` is rejected."""
    item = LikertItem(item_id="x1", de="Frage", en="Question", scale=5)
    assert item.scale == 5

    # The happy path alone never exercises the "requires" part of the contract,
    # so drop each language text in turn and expect validation to fail.
    for missing in ("de", "en"):
        fields = {"item_id": "x1", "de": "Frage", "en": "Question", "scale": 5}
        del fields[missing]
        with pytest.raises(ValidationError):
            LikertItem(**fields)
|
||||
|
||||
def test_likert_item_rejects_bad_scale():
    """Constructing an item with ``scale=2`` raises ValidationError."""
    bad_fields = {"item_id": "x1", "de": "d", "en": "e", "scale": 2}
    with pytest.raises(ValidationError):
        LikertItem(**bad_fields)
|
||||
|
||||
def test_likert_instrument_unique_item_ids():
    """An instrument built from two items with the same item_id raises ValidationError."""
    with pytest.raises(ValidationError):
        # Items are created inside the context, as any ValidationError here counts.
        twins = [LikertItem(item_id="a", de="d", en="e", scale=5) for _ in range(2)]
        LikertInstrument(name="t", items=twins)
|
||||
|
||||
def test_likert_response_validates_scale_range():
    """A response carrying the answer value 6 raises ValidationError."""
    out_of_range = {"a": 6}
    with pytest.raises(ValidationError):
        LikertResponse(
            agent_id=1,
            phase=InterviewPhase.T0,
            responses=out_of_range,
            confidence={"a": 0.5},
        )
|
||||
|
||||
def test_subagent_kind_enum():
    """SubagentKind.LONGITUDINAL carries the value ``"longitudinal"``."""
    member = SubagentKind.LONGITUDINAL
    assert member.value == "longitudinal"
|
||||
|
|
@ -0,0 +1,95 @@
|
|||
from pathlib import Path
|
||||
import pytest
|
||||
from app.models.interview import InterviewPhase, SubagentKind
|
||||
from app.services.interviews.base import PersonaRecord, MemoryDigest
|
||||
from app.services.interview_orchestrator import (
|
||||
InterviewOrchestrator, PersonaProvider,
|
||||
)
|
||||
|
||||
# Directory holding the instrument files: scripts/instruments under the third
# ancestor directory of this (resolved) test file.
INST_DIR = Path(__file__).resolve().parents[2] / "scripts" / "instruments"
|
||||
|
||||
class _Mem:
    """Memory stub whose digest is always the available text ``"x"``."""

    def get_digest(self, agent_id, max_chars=2000):
        digest = MemoryDigest(text="x", available=True)
        return digest
|
||||
|
||||
class _LLM:
    """LLM stand-in: answers longitudinal-looking prompts with all-3 responses, anything else with ``{}``."""

    def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
        """Return the canned sheet when the first system message mentions "longitudinal"
        or the last message's content mentions "stk_"; otherwise return an empty dict.
        """
        system_prompt = ""
        for message in messages:
            if message["role"] == "system":
                system_prompt = message["content"]
                break

        looks_longitudinal = "longitudinal" in system_prompt
        if not looks_longitudinal:
            final_content = messages[-1].get("content") or ""
            looks_longitudinal = "stk_" in final_content
        if not looks_longitudinal:
            return {}

        item_ids = (
            "stk_1", "stk_2", "stk_3", "gov_1", "gov_2", "gov_3",
            "mkt_1", "mkt_2", "mkt_3", "clm_1", "clm_2", "clm_3",
        )
        return {
            "responses": dict.fromkeys(item_ids, 3),
            "confidence": {},
            "open_comment": "ok",
        }
|
||||
|
||||
class _Personas(PersonaProvider):
    """Persona provider serving *n* generated records named ``A0``, ``A1``, ..."""

    def __init__(self, n=3):
        records = []
        for agent_id in range(n):
            records.append(PersonaRecord(agent_id=agent_id, name=f"A{agent_id}", persona="p"))
        self._items = records

    def all(self):
        """Return a fresh list of the stored records."""
        return [*self._items]
|
||||
|
||||
class _NoopZep:
    """Writer stand-in whose two write methods accept any arguments and do nothing."""

    def write_per_agent(self, *a, **kw):
        return None

    def write_aggregate(self, *a, **kw):
        return None
|
||||
|
||||
def test_pre_phase_runs_longitudinal_only(tmp_path):
    """run_pre reports three longitudinal responders and no diversity entry."""
    orchestrator_args = dict(
        llm=_LLM(),
        memory=_Mem(),
        personas=_Personas(3),
        instrument_dir=INST_DIR,
        store_root=tmp_path,
        sim_id="sim1",
        zep_writer=_NoopZep(),
        max_workers=2,
    )
    outcome = InterviewOrchestrator(**orchestrator_args).run_pre()
    assert outcome["longitudinal"]["n_responded"] == 3
    assert "diversity" not in outcome  # only longitudinal in pre-phase
|
||||
|
||||
def test_partial_failure_does_not_kill_run(tmp_path):
    """With an LLM that raises on every second call, run_pre still returns and reports failures."""
    item_ids = (
        "stk_1", "stk_2", "stk_3", "gov_1", "gov_2", "gov_3",
        "mkt_1", "mkt_2", "mkt_3", "clm_1", "clm_2", "clm_3",
    )

    class _FlakyLLM:
        def __init__(self):
            self.n = 0

        def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
            self.n += 1
            even_call = self.n % 2 == 0
            if even_call:
                raise RuntimeError("simulated LLM 5xx")
            return {
                "responses": dict.fromkeys(item_ids, 3),
                "confidence": {},
                "open_comment": "ok",
            }

    orchestrator = InterviewOrchestrator(
        llm=_FlakyLLM(),
        memory=_Mem(),
        personas=_Personas(4),
        instrument_dir=INST_DIR,
        store_root=tmp_path,
        sim_id="sim2",
        zep_writer=_NoopZep(),
        max_workers=1,
    )
    longitudinal = orchestrator.run_pre()["longitudinal"]
    assert longitudinal["n_responded"] < 4
    assert longitudinal["n_failed"] > 0
|
||||
|
||||
|
||||
def test_schema_failure_audit_captures_raw_llm_output(tmp_path):
    """An agent whose output fails validation is logged to audit.jsonl with both raw attempts.

    The first audit entry must be a ``schema_validation_failure`` for agent 0,
    labelled ``longitudinal_T0``, holding the raw LLM output of each of the two attempts.
    """
    import json as _j

    bad_response = {"wrong": "shape, no responses key"}

    class _BadLLM:
        def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
            return bad_response  # always fails Longitudinal validator

    orchestrator = InterviewOrchestrator(
        llm=_BadLLM(),
        memory=_Mem(),
        personas=_Personas(1),
        instrument_dir=INST_DIR,
        store_root=tmp_path,
        sim_id="sim3",
        zep_writer=_NoopZep(),
        max_workers=1,
    )
    result = orchestrator.run_pre()
    assert result["longitudinal"]["n_responded"] == 0
    assert result["longitudinal"]["n_failed"] == 1

    audit_file = Path(result["longitudinal"]["run_dir"]) / "audit.jsonl"
    audit_lines = audit_file.read_text(encoding="utf-8").splitlines()
    assert audit_lines, "audit.jsonl should not be empty"

    first_entry = _j.loads(audit_lines[0])
    assert first_entry["event"] == "schema_validation_failure"
    assert first_entry["agent_id"] == 0

    detail = first_entry["detail"]
    assert detail["label"] == "longitudinal_T0"
    attempts = detail["attempts"]
    assert len(attempts) == 2
    assert attempts[0]["raw"] == bad_response
    assert attempts[1]["raw"] == bad_response
|
||||
|
|
@ -0,0 +1,60 @@
|
|||
from pathlib import Path
|
||||
from app.services.interviews.base import PersonaRecord, MemoryDigest
|
||||
from app.services.interviews.scenario import ScenarioSubagent, polarity_matrix
|
||||
|
||||
# Location of the scenario instrument definition: scripts/instruments/scenario_v1.yaml
# under the third ancestor directory of this (resolved) test file.
INSTRUMENT = Path(__file__).resolve().parents[2] / "scripts" / "instruments" / "scenario_v1.yaml"
|
||||
|
||||
class _Mem:
    """Memory stub whose digest is always the available text ``"x"``."""

    def get_digest(self, agent_id, max_chars=2000):
        digest = MemoryDigest(text="x", available=True)
        return digest
|
||||
|
||||
class _LLM:
    """LLM stand-in returning the same four ratings for scenarios S1 to S4."""

    def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
        """Return a ``ratings`` dict keyed by scenario id; only the free-text answer varies."""
        ratings = {}
        for sid in ("S1", "S2", "S3", "S4"):
            ratings[sid] = {
                "desirability": 4,
                "plausibility": 3,
                "impact_on_my_group": 5,
                "fairness": 3,
                "if_woke_up_response": f"act-on-{sid}",
            }
        return {"ratings": ratings}
|
||||
|
||||
def test_scenario_administer():
    """Administering the scenario instrument returns ratings for S1-S4 with S1 desirability 4."""
    subagent = ScenarioSubagent(
        llm=_LLM(),
        memory=_Mem(),
        instrument_path=INSTRUMENT,
    )
    answer = subagent.administer(PersonaRecord(agent_id=1, name="A", persona="p"))
    rated_ids = set(answer.ratings.keys())
    assert rated_ids == {"S1", "S2", "S3", "S4"}
    assert answer.ratings["S1"].desirability == 4
|
||||
|
||||
def test_polarity_matrix():
    """polarity_matrix over three identical S1 ratings reports mean desirability 5 and n == 3."""
    from app.models.interview import ScenarioResponse, ScenarioRating

    sample = []
    for agent_id in range(3):
        # A fresh rating object per agent, all with the same values.
        s1_rating = ScenarioRating(
            desirability=5,
            plausibility=4,
            impact_on_my_group=5,
            fairness=4,
            if_woke_up_response="x",
        )
        sample.append(ScenarioResponse(agent_id=agent_id, ratings={"S1": s1_rating}))

    matrix = polarity_matrix(sample)
    assert "S1" in matrix
    assert matrix["S1"]["mean_desirability"] == 5
    assert matrix["S1"]["n"] == 3
|
||||
|
||||
|
||||
def test_scenario_accepts_string_likert_values():
    """Scenario ratings should accept stringified ints across all 4 dimensions."""

    class _StringLLM:
        def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
            return {"ratings": {sid: {
                "desirability": "4", "plausibility": "3",
                "impact_on_my_group": "5", "fairness": "3",
                "if_woke_up_response": f"act-{sid}",
            } for sid in ("S1", "S2", "S3", "S4")}}

    # Reuse this module's imports, its _Mem stub and its INSTRUMENT path rather
    # than re-importing and re-declaring identical copies locally.
    sub = ScenarioSubagent(llm=_StringLLM(), memory=_Mem(), instrument_path=INSTRUMENT)
    persona = PersonaRecord(agent_id=3, name="A", persona="p")
    resp = sub.administer(persona)
    assert resp.ratings["S1"].desirability == 4
    assert isinstance(resp.ratings["S1"].desirability, int)
|
||||
|
|
@ -0,0 +1,96 @@
|
|||
"""
|
||||
Tests for SimulationManager lifecycle hooks (on_ready / on_completed).
|
||||
|
||||
NOTE ON SHAPE DIVERGENCE vs. original plan spec:
|
||||
- SimulationState uses `simulation_id` (not `sim_id`)
|
||||
- `status` is a SimulationStatus enum, not a plain string
|
||||
- The COMPLETED transition lives in simulation_runner.py (SimulationRunner._monitor_simulation),
|
||||
not in simulation_manager.py. The _notify_on_completed hook is registered on SimulationManager
|
||||
and the production insertion point for COMPLETED is documented in DONE_WITH_CONCERNS.
|
||||
|
||||
Hooks are stored on the class (C3 fix), so each test snapshots/restores the
|
||||
registries via the autouse fixture to keep test isolation.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from app.services.simulation_manager import SimulationManager, SimulationState, SimulationStatus
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _isolate_class_hooks():
    """Snapshot both class-level hook lists before a test and restore them in place afterwards."""
    registry_names = ("_on_ready_hooks", "_on_completed_hooks")
    snapshots = {name: list(getattr(SimulationManager, name)) for name in registry_names}
    try:
        yield
    finally:
        # Slice-assign so the list objects on the class stay the same ones.
        for name, saved in snapshots.items():
            getattr(SimulationManager, name)[:] = saved
|
||||
|
||||
|
||||
def test_register_post_ready_hook_invoked():
    """A hook registered with register_on_ready is called once by _notify_on_ready."""
    seen = []

    def _record(state):
        seen.append(("ready", state.simulation_id))

    manager = SimulationManager()
    manager.register_on_ready(_record)
    ready_state = SimulationState(
        simulation_id="abc",
        project_id="proj1",
        graph_id="graph1",
        status=SimulationStatus.READY,
    )
    manager._notify_on_ready(ready_state)
    assert seen == [("ready", "abc")]
|
||||
|
||||
|
||||
def test_register_post_completed_hook_invoked():
    """A hook registered with register_on_completed is called once by _notify_on_completed."""
    seen = []

    def _record(state):
        seen.append(("done", state.simulation_id))

    manager = SimulationManager()
    manager.register_on_completed(_record)
    finished_state = SimulationState(
        simulation_id="abc",
        project_id="proj1",
        graph_id="graph1",
        status=SimulationStatus.COMPLETED,
    )
    manager._notify_on_completed(finished_state)
    assert seen == [("done", "abc")]
|
||||
|
||||
|
||||
def test_hooks_survive_across_instances():
    """Hooks registered on the class fire on an instance constructed afterwards.

    C3: the registries are class-level, which is what lets a per-request
    ``SimulationManager()`` see hooks that ``install_hooks(SimulationManager)``
    registered at app startup.
    """
    fired: list[str] = []

    def _on_ready(state):
        fired.append(f"ready:{state.simulation_id}")

    def _on_completed(state):
        fired.append(f"done:{state.simulation_id}")

    # Register through the class, as install_hooks(cls) does in production.
    SimulationManager.register_on_ready(_on_ready)
    SimulationManager.register_on_completed(_on_completed)

    # An instance created only now must still reach both hooks.
    later_instance = SimulationManager()
    sim_state = SimulationState(
        simulation_id="cross_instance",
        project_id="p",
        graph_id="g",
        status=SimulationStatus.READY,
    )
    later_instance._notify_on_ready(sim_state)
    sim_state.status = SimulationStatus.COMPLETED
    later_instance._notify_on_completed(sim_state)

    assert "ready:cross_instance" in fired
    assert "done:cross_instance" in fired
|
||||
|
||||
|
||||
def test_register_via_instance_also_lands_on_class():
    """Registering through an instance must populate the class registry too —
    backward-compatibility with code that calls ``manager.register_on_*``.
    """
    def hook(state):
        return None

    mgr1 = SimulationManager()
    mgr1.register_on_ready(hook)

    # Look for this exact hook: a bare ``len(...) >= 1`` check would also pass
    # on hooks that other code registered on the class earlier.
    assert hook in SimulationManager._on_ready_hooks

    # A second, unrelated instance must see the hook. Go through the instance
    # attribute; ``mgr2.__class__`` is SimulationManager itself, so comparing
    # through it can never fail and would miss a per-instance list shadowing
    # the class registry.
    mgr2 = SimulationManager()
    assert mgr2._on_ready_hooks is SimulationManager._on_ready_hooks
    assert hook in mgr2._on_ready_hooks
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
import json
|
||||
from pathlib import Path
|
||||
from app.models.interview import (
|
||||
LikertResponse, InterviewPhase, SubagentKind,
|
||||
)
|
||||
from app.services.interviews.storage import InterviewStore
|
||||
|
||||
def test_run_directory_layout(tmp_path):
    """start_run creates a run directory whose parent is ``longitudinal`` and grandparent ``T0``."""
    store = InterviewStore(root=tmp_path, sim_id="sim42")
    run_dir = store.start_run(
        phase=InterviewPhase.T0,
        subagent=SubagentKind.LONGITUDINAL,
    )
    assert run_dir.exists()
    subagent_dir = run_dir.parent
    assert subagent_dir.name == "longitudinal"
    phase_dir = subagent_dir.parent
    assert phase_dir.name == "T0"
|
||||
|
||||
def test_append_response(tmp_path):
    """append_response writes the response as the first JSON line of responses.jsonl."""
    store = InterviewStore(root=tmp_path, sim_id="sim42")
    run_dir = store.start_run(phase=InterviewPhase.T0, subagent=SubagentKind.LONGITUDINAL)
    r = LikertResponse(agent_id=1, phase=InterviewPhase.T0,
                       responses={"a": 3}, confidence={"a": 0.5})
    store.append_response(run_dir, r)
    # Read with an explicit encoding so the result does not depend on the
    # platform's locale default.
    contents = (run_dir / "responses.jsonl").read_text(encoding="utf-8")
    assert json.loads(contents.splitlines()[0])["agent_id"] == 1
|
||||
|
||||
def test_write_aggregate_and_latest_pointer(tmp_path):
    """mark_latest writes latest.json next to the run directory, pointing at that run."""
    store = InterviewStore(root=tmp_path, sim_id="sim42")
    run_dir = store.start_run(phase=InterviewPhase.T1, subagent=SubagentKind.SCENARIO)
    store.write_aggregate(run_dir, {"k": 1})
    store.mark_latest(run_dir)
    # Read with an explicit encoding so the result does not depend on the
    # platform's locale default.
    latest = (run_dir.parent / "latest.json").read_text(encoding="utf-8")
    assert json.loads(latest)["run_dir"].endswith(run_dir.name)
|
||||
|
||||
def test_audit_log_append(tmp_path):
    """audit() appends a JSON line to audit.jsonl carrying the agent id, event and detail."""
    store = InterviewStore(root=tmp_path, sim_id="sim42")
    run_dir = store.start_run(phase=InterviewPhase.T0, subagent=SubagentKind.DELPHI)
    store.audit(run_dir, agent_id=7, event="schema_violation", detail="missing key x")
    # Read with an explicit encoding so the result does not depend on the
    # platform's locale default.
    audit = (run_dir / "audit.jsonl").read_text(encoding="utf-8")
    assert "schema_violation" in audit

    # Parse the line as well: the substring check alone would still pass if
    # the event name ended up in the wrong field.
    entry = json.loads(audit.splitlines()[0])
    assert entry["event"] == "schema_violation"
    assert entry["agent_id"] == 7
    assert entry["detail"] == "missing key x"
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
import json
|
||||
from pathlib import Path
|
||||
from app.services.interviews.storage import InterviewStore
|
||||
from app.models.interview import InterviewPhase, SubagentKind, LikertResponse
|
||||
from app.services.interview_synthesizer import InterviewSynthesizer
|
||||
|
||||
def _seed_minimal(tmp_path: Path) -> InterviewStore:
    """Build a store for sim ``s1`` holding one finished T0 longitudinal run.

    The run gets three identical responses, an empty aggregate, and is marked latest.
    """
    store = InterviewStore(root=tmp_path, sim_id="s1")
    run_dir = store.start_run(InterviewPhase.T0, SubagentKind.LONGITUDINAL)
    for agent_id in range(3):
        answer = LikertResponse(
            agent_id=agent_id,
            phase=InterviewPhase.T0,
            responses={"stk_1": 3, "gov_1": 3},
            confidence={"stk_1": 0.5, "gov_1": 0.5},
        )
        store.append_response(run_dir, answer)
    store.write_aggregate(run_dir, {"per_item": {}, "n_paired": 0})
    store.mark_latest(run_dir)
    return store
|
||||
|
||||
def test_synthesizer_runs_with_partial_data(tmp_path):
    """With only one longitudinal run seeded, the report still mentions limitations."""
    synthesizer = InterviewSynthesizer(store=_seed_minimal(tmp_path))
    report = synthesizer.run()
    assert "limitations" in report.lower()
    assert "stub mode" in report.lower() or "n_responded" in report.lower()
|
||||
|
||||
def test_synthesizer_writes_files(tmp_path):
    """Running the synthesizer leaves a ``report.md`` in the store's ``synthesis`` directory."""
    store = _seed_minimal(tmp_path)
    InterviewSynthesizer(store=store).run()
    synthesis_dir = store.base / "synthesis"
    written = {entry.name for entry in synthesis_dir.iterdir()}
    assert "report.md" in written
|
||||
|
|
@ -0,0 +1,77 @@
|
|||
import pytest
|
||||
|
||||
from app.models.interview import (
|
||||
LikertResponse, InterviewPhase, SubagentKind,
|
||||
)
|
||||
from app.services.interviews.zep_writer import InterviewZepWriter
|
||||
|
||||
|
||||
class _FakeMemoryUpdater:
    """Recording fake that offers only ``add_text_episode(graph_id, text)``.

    ``add_activity`` is left out on purpose (post-C4), so a writer that falls
    back to it fails here instead of passing silently.
    """

    def __init__(self):
        self.events: list[dict] = []

    def add_text_episode(self, graph_id, text):
        """Append one ``{"graph_id", "text"}`` record to ``events``."""
        record = {"graph_id": graph_id, "text": text}
        self.events.append(record)
|
||||
|
||||
|
||||
def test_per_agent_episode_text():
    """write_per_agent emits events naming the agent and ``longitudinal/T1``, all tagged with the graph id."""
    updater = _FakeMemoryUpdater()
    writer = InterviewZepWriter(memory_updater=updater, graph_id="g1")
    answer = LikertResponse(
        agent_id=42,
        phase=InterviewPhase.T1,
        responses={"stk_1": 4, "gov_1": 3},
        confidence={"stk_1": 0.8, "gov_1": 0.7},
    )
    writer.write_per_agent(SubagentKind.LONGITUDINAL, answer, agent_name="Fischer Müller")

    rendered = [str(event) for event in updater.events]
    assert any("Fischer Müller" in text for text in rendered)
    assert any("longitudinal/T1" in text for text in rendered)
    # Each event must carry the configured graph_id.
    assert all(event["graph_id"] == "g1" for event in updater.events)
|
||||
|
||||
|
||||
def test_aggregate_episode():
    """write_aggregate emits at least one event containing the summary text."""
    updater = _FakeMemoryUpdater()
    writer = InterviewZepWriter(memory_updater=updater, graph_id="g1")
    writer.write_aggregate(
        SubagentKind.SCENARIO,
        summary="S1 mean desirability 5.2; S2 mean 2.1",
    )
    rendered = [str(event) for event in updater.events]
    assert any("S1 mean" in text for text in rendered)
|
||||
|
||||
|
||||
def test_emit_uses_add_text_episode_with_graph_id():
    """C4: ``_emit`` forwards the raw text to ``add_text_episode`` with the constructor's graph_id.

    The fake must record exactly one ``{"graph_id", "text"}`` event — no dict
    payload and no ``add_activity`` fallback.
    """
    updater = _FakeMemoryUpdater()
    writer = InterviewZepWriter(memory_updater=updater, graph_id="g_xyz")
    writer._emit("hello world")
    expected_events = [{"graph_id": "g_xyz", "text": "hello world"}]
    assert updater.events == expected_events
|
||||
|
||||
|
||||
def test_emit_raises_when_updater_lacks_add_text_episode():
    """C4: an updater without ``add_text_episode`` makes ``_emit`` raise RuntimeError.

    The error message must mention ``add_text_episode``; the updater's
    ``add_activity`` must never be used as a fallback.
    """

    class _Broken:
        def add_activity(self, activity):  # pragma: no cover - kept for clarity
            raise AssertionError("must not be called")

    writer = InterviewZepWriter(memory_updater=_Broken(), graph_id="g1")
    with pytest.raises(RuntimeError, match="add_text_episode"):
        writer._emit("x")
|
||||
|
||||
|
||||
def test_real_updater_exposes_add_text_episode():
    """C4 sanity check: the real ZepGraphMemoryUpdater class has an ``add_text_episode`` attribute."""
    from app.services.zep_graph_memory_updater import ZepGraphMemoryUpdater as real_updater_cls

    required_attribute = "add_text_episode"
    assert hasattr(real_updater_cls, required_attribute)
|
||||
|
|
@ -994,10 +994,15 @@ dependencies = [
|
|||
{ name = "charset-normalizer" },
|
||||
{ name = "flask" },
|
||||
{ name = "flask-cors" },
|
||||
{ name = "numpy" },
|
||||
{ name = "openai" },
|
||||
{ name = "pandas" },
|
||||
{ name = "pydantic" },
|
||||
{ name = "pymupdf" },
|
||||
{ name = "python-dotenv" },
|
||||
{ name = "pyyaml" },
|
||||
{ name = "scikit-learn" },
|
||||
{ name = "scipy" },
|
||||
{ name = "zep-cloud" },
|
||||
]
|
||||
|
||||
|
|
@ -1022,13 +1027,18 @@ requires-dist = [
|
|||
{ name = "charset-normalizer", specifier = ">=3.0.0" },
|
||||
{ name = "flask", specifier = ">=3.0.0" },
|
||||
{ name = "flask-cors", specifier = ">=6.0.0" },
|
||||
{ name = "numpy", specifier = ">=1.26" },
|
||||
{ name = "openai", specifier = ">=1.0.0" },
|
||||
{ name = "pandas", specifier = ">=2.1" },
|
||||
{ name = "pipreqs", marker = "extra == 'dev'", specifier = ">=0.5.0" },
|
||||
{ name = "pydantic", specifier = ">=2.0.0" },
|
||||
{ name = "pymupdf", specifier = ">=1.24.0" },
|
||||
{ name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.0" },
|
||||
{ name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.23.0" },
|
||||
{ name = "python-dotenv", specifier = ">=1.0.0" },
|
||||
{ name = "pyyaml", specifier = ">=6.0" },
|
||||
{ name = "scikit-learn", specifier = ">=1.4" },
|
||||
{ name = "scipy", specifier = ">=1.12" },
|
||||
{ name = "zep-cloud", specifier = "==3.13.0" },
|
||||
]
|
||||
provides-extras = ["dev"]
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,280 @@
|
|||
# Stakeholder Interview Subagents — Design Spec
|
||||
|
||||
- **Date:** 2026-05-23
|
||||
- **Project:** MiroFish (multi-agent simulation engine for German fisheries discourse)
|
||||
- **Author:** Christian Möllmann (with Claude Code)
|
||||
- **Status:** Approved design — pending implementation plan
|
||||
|
||||
## 1. Purpose
|
||||
|
||||
After the OASIS Twitter + Reddit simulation produces a population of in-character stakeholder agents (fishers, NGOs, policy actors, scientists, consumers, etc.) grounded in a German fisheries discourse knowledge graph, we want to interrogate each agent individually with a structured questionnaire about the future of German fisheries.
|
||||
|
||||
Four methodologies run as independent subagents over the same agent population:
|
||||
|
||||
1. **Longitudinal** — pre/post Likert to measure opinion drift induced by simulated peer interaction
|
||||
2. **Diversity** — Q-sort + multi-dim Likert to map the value space and derive a stakeholder typology
|
||||
3. **Delphi** — three-round consensus probing to identify where stakeholder views converge vs. stay polarised
|
||||
4. **Scenario** — rating of 4 pre-defined 2040 scenarios on desirability, plausibility, group-impact, fairness
|
||||
|
||||
A synthesiser combines the four outputs into a single cross-method report.
|
||||
|
||||
## 2. Non-goals (v1)
|
||||
|
||||
- Real-time WebSocket streaming of interview progress (polling suffices)
|
||||
- Adaptive instruments / IRT calibration
|
||||
- Web UI for editing instruments (YAML + restart is fine)
|
||||
- Cross-simulation comparison endpoints (CSV exports support this externally)
|
||||
- Multi-language support beyond DE / EN
|
||||
|
||||
## 3. Architectural approach
|
||||
|
||||
**Chosen approach: Deterministic instrument runners.** Each subagent is a fixed protocol, not a ReACT loop. Rationale: fisheries futures methodology favours instrument fidelity (every stakeholder sees the same scale) over agent autonomy; results must be directly tabularisable for downstream analysis in pandas/R.
|
||||
|
||||
Rejected:
|
||||
- *ReACT-style subagents* — non-deterministic, ~3–10× cost, can't guarantee every agent answered every item
|
||||
- *Single InterviewService with mode enum* — couples four distinct methodologies (especially multi-round Delphi and two-phase Longitudinal) into one growing class
|
||||
|
||||
## 4. System architecture
|
||||
|
||||
```
|
||||
                   InterviewOrchestrator
                             │
      ┌──────────────┬───────┴───────┬──────────────┐
      ▼              ▼               ▼              ▼
Longitudinal     Diversity        Delphi        Scenario
  Subagent       Subagent        Subagent       Subagent
      │              │               │              │
      └──────────────┴──────┬────────┴──────────────┘
                            ▼
              StakeholderInterviewer (base)
                            │
          ┌─────────────────┼─────────────────┐
          ▼                 ▼                 ▼
      LLMClient      ZepEntityReader    ProfileLoader
   (in-character)    (memory digest)  (reddit/twitter)
                            │
                            ▼
         uploads/.../interviews/ + Zep episodes
|
||||
```
|
||||
|
||||
### 4.1 New files
|
||||
|
||||
| Path | Purpose |
|
||||
|---|---|
|
||||
| `backend/app/services/interviews/base.py` | `StakeholderInterviewer` — persona+memory loading, in-character prompting, retry/validation |
|
||||
| `backend/app/services/interviews/longitudinal.py` | Pre/post Likert |
|
||||
| `backend/app/services/interviews/diversity.py` | Q-sort + multi-dim value-space mapping |
|
||||
| `backend/app/services/interviews/delphi.py` | Three-round consensus |
|
||||
| `backend/app/services/interviews/scenario.py` | Scenario rating |
|
||||
| `backend/app/services/interview_orchestrator.py` | Fan-out, parallel execution, two-phase lifecycle |
|
||||
| `backend/app/services/interview_synthesizer.py` | Cross-method narrative report |
|
||||
| `backend/app/api/interview.py` | New Flask blueprint `/api/interview/*` |
|
||||
| `backend/app/models/interview.py` | Pydantic schemas for instruments + responses |
|
||||
| `backend/scripts/instruments/*.yaml` | Editable instrument definitions (one YAML per subagent) |
|
||||
| `frontend/src/components/Step4bInterviews.vue` | Four tabs + synthesis tab |
|
||||
| `backend/tests/interviews/` | Unit tests per subagent + base + orchestrator + synthesiser |
|
||||
| `tests/integration/test_interview_pipeline.py` | End-to-end with stub LLM + disposable Zep graph |
|
||||
|
||||
### 4.2 Lifecycle integration
|
||||
|
||||
Two hooks added to `backend/app/services/simulation_manager.py`:
|
||||
|
||||
- `on_ready()` — automatically triggers Longitudinal T0 (pre-simulation baseline)
|
||||
- `on_completed()` — queues a background task (tracked by a `task_id`) that runs Longitudinal T1 + Diversity + Delphi + Scenario in parallel, then the Synthesiser
|
||||
|
||||
The two-phase split is **non-negotiable**: Longitudinal needs T0 captured before OASIS exposes agents to peer-generated content, otherwise drift is unmeasurable.
|
||||
|
||||
## 5. Instrument design
|
||||
|
||||
All instruments live in `backend/scripts/instruments/*.yaml` so content is editable without redeploying. Items default to German, translatable via existing locale system.
|
||||
|
||||
### 5.1 Longitudinal — opinion drift
|
||||
|
||||
- 12–15 item 5-point Likert ("lehne stark ab" → "stimme stark zu")
|
||||
- Administered at T0 (post-persona, pre-OASIS) and T1 (post-OASIS)
|
||||
- Item families (3–4 each): stock status & recovery; governance & CFP; market & MSC; climate & adaptation
|
||||
- Per-agent output: response value + LLM self-reported confidence per item + one open comment
|
||||
- Aggregate: Δ-matrix (N × M items), per-item Wilcoxon signed-rank, per-agent total drift magnitude
|
||||
|
||||
### 5.2 Diversity — typology mapping
|
||||
|
||||
- One-shot, post-simulation only
|
||||
- **Part A (Q-sort lite):** 24 statements sorted onto forced quasi-normal distribution from −3 to +3
|
||||
- **Part B:** 6 multi-dim Likert axes (preservation↔extraction, local↔EU, science-led↔tradition-led, individual↔collective, short-term↔long-term, market↔regulation)
|
||||
- Per-agent output: vector ∈ ℝ^30
|
||||
- Aggregate: PCA + k-means → 3–5 stakeholder clusters with archetype descriptions + cluster-membership probabilities
|
||||
|
||||
### 5.3 Delphi — consensus probing
|
||||
|
||||
- Three rounds, fully automated
|
||||
- **R1 (open):** 4 open questions; LLM extracts thematic codes from responses
|
||||
- **R2 (rate):** Agent sees anonymised list of all unique themes; rates each on importance (1–5) + plausibility (1–5)
|
||||
- **R3 (revise):** Agent sees group median + IQR per theme; can revise own ratings; free-text justification
|
||||
- Aggregate: per-theme convergence (Δ-IQR R2→R3), persistent disagreements (IQR > 2), ranked consensus statements
|
||||
|
||||
### 5.4 Scenario — futures evaluation
|
||||
|
||||
Four 2040 scenarios (YAML-editable):
|
||||
|
||||
- **S1 "Erholung"** — cod and herring recover, MSC ubiquitous, small-scale fleet stabilises
|
||||
- **S2 "Kollaps"** — both stocks collapse, fleet halved, aquaculture dominant
|
||||
- **S3 "Festung Europa"** — protectionist EU policy, MPAs cover 30%, recreational fishing curtailed
|
||||
- **S4 "Privatisierung"** — ITQs, consolidation, large operators only
|
||||
|
||||
Each agent rates each scenario on 4 dimensions (1–7 Likert): desirability, plausibility, impact-on-my-group, fairness. Plus one open question per scenario: "If you woke up in this 2040, what would you do?"
|
||||
|
||||
Aggregate: 4 × 4 per-agent matrix + open-text corpus → polarity charts (desirability × plausibility by stakeholder type), narrative themes.
|
||||
|
||||
### 5.5 Cross-cutting
|
||||
|
||||
**In-character prompting.** Every LLM call uses a system prompt of the form:
|
||||
|
||||
> You are [persona_text]. You are answering a survey about the future of German fisheries. Answer strictly in character based on your background, values, and what you experienced during the simulated social media discourse summarised below: [Zep memory digest]. Return JSON only.
|
||||
|
||||
Memory digest comes from `ZepEntityReader.get_entity_with_context()`.
|
||||
|
||||
**Structured output enforced.** Every response goes through `LLMClient.chat_json()` with a per-instrument JSON schema. One auto-retry on schema violation; agent flagged in audit log on second failure.
|
||||
|
||||
**Cost guardrails.** Longitudinal × 2 phases + Delphi × 3 rounds is heaviest. For N=50 agents and ~100 LLM calls per agent across all 4 subagents, budget ~5k calls / 5–10M tokens per simulation. Persona system prompts stay constant within a subagent run → cacheable.
|
||||
|
||||
## 6. Data flow and storage
|
||||
|
||||
### 6.1 Storage layout
|
||||
|
||||
```
|
||||
uploads/simulations/{sim_id}/interviews/
├── instruments_used.json           # frozen snapshot of YAML at run-time
├── T0/
│   └── longitudinal/
│       ├── responses.jsonl
│       ├── audit.jsonl             # raw LLM I/O, retries, validation failures
│       └── aggregate.json
├── T1/
│   ├── longitudinal/{same structure}
│   ├── diversity/
│   │   ├── responses.jsonl
│   │   ├── typology.json
│   │   └── pca.json
│   ├── delphi/
│   │   ├── round1_themes.jsonl
│   │   ├── round2_ratings.jsonl
│   │   ├── round3_revisions.jsonl
│   │   └── convergence.json
│   └── scenario/
│       ├── responses.jsonl
│       └── polarity_matrix.json
└── synthesis/
    ├── report.md
    └── exports/
        ├── all_responses.csv       # tidy long format
        └── codebook.json
|
||||
```
|
||||
|
||||
JSONL for raw responses (append-safe, streams cleanly); JSON for aggregates; CSV for analysis hand-off. `instruments_used.json` snapshot is critical for reproducibility when YAML is later edited.
|
||||
|
||||
### 6.2 Zep integration
|
||||
|
||||
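Two write patterns, both reusing `ZepGraphMemoryUpdater` (implementation note: episodes are written through its `add_text_episode()` method; calling `add_activity()` with a dict is known to be broken):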
|
||||
|
||||
- **Per-agent episode** — after each subagent finishes for an agent, write one episode: `"Agent {name} (interview/{subagent}/{phase}): {short summary of stance}"`. The existing ReportAgent can retrieve interview content via its current `panorama_search` / `insight_forge` tools without changes.
|
||||
- **Aggregate episodes** — after each subagent's aggregate step, write one summary episode per cluster / theme / scenario.
|
||||
|
||||
No new Zep schemas. No new entity types. Interviews are just more episodes — append-only, safe.
|
||||
|
||||
### 6.3 API surface
|
||||
|
||||
New blueprint `/api/interview`:
|
||||
|
||||
| Method | Path | Purpose |
|
||||
|---|---|---|
|
||||
| `POST` | `/api/interview/{sim_id}/pre` | Trigger T0 longitudinal (auto on READY, manual for re-runs) |
|
||||
| `POST` | `/api/interview/{sim_id}/post` | Trigger all 4 post-sim subagents; returns `task_id` |
|
||||
| `GET` | `/api/interview/{sim_id}/status?task_id=...` | Per-subagent progress |
|
||||
| `GET` | `/api/interview/{sim_id}/results/{subagent}` | Aggregate JSON for one subagent |
|
||||
| `GET` | `/api/interview/{sim_id}/results/synthesis` | Full synthesis report |
|
||||
| `GET` | `/api/interview/{sim_id}/export.csv` | Tidy long-format CSV across all 4 subagents |
|
||||
| `POST` | `/api/interview/{sim_id}/rerun` | Re-run one subagent (e.g. after editing YAML) |
|
||||
|
||||
All responses follow the existing `{success, data, error}` envelope. Polling reuses `models/task.py`.
|
||||
|
||||
### 6.4 Parallelism
|
||||
|
||||
- Within a subagent: `ThreadPoolExecutor(max_workers=8)` for per-agent LLM calls
|
||||
- Across the 4 post-sim subagents: all four run in parallel with each other; only Delphi's rounds run sequentially, inside its own subagent
|
||||
- Synthesiser waits for all four
|
||||
- Token budget guard: `Config.INTERVIEW_MAX_TOKENS_PER_RUN`; if the projected cost exceeds this limit, the API returns 400 with a dry-run estimate, and the caller can resubmit with `confirm=true` to override
|
||||
|
||||
### 6.5 Frontend
|
||||
|
||||
New `Step4bInterviews.vue` between current Step4 (report) and Step5 (interaction). Four tabs (one per subagent) + a synthesis tab. Each tab shows progress bar during run, then results: Likert heatmap (longitudinal Δ), PCA scatter (diversity), convergence chart (Delphi), polarity quadrants (scenario). Download button per tab pulls the CSV export.
|
||||
|
||||
## 7. Error handling
|
||||
|
||||
**Per-agent failures are isolated.** If agent 17 times out or fails JSON validation twice, agent 17 is marked `failed` in `audit.jsonl`; the rest of the run continues. Aggregates report `n_responded` / `n_total` honestly.
|
||||
|
||||
| Failure | Handling |
|
||||
|---|---|
|
||||
| LLM timeout / 5xx | Exponential-backoff retry (3 attempts) via existing `LLMClient`; then mark agent failed |
|
||||
| JSON schema violation | One auto-retry with explicit corrective instruction; then mark failed |
|
||||
| Likert out-of-range / missing items | Re-ask only the bad items; if still bad, item-level missing |
|
||||
| Zep memory fetch fails | Run without memory digest; flag in audit (`memory_available: false`); down-weight in drift analysis |
|
||||
| Whole-subagent crash | Other 3 continue; synthesiser runs on what completed and flags the gap |
|
||||
| Token budget exceeded mid-run | Pause, write partial results, return 503 with `resume_token` |
|
||||
|
||||
**Idempotency.** Every subagent run is keyed by `(sim_id, subagent, phase, run_id)`. Re-runs write a new `run_id` directory; never overwrite. A `latest.json` pointer tracks the canonical run.
|
||||
|
||||
## 8. Validation
|
||||
|
||||
Three layers:
|
||||
|
||||
1. **Schema validation** — pydantic models for every response; JSONL files validated on write
|
||||
2. **Instrument validation** — `validate_instrument(yaml)` pre-flight: required fields, scale coherence, no duplicate item_ids, DE+EN both present if i18n enabled
|
||||
3. **Plausibility checks** on aggregates (flag, don't kill):
|
||||
- Longitudinal: >80% zero drift on every item OR >80% flip — likely a prompting bug or acquiescence bias
|
||||
- Diversity: first two PCA components explain <30% of variance — instrument not discriminating
|
||||
- Delphi: R3 ratings identical to R2 for >90% of agents — no engagement with anonymised feedback
|
||||
- Scenario: all agents rate all scenarios identically on `desirability` — instrument failure
|
||||
|
||||
Flags surface in the synthesis report under "instrument health" so the user can decide whether data is publishable.
|
||||
|
||||
## 9. Testing
|
||||
|
||||
**Unit tests** (`backend/tests/interviews/`):
|
||||
|
||||
- `test_instruments.py` — every YAML parses and validates
|
||||
- `test_base_interviewer.py` — persona+memory loading, in-character prompt construction, schema-retry logic (mock `LLMClient`)
|
||||
- One file per subagent — happy path + each failure mode in §7
|
||||
- `test_orchestrator.py` — fan-out, partial failures, two-phase ordering (T0 before T1)
|
||||
- `test_synthesizer.py` — missing-subagent handling, stable output shape
|
||||
|
||||
**Integration test** (`tests/integration/test_interview_pipeline.py`):
|
||||
|
||||
End-to-end with N=5 agents against a recorded LLM cassette. Verifies T0 at READY, T1 + 3 others at COMPLETED, CSV export well-formed, Zep episodes written.
|
||||
|
||||
**Stub LLM mode** (`Config.LLM_STUB_MODE=true`) returns deterministic canned responses keyed by `(subagent, item_id, persona_hash)`. Full pipeline exercisable in CI for free.
|
||||
|
||||
**Zep**: disposable graph in integration tests (consistent with project conventions); unit tests stub.
|
||||
|
||||
## 10. Methodological caveats (auto-emitted in synthesis)
|
||||
|
||||
The synthesiser **always** emits a "Limitations" section, programmatically generated from run metadata:
|
||||
|
||||
- **Simulated, not real stakeholders.** Responses reflect how the seed-document discourse + LLM jointly encode each stakeholder type, not what actual fishers / NGO staff would say. The instrument measures the *model of the stakeholder*, not the stakeholder.
|
||||
- **Memory digest is lossy.** Each agent's "experience" of OASIS is summarised to bounded length; agents do not have full episodic recall.
|
||||
- **LLM acquiescence and centrality bias.** Likert with LLM respondents skews toward 3–4 of 5; per-item distribution shape statistics are reported.
|
||||
- **N is what it is.** `n_total` and `n_responded` printed verbatim; no rounding, no smoothing.
|
||||
- **Instrument provenance.** Hash of `instruments_used.json` printed so future-you can rebuild the exact instrument.
|
||||
|
||||
This section is load-bearing for any publication: it makes the system intellectually defensible rather than a black box.
|
||||
|
||||
## 11. Defaulted decisions (revisit later if needed)
|
||||
|
||||
- **N agents:** assumed 50, driven from existing simulation config; if you typically run more/fewer, cost guardrail threshold needs adjusting
|
||||
- **Default instrument language:** German with English fallback in YAML
|
||||
- **Delphi rounds = 3:** classic Delphi can run more; 3 is the methodological floor and the cost ceiling here
|
||||
|
||||
## 12. Open questions for implementation phase
|
||||
|
||||
- Whether to write a separate `instruments_changelog.md` per run, or embed change tracking in `instruments_used.json` metadata
|
||||
- Whether the synthesiser should write into Zep as a single mega-episode or stay file-only (current design: file-only, plus the per-agent + per-aggregate episodes from each subagent)
|
||||
- Whether `Step4bInterviews.vue` should sit strictly after Step4 (current design) or render in parallel — interviews depend on the simulation having reached `completed` (Step3 output) and on the `graph_id` (created in Step1); they do not depend on Step4's ReportAgent run, so a parallel layout is technically possible
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
import service from './index'
|
||||
|
||||
// POST /api/interview/{simId}/pre — trigger the T0 (pre-simulation) run.
// Resolves to whatever the shared `service` client resolves to.
export async function startPre(simId) {
  const url = `/api/interview/${simId}/pre`
  return service.post(url)
}
|
||||
// POST /api/interview/{simId}/post — trigger the post-simulation run.
// Resolves to whatever the shared `service` client resolves to.
export async function startPost(simId) {
  const url = `/api/interview/${simId}/post`
  return service.post(url)
}
|
||||
// POST /api/interview/{simId}/rerun with body `{ subagent }` — re-run a
// single subagent for the given simulation.
export async function rerun(simId, subagent) {
  const url = `/api/interview/${simId}/rerun`
  const payload = { subagent }
  return service.post(url, payload)
}
|
||||
// GET /api/interview/{simId}/status?task_id=... — fetch progress for a task.
export async function getStatus(simId, taskId) {
  const url = `/api/interview/${simId}/status`
  const query = { task_id: taskId }
  return service.get(url, { params: query })
}
|
||||
// GET /api/interview/{simId}/results/{subagent} — fetch one subagent's results.
export async function getResults(simId, subagent) {
  const url = `/api/interview/${simId}/results/${subagent}`
  return service.get(url)
}
|
||||
// GET /api/interview/{simId}/results/synthesis — fetch the synthesis report.
export async function getSynthesis(simId) {
  const url = `/api/interview/${simId}/results/synthesis`
  return service.get(url)
}
|
||||
// Build the (relative) download URL of the CSV export for a simulation.
// Pure string construction; no request is made here.
export function exportCsvUrl(simId) {
  const base = `/api/interview/${simId}`
  return `${base}/export.csv`
}
|
||||
|
|
@ -0,0 +1,79 @@
|
|||
<template>
|
||||
<section class="step4b">
|
||||
<header>
|
||||
<h2>{{ t('interview.title') }}</h2>
|
||||
<p class="subtitle">{{ t('interview.subtitle') }}</p>
|
||||
</header>
|
||||
|
||||
<div class="actions">
|
||||
<button :disabled="busy" @click="startPostRun">{{ t('interview.runAll') }}</button>
|
||||
<a :href="csvUrl" target="_blank" rel="noopener">{{ t('interview.downloadCsv') }}</a>
|
||||
</div>
|
||||
|
||||
<nav class="tabs">
|
||||
<button v-for="tab in tabs" :key="tab.id"
|
||||
:class="{ active: active === tab.id }"
|
||||
@click="active = tab.id">
|
||||
{{ tab.label }}
|
||||
</button>
|
||||
</nav>
|
||||
|
||||
<component :is="currentPanel" :sim-id="simId" :status="status" />
|
||||
</section>
|
||||
</template>
|
||||
|
||||
<script setup>
|
||||
import { computed, onBeforeUnmount, ref } from 'vue'
import { useI18n } from 'vue-i18n'
import LongitudinalPanel from './interviews/LongitudinalPanel.vue'
import DiversityPanel from './interviews/DiversityPanel.vue'
import DelphiPanel from './interviews/DelphiPanel.vue'
import ScenarioPanel from './interviews/ScenarioPanel.vue'
import SynthesisPanel from './interviews/SynthesisPanel.vue'
import { startPost, getStatus, exportCsvUrl } from '../api/interview'

// Delay between two status polls.
const POLL_INTERVAL_MS = 1500

const props = defineProps({ simId: { type: String, required: true } })
const { t } = useI18n()

// Computed (not a plain array) so the tab labels follow locale switches
// instead of being frozen at setup time.
const tabs = computed(() => [
  { id: 'longitudinal', label: t('interview.tab.longitudinal') },
  { id: 'diversity', label: t('interview.tab.diversity') },
  { id: 'delphi', label: t('interview.tab.delphi') },
  { id: 'scenario', label: t('interview.tab.scenario') },
  { id: 'synthesis', label: t('interview.tab.synthesis') },
])
const active = ref('longitudinal')
// Last known task status; handed to the active panel as the `status` prop.
const status = ref({ status: 'idle' })
const busy = ref(false)
const csvUrl = computed(() => exportCsvUrl(props.simId))

const panels = {
  longitudinal: LongitudinalPanel, diversity: DiversityPanel,
  delphi: DelphiPanel, scenario: ScenarioPanel, synthesis: SynthesisPanel,
}
const currentPanel = computed(() => panels[active.value])

// Set on unmount so an in-flight poll loop stops issuing requests.
let disposed = false
onBeforeUnmount(() => { disposed = true })

// Start the post-simulation run and poll its task until it finishes.
// Any failure is recorded in `status` and then re-thrown, so it still reaches
// the caller / Vue's error handling exactly as before.
async function startPostRun() {
  busy.value = true
  try {
    const res = await startPost(props.simId)
    if (!res.success) throw new Error(res.error || 'failed to start')
    await poll(res.data.task_id)
  } catch (e) {
    status.value = { status: 'failed', error: e?.message || String(e) }
    throw e
  } finally { busy.value = false }
}

// Poll the task status until it reports 'completed' or 'failed', or until the
// component is unmounted. Throws when the status envelope is unusable instead
// of dereferencing missing data.
async function poll(taskId) {
  while (!disposed) {
    const r = await getStatus(props.simId, taskId)
    if (!r || !r.success || !r.data) {
      throw new Error((r && r.error) || 'failed to fetch status')
    }
    status.value = r.data
    if (['completed', 'failed'].includes(r.data.status)) break
    await new Promise(resolve => setTimeout(resolve, POLL_INTERVAL_MS))
  }
}
|
||||
</script>
|
||||
|
||||
<style scoped>
|
||||
.step4b { padding: 1rem; }
|
||||
.tabs { display: flex; gap: .5rem; margin: 1rem 0; }
|
||||
.tabs button.active { font-weight: 700; border-bottom: 2px solid #333; }
|
||||
.actions { display: flex; gap: 1rem; align-items: center; }
|
||||
</style>
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
<template>
|
||||
<div class="panel">
|
||||
<h3>Delphi convergence (R1→R3)</h3>
|
||||
<div v-if="loading">Loading…</div>
|
||||
<div v-else-if="error">{{ error }}</div>
|
||||
<svg v-else ref="chart" :width="width" :height="height"></svg>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<script setup>
|
||||
import { nextTick, onMounted, ref, watch } from 'vue'
import * as d3 from 'd3'
import { getResults } from '../../api/interview'

const props = defineProps({ simId: String, status: Object })
const chart = ref(null); const loading = ref(true); const error = ref(null)
const width = 640, height = 420

// Load once on mount, and again whenever the parent reports a completed run.
watch(() => props.status?.status, (s) => { if (s === 'completed') load() })
onMounted(load)

// Fetch the Delphi aggregate, then render it.
// The <svg> sits behind `v-else`, so it only exists once `loading` has been
// cleared and Vue has patched the DOM. Drawing before that targets a null ref
// and silently renders nothing, hence the nextTick() before draw().
async function load() {
  loading.value = true; error.value = null
  let aggregate = null
  try {
    // service interceptor returns the envelope {success, data, error} directly
    const r = await getResults(props.simId, 'delphi')
    if (!r.success) { error.value = r.error || 'failed to load results'; return }
    aggregate = r.data?.aggregate ?? null
  } catch (e) { error.value = String(e) } finally { loading.value = false }
  if (!aggregate) return
  await nextTick()
  try { draw(aggregate) } catch (e) { error.value = String(e) }
}

// Render one bar per theme into the chart <svg>.
function draw(agg) {
  if (!chart.value) return // svg not mounted (e.g. error state)
  const themes = agg.themes || []
  if (!themes.length) return
  const svg = d3.select(chart.value); svg.selectAll('*').remove()
  const margin = { top: 20, right: 20, bottom: 80, left: 60 }
  const w = width - margin.left - margin.right
  const h = height - margin.top - margin.bottom
  const g = svg.append('g').attr('transform', `translate(${margin.left},${margin.top})`)
  const x = d3.scaleBand().domain(themes.map(t => t.theme_id)).range([0, w]).padding(0.15)
  const y = d3.scaleLinear().domain([0, agg.n_r1 || 1]).range([h, 0])
  // NOTE(review): bar height is the run-level n_r3, so every theme gets the
  // same bar — confirm whether a per-theme value was intended here.
  const bars = themes.map((t) => ({
    theme: t.theme_id, label: t.label,
    nr1: agg.n_r1, nr2: agg.n_r2, nr3: agg.n_r3 ?? 0, // 0 avoids NaN geometry
  }))
  g.selectAll('rect').data(bars).enter().append('rect')
    .attr('x', d => x(d.theme)).attr('y', d => y(d.nr3))
    .attr('width', x.bandwidth()).attr('height', d => h - y(d.nr3))
    .attr('fill', d3.schemeCategory10[2])
  g.append('g').attr('transform', `translate(0,${h})`).call(d3.axisBottom(x))
    .selectAll('text').attr('transform', 'rotate(-30)').attr('text-anchor', 'end')
  g.append('g').call(d3.axisLeft(y))
}
|
||||
</script>
|
||||
|
||||
<style scoped>
|
||||
.panel { padding: .5rem; }
|
||||
</style>
|
||||
|
|
@ -0,0 +1,63 @@
|
|||
<template>
|
||||
<div class="panel">
|
||||
<h3>Stakeholder typology (PCA)</h3>
|
||||
<div v-if="loading">Loading…</div>
|
||||
<div v-else-if="error">{{ error }}</div>
|
||||
<svg v-else ref="chart" :width="width" :height="height"></svg>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<script setup>
|
||||
import { nextTick, onMounted, ref, watch } from 'vue'
import * as d3 from 'd3'
import { getResults } from '../../api/interview'

const props = defineProps({ simId: String, status: Object })
const chart = ref(null); const loading = ref(true); const error = ref(null)
const width = 640, height = 480

// Load once on mount, and again whenever the parent reports a completed run.
watch(() => props.status?.status, (s) => { if (s === 'completed') load() })
onMounted(load)

// Fetch the diversity aggregate, then render it.
// The <svg> sits behind `v-else`, so it only exists once `loading` has been
// cleared and Vue has patched the DOM. Drawing before that targets a null ref
// and silently renders nothing, hence the nextTick() before draw().
async function load() {
  loading.value = true; error.value = null
  let aggregate = null
  try {
    // service interceptor returns the envelope {success, data, error} directly
    const r = await getResults(props.simId, 'diversity')
    if (!r.success) { error.value = r.error || 'failed to load results'; return }
    aggregate = r.data?.aggregate ?? null
  } catch (e) { error.value = String(e) } finally { loading.value = false }
  if (!aggregate) return
  await nextTick()
  try { draw(aggregate) } catch (e) { error.value = String(e) }
}

// Render one dot per agent, grouped and coloured by cluster.
function draw(agg) {
  // The /results endpoint returns aggregate.json which contains clusters + agent_ids.
  // For v1 use clusters only, distributing them across a notional 2D layout per cluster.
  // The positions are synthetic (one direction per cluster), not PCA coordinates.
  if (!chart.value) return // svg not mounted (e.g. error state)
  const clusters = agg.clusters || []
  if (!clusters.length) return
  const svg = d3.select(chart.value); svg.selectAll('*').remove()
  const margin = { top: 20, right: 20, bottom: 30, left: 30 }
  const w = width - margin.left - margin.right
  const h = height - margin.top - margin.bottom
  const g = svg.append('g').attr('transform', `translate(${margin.left},${margin.top})`)
  const points = []
  clusters.forEach((c, i) => {
    // One fixed direction per cluster; hoisted because it does not depend on k.
    const angle = (i / clusters.length) * 2 * Math.PI
    ;(c.agent_ids || []).forEach((aid, k) => {
      // Radius cycles through 5 rings, so agents k and k+5 overlap.
      const radius = (k % 5 + 1) * 0.15 + 0.2
      points.push({ x: 0.5 + Math.cos(angle) * radius, y: 0.5 + Math.sin(angle) * radius,
        cluster: c.cluster_id, agent_id: aid })
    })
  })
  const x = d3.scaleLinear().domain([0, 1]).range([0, w])
  const y = d3.scaleLinear().domain([0, 1]).range([h, 0])
  const color = d3.scaleOrdinal(d3.schemeCategory10)
  g.selectAll('circle').data(points).enter().append('circle')
    .attr('cx', d => x(d.x)).attr('cy', d => y(d.y)).attr('r', 5)
    .attr('fill', d => color(d.cluster)).attr('opacity', .7)
    .append('title').text(d => `agent ${d.agent_id} · cluster ${d.cluster}`)
}
|
||||
</script>
|
||||
|
||||
<style scoped>
|
||||
.panel { padding: .5rem; }
|
||||
</style>
|
||||
|
|
@ -0,0 +1,63 @@
|
|||
<template>
|
||||
<div class="panel">
|
||||
<h3>Longitudinal Δ (T0 → T1)</h3>
|
||||
<div v-if="loading">Loading…</div>
|
||||
<div v-else-if="error">{{ error }}</div>
|
||||
<svg v-else ref="chart" :width="width" :height="height"></svg>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<script setup>
|
||||
import { nextTick, onMounted, ref, watch } from 'vue'
import * as d3 from 'd3'
import { getResults } from '../../api/interview'

const props = defineProps({ simId: String, status: Object })
const chart = ref(null)
const loading = ref(true)
const error = ref(null)
const width = 640
const height = 360

// Load once on mount, and again whenever the parent reports a completed run.
watch(() => props.status?.status, (s) => { if (s === 'completed') load() })
onMounted(load)

// Fetch the longitudinal aggregate, then render it.
// The <svg> sits behind `v-else`, so it only exists once `loading` has been
// cleared and Vue has patched the DOM. Drawing before that targets a null ref
// and silently renders nothing, hence the nextTick() before draw().
async function load() {
  loading.value = true; error.value = null
  let aggregate = null
  try {
    // service interceptor returns the envelope {success, data, error} directly
    const r = await getResults(props.simId, 'longitudinal')
    if (!r.success) { error.value = r.error || 'failed to load results'; return }
    aggregate = r.data?.aggregate ?? null
  } catch (e) { error.value = String(e) }
  finally { loading.value = false }
  if (!aggregate) return
  await nextTick()
  try { draw(aggregate) } catch (e) { error.value = String(e) }
}

// Render one bar per item showing its mean_delta around a zero baseline.
function draw(agg) {
  if (!chart.value) return // svg not mounted (e.g. error state)
  const items = Object.entries(agg.per_item || {})
  if (items.length === 0) return
  const svg = d3.select(chart.value)
  svg.selectAll('*').remove()
  const margin = { top: 20, right: 20, bottom: 60, left: 80 }
  const w = width - margin.left - margin.right
  const h = height - margin.top - margin.bottom
  const g = svg.append('g').attr('transform', `translate(${margin.left},${margin.top})`)
  const x = d3.scaleBand().domain(items.map(([k]) => k)).range([0, w]).padding(0.1)
  // Fixed symmetric domain so the zero line sits mid-chart.
  const y = d3.scaleLinear().domain([-4, 4]).range([h, 0])
  const color = d3.scaleDiverging(d3.interpolateRdBu).domain([-4, 0, 4])
  g.selectAll('rect').data(items).enter().append('rect')
    .attr('x', d => x(d[0]))
    // Positive bars start at their top edge, negative bars start at zero.
    .attr('y', d => y(Math.max(0, d[1].mean_delta || 0)))
    .attr('width', x.bandwidth())
    .attr('height', d => Math.abs(y(d[1].mean_delta || 0) - y(0)))
    .attr('fill', d => color(d[1].mean_delta || 0))
  // The x axis is drawn on the zero line rather than at the bottom edge.
  g.append('g').attr('transform', `translate(0,${y(0)})`)
    .call(d3.axisBottom(x)).selectAll('text')
    .attr('transform', 'rotate(-40)').attr('text-anchor', 'end')
  g.append('g').call(d3.axisLeft(y))
}
|
||||
</script>
|
||||
|
||||
<style scoped>
|
||||
.panel { padding: .5rem; }
|
||||
</style>
|
||||
|
|
@ -0,0 +1,66 @@
|
|||
<template>
|
||||
<div class="panel">
|
||||
<h3>Scenarios: desirability × plausibility</h3>
|
||||
<div v-if="loading">Loading…</div>
|
||||
<div v-else-if="error">{{ error }}</div>
|
||||
<svg v-else ref="chart" :width="width" :height="height"></svg>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<script setup>
|
||||
import { nextTick, onMounted, ref, watch } from 'vue'
import * as d3 from 'd3'
import { getResults } from '../../api/interview'

const props = defineProps({ simId: String, status: Object })
const chart = ref(null); const loading = ref(true); const error = ref(null)
const width = 520, height = 520

// Load once on mount, and again whenever the parent reports a completed run.
watch(() => props.status?.status, (s) => { if (s === 'completed') load() })
onMounted(load)

// Fetch the scenario aggregate, then render its polarity table.
// The <svg> sits behind `v-else`, so it only exists once `loading` has been
// cleared and Vue has patched the DOM. Drawing before that targets a null ref
// and silently renders nothing, hence the nextTick() before draw().
async function load() {
  loading.value = true; error.value = null
  let polarity = null
  try {
    // service interceptor returns the envelope {success, data, error} directly
    const r = await getResults(props.simId, 'scenario')
    if (!r.success) { error.value = r.error || 'failed to load results'; return }
    polarity = r.data?.aggregate?.polarity ?? null
  } catch (e) { error.value = String(e) } finally { loading.value = false }
  if (!polarity) return // nothing to plot; same outcome as drawing an empty table
  await nextTick()
  try { draw(polarity) } catch (e) { error.value = String(e) }
}

// Plot one circle per scenario: x = mean plausibility, y = mean desirability.
function draw(polarity) {
  if (!chart.value) return // svg not mounted (e.g. error state)
  const pts = Object.entries(polarity)
    .filter(([, v]) => v && v.n > 0) // skip scenarios without responses
    .map(([sid, v]) => ({
      sid, x: v.mean_plausibility, y: v.mean_desirability,
      n: v.n, sdx: v.sd_plausibility, sdy: v.sd_desirability,
    }))
  if (!pts.length) return
  const svg = d3.select(chart.value); svg.selectAll('*').remove()
  const margin = { top: 20, right: 20, bottom: 40, left: 40 }
  const w = width - margin.left - margin.right
  const h = height - margin.top - margin.bottom
  const g = svg.append('g').attr('transform', `translate(${margin.left},${margin.top})`)
  const x = d3.scaleLinear().domain([1, 7]).range([0, w])
  const y = d3.scaleLinear().domain([1, 7]).range([h, 0])
  // Quadrant guides at the scale midpoint (4 on the 1–7 axes).
  g.append('line').attr('x1', 0).attr('x2', w).attr('y1', y(4)).attr('y2', y(4)).attr('stroke', '#ccc')
  g.append('line').attr('x1', x(4)).attr('x2', x(4)).attr('y1', 0).attr('y2', h).attr('stroke', '#ccc')
  g.selectAll('circle').data(pts).enter().append('circle')
    .attr('cx', d => x(d.x)).attr('cy', d => y(d.y))
    .attr('r', d => 6 + Math.sqrt(d.n)) // radius grows with respondent count
    .attr('fill', d3.schemeCategory10[1]).attr('opacity', .7)
  g.selectAll('text.lbl').data(pts).enter().append('text')
    .attr('class', 'lbl').attr('x', d => x(d.x) + 8).attr('y', d => y(d.y))
    .text(d => `${d.sid} (n=${d.n})`)
  g.append('g').attr('transform', `translate(0,${h})`).call(d3.axisBottom(x))
  g.append('g').call(d3.axisLeft(y))
  g.append('text').attr('x', w/2).attr('y', h+34).attr('text-anchor', 'middle').text('plausibility')
  g.append('text').attr('transform', `rotate(-90)`).attr('x', -h/2).attr('y', -28)
    .attr('text-anchor', 'middle').text('desirability')
}
|
||||
</script>
|
||||
|
||||
<style scoped>
|
||||
.panel { padding: .5rem; }
|
||||
</style>
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
<template>
|
||||
<div class="panel">
|
||||
<h3>Synthesis</h3>
|
||||
<div v-if="loading">Loading…</div>
|
||||
<div v-else-if="error">{{ error }}</div>
|
||||
<pre v-else class="report">{{ report }}</pre>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<script setup>
|
||||
import { onMounted, ref, watch } from 'vue'
|
||||
import { getSynthesis } from '../../api/interview'
|
||||
|
||||
const props = defineProps({ simId: String, status: Object })
|
||||
const loading = ref(true); const error = ref(null); const report = ref('')
|
||||
|
||||
watch(() => props.status?.status, (s) => { if (s === 'completed') load() })
|
||||
onMounted(load)
|
||||
|
||||
// Fetch the synthesis report for `props.simId` and publish its markdown via
// the `report` ref. `loading` is true while the request is in flight; `error`
// holds the envelope's error or the stringified exception on failure.
async function load() {
  loading.value = true
  error.value = null
  try {
    // service interceptor returns the envelope {success, data, error} directly
    const envelope = await getSynthesis(props.simId)
    if (envelope.success) {
      report.value = envelope.data.report_markdown
    } else {
      error.value = envelope.error
    }
  } catch (err) {
    error.value = String(err)
  } finally {
    loading.value = false
  }
}
|
||||
</script>
|
||||
|
||||
<style scoped>
|
||||
.panel { padding: .5rem; }
|
||||
.report { white-space: pre-wrap; font-family: ui-monospace, monospace; line-height: 1.4; }
|
||||
</style>
|
||||
|
|
@ -4,6 +4,7 @@ import Process from '../views/MainView.vue'
|
|||
import SimulationView from '../views/SimulationView.vue'
|
||||
import SimulationRunView from '../views/SimulationRunView.vue'
|
||||
import ReportView from '../views/ReportView.vue'
|
||||
import InterviewView from '../views/InterviewView.vue'
|
||||
import InteractionView from '../views/InteractionView.vue'
|
||||
|
||||
const routes = [
|
||||
|
|
@ -36,6 +37,12 @@ const routes = [
|
|||
component: ReportView,
|
||||
props: true
|
||||
},
|
||||
{
|
||||
path: '/interview/:simulationId',
|
||||
name: 'Interview',
|
||||
component: InterviewView,
|
||||
props: true
|
||||
},
|
||||
{
|
||||
path: '/interaction/:reportId',
|
||||
name: 'Interaction',
|
||||
|
|
|
|||
|
|
@ -0,0 +1,192 @@
|
|||
<template>
|
||||
<div class="main-view">
|
||||
<!-- Header -->
|
||||
<header class="app-header">
|
||||
<div class="header-left">
|
||||
<div class="brand" @click="router.push('/')">MIROFISH</div>
|
||||
</div>
|
||||
|
||||
<div class="header-center">
|
||||
<div class="view-switcher">
|
||||
<button
|
||||
v-for="mode in ['graph', 'split', 'workbench']"
|
||||
:key="mode"
|
||||
class="switch-btn"
|
||||
:class="{ active: viewMode === mode }"
|
||||
@click="viewMode = mode"
|
||||
>
|
||||
{{ { graph: $t('main.layoutGraph'), split: $t('main.layoutSplit'), workbench: $t('main.layoutWorkbench') }[mode] }}
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="header-right">
|
||||
<LanguageSwitcher />
|
||||
<div class="step-divider"></div>
|
||||
<div class="workflow-step">
|
||||
<span class="step-num">Step 4b/5</span>
|
||||
<span class="step-name">{{ $t('interview.title') }}</span>
|
||||
</div>
|
||||
<div class="step-divider"></div>
|
||||
<span class="status-indicator idle">
|
||||
<span class="dot"></span>
|
||||
{{ $t('common.ready') }}
|
||||
</span>
|
||||
</div>
|
||||
</header>
|
||||
|
||||
<!-- Main Content Area -->
|
||||
<main class="content-area">
|
||||
<!-- Right Panel fills workbench mode -->
|
||||
<div class="panel-wrapper right" :style="rightPanelStyle">
|
||||
<Step4bInterviews :sim-id="currentSimId" />
|
||||
</div>
|
||||
</main>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<script setup>
|
||||
import { ref, computed } from 'vue'
|
||||
import { useRoute, useRouter } from 'vue-router'
|
||||
import LanguageSwitcher from '../components/LanguageSwitcher.vue'
|
||||
import Step4bInterviews from '../components/Step4bInterviews.vue'
|
||||
|
||||
const route = useRoute()
|
||||
const router = useRouter()
|
||||
|
||||
const currentSimId = ref(route.params.simulationId)
|
||||
const viewMode = ref('workbench')
|
||||
|
||||
// Inline style for the right panel, derived from the selected view mode:
// full width in 'workbench', collapsed and transparent in 'graph', and half
// width for any other mode.
const rightPanelStyle = computed(() => {
  switch (viewMode.value) {
    case 'workbench':
      return { width: '100%', opacity: 1, transform: 'translateX(0)' }
    case 'graph':
      return { width: '0%', opacity: 0, transform: 'translateX(20px)' }
    default:
      return { width: '50%', opacity: 1, transform: 'translateX(0)' }
  }
})
|
||||
</script>
|
||||
|
||||
<style scoped>
|
||||
.main-view {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
height: 100vh;
|
||||
overflow: hidden;
|
||||
font-family: 'JetBrains Mono', 'Space Grotesk', 'Noto Sans SC', monospace;
|
||||
}
|
||||
|
||||
.app-header {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
padding: 0 24px;
|
||||
height: 56px;
|
||||
background: #000;
|
||||
color: #fff;
|
||||
flex-shrink: 0;
|
||||
z-index: 10;
|
||||
}
|
||||
|
||||
.brand {
|
||||
font-size: 1rem;
|
||||
font-weight: 700;
|
||||
letter-spacing: 0.1em;
|
||||
cursor: pointer;
|
||||
transition: opacity 0.2s;
|
||||
}
|
||||
|
||||
.brand:hover { opacity: 0.8; }
|
||||
|
||||
.header-center {
|
||||
position: absolute;
|
||||
left: 50%;
|
||||
transform: translateX(-50%);
|
||||
}
|
||||
|
||||
.view-switcher {
|
||||
display: flex;
|
||||
gap: 2px;
|
||||
background: #1a1a1a;
|
||||
padding: 3px;
|
||||
border-radius: 4px;
|
||||
}
|
||||
|
||||
.switch-btn {
|
||||
padding: 4px 12px;
|
||||
font-size: 0.75rem;
|
||||
background: transparent;
|
||||
border: none;
|
||||
color: #666;
|
||||
cursor: pointer;
|
||||
border-radius: 2px;
|
||||
transition: all 0.15s;
|
||||
font-family: inherit;
|
||||
}
|
||||
|
||||
.switch-btn.active {
|
||||
background: #fff;
|
||||
color: #000;
|
||||
}
|
||||
|
||||
.header-right {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 12px;
|
||||
}
|
||||
|
||||
.step-divider {
|
||||
width: 1px;
|
||||
height: 20px;
|
||||
background: #333;
|
||||
}
|
||||
|
||||
.workflow-step {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: flex-end;
|
||||
}
|
||||
|
||||
.step-num {
|
||||
font-size: 0.65rem;
|
||||
color: #666;
|
||||
letter-spacing: 0.05em;
|
||||
}
|
||||
|
||||
.step-name {
|
||||
font-size: 0.75rem;
|
||||
color: #fff;
|
||||
}
|
||||
|
||||
.status-indicator {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 6px;
|
||||
font-size: 0.75rem;
|
||||
color: #999;
|
||||
}
|
||||
|
||||
.dot {
|
||||
width: 6px;
|
||||
height: 6px;
|
||||
border-radius: 50%;
|
||||
background: #666;
|
||||
}
|
||||
|
||||
.status-indicator.idle .dot { background: #666; }
|
||||
|
||||
.content-area {
|
||||
flex: 1;
|
||||
display: flex;
|
||||
overflow: hidden;
|
||||
position: relative;
|
||||
}
|
||||
|
||||
.panel-wrapper {
|
||||
overflow: hidden;
|
||||
transition: width 0.35s cubic-bezier(0.4, 0, 0.2, 1),
|
||||
opacity 0.3s ease,
|
||||
transform 0.3s ease;
|
||||
}
|
||||
|
||||
.panel-wrapper.right {
|
||||
overflow-y: auto;
|
||||
}
|
||||
</style>
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
{
|
||||
"interview": {
|
||||
"title": "Stakeholder-Interviews",
|
||||
"subtitle": "Vier unabhängige Befragungen der simulierten Stakeholder-Population.",
|
||||
"runAll": "Alle Post-Simulations-Interviews starten",
|
||||
"downloadCsv": "CSV herunterladen",
|
||||
"tab": {
|
||||
"longitudinal": "Längsschnitt (Δ)",
|
||||
"diversity": "Diversität",
|
||||
"delphi": "Delphi",
|
||||
"scenario": "Szenarien",
|
||||
"synthesis": "Synthese"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -661,5 +661,18 @@
|
|||
"llmSelectAgentFailed": "LLM agent selection failed, using default selection: {error}",
|
||||
"generateInterviewQuestionsFailed": "Failed to generate interview questions: {error}",
|
||||
"generateInterviewSummaryFailed": "Failed to generate interview summary: {error}"
|
||||
},
|
||||
"interview": {
|
||||
"title": "Stakeholder interviews",
|
||||
"subtitle": "Four independent surveys of the simulated stakeholder population.",
|
||||
"runAll": "Run all post-simulation interviews",
|
||||
"downloadCsv": "Download CSV",
|
||||
"tab": {
|
||||
"longitudinal": "Longitudinal (Δ)",
|
||||
"diversity": "Diversity",
|
||||
"delphi": "Delphi",
|
||||
"scenario": "Scenarios",
|
||||
"synthesis": "Synthesis"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -661,5 +661,18 @@
|
|||
"llmSelectAgentFailed": "LLM选择Agent失败,使用默认选择: {error}",
|
||||
"generateInterviewQuestionsFailed": "生成采访问题失败: {error}",
|
||||
"generateInterviewSummaryFailed": "生成采访摘要失败: {error}"
|
||||
},
|
||||
"interview": {
|
||||
"title": "利益相关者访谈",
|
||||
"subtitle": "对模拟利益相关者群体进行的四项独立调查。",
|
||||
"runAll": "运行所有模拟后访谈",
|
||||
"downloadCsv": "下载 CSV",
|
||||
"tab": {
|
||||
"longitudinal": "纵向分析 (Δ)",
|
||||
"diversity": "多样性",
|
||||
"delphi": "德尔菲法",
|
||||
"scenario": "情景分析",
|
||||
"synthesis": "综合分析"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue