fix(interviews): wire Zep updater/memory/hooks correctly for production runs (C1-C5)

Five tightly coupled fixes for bugs that were causing the interview subsystem to silently degrade in production:

- C1+C2: `_build_orchestrator` now resolves `graph_id` from `SimulationManager().get_simulation(sim_id).graph_id` (the real persisted state) instead of a `graph_id.txt` that nothing in the codebase writes. `ZepGraphMemoryUpdater(graph_id=...)` is now called with its required `graph_id` argument; the bare `try/except Exception` that was swallowing the TypeError is replaced with a narrow fallback that logs explicitly.
- C3: `SimulationManager._on_ready_hooks` / `_on_completed_hooks` are now class-level (mirroring `SimulationRunner._on_completed_callbacks`). Hooks registered at app startup now survive across the per-request `SimulationManager()` instances created by the Flask API, so the T0 longitudinal auto-survey actually fires.
- C4: `ZepGraphMemoryUpdater` gains an explicit `add_text_episode(graph_id, text)` method for synchronous text writes. `InterviewZepWriter._emit` no longer silently falls back to a dict-shaped `add_activity` call that the real implementation rejects (its `add_activity` requires an `AgentActivity` dataclass).
- C5: `FileSystemPersonaProvider.agent_to_entity()` builds an `{agent_id: zep_entity_uuid}` map from the persisted profile files; the map is now passed to `ZepMemoryProvider` so `get_entity_with_context` is called with real Zep UUIDs instead of `str(agent_id)`. To make this work, `OasisProfileGenerator._save_reddit_json` and `_save_twitter_csv` now persist `source_entity_uuid` (Reddit JSON: optional field; Twitter CSV: appended column).

Tests: 51 unit + 2 integration pass (was 40 + 2). New tests lock in each fix:

- `test_hooks_survive_across_instances`, `test_register_via_instance_also_lands_on_class` (C3)
- `test_build_orchestrator_reads_graph_id_from_state` (C1+C2+C5)
- `test_build_orchestrator_falls_back_when_state_missing` (C1+C2)
- `test_emit_uses_add_text_episode_with_graph_id`, `test_emit_raises_when_updater_lacks_add_text_episode`, `test_real_updater_exposes_add_text_episode` (C4)
- `test_agent_to_entity_from_reddit_json`, `test_agent_to_entity_empty_when_no_field`, `test_agent_to_entity_falls_back_to_twitter_csv`, `test_agent_to_entity_reddit_takes_precedence` (C5)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 13:27:47 +02:00 · 2026-05-23 13:27:47 +02:00 · 6e1489fe08
parent 6b04ea5c27
commit 6e1489fe08
11 changed files with 526 additions and 62 deletions
--- a/backend/app/init.py
+++ b/backend/app/init.py
@ -48,16 +48,17 @@ def create_app(config_class=Config):
    if should_log_startup:
        logger.info("已注册模拟进程清理函数")
-    # Install interview lifecycle hooks on a singleton SimulationManager.
+    # Install interview lifecycle hooks on the SimulationManager class.
-    # The singleton's _notify_on_completed is also wired into SimulationRunner
+    # Hooks are stored on the class itself (not on a particular instance), so
-    # so that the runner's monitor thread fires the completed hooks when a
+    # any fresh `SimulationManager()` constructed later (e.g. per request in
-    # simulation process exits successfully.
+    # the Flask API) will see them.  We still bridge `_notify_on_completed`
    # into SimulationRunner via a transient instance so the runner's monitor
    # thread fires the completed hooks when a simulation process exits.
    from .services.simulation_manager import SimulationManager
    from .services.interviews.lifecycle import install_hooks
-    _simulation_manager_singleton = SimulationManager()
+    install_hooks(SimulationManager)
-    install_hooks(_simulation_manager_singleton)
+    SimulationRunner.register_on_completed(SimulationManager()._notify_on_completed)
    SimulationRunner.register_on_completed(_simulation_manager_singleton._notify_on_completed)
    if should_log_startup:
        logger.info("已安装面试生命周期钩子")
--- a/backend/app/api/interview.py
+++ b/backend/app/api/interview.py
@ -12,9 +12,31 @@ from app.services.interview_orchestrator import InterviewOrchestrator
 from app.services.interview_synthesizer import InterviewSynthesizer
 from app.services.interviews.storage import InterviewStore
 from app.utils.llm_client import LLMClient
 from app.utils.logger import get_logger
 from . import interview_bp
 logger = get_logger(__name__)
 class _NullUpdater:
    """Inert replacement for ``ZepGraphMemoryUpdater`` when Zep cannot be used.

    It offers the same ``add_text_episode`` entry point as the real updater, so
    ``InterviewZepWriter._emit`` completes without error.  Nothing is sent to
    Zep; the interview pipeline still writes its local artefacts.
    """

    def add_text_episode(self, graph_id, text):  # noqa: ARG002 - matches real API
        """Accept and discard a text episode; the result is always ``None``."""
 class _NullMemory:
    """Stand-in memory provider whose digests are always marked unavailable."""

    def get_digest(self, agent_id, max_chars=2000):  # noqa: ARG002 - matches Protocol
        """Return a placeholder ``MemoryDigest``; both arguments are ignored."""
        # Imported at call time rather than at module import, as before.
        from app.services.interviews.base import MemoryDigest

        placeholder = MemoryDigest(text="[memory unavailable]", available=False)
        return placeholder
 _TASKS: dict[str, dict] = {}
 _LOCK = threading.Lock()
@ -25,30 +47,72 @@ def _uploads_root() -> Path:
    return Path(getattr(Config, "UPLOADS_DIR", "uploads"))
 def _load_graph_id(sim_id: str) -> str:
    """Look up the Zep ``graph_id`` recorded for simulation ``sim_id``.

    The value comes from ``SimulationManager().get_simulation(sim_id)``.  An
    empty string is returned when no state is found, when its ``graph_id`` is
    empty, or when the lookup raises (the error is logged as a warning).
    Callers treat ``""`` as "Zep unavailable" and use the null memory/writer.
    """
    try:
        from app.services.simulation_manager import SimulationManager

        state = SimulationManager().get_simulation(sim_id)
        graph_id = state.graph_id if state else ""
    except Exception as e:  # pragma: no cover - defensive
        logger.warning(f"_load_graph_id({sim_id}) failed: {e!r}")
        return ""
    # Normalise any falsy graph_id (None, "") to the empty-string sentinel.
    return graph_id or ""
 def _build_orchestrator(sim_id: str) -> InterviewOrchestrator:
    sim_dir = _uploads_root() / "simulations" / sim_id
    reddit = sim_dir / "reddit_profiles.json"
    twitter = sim_dir / "twitter_profiles.csv"
-    personas = FileSystemPersonaProvider(reddit_path=reddit if reddit.exists() else None,
+    personas = FileSystemPersonaProvider(
-                                         twitter_path=twitter if twitter.exists() else None)
+        reddit_path=reddit if reddit.exists() else None,
-    # Zep memory + writer: best-effort; in stub/test mode the writer no-ops on exceptions
+        twitter_path=twitter if twitter.exists() else None,
-    class _NullUpdater:
+    )
-        def add_text_episode(self, *a, **kw): return None
+    # Build agent_id -> Zep entity uuid map from the persisted profile files.
-    try:
+    agent_to_entity = personas.agent_to_entity()
-        from app.services.zep_entity_reader import ZepEntityReader
+
-        from app.services.zep_graph_memory_updater import ZepGraphMemoryUpdater
+    # Resolve the graph_id from the simulation's persisted state — NOT from a
-        graph_id = (sim_dir / "graph_id.txt").read_text().strip() if (sim_dir / "graph_id.txt").exists() else ""
+    # ``graph_id.txt`` (nothing in the codebase writes such a file).
-        reader = ZepEntityReader()
+    graph_id = _load_graph_id(sim_id)
-        updater = ZepGraphMemoryUpdater()
+
-        memory = ZepMemoryProvider(reader, graph_id=graph_id)
+    memory: object
-        zep_writer = InterviewZepWriter(memory_updater=updater, graph_id=graph_id)
+    zep_writer: InterviewZepWriter
-    except Exception:
+    if not graph_id:
-        class _Mem:
+        logger.warning(
-            def get_digest(self, agent_id, max_chars=2000):
+            f"interview: no graph_id for sim {sim_id} — Zep memory/writer disabled "
-                from app.services.interviews.base import MemoryDigest
+            "(simulation state missing or graph_id empty)"
-                return MemoryDigest(text="[memory unavailable]", available=False)
+        )
-        memory = _Mem()
+        memory = _NullMemory()
        zep_writer = InterviewZepWriter(memory_updater=_NullUpdater(), graph_id="")
    else:
        try:
            from app.services.zep_entity_reader import ZepEntityReader
            from app.services.zep_graph_memory_updater import ZepGraphMemoryUpdater
            reader = ZepEntityReader()
            updater = ZepGraphMemoryUpdater(graph_id=graph_id)
            memory = ZepMemoryProvider(
                reader, graph_id=graph_id, agent_to_entity=agent_to_entity
            )
            zep_writer = InterviewZepWriter(memory_updater=updater, graph_id=graph_id)
            if not agent_to_entity:
                logger.warning(
                    f"interview: empty agent_to_entity map for sim {sim_id} — "
                    "memory digests will be unavailable. Check that profile files "
                    "include `source_entity_uuid`."
                )
        except Exception as e:
            logger.warning(
                f"interview: Zep init failed for sim {sim_id} ({e!r}); "
                "falling back to null memory/writer"
            )
            memory = _NullMemory()
            zep_writer = InterviewZepWriter(memory_updater=_NullUpdater(), graph_id="")
    llm = LLMClient(api_key=Config.LLM_API_KEY, base_url=Config.LLM_BASE_URL,
                    model=Config.LLM_MODEL_NAME)
    return InterviewOrchestrator(
--- a/backend/app/services/interviews/adapters.py
+++ b/backend/app/services/interviews/adapters.py
@ -54,6 +54,49 @@ class FileSystemPersonaProvider:
        twitter = [p for p in self._load_twitter() if p.agent_id not in seen]
        return reddit + twitter
    def agent_to_entity(self) -> dict[int, str]:
        """Build the ``{agent_id: zep_entity_uuid}`` map from the persisted profile files.

        ``source_entity_uuid`` is read per agent from the Reddit JSON
        (``self.reddit_path``) and the Twitter CSV (``self.twitter_path``).
        Reddit takes precedence: the CSV only fills agent ids the JSON did not
        map.  Within the JSON a later row for the same ``user_id`` replaces an
        earlier one; within the CSV the first row wins.

        Unusable input is skipped instead of aborting or raising:

        * a row that is not an object, has no ``user_id``, has a missing/blank
          uuid, or has a ``user_id`` that cannot be converted to ``int`` is
          ignored on its own — the remaining rows are still processed;
        * a file that cannot be read or parsed, or a JSON document whose top
          level is not a list, is logged as a warning and contributes nothing
          further.

        Returns:
            Mapping of integer agent id to entity uuid string.  Empty if
            neither file is present or no row carries a usable uuid.
        """
        # Function-scope import: the module's import block is not visible from
        # this method, so the dependency is brought in locally.
        import logging

        log = logging.getLogger(__name__)
        mapping: dict[int, str] = {}

        def _record(raw_id, raw_uuid, *, overwrite: bool) -> None:
            """Add one id/uuid pair to ``mapping``; ignore unusable pairs."""
            if raw_id is None or not raw_uuid:
                return
            entity_uuid = str(raw_uuid).strip()
            if not entity_uuid:
                return
            try:
                agent_id = int(raw_id)
            except (TypeError, ValueError, OverflowError):
                return
            if overwrite or agent_id not in mapping:
                mapping[agent_id] = entity_uuid

        # Reddit JSON
        if self.reddit_path and self.reddit_path.exists():
            try:
                rows = json.loads(self.reddit_path.read_text(encoding="utf-8"))
            except (OSError, ValueError) as exc:
                # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
                log.warning("agent_to_entity: cannot read %s: %r", self.reddit_path, exc)
                rows = []
            if not isinstance(rows, list):
                log.warning(
                    "agent_to_entity: %s does not contain a JSON list; ignoring it",
                    self.reddit_path,
                )
                rows = []
            for row in rows:
                if isinstance(row, dict):
                    _record(
                        row.get("user_id"),
                        row.get("source_entity_uuid"),
                        overwrite=True,
                    )

        # Twitter CSV (only fills agents not already mapped)
        if self.twitter_path and self.twitter_path.exists():
            try:
                with self.twitter_path.open("r", encoding="utf-8", newline="") as f:
                    for row in csv.DictReader(f):
                        _record(
                            row.get("user_id"),
                            row.get("source_entity_uuid"),
                            overwrite=False,
                        )
            except (OSError, ValueError, csv.Error) as exc:
                # Rows read before the failure stay in the mapping.
                log.warning("agent_to_entity: cannot read %s: %r", self.twitter_path, exc)

        return mapping
 class ZepMemoryProvider:
    """Builds a bounded memory digest per agent from Zep entity context.
--- a/backend/app/services/interviews/zep_writer.py
+++ b/backend/app/services/interviews/zep_writer.py
@ -5,10 +5,12 @@ from app.models.interview import (
 )
 class InterviewZepWriter:
-    """Mirrors `ZepGraphMemoryUpdater.add_activity` usage but for interview episodes.
+    """Writes interview episodes (per-agent responses, aggregates) to a Zep graph.
-    The real `ZepGraphMemoryUpdater` may expose `add_activity` (preferred) or a lower-level
+    Expects ``memory_updater`` to expose ``add_text_episode(graph_id, text)`` — that
-    text-episode method; this writer adapts to either via duck typing.
+    is the method the real ``ZepGraphMemoryUpdater`` provides for synchronous text
    writes outside the agent-activity batch pipeline.  A no-op shim with the same
    method is acceptable for tests and stub mode.
    """
    def __init__(self, memory_updater, graph_id: str):
        self.updater = memory_updater
@ -17,10 +19,11 @@ class InterviewZepWriter:
    def _emit(self, text: str) -> None:
        if hasattr(self.updater, "add_text_episode"):
            self.updater.add_text_episode(self.graph_id, text)
        elif hasattr(self.updater, "add_activity"):
            self.updater.add_activity({"graph_id": self.graph_id, "text": text})
        else:
-            raise RuntimeError("memory_updater has neither add_text_episode nor add_activity")
+            raise RuntimeError(
                "memory_updater is missing add_text_episode(graph_id, text); "
                "InterviewZepWriter requires the explicit text-episode API."
            )
    def _summarize_likert(self, r: LikertResponse) -> str:
        mean_v = sum(r.responses.values()) / max(len(r.responses), 1)
--- a/backend/app/services/oasis_profile_generator.py
+++ b/backend/app/services/oasis_profile_generator.py
@ -1091,8 +1091,10 @@ class OasisProfileGenerator:
        with open(file_path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
-            # 写入OASIS要求的表头
+            # 写入表头：OASIS要求的5列 + 额外的source_entity_uuid列（反向链接到Zep实体）。
-            headers = ['user_id', 'name', 'username', 'user_char', 'description']
+            # OASIS按列名读取，额外的列不会影响其行为，但允许下游（面试子系统等）
            # 重建 agent_id -> Zep entity uuid 的映射。
            headers = ['user_id', 'name', 'username', 'user_char', 'description', 'source_entity_uuid']
            writer.writerow(headers)
            # 写入数据行
@ -1112,7 +1114,8 @@ class OasisProfileGenerator:
                    profile.name,           # name: 真实姓名
                    profile.user_name,      # username: 用户名
                    user_char,              # user_char: 完整人设（内部LLM使用）
-                    description             # description: 简短简介（外部显示）
+                    description,            # description: 简短简介（外部显示）
                    profile.source_entity_uuid or "",  # source_entity_uuid: Zep实体UUID
                ]
                writer.writerow(row)
@ -1184,6 +1187,12 @@ class OasisProfileGenerator:
                item["profession"] = profile.profession
            if profile.interested_topics:
                item["interested_topics"] = profile.interested_topics
            # source_entity_uuid: 反向链接到Zep实体，下游（面试子系统等）需要此映射以
            # 在Zep图谱中查找Agent的上下文。仅在存在时写入。
            if profile.source_entity_uuid:
                item["source_entity_uuid"] = profile.source_entity_uuid
            if profile.source_entity_type:
                item["source_entity_type"] = profile.source_entity_type
            data.append(item)
--- a/backend/app/services/simulation_manager.py
+++ b/backend/app/services/simulation_manager.py
@ -129,6 +129,13 @@ class SimulationManager:
        '../../uploads/simulations'
    )
    # Class-level hook registries so callbacks survive across instances.
    # The Flask API endpoints construct fresh `SimulationManager()` instances per request,
    # while lifecycle hooks are registered once at app startup — storing the lists on the
    # instance would silently drop those hooks on every request.
    _on_ready_hooks: list = []
    _on_completed_hooks: list = []
    def __init__(self):
        # 确保目录存在
        os.makedirs(self.SIMULATION_DATA_DIR, exist_ok=True)
@ -136,10 +143,6 @@ class SimulationManager:
        # 内存中的模拟状态缓存
        self._simulations: Dict[str, SimulationState] = {}
        # Lifecycle hook registries
        self._on_ready_hooks: list = []
        self._on_completed_hooks: list = []
    def _get_simulation_dir(self, simulation_id: str) -> str:
        """获取模拟数据目录"""
        sim_dir = os.path.join(self.SIMULATION_DATA_DIR, simulation_id)
@ -196,20 +199,30 @@ class SimulationManager:
        return state
    # ------------------------------------------------------------------
-    # Lifecycle hook registration
+    # Lifecycle hook registration (class-level — see class docstring)
    # ------------------------------------------------------------------
-    def register_on_ready(self, fn) -> None:
+    @classmethod
-        """Register a callback invoked when a simulation transitions to READY."""
+    def register_on_ready(cls, fn) -> None:
-        self._on_ready_hooks.append(fn)
+        """Register a callback invoked when a simulation transitions to READY.
-    def register_on_completed(self, fn) -> None:
+        Class-level so hooks registered at app startup remain visible to every
-        """Register a callback invoked when a simulation transitions to COMPLETED."""
+        SimulationManager() instance constructed later (e.g. per-request in Flask).
-        self._on_completed_hooks.append(fn)
+        """
        cls._on_ready_hooks.append(fn)
    @classmethod
    def register_on_completed(cls, fn) -> None:
        """Register a callback invoked when a simulation transitions to COMPLETED.

        Class-level so hooks registered at app startup remain visible to every
        SimulationManager() instance constructed later (e.g. per-request in Flask).

        Args:
            fn: Callable taking the simulation state as its only argument; it is
                called as ``fn(state)`` by ``_notify_on_completed``.
        """
        # Appends to the list stored on the class, so the registration is shared.
        cls._on_completed_hooks.append(fn)
    def _notify_on_ready(self, state: "SimulationState") -> None:
        """Invoke all on_ready hooks; exceptions are isolated per hook."""
-        for fn in list(self._on_ready_hooks):
+        for fn in list(type(self)._on_ready_hooks):
            try:
                fn(state)
            except Exception as e:
@ -217,7 +230,7 @@ class SimulationManager:
    def _notify_on_completed(self, state: "SimulationState") -> None:
        """Invoke all on_completed hooks; exceptions are isolated per hook."""
-        for fn in list(self._on_completed_hooks):
+        for fn in list(type(self)._on_completed_hooks):
            try:
                fn(state)
            except Exception as e:
--- a/backend/app/services/zep_graph_memory_updater.py
+++ b/backend/app/services/zep_graph_memory_updater.py
@ -337,6 +337,44 @@ class ZepGraphMemoryUpdater:
        self._total_activities += 1
        logger.debug(f"添加活动到Zep队列: {activity.agent_name} - {activity.action_type}")
    def add_text_episode(self, graph_id: str, text: str) -> None:
        """Write one text episode to a Zep graph synchronously (no batch queue).

        Meant for callers such as the interview subsystem
        (``InterviewZepWriter``) that need an immediate write and are not part
        of the agent-activity pipeline.  The batching done by
        ``_send_batch_activities`` is bypassed, but sending is still retried.

        Args:
            graph_id: Target graph id.  May differ from ``self.graph_id`` (useful
                with several graphs); when empty, ``self.graph_id`` is used.
            text: Text content to send.  Empty text is ignored.

        Exceptions raised while sending are caught and logged.  Up to
        ``MAX_RETRIES`` attempts are made, sleeping ``RETRY_DELAY * attempt
        number`` between them; after the last failure ``_failed_count`` is
        incremented and the method returns normally.
        """
        if not text:
            return

        target_graph_id = graph_id or self.graph_id
        if not target_graph_id:
            logger.warning("add_text_episode 调用时未指定graph_id，跳过")
            return

        final_attempt = self.MAX_RETRIES - 1
        for attempt in range(self.MAX_RETRIES):
            try:
                self.client.graph.add(graph_id=target_graph_id, type="text", data=text)
                # Success: update the send counters and stop retrying.
                self._total_sent += 1
                self._total_items_sent += 1
                logger.debug(f"add_text_episode 发送成功 (graph={target_graph_id}, len={len(text)})")
                return
            except Exception as e:
                if attempt >= final_attempt:
                    # Out of attempts: record the failure, do not raise.
                    logger.error(f"add_text_episode 失败，已重试{self.MAX_RETRIES}次: {e}")
                    self._failed_count += 1
                    continue
                logger.warning(f"add_text_episode 失败 (尝试 {attempt + 1}/{self.MAX_RETRIES}): {e}")
                # Linear back-off before the next attempt.
                time.sleep(self.RETRY_DELAY * (attempt + 1))
    def add_activity_from_dict(self, data: Dict[str, Any], platform: str):
        """
        从字典数据添加活动
--- a/backend/tests/interviews/test_adapters.py
+++ b/backend/tests/interviews/test_adapters.py
@ -46,3 +46,78 @@ def test_zep_memory_provider_truncates_to_max_chars():
    d = prov.get_digest(5, max_chars=300)
    assert d.available is True
    assert len(d.text) <= 300
 def test_agent_to_entity_from_reddit_json(tmp_path):
    """C5: the ``{agent_id: zep_entity_uuid}`` map is rebuilt from a
    reddit_profiles.json whose rows carry ``source_entity_uuid``; rows
    without the field are left out.
    """
    with_uuid_zero = {"user_id": 0, "user_name": "fischer1", "name": "Fischer Müller",
                      "persona": "p", "profession": "fisher",
                      "source_entity_uuid": "uuid-zero"}
    with_uuid_one = {"user_id": 1, "user_name": "ngo1", "name": "Ines NGO",
                     "persona": "p", "profession": "ngo_staff",
                     "source_entity_uuid": "uuid-one"}
    # No uuid on this row, so it must not appear in the result.
    without_uuid = {"user_id": 2, "user_name": "gov1", "name": "Gov Agent",
                    "persona": "p", "profession": "official"}
    profile_file = tmp_path / "reddit_profiles.json"
    profile_file.write_text(
        json.dumps([with_uuid_zero, with_uuid_one, without_uuid]), encoding="utf-8"
    )

    result = FileSystemPersonaProvider(
        reddit_path=profile_file, twitter_path=None
    ).agent_to_entity()

    assert result == {0: "uuid-zero", 1: "uuid-one"}
    # Keys are ints and values are strings.
    assert all(isinstance(agent_id, int) for agent_id in result)
    assert all(isinstance(entity, str) for entity in result.values())
 def test_agent_to_entity_empty_when_no_field(tmp_path):
    """C5: with no ``source_entity_uuid`` on any row the result is an empty
    dict — no exception and no partial entries."""
    profile_file = tmp_path / "reddit_profiles.json"
    rows = [{"user_id": 0, "user_name": "u", "name": "A", "persona": "p"}]
    profile_file.write_text(json.dumps(rows), encoding="utf-8")

    mapping = FileSystemPersonaProvider(
        reddit_path=profile_file, twitter_path=None
    ).agent_to_entity()

    assert mapping == {}
 def test_agent_to_entity_falls_back_to_twitter_csv(tmp_path):
    """C5: with only twitter_profiles.csv present, uuids are taken from the
    CSV's ``source_entity_uuid`` column.
    """
    header = ["user_id", "name", "username", "user_char", "description", "source_entity_uuid"]
    records = [
        [0, "A0", "u0", "char", "desc", "uuid-zero"],
        [1, "A1", "u1", "char", "desc", ""],  # blank uuid -> skipped
        [2, "A2", "u2", "char", "desc", "uuid-two"],
    ]
    csv_file = tmp_path / "twitter_profiles.csv"
    with csv_file.open("w", newline="", encoding="utf-8") as f:
        out = csv.writer(f)
        out.writerow(header)
        out.writerows(records)

    mapping = FileSystemPersonaProvider(
        reddit_path=None, twitter_path=csv_file
    ).agent_to_entity()

    assert mapping == {0: "uuid-zero", 2: "uuid-two"}
 def test_agent_to_entity_reddit_takes_precedence(tmp_path):
    """C5: with both files present the Reddit JSON wins, and the Twitter CSV
    contributes only agents the JSON did not map."""
    reddit_rows = [
        {"user_id": 0, "user_name": "u0", "name": "A0", "persona": "p",
         "source_entity_uuid": "reddit-zero"},
    ]
    json_file = tmp_path / "reddit_profiles.json"
    json_file.write_text(json.dumps(reddit_rows), encoding="utf-8")

    csv_file = tmp_path / "twitter_profiles.csv"
    with csv_file.open("w", newline="", encoding="utf-8") as f:
        out = csv.writer(f)
        out.writerow(["user_id", "name", "username", "user_char", "description", "source_entity_uuid"])
        out.writerows([
            [0, "A0", "u0", "char", "desc", "twitter-zero"],  # ignored
            [1, "A1", "u1", "char", "desc", "twitter-one"],  # used
        ])

    mapping = FileSystemPersonaProvider(
        reddit_path=json_file, twitter_path=csv_file
    ).agent_to_entity()

    assert mapping == {0: "reddit-zero", 1: "twitter-one"}
--- a/backend/tests/interviews/test_api_interview.py
+++ b/backend/tests/interviews/test_api_interview.py
@ -40,3 +40,116 @@ def test_unknown_subagent_returns_400(client):
    res = client.post("/api/interview/sim_test/rerun",
                      json={"subagent": "nonsense"})
    assert res.status_code == 400
 def test_build_orchestrator_reads_graph_id_from_state(tmp_path, monkeypatch):
    """C1+C2: ``_build_orchestrator`` must resolve the Zep graph_id from
    ``state.json`` (written by ``SimulationManager``), not from the
    nonexistent ``graph_id.txt``.  The graph_id then must reach the
    ``InterviewZepWriter`` instead of being silently swallowed.
    """
    monkeypatch.setenv("LLM_STUB_MODE", "true")
    monkeypatch.setenv("UPLOADS_DIR", str(tmp_path))
    monkeypatch.setenv("ZEP_API_KEY", "test-fake-key")
    from app.config import Config
    # Patch Config through monkeypatch so the overrides are undone at teardown;
    # plain assignment would leak a tmp_path-based UPLOADS_DIR into later tests.
    # raising=False tolerates an attribute that Config does not define.
    monkeypatch.setattr(Config, "LLM_STUB_MODE", True, raising=False)
    monkeypatch.setattr(Config, "UPLOADS_DIR", str(tmp_path), raising=False)
    monkeypatch.setattr(Config, "ZEP_API_KEY", "test-fake-key", raising=False)
    # SimulationManager's data dir is class-level — point it at tmp_path.
    from app.services.simulation_manager import SimulationManager
    sim_root = tmp_path / "simulations"
    sim_root.mkdir(parents=True, exist_ok=True)
    monkeypatch.setattr(SimulationManager, "SIMULATION_DATA_DIR", str(sim_root))
    sim_id = "sim_graphid"
    sim_dir = sim_root / sim_id
    sim_dir.mkdir(parents=True)
    # Seed a profile file so FileSystemPersonaProvider can work.
    (sim_dir / "reddit_profiles.json").write_text(
        json.dumps([
            {"user_id": 0, "user_name": "u0", "name": "A0",
             "persona": "p", "profession": "fisher",
             "source_entity_uuid": "uuid-zero"},
            {"user_id": 1, "user_name": "u1", "name": "A1",
             "persona": "p", "profession": "fisher",
             "source_entity_uuid": "uuid-one"},
        ]),
        encoding="utf-8",
    )
    # Seed state.json with the graph_id.
    state_doc = {
        "simulation_id": sim_id,
        "project_id": "p",
        "graph_id": "graph-from-state",
        "status": "ready",
        "enable_twitter": False,
        "enable_reddit": True,
    }
    (sim_dir / "state.json").write_text(json.dumps(state_doc), encoding="utf-8")
    # Patch ZepGraphMemoryUpdater + ZepEntityReader so we don't hit the network.
    import app.services.zep_graph_memory_updater as zgmu
    import app.services.zep_entity_reader as zer
    class _FakeUpdater:
        def __init__(self, graph_id, api_key=None):
            self.graph_id = graph_id
        def add_text_episode(self, graph_id, text):
            return None
    class _FakeReader:
        def __init__(self, api_key=None):
            pass
        def get_entity_with_context(self, graph_id, entity_uuid):
            return None
    monkeypatch.setattr(zgmu, "ZepGraphMemoryUpdater", _FakeUpdater)
    monkeypatch.setattr(zer, "ZepEntityReader", _FakeReader)
    from app.api.interview import _build_orchestrator
    orch = _build_orchestrator(sim_id)
    assert orch.zep_writer.graph_id == "graph-from-state"
    # Updater on the writer must be the real (or fake) ZepGraphMemoryUpdater path,
    # NOT the null updater — i.e. its graph_id must match.
    assert getattr(orch.zep_writer.updater, "graph_id", None) == "graph-from-state"
    # ZepMemoryProvider must have received the agent_to_entity map (C5).
    assert hasattr(orch.memory, "map")
    assert orch.memory.map == {0: "uuid-zero", 1: "uuid-one"}
 def test_build_orchestrator_falls_back_when_state_missing(tmp_path, monkeypatch):
    """C1+C2: when ``state.json`` is missing, the orchestrator must still be
    constructed with the null updater/memory path (not crash, not silently
    pass a bare ``ZepGraphMemoryUpdater()`` that would error out).
    """
    monkeypatch.setenv("LLM_STUB_MODE", "true")
    monkeypatch.setenv("UPLOADS_DIR", str(tmp_path))
    from app.config import Config
    # Patch Config through monkeypatch so the overrides are undone at teardown;
    # plain assignment would leak a tmp_path-based UPLOADS_DIR into later tests.
    # raising=False tolerates an attribute that Config does not define.
    monkeypatch.setattr(Config, "LLM_STUB_MODE", True, raising=False)
    monkeypatch.setattr(Config, "UPLOADS_DIR", str(tmp_path), raising=False)
    from app.services.simulation_manager import SimulationManager
    sim_root = tmp_path / "simulations"
    sim_root.mkdir(parents=True, exist_ok=True)
    monkeypatch.setattr(SimulationManager, "SIMULATION_DATA_DIR", str(sim_root))
    sim_id = "sim_no_state"
    sim_dir = sim_root / sim_id
    sim_dir.mkdir(parents=True)
    (sim_dir / "reddit_profiles.json").write_text(
        json.dumps([{"user_id": 0, "user_name": "u0", "name": "A0",
                     "persona": "p", "profession": "fisher"}]),
        encoding="utf-8",
    )
    from app.api.interview import _build_orchestrator
    orch = _build_orchestrator(sim_id)
    assert orch.zep_writer.graph_id == ""
    # Null updater path: writer must still respond to _emit without raising.
    orch.zep_writer._emit("hello")
--- a/backend/tests/interviews/test_simulation_hooks.py
+++ b/backend/tests/interviews/test_simulation_hooks.py
@ -7,11 +7,27 @@ NOTE ON SHAPE DIVERGENCE vs. original plan spec:
 - The COMPLETED transition lives in simulation_runner.py (SimulationRunner._monitor_simulation),
  not in simulation_manager.py.  The _notify_on_completed hook is registered on SimulationManager
  and the production insertion point for COMPLETED is documented in DONE_WITH_CONCERNS.
 Hooks are stored on the class (C3 fix), so each test snapshots/restores the
 registries via the autouse fixture to keep test isolation.
 """
 import pytest
 from app.services.simulation_manager import SimulationManager, SimulationState, SimulationStatus
@pytest.fixture(autouse=True)
 def _isolate_class_hooks():
    saved_ready = list(SimulationManager._on_ready_hooks)
    saved_completed = list(SimulationManager._on_completed_hooks)
    try:
        yield
    finally:
        SimulationManager._on_ready_hooks[:] = saved_ready
        SimulationManager._on_completed_hooks[:] = saved_completed
 def test_register_post_ready_hook_invoked():
    called = []
    mgr = SimulationManager()
@ -38,3 +54,43 @@ def test_register_post_completed_hook_invoked():
    )
    mgr._notify_on_completed(state)
    assert called == [("done", "abc")]
 def test_hooks_survive_across_instances():
    """C3: the hook registries live on the class, so a callback registered
    through the classmethod still fires on an instance created afterwards.
    The Flask per-request ``SimulationManager()`` pattern relies on this once
    ``install_hooks(SimulationManager)`` has run at app startup.
    """
    called: list[str] = []

    def on_ready(s):
        called.append(f"ready:{s.simulation_id}")

    def on_completed(s):
        called.append(f"done:{s.simulation_id}")

    # Registration goes through the class, like the production install_hooks(cls).
    SimulationManager.register_on_ready(on_ready)
    SimulationManager.register_on_completed(on_completed)

    # An instance built after registration must dispatch to both hooks.
    later_instance = SimulationManager()
    state = SimulationState(
        simulation_id="cross_instance",
        project_id="p",
        graph_id="g",
        status=SimulationStatus.READY,
    )
    later_instance._notify_on_ready(state)
    state.status = SimulationStatus.COMPLETED
    later_instance._notify_on_completed(state)

    for expected in ("ready:cross_instance", "done:cross_instance"):
        assert expected in called
 def test_register_via_instance_also_lands_on_class():
    """Registering through an instance must populate the class registry too —
    backward-compatibility with code that calls ``manager.register_on_*``.

    The check uses a dedicated hook and verifies both that it is stored in the
    class-level list and that a different instance actually invokes it; a
    length or identity check on the class attribute alone would pass even if
    instance registration were broken.
    """
    seen: list[str] = []

    def hook(state):
        seen.append(state.simulation_id)

    mgr1 = SimulationManager()
    mgr1.register_on_ready(hook)
    # The specific hook registered via the instance is in the class registry.
    assert hook in SimulationManager._on_ready_hooks

    # A second, unrelated instance must see (and fire) the hook.
    mgr2 = SimulationManager()
    state = SimulationState(
        simulation_id="via_instance",
        project_id="p",
        graph_id="g",
        status=SimulationStatus.READY,
    )
    mgr2._notify_on_ready(state)
    assert seen == ["via_instance"]
--- a/backend/tests/interviews/test_zep_writer.py
+++ b/backend/tests/interviews/test_zep_writer.py
@ -1,16 +1,26 @@
 import pytest
 from app.models.interview import (
    LikertResponse, InterviewPhase, SubagentKind,
 )
 from app.services.interviews.zep_writer import InterviewZepWriter
 class _FakeMemoryUpdater:
    """Fake mirroring the real ZepGraphMemoryUpdater contract.
    Post-C4 the writer only uses ``add_text_episode(graph_id, text)`` —
    ``add_activity`` is deliberately omitted to lock in the new behaviour and
    catch any regression that re-introduces the broken dict-based fallback.
    """
    def __init__(self):
-        self.events = []
+        self.events: list[dict] = []
-    def add_activity(self, activity):
+
        self.events.append(activity)
    def add_text_episode(self, graph_id, text):
        self.events.append({"graph_id": graph_id, "text": text})
 def test_per_agent_episode_text():
    upd = _FakeMemoryUpdater()
    w = InterviewZepWriter(memory_updater=upd, graph_id="g1")
@ -20,9 +30,48 @@ def test_per_agent_episode_text():
    w.write_per_agent(SubagentKind.LONGITUDINAL, r, agent_name="Fischer Müller")
    assert any("Fischer Müller" in str(e) for e in upd.events)
    assert any("longitudinal/T1" in str(e) for e in upd.events)
    # Each event must carry the configured graph_id.
    assert all(e["graph_id"] == "g1" for e in upd.events)
 def test_aggregate_episode():
    upd = _FakeMemoryUpdater()
    w = InterviewZepWriter(memory_updater=upd, graph_id="g1")
    w.write_aggregate(SubagentKind.SCENARIO, summary="S1 mean desirability 5.2; S2 mean 2.1")
    assert any("S1 mean" in str(e) for e in upd.events)
 def test_emit_uses_add_text_episode_with_graph_id():
    """C4: ``_emit`` forwards to ``updater.add_text_episode(graph_id, text)``
    using the graph_id given to the constructor and the unmodified text —
    not a dict payload and not the ``add_activity`` fallback (the real
    ``add_activity`` rejects dicts).
    """
    recorder = _FakeMemoryUpdater()
    writer = InterviewZepWriter(memory_updater=recorder, graph_id="g_xyz")

    writer._emit("hello world")

    expected_events = [{"graph_id": "g_xyz", "text": "hello world"}]
    assert recorder.events == expected_events
 def test_emit_raises_when_updater_lacks_add_text_episode():
    """C4: an updater that has no ``add_text_episode`` makes ``_emit`` raise
    RuntimeError; it must not quietly go through a broken ``add_activity``
    fallback.
    """
    class _Broken:
        def add_activity(self, activity):  # pragma: no cover - kept for clarity
            raise AssertionError("must not be called")

    writer = InterviewZepWriter(memory_updater=_Broken(), graph_id="g1")
    with pytest.raises(RuntimeError, match="add_text_episode"):
        writer._emit("x")
 def test_real_updater_exposes_add_text_episode():
    """C4 sanity check: the real ``ZepGraphMemoryUpdater`` class has an
    ``add_text_episode`` attribute, so production wiring does not fall
    through to the broken ``add_activity(dict)`` path.
    """
    from app.services.zep_graph_memory_updater import ZepGraphMemoryUpdater

    has_text_episode_api = hasattr(ZepGraphMemoryUpdater, "add_text_episode")
    assert has_text_episode_api