test(interviews): end-to-end pipeline test + content-aware LLM stubs for all 4 subagents

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 12:40:53 +02:00 · 2026-05-23 12:40:53 +02:00 · 61f13a806d
parent 52bae0a3da
commit 61f13a806d
3 changed files with 141 additions and 9 deletions
--- a/backend/app/utils/llm_client.py
+++ b/backend/app/utils/llm_client.py
@ -47,15 +47,66 @@ class LLMClient:
        return _json.dumps(self._stub_response_json(messages), ensure_ascii=False)

    def _stub_response_json(self, messages: list[dict]) -> dict:
-        key = self._stub_key(messages)
-        # Deterministic centered Likert + plausible open text
-        digit = sum(ord(c) for c in key) % 5 + 1
-        return {
-            "stub_key": key,
-            "responses": {"item_001": digit, "item_002": digit, "item_003": (digit % 5) + 1},
-            "confidence": {"item_001": 0.7, "item_002": 0.7, "item_003": 0.6},
-            "open_comment": f"stub:{key}",
-        }
+        import hashlib, json as _json
+        sys_msg = next((m["content"] for m in messages if m.get("role") == "system"), "")
+        usr_msg = next((m["content"] for m in reversed(messages) if m.get("role") == "user"), "")
+        h = hashlib.sha256((sys_msg + "|" + usr_msg).encode("utf-8")).hexdigest()
+        seed = int(h[:8], 16)
+        rng = (seed % 5) + 1
+
+        # Longitudinal Likert (12 items)
+        if all(tok in usr_msg for tok in ("stk_1", "gov_1", "mkt_1", "clm_1")):
+            ids = ["stk_1","stk_2","stk_3","gov_1","gov_2","gov_3",
+                   "mkt_1","mkt_2","mkt_3","clm_1","clm_2","clm_3"]
+            return {"responses": {k: ((seed >> (i*3)) % 5) + 1 for i, k in enumerate(ids)},
+                    "confidence": {k: 0.6 for k in ids},
+                    "open_comment": f"stub:{h[:8]}"}
+
+        # Diversity Q-sort: 24 statements + 6 axes, forced distribution 2,3,4,6,4,3,2
+        if "st_01" in usr_msg and "ax_pres_extr" in usr_msg:
+            buckets = [-3]*2 + [-2]*3 + [-1]*4 + [0]*6 + [1]*4 + [2]*3 + [3]*2
+            stmts = [f"st_{i+1:02d}" for i in range(24)]
+            # shuffle deterministically
+            order = sorted(range(24), key=lambda i: (h[i % len(h)], i))
+            placements = {stmts[i]: buckets[order.index(i)] for i in range(24)}
+            return {
+                "placements": placements,
+                "likert_axes": {a: ((seed >> (j*3)) % 7) + 1 for j, a in enumerate(
+                    ["ax_pres_extr","ax_loc_eu","ax_sci_trad",
+                     "ax_ind_col","ax_short_long","ax_mkt_reg"])},
+            }
+
+        # Scenario: S1..S4 × 4 dims
+        if all(s in usr_msg for s in ("S1:", "S2:", "S3:", "S4:")):
+            return {"ratings": {sid: {
+                "desirability": ((seed >> (i*3)) % 7) + 1,
+                "plausibility": ((seed >> (i*3+1)) % 7) + 1,
+                "impact_on_my_group": ((seed >> (i*3+2)) % 7) + 1,
+                "fairness": ((seed >> (i*3+4)) % 7) + 1,
+                "if_woke_up_response": f"act-{sid}-{h[:4]}",
+            } for i, sid in enumerate(["S1","S2","S3","S4"])}}
+
+        # Delphi R1: q1..q4 free text
+        if "q1" in usr_msg and "q2" in usr_msg and "Bewerten" not in usr_msg and "Sie sehen" not in usr_msg:
+            return {"answers": {qid: f"stub-themes-{qid}-{h[:4]}" for qid in ("q1","q2","q3","q4")}}
+
+        # Delphi theme extraction (no in-character system prompt)
+        if "extract distinct thematic codes" in sys_msg:
+            return {"themes": [{"theme_id": f"theme_{i}", "label": f"Thema {i}"} for i in range(5)]}
+
+        # Delphi R2 (rate) or R3 (revise)
+        if "Bewerten Sie jedes Thema" in usr_msg or "Sie sehen unten" in usr_msg \
+           or "Rate each theme" in usr_msg or "Below are the anonymised" in usr_msg:
+            theme_ids = [f"theme_{i}" for i in range(5)]
+            out = {"ratings": {tid: {"importance": ((seed >> (i*2)) % 5) + 1,
+                                     "plausibility": ((seed >> (i*2+1)) % 5) + 1}
+                               for i, tid in enumerate(theme_ids)}}
+            if "Sie sehen unten" in usr_msg or "Below are the anonymised" in usr_msg:
+                out["justification"] = "stub-revision"
+            return out
+
+        # Fallback
+        return {"stub_key": h[:12], "value": rng}

    def chat(
        self,
--- a/backend/tests/integration/init.py
+++ b/backend/tests/integration/init.py
--- a/backend/tests/integration/test_interview_pipeline.py
+++ b/backend/tests/integration/test_interview_pipeline.py
@ -0,0 +1,81 @@
+import json
+import pytest
+from pathlib import Path
+from app.config import Config
+from app.models.interview import SubagentKind, InterviewPhase
+from app.services.interviews.adapters import FileSystemPersonaProvider
+from app.services.interviews.base import MemoryDigest
+from app.services.interviews.zep_writer import InterviewZepWriter
+from app.services.interview_orchestrator import InterviewOrchestrator
+from app.services.interview_synthesizer import InterviewSynthesizer
+from app.utils.llm_client import LLMClient
+
+pytestmark = pytest.mark.integration
+
+INST_DIR = Path(__file__).resolve().parents[2] / "scripts" / "instruments"
+
+class _NullUpdater:
+    def __init__(self): self.events = []
+    def add_text_episode(self, graph_id, text): self.events.append(text)
+
+class _StaticMem:
+    def get_digest(self, agent_id, max_chars=2000):
+        return MemoryDigest(text=f"agent {agent_id} memory snippet", available=True)
+
+@pytest.fixture
+def seeded_uploads(tmp_path, monkeypatch):
+    monkeypatch.setenv("LLM_STUB_MODE", "true")
+    Config.LLM_STUB_MODE = True
+    sim_dir = tmp_path / "simulations" / "intg_sim"
+    sim_dir.mkdir(parents=True)
+    profiles = [{"user_id": i, "user_name": f"u{i}", "name": f"A{i}",
+                 "persona": "stakeholder p", "profession": "fisher"} for i in range(5)]
+    (sim_dir / "reddit_profiles.json").write_text(json.dumps(profiles), encoding="utf-8")
+    return tmp_path
+
+def _make_orch(tmp_path):
+    sim_dir = tmp_path / "simulations" / "intg_sim"
+    personas = FileSystemPersonaProvider(
+        reddit_path=sim_dir / "reddit_profiles.json", twitter_path=None,
+    )
+    llm = LLMClient(api_key="x", base_url="x", model="x")
+    updater = _NullUpdater()
+    writer = InterviewZepWriter(memory_updater=updater, graph_id="g")
+    return InterviewOrchestrator(
+        llm=llm, memory=_StaticMem(), personas=personas,
+        instrument_dir=INST_DIR, store_root=tmp_path, sim_id="intg_sim",
+        zep_writer=writer, max_workers=2, language="de",
+    )
+
+def test_pipeline_runs_pre_then_post_then_synthesis(seeded_uploads):
+    tmp = seeded_uploads
+    orch = _make_orch(tmp)
+
+    pre = orch.run_pre()
+    assert pre["longitudinal"]["n_responded"] >= 1
+
+    post = orch.run_post()
+    assert "longitudinal" in post
+    assert "diversity" in post
+    assert "scenario" in post
+    assert "delphi" in post
+
+    synth = InterviewSynthesizer(store=orch.store)
+    report = synth.run()
+    assert "Stakeholder Interview Synthesis" in report
+    assert "Limitations" in report
+
+    csv_path = orch.store.base / "synthesis" / "exports" / "all_responses.csv"
+    assert csv_path.exists()
+    lines = csv_path.read_text().splitlines()
+    assert lines[0].startswith("agent_id,") or "agent_id" in lines[0]
+
+def test_idempotent_rerun_creates_new_run_id(seeded_uploads):
+    tmp = seeded_uploads
+    orch = _make_orch(tmp)
+    orch.run_pre()
+    first = orch.run_post()
+    second = orch.rerun(SubagentKind.SCENARIO)
+    first_scn = first["scenario"]["run_dir"]
+    second_scn = second["scenario"]["run_dir"]
+    assert first_scn != second_scn