From 61f13a806d6a01119f4bfcfddf88b36a08f9ccfa Mon Sep 17 00:00:00 2001
From: Christian Moellmann
Date: Sat, 23 May 2026 12:40:53 +0200
Subject: [PATCH] test(interviews): end-to-end pipeline test + content-aware LLM stubs for all 4 subagents

Co-Authored-By: Claude Opus 4.7 (1M context)
---
Review notes (ignored by git am, not part of the commit message):
- Line structure and indentation were re-derived from Python syntax.
- The fixture now sets Config.LLM_STUB_MODE through monkeypatch so the flag
  is restored after each test instead of leaking into the rest of the session.
- The index blob ids below predate these review edits.

 backend/app/utils/llm_client.py               | 75 ++++++++++++++---
 backend/tests/integration/__init__.py         |  0
 .../integration/test_interview_pipeline.py    | 88 +++++++++++++++++++
 3 files changed, 154 insertions(+), 9 deletions(-)
 create mode 100644 backend/tests/integration/__init__.py
 create mode 100644 backend/tests/integration/test_interview_pipeline.py

diff --git a/backend/app/utils/llm_client.py b/backend/app/utils/llm_client.py
index 32285596..9b22ac02 100644
--- a/backend/app/utils/llm_client.py
+++ b/backend/app/utils/llm_client.py
@@ -47,15 +47,72 @@ class LLMClient:
         return _json.dumps(self._stub_response_json(messages), ensure_ascii=False)
 
     def _stub_response_json(self, messages: list[dict]) -> dict:
-        key = self._stub_key(messages)
-        # Deterministic centered Likert + plausible open text
-        digit = sum(ord(c) for c in key) % 5 + 1
-        return {
-            "stub_key": key,
-            "responses": {"item_001": digit, "item_002": digit, "item_003": (digit % 5) + 1},
-            "confidence": {"item_001": 0.7, "item_002": 0.7, "item_003": 0.6},
-            "open_comment": f"stub:{key}",
-        }
+        """Return a deterministic stub payload shaped for the calling prompt.
+
+        The payload shape is picked from marker substrings in the first system
+        message and the last user message. Ratings and id suffixes are derived
+        from a SHA-256 digest of those two strings, so equal prompts give equal payloads.
+        """
+        import hashlib
+        sys_msg = next((m["content"] for m in messages if m.get("role") == "system"), "")
+        usr_msg = next((m["content"] for m in reversed(messages) if m.get("role") == "user"), "")
+        h = hashlib.sha256((sys_msg + "|" + usr_msg).encode("utf-8")).hexdigest()
+        seed = int(h[:8], 16)
+        rng = (seed % 5) + 1
+
+        # Longitudinal Likert (12 items)
+        if all(tok in usr_msg for tok in ("stk_1", "gov_1", "mkt_1", "clm_1")):
+            ids = ["stk_1","stk_2","stk_3","gov_1","gov_2","gov_3",
+                   "mkt_1","mkt_2","mkt_3","clm_1","clm_2","clm_3"]
+            return {"responses": {k: ((seed >> (i*3)) % 5) + 1 for i, k in enumerate(ids)},
+                    "confidence": {k: 0.6 for k in ids},
+                    "open_comment": f"stub:{h[:8]}"}
+
+        # Diversity Q-sort: 24 statements + 6 axes, forced distribution 2,3,4,6,4,3,2
+        if "st_01" in usr_msg and "ax_pres_extr" in usr_msg:
+            buckets = [-3]*2 + [-2]*3 + [-1]*4 + [0]*6 + [1]*4 + [2]*3 + [3]*2
+            stmts = [f"st_{i+1:02d}" for i in range(24)]
+            # shuffle deterministically
+            order = sorted(range(24), key=lambda i: (h[i % len(h)], i))
+            placements = {stmts[i]: buckets[order.index(i)] for i in range(24)}
+            return {
+                "placements": placements,
+                "likert_axes": {a: ((seed >> (j*3)) % 7) + 1 for j, a in enumerate(
+                    ["ax_pres_extr","ax_loc_eu","ax_sci_trad",
+                     "ax_ind_col","ax_short_long","ax_mkt_reg"])},
+            }
+
+        # Scenario: S1..S4 × 4 dims
+        if all(s in usr_msg for s in ("S1:", "S2:", "S3:", "S4:")):
+            return {"ratings": {sid: {
+                "desirability": ((seed >> (i*3)) % 7) + 1,
+                "plausibility": ((seed >> (i*3+1)) % 7) + 1,
+                "impact_on_my_group": ((seed >> (i*3+2)) % 7) + 1,
+                "fairness": ((seed >> (i*3+4)) % 7) + 1,
+                "if_woke_up_response": f"act-{sid}-{h[:4]}",
+            } for i, sid in enumerate(["S1","S2","S3","S4"])}}
+
+        # Delphi R1: q1..q4 free text
+        if "q1" in usr_msg and "q2" in usr_msg and "Bewerten" not in usr_msg and "Sie sehen" not in usr_msg:
+            return {"answers": {qid: f"stub-themes-{qid}-{h[:4]}" for qid in ("q1","q2","q3","q4")}}
+
+        # Delphi theme extraction (no in-character system prompt)
+        if "extract distinct thematic codes" in sys_msg:
+            return {"themes": [{"theme_id": f"theme_{i}", "label": f"Thema {i}"} for i in range(5)]}
+
+        # Delphi R2 (rate) or R3 (revise)
+        if "Bewerten Sie jedes Thema" in usr_msg or "Sie sehen unten" in usr_msg \
+                or "Rate each theme" in usr_msg or "Below are the anonymised" in usr_msg:
+            theme_ids = [f"theme_{i}" for i in range(5)]
+            out = {"ratings": {tid: {"importance": ((seed >> (i*2)) % 5) + 1,
+                                     "plausibility": ((seed >> (i*2+1)) % 5) + 1}
+                               for i, tid in enumerate(theme_ids)}}
+            if "Sie sehen unten" in usr_msg or "Below are the anonymised" in usr_msg:
+                out["justification"] = "stub-revision"
+            return out
+
+        # Fallback
+        return {"stub_key": h[:12], "value": rng}
 
     def chat(
         self,
diff --git a/backend/tests/integration/__init__.py b/backend/tests/integration/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/tests/integration/test_interview_pipeline.py b/backend/tests/integration/test_interview_pipeline.py
new file mode 100644
index 00000000..54bb0540
--- /dev/null
+++ b/backend/tests/integration/test_interview_pipeline.py
@@ -0,0 +1,88 @@
+"""Integration tests: interview pipeline end to end with the LLM in stub mode."""
+
+import json
+import pytest
+from pathlib import Path
+from app.config import Config
+from app.models.interview import SubagentKind, InterviewPhase
+from app.services.interviews.adapters import FileSystemPersonaProvider
+from app.services.interviews.base import MemoryDigest
+from app.services.interviews.zep_writer import InterviewZepWriter
+from app.services.interview_orchestrator import InterviewOrchestrator
+from app.services.interview_synthesizer import InterviewSynthesizer
+from app.utils.llm_client import LLMClient
+
+pytestmark = pytest.mark.integration
+
+INST_DIR = Path(__file__).resolve().parents[2] / "scripts" / "instruments"
+
+class _NullUpdater:
+    def __init__(self): self.events = []
+    def add_text_episode(self, graph_id, text): self.events.append(text)
+
+class _StaticMem:
+    def get_digest(self, agent_id, max_chars=2000):
+        return MemoryDigest(text=f"agent {agent_id} memory snippet", available=True)
+
+@pytest.fixture
+def seeded_uploads(tmp_path, monkeypatch):
+    """Enable LLM stub mode and seed five reddit profiles under tmp_path."""
+    monkeypatch.setenv("LLM_STUB_MODE", "true")
+    # Set the flag through monkeypatch so it is restored after each test.
+    monkeypatch.setattr(Config, "LLM_STUB_MODE", True, raising=False)
+    sim_dir = tmp_path / "simulations" / "intg_sim"
+    sim_dir.mkdir(parents=True)
+    profiles = [{"user_id": i, "user_name": f"u{i}", "name": f"A{i}",
+                 "persona": "stakeholder p", "profession": "fisher"} for i in range(5)]
+    (sim_dir / "reddit_profiles.json").write_text(json.dumps(profiles), encoding="utf-8")
+    return tmp_path
+
+def _make_orch(tmp_path):
+    """Build an InterviewOrchestrator over the seeded simulation directory."""
+    sim_dir = tmp_path / "simulations" / "intg_sim"
+    personas = FileSystemPersonaProvider(
+        reddit_path=sim_dir / "reddit_profiles.json", twitter_path=None,
+    )
+    llm = LLMClient(api_key="x", base_url="x", model="x")
+    updater = _NullUpdater()
+    writer = InterviewZepWriter(memory_updater=updater, graph_id="g")
+    return InterviewOrchestrator(
+        llm=llm, memory=_StaticMem(), personas=personas,
+        instrument_dir=INST_DIR, store_root=tmp_path, sim_id="intg_sim",
+        zep_writer=writer, max_workers=2, language="de",
+    )
+
+def test_pipeline_runs_pre_then_post_then_synthesis(seeded_uploads):
+    """Run pre, post and synthesis; check report headings and the CSV export header."""
+    tmp = seeded_uploads
+    orch = _make_orch(tmp)
+
+    pre = orch.run_pre()
+    assert pre["longitudinal"]["n_responded"] >= 1
+
+    post = orch.run_post()
+    assert "longitudinal" in post
+    assert "diversity" in post
+    assert "scenario" in post
+    assert "delphi" in post
+
+    synth = InterviewSynthesizer(store=orch.store)
+    report = synth.run()
+    assert "Stakeholder Interview Synthesis" in report
+    assert "Limitations" in report
+
+    csv_path = orch.store.base / "synthesis" / "exports" / "all_responses.csv"
+    assert csv_path.exists()
+    lines = csv_path.read_text(encoding="utf-8").splitlines()
+    assert "agent_id" in lines[0]
+
+def test_idempotent_rerun_creates_new_run_id(seeded_uploads):
+    """A scenario rerun must report a different run_dir than the first post run."""
+    tmp = seeded_uploads
+    orch = _make_orch(tmp)
+    orch.run_pre()
+    first = orch.run_post()
+    second = orch.rerun(SubagentKind.SCENARIO)
+    first_scn = first["scenario"]["run_dir"]
+    second_scn = second["scenario"]["run_dir"]
+    assert first_scn != second_scn