test(interviews): end-to-end pipeline test + content-aware LLM stubs for all 4 subagents

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Christian Moellmann 2026-05-23 12:40:53 +02:00
parent 52bae0a3da
commit 61f13a806d
3 changed files with 141 additions and 9 deletions

View File

@ -47,15 +47,66 @@ class LLMClient:
return _json.dumps(self._stub_response_json(messages), ensure_ascii=False)
def _stub_response_json(self, messages: list[dict]) -> dict:
    """Return a deterministic, content-aware stub payload for *messages*.

    The payload shape is chosen by looking for marker substrings in the
    first system message and the most recent user message. All values are
    derived from a SHA-256 digest of those two texts, so identical prompts
    always produce identical payloads.

    Args:
        messages: Chat messages as dicts with ``role`` and ``content`` keys.
            A missing or ``None`` content is treated as an empty string.

    Returns:
        One of the following shapes, checked in this order:

        * longitudinal Likert: ``responses`` / ``confidence`` / ``open_comment``
        * diversity Q-sort: ``placements`` / ``likert_axes``
        * scenario ratings: ``ratings`` keyed by ``S1``..``S4``
        * Delphi theme extraction: ``themes``
        * Delphi round 1: ``answers`` keyed by ``q1``..``q4``
        * Delphi round 2/3: ``ratings`` (plus ``justification`` on revision)
        * fallback: ``stub_key`` / ``value``
    """
    # NOTE(review): the previous fixed three-item payload used to sit here and
    # returned unconditionally, which left every branch below unreachable.
    import hashlib

    def _content(role: str, *, latest: bool = False) -> str:
        # First (or, with latest=True, last) message of the given role.
        ordered = reversed(messages) if latest else messages
        for msg in ordered:
            if msg.get("role") == role:
                body = msg.get("content")
                if body is None:
                    return ""
                return body if isinstance(body, str) else str(body)
        return ""

    sys_msg = _content("system")
    usr_msg = _content("user", latest=True)
    h = hashlib.sha256((sys_msg + "|" + usr_msg).encode("utf-8")).hexdigest()
    seed = int(h[:8], 16)  # 32 bits: enough for every branch shifting < 32

    # Longitudinal Likert (12 items)
    if all(tok in usr_msg for tok in ("stk_1", "gov_1", "mkt_1", "clm_1")):
        ids = [f"{scale}_{n}"
               for scale in ("stk", "gov", "mkt", "clm")
               for n in (1, 2, 3)]
        # Twelve items need shifts of up to 33 bits; drawing from the 32-bit
        # seed would pin the last item to 1, so use the whole digest here.
        wide = int(h, 16)
        return {
            "responses": {k: ((wide >> (i * 3)) % 5) + 1 for i, k in enumerate(ids)},
            "confidence": {k: 0.6 for k in ids},
            "open_comment": f"stub:{h[:8]}",
        }

    # Diversity Q-sort: 24 statements + 6 axes, forced distribution 2,3,4,6,4,3,2
    if "st_01" in usr_msg and "ax_pres_extr" in usr_msg:
        buckets = [-3] * 2 + [-2] * 3 + [-1] * 4 + [0] * 6 + [1] * 4 + [2] * 3 + [3] * 2
        stmts = [f"st_{i + 1:02d}" for i in range(24)]
        # Deterministic shuffle: order statement indices by a digest character.
        order = sorted(range(24), key=lambda i: (h[i % len(h)], i))
        slot_of = dict(zip(order, buckets))
        axes = ["ax_pres_extr", "ax_loc_eu", "ax_sci_trad",
                "ax_ind_col", "ax_short_long", "ax_mkt_reg"]
        return {
            "placements": {stmt: slot_of[i] for i, stmt in enumerate(stmts)},
            "likert_axes": {a: ((seed >> (j * 3)) % 7) + 1 for j, a in enumerate(axes)},
        }

    # Scenario: S1..S4 x 4 dims
    if all(s in usr_msg for s in ("S1:", "S2:", "S3:", "S4:")):
        return {"ratings": {sid: {
            "desirability": ((seed >> (i * 3)) % 7) + 1,
            "plausibility": ((seed >> (i * 3 + 1)) % 7) + 1,
            "impact_on_my_group": ((seed >> (i * 3 + 2)) % 7) + 1,
            "fairness": ((seed >> (i * 3 + 4)) % 7) + 1,
            "if_woke_up_response": f"act-{sid}-{h[:4]}",
        } for i, sid in enumerate(["S1", "S2", "S3", "S4"])}}

    # Delphi theme extraction (no in-character system prompt). Checked before
    # round 1 because it keys on the system prompt, whereas the round-1 test is
    # a loose substring match that text quoting "q1"/"q2" would also satisfy.
    if "extract distinct thematic codes" in sys_msg:
        return {"themes": [{"theme_id": f"theme_{i}", "label": f"Thema {i}"}
                           for i in range(5)]}

    rate_markers = ("Bewerten Sie jedes Thema", "Rate each theme")
    revise_markers = ("Sie sehen unten", "Below are the anonymised")
    is_revision = any(marker in usr_msg for marker in revise_markers)

    # Delphi R1: q1..q4 free text. Exclude both the German and the English
    # rate/revise prompts, matching the markers the R2/R3 branch accepts.
    not_r1 = ("Bewerten", "Sie sehen", "Rate each theme", "Below are the anonymised")
    if "q1" in usr_msg and "q2" in usr_msg and not any(m in usr_msg for m in not_r1):
        return {"answers": {qid: f"stub-themes-{qid}-{h[:4]}"
                            for qid in ("q1", "q2", "q3", "q4")}}

    # Delphi R2 (rate) or R3 (revise)
    if is_revision or any(marker in usr_msg for marker in rate_markers):
        theme_ids = [f"theme_{i}" for i in range(5)]
        out = {"ratings": {tid: {"importance": ((seed >> (i * 2)) % 5) + 1,
                                 "plausibility": ((seed >> (i * 2 + 1)) % 5) + 1}
                           for i, tid in enumerate(theme_ids)}}
        if is_revision:
            out["justification"] = "stub-revision"
        return out

    # Fallback
    return {"stub_key": h[:12], "value": (seed % 5) + 1}
def chat(
self,

View File

View File

@ -0,0 +1,81 @@
import json
import pytest
from pathlib import Path
from app.config import Config
from app.models.interview import SubagentKind, InterviewPhase
from app.services.interviews.adapters import FileSystemPersonaProvider
from app.services.interviews.base import MemoryDigest
from app.services.interviews.zep_writer import InterviewZepWriter
from app.services.interview_orchestrator import InterviewOrchestrator
from app.services.interview_synthesizer import InterviewSynthesizer
from app.utils.llm_client import LLMClient
# Module-level pytest marker: applies the "integration" mark to every test here.
pytestmark = pytest.mark.integration

# Instrument definitions live two directory levels above this file's folder.
INST_DIR = Path(__file__).resolve().parents[2].joinpath("scripts", "instruments")
class _NullUpdater:
    """In-memory stand-in for a memory updater; records episode texts only."""

    def __init__(self):
        # Texts passed to add_text_episode, in call order.
        self.events = []

    def add_text_episode(self, graph_id, text):
        """Record *text*; *graph_id* is accepted but not used."""
        self.events.append(text)
class _StaticMem:
    """Memory provider stub that returns a fixed, always-available digest."""

    def get_digest(self, agent_id, max_chars=2000):
        """Build a digest naming *agent_id*; *max_chars* is accepted but not used."""
        snippet = f"agent {agent_id} memory snippet"
        return MemoryDigest(text=snippet, available=True)
@pytest.fixture
def seeded_uploads(tmp_path, monkeypatch):
    """Enable LLM stub mode and seed a five-profile simulation directory.

    Writes ``simulations/intg_sim/reddit_profiles.json`` under ``tmp_path``.

    Returns:
        ``tmp_path``, the root under which the simulation directory was created.
    """
    monkeypatch.setenv("LLM_STUB_MODE", "true")
    # Go through monkeypatch so the class attribute is restored at teardown;
    # a plain assignment would leak stub mode into every later test.
    # raising=False keeps this working even if Config lacks the attribute.
    monkeypatch.setattr(Config, "LLM_STUB_MODE", True, raising=False)

    sim_dir = tmp_path / "simulations" / "intg_sim"
    sim_dir.mkdir(parents=True)
    profiles = [
        {
            "user_id": i,
            "user_name": f"u{i}",
            "name": f"A{i}",
            "persona": "stakeholder p",
            "profession": "fisher",
        }
        for i in range(5)
    ]
    (sim_dir / "reddit_profiles.json").write_text(json.dumps(profiles), encoding="utf-8")
    return tmp_path
def _make_orch(tmp_path):
    """Assemble an InterviewOrchestrator for the ``intg_sim`` simulation.

    Uses the profile file under ``tmp_path``, a placeholder-configured
    LLMClient, the static memory stub and a Zep writer backed by the
    in-memory updater.
    """
    profiles_file = tmp_path / "simulations" / "intg_sim" / "reddit_profiles.json"
    persona_provider = FileSystemPersonaProvider(
        reddit_path=profiles_file,
        twitter_path=None,
    )
    stub_llm = LLMClient(api_key="x", base_url="x", model="x")
    zep_writer = InterviewZepWriter(memory_updater=_NullUpdater(), graph_id="g")
    return InterviewOrchestrator(
        llm=stub_llm,
        memory=_StaticMem(),
        personas=persona_provider,
        instrument_dir=INST_DIR,
        store_root=tmp_path,
        sim_id="intg_sim",
        zep_writer=zep_writer,
        max_workers=2,
        language="de",
    )
def test_pipeline_runs_pre_then_post_then_synthesis(seeded_uploads):
    """Run the pre phase, the post phase and synthesis, then check the outputs.

    Checks that the pre phase has at least one longitudinal respondent, that
    the post result has an entry for all four subagents, that the report
    contains its title and a Limitations section, and that the CSV export
    exists with an ``agent_id`` column in its header.
    """
    orch = _make_orch(seeded_uploads)

    pre = orch.run_pre()
    assert pre["longitudinal"]["n_responded"] >= 1

    post = orch.run_post()
    for kind in ("longitudinal", "diversity", "scenario", "delphi"):
        assert kind in post, f"missing {kind!r} in post-phase result"

    synth = InterviewSynthesizer(store=orch.store)
    report = synth.run()
    assert "Stakeholder Interview Synthesis" in report
    assert "Limitations" in report

    csv_path = orch.store.base / "synthesis" / "exports" / "all_responses.csv"
    assert csv_path.exists()
    lines = csv_path.read_text().splitlines()
    # Fail with a readable message, not an IndexError, if the export is empty.
    assert lines, "all_responses.csv is empty"
    assert "agent_id" in lines[0]
def test_idempotent_rerun_creates_new_run_id(seeded_uploads):
    """Re-running the scenario subagent must produce a different run directory."""
    orch = _make_orch(seeded_uploads)
    orch.run_pre()
    initial = orch.run_post()
    repeated = orch.rerun(SubagentKind.SCENARIO)
    assert initial["scenario"]["run_dir"] != repeated["scenario"]["run_dir"]