test(interviews): end-to-end pipeline test + content-aware LLM stubs for all 4 subagents
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
52bae0a3da
commit
61f13a806d
|
|
@ -47,15 +47,66 @@ class LLMClient:
|
|||
return _json.dumps(self._stub_response_json(messages), ensure_ascii=False)
|
||||
|
||||
def _stub_response_json(self, messages: list[dict]) -> dict:
    """Build a deterministic, content-aware stub payload for ``messages``.

    The first ``system`` message and the last ``user`` message are hashed
    with SHA-256; every number in the reply is derived from that digest, so
    an identical prompt always yields an identical payload. The shape of the
    reply is chosen by looking for marker substrings in the prompt:

    * longitudinal Likert     -> ``responses`` / ``confidence`` / ``open_comment``
    * diversity Q-sort        -> ``placements`` / ``likert_axes``
    * scenario rating         -> ``ratings`` keyed by ``S1``..``S4``
    * Delphi theme extraction -> ``themes``
    * Delphi R2 / R3          -> ``ratings`` (plus ``justification`` for R3)
    * Delphi R1               -> ``answers`` keyed by ``q1``..``q4``
    * anything else           -> ``stub_key`` / ``value``

    Args:
        messages: Chat messages as dicts carrying ``role`` and ``content``.

    Returns:
        A JSON-serialisable dict in one of the shapes listed above.
    """
    import hashlib

    # A missing or None ``content`` is treated as empty text instead of raising.
    sys_msg = next(
        (m.get("content") or "" for m in messages if m.get("role") == "system"), ""
    )
    usr_msg = next(
        (m.get("content") or "" for m in reversed(messages) if m.get("role") == "user"),
        "",
    )
    h = hashlib.sha256((sys_msg + "|" + usr_msg).encode("utf-8")).hexdigest()
    seed = int(h[:8], 16)  # 32-bit seed: enough for the branches that shift by < 20 bits
    rng = (seed % 5) + 1

    # Longitudinal Likert (12 items)
    if all(tok in usr_msg for tok in ("stk_1", "gov_1", "mkt_1", "clm_1")):
        ids = [f"{prefix}_{n}" for prefix in ("stk", "gov", "mkt", "clm") for n in (1, 2, 3)]
        # Twelve items need shifts up to 33 bits; the 32-bit ``seed`` would pin
        # the last item to 1, so take the bits from the full 256-bit digest.
        wide = int(h, 16)
        return {
            "responses": {k: ((wide >> (i * 3)) % 5) + 1 for i, k in enumerate(ids)},
            "confidence": {k: 0.6 for k in ids},
            "open_comment": f"stub:{h[:8]}",
        }

    # Diversity Q-sort: 24 statements + 6 axes, forced distribution 2,3,4,6,4,3,2
    if "st_01" in usr_msg and "ax_pres_extr" in usr_msg:
        buckets = [-3] * 2 + [-2] * 3 + [-1] * 4 + [0] * 6 + [1] * 4 + [2] * 3 + [3] * 2
        stmts = [f"st_{i + 1:02d}" for i in range(24)]
        # Deterministic shuffle: order the statement indices by their hex digit
        # in the digest (ties broken by index), then hand out buckets by rank.
        order = sorted(range(24), key=lambda i: (h[i % len(h)], i))
        rank = {stmt_idx: pos for pos, stmt_idx in enumerate(order)}
        axes = [
            "ax_pres_extr", "ax_loc_eu", "ax_sci_trad",
            "ax_ind_col", "ax_short_long", "ax_mkt_reg",
        ]
        return {
            "placements": {stmts[i]: buckets[rank[i]] for i in range(24)},
            "likert_axes": {a: ((seed >> (j * 3)) % 7) + 1 for j, a in enumerate(axes)},
        }

    # Scenario: S1..S4 x 4 dims
    if all(s in usr_msg for s in ("S1:", "S2:", "S3:", "S4:")):
        return {
            "ratings": {
                sid: {
                    "desirability": ((seed >> (i * 3)) % 7) + 1,
                    "plausibility": ((seed >> (i * 3 + 1)) % 7) + 1,
                    "impact_on_my_group": ((seed >> (i * 3 + 2)) % 7) + 1,
                    "fairness": ((seed >> (i * 3 + 4)) % 7) + 1,
                    "if_woke_up_response": f"act-{sid}-{h[:4]}",
                }
                for i, sid in enumerate(["S1", "S2", "S3", "S4"])
            }
        }

    # Delphi theme extraction (no in-character system prompt). Checked before
    # the loose q1/q2 substring test below, because an extraction prompt that
    # quotes the R1 answers would otherwise be mistaken for an R1 request.
    if "extract distinct thematic codes" in sys_msg:
        return {"themes": [{"theme_id": f"theme_{i}", "label": f"Thema {i}"} for i in range(5)]}

    # Delphi R2 (rate) or R3 (revise), German or English prompt. Checked before
    # R1 so the English prompts are excluded from R1 just like the German ones.
    is_revision = "Sie sehen unten" in usr_msg or "Below are the anonymised" in usr_msg
    is_rating = "Bewerten Sie jedes Thema" in usr_msg or "Rate each theme" in usr_msg
    if is_rating or is_revision:
        theme_ids = [f"theme_{i}" for i in range(5)]
        out = {
            "ratings": {
                tid: {
                    "importance": ((seed >> (i * 2)) % 5) + 1,
                    "plausibility": ((seed >> (i * 2 + 1)) % 5) + 1,
                }
                for i, tid in enumerate(theme_ids)
            }
        }
        if is_revision:
            out["justification"] = "stub-revision"
        return out

    # Delphi R1: q1..q4 free text
    if (
        "q1" in usr_msg
        and "q2" in usr_msg
        and "Bewerten" not in usr_msg
        and "Sie sehen" not in usr_msg
    ):
        return {"answers": {qid: f"stub-themes-{qid}-{h[:4]}" for qid in ("q1", "q2", "q3", "q4")}}

    # Fallback
    return {"stub_key": h[:12], "value": rng}
|
||||
|
||||
def chat(
|
||||
self,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,81 @@
|
|||
import json
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
from app.config import Config
|
||||
from app.models.interview import SubagentKind, InterviewPhase
|
||||
from app.services.interviews.adapters import FileSystemPersonaProvider
|
||||
from app.services.interviews.base import MemoryDigest
|
||||
from app.services.interviews.zep_writer import InterviewZepWriter
|
||||
from app.services.interview_orchestrator import InterviewOrchestrator
|
||||
from app.services.interview_synthesizer import InterviewSynthesizer
|
||||
from app.utils.llm_client import LLMClient
|
||||
|
||||
# Every test in this module carries the ``integration`` marker.
pytestmark = pytest.mark.integration

# ``scripts/instruments`` resolved against the third ancestor directory of
# this file's absolute path.
INST_DIR = Path(__file__).resolve().parents[2].joinpath("scripts", "instruments")
|
||||
|
||||
class _NullUpdater:
    """In-memory stand-in for a memory updater: it only records episode texts."""

    def __init__(self):
        # Texts passed to ``add_text_episode``, in call order.
        self.events = []

    def add_text_episode(self, graph_id, text):
        """Record ``text``; ``graph_id`` is accepted but not used."""
        self.events.append(text)
|
||||
|
||||
class _StaticMem:
    """Memory provider double that returns a fixed, always-available digest."""

    def get_digest(self, agent_id, max_chars=2000):
        """Return a canned digest naming ``agent_id``; ``max_chars`` is ignored."""
        snippet = f"agent {agent_id} memory snippet"
        return MemoryDigest(text=snippet, available=True)
|
||||
|
||||
@pytest.fixture
def seeded_uploads(tmp_path, monkeypatch):
    """Enable LLM stub mode and seed a five-agent simulation under ``tmp_path``.

    Writes ``simulations/intg_sim/reddit_profiles.json`` with five minimal
    profiles and returns ``tmp_path`` so tests can use it as the store root.
    """
    monkeypatch.setenv("LLM_STUB_MODE", "true")
    # Go through monkeypatch so the class attribute is restored at teardown;
    # a plain assignment would leak stub mode into every later test.
    # ``raising=False`` keeps this working if Config does not define the flag.
    monkeypatch.setattr(Config, "LLM_STUB_MODE", True, raising=False)

    sim_dir = tmp_path / "simulations" / "intg_sim"
    sim_dir.mkdir(parents=True)
    profiles = [
        {
            "user_id": i,
            "user_name": f"u{i}",
            "name": f"A{i}",
            "persona": "stakeholder p",
            "profession": "fisher",
        }
        for i in range(5)
    ]
    (sim_dir / "reddit_profiles.json").write_text(json.dumps(profiles), encoding="utf-8")
    return tmp_path
|
||||
|
||||
def _make_orch(tmp_path):
    """Assemble an ``InterviewOrchestrator`` for the ``intg_sim`` simulation.

    Personas are read from the profile file under ``tmp_path``; memory and the
    Zep updater are replaced by the in-module test doubles.
    """
    profiles_file = tmp_path / "simulations" / "intg_sim" / "reddit_profiles.json"
    persona_provider = FileSystemPersonaProvider(
        reddit_path=profiles_file,
        twitter_path=None,
    )
    # Placeholder credentials: only constructor arguments are supplied here.
    client = LLMClient(api_key="x", base_url="x", model="x")
    zep_writer = InterviewZepWriter(memory_updater=_NullUpdater(), graph_id="g")
    return InterviewOrchestrator(
        llm=client,
        memory=_StaticMem(),
        personas=persona_provider,
        instrument_dir=INST_DIR,
        store_root=tmp_path,
        sim_id="intg_sim",
        zep_writer=zep_writer,
        max_workers=2,
        language="de",
    )
|
||||
|
||||
def test_pipeline_runs_pre_then_post_then_synthesis(seeded_uploads):
    """Run the pre phase, the post phase and synthesis end to end.

    Checks that the pre phase collects at least one longitudinal response,
    that the post phase reports all four subagents, and that synthesis
    produces the report plus the CSV export.
    """
    tmp = seeded_uploads
    orch = _make_orch(tmp)

    pre = orch.run_pre()
    assert pre["longitudinal"]["n_responded"] >= 1

    post = orch.run_post()
    for kind in ("longitudinal", "diversity", "scenario", "delphi"):
        assert kind in post

    synth = InterviewSynthesizer(store=orch.store)
    report = synth.run()
    assert "Stakeholder Interview Synthesis" in report
    assert "Limitations" in report

    csv_path = orch.store.base / "synthesis" / "exports" / "all_responses.csv"
    assert csv_path.exists()
    # Explicit encoding: the locale default differs between platforms.
    lines = csv_path.read_text(encoding="utf-8").splitlines()
    assert lines, "all_responses.csv is empty"
    # The header must mention agent_id (this also covers a leading "agent_id,").
    assert "agent_id" in lines[0]
|
||||
|
||||
def test_idempotent_rerun_creates_new_run_id(seeded_uploads):
    """Re-running the scenario subagent must report a different ``run_dir``."""
    orch = _make_orch(seeded_uploads)
    orch.run_pre()

    initial = orch.run_post()
    repeated = orch.rerun(SubagentKind.SCENARIO)

    run_dirs = (initial["scenario"]["run_dir"], repeated["scenario"]["run_dir"])
    assert run_dirs[0] != run_dirs[1]
|
||||
Loading…
Reference in New Issue