diff --git a/backend/app/services/interviews/longitudinal.py b/backend/app/services/interviews/longitudinal.py new file mode 100644 index 00000000..4f13ec23 --- /dev/null +++ b/backend/app/services/interviews/longitudinal.py @@ -0,0 +1,109 @@ +from __future__ import annotations +import json +import math +from pathlib import Path +from typing import Optional +from app.models.interview import ( + LikertInstrument, LikertResponse, InterviewPhase, +) +from app.services.interviews.base import StakeholderInterviewer, PersonaRecord +from app.services.interviews.instrument_loader import load_likert_instrument + + +class LongitudinalSubagent: + def __init__(self, llm, memory, instrument_path: Path, language: str = "de"): + self.instrument: LikertInstrument = load_likert_instrument(Path(instrument_path)) + self.interviewer = StakeholderInterviewer(llm=llm, memory=memory, language=language) + self.language = language + + def _schema_hint(self) -> str: + ids = [i.item_id for i in self.instrument.items] + return json.dumps({ + "responses": {k: "" for k in ids}, + "confidence": {k: "" for k in ids}, + "open_comment": "", + }, ensure_ascii=False) + + def _user_prompt(self) -> str: + lines = [ + "Bitte bewerten Sie die folgenden Aussagen auf einer Skala von 1 (lehne stark ab) bis 5 (stimme stark zu)." + if self.language == "de" + else "Please rate the following statements on a scale from 1 (strongly disagree) to 5 (strongly agree)." + ] + for it in self.instrument.items: + txt = it.de if self.language == "de" else it.en + lines.append(f"- [{it.item_id}] {txt}") + return "\n".join(lines) + + def _validator(self, raw: dict) -> Optional[dict]: + if not isinstance(raw, dict): + return None + resp = raw.get("responses") + if not isinstance(resp, dict): + return None + required = {it.item_id for it in self.instrument.items} + if not required.issubset(resp.keys()): + return None + for k, v in resp.items(): + if not isinstance(v, int) or not 1 <= v <= 5: + return None + return raw + + def administer(self, persona: PersonaRecord, phase: InterviewPhase) -> LikertResponse: + raw = self.interviewer.ask_in_character( + persona, + user_prompt=self._user_prompt(), + schema_hint=self._schema_hint(), + validate=self._validator, + ) + return LikertResponse( + agent_id=persona.agent_id, + phase=phase, + responses={k: int(v) for k, v in raw["responses"].items()}, + confidence={k: float(v) for k, v in raw.get("confidence", {}).items()}, + open_comment=raw.get("open_comment"), + ) + + +def run_aggregate(t0: list[LikertResponse], t1: list[LikertResponse]) -> dict: + by_t0 = {r.agent_id: r for r in t0} + by_t1 = {r.agent_id: r for r in t1} + paired = sorted(set(by_t0) & set(by_t1)) + items: set[str] = set() + for r in t0 + t1: + items.update(r.responses.keys()) + per_item: dict[str, dict] = {} + for it in sorted(items): + deltas = [] + for aid in paired: + v0 = by_t0[aid].responses.get(it) + v1 = by_t1[aid].responses.get(it) + if v0 is None or v1 is None: + continue + deltas.append(v1 - v0) + if not deltas: + per_item[it] = {"mean_delta": None, "n": 0} + continue + m = sum(deltas) / len(deltas) + var = sum((d - m) ** 2 for d in deltas) / max(len(deltas) - 1, 1) + per_item[it] = { + "mean_delta": m, + "sd_delta": math.sqrt(var), + "n": len(deltas), + "n_positive": sum(1 for d in deltas if d > 0), + "n_negative": sum(1 for d in deltas if d < 0), + } + per_agent: dict[int, dict] = {} + for aid in paired: + r0 = by_t0[aid].responses + r1 = by_t1[aid].responses + common = set(r0) & set(r1) + total = sum(abs(r1[k] - r0[k]) for k in common) + per_agent[aid] = {"total_abs_drift": total, "n_items": len(common)} + return { + "n_paired": len(paired), + "n_t0_only": len(set(by_t0) - set(by_t1)), + "n_t1_only": len(set(by_t1) - set(by_t0)), + "per_item": per_item, + "per_agent": per_agent, + } diff --git a/backend/scripts/instruments/longitudinal_v1.yaml b/backend/scripts/instruments/longitudinal_v1.yaml new file mode 100644 index 00000000..7a37d18c --- /dev/null +++ b/backend/scripts/instruments/longitudinal_v1.yaml @@ -0,0 +1,47 @@ +name: longitudinal_v1 +version: "1.0" +language_default: de +items: + # Stock status & recovery + - {item_id: stk_1, family: stocks, scale: 5, + de: "Der westliche Dorschbestand wird sich bis 2035 erholen.", + en: "The Western Baltic cod stock will recover by 2035."} + - {item_id: stk_2, family: stocks, scale: 5, + de: "Der Heringsbestand in der westlichen Ostsee ist nicht mehr zu retten.", + en: "The Western Baltic herring stock can no longer be saved.", + reverse_coded: true} + - {item_id: stk_3, family: stocks, scale: 5, + de: "Wissenschaftliche Bestandsschätzungen sind generell zuverlässig.", + en: "Scientific stock assessments are generally reliable."} + # Governance & CFP + - {item_id: gov_1, family: governance, scale: 5, + de: "Die Gemeinsame Fischereipolitik der EU scheitert beim Schutz der Ostseefische.", + en: "The EU Common Fisheries Policy fails to protect Baltic fish.", + reverse_coded: true} + - {item_id: gov_2, family: governance, scale: 5, + de: "Entscheidungen über Fangquoten sollten stärker lokal getroffen werden.", + en: "Decisions on catch quotas should be taken more locally."} + - {item_id: gov_3, family: governance, scale: 5, + de: "Die deutsche Bundesregierung handelt entschlossen bei Fischereifragen.", + en: "The German federal government acts decisively on fisheries issues."} + # Market & MSC + - {item_id: mkt_1, family: market, scale: 5, + de: "Nur MSC-zertifizierter Fisch sollte verkauft werden dürfen.", + en: "Only MSC-certified fish should be allowed for sale."} + - {item_id: mkt_2, family: market, scale: 5, + de: "Importierter Fisch verdrängt die deutsche Kleinfischerei.", + en: "Imported fish displaces German small-scale fisheries."} + - {item_id: mkt_3, family: market, scale: 5, + de: "Verbraucher zahlen gerne mehr für nachhaltigen Ostseefisch.", + en: "Consumers gladly pay more for sustainable Baltic fish."} + # Climate & adaptation + - {item_id: clm_1, family: climate, scale: 5, + de: "Der Klimawandel macht traditionelle Ostseefischerei unmöglich.", + en: "Climate change makes traditional Baltic fisheries impossible.", + reverse_coded: true} + - {item_id: clm_2, family: climate, scale: 5, + de: "Aquakultur ist die Zukunft der deutschen Fischwirtschaft.", + en: "Aquaculture is the future of the German fishing industry."} + - {item_id: clm_3, family: climate, scale: 5, + de: "Die Fischerei muss sich grundlegend an neue Arten anpassen.", + en: "Fisheries must fundamentally adapt to new species."} diff --git a/backend/tests/interviews/test_longitudinal.py b/backend/tests/interviews/test_longitudinal.py new file mode 100644 index 00000000..823e1552 --- /dev/null +++ b/backend/tests/interviews/test_longitudinal.py @@ -0,0 +1,57 @@ +from pathlib import Path +import pytest +from app.models.interview import InterviewPhase +from app.services.interviews.base import PersonaRecord, MemoryDigest +from app.services.interviews.longitudinal import LongitudinalSubagent, run_aggregate + + +class _FakeMem: + def get_digest(self, agent_id, max_chars=2000): + return MemoryDigest(text="x", available=True) + + +class _CannedLLM: + def __init__(self): self.n = 0 + def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw): + self.n += 1 + return { + "responses": { + "stk_1": 4, "stk_2": 3, "stk_3": 5, + "gov_1": 3, "gov_2": 4, "gov_3": 2, + "mkt_1": 5, "mkt_2": 3, "mkt_3": 4, + "clm_1": 2, "clm_2": 4, "clm_3": 5, + }, + "confidence": { + "stk_1": 0.8, "stk_2": 0.7, "stk_3": 0.9, + "gov_1": 0.6, "gov_2": 0.7, "gov_3": 0.5, + "mkt_1": 0.7, "mkt_2": 0.6, "mkt_3": 0.8, + "clm_1": 0.5, "clm_2": 0.7, "clm_3": 0.6, + }, + "open_comment": "test", + } + + +INSTRUMENT = Path(__file__).resolve().parents[2] / "scripts" / "instruments" / "longitudinal_v1.yaml" + + +def test_longitudinal_administer_one_agent(): + sub = LongitudinalSubagent(llm=_CannedLLM(), memory=_FakeMem(), instrument_path=INSTRUMENT) + persona = PersonaRecord(agent_id=3, name="A", persona="p") + resp = sub.administer(persona, phase=InterviewPhase.T0) + assert resp.agent_id == 3 + assert resp.phase == InterviewPhase.T0 + assert set(resp.responses.keys()) >= {"stk_1", "gov_1", "mkt_1", "clm_1"} + + +def test_longitudinal_aggregate_delta(): + from app.models.interview import LikertResponse + t0 = [LikertResponse(agent_id=i, phase=InterviewPhase.T0, + responses={"stk_1": 3, "gov_1": 4}, + confidence={"stk_1": 0.8, "gov_1": 0.8}) for i in range(5)] + t1 = [LikertResponse(agent_id=i, phase=InterviewPhase.T1, + responses={"stk_1": 4, "gov_1": 4}, + confidence={"stk_1": 0.8, "gov_1": 0.8}) for i in range(5)] + agg = run_aggregate(t0, t1) + assert agg["per_item"]["stk_1"]["mean_delta"] == 1.0 + assert agg["per_item"]["gov_1"]["mean_delta"] == 0.0 + assert agg["n_paired"] == 5