feat(interviews): longitudinal subagent + 12-item Likert instrument
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
289a0cff56
commit
0fcb815cde
|
|
@ -0,0 +1,109 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
import json
|
||||||
|
import math
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
from app.models.interview import (
|
||||||
|
LikertInstrument, LikertResponse, InterviewPhase,
|
||||||
|
)
|
||||||
|
from app.services.interviews.base import StakeholderInterviewer, PersonaRecord
|
||||||
|
from app.services.interviews.instrument_loader import load_likert_instrument
|
||||||
|
|
||||||
|
|
||||||
|
class LongitudinalSubagent:
|
||||||
|
def __init__(self, llm, memory, instrument_path: Path, language: str = "de"):
|
||||||
|
self.instrument: LikertInstrument = load_likert_instrument(Path(instrument_path))
|
||||||
|
self.interviewer = StakeholderInterviewer(llm=llm, memory=memory, language=language)
|
||||||
|
self.language = language
|
||||||
|
|
||||||
|
def _schema_hint(self) -> str:
|
||||||
|
ids = [i.item_id for i in self.instrument.items]
|
||||||
|
return json.dumps({
|
||||||
|
"responses": {k: "<int 1-5>" for k in ids},
|
||||||
|
"confidence": {k: "<float 0-1>" for k in ids},
|
||||||
|
"open_comment": "<string, optional>",
|
||||||
|
}, ensure_ascii=False)
|
||||||
|
|
||||||
|
def _user_prompt(self) -> str:
|
||||||
|
lines = [
|
||||||
|
"Bitte bewerten Sie die folgenden Aussagen auf einer Skala von 1 (lehne stark ab) bis 5 (stimme stark zu)."
|
||||||
|
if self.language == "de"
|
||||||
|
else "Please rate the following statements on a scale from 1 (strongly disagree) to 5 (strongly agree)."
|
||||||
|
]
|
||||||
|
for it in self.instrument.items:
|
||||||
|
txt = it.de if self.language == "de" else it.en
|
||||||
|
lines.append(f"- [{it.item_id}] {txt}")
|
||||||
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
def _validator(self, raw: dict) -> Optional[dict]:
|
||||||
|
if not isinstance(raw, dict):
|
||||||
|
return None
|
||||||
|
resp = raw.get("responses")
|
||||||
|
if not isinstance(resp, dict):
|
||||||
|
return None
|
||||||
|
required = {it.item_id for it in self.instrument.items}
|
||||||
|
if not required.issubset(resp.keys()):
|
||||||
|
return None
|
||||||
|
for k, v in resp.items():
|
||||||
|
if not isinstance(v, int) or not 1 <= v <= 5:
|
||||||
|
return None
|
||||||
|
return raw
|
||||||
|
|
||||||
|
def administer(self, persona: PersonaRecord, phase: InterviewPhase) -> LikertResponse:
|
||||||
|
raw = self.interviewer.ask_in_character(
|
||||||
|
persona,
|
||||||
|
user_prompt=self._user_prompt(),
|
||||||
|
schema_hint=self._schema_hint(),
|
||||||
|
validate=self._validator,
|
||||||
|
)
|
||||||
|
return LikertResponse(
|
||||||
|
agent_id=persona.agent_id,
|
||||||
|
phase=phase,
|
||||||
|
responses={k: int(v) for k, v in raw["responses"].items()},
|
||||||
|
confidence={k: float(v) for k, v in raw.get("confidence", {}).items()},
|
||||||
|
open_comment=raw.get("open_comment"),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def run_aggregate(t0: list[LikertResponse], t1: list[LikertResponse]) -> dict:
|
||||||
|
by_t0 = {r.agent_id: r for r in t0}
|
||||||
|
by_t1 = {r.agent_id: r for r in t1}
|
||||||
|
paired = sorted(set(by_t0) & set(by_t1))
|
||||||
|
items: set[str] = set()
|
||||||
|
for r in t0 + t1:
|
||||||
|
items.update(r.responses.keys())
|
||||||
|
per_item: dict[str, dict] = {}
|
||||||
|
for it in sorted(items):
|
||||||
|
deltas = []
|
||||||
|
for aid in paired:
|
||||||
|
v0 = by_t0[aid].responses.get(it)
|
||||||
|
v1 = by_t1[aid].responses.get(it)
|
||||||
|
if v0 is None or v1 is None:
|
||||||
|
continue
|
||||||
|
deltas.append(v1 - v0)
|
||||||
|
if not deltas:
|
||||||
|
per_item[it] = {"mean_delta": None, "n": 0}
|
||||||
|
continue
|
||||||
|
m = sum(deltas) / len(deltas)
|
||||||
|
var = sum((d - m) ** 2 for d in deltas) / max(len(deltas) - 1, 1)
|
||||||
|
per_item[it] = {
|
||||||
|
"mean_delta": m,
|
||||||
|
"sd_delta": math.sqrt(var),
|
||||||
|
"n": len(deltas),
|
||||||
|
"n_positive": sum(1 for d in deltas if d > 0),
|
||||||
|
"n_negative": sum(1 for d in deltas if d < 0),
|
||||||
|
}
|
||||||
|
per_agent: dict[int, dict] = {}
|
||||||
|
for aid in paired:
|
||||||
|
r0 = by_t0[aid].responses
|
||||||
|
r1 = by_t1[aid].responses
|
||||||
|
common = set(r0) & set(r1)
|
||||||
|
total = sum(abs(r1[k] - r0[k]) for k in common)
|
||||||
|
per_agent[aid] = {"total_abs_drift": total, "n_items": len(common)}
|
||||||
|
return {
|
||||||
|
"n_paired": len(paired),
|
||||||
|
"n_t0_only": len(set(by_t0) - set(by_t1)),
|
||||||
|
"n_t1_only": len(set(by_t1) - set(by_t0)),
|
||||||
|
"per_item": per_item,
|
||||||
|
"per_agent": per_agent,
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,47 @@
|
||||||
|
name: longitudinal_v1
|
||||||
|
version: "1.0"
|
||||||
|
language_default: de
|
||||||
|
items:
|
||||||
|
# Stock status & recovery
|
||||||
|
- {item_id: stk_1, family: stocks, scale: 5,
|
||||||
|
de: "Der westliche Dorschbestand wird sich bis 2035 erholen.",
|
||||||
|
en: "The Western Baltic cod stock will recover by 2035."}
|
||||||
|
- {item_id: stk_2, family: stocks, scale: 5,
|
||||||
|
de: "Der Heringsbestand in der westlichen Ostsee ist nicht mehr zu retten.",
|
||||||
|
en: "The Western Baltic herring stock can no longer be saved.",
|
||||||
|
reverse_coded: true}
|
||||||
|
- {item_id: stk_3, family: stocks, scale: 5,
|
||||||
|
de: "Wissenschaftliche Bestandsschätzungen sind generell zuverlässig.",
|
||||||
|
en: "Scientific stock assessments are generally reliable."}
|
||||||
|
# Governance & CFP
|
||||||
|
- {item_id: gov_1, family: governance, scale: 5,
|
||||||
|
de: "Die Gemeinsame Fischereipolitik der EU scheitert beim Schutz der Ostseefische.",
|
||||||
|
en: "The EU Common Fisheries Policy fails to protect Baltic fish.",
|
||||||
|
reverse_coded: true}
|
||||||
|
- {item_id: gov_2, family: governance, scale: 5,
|
||||||
|
de: "Entscheidungen über Fangquoten sollten stärker lokal getroffen werden.",
|
||||||
|
en: "Decisions on catch quotas should be taken more locally."}
|
||||||
|
- {item_id: gov_3, family: governance, scale: 5,
|
||||||
|
de: "Die deutsche Bundesregierung handelt entschlossen bei Fischereifragen.",
|
||||||
|
en: "The German federal government acts decisively on fisheries issues."}
|
||||||
|
# Market & MSC
|
||||||
|
- {item_id: mkt_1, family: market, scale: 5,
|
||||||
|
de: "Nur MSC-zertifizierter Fisch sollte verkauft werden dürfen.",
|
||||||
|
en: "Only MSC-certified fish should be allowed for sale."}
|
||||||
|
- {item_id: mkt_2, family: market, scale: 5,
|
||||||
|
de: "Importierter Fisch verdrängt die deutsche Kleinfischerei.",
|
||||||
|
en: "Imported fish displaces German small-scale fisheries."}
|
||||||
|
- {item_id: mkt_3, family: market, scale: 5,
|
||||||
|
de: "Verbraucher zahlen gerne mehr für nachhaltigen Ostseefisch.",
|
||||||
|
en: "Consumers gladly pay more for sustainable Baltic fish."}
|
||||||
|
# Climate & adaptation
|
||||||
|
- {item_id: clm_1, family: climate, scale: 5,
|
||||||
|
de: "Der Klimawandel macht traditionelle Ostseefischerei unmöglich.",
|
||||||
|
en: "Climate change makes traditional Baltic fisheries impossible.",
|
||||||
|
reverse_coded: true}
|
||||||
|
- {item_id: clm_2, family: climate, scale: 5,
|
||||||
|
de: "Aquakultur ist die Zukunft der deutschen Fischwirtschaft.",
|
||||||
|
en: "Aquaculture is the future of the German fishing industry."}
|
||||||
|
- {item_id: clm_3, family: climate, scale: 5,
|
||||||
|
de: "Die Fischerei muss sich grundlegend an neue Arten anpassen.",
|
||||||
|
en: "Fisheries must fundamentally adapt to new species."}
|
||||||
|
|
@ -0,0 +1,57 @@
|
||||||
|
from pathlib import Path
|
||||||
|
import pytest
|
||||||
|
from app.models.interview import InterviewPhase
|
||||||
|
from app.services.interviews.base import PersonaRecord, MemoryDigest
|
||||||
|
from app.services.interviews.longitudinal import LongitudinalSubagent, run_aggregate
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeMem:
|
||||||
|
def get_digest(self, agent_id, max_chars=2000):
|
||||||
|
return MemoryDigest(text="x", available=True)
|
||||||
|
|
||||||
|
|
||||||
|
class _CannedLLM:
|
||||||
|
def __init__(self): self.n = 0
|
||||||
|
def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
|
||||||
|
self.n += 1
|
||||||
|
return {
|
||||||
|
"responses": {
|
||||||
|
"stk_1": 4, "stk_2": 3, "stk_3": 5,
|
||||||
|
"gov_1": 3, "gov_2": 4, "gov_3": 2,
|
||||||
|
"mkt_1": 5, "mkt_2": 3, "mkt_3": 4,
|
||||||
|
"clm_1": 2, "clm_2": 4, "clm_3": 5,
|
||||||
|
},
|
||||||
|
"confidence": {
|
||||||
|
"stk_1": 0.8, "stk_2": 0.7, "stk_3": 0.9,
|
||||||
|
"gov_1": 0.6, "gov_2": 0.7, "gov_3": 0.5,
|
||||||
|
"mkt_1": 0.7, "mkt_2": 0.6, "mkt_3": 0.8,
|
||||||
|
"clm_1": 0.5, "clm_2": 0.7, "clm_3": 0.6,
|
||||||
|
},
|
||||||
|
"open_comment": "test",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
INSTRUMENT = Path(__file__).resolve().parents[2] / "scripts" / "instruments" / "longitudinal_v1.yaml"
|
||||||
|
|
||||||
|
|
||||||
|
def test_longitudinal_administer_one_agent():
|
||||||
|
sub = LongitudinalSubagent(llm=_CannedLLM(), memory=_FakeMem(), instrument_path=INSTRUMENT)
|
||||||
|
persona = PersonaRecord(agent_id=3, name="A", persona="p")
|
||||||
|
resp = sub.administer(persona, phase=InterviewPhase.T0)
|
||||||
|
assert resp.agent_id == 3
|
||||||
|
assert resp.phase == InterviewPhase.T0
|
||||||
|
assert set(resp.responses.keys()) >= {"stk_1", "gov_1", "mkt_1", "clm_1"}
|
||||||
|
|
||||||
|
|
||||||
|
def test_longitudinal_aggregate_delta():
|
||||||
|
from app.models.interview import LikertResponse
|
||||||
|
t0 = [LikertResponse(agent_id=i, phase=InterviewPhase.T0,
|
||||||
|
responses={"stk_1": 3, "gov_1": 4},
|
||||||
|
confidence={"stk_1": 0.8, "gov_1": 0.8}) for i in range(5)]
|
||||||
|
t1 = [LikertResponse(agent_id=i, phase=InterviewPhase.T1,
|
||||||
|
responses={"stk_1": 4, "gov_1": 4},
|
||||||
|
confidence={"stk_1": 0.8, "gov_1": 0.8}) for i in range(5)]
|
||||||
|
agg = run_aggregate(t0, t1)
|
||||||
|
assert agg["per_item"]["stk_1"]["mean_delta"] == 1.0
|
||||||
|
assert agg["per_item"]["gov_1"]["mean_delta"] == 0.0
|
||||||
|
assert agg["n_paired"] == 5
|
||||||
Loading…
Reference in New Issue