MicroFish/backend/tests/interviews/test_longitudinal.py

92 lines
3.8 KiB
Python

from pathlib import Path
import pytest
from app.models.interview import InterviewPhase
from app.services.interviews.base import PersonaRecord, MemoryDigest
from app.services.interviews.longitudinal import LongitudinalSubagent, run_aggregate
class _FakeMem:
def get_digest(self, agent_id, max_chars=2000):
return MemoryDigest(text="x", available=True)
class _CannedLLM:
def __init__(self): self.n = 0
def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
self.n += 1
return {
"responses": {
"stk_1": 4, "stk_2": 3, "stk_3": 5,
"gov_1": 3, "gov_2": 4, "gov_3": 2,
"mkt_1": 5, "mkt_2": 3, "mkt_3": 4,
"clm_1": 2, "clm_2": 4, "clm_3": 5,
},
"confidence": {
"stk_1": 0.8, "stk_2": 0.7, "stk_3": 0.9,
"gov_1": 0.6, "gov_2": 0.7, "gov_3": 0.5,
"mkt_1": 0.7, "mkt_2": 0.6, "mkt_3": 0.8,
"clm_1": 0.5, "clm_2": 0.7, "clm_3": 0.6,
},
"open_comment": "test",
}
INSTRUMENT = Path(__file__).resolve().parents[2] / "scripts" / "instruments" / "longitudinal_v1.yaml"
def test_longitudinal_administer_one_agent():
sub = LongitudinalSubagent(llm=_CannedLLM(), memory=_FakeMem(), instrument_path=INSTRUMENT)
persona = PersonaRecord(agent_id=3, name="A", persona="p")
resp = sub.administer(persona, phase=InterviewPhase.T0)
assert resp.agent_id == 3
assert resp.phase == InterviewPhase.T0
assert set(resp.responses.keys()) >= {"stk_1", "gov_1", "mkt_1", "clm_1"}
def test_longitudinal_aggregate_delta():
from app.models.interview import LikertResponse
t0 = [LikertResponse(agent_id=i, phase=InterviewPhase.T0,
responses={"stk_1": 3, "gov_1": 4},
confidence={"stk_1": 0.8, "gov_1": 0.8}) for i in range(5)]
t1 = [LikertResponse(agent_id=i, phase=InterviewPhase.T1,
responses={"stk_1": 4, "gov_1": 4},
confidence={"stk_1": 0.8, "gov_1": 0.8}) for i in range(5)]
agg = run_aggregate(t0, t1)
assert agg["per_item"]["stk_1"]["mean_delta"] == 1.0
assert agg["per_item"]["gov_1"]["mean_delta"] == 0.0
assert agg["n_paired"] == 5
def test_longitudinal_accepts_string_likert_values():
"""Real LLMs sometimes return Likert values as JSON strings ('3' not 3).
The validator should coerce them rather than fail the agent."""
from app.models.interview import InterviewPhase
from app.services.interviews.base import PersonaRecord, MemoryDigest
from app.services.interviews.longitudinal import LongitudinalSubagent
from pathlib import Path as _P
class _Mem:
def get_digest(self, agent_id, max_chars=2000):
return MemoryDigest(text="x", available=True)
class _StringLLM:
def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
return {
"responses": { # all strings, not ints
"stk_1": "4", "stk_2": "3", "stk_3": "5",
"gov_1": "3", "gov_2": "4", "gov_3": "2",
"mkt_1": "5", "mkt_2": "3", "mkt_3": "4",
"clm_1": "2", "clm_2": "4", "clm_3": "5",
},
"confidence": {},
"open_comment": "stringified",
}
inst = _P(__file__).resolve().parents[2] / "scripts" / "instruments" / "longitudinal_v1.yaml"
sub = LongitudinalSubagent(llm=_StringLLM(), memory=_Mem(), instrument_path=inst)
persona = PersonaRecord(agent_id=99, name="A", persona="p")
resp = sub.administer(persona, phase=InterviewPhase.T0)
assert resp.agent_id == 99
assert resp.responses["stk_1"] == 4
assert isinstance(resp.responses["stk_1"], int)