class FileSystemPersonaProvider:
    """Load OASIS agent profiles from the simulation's profile files.

    Profiles are read from `reddit_profiles.json` and/or
    `twitter_profiles.csv`. If both are present, agents from
    `reddit_profiles.json` take precedence; twitter-only agents are appended.
    """

    def __init__(self, reddit_path: Optional[Path], twitter_path: Optional[Path]):
        """Store the profile file locations.

        Args:
            reddit_path: Path to the JSON profile file; a falsy value
                disables this source.
            twitter_path: Path to the CSV profile file; a falsy value
                disables this source.
        """
        self.reddit_path = Path(reddit_path) if reddit_path else None
        self.twitter_path = Path(twitter_path) if twitter_path else None

    @staticmethod
    def _rows_to_records(rows) -> list[PersonaRecord]:
        """Convert raw profile rows (mappings) into `PersonaRecord` objects.

        Rows without a usable `user_id` are skipped so that one incomplete
        entry does not abort the whole load. Both sources share this helper,
        which keeps their field fallbacks identical.
        """
        records = []
        for row in rows:
            raw_id = row.get("user_id")
            # Explicit comparison instead of truthiness: JSON ids are ints
            # and agent 0 is a valid id that must not be dropped.
            if raw_id is None or raw_id == "":
                continue
            records.append(PersonaRecord(
                agent_id=int(raw_id),
                name=str(row.get("name") or row.get("user_name") or f"agent_{raw_id}"),
                persona=str(row.get("persona") or row.get("bio") or ""),
                profession=row.get("profession"),
                bio=row.get("bio"),
            ))
        return records

    def _load_reddit(self) -> list[PersonaRecord]:
        """Return the records from the JSON file, or `[]` if it is unset or missing."""
        if not self.reddit_path or not self.reddit_path.exists():
            return []
        data = json.loads(self.reddit_path.read_text(encoding="utf-8"))
        return self._rows_to_records(data)

    def _load_twitter(self) -> list[PersonaRecord]:
        """Return the records from the CSV file, or `[]` if it is unset or missing."""
        if not self.twitter_path or not self.twitter_path.exists():
            return []
        # newline="" is what the csv module expects so it can handle quoted
        # line breaks itself.
        with self.twitter_path.open("r", encoding="utf-8", newline="") as f:
            # The helper materializes the list before the file is closed.
            return self._rows_to_records(csv.DictReader(f))

    def all(self) -> list[PersonaRecord]:
        """Return every known persona: reddit records first, then twitter-only ones."""
        reddit = self._load_reddit()
        seen = {p.agent_id for p in reddit}
        twitter = [p for p in self._load_twitter() if p.agent_id not in seen]
        return reddit + twitter
class ZepMemoryProvider:
    """Builds a bounded memory digest per agent from Zep entity context.

    Maps `agent_id` (OASIS user_id) to a Zep entity UUID; falls back to the
    agent_id as a string when no mapping is known.
    """

    # Upper bound on how many related edges contribute a fact line.
    _MAX_EDGES = 20

    def __init__(self, entity_reader, graph_id: str, agent_to_entity: dict[int, str] | None = None):
        """Store the reader, the graph id and a private copy of the id mapping.

        Args:
            entity_reader: Object exposing
                `get_entity_with_context(graph_id, entity_uuid)`.
            graph_id: Graph identifier passed through to the reader.
            agent_to_entity: Optional agent_id -> entity UUID mapping. It is
                copied, so later changes by the caller are not seen here.
        """
        self.reader = entity_reader
        self.graph_id = graph_id
        self.map = dict(agent_to_entity or {})

    @staticmethod
    def _clip(text: str, max_chars: int) -> str:
        """Return `text` cut to at most `max_chars` characters.

        A cut is marked with a trailing ellipsis that counts towards the
        budget. A non-positive budget yields an empty string; slicing with
        `max_chars - 1` there would be a negative slice and return text
        longer than the budget.
        """
        if max_chars <= 0:
            return ""
        if len(text) <= max_chars:
            return text
        return text[: max_chars - 1] + "…"

    def get_digest(self, agent_id: int, max_chars: int = 2000) -> MemoryDigest:
        """Return a text digest of what the reader knows about `agent_id`.

        Args:
            agent_id: Agent whose memory is requested.
            max_chars: Maximum length of the digest built from reader data.

        Returns:
            A `MemoryDigest` with `available=False` and a placeholder text if
            the reader raised. Otherwise `available=True` with the name,
            summary and edge facts joined by newlines, or a placeholder if
            none of those were present. The placeholders are returned
            verbatim and are not clipped to `max_chars`.
        """
        entity_uuid = self.map.get(agent_id) or str(agent_id)
        try:
            ctx = self.reader.get_entity_with_context(self.graph_id, entity_uuid)
        except Exception:
            # Best-effort: memory is optional, so degrade instead of failing,
            # but leave a trace so reader problems are not invisible.
            import logging

            logging.getLogger(__name__).warning(
                "Memory lookup failed for agent %s (entity %s)",
                agent_id, entity_uuid, exc_info=True,
            )
            return MemoryDigest(text=f"[no memory for agent {agent_id}]", available=False)

        parts: list[str] = []
        name = getattr(ctx, "name", None)
        summary = getattr(ctx, "summary", None)
        if name:
            parts.append(f"Name: {name}")
        if summary:
            parts.append(f"Summary: {summary}")
        edges = getattr(ctx, "related_edges", []) or []
        for e in edges[: self._MAX_EDGES]:
            # Edges may be plain dicts or objects carrying a `fact` attribute.
            fact = e.get("fact") if isinstance(e, dict) else getattr(e, "fact", None)
            if fact:
                parts.append(f"- {fact}")

        text = "\n".join(parts)
        if not text:
            return MemoryDigest(text=f"[empty memory for agent {agent_id}]", available=True)
        return MemoryDigest(text=self._clip(text, max_chars), available=True)
def _write_reddit_profiles(tmp_path: Path):
    """Write a two-agent `reddit_profiles.json` into `tmp_path` and return its path."""
    fisher = {
        "user_id": 0,
        "user_name": "fischer1",
        "name": "Fischer Müller",
        "persona": "I am a small-scale Baltic fisher.",
        "profession": "fisher",
        "bio": "",
    }
    ngo_worker = {
        "user_id": 1,
        "user_name": "ngo1",
        "name": "Ines NGO",
        "persona": "I work for an environmental NGO.",
        "profession": "ngo_staff",
        "bio": "",
    }
    target = tmp_path / "reddit_profiles.json"
    target.write_text(json.dumps([fisher, ngo_worker]), encoding="utf-8")
    return target


def test_file_system_persona_provider_reads_reddit_json(tmp_path):
    """Both JSON profiles are loaded and the first keeps its name and id."""
    profiles_file = _write_reddit_profiles(tmp_path)
    loaded = FileSystemPersonaProvider(reddit_path=profiles_file, twitter_path=None).all()
    assert len(loaded) == 2
    first = loaded[0]
    assert first.name == "Fischer Müller"
    assert first.agent_id == 0


def test_zep_memory_provider_returns_empty_when_unavailable():
    """A reader that raises yields an unavailable digest with non-empty text."""

    class _BrokenReader:
        def get_entity_with_context(self, *a, **kw):
            raise RuntimeError("offline")

    provider = ZepMemoryProvider(
        entity_reader=_BrokenReader(),
        graph_id="g1",
        agent_to_entity={0: "uuid-zero"},
    )
    digest = provider.get_digest(0)
    assert digest.available is False
    assert digest.text != ""


def test_zep_memory_provider_truncates_to_max_chars():
    """An oversized fact is cut down so the digest fits within `max_chars`."""

    class _Ctx:
        name = "X"
        summary = "Y"
        related_edges = [{"fact": "very long fact " * 200}]

    class _R:
        def get_entity_with_context(self, *a, **kw):
            return _Ctx()

    provider = ZepMemoryProvider(
        entity_reader=_R(),
        graph_id="g1",
        agent_to_entity={5: "uuid-five"},
    )
    digest = provider.get_digest(5, max_chars=300)
    assert digest.available is True
    assert len(digest.text) <= 300