# MicroFish/backend/app/services/interviews/base.py

from __future__ import annotations

import json
from dataclasses import dataclass, field
from typing import Any, Callable, Optional, Protocol


@dataclass
class PersonaRecord:
    """Identity and character description of one simulated agent.

    StakeholderInterviewer reads ``agent_id``, ``name`` and ``persona`` from
    this record; ``profession`` and ``bio`` are optional and are not read by
    any code in this module.
    """

    # Key passed to MemoryProvider.get_digest and recorded on
    # SchemaValidationFailure.
    agent_id: int
    # Interpolated into the system prompt as "You are {name}."
    name: str
    # Free-text character description appended right after the name in the
    # system prompt.
    persona: str
    # Optional extra metadata; presumably consumed elsewhere -- verify callers.
    profession: Optional[str] = None
    bio: Optional[str] = None


@dataclass
class MemoryDigest:
    """Result of a memory lookup for one agent.

    StakeholderInterviewer embeds ``text`` in the system prompt only when
    ``available`` is true; otherwise it substitutes a fixed placeholder.
    """

    # Digest body that is placed between the "simulation memory digest"
    # markers of the system prompt.
    text: str
    # False signals that no usable memory exists, in which case ``text`` is
    # ignored by the prompt builder.
    available: bool = True


class MemoryProvider(Protocol):
    """Structural interface for objects that supply per-agent memory digests.

    Any object with a compatible ``get_digest`` method satisfies this
    protocol; no inheritance is required.
    """

    # Returns the MemoryDigest for ``agent_id``. ``max_chars`` is presumably an
    # upper bound on the digest length -- TODO confirm against implementations;
    # StakeholderInterviewer always relies on the default.
    def get_digest(self, agent_id: int, max_chars: int = 2000) -> MemoryDigest: ...


def coerce_int(value: Any) -> Optional[int]:
    """Turn an LLM-supplied Likert answer into an ``int`` where possible.

    Models often emit numeric answers as JSON strings ("3" rather than 3), so
    digit-only strings (optionally with a leading minus and surrounding
    whitespace) are accepted alongside real ints.

    Args:
        value: The raw value taken from the model's response.

    Returns:
        The integer value, or ``None`` when ``value`` is a bool, a non-int /
        non-str object, or a string that does not parse as an integer.
    """
    if isinstance(value, int):
        # bool is a subclass of int; True/False must not pass as 1/0.
        return None if isinstance(value, bool) else value

    if not isinstance(value, str):
        return None

    candidate = value.strip()
    # Cheap pre-filter: only digits after removing leading minus signs. An
    # empty string fails here too, because "".isdigit() is False.
    if not candidate.lstrip("-").isdigit():
        return None

    try:
        # int() has the final say, e.g. "--3" passes the filter but is invalid.
        return int(candidate)
    except ValueError:
        return None


class SchemaValidationFailure(ValueError):
    """Raised when an agent's reply still violates the schema after a retry.

    Attributes:
        agent_id: Identifier of the agent whose replies were rejected.
        attempts: Per-attempt records supplied by the raiser, kept for
            diagnostics.
    """

    def __init__(self, agent_id: int, attempts: list[dict]):
        self.agent_id = agent_id
        self.attempts = attempts
        message = f"agent {agent_id}: schema violation after retry"
        super().__init__(message)


class StakeholderInterviewer:
    """Puts survey questions to an LLM that answers in the voice of a persona.

    Each question is sent with a system prompt built from the persona's
    description and that agent's memory digest. When a validator is supplied
    and rejects the first reply, exactly one corrective retry is made.
    """

    def __init__(self, llm, memory: MemoryProvider, language: str = "de"):
        """Store the collaborators used for every interview.

        Args:
            llm: Client object exposing
                ``chat_json(messages=..., temperature=..., max_tokens=...)``.
            memory: Provider of per-agent memory digests.
            language: ``"de"`` makes the prompt demand German answers; any
                other value makes it demand English.
        """
        self.llm = llm
        self.memory = memory
        self.language = language

    def _system_prompt(self, persona: PersonaRecord, digest: MemoryDigest, schema_hint: str) -> str:
        """Build the system message for one persona.

        Args:
            persona: Supplies the name and character description.
            digest: Memory digest; its text is used only when it is marked
                available, otherwise a fixed placeholder is inserted.
            schema_hint: Schema description appended verbatim at the end.

        Returns:
            The full system prompt text.
        """
        memory_block = digest.text if digest.available else "[no simulation memory available]"
        lang_note = "Antworte ausschließlich auf Deutsch." if self.language == "de" else "Answer in English."
        return (
            f"You are {persona.name}. {persona.persona}\n\n"
            "You are answering a survey about the future of German fisheries. "
            "Answer strictly in character based on your background, values, and what you experienced "
            "during the simulated social media discourse summarised below.\n\n"
            f"--- simulation memory digest ---\n{memory_block}\n--- end ---\n\n"
            f"{lang_note} Return JSON ONLY matching this schema:\n{schema_hint}"
        )

    @staticmethod
    def _render_previous(reply: Any) -> str:
        """Serialise a rejected reply for replay as the assistant turn."""
        try:
            # Real JSON (double quotes, null/true/false, unescaped umlauts)
            # rather than a Python repr, so the transcript shown to the model
            # is consistent with the "valid JSON" demand that follows it.
            return json.dumps(reply, ensure_ascii=False)
        except (TypeError, ValueError):
            # Best effort for replies json cannot encode.
            return str(reply)

    def ask_in_character(
        self,
        persona: PersonaRecord,
        user_prompt: str,
        schema_hint: str,
        *,
        temperature: float = 0.3,
        max_tokens: Optional[int] = None,
        validate: Optional[Callable[[dict], Optional[dict]]] = None,
    ) -> dict:
        """Ask one question in character, retrying once on a schema violation.

        Args:
            persona: The agent being interviewed.
            user_prompt: The question text sent as the user message.
            schema_hint: Schema description included in the system prompt and
                in the retry message.
            temperature: Sampling temperature for the first call; the retry
                always uses 0.0.
            max_tokens: Passed through to both LLM calls.
            validate: Optional callable that returns the (possibly normalised)
                reply when it is acceptable and ``None`` when it is not.

        Returns:
            The validator's result for the first accepted reply, or the raw
            first reply when no validator is given.

        Raises:
            SchemaValidationFailure: If both the first reply and the retry are
                rejected by ``validate``.
        """
        digest = self.memory.get_digest(persona.agent_id)
        messages = [
            {"role": "system", "content": self._system_prompt(persona, digest, schema_hint)},
            {"role": "user", "content": user_prompt},
        ]
        first = self.llm.chat_json(messages=messages, temperature=temperature, max_tokens=max_tokens)
        if validate is None:
            return first

        validated = validate(first)
        if validated is not None:
            return validated

        # Replay the rejected answer and ask for a corrected one.
        messages.append({"role": "assistant", "content": self._render_previous(first)})
        messages.append({"role": "user", "content":
            "Your previous response did not match the required schema. "
            f"Return ONLY valid JSON matching: {schema_hint}"})
        # Temperature 0.0 on the retry, regardless of the caller's setting.
        second = self.llm.chat_json(messages=messages, temperature=0.0, max_tokens=max_tokens)
        validated = validate(second)
        if validated is None:
            raise SchemaValidationFailure(
                persona.agent_id,
                attempts=[
                    {"attempt": 1, "raw": first, "schema_hint": schema_hint},
                    {"attempt": 2, "raw": second, "schema_hint": schema_hint},
                ],
            )
        return validated