From ae4941df8e45c51c4e18d626202f34494c65b0c2 Mon Sep 17 00:00:00 2001
From: Christian Moellmann <christian.moellmann@uni-hamburg.de>
Date: Sat, 23 May 2026 12:21:21 +0200
Subject: [PATCH] =?UTF-8?q?feat(interviews):=20scenario=20subagent=20with?=
 =?UTF-8?q?=204=20futures=20=C3=97=204=20dimensions=20+=20polarity=20matri?=
 =?UTF-8?q?x?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 backend/app/services/interviews/scenario.py  | 80 ++++++++++++++++++++
 backend/scripts/instruments/scenario_v1.yaml | 51 +++++++++++++
 backend/tests/interviews/test_scenario.py    | 34 +++++++++
 3 files changed, 165 insertions(+)
 create mode 100644 backend/app/services/interviews/scenario.py
 create mode 100644 backend/scripts/instruments/scenario_v1.yaml
 create mode 100644 backend/tests/interviews/test_scenario.py
diff --git a/backend/app/services/interviews/scenario.py b/backend/app/services/interviews/scenario.py
new file mode 100644
index 00000000..f78239fb
--- /dev/null
+++ b/backend/app/services/interviews/scenario.py
@@ -0,0 +1,80 @@
+from __future__ import annotations
+import json
+import statistics
+from pathlib import Path
+from typing import Optional
+import yaml
+from app.models.interview import ScenarioRating, ScenarioResponse
+from app.services.interviews.base import StakeholderInterviewer, PersonaRecord
+
+class ScenarioSubagent:
+    def __init__(self, llm, memory, instrument_path: Path, language: str = "de"):
+        with Path(instrument_path).open("r", encoding="utf-8") as f:
+            self.instrument = yaml.safe_load(f)
+        self.interviewer = StakeholderInterviewer(llm=llm, memory=memory, language=language)
+        self.language = language
+
+    def _schema_hint(self) -> str:
+        sids = [s["scenario_id"] for s in self.instrument["scenarios"]]
+        return json.dumps({
+            "ratings": {sid: {
+                "desirability": "<int 1-7>",
+                "plausibility": "<int 1-7>",
+                "impact_on_my_group": "<int 1-7>",
+                "fairness": "<int 1-7>",
+                "if_woke_up_response": "<string>",
+            } for sid in sids}
+        }, ensure_ascii=False)
+
+    def _user_prompt(self) -> str:
+        head = ("Bewerten Sie jedes der folgenden Szenarien auf vier Dimensionen (1-7) "
+                "und beantworten Sie kurz, was Sie tun würden, wenn Sie in dieser Welt aufwachten.") \
+               if self.language == "de" else \
+               ("Rate each of the following scenarios on four dimensions (1-7) "
+                "and briefly answer what you would do if you woke up in this world.")
+        blocks = []
+        for s in self.instrument["scenarios"]:
+            label = s["label_de"] if self.language == "de" else s["label_en"]
+            desc = s["description_de"] if self.language == "de" else s["description_en"]
+            blocks.append(f"--- {s['scenario_id']}: {label} ---\n{desc}")
+        return head + "\n\n" + "\n\n".join(blocks)
+
+    def _validate(self, raw: dict) -> Optional[dict]:
+        if not isinstance(raw, dict): return None
+        sids = {s["scenario_id"] for s in self.instrument["scenarios"]}
+        ratings = raw.get("ratings", {})
+        if set(ratings.keys()) != sids: return None
+        for v in ratings.values():
+            if not isinstance(v, dict): return None
+            for k in ("desirability", "plausibility", "impact_on_my_group", "fairness"):
+                if not isinstance(v.get(k), int) or not 1 <= v[k] <= 7: return None
+            if not isinstance(v.get("if_woke_up_response", ""), str): return None
+        return raw
+
+    def administer(self, persona: PersonaRecord) -> ScenarioResponse:
+        raw = self.interviewer.ask_in_character(
+            persona, user_prompt=self._user_prompt(),
+            schema_hint=self._schema_hint(), validate=self._validate,
+        )
+        ratings = {sid: ScenarioRating(**v) for sid, v in raw["ratings"].items()}
+        return ScenarioResponse(agent_id=persona.agent_id, ratings=ratings)
+
+def polarity_matrix(responses: list[ScenarioResponse]) -> dict:
+    matrix: dict[str, dict] = {}
+    sids: set[str] = set()
+    for r in responses: sids.update(r.ratings.keys())
+    for sid in sorted(sids):
+        vals = [r.ratings[sid] for r in responses if sid in r.ratings]
+        if not vals:
+            matrix[sid] = {"n": 0}
+            continue
+        matrix[sid] = {
+            "n": len(vals),
+            "mean_desirability": statistics.mean(v.desirability for v in vals),
+            "mean_plausibility": statistics.mean(v.plausibility for v in vals),
+            "mean_impact": statistics.mean(v.impact_on_my_group for v in vals),
+            "mean_fairness": statistics.mean(v.fairness for v in vals),
+            "sd_desirability": statistics.pstdev([v.desirability for v in vals]) if len(vals) > 1 else 0.0,
+            "sd_plausibility": statistics.pstdev([v.plausibility for v in vals]) if len(vals) > 1 else 0.0,
+        }
+    return matrix
diff --git a/backend/scripts/instruments/scenario_v1.yaml b/backend/scripts/instruments/scenario_v1.yaml
new file mode 100644
index 00000000..5c150b80
--- /dev/null
+++ b/backend/scripts/instruments/scenario_v1.yaml
@@ -0,0 +1,51 @@
+name: scenario_v1
+version: "1.0"
+language_default: de  # NOTE(review): not read by scenario.py in this patch; ScenarioSubagent takes its own language argument
+scenarios:  # ScenarioSubagent reads scenario_id, label_<lang> and description_<lang> from each entry
+  - scenario_id: S1
+    label_de: "Erholung 2040"
+    label_en: "Recovery 2040"
+    description_de: |
+      Bis 2040 haben sich Dorsch- und Heringsbestände in der westlichen Ostsee
+      deutlich erholt. MSC-Zertifizierung ist branchenweit Standard. Die kleine
+      Küstenfischerei hat sich stabilisiert; die Politik gilt als erfolgreich.
+    description_en: |
+      By 2040, Western Baltic cod and herring stocks have substantially recovered.
+      MSC certification is industry-wide standard. Small-scale coastal fisheries
+      have stabilised; policy is regarded as successful.
+  - scenario_id: S2
+    label_de: "Kollaps 2040"
+    label_en: "Collapse 2040"
+    description_de: |
+      Bis 2040 sind Dorsch- und Heringsbestände zusammengebrochen. Die Flotte
+      ist halbiert, Aquakultur dominiert den Markt, Häfen veröden.
+    description_en: |
+      By 2040, cod and herring stocks have collapsed. The fleet is halved,
+      aquaculture dominates the market, harbour towns decline.
+  - scenario_id: S3
+    label_de: "Festung Europa 2040"
+    label_en: "Fortress Europe 2040"
+    description_de: |
+      Bis 2040 verfolgt die EU eine protektionistische Politik mit hohen Importzöllen,
+      Meeresschutzgebiete bedecken 30% der Ostsee, Sportfischerei ist stark eingeschränkt.
+    description_en: |
+      By 2040, the EU pursues a protectionist policy with high import tariffs,
+      MPAs cover 30% of the Baltic, recreational fishing is strongly curtailed.
+  - scenario_id: S4
+    label_de: "Privatisierung 2040"
+    label_en: "Privatisation 2040"
+    description_de: |
+      Bis 2040 sind Fangrechte als handelbare Quoten (ITQs) etabliert. Die Branche
+      hat sich konsolidiert; nur große, kapitalstarke Unternehmen sind übrig.
+    description_en: |
+      By 2040, fishing rights are tradable quotas (ITQs). The industry has
+      consolidated; only large, well-capitalised firms remain.
+dimensions:  # NOTE(review): not read by scenario.py in this patch, which hard-codes the same four ids and the 1-7 range
+  - {dimension_id: desirability, scale: 7,
+     de: "Wie wünschenswert ist dieses Szenario?", en: "How desirable is this scenario?"}
+  - {dimension_id: plausibility, scale: 7,
+     de: "Wie plausibel ist dieses Szenario?",   en: "How plausible is this scenario?"}
+  - {dimension_id: impact_on_my_group, scale: 7,
+     de: "Wie stark trifft es Ihre Gruppe?",     en: "How strongly does it affect your group?"}
+  - {dimension_id: fairness, scale: 7,
+     de: "Wie fair ist dieses Szenario?",        en: "How fair is this scenario?"}
diff --git a/backend/tests/interviews/test_scenario.py b/backend/tests/interviews/test_scenario.py
new file mode 100644
index 00000000..567290d1
--- /dev/null
+++ b/backend/tests/interviews/test_scenario.py
@@ -0,0 +1,34 @@
+from pathlib import Path
+from app.services.interviews.base import PersonaRecord, MemoryDigest
+from app.services.interviews.scenario import ScenarioSubagent, polarity_matrix
+
+INSTRUMENT = Path(__file__).resolve().parents[2] / "scripts" / "instruments" / "scenario_v1.yaml"
+
+class _Mem:  # memory stub handed to ScenarioSubagent: returns a fixed digest for any agent
+    def get_digest(self, agent_id, max_chars=2000):  # both arguments are ignored
+        return MemoryDigest(text="x", available=True)
+
+class _LLM:
+    def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
+        return {"ratings": {sid: {
+            "desirability": 4, "plausibility": 3, "impact_on_my_group": 5, "fairness": 3,
+            "if_woke_up_response": f"act-on-{sid}",
+        } for sid in ("S1", "S2", "S3", "S4")}}
+
+def test_scenario_administer():
+    sub = ScenarioSubagent(llm=_LLM(), memory=_Mem(), instrument_path=INSTRUMENT)
+    persona = PersonaRecord(agent_id=1, name="A", persona="p")
+    resp = sub.administer(persona)
+    assert set(resp.ratings.keys()) == {"S1", "S2", "S3", "S4"}
+    assert resp.ratings["S1"].desirability == 4
+
+def test_polarity_matrix():
+    from app.models.interview import ScenarioResponse, ScenarioRating
+    responses = [ScenarioResponse(agent_id=i, ratings={
+        "S1": ScenarioRating(desirability=5, plausibility=4, impact_on_my_group=5, fairness=4,
+                              if_woke_up_response="x"),
+    }) for i in range(3)]
+    m = polarity_matrix(responses)
+    assert "S1" in m
+    assert m["S1"]["mean_desirability"] == 5
+    assert m["S1"]["n"] == 3