diff --git a/backend/app/__init__.py b/backend/app/__init__.py
index aba624bb..fdc49112 100644
--- a/backend/app/__init__.py
+++ b/backend/app/__init__.py
@@ -47,6 +47,20 @@ def create_app(config_class=Config):
     SimulationRunner.register_cleanup()
     if should_log_startup:
         logger.info("已注册模拟进程清理函数")
+
+    # Install interview lifecycle hooks on the SimulationManager class.
+    # Hooks are stored on the class itself (not on a particular instance), so
+    # any fresh `SimulationManager()` constructed later (e.g. per request in
+    # the Flask API) will see them.  We still bridge `_notify_on_completed`
+    # into SimulationRunner via a transient instance so the runner's monitor
+    # thread fires the completed hooks when a simulation process exits.
+    from .services.simulation_manager import SimulationManager
+    from .services.interviews.lifecycle import install_hooks
+
+    install_hooks(SimulationManager)
+    SimulationRunner.register_on_completed(SimulationManager()._notify_on_completed)
+    if should_log_startup:
+        logger.info("已安装面试生命周期钩子")
     
     # 请求日志中间件
     @app.before_request
@@ -63,10 +77,8 @@ def create_app(config_class=Config):
         return response
     
     # 注册蓝图
-    from .api import graph_bp, simulation_bp, report_bp
-    app.register_blueprint(graph_bp, url_prefix='/api/graph')
-    app.register_blueprint(simulation_bp, url_prefix='/api/simulation')
-    app.register_blueprint(report_bp, url_prefix='/api/report')
+    from .api import register_blueprints
+    register_blueprints(app)
     
     # 健康检查
     @app.route('/health')
diff --git a/backend/app/api/__init__.py b/backend/app/api/__init__.py
index ffda743a..396750f2 100644
--- a/backend/app/api/__init__.py
+++ b/backend/app/api/__init__.py
@@ -2,13 +2,22 @@
 API路由模块
 """
 
-from flask import Blueprint
+from flask import Blueprint, Flask
 
 graph_bp = Blueprint('graph', __name__)
 simulation_bp = Blueprint('simulation', __name__)
 report_bp = Blueprint('report', __name__)
+interview_bp = Blueprint('interview', __name__)
 
 from . import graph  # noqa: E402, F401
 from . import simulation  # noqa: E402, F401
 from . import report  # noqa: E402, F401
+from . import interview  # noqa: E402, F401
 
+
+def register_blueprints(app: Flask) -> None:
+    """Attach every API blueprint to *app* under its canonical URL prefix."""
+    mounts = ((graph_bp, '/api/graph'), (simulation_bp, '/api/simulation'),
+              (report_bp, '/api/report'), (interview_bp, '/api/interview'))
+    for blueprint, prefix in mounts:
+        app.register_blueprint(blueprint, url_prefix=prefix)
diff --git a/backend/app/api/interview.py b/backend/app/api/interview.py
new file mode 100644
index 00000000..e638aaab
--- /dev/null
+++ b/backend/app/api/interview.py
@@ -0,0 +1,225 @@
+from __future__ import annotations
+import threading
+import traceback
+import uuid
+from pathlib import Path
+from flask import Blueprint, jsonify, request, send_file
+from app.config import Config
+from app.models.interview import SubagentKind, InterviewPhase
+from app.services.interviews.adapters import FileSystemPersonaProvider, ZepMemoryProvider
+from app.services.interviews.zep_writer import InterviewZepWriter
+from app.services.interview_orchestrator import InterviewOrchestrator
+from app.services.interview_synthesizer import InterviewSynthesizer
+from app.services.interviews.storage import InterviewStore
+from app.utils.llm_client import LLMClient
+from app.utils.logger import get_logger
+
+from . import interview_bp
+
+logger = get_logger(__name__)
+
+
+class _NullUpdater:
+    """No-op stand-in for ``ZepGraphMemoryUpdater`` used when Zep is unavailable.
+
+    Exposes ``add_text_episode`` so ``InterviewZepWriter._emit`` succeeds silently —
+    the interview pipeline still produces local artefacts; Zep just isn't updated.
+    """
+
+    def add_text_episode(self, graph_id, text):  # noqa: ARG002 - matches real API
+        return None
+
+
+class _NullMemory:
+    """Fallback memory provider that always reports unavailable digests."""
+
+    def get_digest(self, agent_id, max_chars=2000):  # noqa: ARG002 - matches Protocol
+        from app.services.interviews.base import MemoryDigest
+        return MemoryDigest(text="[memory unavailable]", available=False)
+
+_TASKS: dict[str, dict] = {}
+_LOCK = threading.Lock()
+
+INSTRUMENT_DIR = Path(__file__).resolve().parents[2] / "scripts" / "instruments"
+
+
+def _uploads_root() -> Path:
+    return Path(getattr(Config, "UPLOADS_DIR", "uploads"))
+
+
+def _load_graph_id(sim_id: str) -> str:
+    """Read the Zep ``graph_id`` for a simulation from its persisted state.
+
+    The graph_id is written by ``SimulationManager`` into
+    ``uploads/simulations/{sim_id}/state.json``.  Returns ``""`` if the state
+    file is missing or unreadable — callers should treat empty graph_id as
+    "Zep unavailable" and fall back to the null memory/writer path.
+    """
+    try:
+        from app.services.simulation_manager import SimulationManager
+        state = SimulationManager().get_simulation(sim_id)
+        if state and state.graph_id:
+            return state.graph_id
+    except Exception as e:  # pragma: no cover - defensive
+        logger.warning(f"_load_graph_id({sim_id}) failed: {e!r}")
+    return ""
+
+
+def _build_orchestrator(sim_id: str) -> InterviewOrchestrator:
+    sim_dir = _uploads_root() / "simulations" / sim_id
+    reddit = sim_dir / "reddit_profiles.json"
+    twitter = sim_dir / "twitter_profiles.csv"
+    personas = FileSystemPersonaProvider(
+        reddit_path=reddit if reddit.exists() else None,
+        twitter_path=twitter if twitter.exists() else None,
+    )
+    # Build agent_id -> Zep entity uuid map from the persisted profile files.
+    agent_to_entity = personas.agent_to_entity()
+
+    # Resolve the graph_id from the simulation's persisted state — NOT from a
+    # ``graph_id.txt`` (nothing in the codebase writes such a file).
+    graph_id = _load_graph_id(sim_id)
+
+    memory: object
+    zep_writer: InterviewZepWriter
+    if not graph_id:
+        logger.warning(
+            f"interview: no graph_id for sim {sim_id} — Zep memory/writer disabled "
+            "(simulation state missing or graph_id empty)"
+        )
+        memory = _NullMemory()
+        zep_writer = InterviewZepWriter(memory_updater=_NullUpdater(), graph_id="")
+    else:
+        try:
+            from app.services.zep_entity_reader import ZepEntityReader
+            from app.services.zep_graph_memory_updater import ZepGraphMemoryUpdater
+
+            reader = ZepEntityReader()
+            updater = ZepGraphMemoryUpdater(graph_id=graph_id)
+            memory = ZepMemoryProvider(
+                reader, graph_id=graph_id, agent_to_entity=agent_to_entity
+            )
+            zep_writer = InterviewZepWriter(memory_updater=updater, graph_id=graph_id)
+            if not agent_to_entity:
+                logger.warning(
+                    f"interview: empty agent_to_entity map for sim {sim_id} — "
+                    "memory digests will be unavailable. Check that profile files "
+                    "include `source_entity_uuid`."
+                )
+        except Exception as e:
+            logger.warning(
+                f"interview: Zep init failed for sim {sim_id} ({e!r}); "
+                "falling back to null memory/writer"
+            )
+            memory = _NullMemory()
+            zep_writer = InterviewZepWriter(memory_updater=_NullUpdater(), graph_id="")
+    llm = LLMClient(api_key=Config.LLM_API_KEY, base_url=Config.LLM_BASE_URL,
+                    model=Config.LLM_MODEL_NAME)
+    return InterviewOrchestrator(
+        llm=llm, memory=memory, personas=personas,
+        instrument_dir=INSTRUMENT_DIR, store_root=_uploads_root(), sim_id=sim_id,
+        zep_writer=zep_writer, max_workers=Config.INTERVIEW_MAX_WORKERS,
+        language=Config.INTERVIEW_DEFAULT_LANGUAGE,
+    )
+
+
+def _run_task(task_id: str, fn) -> None:
+    """Call ``fn(task_id)`` and record the outcome (result or error + traceback) in ``_TASKS``."""
+    with _LOCK:
+        _TASKS[task_id] = {"status": "running", "progress": {}, "result": None, "error": None}
+    try:
+        outcome = fn(task_id)
+        with _LOCK:
+            _TASKS[task_id].update(status="completed", result=outcome)
+    except Exception as exc:
+        with _LOCK:
+            _TASKS[task_id].update(
+                status="failed", error=repr(exc), traceback=traceback.format_exc())
+
+
+def _start_task(fn) -> str:
+    task_id = uuid.uuid4().hex[:12]
+    with _LOCK:
+        _TASKS[task_id] = {"status": "queued", "progress": {}, "result": None, "error": None}
+    threading.Thread(target=_run_task, args=(task_id, fn), daemon=True).start()
+    return task_id
+
+
+def _envelope(data=None, error=None, status: int = 200):
+    body = {"success": error is None, "data": data or {}, "error": error}
+    return jsonify(body), status
+
+
+@interview_bp.route("/<sim_id>/pre", methods=["POST"])
+def post_pre(sim_id: str):
+    orch = _build_orchestrator(sim_id)
+    task_id = _start_task(lambda tid: orch.run_pre())
+    return _envelope({"task_id": task_id})
+
+
+@interview_bp.route("/<sim_id>/post", methods=["POST"])
+def post_post(sim_id: str):
+    orch = _build_orchestrator(sim_id)
+    def run(tid):
+        out = orch.run_post()
+        synth = InterviewSynthesizer(store=orch.store)
+        out["synthesis"] = synth.run()[:1000]  # short preview
+        return out
+    task_id = _start_task(run)
+    return _envelope({"task_id": task_id})
+
+
+@interview_bp.route("/<sim_id>/rerun", methods=["POST"])
+def post_rerun(sim_id: str):
+    body = request.get_json(silent=True) or {}
+    sub = body.get("subagent")
+    try: subagent = SubagentKind(sub)
+    except ValueError: return _envelope(error=f"unknown subagent {sub!r}", status=400)
+    orch = _build_orchestrator(sim_id)
+    task_id = _start_task(lambda tid: orch.rerun(subagent))
+    return _envelope({"task_id": task_id})
+
+
+@interview_bp.route("/<sim_id>/status", methods=["GET"])
+def get_status(sim_id: str):
+    """Report status, progress, result and error of the task named by ``?task_id=`` (404 if unknown)."""
+    with _LOCK:  # copy while holding the lock so a worker cannot update the entry mid-read
+        task = dict(_TASKS.get(request.args.get("task_id")) or {})
+    if not task: return _envelope(error="unknown task_id", status=404)
+    return _envelope({"status": task["status"], "progress": task.get("progress", {}),
+                      "result": task.get("result"), "error": task.get("error")})
+
+
+@interview_bp.route("/<sim_id>/results/<subagent>", methods=["GET"])
+def get_results(sim_id: str, subagent: str):
+    """Return the latest T1 ``aggregate.json`` for *subagent*; 400 on unknown kind, 404 if absent."""
+    try: sub = SubagentKind(subagent)
+    except ValueError: return _envelope(error=f"unknown subagent {subagent!r}", status=400)
+    store = InterviewStore(root=_uploads_root(), sim_id=sim_id)
+    run = store.latest_run(InterviewPhase.T1, sub)  # every subagent kind is looked up under T1
+    if run is None: return _envelope(error="no results yet", status=404)
+    agg = run / "aggregate.json"
+    if not agg.exists(): return _envelope(error="aggregate missing", status=404)
+    import json as _j
+    return _envelope({"aggregate": _j.loads(agg.read_text(encoding="utf-8")),
+                      "run_dir": str(run)})
+
+
+@interview_bp.route("/<sim_id>/results/synthesis", methods=["GET"])
+def get_synthesis(sim_id: str):
+    """Return the synthesis report markdown, running the synthesizer if it is absent; 404 if still absent."""
+    store = InterviewStore(root=_uploads_root(), sim_id=sim_id)
+    report = store.base / "synthesis" / "report.md"
+    if not report.exists(): InterviewSynthesizer(store=store).run()  # lazy first-time build
+    if not report.exists(): return _envelope(error="synthesis report missing", status=404)
+    return _envelope({"report_markdown": report.read_text(encoding="utf-8")})
+
+
+@interview_bp.route("/<sim_id>/export.csv", methods=["GET"])
+def get_export_csv(sim_id: str):
+    """Send the all-responses CSV as a download, running the synthesizer if it is absent; 404 if still absent."""
+    store = InterviewStore(root=_uploads_root(), sim_id=sim_id)
+    csv_path = store.base / "synthesis" / "exports" / "all_responses.csv"
+    if not csv_path.exists(): InterviewSynthesizer(store=store).run()  # lazy first-time build
+    if not csv_path.exists(): return _envelope(error="export missing", status=404)
+    return send_file(csv_path, mimetype="text/csv", as_attachment=True, download_name=f"{sim_id}_interviews.csv")
diff --git a/backend/app/config.py b/backend/app/config.py
index de63e2b4..a63ba39b 100644
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -39,6 +39,8 @@ class Config:
     MAX_CONTENT_LENGTH = 50 * 1024 * 1024  # 50MB
     UPLOAD_FOLDER = os.path.join(os.path.dirname(__file__), '../uploads')
     ALLOWED_EXTENSIONS = {'pdf', 'md', 'txt', 'markdown'}
+    # Root directory for simulation uploads (used by the interview subsystem)
+    UPLOADS_DIR = os.environ.get("UPLOADS_DIR", os.path.join(os.path.dirname(__file__), '../uploads'))
     
     # 文本处理配置
     DEFAULT_CHUNK_SIZE = 500  # 默认切块大小
@@ -62,6 +64,12 @@ class Config:
     REPORT_AGENT_MAX_TOOL_CALLS = int(os.environ.get('REPORT_AGENT_MAX_TOOL_CALLS', '5'))
     REPORT_AGENT_MAX_REFLECTION_ROUNDS = int(os.environ.get('REPORT_AGENT_MAX_REFLECTION_ROUNDS', '2'))
     REPORT_AGENT_TEMPERATURE = float(os.environ.get('REPORT_AGENT_TEMPERATURE', '0.5'))
+
+    # Interview subsystem
+    INTERVIEW_MAX_TOKENS_PER_RUN = int(os.environ.get("INTERVIEW_MAX_TOKENS_PER_RUN", 15_000_000))
+    INTERVIEW_MAX_WORKERS = int(os.environ.get("INTERVIEW_MAX_WORKERS", 8))
+    INTERVIEW_DEFAULT_LANGUAGE = os.environ.get("INTERVIEW_DEFAULT_LANGUAGE", "de")
+    LLM_STUB_MODE = os.environ.get("LLM_STUB_MODE", "false").lower() == "true"
     
     @classmethod
     def validate(cls) -> list[str]:
diff --git a/backend/app/models/interview.py b/backend/app/models/interview.py
new file mode 100644
index 00000000..980efc82
--- /dev/null
+++ b/backend/app/models/interview.py
@@ -0,0 +1,99 @@
+from __future__ import annotations
+from enum import Enum
+from typing import Optional
+from pydantic import BaseModel, Field, field_validator, model_validator
+
+class InterviewPhase(str, Enum):
+    T0 = "T0"
+    T1 = "T1"
+
+class SubagentKind(str, Enum):
+    LONGITUDINAL = "longitudinal"
+    DIVERSITY = "diversity"
+    DELPHI = "delphi"
+    SCENARIO = "scenario"
+
+class LikertItem(BaseModel):
+    item_id: str
+    de: str
+    en: str
+    scale: int = Field(ge=3, le=7)
+    family: Optional[str] = None
+    reverse_coded: bool = False
+
+    @field_validator("scale")
+    @classmethod
+    def odd_scale(cls, v: int) -> int:
+        if v not in (3, 5, 7):
+            raise ValueError("scale must be 3, 5, or 7")
+        return v
+
+class LikertInstrument(BaseModel):
+    name: str
+    version: str = "1.0"
+    language_default: str = "de"
+    items: list[LikertItem]
+
+    @model_validator(mode="after")
+    def unique_item_ids(self) -> "LikertInstrument":
+        ids = [i.item_id for i in self.items]
+        if len(set(ids)) != len(ids):
+            raise ValueError("duplicate item_id in instrument")
+        return self
+
+class LikertResponse(BaseModel):
+    agent_id: int
+    phase: InterviewPhase
+    responses: dict[str, int]  # each value is validated below to lie in 1..5
+    confidence: dict[str, float] = Field(default_factory=dict)  # each value is validated below to lie in 0.0..1.0
+    open_comment: Optional[str] = None
+    memory_available: bool = True
+    failed_items: list[str] = Field(default_factory=list)
+
+    @model_validator(mode="after")
+    def values_in_range(self) -> "LikertResponse":  # raises ValueError on the first out-of-range value
+        for k, v in self.responses.items():
+            if not 1 <= v <= 5:  # NOTE(review): fixed 5-point bound, yet LikertItem.scale allows 3/5/7 — confirm intent
+                raise ValueError(f"response {k}={v} out of 1..5 range")
+        for k, v in self.confidence.items():
+            if not 0.0 <= v <= 1.0:
+                raise ValueError(f"confidence {k}={v} out of 0..1 range")
+        return self
+
+class QSortStatement(BaseModel):
+    statement_id: str
+    de: str
+    en: str
+
+class QSortInstrument(BaseModel):
+    name: str
+    version: str = "1.0"
+    statements: list[QSortStatement]
+    distribution: list[int]  # e.g. [2,3,4,6,4,3,2] for -3..+3
+
+class QSortResponse(BaseModel):
+    agent_id: int
+    placements: dict[str, int]  # statement_id -> bucket (-3..+3)
+    likert_axes: dict[str, int]  # axis_id -> 1..7
+
+class DelphiOpenResponse(BaseModel):
+    agent_id: int
+    round: int = 1
+    answers: dict[str, str]  # question_id -> free text
+
+class DelphiRatingResponse(BaseModel):
+    agent_id: int
+    round: int
+    ratings: dict[str, dict[str, int]]  # theme_id -> {importance, plausibility}
+    justification: Optional[str] = None
+
+class ScenarioRating(BaseModel):
+    desirability: int = Field(ge=1, le=7)
+    plausibility: int = Field(ge=1, le=7)
+    impact_on_my_group: int = Field(ge=1, le=7)
+    fairness: int = Field(ge=1, le=7)
+    if_woke_up_response: str
+
+class ScenarioResponse(BaseModel):
+    agent_id: int
+    ratings: dict[str, ScenarioRating]  # scenario_id -> rating
diff --git a/backend/app/services/interview_orchestrator.py b/backend/app/services/interview_orchestrator.py
new file mode 100644
index 00000000..d87e90ea
--- /dev/null
+++ b/backend/app/services/interview_orchestrator.py
@@ -0,0 +1,222 @@
+from __future__ import annotations
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
+from typing import Protocol
+from app.models.interview import (
+    InterviewPhase, SubagentKind, LikertResponse, QSortResponse,
+    DelphiOpenResponse, DelphiRatingResponse, ScenarioResponse,
+)
+from app.services.interviews.base import PersonaRecord, SchemaValidationFailure
+from app.services.interviews.longitudinal import LongitudinalSubagent, run_aggregate as longitudinal_aggregate
+from app.services.interviews.diversity import DiversitySubagent, run_typology
+from app.services.interviews.delphi import (
+    DelphiSubagent, extract_themes, convergence_metrics, group_stats_from_r2,
+)
+from app.services.interviews.scenario import ScenarioSubagent, polarity_matrix
+from app.services.interviews.storage import InterviewStore
+from app.services.interviews.instrument_loader import freeze_snapshot
+
+
+class PersonaProvider(Protocol):
+    def all(self) -> list[PersonaRecord]: ...
+
+
+class InterviewOrchestrator:
+    def __init__(
+        self, llm, memory, personas: PersonaProvider,
+        instrument_dir: Path, store_root: Path, sim_id: str,
+        zep_writer, max_workers: int = 8, language: str = "de",
+    ):
+        self.llm = llm
+        self.memory = memory
+        self.personas = personas
+        self.instrument_dir = Path(instrument_dir)
+        self.store = InterviewStore(root=store_root, sim_id=sim_id)
+        self.zep_writer = zep_writer
+        self.max_workers = max_workers
+        self.language = language
+        # Freeze snapshot once per orchestrator lifetime
+        freeze_snapshot(
+            instruments={
+                "longitudinal": self.instrument_dir / "longitudinal_v1.yaml",
+                "diversity":    self.instrument_dir / "diversity_v1.yaml",
+                "delphi":       self.instrument_dir / "delphi_v1.yaml",
+                "scenario":     self.instrument_dir / "scenario_v1.yaml",
+            },
+            out_path=self.store.base / "instruments_used.json",
+        )
+
+    # --- Generic per-agent runner ---
+    def _fan_out(self, run_dir, agent_fn, personas, audit_label):
+        ok: list = []
+        failed: list[int] = []
+        with ThreadPoolExecutor(max_workers=self.max_workers) as pool:
+            futures = {pool.submit(agent_fn, p): p for p in personas}
+            for fut in as_completed(futures):
+                p = futures[fut]
+                try:
+                    out = fut.result()
+                    ok.append(out)
+                    self.store.append_response(run_dir, out)
+                except SchemaValidationFailure as e:
+                    failed.append(p.agent_id)
+                    self.store.audit(run_dir, agent_id=p.agent_id,
+                                     event="schema_validation_failure",
+                                     detail={"label": audit_label, "attempts": e.attempts})
+                except Exception as e:
+                    failed.append(p.agent_id)
+                    self.store.audit(run_dir, agent_id=p.agent_id,
+                                     event="agent_failed", detail=f"{audit_label}: {e!r}")
+        return ok, failed
+
+    # --- Pre-phase (T0) ---
+    def run_pre(self) -> dict:
+        """Administer the longitudinal instrument at T0 and return response/failure counts and the run dir."""
+        sub = LongitudinalSubagent(self.llm, self.memory,
+                                   self.instrument_dir / "longitudinal_v1.yaml",
+                                   language=self.language)
+        run_dir = self.store.start_run(InterviewPhase.T0, SubagentKind.LONGITUDINAL)
+        personas = self.personas.all()  # load once; reused for the fan-out and the name lookup
+        by_id = {p.agent_id: p for p in personas}
+        ok, failed = self._fan_out(run_dir, lambda p: sub.administer(p, phase=InterviewPhase.T0),
+                                   personas, audit_label="longitudinal_T0")
+        for r in ok:
+            try: self.zep_writer.write_per_agent(SubagentKind.LONGITUDINAL, r, by_id[r.agent_id].name)
+            except Exception: pass  # Zep write errors are swallowed so they do not abort the run
+        self.store.mark_latest(run_dir)
+        return {"longitudinal": {"n_responded": len(ok), "n_failed": len(failed),
+                                 "run_dir": str(run_dir)}}
+
+    # --- Post-phase (T1) ---
+    def run_post(self) -> dict:
+        personas = self.personas.all()
+        out: dict = {}
+        with ThreadPoolExecutor(max_workers=4) as pool:
+            futures = {
+                "longitudinal": pool.submit(self._post_longitudinal, personas),
+                "diversity":    pool.submit(self._post_diversity, personas),
+                "scenario":     pool.submit(self._post_scenario, personas),
+            }
+            for name, fut in futures.items():
+                try: out[name] = fut.result()
+                except Exception as e: out[name] = {"error": repr(e)}
+        # Delphi runs sequentially (R1 → R2 → R3) and uses the LLM for theme extraction
+        try: out["delphi"] = self._post_delphi(personas)
+        except Exception as e: out["delphi"] = {"error": repr(e)}
+        return out
+
+    def _post_longitudinal(self, personas) -> dict:
+        sub = LongitudinalSubagent(self.llm, self.memory,
+                                   self.instrument_dir / "longitudinal_v1.yaml",
+                                   language=self.language)
+        run_dir = self.store.start_run(InterviewPhase.T1, SubagentKind.LONGITUDINAL)
+        ok, failed = self._fan_out(
+            run_dir, lambda p: sub.administer(p, phase=InterviewPhase.T1),
+            personas, audit_label="longitudinal_T1",
+        )
+        # Aggregate using T0 + T1
+        t0_path = self.store.latest_run(InterviewPhase.T0, SubagentKind.LONGITUDINAL)
+        t0_raw = self.store.read_responses(t0_path) if t0_path else []
+        t0 = [LikertResponse(**d) for d in t0_raw]
+        agg = longitudinal_aggregate(t0, ok)
+        self.store.write_aggregate(run_dir, agg)
+        for r in ok:
+            persona = next(p for p in personas if p.agent_id == r.agent_id)
+            try: self.zep_writer.write_per_agent(SubagentKind.LONGITUDINAL, r, persona.name)
+            except Exception: pass
+        try: self.zep_writer.write_aggregate(SubagentKind.LONGITUDINAL,
+                                             f"n_paired={agg['n_paired']}")
+        except Exception: pass
+        self.store.mark_latest(run_dir)
+        return {"n_responded": len(ok), "n_failed": len(failed), "run_dir": str(run_dir)}
+
+    def _post_diversity(self, personas) -> dict:
+        sub = DiversitySubagent(self.llm, self.memory,
+                                self.instrument_dir / "diversity_v1.yaml",
+                                language=self.language)
+        run_dir = self.store.start_run(InterviewPhase.T1, SubagentKind.DIVERSITY)
+        ok, failed = self._fan_out(
+            run_dir, lambda p: sub.administer(p), personas, audit_label="diversity",
+        )
+        typology = run_typology(ok)
+        self.store.write_named(run_dir, "typology.json", typology)
+        self.store.write_aggregate(run_dir, {"n": len(ok), "n_failed": len(failed),
+                                             "clusters": typology["clusters"]})
+        for r in ok:
+            persona = next(p for p in personas if p.agent_id == r.agent_id)
+            try: self.zep_writer.write_per_agent(SubagentKind.DIVERSITY, r, persona.name)
+            except Exception: pass
+        self.store.mark_latest(run_dir)
+        return {"n_responded": len(ok), "n_failed": len(failed), "run_dir": str(run_dir)}
+
+    def _post_scenario(self, personas) -> dict:
+        sub = ScenarioSubagent(self.llm, self.memory,
+                               self.instrument_dir / "scenario_v1.yaml",
+                               language=self.language)
+        run_dir = self.store.start_run(InterviewPhase.T1, SubagentKind.SCENARIO)
+        ok, failed = self._fan_out(
+            run_dir, lambda p: sub.administer(p), personas, audit_label="scenario",
+        )
+        matrix = polarity_matrix(ok)
+        self.store.write_named(run_dir, "polarity_matrix.json", matrix)
+        self.store.write_aggregate(run_dir, {"n": len(ok), "n_failed": len(failed),
+                                             "polarity": matrix})
+        for r in ok:
+            persona = next(p for p in personas if p.agent_id == r.agent_id)
+            try: self.zep_writer.write_per_agent(SubagentKind.SCENARIO, r, persona.name)
+            except Exception: pass
+        self.store.mark_latest(run_dir)
+        return {"n_responded": len(ok), "n_failed": len(failed), "run_dir": str(run_dir)}
+
+    def _post_delphi(self, personas) -> dict:
+        sub = DelphiSubagent(self.llm, self.memory,
+                             self.instrument_dir / "delphi_v1.yaml",
+                             language=self.language)
+        run_dir = self.store.start_run(InterviewPhase.T1, SubagentKind.DELPHI)
+        # Round 1: every persona is asked; failures are audited inside _fan_out
+        r1_ok, r1_failed = self._fan_out(
+            run_dir, lambda p: sub.administer_round1(p), personas, audit_label="delphi_r1",
+        )
+        # Append each R1 response to a per-round file as well (_fan_out has already stored it once)
+        for r in r1_ok: self.store.append_jsonl(run_dir, "round1_themes.jsonl", r)
+        # Derive themes from the successful R1 responses and persist them
+        themes = extract_themes(r1_ok, llm=self.llm)
+        self.store.write_named(run_dir, "themes.json", {"themes": themes})
+        # Round 2: only personas with a successful R1 response take part
+        r2_ok, r2_failed = self._fan_out(
+            run_dir, lambda p: sub.administer_round2(p, themes),
+            [p for p in personas if p.agent_id in {r.agent_id for r in r1_ok}],
+            audit_label="delphi_r2",
+        )
+        for r in r2_ok: self.store.append_jsonl(run_dir, "round2_ratings.jsonl", r)
+        gstats = group_stats_from_r2(r2_ok)
+        # Round 3: only personas with a successful R2 response; each is given its own R2 response
+        r2_by = {r.agent_id: r for r in r2_ok}
+        r3_personas = [p for p in personas if p.agent_id in r2_by]
+        def r3_call(p): return sub.administer_round3(p, themes, gstats, r2_by[p.agent_id])
+        r3_ok, r3_failed = self._fan_out(run_dir, r3_call, r3_personas, audit_label="delphi_r3")
+        for r in r3_ok: self.store.append_jsonl(run_dir, "round3_revisions.jsonl", r)
+        # Convergence metrics are computed from the R2 and R3 responses
+        conv = convergence_metrics(r2_ok, r3_ok)
+        self.store.write_named(run_dir, "convergence.json", conv)
+        self.store.write_aggregate(run_dir, {
+            "n_r1": len(r1_ok), "n_r2": len(r2_ok), "n_r3": len(r3_ok),
+            "n_failed_r1": len(r1_failed), "n_failed_r2": len(r2_failed), "n_failed_r3": len(r3_failed),
+            "themes": themes,
+        })
+        for r in r3_ok:  # only R3 responses are forwarded to the Zep writer
+            persona = next(p for p in personas if p.agent_id == r.agent_id)
+            try: self.zep_writer.write_per_agent(SubagentKind.DELPHI, r, persona.name)
+            except Exception: pass  # writer errors are swallowed here
+        self.store.mark_latest(run_dir)
+        return {"n_r1": len(r1_ok), "n_r2": len(r2_ok), "n_r3": len(r3_ok),
+                "run_dir": str(run_dir)}
+
+    # --- Re-run a single subagent ---
+    def rerun(self, subagent: SubagentKind) -> dict:
+        personas = self.personas.all()
+        if subagent == SubagentKind.LONGITUDINAL: return {"longitudinal": self._post_longitudinal(personas)}
+        if subagent == SubagentKind.DIVERSITY:    return {"diversity":    self._post_diversity(personas)}
+        if subagent == SubagentKind.SCENARIO:     return {"scenario":     self._post_scenario(personas)}
+        if subagent == SubagentKind.DELPHI:       return {"delphi":       self._post_delphi(personas)}
+        raise ValueError(f"unknown subagent {subagent}")
diff --git a/backend/app/services/interview_synthesizer.py b/backend/app/services/interview_synthesizer.py
new file mode 100644
index 00000000..a74609ae
--- /dev/null
+++ b/backend/app/services/interview_synthesizer.py
@@ -0,0 +1,160 @@
+from __future__ import annotations
+import csv
+import json
+from pathlib import Path
+from app.models.interview import InterviewPhase, SubagentKind
+from app.services.interviews.storage import InterviewStore
+
+
+class InterviewSynthesizer:
+    """Builds the markdown synthesis report and the tidy CSV export for one store.
+
+    For each subagent kind the latest run's ``aggregate.json`` is summarised into
+    a report section.  Outputs are written below ``<store.base>/synthesis``.
+    """
+
+    def __init__(self, store: InterviewStore):
+        self.store = store
+
+    def _maybe(self, phase: InterviewPhase, sub: SubagentKind) -> dict | None:
+        """Return ``{"run_dir", "aggregate"}`` for the latest run, or None.
+
+        None is returned when the store reports no run for ``(phase, sub)`` or
+        the run directory has no ``aggregate.json``.
+        """
+        run = self.store.latest_run(phase, sub)
+        if run is None:
+            return None
+        agg = run / "aggregate.json"
+        if not agg.exists():
+            return None
+        return {"run_dir": str(run), "aggregate": json.loads(agg.read_text(encoding="utf-8"))}
+
+    def _instrument_hashes(self) -> dict:
+        """Return ``{instrument_name: hash}`` read from ``instruments_used.json``.
+
+        Best effort: a missing, unreadable or wrongly shaped snapshot yields an
+        empty dict (or skips the malformed entry) instead of failing the report.
+        """
+        snap = self.store.base / "instruments_used.json"
+        if not snap.exists():
+            return {}
+        try:
+            data = json.loads(snap.read_text(encoding="utf-8"))
+        except Exception:
+            return {}
+        # Valid JSON of the wrong shape (e.g. a list) must not crash the report.
+        if not isinstance(data, dict):
+            return {}
+        return {k: v.get("hash") for k, v in data.items() if isinstance(v, dict)}
+
+    @staticmethod
+    def _format_delta(value) -> str:
+        """Format a mean delta as a signed two-decimal number, or ``NA`` for None."""
+        if value is None:
+            return "NA"
+        return f"{value:+0.2f}"
+
+    def _limitations_text(self, present: dict[str, bool]) -> str:
+        """Return the fixed "Limitations" section.
+
+        One extra bullet is appended for every entry of ``present`` whose value
+        is falsy, naming the subagent whose results are missing.
+        """
+        lines = [
+            "## Limitations",
+            "- **Simulated, not real stakeholders.** Responses reflect how the seed-document discourse "
+            "and the LLM jointly encode each stakeholder type, not what an actual fisher or NGO "
+            "staffer would say. The instrument measures the *model of the stakeholder*, not the stakeholder.",
+            "- **Memory digest is lossy.** Each agent's experience of OASIS is summarised to bounded length; "
+            "agents do not have full episodic recall.",
+            "- **LLM acquiescence and centrality bias.** Likert scales with LLM respondents skew toward 3–4 "
+            "of 5; check per-item distribution shape before drawing conclusions.",
+            "- **N is what it is.** `n_responded` and `n_failed` are printed verbatim per subagent; no smoothing.",
+            "- **Instrument provenance.** Hashes of frozen instruments are listed below; an identical run "
+            "is reproducible from these snapshots.",
+        ]
+        for k, ok in present.items():
+            if not ok:
+                lines.append(f"- *{k}* subagent results are missing for this run.")
+        return "\n".join(lines)
+
+    def run(self) -> str:
+        """Assemble the report, write it to disk and return it.
+
+        Side effects: writes ``synthesis/report.md`` and
+        ``synthesis/exports/all_responses.csv`` below ``self.store.base``.
+        """
+        sections: list[str] = []
+        sections.append("# Stakeholder Interview Synthesis\n")
+
+        # T0 is looked up like the other runs, but only the T1 aggregate is rendered.
+        long_t0 = self._maybe(InterviewPhase.T0, SubagentKind.LONGITUDINAL)
+        long_t1 = self._maybe(InterviewPhase.T1, SubagentKind.LONGITUDINAL)
+        if long_t1:
+            agg = long_t1["aggregate"]
+            sections.append("## Longitudinal opinion drift (T0 → T1)")
+            sections.append(f"- N paired: {agg.get('n_paired', 'NA')}")
+            per_item = agg.get("per_item", {})
+            # Items without a mean_delta sort as 0, i.e. after every real shift.
+            top = sorted(per_item.items(),
+                         key=lambda kv: abs(kv[1].get("mean_delta") or 0), reverse=True)[:5]
+            sections.append("- Largest mean shifts:")
+            for k, v in top:
+                # mean_delta may be None (see the sort key); formatting None with
+                # a numeric format spec would raise TypeError, so render "NA".
+                delta = self._format_delta(v.get("mean_delta"))
+                sections.append(f"  - `{k}`: Δ̄ = {delta}  (n={v.get('n')})")
+
+        diversity = self._maybe(InterviewPhase.T1, SubagentKind.DIVERSITY)
+        if diversity:
+            clusters = diversity["aggregate"].get("clusters", [])
+            sections.append("## Stakeholder typology")
+            sections.append(f"- N agents: {diversity['aggregate'].get('n', 'NA')}")
+            sections.append(f"- Clusters: {len(clusters)}")
+            for c in clusters:
+                sections.append(f"  - cluster {c['cluster_id']}: n={c['n']}, "
+                                f"top loadings = {list(c['top_loadings'].keys())[:5]}")
+
+        delphi = self._maybe(InterviewPhase.T1, SubagentKind.DELPHI)
+        if delphi:
+            agg = delphi["aggregate"]
+            sections.append("## Delphi consensus")
+            sections.append(f"- Rounds completed: R1={agg.get('n_r1')}, R2={agg.get('n_r2')}, R3={agg.get('n_r3')}")
+            themes = agg.get("themes", [])
+            sections.append(f"- Themes: {[t.get('label') for t in themes]}")
+
+        scenario = self._maybe(InterviewPhase.T1, SubagentKind.SCENARIO)
+        if scenario:
+            pol = scenario["aggregate"].get("polarity", {})
+            sections.append("## Scenario evaluation")
+            for sid in sorted(pol):
+                v = pol[sid]
+                # Scenarios nobody answered are left out of the report.
+                if v.get("n", 0) == 0:
+                    continue
+                sections.append(
+                    f"- **{sid}**: n={v['n']}, desirability {v['mean_desirability']:.2f}, "
+                    f"plausibility {v['mean_plausibility']:.2f}, impact {v['mean_impact']:.2f}, "
+                    f"fairness {v['mean_fairness']:.2f}")
+
+        sections.append("")
+        sections.append(self._limitations_text({
+            "longitudinal": bool(long_t1),
+            "diversity":    bool(diversity),
+            "delphi":       bool(delphi),
+            "scenario":     bool(scenario),
+        }))
+        sections.append("")
+        sections.append("### Instrument provenance")
+        for name, h in self._instrument_hashes().items():
+            sections.append(f"- `{name}`: hash `{h}`")
+
+        report = "\n\n".join(sections)
+        out_dir = self.store.base / "synthesis"
+        out_dir.mkdir(parents=True, exist_ok=True)
+        (out_dir / "report.md").write_text(report, encoding="utf-8")
+        self._write_tidy_csv(out_dir / "exports" / "all_responses.csv")
+        return report
+
+    def _write_tidy_csv(self, csv_path: Path) -> None:
+        """Write one long-format CSV row per flattened response value.
+
+        The latest run of every (phase, subagent) pair is read; when nothing
+        is found, only a header line is written.
+        """
+        csv_path.parent.mkdir(parents=True, exist_ok=True)
+        rows: list[dict] = []
+        for phase in (InterviewPhase.T0, InterviewPhase.T1):
+            for sub in SubagentKind:
+                run = self.store.latest_run(phase, sub)
+                if run is None:
+                    continue
+                files = ["responses.jsonl", "round1_themes.jsonl",
+                         "round2_ratings.jsonl", "round3_revisions.jsonl"]
+                for fname in files:
+                    for rec in self.store.read_responses(run, fname):
+                        flat = self._flatten(rec, phase=phase.value, subagent=sub.value)
+                        rows.extend(flat)
+        if not rows:
+            csv_path.write_text("phase,subagent,agent_id,key,value\n", encoding="utf-8")
+            return
+        # NOTE: columns are sorted alphabetically here, unlike the fixed order of
+        # the header-only file written above.
+        fieldnames = sorted({k for r in rows for k in r.keys()})
+        with csv_path.open("w", encoding="utf-8", newline="") as f:
+            w = csv.DictWriter(f, fieldnames=fieldnames)
+            w.writeheader()
+            for r in rows:
+                w.writerow(r)
+
+    def _flatten(self, rec: dict, *, phase: str, subagent: str) -> list[dict]:
+        """Flatten one response record into ``key``/``value`` rows.
+
+        Dict values are expanded up to two levels deep, with key parts joined
+        by ``.``; anything else (including deeper dicts) becomes a single row.
+        ``agent_id`` is repeated on every row rather than emitted as its own row.
+        """
+        out: list[dict] = []
+        aid = rec.get("agent_id")
+        for key, val in rec.items():
+            if key == "agent_id":
+                continue
+            if isinstance(val, dict):
+                for k2, v2 in val.items():
+                    if isinstance(v2, dict):
+                        for k3, v3 in v2.items():
+                            out.append({"phase": phase, "subagent": subagent, "agent_id": aid,
+                                        "key": f"{key}.{k2}.{k3}", "value": v3})
+                    else:
+                        out.append({"phase": phase, "subagent": subagent, "agent_id": aid,
+                                    "key": f"{key}.{k2}", "value": v2})
+            else:
+                out.append({"phase": phase, "subagent": subagent, "agent_id": aid,
+                            "key": key, "value": val})
+        return out
diff --git a/backend/app/services/interviews/__init__.py b/backend/app/services/interviews/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/app/services/interviews/adapters.py b/backend/app/services/interviews/adapters.py
new file mode 100644
index 00000000..06d05e94
--- /dev/null
+++ b/backend/app/services/interviews/adapters.py
@@ -0,0 +1,133 @@
+from __future__ import annotations
+import csv
+import json
+from pathlib import Path
+from typing import Optional
+from app.services.interviews.base import PersonaRecord, MemoryDigest
+
+
+class FileSystemPersonaProvider:
+    """Reads OASIS profiles from the simulation's `reddit_profiles.json` and/or `twitter_profiles.csv`.
+
+    If both are present, agents from `reddit_profiles.json` take precedence; twitter-only agents are appended.
+    """
+
+    def __init__(self, reddit_path: Optional[Path], twitter_path: Optional[Path]):
+        self.reddit_path = Path(reddit_path) if reddit_path else None
+        self.twitter_path = Path(twitter_path) if twitter_path else None
+
+    def _load_reddit(self) -> list[PersonaRecord]:
+        """Load personas from the reddit JSON file; ``[]`` if it is not configured or absent.
+
+        Rows without a ``user_id`` are skipped, mirroring the twitter loader.
+        """
+        if not self.reddit_path or not self.reddit_path.exists():
+            return []
+        data = json.loads(self.reddit_path.read_text(encoding="utf-8"))
+        out = []
+        for row in data:
+            uid = row.get("user_id")
+            # Compare against None/"" explicitly: a numeric id of 0 is falsy but valid.
+            if uid is None or uid == "":
+                continue
+            out.append(PersonaRecord(
+                agent_id=int(uid),
+                name=str(row.get("name") or row.get("user_name") or f"agent_{uid}"),
+                persona=str(row.get("persona") or row.get("bio") or ""),
+                profession=row.get("profession"),
+                bio=row.get("bio"),
+            ))
+        return out
+
+    def _load_twitter(self) -> list[PersonaRecord]:
+        """Load personas from the twitter CSV file; ``[]`` if it is not configured or absent.
+
+        Rows with an empty ``user_id`` column are skipped.
+        """
+        if not self.twitter_path or not self.twitter_path.exists():
+            return []
+        out = []
+        with self.twitter_path.open("r", encoding="utf-8", newline="") as f:
+            for row in csv.DictReader(f):
+                if not row.get("user_id"):
+                    continue
+                out.append(PersonaRecord(
+                    agent_id=int(row["user_id"]),
+                    name=str(row.get("name") or row.get("user_name") or f"agent_{row['user_id']}"),
+                    persona=str(row.get("persona") or row.get("bio") or ""),
+                    profession=row.get("profession"),
+                    bio=row.get("bio"),
+                ))
+        return out
+
+    def all(self) -> list[PersonaRecord]:
+        """Return reddit personas followed by twitter personas whose id is not already present."""
+        reddit = self._load_reddit()
+        seen = {p.agent_id for p in reddit}
+        twitter = [p for p in self._load_twitter() if p.agent_id not in seen]
+        return reddit + twitter
+
+    @staticmethod
+    def _as_agent_id(value) -> Optional[int]:
+        """Convert a raw ``user_id`` to int, or return None when it cannot be converted."""
+        try:
+            return int(value)
+        except (TypeError, ValueError):
+            return None
+
+    def agent_to_entity(self) -> dict[int, str]:
+        """Build the ``{agent_id: zep_entity_uuid}`` map from the persisted profile files.
+
+        Both writers (``oasis_profile_generator._save_reddit_json`` and
+        ``_save_twitter_csv``) emit ``source_entity_uuid`` per agent.  Reddit takes
+        precedence; rows with a missing/blank uuid or an unusable ``user_id`` are
+        skipped individually, so one bad row never discards the rest of a file.
+        Returns an empty dict if neither file is present or no row has the field.
+        """
+        mapping: dict[int, str] = {}
+
+        # Reddit JSON
+        if self.reddit_path and self.reddit_path.exists():
+            try:
+                rows = json.loads(self.reddit_path.read_text(encoding="utf-8"))
+            except (OSError, ValueError):
+                # Unreadable or malformed file: treat as "no reddit mapping".
+                rows = []
+            for row in rows if isinstance(rows, list) else []:
+                if not isinstance(row, dict):
+                    continue
+                uid = row.get("user_id")
+                uuid_ = row.get("source_entity_uuid")
+                if uid is None or not uuid_:
+                    continue
+                uid_int = self._as_agent_id(uid)
+                if uid_int is None:
+                    continue
+                mapping[uid_int] = str(uuid_)
+
+        # Twitter CSV (only fills agents not already mapped)
+        if self.twitter_path and self.twitter_path.exists():
+            try:
+                with self.twitter_path.open("r", encoding="utf-8", newline="") as f:
+                    for row in csv.DictReader(f):
+                        uid = row.get("user_id")
+                        uuid_ = row.get("source_entity_uuid")
+                        if not uid or not uuid_:
+                            continue
+                        uid_int = self._as_agent_id(uid)
+                        if uid_int is None:
+                            continue
+                        if uid_int not in mapping:
+                            mapping[uid_int] = str(uuid_)
+            except (OSError, ValueError, csv.Error):
+                # Keep whatever was mapped before the file became unreadable.
+                pass
+
+        return mapping
+
+
+class ZepMemoryProvider:
+    """Produces a length-bounded memory digest per agent from Zep entity context.
+
+    ``agent_id`` (the OASIS user_id) is translated to a Zep entity UUID through
+    the supplied mapping; unmapped agents fall back to ``str(agent_id)``.
+    """
+
+    def __init__(self, entity_reader, graph_id: str, agent_to_entity: dict[int, str] | None = None):
+        self.reader = entity_reader
+        self.graph_id = graph_id
+        # Private copy so later changes to the caller's dict do not leak in.
+        self.map = dict(agent_to_entity) if agent_to_entity else {}
+
+    def get_digest(self, agent_id: int, max_chars: int = 2000) -> MemoryDigest:
+        """Return the digest for ``agent_id``, truncated to ``max_chars`` characters.
+
+        Any exception raised by the reader yields a placeholder digest with
+        ``available=False``.  Otherwise the digest lists the entity's name,
+        summary and the facts of at most 20 related edges.
+        """
+        entity_uuid = self.map.get(agent_id) or str(agent_id)
+        try:
+            ctx = self.reader.get_entity_with_context(self.graph_id, entity_uuid)
+        except Exception:
+            return MemoryDigest(text=f"[no memory for agent {agent_id}]", available=False)
+
+        lines: list[str] = []
+        for label, attr in (("Name", "name"), ("Summary", "summary")):
+            value = getattr(ctx, attr, None)
+            if value:
+                lines.append(f"{label}: {value}")
+
+        def fact_of(edge):
+            # Edges may be plain dicts or objects exposing a ``fact`` attribute.
+            if isinstance(edge, dict):
+                return edge.get("fact")
+            return getattr(edge, "fact", None)
+
+        related = getattr(ctx, "related_edges", []) or []
+        lines.extend(f"- {fact}" for fact in map(fact_of, related[:20]) if fact)
+
+        digest = "\n".join(lines)
+        if len(digest) > max_chars:
+            # Reserve one character for the ellipsis marker.
+            digest = digest[: max_chars - 1] + "…"
+        if not digest:
+            digest = f"[empty memory for agent {agent_id}]"
+        return MemoryDigest(text=digest, available=True)
diff --git a/backend/app/services/interviews/base.py b/backend/app/services/interviews/base.py
new file mode 100644
index 00000000..0eb2f821
--- /dev/null
+++ b/backend/app/services/interviews/base.py
@@ -0,0 +1,107 @@
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Any, Callable, Optional, Protocol
+
+
+@dataclass
+class PersonaRecord:
+    """Identity of one simulated agent as handed to the interviewer."""
+
+    # Numeric agent id; the interviewer passes it to the memory provider.
+    agent_id: int
+    # Display name, interpolated into the system prompt ("You are <name>.").
+    name: str
+    # Persona description, placed right after the name in the system prompt.
+    persona: str
+    # Optional extra profile fields; not read by the prompt builder in this module.
+    profession: Optional[str] = None
+    bio: Optional[str] = None
+
+
+@dataclass
+class MemoryDigest:
+    """Text summary of an agent's simulation memory plus an availability flag."""
+
+    # Digest text inserted into the system prompt when ``available`` is true.
+    text: str
+    # When false, the interviewer's prompt uses a fixed "no memory" placeholder instead of ``text``.
+    available: bool = True
+
+
+class MemoryProvider(Protocol):
+    """Structural interface for objects that can supply an agent's memory digest."""
+
+    # Return the digest for ``agent_id``; ``max_chars`` defaults to 2000.
+    def get_digest(self, agent_id: int, max_chars: int = 2000) -> MemoryDigest: ...
+
+
+def coerce_int(value: Any) -> Optional[int]:
+    """Coerce LLM-returned Likert values into ints.
+
+    Real LLMs frequently return numeric Likert responses as JSON strings
+    (e.g. "3" instead of 3), and bipolar scales are often echoed with an
+    explicit sign ("+3").  Returns the int if value is an int, or a string
+    made of an optional single leading ``+``/``-`` followed by digits that
+    ``int()`` accepts; otherwise None.  Surrounding whitespace is ignored.
+    Bools are rejected so True/False aren't accepted as 1/0; floats and
+    float-like strings are rejected as well.
+    """
+    if isinstance(value, bool):
+        return None
+    if isinstance(value, int):
+        return value
+    if isinstance(value, str):
+        s = value.strip()
+        if not s:
+            return None
+        # Allow exactly one leading sign; "--3" or "+-3" stay invalid.
+        digits = s[1:] if s[0] in "+-" else s
+        if digits.isdigit():
+            try:
+                return int(s)
+            except ValueError:
+                # isdigit() accepts characters such as "²" that int() rejects.
+                return None
+    return None
+
+
+class SchemaValidationFailure(ValueError):
+    """Raised when an agent's answer still fails validation after the retry.
+
+    Carries the offending ``agent_id`` and the list of recorded ``attempts``.
+    """
+
+    def __init__(self, agent_id: int, attempts: list[dict]):
+        message = f"agent {agent_id}: schema violation after retry"
+        super().__init__(message)
+        self.attempts = attempts
+        self.agent_id = agent_id
+
+
+class StakeholderInterviewer:
+    """Asks an LLM to answer a survey prompt in character as a given persona.
+
+    The system prompt combines the persona, the agent's memory digest and a
+    schema hint.  With a validator, a failing first answer is retried once.
+    """
+
+    def __init__(self, llm, memory: MemoryProvider, language: str = "de"):
+        self.llm = llm
+        self.memory = memory
+        self.language = language
+
+    def _system_prompt(self, persona: PersonaRecord, digest: MemoryDigest, schema_hint: str) -> str:
+        """Build the in-character system prompt, ending with the schema hint."""
+        memory_block = digest.text if digest.available else "[no simulation memory available]"
+        # Any language other than "de" gets the English instruction.
+        lang_note = "Antworte ausschließlich auf Deutsch." if self.language == "de" else "Answer in English."
+        return (
+            f"You are {persona.name}. {persona.persona}\n\n"
+            "You are answering a survey about the future of German fisheries. "
+            "Answer strictly in character based on your background, values, and what you experienced "
+            "during the simulated social media discourse summarised below.\n\n"
+            f"--- simulation memory digest ---\n{memory_block}\n--- end ---\n\n"
+            f"{lang_note} Return JSON ONLY matching this schema:\n{schema_hint}"
+        )
+
+    @staticmethod
+    def _as_json_text(payload) -> str:
+        """Serialise a parsed LLM answer back to JSON text for the retry transcript.
+
+        ``str()`` on a dict yields a Python repr (single quotes, ``None``/``True``),
+        which is not JSON; it is only used as a fallback for unserialisable values.
+        """
+        import json
+
+        try:
+            return json.dumps(payload, ensure_ascii=False)
+        except (TypeError, ValueError):
+            return str(payload)
+
+    def ask_in_character(
+        self,
+        persona: PersonaRecord,
+        user_prompt: str,
+        schema_hint: str,
+        *,
+        temperature: float = 0.3,
+        max_tokens: Optional[int] = None,
+        validate: Optional[Callable[[dict], Optional[dict]]] = None,
+    ) -> dict:
+        """Query the LLM as ``persona`` and return the (optionally validated) answer.
+
+        Args:
+            persona: who the LLM should impersonate.
+            user_prompt: the survey question(s).
+            schema_hint: textual description of the expected JSON shape.
+            temperature: sampling temperature of the first attempt.
+            max_tokens: forwarded unchanged to ``llm.chat_json``.
+            validate: callable returning the accepted dict, or None to reject.
+
+        Returns:
+            The raw first answer when ``validate`` is None, else the value
+            returned by ``validate`` for the first accepted attempt.
+
+        Raises:
+            SchemaValidationFailure: if both attempts are rejected.
+        """
+        digest = self.memory.get_digest(persona.agent_id)
+        messages = [
+            {"role": "system", "content": self._system_prompt(persona, digest, schema_hint)},
+            {"role": "user", "content": user_prompt},
+        ]
+        first = self.llm.chat_json(messages=messages, temperature=temperature, max_tokens=max_tokens)
+        if validate is None:
+            return first
+
+        validated = validate(first)
+        if validated is not None:
+            return validated
+
+        # Replay the rejected answer as JSON so the transcript stays consistent
+        # with the "valid JSON only" instruction that follows it.
+        messages.append({"role": "assistant", "content": self._as_json_text(first)})
+        messages.append({"role": "user", "content":
+            "Your previous response did not match the required schema. "
+            f"Return ONLY valid JSON matching: {schema_hint}"})
+        # The retry runs at temperature 0.0.
+        second = self.llm.chat_json(messages=messages, temperature=0.0, max_tokens=max_tokens)
+        validated = validate(second)
+        if validated is None:
+            raise SchemaValidationFailure(
+                persona.agent_id,
+                attempts=[
+                    {"attempt": 1, "raw": first, "schema_hint": schema_hint},
+                    {"attempt": 2, "raw": second, "schema_hint": schema_hint},
+                ],
+            )
+        return validated
diff --git a/backend/app/services/interviews/delphi.py b/backend/app/services/interviews/delphi.py
new file mode 100644
index 00000000..198da793
--- /dev/null
+++ b/backend/app/services/interviews/delphi.py
@@ -0,0 +1,203 @@
+from __future__ import annotations
+import json
+import statistics
+from pathlib import Path
+from typing import Optional
+import yaml
+from app.models.interview import (
+    DelphiOpenResponse, DelphiRatingResponse,
+)
+from app.services.interviews.base import StakeholderInterviewer, PersonaRecord, coerce_int
+
+
+class DelphiSubagent:
+    """Administers the three Delphi rounds to individual personas.
+
+    Round 1 collects open answers, round 2 collects importance/plausibility
+    ratings per theme, round 3 lets the agent revise those ratings after
+    seeing group statistics.  The instrument is loaded from a YAML file.
+    """
+
+    def __init__(self, llm, memory, instrument_path: Path, language: str = "de"):
+        with Path(instrument_path).open("r", encoding="utf-8") as f:
+            self.instrument = yaml.safe_load(f)
+        self.interviewer = StakeholderInterviewer(llm=llm, memory=memory, language=language)
+        self.llm = llm
+        self.language = language
+
+    # --- Round 1: open questions ---
+    def _r1_schema(self) -> str:
+        """Return the JSON schema hint for round 1 (one string per question id)."""
+        return json.dumps({
+            "answers": {q["question_id"]: "<string>" for q in self.instrument["questions"]}
+        }, ensure_ascii=False)
+
+    def _r1_prompt(self) -> str:
+        """Return the round-1 user prompt listing every question in the active language."""
+        lines = ["Bitte beantworten Sie offen:" if self.language == "de" else "Please answer openly:"]
+        for q in self.instrument["questions"]:
+            txt = q["de"] if self.language == "de" else q["en"]
+            lines.append(f"[{q['question_id']}] {txt}")
+        return "\n".join(lines)
+
+    def _r1_validate(self, raw: dict) -> Optional[dict]:
+        """Accept ``raw`` if ``answers`` is a dict covering every question id; else None."""
+        if not isinstance(raw, dict):
+            return None
+        ans = raw.get("answers")
+        if not isinstance(ans, dict):
+            return None
+        required = {q["question_id"] for q in self.instrument["questions"]}
+        if not required.issubset(ans.keys()):
+            return None
+        return raw
+
+    def administer_round1(self, persona: PersonaRecord) -> DelphiOpenResponse:
+        """Ask the open questions and return the answers coerced to strings."""
+        raw = self.interviewer.ask_in_character(
+            persona, user_prompt=self._r1_prompt(),
+            schema_hint=self._r1_schema(), validate=self._r1_validate,
+        )
+        return DelphiOpenResponse(agent_id=persona.agent_id, round=1,
+                                  answers={k: str(v) for k, v in raw["answers"].items()})
+
+    # --- Round 2: rate themes ---
+    def _r2_schema(self, theme_ids: list[str]) -> str:
+        """Return the JSON schema hint for per-theme importance/plausibility ratings."""
+        return json.dumps({
+            "ratings": {tid: {"importance": "<int 1-5>", "plausibility": "<int 1-5>"} for tid in theme_ids}
+        }, ensure_ascii=False)
+
+    def _r2_prompt(self, themes: list[dict]) -> str:
+        """Return the round-2 user prompt listing every theme id and label."""
+        head = "Bewerten Sie jedes Thema nach Wichtigkeit (1-5) und Plausibilität (1-5):" if self.language == "de" \
+               else "Rate each theme on importance (1-5) and plausibility (1-5):"
+        body = [f"- [{t['theme_id']}] {t['label']}" for t in themes]
+        return head + "\n" + "\n".join(body)
+
+    def _r2_validate(self, theme_ids: list[str]):
+        """Build the ratings validator shared by rounds 2 and 3.
+
+        The returned callable accepts a dict whose ``ratings`` is a dict with
+        exactly the keys in ``theme_ids``, each holding ``importance`` and
+        ``plausibility`` coercible to ints in 1..5.  On success ``raw["ratings"]``
+        is replaced by the coerced values and ``raw`` is returned; any violation
+        returns None and leaves ``raw`` untouched.
+        """
+        expected = set(theme_ids)
+
+        def v(raw: dict) -> Optional[dict]:
+            if not isinstance(raw, dict):
+                return None
+            ratings = raw.get("ratings", {})
+            # A list/None here is a schema violation, not a crash: returning
+            # None lets the interviewer issue its retry.
+            if not isinstance(ratings, dict) or set(ratings.keys()) != expected:
+                return None
+            cleaned: dict[str, dict[str, int]] = {}
+            for tid, r in ratings.items():
+                if not isinstance(r, dict):
+                    return None
+                coerced: dict[str, int] = {}
+                for key in ("importance", "plausibility"):
+                    iv = coerce_int(r.get(key))
+                    if iv is None or not 1 <= iv <= 5:
+                        return None
+                    coerced[key] = iv
+                cleaned[tid] = coerced
+            raw["ratings"] = cleaned
+            return raw
+        return v
+
+    def administer_round2(self, persona: PersonaRecord, themes: list[dict]) -> DelphiRatingResponse:
+        """Ask the persona to rate every theme and return the validated ratings."""
+        theme_ids = [t["theme_id"] for t in themes]
+        raw = self.interviewer.ask_in_character(
+            persona, user_prompt=self._r2_prompt(themes),
+            schema_hint=self._r2_schema(theme_ids), validate=self._r2_validate(theme_ids),
+        )
+        return DelphiRatingResponse(agent_id=persona.agent_id, round=2,
+                                    ratings={k: dict(v) for k, v in raw["ratings"].items()})
+
+    # --- Round 3: revise after seeing group stats ---
+    def administer_round3(
+        self, persona: PersonaRecord, themes: list[dict], group_stats: dict, own_r2: DelphiRatingResponse
+    ) -> DelphiRatingResponse:
+        """Show group medians/IQRs next to the agent's own round-2 ratings and collect revisions.
+
+        Themes missing from ``group_stats`` or ``own_r2.ratings`` are shown with
+        ``None`` values.  The answer may carry an optional ``justification``.
+        """
+        theme_ids = [t["theme_id"] for t in themes]
+        head = ("Sie sehen unten die anonymisierten Gruppenwerte (Median, IQR). "
+                "Bitte überarbeiten Sie Ihre Bewertungen, wenn Sie möchten, und begründen Sie kurz.") \
+               if self.language == "de" else \
+               ("Below are the anonymised group values (median, IQR). "
+                "Please revise your ratings if you wish and add a short justification.")
+        ctx_lines = []
+        for t in themes:
+            tid = t["theme_id"]
+            gs = group_stats.get(tid, {})
+            own = own_r2.ratings.get(tid, {})
+            ctx_lines.append(
+                f"[{tid}] {t['label']} — group importance median={gs.get('imp_median')}, "
+                f"IQR={gs.get('imp_iqr')}; plausibility median={gs.get('plaus_median')}, "
+                f"IQR={gs.get('plaus_iqr')}. Your R2: imp={own.get('importance')}, plaus={own.get('plausibility')}."
+            )
+        prompt = head + "\n\n" + "\n".join(ctx_lines)
+        schema = json.dumps({
+            "ratings": {tid: {"importance": "<int 1-5>", "plausibility": "<int 1-5>"} for tid in theme_ids},
+            "justification": "<string>",
+        }, ensure_ascii=False)
+
+        # Round 3 applies exactly the same rating rules as round 2.
+        raw = self.interviewer.ask_in_character(persona, user_prompt=prompt,
+                                                schema_hint=schema,
+                                                validate=self._r2_validate(theme_ids))
+        return DelphiRatingResponse(
+            agent_id=persona.agent_id, round=3,
+            ratings={k: dict(v) for k, v in raw["ratings"].items()},
+            justification=raw.get("justification"),
+        )
+
+
+def extract_themes(round1: list[DelphiOpenResponse], llm) -> list[dict]:
+    """Ask the LLM to code the round-1 answers into themes.
+
+    Args:
+        round1: open responses; every answer is passed to the LLM tagged with
+            its agent id and question id.
+        llm: object exposing ``chat_json(messages=..., temperature=...)``.
+
+    Returns:
+        A list of ``{"theme_id": str, "label": str}`` dicts.  Entries that are
+        not dicts or lack a ``label`` are dropped.  ``theme_id`` is always a
+        string and unique within the list: later rounds compare these ids with
+        the keys of a JSON object, which are strings and cannot repeat.
+    """
+    text_blocks = []
+    for r in round1:
+        for qid, ans in r.answers.items():
+            text_blocks.append(f"[agent {r.agent_id} {qid}] {ans}")
+    schema = json.dumps({"themes": [{"theme_id": "<string>", "label": "<short string>"}]}, ensure_ascii=False)
+    messages = [
+        {"role": "system", "content":
+            "You extract distinct thematic codes from open-ended German fisheries survey responses. "
+            f"Return JSON ONLY matching: {schema}. Use stable theme_ids of form theme_0, theme_1, …"},
+        {"role": "user", "content": "Responses:\n" + "\n".join(text_blocks) + "\n\nReturn up to 12 distinct themes."},
+    ]
+    raw = llm.chat_json(messages=messages, temperature=0.0)
+    themes = raw.get("themes", []) if isinstance(raw, dict) else []
+    if not isinstance(themes, list):
+        themes = []
+    out = []
+    seen: set[str] = set()
+    for i, t in enumerate(themes):
+        if not (isinstance(t, dict) and "label" in t):
+            continue
+        raw_id = t.get("theme_id")
+        # Missing/empty ids and ids already taken fall back to the positional id.
+        theme_id = str(raw_id) if raw_id else f"theme_{i}"
+        if theme_id in seen:
+            theme_id = f"theme_{i}"
+        # The positional id itself may collide with an LLM-chosen id.
+        base, suffix = theme_id, 0
+        while theme_id in seen:
+            suffix += 1
+            theme_id = f"{base}_{suffix}"
+        seen.add(theme_id)
+        out.append({"theme_id": theme_id, "label": str(t["label"])})
+    return out
+
+
+def _iqr(xs: list[float]) -> float:
+    """Return the spread of ``xs``: Q3 - Q1 for four or more values.
+
+    With fewer than four values the full range (max - min) is returned
+    instead, and ``0.0`` for an empty list.
+    """
+    if not xs:
+        return 0.0
+    ordered = sorted(xs)
+    if len(ordered) < 4:
+        return ordered[-1] - ordered[0]
+    lower, _, upper = statistics.quantiles(ordered, n=4)
+    return upper - lower
+
+
+def convergence_metrics(r2: list[DelphiRatingResponse], r3: list[DelphiRatingResponse]) -> dict:
+    """Compare round-2 and round-3 rating spread per theme.
+
+    Responses are keyed by ``agent_id`` (a later duplicate replaces an earlier
+    one).  For every theme rated in either round the result holds importance
+    medians, importance/plausibility IQRs for both rounds and the round-3
+    minus round-2 IQR deltas.  Themes are emitted in sorted order.
+    """
+    round2 = {resp.agent_id: resp for resp in r2}
+    round3 = {resp.agent_id: resp for resp in r3}
+
+    theme_ids: set[str] = set()
+    for resp in r2 + r3:
+        theme_ids.update(resp.ratings.keys())
+
+    def column(by_agent: dict, theme: str, field: str) -> list:
+        # Values of ``field`` from every agent that rated ``theme``.
+        return [resp.ratings[theme][field] for resp in by_agent.values() if theme in resp.ratings]
+
+    report: dict[str, dict] = {}
+    for theme in sorted(theme_ids):
+        imp_before = column(round2, theme, "importance")
+        imp_after = column(round3, theme, "importance")
+        plaus_before = column(round2, theme, "plausibility")
+        plaus_after = column(round3, theme, "plausibility")
+
+        imp_iqr_before, imp_iqr_after = _iqr(imp_before), _iqr(imp_after)
+        plaus_iqr_before, plaus_iqr_after = _iqr(plaus_before), _iqr(plaus_after)
+
+        report[theme] = {
+            "imp_median_r2": statistics.median(imp_before) if imp_before else None,
+            "imp_median_r3": statistics.median(imp_after) if imp_after else None,
+            "imp_iqr_r2": imp_iqr_before,
+            "imp_iqr_r3": imp_iqr_after,
+            "delta_iqr_importance": imp_iqr_after - imp_iqr_before,
+            "plaus_iqr_r2": plaus_iqr_before,
+            "plaus_iqr_r3": plaus_iqr_after,
+            "delta_iqr_plausibility": plaus_iqr_after - plaus_iqr_before,
+        }
+    return report
+
+
+def group_stats_from_r2(r2: list[DelphiRatingResponse]) -> dict:
+    """Summarise round-2 ratings per theme as median and IQR.
+
+    Returns ``{theme_id: {"imp_median", "imp_iqr", "plaus_median", "plaus_iqr"}}``
+    covering every theme that at least one response rated.
+    """
+    theme_ids: set[str] = set()
+    for resp in r2:
+        theme_ids.update(resp.ratings.keys())
+
+    summary: dict[str, dict] = {}
+    for theme in theme_ids:
+        importance = [resp.ratings[theme]["importance"] for resp in r2 if theme in resp.ratings]
+        plausibility = [resp.ratings[theme]["plausibility"] for resp in r2 if theme in resp.ratings]
+        imp_median = statistics.median(importance) if importance else None
+        plaus_median = statistics.median(plausibility) if plausibility else None
+        summary[theme] = {
+            "imp_median": imp_median,
+            "imp_iqr": _iqr(importance),
+            "plaus_median": plaus_median,
+            "plaus_iqr": _iqr(plausibility),
+        }
+    return summary
diff --git a/backend/app/services/interviews/diversity.py b/backend/app/services/interviews/diversity.py
new file mode 100644
index 00000000..2c129828
--- /dev/null
+++ b/backend/app/services/interviews/diversity.py
@@ -0,0 +1,140 @@
+from __future__ import annotations
+import json
+from pathlib import Path
+from typing import Optional
+import numpy as np
+from sklearn.decomposition import PCA
+from sklearn.cluster import KMeans
+import yaml
+from app.models.interview import QSortResponse
+from app.services.interviews.base import StakeholderInterviewer, PersonaRecord, coerce_int
+from app.services.interviews.instrument_loader import InstrumentValidationError
+
+
+class DiversitySubagent:
+    """Administers a forced-distribution Q-sort plus Likert axes to one persona.
+
+    Statements are sorted into the seven buckets -3..+3; the instrument's
+    ``distribution`` lists how many statements each bucket must receive.
+    """
+
+    # Bucket values in the order used by the instrument's ``distribution`` list.
+    _BUCKETS = tuple(range(-3, 4))
+
+    def __init__(self, llm, memory, instrument_path: Path, language: str = "de"):
+        self.instrument = self._load(Path(instrument_path))
+        self.interviewer = StakeholderInterviewer(llm=llm, memory=memory, language=language)
+        self.language = language
+
+    def _load(self, path: Path) -> dict:
+        """Load and sanity-check the instrument YAML.
+
+        Raises:
+            InstrumentValidationError: if a required section is missing, the
+                distribution does not have one entry per bucket, or its sum
+                differs from the number of statements.
+        """
+        with path.open("r", encoding="utf-8") as f:
+            data = yaml.safe_load(f)
+        if not isinstance(data, dict) or "statements" not in data or "distribution" not in data:
+            raise InstrumentValidationError(f"invalid diversity instrument: {path}")
+        # likert_axes is indexed by the prompt, schema hint and validator, so a
+        # missing section must fail here rather than as a KeyError per agent.
+        if "likert_axes" not in data:
+            raise InstrumentValidationError(f"invalid diversity instrument (no likert_axes): {path}")
+        # Every consumer pairs the distribution with the buckets via zip(),
+        # which would silently ignore missing or surplus entries.
+        if len(data["distribution"]) != len(self._BUCKETS):
+            raise InstrumentValidationError(
+                f"distribution must have {len(self._BUCKETS)} entries (buckets -3..+3)")
+        if sum(data["distribution"]) != len(data["statements"]):
+            raise InstrumentValidationError("distribution sum must equal number of statements")
+        return data
+
+    def _schema_hint(self) -> str:
+        """Return the JSON schema hint covering all statements and axes."""
+        return json.dumps({
+            "placements": {s["statement_id"]: "<int in -3..+3>" for s in self.instrument["statements"]},
+            "likert_axes": {a["axis_id"]: "<int 1-7>" for a in self.instrument["likert_axes"]},
+        }, ensure_ascii=False)
+
+    def _user_prompt(self) -> str:
+        """Return the user prompt: sorting instructions, statements, then axes."""
+        dist = self.instrument["distribution"]
+        bucket_desc = ", ".join(f"{b}:{n}" for b, n in zip(self._BUCKETS, dist))
+        lines = [
+            ("Ordnen Sie jede Aussage genau einer Box von -3 (lehne stark ab) bis +3 (stimme stark zu) zu. "
+             f"Die Verteilung ist erzwungen: {bucket_desc}.") if self.language == "de" else
+            ("Place every statement into exactly one box from -3 (strongly disagree) to +3 (strongly agree). "
+             f"The distribution is forced: {bucket_desc}."),
+            "",
+            "Statements:",
+        ]
+        for s in self.instrument["statements"]:
+            txt = s["de"] if self.language == "de" else s["en"]
+            lines.append(f"- [{s['statement_id']}] {txt}")
+        lines += ["", "Then rate each axis from 1 to 7:"]
+        for a in self.instrument["likert_axes"]:
+            txt = a["de"] if self.language == "de" else a["en"]
+            lines.append(f"- [{a['axis_id']}] {txt}")
+        return "\n".join(lines)
+
+    def _validator(self, raw: dict) -> Optional[dict]:
+        """Validate and coerce one answer; return it, or None on any violation.
+
+        Checks that every statement is placed exactly once in -3..+3, that
+        the per-bucket counts match the forced distribution, and that every
+        axis has a rating in 1..7.  On success ``placements`` and
+        ``likert_axes`` in ``raw`` are replaced by their int-coerced versions.
+        """
+        if not isinstance(raw, dict):
+            return None
+        placements = raw.get("placements", {})
+        axes = raw.get("likert_axes", {})
+        # Wrongly typed sections are schema violations; returning None (rather
+        # than raising AttributeError) lets the interviewer retry.
+        if not isinstance(placements, dict) or not isinstance(axes, dict):
+            return None
+        statements = {s["statement_id"] for s in self.instrument["statements"]}
+        if set(placements.keys()) != statements:
+            return None
+        dist = self.instrument["distribution"]
+        # ``got`` only ever contains buckets that were used, so buckets whose
+        # quota is 0 must be left out of the target or no answer could match.
+        target = {b: n for b, n in zip(self._BUCKETS, dist) if n}
+        got: dict[int, int] = {}
+        coerced_p: dict[str, int] = {}
+        for k, v in placements.items():
+            iv = coerce_int(v)
+            if iv is None or not -3 <= iv <= 3:
+                return None
+            coerced_p[k] = iv
+            got[iv] = got.get(iv, 0) + 1
+        if got != target:
+            return None
+        coerced_a: dict[str, int] = {}
+        for a in self.instrument["likert_axes"]:
+            iv = coerce_int(axes.get(a["axis_id"]))
+            if iv is None or not 1 <= iv <= 7:
+                return None
+            coerced_a[a["axis_id"]] = iv
+        raw["placements"] = coerced_p
+        raw["likert_axes"] = coerced_a
+        return raw
+
+    def administer(self, persona: PersonaRecord) -> QSortResponse:
+        """Run the Q-sort for ``persona`` and return the validated response."""
+        raw = self.interviewer.ask_in_character(
+            persona,
+            user_prompt=self._user_prompt(),
+            schema_hint=self._schema_hint(),
+            validate=self._validator,
+        )
+        return QSortResponse(
+            agent_id=persona.agent_id,
+            placements={k: int(v) for k, v in raw["placements"].items()},
+            likert_axes={k: int(v) for k, v in raw["likert_axes"].items()},
+        )
+
+
+def _vectorize(r: QSortResponse, statements: list[str], axes: list[str]) -> np.ndarray:
+    """Turn one response into a float vector: placements first, then axes.
+
+    Statements the response did not place count as 0, axes it did not rate as 4.
+    """
+    placement_part = [r.placements.get(statement_id, 0) for statement_id in statements]
+    axis_part = [r.likert_axes.get(axis_id, 4) for axis_id in axes]
+    return np.array(placement_part + axis_part, dtype=float)
+
+
+def run_typology(responses: list[QSortResponse], n_clusters: int = 4) -> dict:
+    """Cluster Q-sort responses with KMeans and project them with PCA.
+
+    Args:
+        responses: responses to analyse; features are the sorted union of all
+            statement ids followed by the sorted union of all axis ids.
+        n_clusters: requested cluster count, capped at ``len(responses)``.
+
+    Returns:
+        ``{"n", "clusters", "pca"}``.  Each cluster lists its member agent ids
+        and the (up to) eight centroid coordinates of largest magnitude.
+        ``pca`` holds per-response component scores, explained variance ratios
+        and the matching ``agent_ids``; all three are empty lists when there
+        are no responses, so the result has the same shape in both cases.
+    """
+    if not responses:
+        return {"n": 0, "clusters": [],
+                "pca": {"components": [], "explained_variance": [], "agent_ids": []}}
+    statements = sorted({k for r in responses for k in r.placements})
+    axes = sorted({k for r in responses for k in r.likert_axes})
+    # Column i of X corresponds to feature_names[i].
+    feature_names = statements + axes
+    agent_ids = [r.agent_id for r in responses]
+    X = np.vstack([_vectorize(r, statements, axes) for r in responses])
+    n_clusters = min(n_clusters, len(responses))
+    pca = PCA(n_components=min(5, X.shape[1], X.shape[0]))
+    pcs = pca.fit_transform(X)
+    # Fixed seed so repeated runs on the same data give the same labels.
+    km = KMeans(n_clusters=n_clusters, n_init=10, random_state=0)
+    labels = km.fit_predict(X)
+    clusters = []
+    for c in range(n_clusters):
+        members = [aid for aid, label in zip(agent_ids, labels) if label == c]
+        centroid = km.cluster_centers_[c]
+        strongest = np.argsort(np.abs(centroid))[::-1][:8].tolist()
+        clusters.append({
+            "cluster_id": int(c),
+            "n": len(members),
+            "agent_ids": members,
+            "top_loadings": {feature_names[i]: float(centroid[i]) for i in strongest},
+        })
+    return {
+        "n": len(responses),
+        "clusters": clusters,
+        "pca": {
+            "components": pcs.tolist(),
+            "explained_variance": pca.explained_variance_ratio_.tolist(),
+            "agent_ids": agent_ids,
+        },
+    }
diff --git a/backend/app/services/interviews/instrument_loader.py b/backend/app/services/interviews/instrument_loader.py
new file mode 100644
index 00000000..6d35d8a1
--- /dev/null
+++ b/backend/app/services/interviews/instrument_loader.py
@@ -0,0 +1,55 @@
+from __future__ import annotations
+import hashlib
+import json
+from pathlib import Path
+import yaml
+from pydantic import ValidationError
+from app.models.interview import (
+    LikertInstrument, QSortInstrument,
+)
+
+class InstrumentValidationError(ValueError):
+    """Raised when an instrument file is missing, unparsable, or fails model validation."""
+
+def _parse_yaml(path: Path) -> dict:
+    """Load *path* as YAML; raise InstrumentValidationError unless it yields a mapping."""
+    if not path.exists():
+        raise InstrumentValidationError(f"instrument file not found: {path}")
+    try:
+        data = yaml.safe_load(path.read_text(encoding="utf-8"))
+    except yaml.YAMLError as e:
+        raise InstrumentValidationError(f"YAML parse error in {path}: {e}") from e
+    if not isinstance(data, dict):
+        raise InstrumentValidationError(f"top-level YAML must be a mapping in {path}")
+    return data
+
+def load_likert_instrument(path: Path) -> LikertInstrument:
+    """Parse and validate a Likert instrument file; raises InstrumentValidationError."""
+    try:
+        return LikertInstrument(**_parse_yaml(Path(path)))
+    except ValidationError as e:
+        raise InstrumentValidationError(str(e)) from e
+
+def load_qsort_instrument(path: Path) -> QSortInstrument:
+    """Parse and validate a Q-sort instrument file; raises InstrumentValidationError."""
+    try:
+        return QSortInstrument(**_parse_yaml(Path(path)))
+    except ValidationError as e:
+        raise InstrumentValidationError(str(e)) from e
+
+def instrument_hash(path: Path) -> str:
+    """Return the first 16 hex characters of the SHA-256 of the file's raw bytes."""
+    return hashlib.sha256(Path(path).read_bytes()).hexdigest()[:16]
+
+def freeze_snapshot(instruments: dict[str, Path], out_path: Path) -> dict:
+    """Write {name: {"path", "hash", "content"}} for each instrument to *out_path* as JSON.
+
+    Returns the snapshot. Paths may be given as str, as in the load_* functions."""
+    snapshot = {}
+    for name, p in instruments.items():
+        p = Path(p)  # _parse_yaml calls Path methods, so plain strings must be converted
+        snapshot[name] = {"path": str(p), "hash": instrument_hash(p), "content": _parse_yaml(p)}
+    out_path = Path(out_path)
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    out_path.write_text(json.dumps(snapshot, ensure_ascii=False, indent=2), encoding="utf-8")
+    return snapshot
diff --git a/backend/app/services/interviews/lifecycle.py b/backend/app/services/interviews/lifecycle.py
new file mode 100644
index 00000000..5e2d351d
--- /dev/null
+++ b/backend/app/services/interviews/lifecycle.py
@@ -0,0 +1,72 @@
+"""
+Interview lifecycle hook installer (Task 20).
+
+install_hooks(manager) registers two callbacks on a SimulationManager:
+  - on_ready  → spawn T0 longitudinal pre-survey in a background thread
+  - on_completed → spawn full post-sim batch + synthesis in a background thread
+
+Both hooks are best-effort: failures are logged but never propagate to the
+calling thread.
+"""
+
+from __future__ import annotations
+
+import threading
+
+from app.utils.logger import get_logger
+
+logger = get_logger(__name__)
+
+
+def install_hooks(manager) -> None:
+    """Register the interview callbacks on *manager* (a SimulationManager or its class).
+
+    Each callback reads the simulation id from the state object it is given and
+    starts a daemon thread: ``_run_pre`` for on_ready, ``_run_post`` for on_completed.
+    A state that carries no usable id is ignored.
+    """
+
+    def _sim_id_of(state):
+        # The id is looked up under several attribute names, presumably because the
+        # state objects handed to the hooks differ in type; the first truthy value wins.
+        for attr in ("simulation_id", "sim_id", "id"):
+            found = getattr(state, attr, None)
+            if found:
+                return found
+        return None
+
+    def _on_ready(state) -> None:
+        sim_id = _sim_id_of(state)
+        if sim_id:
+            threading.Thread(target=_run_pre, args=(sim_id,), daemon=True).start()
+
+    def _on_completed(state) -> None:
+        sim_id = _sim_id_of(state)
+        if sim_id:
+            threading.Thread(target=_run_post, args=(sim_id,), daemon=True).start()
+
+    # Both callbacks are closures created per call; nothing de-duplicates repeated installs.
+    manager.register_on_ready(_on_ready)
+    manager.register_on_completed(_on_completed)
+
+
+def _run_pre(sim_id: str) -> None:
+    """Run the orchestrator's pre-survey for *sim_id*; any Exception is logged, not raised."""
+    try:
+        from app.api.interview import _build_orchestrator
+
+        _build_orchestrator(sim_id).run_pre()
+    except Exception as e:  # thread entry point: keep the traceback, nobody else will see it
+        logger.warning(f"auto pre-survey failed for {sim_id}: {e!r}", exc_info=True)
+
+
+def _run_post(sim_id: str) -> None:
+    """Run the post-sim batch, then the synthesizer, for *sim_id*; any Exception is logged."""
+    try:
+        from app.api.interview import _build_orchestrator
+        from app.services.interview_synthesizer import InterviewSynthesizer
+        orch = _build_orchestrator(sim_id)
+        orch.run_post()
+        InterviewSynthesizer(store=orch.store).run()
+    except Exception as e:  # thread entry point: keep the traceback, nobody else will see it
+        logger.warning(f"auto post-survey failed for {sim_id}: {e!r}", exc_info=True)
diff --git a/backend/app/services/interviews/longitudinal.py b/backend/app/services/interviews/longitudinal.py
new file mode 100644
index 00000000..6ef7b811
--- /dev/null
+++ b/backend/app/services/interviews/longitudinal.py
@@ -0,0 +1,113 @@
+from __future__ import annotations
+import json
+import math
+from pathlib import Path
+from typing import Optional
+from app.models.interview import (
+    LikertInstrument, LikertResponse, InterviewPhase,
+)
+from app.services.interviews.base import StakeholderInterviewer, PersonaRecord, coerce_int
+from app.services.interviews.instrument_loader import load_likert_instrument
+
+
+class LongitudinalSubagent:
+    """Administers a Likert instrument to personas through a StakeholderInterviewer."""
+
+    def __init__(self, llm, memory, instrument_path: Path, language: str = "de"):
+        self.instrument: LikertInstrument = load_likert_instrument(Path(instrument_path))
+        self.interviewer = StakeholderInterviewer(llm=llm, memory=memory, language=language)
+        self.language = language
+
+    def _schema_hint(self) -> str:
+        """Return the JSON skeleton of the expected answer, one slot per item id."""
+        ids = [i.item_id for i in self.instrument.items]
+        return json.dumps({
+            "responses": {k: "<int 1-5>" for k in ids},
+            "confidence": {k: "<float 0-1>" for k in ids},
+            "open_comment": "<string, optional>",
+        }, ensure_ascii=False)
+
+    def _user_prompt(self) -> str:
+        """Return the rating instruction plus one ``- [item_id] text`` line per item."""
+        german = self.language == "de"  # every other language code gets the English text
+        lines = ["Bitte bewerten Sie die folgenden Aussagen auf einer Skala von 1 (lehne stark ab) bis 5 (stimme stark zu)." if german else
+                 "Please rate the following statements on a scale from 1 (strongly disagree) to 5 (strongly agree)."]
+        lines += [f"- [{it.item_id}] {it.de if german else it.en}" for it in self.instrument.items]
+        return "\n".join(lines)
+
+    def _validator(self, raw: dict) -> Optional[dict]:
+        """Return *raw* with its responses coerced to int (in place), or None to reject it."""
+        resp = raw.get("responses") if isinstance(raw, dict) else None
+        required = {it.item_id for it in self.instrument.items}
+        if not isinstance(resp, dict) or not required.issubset(resp.keys()):
+            return None
+        coerced = {k: coerce_int(v) for k, v in resp.items()}
+        if any(iv is None or not 1 <= iv <= 5 for iv in coerced.values()):
+            return None
+        raw["responses"] = coerced
+        return raw
+
+    @staticmethod
+    def _confidence(given) -> dict:
+        """Return the numeric entries of the optional, unvalidated confidence payload."""
+        out = {}
+        for k, v in (given.items() if isinstance(given, dict) else ()):
+            try:
+                out[k] = float(v)
+            except (TypeError, ValueError):  # null, "high", ...: drop the entry, keep the answer
+                continue
+        return out
+
+    def administer(self, persona: PersonaRecord, phase: InterviewPhase) -> LikertResponse:
+        """Interview *persona* in character and return the answer as a LikertResponse."""
+        raw = self.interviewer.ask_in_character(
+            persona, user_prompt=self._user_prompt(),
+            schema_hint=self._schema_hint(), validate=self._validator)
+        return LikertResponse(
+            agent_id=persona.agent_id, phase=phase, open_comment=raw.get("open_comment"),
+            responses={k: int(v) for k, v in raw["responses"].items()},
+            confidence=self._confidence(raw.get("confidence")))
+
+
+def run_aggregate(t0: list[LikertResponse], t1: list[LikertResponse]) -> dict:
+    """Summarise per-item and per-agent change between two Likert waves.
+
+    Only agents present in both waves are paired. Every ``per_item`` entry carries
+    the same five keys; an item without any paired answer has ``n == 0``, zero
+    sign counts, and ``None`` for ``mean_delta`` and ``sd_delta``. ``per_agent``
+    maps each paired agent id to its summed absolute change over shared items.
+    """
+    by_t0 = {r.agent_id: r for r in t0}
+    by_t1 = {r.agent_id: r for r in t1}
+    paired = sorted(set(by_t0) & set(by_t1))
+    items = sorted({k for r in [*t0, *t1] for k in r.responses})
+
+    per_item: dict[str, dict] = {}
+    for it in items:
+        pairs = [(by_t0[a].responses.get(it), by_t1[a].responses.get(it)) for a in paired]
+        deltas = [v1 - v0 for v0, v1 in pairs if v0 is not None and v1 is not None]
+        n = len(deltas)
+        mean = sum(deltas) / n if n else None
+        # Sample variance (n - 1 denominator); a single pair gives 0 rather than dividing by 0.
+        var = sum((d - mean) ** 2 for d in deltas) / max(n - 1, 1) if n else None
+        per_item[it] = {
+            "mean_delta": mean,
+            "sd_delta": math.sqrt(var) if n else None,
+            "n": n,
+            "n_positive": sum(1 for d in deltas if d > 0),
+            "n_negative": sum(1 for d in deltas if d < 0),
+        }
+
+    per_agent: dict[int, dict] = {}
+    for aid in paired:
+        r0, r1 = by_t0[aid].responses, by_t1[aid].responses
+        common = set(r0) & set(r1)
+        drift = sum(abs(r1[k] - r0[k]) for k in common)
+        per_agent[aid] = {"total_abs_drift": drift, "n_items": len(common)}
+    return {
+        "n_paired": len(paired),
+        "n_t0_only": len(set(by_t0) - set(by_t1)),
+        "n_t1_only": len(set(by_t1) - set(by_t0)),
+        "per_item": per_item,
+        "per_agent": per_agent,
+    }
diff --git a/backend/app/services/interviews/scenario.py b/backend/app/services/interviews/scenario.py
new file mode 100644
index 00000000..1b1e8468
--- /dev/null
+++ b/backend/app/services/interviews/scenario.py
@@ -0,0 +1,82 @@
+from __future__ import annotations
+import json
+import statistics
+from pathlib import Path
+from typing import Optional
+import yaml
+from app.models.interview import ScenarioRating, ScenarioResponse
+from app.services.interviews.base import StakeholderInterviewer, PersonaRecord, coerce_int
+
+class ScenarioSubagent:
+    """Asks a persona to rate every scenario of a YAML instrument on four 1-7 dimensions."""
+
+    def __init__(self, llm, memory, instrument_path: Path, language: str = "de"):
+        # NOTE(review): the YAML is used exactly as loaded; nothing here checks its schema.
+        with Path(instrument_path).open("r", encoding="utf-8") as f:
+            self.instrument = yaml.safe_load(f)
+        self.interviewer = StakeholderInterviewer(llm=llm, memory=memory, language=language)
+        self.language = language
+
+    def _schema_hint(self) -> str:
+        """Return the JSON skeleton of the expected answer, one rating slot per scenario id."""
+        dims = ("desirability", "plausibility", "impact_on_my_group", "fairness")
+        slot = {**dict.fromkeys(dims, "<int 1-7>"), "if_woke_up_response": "<string>"}
+        sids = [s["scenario_id"] for s in self.instrument["scenarios"]]
+        return json.dumps({"ratings": {sid: slot for sid in sids}}, ensure_ascii=False)
+
+    def _user_prompt(self) -> str:
+        """Return the instruction followed by one ``--- id: label ---`` block per scenario."""
+        lang = "de" if self.language == "de" else "en"  # anything but "de" gets English
+        if lang == "de":
+            head = ("Bewerten Sie jedes der folgenden Szenarien auf vier Dimensionen (1-7) "
+                    "und beantworten Sie kurz, was Sie tun würden, wenn Sie in dieser Welt aufwachten.")
+        else:
+            head = ("Rate each of the following scenarios on four dimensions (1-7) "
+                    "and briefly answer what you would do if you woke up in this world.")
+        blocks = [f"--- {s['scenario_id']}: {s['label_' + lang]} ---\n{s['description_' + lang]}"
+                  for s in self.instrument["scenarios"]]
+        return head + "\n\n" + "\n\n".join(blocks)
+
+    def _validate(self, raw: dict) -> Optional[dict]:
+        """Return *raw* with its ratings coerced to int (in place), or None to reject it."""
+        if not isinstance(raw, dict): return None
+        sids = {s["scenario_id"] for s in self.instrument["scenarios"]}
+        ratings = raw.get("ratings")
+        # "ratings" may be null or a list; reject that instead of failing on .keys().
+        if not isinstance(ratings, dict) or set(ratings) != sids: return None
+        for v in ratings.values():
+            if not isinstance(v, dict): return None
+            for k in ("desirability", "plausibility", "impact_on_my_group", "fairness"):
+                iv = coerce_int(v.get(k))
+                if iv is None or not 1 <= iv <= 7: return None
+                v[k] = iv
+            if not isinstance(v.get("if_woke_up_response", ""), str): return None
+        return raw
+
+    def administer(self, persona: PersonaRecord) -> ScenarioResponse:
+        """Interview *persona* in character and return the ratings as a ScenarioResponse."""
+        raw = self.interviewer.ask_in_character(
+            persona, user_prompt=self._user_prompt(),
+            schema_hint=self._schema_hint(), validate=self._validate)
+        ratings = {sid: ScenarioRating(**v) for sid, v in raw["ratings"].items()}
+        return ScenarioResponse(agent_id=persona.agent_id, ratings=ratings)
+
+def polarity_matrix(responses: list[ScenarioResponse]) -> dict:
+    """Return, per scenario id, the count, means and population SDs of the four ratings.
+
+    Ids appear in sorted order; a scenario is listed only if at least one response rates it.
+    """
+    fields = {"desirability": "desirability", "plausibility": "plausibility",
+              "impact": "impact_on_my_group", "fairness": "fairness"}
+    sids = sorted({sid for r in responses for sid in r.ratings})
+    matrix: dict[str, dict] = {}
+    for sid in sids:
+        vals = [r.ratings[sid] for r in responses if sid in r.ratings]  # never empty
+        row: dict = {"n": len(vals)}
+        for short, attr in fields.items():
+            row[f"mean_{short}"] = statistics.mean(getattr(v, attr) for v in vals)
+        for short, attr in fields.items():
+            # pstdev of a single rating is already 0.0, so no special case is needed
+            row[f"sd_{short}"] = statistics.pstdev([getattr(v, attr) for v in vals])
+        matrix[sid] = row
+    return matrix
diff --git a/backend/app/services/interviews/storage.py b/backend/app/services/interviews/storage.py
new file mode 100644
index 00000000..9ba23d49
--- /dev/null
+++ b/backend/app/services/interviews/storage.py
@@ -0,0 +1,75 @@
+from __future__ import annotations
+import json
+import time
+import uuid
+from pathlib import Path
+from typing import Any
+from pydantic import BaseModel
+from app.models.interview import InterviewPhase, SubagentKind
+
+
+class InterviewStore:
+    """File-backed store for interview runs under <root>/simulations/<sim_id>/interviews."""
+
+    def __init__(self, root: Path, sim_id: str):
+        self.base = Path(root) / "simulations" / sim_id / "interviews"
+        self.base.mkdir(parents=True, exist_ok=True)
+
+    def start_run(self, phase: InterviewPhase, subagent: SubagentKind) -> Path:
+        """Create <base>/<phase>/<subagent>/<run_id>/, write its run.json, return the directory."""
+        run_id = f"{time.strftime('%Y%m%dT%H%M%S')}-{uuid.uuid4().hex[:6]}"
+        run_dir = self.base / phase.value / subagent.value / run_id
+        run_dir.mkdir(parents=True, exist_ok=True)
+        meta = {"run_id": run_id, "phase": phase.value, "subagent": subagent.value,
+                "created_at": time.time()}
+        (run_dir / "run.json").write_text(json.dumps(meta, indent=2), encoding="utf-8")
+        return run_dir
+
+    def append_response(self, run_dir: Path, model: BaseModel) -> None:
+        """Append *model* as one JSON line to responses.jsonl in *run_dir*."""
+        with (run_dir / "responses.jsonl").open("a", encoding="utf-8") as f:
+            f.write(model.model_dump_json() + "\n")
+
+    def append_jsonl(self, run_dir: Path, filename: str, payload: dict | BaseModel) -> None:
+        """Append *payload* (a model or a plain dict) as one JSON line to *filename*."""
+        with (run_dir / filename).open("a", encoding="utf-8") as f:
+            as_model = isinstance(payload, BaseModel)
+            text = payload.model_dump_json() if as_model else json.dumps(payload, ensure_ascii=False)
+            f.write(text + "\n")
+
+    def read_responses(self, run_dir: Path, filename: str = "responses.jsonl") -> list[dict]:
+        """Return the parsed non-blank lines of *filename*, or [] when the file is absent."""
+        path = run_dir / filename
+        if not path.exists():
+            return []
+        rows = path.read_text(encoding="utf-8").splitlines()
+        return [json.loads(row) for row in rows if row.strip()]
+
+    def write_aggregate(self, run_dir: Path, payload: dict) -> None:
+        """Write *payload* to aggregate.json in *run_dir*, replacing any previous content."""
+        self.write_named(run_dir, "aggregate.json", payload)
+
+    def write_named(self, run_dir: Path, name: str, payload: Any) -> None:
+        """Write *payload* as indented JSON to *name* in *run_dir*."""
+        text = json.dumps(payload, ensure_ascii=False, indent=2)
+        (run_dir / name).write_text(text, encoding="utf-8")
+
+    def audit(self, run_dir: Path, agent_id: int | None, event: str,
+              detail: str | dict = "") -> None:
+        """Append a timestamped audit entry to audit.jsonl; non-JSON values are stringified."""
+        entry = {"ts": time.time(), "agent_id": agent_id, "event": event, "detail": detail}
+        with (run_dir / "audit.jsonl").open("a", encoding="utf-8") as f:
+            f.write(json.dumps(entry, ensure_ascii=False, default=str) + "\n")
+
+    def mark_latest(self, run_dir: Path) -> None:
+        """Record *run_dir* (relative to the store base) in the sibling latest.json pointer."""
+        rel = str(run_dir.relative_to(self.base))
+        (run_dir.parent / "latest.json").write_text(json.dumps({"run_dir": rel}), encoding="utf-8")
+
+    def latest_run(self, phase: InterviewPhase, subagent: SubagentKind) -> Path | None:
+        """Return the run directory named by latest.json, or None if pointer or directory is gone."""
+        pointer = self.base / phase.value / subagent.value / "latest.json"
+        if not pointer.exists():
+            return None
+        path = self.base / json.loads(pointer.read_text())["run_dir"]
+        return path if path.exists() else None
diff --git a/backend/app/services/interviews/zep_writer.py b/backend/app/services/interviews/zep_writer.py
new file mode 100644
index 00000000..fdd9f185
--- /dev/null
+++ b/backend/app/services/interviews/zep_writer.py
@@ -0,0 +1,68 @@
+from __future__ import annotations
+from typing import Any, Optional
+from app.models.interview import (
+    LikertResponse, QSortResponse, DelphiRatingResponse, ScenarioResponse, SubagentKind,
+)
+
+class InterviewZepWriter:
+    """Sends one-line interview summaries to a Zep graph as text episodes.
+
+    ``memory_updater`` must provide ``add_text_episode(graph_id, text)``; the real
+    updater or a no-op shim (tests, stub mode) both qualify.
+    """
+
+    def __init__(self, memory_updater, graph_id: str):
+        self.updater = memory_updater
+        self.graph_id = graph_id
+
+    def _emit(self, text: str) -> None:
+        """Forward *text* to the updater; RuntimeError if it lacks add_text_episode."""
+        if not hasattr(self.updater, "add_text_episode"):
+            raise RuntimeError(
+                "memory_updater is missing add_text_episode(graph_id, text); "
+                "InterviewZepWriter requires the explicit text-episode API."
+            )
+        self.updater.add_text_episode(self.graph_id, text)
+
+    def _summarize_likert(self, r: LikertResponse) -> str:
+        """Return the mean score plus the three highest- and lowest-rated item ids."""
+        mean_v = sum(r.responses.values()) / max(len(r.responses), 1)  # no items -> 0.00
+        by_score = sorted(r.responses.items(), key=lambda kv: kv[1])
+        top = sorted(r.responses.items(), key=lambda kv: kv[1], reverse=True)[:3]
+        agrees, disagrees = [k for k, _ in top], [k for k, _ in by_score[:3]]
+        return f"mean={mean_v:.2f}; agrees with {agrees}; disagrees with {disagrees}"
+
+    def _summarize_qsort(self, r: QSortResponse) -> str:
+        """List the statements placed at +2 or above and those at -2 or below."""
+        placed = r.placements.items()
+        plus, minus = [k for k, v in placed if v >= 2], [k for k, v in placed if v <= -2]
+        return f"+strongly:{plus}; -strongly:{minus}"
+
+    def _summarize_scenario(self, r: ScenarioResponse) -> str:
+        """Return ``id: des=.. plaus=..`` for every rated scenario, joined by '; '."""
+        return "; ".join(f"{sid}: des={rt.desirability} plaus={rt.plausibility}"
+                         for sid, rt in r.ratings.items())
+
+    def write_per_agent(self, subagent: SubagentKind, response: Any, agent_name: str,
+                        phase: Optional[str] = None) -> None:
+        """Emit one summary line for *response*; an explicit *phase* overrides the default."""
+        if isinstance(response, LikertResponse):
+            summary = self._summarize_likert(response)
+            phase = phase or response.phase.value
+        elif isinstance(response, QSortResponse):
+            summary = self._summarize_qsort(response)
+            phase = phase or "T1"
+        elif isinstance(response, ScenarioResponse):
+            summary = self._summarize_scenario(response)
+            phase = phase or "T1"
+        elif isinstance(response, DelphiRatingResponse):
+            summary = f"round={response.round}; {len(response.ratings)} themes rated"
+            phase = phase or f"T1/R{response.round}"
+        else:
+            summary = str(response)[:200]
+            phase = phase or "T1"
+        self._emit(f"Agent {agent_name} (interview/{subagent.value}/{phase}): {summary}")
+
+    def write_aggregate(self, subagent: SubagentKind, summary: str) -> None:
+        """Emit an aggregate summary line for *subagent*."""
+        self._emit(f"Interview aggregate ({subagent.value}): {summary}")
diff --git a/backend/app/services/oasis_profile_generator.py b/backend/app/services/oasis_profile_generator.py
index 7704a627..9360e18c 100644
--- a/backend/app/services/oasis_profile_generator.py
+++ b/backend/app/services/oasis_profile_generator.py
@@ -1090,11 +1090,13 @@ class OasisProfileGenerator:
         
         with open(file_path, 'w', newline='', encoding='utf-8') as f:
             writer = csv.writer(f)
-            
-            # 写入OASIS要求的表头
-            headers = ['user_id', 'name', 'username', 'user_char', 'description']
+
+            # 写入表头：OASIS要求的5列 + 额外的source_entity_uuid列（反向链接到Zep实体）。
+            # OASIS按列名读取，额外的列不会影响其行为，但允许下游（面试子系统等）
+            # 重建 agent_id -> Zep entity uuid 的映射。
+            headers = ['user_id', 'name', 'username', 'user_char', 'description', 'source_entity_uuid']
             writer.writerow(headers)
-            
+
             # 写入数据行
             for idx, profile in enumerate(profiles):
                 # user_char: 完整人设（bio + persona），用于LLM系统提示
@@ -1103,16 +1105,17 @@ class OasisProfileGenerator:
                     user_char = f"{profile.bio} {profile.persona}"
                 # 处理换行符（CSV中用空格替代）
                 user_char = user_char.replace('\n', ' ').replace('\r', ' ')
-                
+
                 # description: 简短简介，用于外部显示
                 description = profile.bio.replace('\n', ' ').replace('\r', ' ')
-                
+
                 row = [
                     idx,                    # user_id: 从0开始的顺序ID
                     profile.name,           # name: 真实姓名
                     profile.user_name,      # username: 用户名
                     user_char,              # user_char: 完整人设（内部LLM使用）
-                    description             # description: 简短简介（外部显示）
+                    description,            # description: 简短简介（外部显示）
+                    profile.source_entity_uuid or "",  # source_entity_uuid: Zep实体UUID
                 ]
                 writer.writerow(row)
         
@@ -1184,12 +1187,18 @@ class OasisProfileGenerator:
                 item["profession"] = profile.profession
             if profile.interested_topics:
                 item["interested_topics"] = profile.interested_topics
-            
+            # source_entity_uuid: 反向链接到Zep实体，下游（面试子系统等）需要此映射以
+            # 在Zep图谱中查找Agent的上下文。仅在存在时写入。
+            if profile.source_entity_uuid:
+                item["source_entity_uuid"] = profile.source_entity_uuid
+            if profile.source_entity_type:
+                item["source_entity_type"] = profile.source_entity_type
+
             data.append(item)
-        
+
         with open(file_path, 'w', encoding='utf-8') as f:
             json.dump(data, f, ensure_ascii=False, indent=2)
-        
+
         logger.info(f"已保存 {len(profiles)} 个Reddit Profile到 {file_path} (JSON格式，包含user_id字段)")
     
     # 保留旧方法名作为别名，保持向后兼容
diff --git a/backend/app/services/simulation_manager.py b/backend/app/services/simulation_manager.py
index 0d161a90..50b7890a 100644
--- a/backend/app/services/simulation_manager.py
+++ b/backend/app/services/simulation_manager.py
@@ -115,24 +115,31 @@ class SimulationState:
 class SimulationManager:
     """
     模拟管理器
-    
+
     核心功能：
     1. 从Zep图谱读取实体并过滤
     2. 生成OASIS Agent Profile
     3. 使用LLM智能生成模拟配置参数
     4. 准备预设脚本所需的所有文件
     """
-    
+
     # 模拟数据存储目录
     SIMULATION_DATA_DIR = os.path.join(
-        os.path.dirname(__file__), 
+        os.path.dirname(__file__),
         '../../uploads/simulations'
     )
-    
+
+    # Class-level hook registries so callbacks survive across instances.
+    # The Flask API endpoints construct fresh `SimulationManager()` instances per request,
+    # while lifecycle hooks are registered once at app startup — storing the lists on the
+    # instance would silently drop those hooks on every request.
+    _on_ready_hooks: list = []
+    _on_completed_hooks: list = []
+
     def __init__(self):
         # 确保目录存在
         os.makedirs(self.SIMULATION_DATA_DIR, exist_ok=True)
-        
+
         # 内存中的模拟状态缓存
         self._simulations: Dict[str, SimulationState] = {}
     
@@ -191,6 +198,46 @@ class SimulationManager:
         self._simulations[simulation_id] = state
         return state
     
+    # ------------------------------------------------------------------
+    # Lifecycle hook registration (class-level — see the comment on `_on_ready_hooks`)
+    # ------------------------------------------------------------------
+
+    @classmethod
+    def register_on_ready(cls, fn) -> None:
+        """Register *fn* to be called with the state when a simulation becomes READY.
+
+        Stored on the class, so it applies to every SimulationManager instance.
+        No de-duplication: registering *fn* twice makes it run twice.
+        """
+        cls._on_ready_hooks.append(fn)
+
+    @classmethod
+    def register_on_completed(cls, fn) -> None:
+        """Register *fn* to be called with the state when a simulation is COMPLETED.
+
+        Stored on the class, so it applies to every SimulationManager instance.
+        No de-duplication: registering *fn* twice makes it run twice.
+        """
+        cls._on_completed_hooks.append(fn)
+
+    def _notify_on_ready(self, state: "SimulationState") -> None:
+        """Call each registered on_ready hook with *state*; a failing hook is logged, not raised."""
+        for hook in tuple(type(self)._on_ready_hooks):  # snapshot of the class-level registry
+            try:
+                hook(state)
+            except Exception as e:
+                logger.warning(f"on_ready hook failed: {e!r}")
+
+    def _notify_on_completed(self, state: "SimulationState") -> None:
+        """Call each registered on_completed hook with *state*; a failing hook is logged, not raised."""
+        for hook in tuple(type(self)._on_completed_hooks):  # snapshot of the class-level registry
+            try:
+                hook(state)
+            except Exception as e:
+                logger.warning(f"on_completed hook failed: {e!r}")
+
+    # ------------------------------------------------------------------
+
     def create_simulation(
         self,
         project_id: str,
@@ -441,7 +488,8 @@ class SimulationManager:
             # 更新状态
             state.status = SimulationStatus.READY
             self._save_simulation_state(state)
-            
+            self._notify_on_ready(state)
+
             logger.info(f"模拟准备完成: {simulation_id}, "
                        f"entities={state.entities_count}, profiles={state.profiles_count}")
             
diff --git a/backend/app/services/simulation_runner.py b/backend/app/services/simulation_runner.py
index e86021f8..942f522f 100644
--- a/backend/app/services/simulation_runner.py
+++ b/backend/app/services/simulation_runner.py
@@ -226,7 +226,29 @@ class SimulationRunner:
     
     # 图谱记忆更新配置
     _graph_memory_enabled: Dict[str, bool] = {}  # simulation_id -> enabled
-    
+
+    # Completion callbacks registered from outside (e.g. SimulationManager lifecycle hooks).
+    # Each callable receives the SimulationRunState that just transitioned to COMPLETED.
+    _on_completed_callbacks: list = []
+
+    @classmethod
+    def register_on_completed(cls, fn) -> None:
+        """Register *fn* to be called with the SimulationRunState once a run is COMPLETED.
+
+        It is called from the monitor daemon thread, so keep it short or hand off to
+        another thread. No de-duplication: registering *fn* twice makes it run twice.
+        """
+        cls._on_completed_callbacks.append(fn)
+
+    @classmethod
+    def _fire_on_completed(cls, state: SimulationRunState) -> None:
+        """Run every registered completion callback with *state*, logging any that fail."""
+        for callback in tuple(cls._on_completed_callbacks):  # iterate over a snapshot
+            try:
+                callback(state)
+            except Exception as e:
+                logger.warning(f"on_completed callback failed: {e!r}")
+
     @classmethod
     def get_run_state(cls, simulation_id: str) -> Optional[SimulationRunState]:
         """获取运行状态"""
@@ -528,6 +550,7 @@ class SimulationRunner:
                 state.runner_status = RunnerStatus.COMPLETED
                 state.completed_at = datetime.now().isoformat()
                 logger.info(f"模拟完成: {simulation_id}")
+                cls._fire_on_completed(state)
             else:
                 state.runner_status = RunnerStatus.FAILED
                 # 从主日志文件读取错误信息
@@ -638,6 +661,7 @@ class SimulationRunner:
                                         state.runner_status = RunnerStatus.COMPLETED
                                         state.completed_at = datetime.now().isoformat()
                                         logger.info(f"所有平台模拟已完成: {state.simulation_id}")
+                                        cls._fire_on_completed(state)
                                 
                                 # 更新轮次信息（从 round_end 事件）
                                 elif event_type == "round_end":
diff --git a/backend/app/services/zep_graph_memory_updater.py b/backend/app/services/zep_graph_memory_updater.py
index e034fee2..86a4e1e2 100644
--- a/backend/app/services/zep_graph_memory_updater.py
+++ b/backend/app/services/zep_graph_memory_updater.py
@@ -337,6 +337,44 @@ class ZepGraphMemoryUpdater:
         self._total_activities += 1
         logger.debug(f"添加活动到Zep队列: {activity.agent_name} - {activity.action_type}")
     
+    def add_text_episode(self, graph_id: str, text: str) -> None:
+        """Write *text* to the Zep graph right away, bypassing the batch queue.
+
+        Meant for callers outside the agent-activity pipeline (for example
+        InterviewZepWriter). The send is retried up to MAX_RETRIES times; if every
+        attempt fails the error is logged and counted, and the method returns
+        normally.
+
+        Args:
+            graph_id: Target graph; a falsy value falls back to self.graph_id.
+            text: Content to send; empty text is ignored.
+        """
+        if not text:
+            return
+        target_graph_id = graph_id or self.graph_id
+        if not target_graph_id:
+            logger.warning("add_text_episode 调用时未指定graph_id，跳过")
+            return
+
+        last_attempt = self.MAX_RETRIES
+        # Attempts are numbered from 1 so the number doubles as the back-off multiplier.
+        for attempt in range(1, last_attempt + 1):
+            try:
+                self.client.graph.add(
+                    graph_id=target_graph_id, type="text", data=text,
+                )
+                self._total_sent += 1
+                self._total_items_sent += 1
+                logger.debug(f"add_text_episode 发送成功 (graph={target_graph_id}, len={len(text)})")
+                return
+            except Exception as e:
+                if attempt == last_attempt:
+                    logger.error(f"add_text_episode 失败，已重试{self.MAX_RETRIES}次: {e}")
+                    self._failed_count += 1
+                else:
+                    logger.warning(f"add_text_episode 失败 (尝试 {attempt}/{self.MAX_RETRIES}): {e}")
+                    time.sleep(self.RETRY_DELAY * attempt)
+
     def add_activity_from_dict(self, data: Dict[str, Any], platform: str):
         """
         从字典数据添加活动
diff --git a/backend/app/utils/llm_client.py b/backend/app/utils/llm_client.py
index 6c1a81f4..9b22ac02 100644
--- a/backend/app/utils/llm_client.py
+++ b/backend/app/utils/llm_client.py
@@ -32,6 +32,82 @@ class LLMClient:
             base_url=self.base_url
         )
     
+    def _stub_key(self, messages: list[dict]) -> str:  # stable key for a stubbed request
+        user_msg = next((m["content"] for m in reversed(messages) if m.get("role") == "user"), "")  # last user message
+        sys_msg = next((m["content"] for m in messages if m.get("role") == "system"), "")  # first system message
+        # An explicit whitespace-delimited `stub_key=<value>` token in the user message takes priority.
+        for chunk in user_msg.split():
+            if chunk.startswith("stub_key="):
+                return chunk[len("stub_key="):]
+        import hashlib  # otherwise: first 12 hex chars of SHA-256 over "system|user"
+        return hashlib.sha256((sys_msg + "|" + user_msg).encode("utf-8")).hexdigest()[:12]
+
+    def _stub_response(self, messages: list[dict]) -> str:  # JSON-text form of _stub_response_json
+        import json as _json
+        return _json.dumps(self._stub_response_json(messages), ensure_ascii=False)  # non-ASCII stays unescaped
+
+    def _stub_response_json(self, messages: list[dict]) -> dict:  # deterministic canned reply keyed on the prompt text
+        import hashlib, json as _json  # NOTE(review): _json is not used in this method
+        sys_msg = next((m["content"] for m in messages if m.get("role") == "system"), "")
+        usr_msg = next((m["content"] for m in reversed(messages) if m.get("role") == "user"), "")
+        h = hashlib.sha256((sys_msg + "|" + usr_msg).encode("utf-8")).hexdigest()
+        seed = int(h[:8], 16)  # 32-bit value; right-shifts of 32 or more bits yield 0
+        rng = (seed % 5) + 1  # 1..5; only the fallback reply uses it
+        # Longitudinal Likert (12 items), each 1..5 from successive 3-bit shifts of seed.
+        # NOTE(review): with a 32-bit seed, shift 33 (clm_3) always gives 1 and shift 30 (clm_2) never gives 5.
+        if all(tok in usr_msg for tok in ("stk_1", "gov_1", "mkt_1", "clm_1")):
+            ids = ["stk_1","stk_2","stk_3","gov_1","gov_2","gov_3",
+                   "mkt_1","mkt_2","mkt_3","clm_1","clm_2","clm_3"]
+            return {"responses": {k: ((seed >> (i*3)) % 5) + 1 for i, k in enumerate(ids)},
+                    "confidence": {k: 0.6 for k in ids},
+                    "open_comment": f"stub:{h[:8]}"}
+
+        # Diversity Q-sort: 24 statements + 6 axes, forced distribution 2,3,4,6,4,3,2
+        if "st_01" in usr_msg and "ax_pres_extr" in usr_msg:
+            buckets = [-3]*2 + [-2]*3 + [-1]*4 + [0]*6 + [1]*4 + [2]*3 + [3]*2
+            stmts = [f"st_{i+1:02d}" for i in range(24)]
+            # Deterministic shuffle: order statement indices by their hex digit in h, ties broken by index.
+            order = sorted(range(24), key=lambda i: (h[i % len(h)], i))
+            placements = {stmts[i]: buckets[order.index(i)] for i in range(24)}
+            return {
+                "placements": placements,
+                "likert_axes": {a: ((seed >> (j*3)) % 7) + 1 for j, a in enumerate(
+                    ["ax_pres_extr","ax_loc_eu","ax_sci_trad",
+                     "ax_ind_col","ax_short_long","ax_mkt_reg"])},
+            }
+
+        # Scenario: S1..S4 × 4 rated dimensions (each 1..7) plus one free-text reaction per scenario
+        if all(s in usr_msg for s in ("S1:", "S2:", "S3:", "S4:")):
+            return {"ratings": {sid: {
+                "desirability": ((seed >> (i*3)) % 7) + 1,
+                "plausibility": ((seed >> (i*3+1)) % 7) + 1,
+                "impact_on_my_group": ((seed >> (i*3+2)) % 7) + 1,
+                "fairness": ((seed >> (i*3+4)) % 7) + 1,  # NOTE(review): offset jumps from +2 to +4 — confirm intended
+                "if_woke_up_response": f"act-{sid}-{h[:4]}",
+            } for i, sid in enumerate(["S1","S2","S3","S4"])}}
+
+        # Delphi R1: q1..q4 free text. NOTE(review): only the German R2/R3 markers are excluded here, not the English ones.
+        if "q1" in usr_msg and "q2" in usr_msg and "Bewerten" not in usr_msg and "Sie sehen" not in usr_msg:
+            return {"answers": {qid: f"stub-themes-{qid}-{h[:4]}" for qid in ("q1","q2","q3","q4")}}
+
+        # Delphi theme extraction (no in-character system prompt); always five themes, theme_0..theme_4
+        if "extract distinct thematic codes" in sys_msg:
+            return {"themes": [{"theme_id": f"theme_{i}", "label": f"Thema {i}"} for i in range(5)]}
+
+        # Delphi R2 (rate) or R3 (revise); R3 markers additionally add a "justification" field
+        if "Bewerten Sie jedes Thema" in usr_msg or "Sie sehen unten" in usr_msg \
+           or "Rate each theme" in usr_msg or "Below are the anonymised" in usr_msg:
+            theme_ids = [f"theme_{i}" for i in range(5)]
+            out = {"ratings": {tid: {"importance": ((seed >> (i*2)) % 5) + 1,
+                                     "plausibility": ((seed >> (i*2+1)) % 5) + 1}
+                               for i, tid in enumerate(theme_ids)}}
+            if "Sie sehen unten" in usr_msg or "Below are the anonymised" in usr_msg:
+                out["justification"] = "stub-revision"
+            return out
+
+        # Fallback: no known instrument marker matched
+        return {"stub_key": h[:12], "value": rng}
+
     def chat(
         self,
         messages: List[Dict[str, str]],
@@ -41,16 +117,20 @@ class LLMClient:
     ) -> str:
         """
         发送聊天请求
-        
+
         Args:
             messages: 消息列表
             temperature: 温度参数
             max_tokens: 最大token数
             response_format: 响应格式（如JSON模式）
-            
+
         Returns:
             模型响应文本
         """
+        from app.config import Config
+        if getattr(Config, "LLM_STUB_MODE", False):
+            return self._stub_response(messages)
+
         kwargs = {
             "model": self.model,
             "messages": messages,
@@ -75,15 +155,19 @@ class LLMClient:
     ) -> Dict[str, Any]:
         """
         发送聊天请求并返回JSON
-        
+
         Args:
             messages: 消息列表
             temperature: 温度参数
             max_tokens: 最大token数
-            
+
         Returns:
             解析后的JSON对象
         """
+        from app.config import Config
+        if getattr(Config, "LLM_STUB_MODE", False):
+            return self._stub_response_json(messages)
+
         response = self.chat(
             messages=messages,
             temperature=temperature,
diff --git a/backend/pyproject.toml b/backend/pyproject.toml
index 8c65b729..88fa3d13 100644
--- a/backend/pyproject.toml
+++ b/backend/pyproject.toml
@@ -32,6 +32,11 @@ dependencies = [
     # 工具库
     "python-dotenv>=1.0.0",
     "pydantic>=2.0.0",
+    "PyYAML>=6.0",
+    "scikit-learn>=1.4",
+    "scipy>=1.12",
+    "numpy>=1.26",
+    "pandas>=2.1",
 ]
 
 [project.optional-dependencies]
diff --git a/backend/pytest.ini b/backend/pytest.ini
new file mode 100644
index 00000000..60f69ff1
--- /dev/null
+++ b/backend/pytest.ini
@@ -0,0 +1,8 @@
+[pytest]
+testpaths = tests
+python_files = test_*.py
+python_classes = Test*
+python_functions = test_*
+addopts = -ra --strict-markers
+markers =
+    integration: marks integration tests (deselect with -m 'not integration')
diff --git a/backend/scripts/instruments/__init__.py b/backend/scripts/instruments/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/scripts/instruments/delphi_v1.yaml b/backend/scripts/instruments/delphi_v1.yaml
new file mode 100644
index 00000000..bb7650dc
--- /dev/null
+++ b/backend/scripts/instruments/delphi_v1.yaml
@@ -0,0 +1,9 @@
+name: delphi_v1
+version: "1.0"
+language_default: de
+rounds: 3
+questions:
+  - {question_id: q1, de: "Welche drei Faktoren werden die deutsche Fischerei bis 2040 am stärksten prägen?", en: "Which three factors will most shape German fisheries by 2040?"}
+  - {question_id: q2, de: "Welche Akteurinnen und Akteure sind heute entscheidend, werden aber unterschätzt?", en: "Which actors are decisive today but underestimated?"}
+  - {question_id: q3, de: "Was sollte sich in den nächsten fünf Jahren ändern, damit die Fischerei eine Zukunft hat?", en: "What should change in the next five years for fisheries to have a future?"}
+  - {question_id: q4, de: "Welcher Trend macht Ihnen am meisten Hoffnung – und welcher am meisten Sorge?", en: "Which trend gives you most hope — and which most concern?"}
diff --git a/backend/scripts/instruments/diversity_v1.yaml b/backend/scripts/instruments/diversity_v1.yaml
new file mode 100644
index 00000000..7c47cd96
--- /dev/null
+++ b/backend/scripts/instruments/diversity_v1.yaml
@@ -0,0 +1,36 @@
+name: diversity_v1
+version: "1.0"
+language_default: de
+distribution: [2, 3, 4, 6, 4, 3, 2]   # buckets from -3 to +3, total 24
+statements:
+  - {statement_id: st_01, de: "Die Ostsee gehört den Fischern, die hier seit Generationen leben.", en: "The Baltic belongs to fishers who have lived here for generations."}
+  - {statement_id: st_02, de: "MSC-Zertifizierung schützt vor allem große Konzerne.", en: "MSC certification mainly protects large corporations."}
+  - {statement_id: st_03, de: "Wissenschaftliche Quoten sind die einzige Grundlage für Politik.", en: "Scientific quotas are the only legitimate basis for policy."}
+  - {statement_id: st_04, de: "Aquakultur kann Ostseefischerei ersetzen.", en: "Aquaculture can replace Baltic fisheries."}
+  - {statement_id: st_05, de: "Sportfischer schaden den Beständen mehr als die Berufsfischer.", en: "Recreational anglers harm stocks more than commercial fishers."}
+  - {statement_id: st_06, de: "Die EU-Fischereipolitik kennt die Ostsee nicht.", en: "EU fisheries policy doesn't understand the Baltic."}
+  - {statement_id: st_07, de: "Großtechnische Fischerei ist effizienter und damit nachhaltiger.", en: "Industrial fisheries are more efficient and therefore more sustainable."}
+  - {statement_id: st_08, de: "Wer Fisch isst, sollte mehr dafür bezahlen.", en: "Those who eat fish should pay more for it."}
+  - {statement_id: st_09, de: "Die Kleinfischerei muss subventioniert werden.", en: "Small-scale fisheries must be subsidised."}
+  - {statement_id: st_10, de: "Marine Schutzgebiete sind reine Symbolpolitik.", en: "Marine protected areas are mere symbolism."}
+  - {statement_id: st_11, de: "Russlands Krieg ändert alles in der Ostsee.", en: "Russia's war changes everything in the Baltic."}
+  - {statement_id: st_12, de: "Nur drastische Reduktion der Fangmengen rettet die Bestände.", en: "Only drastic catch reductions will save the stocks."}
+  - {statement_id: st_13, de: "NGOs übertreiben die Krise systematisch.", en: "NGOs systematically exaggerate the crisis."}
+  - {statement_id: st_14, de: "Klimawandel ist das eigentliche Problem, nicht die Fischerei.", en: "Climate change is the real problem, not fisheries."}
+  - {statement_id: st_15, de: "Tradition zählt mehr als kurzfristige Bestandszahlen.", en: "Tradition matters more than short-term stock numbers."}
+  - {statement_id: st_16, de: "Verbraucher entscheiden über die Zukunft des Fisches.", en: "Consumers decide the future of fish."}
+  - {statement_id: st_17, de: "Ohne Generalstreik der Fischer ändert sich nichts.", en: "Without a fishers' general strike, nothing will change."}
+  - {statement_id: st_18, de: "Die Bundesregierung sollte Kutter aufkaufen und stilllegen.", en: "The federal government should buy out and decommission boats."}
+  - {statement_id: st_19, de: "Die Dorschkrise ist Folge gescheiterter Politik.", en: "The cod crisis is the result of policy failure."}
+  - {statement_id: st_20, de: "Ostsee-Aquakultur ist ökologisch problematisch.", en: "Baltic aquaculture is ecologically problematic."}
+  - {statement_id: st_21, de: "Junge Menschen werden keinen Fischereibetrieb mehr übernehmen.", en: "Young people will no longer take over fishing businesses."}
+  - {statement_id: st_22, de: "Markt regelt sich selbst, auch beim Fisch.", en: "The market regulates itself, also for fish."}
+  - {statement_id: st_23, de: "Lokale Genossenschaften sind die Lösung.", en: "Local cooperatives are the solution."}
+  - {statement_id: st_24, de: "In 20 Jahren gibt es keine deutsche Ostseefischerei mehr.", en: "In 20 years there will be no German Baltic fisheries left."}
+likert_axes:
+  - {axis_id: ax_pres_extr, scale: 7, de: "Bewahrung (1) vs. Nutzung (7)", en: "Preservation (1) vs. Extraction (7)"}
+  - {axis_id: ax_loc_eu,    scale: 7, de: "Lokal (1) vs. EU-zentral (7)",  en: "Local (1) vs. EU-central (7)"}
+  - {axis_id: ax_sci_trad,  scale: 7, de: "Wissenschaft (1) vs. Tradition (7)", en: "Science-led (1) vs. Tradition-led (7)"}
+  - {axis_id: ax_ind_col,   scale: 7, de: "Individuum (1) vs. Kollektiv (7)", en: "Individual (1) vs. Collective (7)"}
+  - {axis_id: ax_short_long,scale: 7, de: "Kurzfristig (1) vs. Langfristig (7)", en: "Short-term (1) vs. Long-term (7)"}
+  - {axis_id: ax_mkt_reg,   scale: 7, de: "Markt (1) vs. Regulierung (7)", en: "Market (1) vs. Regulation (7)"}
diff --git a/backend/scripts/instruments/longitudinal_v1.yaml b/backend/scripts/instruments/longitudinal_v1.yaml
new file mode 100644
index 00000000..7a37d18c
--- /dev/null
+++ b/backend/scripts/instruments/longitudinal_v1.yaml
@@ -0,0 +1,47 @@
+name: longitudinal_v1
+version: "1.0"
+language_default: de
+items:
+  # Stock status & recovery
+  - {item_id: stk_1, family: stocks, scale: 5,
+     de: "Der westliche Dorschbestand wird sich bis 2035 erholen.",
+     en: "The Western Baltic cod stock will recover by 2035."}
+  - {item_id: stk_2, family: stocks, scale: 5,
+     de: "Der Heringsbestand in der westlichen Ostsee ist nicht mehr zu retten.",
+     en: "The Western Baltic herring stock can no longer be saved.",
+     reverse_coded: true}
+  - {item_id: stk_3, family: stocks, scale: 5,
+     de: "Wissenschaftliche Bestandsschätzungen sind generell zuverlässig.",
+     en: "Scientific stock assessments are generally reliable."}
+  # Governance & CFP
+  - {item_id: gov_1, family: governance, scale: 5,
+     de: "Die Gemeinsame Fischereipolitik der EU scheitert beim Schutz der Ostseefische.",
+     en: "The EU Common Fisheries Policy fails to protect Baltic fish.",
+     reverse_coded: true}
+  - {item_id: gov_2, family: governance, scale: 5,
+     de: "Entscheidungen über Fangquoten sollten stärker lokal getroffen werden.",
+     en: "Decisions on catch quotas should be taken more locally."}
+  - {item_id: gov_3, family: governance, scale: 5,
+     de: "Die deutsche Bundesregierung handelt entschlossen bei Fischereifragen.",
+     en: "The German federal government acts decisively on fisheries issues."}
+  # Market & MSC
+  - {item_id: mkt_1, family: market, scale: 5,
+     de: "Nur MSC-zertifizierter Fisch sollte verkauft werden dürfen.",
+     en: "Only MSC-certified fish should be allowed for sale."}
+  - {item_id: mkt_2, family: market, scale: 5,
+     de: "Importierter Fisch verdrängt die deutsche Kleinfischerei.",
+     en: "Imported fish displaces German small-scale fisheries."}
+  - {item_id: mkt_3, family: market, scale: 5,
+     de: "Verbraucher zahlen gerne mehr für nachhaltigen Ostseefisch.",
+     en: "Consumers gladly pay more for sustainable Baltic fish."}
+  # Climate & adaptation
+  - {item_id: clm_1, family: climate, scale: 5,
+     de: "Der Klimawandel macht traditionelle Ostseefischerei unmöglich.",
+     en: "Climate change makes traditional Baltic fisheries impossible.",
+     reverse_coded: true}
+  - {item_id: clm_2, family: climate, scale: 5,
+     de: "Aquakultur ist die Zukunft der deutschen Fischwirtschaft.",
+     en: "Aquaculture is the future of the German fishing industry."}
+  - {item_id: clm_3, family: climate, scale: 5,
+     de: "Die Fischerei muss sich grundlegend an neue Arten anpassen.",
+     en: "Fisheries must fundamentally adapt to new species."}
diff --git a/backend/scripts/instruments/scenario_v1.yaml b/backend/scripts/instruments/scenario_v1.yaml
new file mode 100644
index 00000000..5c150b80
--- /dev/null
+++ b/backend/scripts/instruments/scenario_v1.yaml
@@ -0,0 +1,51 @@
+name: scenario_v1
+version: "1.0"
+language_default: de
+scenarios:
+  - scenario_id: S1
+    label_de: "Erholung 2040"
+    label_en: "Recovery 2040"
+    description_de: |
+      Bis 2040 haben sich Dorsch- und Heringsbestände in der westlichen Ostsee
+      deutlich erholt. MSC-Zertifizierung ist branchenweit Standard. Die kleine
+      Küstenfischerei hat sich stabilisiert; die Politik gilt als erfolgreich.
+    description_en: |
+      By 2040, Western Baltic cod and herring stocks have substantially recovered.
+      MSC certification is industry-wide standard. Small-scale coastal fisheries
+      have stabilised; policy is regarded as successful.
+  - scenario_id: S2
+    label_de: "Kollaps 2040"
+    label_en: "Collapse 2040"
+    description_de: |
+      Bis 2040 sind Dorsch- und Heringsbestände zusammengebrochen. Die Flotte
+      ist halbiert, Aquakultur dominiert den Markt, Häfen veröden.
+    description_en: |
+      By 2040, cod and herring stocks have collapsed. The fleet is halved,
+      aquaculture dominates the market, harbour towns decline.
+  - scenario_id: S3
+    label_de: "Festung Europa 2040"
+    label_en: "Fortress Europe 2040"
+    description_de: |
+      Bis 2040 verfolgt die EU eine protektionistische Politik mit hohen Importzöllen,
+      Meeresschutzgebiete bedecken 30% der Ostsee, Sportfischerei ist stark eingeschränkt.
+    description_en: |
+      By 2040, the EU pursues a protectionist policy with high import tariffs,
+      MPAs cover 30% of the Baltic, recreational fishing is strongly curtailed.
+  - scenario_id: S4
+    label_de: "Privatisierung 2040"
+    label_en: "Privatisation 2040"
+    description_de: |
+      Bis 2040 sind Fangrechte als handelbare Quoten (ITQs) etabliert. Die Branche
+      hat sich konsolidiert; nur große, kapitalstarke Unternehmen sind übrig.
+    description_en: |
+      By 2040, fishing rights are tradable quotas (ITQs). The industry has
+      consolidated; only large, well-capitalised firms remain.
+dimensions:
+  - {dimension_id: desirability, scale: 7,
+     de: "Wie wünschenswert ist dieses Szenario?", en: "How desirable is this scenario?"}
+  - {dimension_id: plausibility, scale: 7,
+     de: "Wie plausibel ist dieses Szenario?",   en: "How plausible is this scenario?"}
+  - {dimension_id: impact_on_my_group, scale: 7,
+     de: "Wie stark trifft es Ihre Gruppe?",     en: "How strongly does it affect your group?"}
+  - {dimension_id: fairness, scale: 7,
+     de: "Wie fair ist dieses Szenario?",        en: "How fair is this scenario?"}
diff --git a/backend/tests/__init__.py b/backend/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py
new file mode 100644
index 00000000..2ba3931d
--- /dev/null
+++ b/backend/tests/conftest.py
@@ -0,0 +1,17 @@
+import os
+import sys
+import pathlib
+import pytest
+
+ROOT = pathlib.Path(__file__).resolve().parents[1]
+sys.path.insert(0, str(ROOT))
+
+os.environ.setdefault("LLM_API_KEY", "test")
+os.environ.setdefault("LLM_BASE_URL", "https://example.invalid")
+os.environ.setdefault("LLM_MODEL_NAME", "test-model")
+os.environ.setdefault("ZEP_API_KEY", "test")
+
+@pytest.fixture
+def tmp_uploads(tmp_path, monkeypatch):
+    monkeypatch.setenv("UPLOADS_DIR", str(tmp_path))
+    return tmp_path
diff --git a/backend/tests/integration/__init__.py b/backend/tests/integration/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/tests/integration/test_interview_pipeline.py b/backend/tests/integration/test_interview_pipeline.py
new file mode 100644
index 00000000..54bb0540
--- /dev/null
+++ b/backend/tests/integration/test_interview_pipeline.py
@@ -0,0 +1,81 @@
+import json
+import pytest
+from pathlib import Path
+from app.config import Config
+from app.models.interview import SubagentKind, InterviewPhase
+from app.services.interviews.adapters import FileSystemPersonaProvider
+from app.services.interviews.base import MemoryDigest
+from app.services.interviews.zep_writer import InterviewZepWriter
+from app.services.interview_orchestrator import InterviewOrchestrator
+from app.services.interview_synthesizer import InterviewSynthesizer
+from app.utils.llm_client import LLMClient
+
+pytestmark = pytest.mark.integration
+
+INST_DIR = Path(__file__).resolve().parents[2] / "scripts" / "instruments"
+
+class _NullUpdater:
+    def __init__(self): self.events = []
+    def add_text_episode(self, graph_id, text): self.events.append(text)
+
+class _StaticMem:
+    def get_digest(self, agent_id, max_chars=2000):
+        return MemoryDigest(text=f"agent {agent_id} memory snippet", available=True)
+
+@pytest.fixture
+def seeded_uploads(tmp_path, monkeypatch):  # stub-mode LLM + five seeded Reddit profiles under tmp_path
+    monkeypatch.setenv("LLM_STUB_MODE", "true")
+    monkeypatch.setattr(Config, "LLM_STUB_MODE", True, raising=False)  # restored at teardown, unlike plain assignment
+    sim_dir = tmp_path / "simulations" / "intg_sim"
+    sim_dir.mkdir(parents=True)
+    profiles = [{"user_id": i, "user_name": f"u{i}", "name": f"A{i}",
+                 "persona": "stakeholder p", "profession": "fisher"} for i in range(5)]
+    (sim_dir / "reddit_profiles.json").write_text(json.dumps(profiles), encoding="utf-8")
+    return tmp_path
+
+def _make_orch(tmp_path):
+    sim_dir = tmp_path / "simulations" / "intg_sim"
+    personas = FileSystemPersonaProvider(
+        reddit_path=sim_dir / "reddit_profiles.json", twitter_path=None,
+    )
+    llm = LLMClient(api_key="x", base_url="x", model="x")
+    updater = _NullUpdater()
+    writer = InterviewZepWriter(memory_updater=updater, graph_id="g")
+    return InterviewOrchestrator(
+        llm=llm, memory=_StaticMem(), personas=personas,
+        instrument_dir=INST_DIR, store_root=tmp_path, sim_id="intg_sim",
+        zep_writer=writer, max_workers=2, language="de",
+    )
+
+def test_pipeline_runs_pre_then_post_then_synthesis(seeded_uploads):
+    """Run the pre phase, the post phase and the synthesis end to end, then check the CSV export."""
+    orch = _make_orch(seeded_uploads)
+
+    pre = orch.run_pre()
+    assert pre["longitudinal"]["n_responded"] >= 1
+
+    post = orch.run_post()
+    assert "longitudinal" in post
+    assert "diversity" in post
+    assert "scenario" in post
+    assert "delphi" in post
+
+    synth = InterviewSynthesizer(store=orch.store)
+    report = synth.run()
+    assert "Stakeholder Interview Synthesis" in report
+    assert "Limitations" in report
+
+    csv_path = orch.store.base / "synthesis" / "exports" / "all_responses.csv"
+    assert csv_path.exists()
+    lines = csv_path.read_text(encoding="utf-8").splitlines()  # explicit encoding: do not depend on the locale default
+    assert "agent_id" in lines[0]  # header row must carry the agent_id column
+
+def test_idempotent_rerun_creates_new_run_id(seeded_uploads):
+    tmp = seeded_uploads
+    orch = _make_orch(tmp)
+    orch.run_pre()
+    first = orch.run_post()
+    second = orch.rerun(SubagentKind.SCENARIO)
+    first_scn = first["scenario"]["run_dir"]
+    second_scn = second["scenario"]["run_dir"]
+    assert first_scn != second_scn
diff --git a/backend/tests/interviews/__init__.py b/backend/tests/interviews/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/tests/interviews/test_adapters.py b/backend/tests/interviews/test_adapters.py
new file mode 100644
index 00000000..977d5997
--- /dev/null
+++ b/backend/tests/interviews/test_adapters.py
@@ -0,0 +1,123 @@
+import csv
+import json
+from pathlib import Path
+from app.services.interviews.adapters import (
+    FileSystemPersonaProvider, ZepMemoryProvider,
+)
+
+def _write_reddit_profiles(tmp_path: Path):
+    data = [
+        {"user_id": 0, "user_name": "fischer1", "name": "Fischer Müller",
+         "persona": "I am a small-scale Baltic fisher.", "profession": "fisher", "bio": ""},
+        {"user_id": 1, "user_name": "ngo1", "name": "Ines NGO",
+         "persona": "I work for an environmental NGO.", "profession": "ngo_staff", "bio": ""},
+    ]
+    p = tmp_path / "reddit_profiles.json"
+    p.write_text(json.dumps(data), encoding="utf-8")
+    return p
+
+def test_file_system_persona_provider_reads_reddit_json(tmp_path):
+    p = _write_reddit_profiles(tmp_path)
+    provider = FileSystemPersonaProvider(reddit_path=p, twitter_path=None)
+    personas = provider.all()
+    assert len(personas) == 2
+    assert personas[0].name == "Fischer Müller"
+    assert personas[0].agent_id == 0
+
+def test_zep_memory_provider_returns_empty_when_unavailable():  # a failing reader must not propagate its error
+    class _BrokenReader:
+        def get_entity_with_context(self, *a, **kw):
+            raise RuntimeError("offline")
+    prov = ZepMemoryProvider(entity_reader=_BrokenReader(), graph_id="g1",
+                             agent_to_entity={0: "uuid-zero"})
+    d = prov.get_digest(0)
+    assert d.available is False
+    assert d.text != ""  # "empty" in the test name means unavailable; the digest text itself must be non-empty
+
+def test_zep_memory_provider_truncates_to_max_chars():
+    class _R:
+        def get_entity_with_context(self, *a, **kw):
+            class _Ctx:
+                name = "X"; summary = "Y"
+                related_edges = [{"fact": "very long fact " * 200}]
+            return _Ctx()
+    prov = ZepMemoryProvider(entity_reader=_R(), graph_id="g1",
+                             agent_to_entity={5: "uuid-five"})
+    d = prov.get_digest(5, max_chars=300)
+    assert d.available is True
+    assert len(d.text) <= 300
+
+
+def test_agent_to_entity_from_reddit_json(tmp_path):
+    """C5: ``FileSystemPersonaProvider.agent_to_entity()`` must reconstruct the
+    ``{agent_id: zep_entity_uuid}`` map from a reddit_profiles.json that
+    includes ``source_entity_uuid``.
+    """
+    data = [
+        {"user_id": 0, "user_name": "fischer1", "name": "Fischer Müller",
+         "persona": "p", "profession": "fisher",
+         "source_entity_uuid": "uuid-zero"},
+        {"user_id": 1, "user_name": "ngo1", "name": "Ines NGO",
+         "persona": "p", "profession": "ngo_staff",
+         "source_entity_uuid": "uuid-one"},
+        # Row with no uuid must be skipped.
+        {"user_id": 2, "user_name": "gov1", "name": "Gov Agent",
+         "persona": "p", "profession": "official"},
+    ]
+    p = tmp_path / "reddit_profiles.json"
+    p.write_text(json.dumps(data), encoding="utf-8")
+
+    provider = FileSystemPersonaProvider(reddit_path=p, twitter_path=None)
+    mapping = provider.agent_to_entity()
+
+    assert mapping == {0: "uuid-zero", 1: "uuid-one"}
+    # Map values are strings, keys are ints.
+    for k, v in mapping.items():
+        assert isinstance(k, int)
+        assert isinstance(v, str)
+
+
+def test_agent_to_entity_empty_when_no_field(tmp_path):
+    """C5: if no row has ``source_entity_uuid``, return an empty dict — not
+    a crash, not partial garbage."""
+    data = [{"user_id": 0, "user_name": "u", "name": "A", "persona": "p"}]
+    p = tmp_path / "reddit_profiles.json"
+    p.write_text(json.dumps(data), encoding="utf-8")
+    provider = FileSystemPersonaProvider(reddit_path=p, twitter_path=None)
+    assert provider.agent_to_entity() == {}
+
+
+def test_agent_to_entity_falls_back_to_twitter_csv(tmp_path):
+    """C5: when only twitter_profiles.csv exists, the helper must still
+    extract uuids from the CSV's ``source_entity_uuid`` column.
+    """
+    p = tmp_path / "twitter_profiles.csv"
+    with p.open("w", newline="", encoding="utf-8") as f:
+        writer = csv.writer(f)
+        writer.writerow(["user_id", "name", "username", "user_char", "description", "source_entity_uuid"])
+        writer.writerow([0, "A0", "u0", "char", "desc", "uuid-zero"])
+        writer.writerow([1, "A1", "u1", "char", "desc", ""])  # skipped (blank uuid)
+        writer.writerow([2, "A2", "u2", "char", "desc", "uuid-two"])
+
+    provider = FileSystemPersonaProvider(reddit_path=None, twitter_path=p)
+    assert provider.agent_to_entity() == {0: "uuid-zero", 2: "uuid-two"}
+
+
+def test_agent_to_entity_reddit_takes_precedence(tmp_path):
+    """C5: when both files exist, Reddit JSON wins; Twitter CSV only fills
+    agents not already mapped."""
+    reddit = tmp_path / "reddit_profiles.json"
+    reddit.write_text(json.dumps([
+        {"user_id": 0, "user_name": "u0", "name": "A0", "persona": "p",
+         "source_entity_uuid": "reddit-zero"},
+    ]), encoding="utf-8")
+
+    twitter = tmp_path / "twitter_profiles.csv"
+    with twitter.open("w", newline="", encoding="utf-8") as f:
+        writer = csv.writer(f)
+        writer.writerow(["user_id", "name", "username", "user_char", "description", "source_entity_uuid"])
+        writer.writerow([0, "A0", "u0", "char", "desc", "twitter-zero"])  # ignored
+        writer.writerow([1, "A1", "u1", "char", "desc", "twitter-one"])  # used
+
+    provider = FileSystemPersonaProvider(reddit_path=reddit, twitter_path=twitter)
+    assert provider.agent_to_entity() == {0: "reddit-zero", 1: "twitter-one"}
diff --git a/backend/tests/interviews/test_api_interview.py b/backend/tests/interviews/test_api_interview.py
new file mode 100644
index 00000000..7e55d627
--- /dev/null
+++ b/backend/tests/interviews/test_api_interview.py
@@ -0,0 +1,155 @@
+import json
+import os
+from pathlib import Path
+import pytest
+
+@pytest.fixture
+def client(tmp_path, monkeypatch):  # Flask test client over a tmp uploads dir with the LLM in stub mode
+    monkeypatch.setenv("LLM_STUB_MODE", "true")
+    monkeypatch.setenv("UPLOADS_DIR", str(tmp_path))
+    from app.config import Config
+    monkeypatch.setattr(Config, "LLM_STUB_MODE", True, raising=False)  # setattr is undone at teardown; plain assignment leaked
+    monkeypatch.setattr(Config, "UPLOADS_DIR", str(tmp_path), raising=False)
+    # Seed a minimal reddit_profiles.json
+    sim_dir = tmp_path / "simulations" / "sim_test"
+    sim_dir.mkdir(parents=True)
+    profiles = [{"user_id": i, "user_name": f"u{i}", "name": f"A{i}",
+                 "persona": "p", "profession": "fisher"} for i in range(3)]
+    (sim_dir / "reddit_profiles.json").write_text(json.dumps(profiles), encoding="utf-8")
+    from flask import Flask
+    from app.api import register_blueprints
+    app = Flask(__name__)
+    register_blueprints(app)
+    return app.test_client()
+
+def test_post_pre_returns_task_id(client):
+    res = client.post("/api/interview/sim_test/pre")
+    assert res.status_code == 200
+    body = res.get_json()
+    assert body["success"] is True
+    assert "task_id" in body["data"]
+
+def test_status_endpoint_returns_progress(client):
+    res = client.post("/api/interview/sim_test/pre")
+    task_id = res.get_json()["data"]["task_id"]
+    res2 = client.get(f"/api/interview/sim_test/status?task_id={task_id}")
+    assert res2.status_code == 200
+    assert "status" in res2.get_json()["data"]
+
+def test_unknown_subagent_returns_400(client):
+    res = client.post("/api/interview/sim_test/rerun",
+                      json={"subagent": "nonsense"})
+    assert res.status_code == 400
+
+
+def test_build_orchestrator_reads_graph_id_from_state(tmp_path, monkeypatch):
+    """C1+C2: ``_build_orchestrator`` must resolve the Zep graph_id from
+    ``state.json`` (written by ``SimulationManager``), not from the
+    nonexistent ``graph_id.txt``.  The graph_id then must reach the
+    ``InterviewZepWriter`` instead of being silently swallowed.
+    """
+    monkeypatch.setenv("LLM_STUB_MODE", "true")
+    monkeypatch.setenv("UPLOADS_DIR", str(tmp_path))
+    monkeypatch.setenv("ZEP_API_KEY", "test-fake-key")
+    from app.config import Config
+    monkeypatch.setattr(Config, "LLM_STUB_MODE", True, raising=False)  # setattr is undone at teardown; plain assignment leaked
+    monkeypatch.setattr(Config, "UPLOADS_DIR", str(tmp_path), raising=False)
+    monkeypatch.setattr(Config, "ZEP_API_KEY", "test-fake-key", raising=False)
+
+    # SimulationManager's data dir is class-level — point it at tmp_path.
+    from app.services.simulation_manager import SimulationManager
+    sim_root = tmp_path / "simulations"
+    sim_root.mkdir(parents=True, exist_ok=True)
+    monkeypatch.setattr(SimulationManager, "SIMULATION_DATA_DIR", str(sim_root))
+
+    sim_id = "sim_graphid"
+    sim_dir = sim_root / sim_id
+    sim_dir.mkdir(parents=True)
+    # Seed a profile file so FileSystemPersonaProvider can work.
+    (sim_dir / "reddit_profiles.json").write_text(
+        json.dumps([
+            {"user_id": 0, "user_name": "u0", "name": "A0",
+             "persona": "p", "profession": "fisher",
+             "source_entity_uuid": "uuid-zero"},
+            {"user_id": 1, "user_name": "u1", "name": "A1",
+             "persona": "p", "profession": "fisher",
+             "source_entity_uuid": "uuid-one"},
+        ]),
+        encoding="utf-8",
+    )
+    # Seed state.json with the graph_id.
+    state_doc = {
+        "simulation_id": sim_id,
+        "project_id": "p",
+        "graph_id": "graph-from-state",
+        "status": "ready",
+        "enable_twitter": False,
+        "enable_reddit": True,
+    }
+    (sim_dir / "state.json").write_text(json.dumps(state_doc), encoding="utf-8")
+
+    # Patch ZepGraphMemoryUpdater + ZepEntityReader so we don't hit the network.
+    import app.services.zep_graph_memory_updater as zgmu
+    import app.services.zep_entity_reader as zer
+
+    class _FakeUpdater:
+        def __init__(self, graph_id, api_key=None):
+            self.graph_id = graph_id
+
+        def add_text_episode(self, graph_id, text):
+            return None
+
+    class _FakeReader:
+        def __init__(self, api_key=None):
+            pass
+
+        def get_entity_with_context(self, graph_id, entity_uuid):
+            return None
+
+    monkeypatch.setattr(zgmu, "ZepGraphMemoryUpdater", _FakeUpdater)
+    monkeypatch.setattr(zer, "ZepEntityReader", _FakeReader)
+
+    from app.api.interview import _build_orchestrator
+
+    orch = _build_orchestrator(sim_id)
+    assert orch.zep_writer.graph_id == "graph-from-state"
+    # Updater on the writer must be the real (or fake) ZepGraphMemoryUpdater path,
+    # NOT the null updater — i.e. its graph_id must match.
+    assert getattr(orch.zep_writer.updater, "graph_id", None) == "graph-from-state"
+
+    # ZepMemoryProvider must have received the agent_to_entity map (C5).
+    assert hasattr(orch.memory, "map")
+    assert orch.memory.map == {0: "uuid-zero", 1: "uuid-one"}
+
+
+def test_build_orchestrator_falls_back_when_state_missing(tmp_path, monkeypatch):
+    """C1+C2: when ``state.json`` is missing, the orchestrator must still be
+    constructed with the null updater/memory path (not crash, not silently
+    pass a bare ``ZepGraphMemoryUpdater()`` that would error out).
+    """
+    monkeypatch.setenv("LLM_STUB_MODE", "true")
+    monkeypatch.setenv("UPLOADS_DIR", str(tmp_path))
+    from app.config import Config
+    monkeypatch.setattr(Config, "LLM_STUB_MODE", True, raising=False)  # setattr is undone at teardown; plain assignment leaked
+    monkeypatch.setattr(Config, "UPLOADS_DIR", str(tmp_path), raising=False)
+
+    from app.services.simulation_manager import SimulationManager
+    sim_root = tmp_path / "simulations"
+    sim_root.mkdir(parents=True, exist_ok=True)
+    monkeypatch.setattr(SimulationManager, "SIMULATION_DATA_DIR", str(sim_root))
+
+    sim_id = "sim_no_state"
+    sim_dir = sim_root / sim_id
+    sim_dir.mkdir(parents=True)
+    (sim_dir / "reddit_profiles.json").write_text(
+        json.dumps([{"user_id": 0, "user_name": "u0", "name": "A0",
+                     "persona": "p", "profession": "fisher"}]),
+        encoding="utf-8",
+    )
+
+    from app.api.interview import _build_orchestrator
+
+    orch = _build_orchestrator(sim_id)
+    assert orch.zep_writer.graph_id == ""
+    # Null updater path: writer must still respond to _emit without raising.
+    orch.zep_writer._emit("hello")
diff --git a/backend/tests/interviews/test_base_interviewer.py b/backend/tests/interviews/test_base_interviewer.py
new file mode 100644
index 00000000..03295867
--- /dev/null
+++ b/backend/tests/interviews/test_base_interviewer.py
@@ -0,0 +1,96 @@
+import json
+import pytest
+from app.services.interviews.base import (
+    StakeholderInterviewer, MemoryDigest, PersonaRecord, SchemaValidationFailure,
+    coerce_int,
+)
+
+
+def test_coerce_int_accepts_real_int():
+    assert coerce_int(3) == 3
+    assert coerce_int(-2) == -2
+    assert coerce_int(0) == 0
+
+
+def test_coerce_int_accepts_numeric_strings():
+    assert coerce_int("3") == 3
+    assert coerce_int(" 4 ") == 4
+    assert coerce_int("-2") == -2
+
+
+def test_coerce_int_rejects_non_numeric():
+    assert coerce_int("3.5") is None
+    assert coerce_int("abc") is None
+    assert coerce_int(None) is None
+    assert coerce_int([3]) is None
+    assert coerce_int(3.5) is None
+
+
+def test_coerce_int_rejects_bool():
+    """True/False should NOT silently coerce to 1/0 even though Python says they're ints."""
+    assert coerce_int(True) is None
+    assert coerce_int(False) is None
+
+
+class _FakeLLM:
+    def __init__(self, responses):
+        self.responses = list(responses)
+        self.calls = []
+    def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
+        self.calls.append(messages)
+        return self.responses.pop(0)
+
+class _FakeMemory:
+    def get_digest(self, agent_id, max_chars=2000):
+        return MemoryDigest(text=f"digest-for-{agent_id}", available=True)
+
+def test_in_character_prompt_includes_persona_and_memory():
+    llm = _FakeLLM([{"x": 1}])
+    mem = _FakeMemory()
+    interviewer = StakeholderInterviewer(llm=llm, memory=mem)
+    persona = PersonaRecord(agent_id=7, name="A", persona="I am a small-scale Baltic fisher.")
+    out = interviewer.ask_in_character(persona, user_prompt="Q?", schema_hint="{...}")
+    assert out == {"x": 1}
+    sys_msg = llm.calls[0][0]["content"]
+    assert "small-scale Baltic fisher" in sys_msg
+    assert "digest-for-7" in sys_msg
+
+def test_schema_retry_on_first_failure():
+    bad_then_good = [{}, {"responses": {"a": 3}}]
+    llm = _FakeLLM(bad_then_good)
+    mem = _FakeMemory()
+    interviewer = StakeholderInterviewer(llm=llm, memory=mem)
+    def validator(d):
+        return d if "responses" in d else None
+    persona = PersonaRecord(agent_id=1, name="A", persona="p")
+    out = interviewer.ask_in_character(persona, user_prompt="Q?", schema_hint="x", validate=validator)
+    assert out == {"responses": {"a": 3}}
+    assert len(llm.calls) == 2
+
+def test_two_failures_raise():
+    llm = _FakeLLM([{}, {}])
+    mem = _FakeMemory()
+    interviewer = StakeholderInterviewer(llm=llm, memory=mem)
+    persona = PersonaRecord(agent_id=1, name="A", persona="p")
+    with pytest.raises(ValueError):
+        interviewer.ask_in_character(persona, user_prompt="Q?", schema_hint="x",
+                                     validate=lambda d: d if "responses" in d else None)
+
+
+def test_schema_failure_captures_both_raw_attempts():
+    bad1 = {"oops": "no responses key"}
+    bad2 = {"still": "wrong shape"}
+    llm = _FakeLLM([bad1, bad2])
+    mem = _FakeMemory()
+    interviewer = StakeholderInterviewer(llm=llm, memory=mem)
+    persona = PersonaRecord(agent_id=42, name="A", persona="p")
+    with pytest.raises(SchemaValidationFailure) as exc_info:
+        interviewer.ask_in_character(persona, user_prompt="Q?", schema_hint="x",
+                                     validate=lambda d: d if "responses" in d else None)
+    err = exc_info.value
+    assert err.agent_id == 42
+    assert len(err.attempts) == 2
+    assert err.attempts[0]["raw"] == bad1
+    assert err.attempts[1]["raw"] == bad2
+    assert err.attempts[0]["attempt"] == 1
+    assert err.attempts[1]["attempt"] == 2
diff --git a/backend/tests/interviews/test_delphi.py b/backend/tests/interviews/test_delphi.py
new file mode 100644
index 00000000..e55cab7a
--- /dev/null
+++ b/backend/tests/interviews/test_delphi.py
@@ -0,0 +1,84 @@
+from pathlib import Path
+from app.services.interviews.base import PersonaRecord, MemoryDigest
+from app.services.interviews.delphi import (
+    DelphiSubagent, extract_themes, convergence_metrics,
+)
+
+INSTRUMENT = Path(__file__).resolve().parents[2] / "scripts" / "instruments" / "delphi_v1.yaml"
+
+class _Mem:
+    def get_digest(self, agent_id, max_chars=2000):
+        return MemoryDigest(text="x", available=True)
+
+class _R1LLM:
+    def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
+        return {"answers": {
+            "q1": "Klimawandel, Quoten, Generationswechsel",
+            "q2": "MSC, Aquakultur",
+            "q3": "Russland, EU-Politik",
+            "q4": "Verbraucherpreise",
+        }}
+
+class _R2LLM:
+    def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
+        return {"ratings": {f"theme_{i}": {"importance": 4, "plausibility": 3} for i in range(5)}}
+
+class _ExtractLLM:
+    def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
+        return {"themes": [
+            {"theme_id": "theme_0", "label": "Klimawandel"},
+            {"theme_id": "theme_1", "label": "Quoten"},
+            {"theme_id": "theme_2", "label": "MSC"},
+            {"theme_id": "theme_3", "label": "EU-Politik"},
+            {"theme_id": "theme_4", "label": "Generationswechsel"},
+        ]}
+
+def test_delphi_round1_open():
+    sub = DelphiSubagent(llm=_R1LLM(), memory=_Mem(), instrument_path=INSTRUMENT)
+    persona = PersonaRecord(agent_id=2, name="A", persona="p")
+    resp = sub.administer_round1(persona)
+    assert resp.round == 1
+    assert len(resp.answers) == 4
+
+def test_extract_themes_aggregates():
+    from app.models.interview import DelphiOpenResponse
+    r1 = [DelphiOpenResponse(agent_id=i, answers={"q1": "Klimawandel", "q2": "MSC"}) for i in range(3)]
+    themes = extract_themes(r1, llm=_ExtractLLM())
+    assert len(themes) == 5
+    assert all("theme_id" in t for t in themes)
+
+def test_convergence_metrics():
+    from app.models.interview import DelphiRatingResponse
+    r2 = [DelphiRatingResponse(agent_id=i, round=2,
+            ratings={"t1": {"importance": 3, "plausibility": 3}}) for i in range(5)]
+    r3 = [DelphiRatingResponse(agent_id=i, round=3,
+            ratings={"t1": {"importance": 4, "plausibility": 4}}) for i in range(5)]
+    conv = convergence_metrics(r2, r3)
+    assert "t1" in conv
+    assert conv["t1"]["delta_iqr_importance"] is not None
+
+
+def test_delphi_r2_accepts_string_ratings():
+    """Delphi R2/R3 ratings should accept stringified importance/plausibility ints."""
+    from app.services.interviews.base import PersonaRecord, MemoryDigest
+    from app.services.interviews.delphi import DelphiSubagent
+    from pathlib import Path as _P
+
+    class _Mem:
+        def get_digest(self, agent_id, max_chars=2000):
+            return MemoryDigest(text="x", available=True)
+
+    class _StringLLM:
+        def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
+            return {"ratings": {
+                "t1": {"importance": "4", "plausibility": "3"},
+                "t2": {"importance": "5", "plausibility": "2"},
+            }}
+
+    inst = _P(__file__).resolve().parents[2] / "scripts" / "instruments" / "delphi_v1.yaml"
+    sub = DelphiSubagent(llm=_StringLLM(), memory=_Mem(), instrument_path=inst)
+    persona = PersonaRecord(agent_id=1, name="A", persona="p")
+    themes = [{"theme_id": "t1", "label": "T1"}, {"theme_id": "t2", "label": "T2"}]
+    resp = sub.administer_round2(persona, themes)
+    assert resp.ratings["t1"]["importance"] == 4
+    assert isinstance(resp.ratings["t1"]["importance"], int)
diff --git a/backend/tests/interviews/test_diversity.py b/backend/tests/interviews/test_diversity.py
new file mode 100644
index 00000000..d8eb45d3
--- /dev/null
+++ b/backend/tests/interviews/test_diversity.py
@@ -0,0 +1,78 @@
+from pathlib import Path
+import numpy as np
+from app.services.interviews.base import PersonaRecord, MemoryDigest
+from app.services.interviews.diversity import (
+    DiversitySubagent, run_typology,
+)
+
+class _Mem:
+    def get_digest(self, agent_id, max_chars=2000):
+        return MemoryDigest(text="x", available=True)
+
+class _CannedLLM:
+    def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
+        # Place all 24 statements into legal buckets per the forced distribution
+        placements = {}
+        buckets = [-3]*2 + [-2]*3 + [-1]*4 + [0]*6 + [1]*4 + [2]*3 + [3]*2
+        for i in range(24):
+            placements[f"st_{i+1:02d}"] = buckets[i]
+        return {
+            "placements": placements,
+            "likert_axes": {"ax_pres_extr": 5, "ax_loc_eu": 3, "ax_sci_trad": 4,
+                            "ax_ind_col": 4, "ax_short_long": 5, "ax_mkt_reg": 3},
+        }
+
+INSTRUMENT = Path(__file__).resolve().parents[2] / "scripts" / "instruments" / "diversity_v1.yaml"
+
+def test_diversity_administer():
+    sub = DiversitySubagent(llm=_CannedLLM(), memory=_Mem(), instrument_path=INSTRUMENT)
+    persona = PersonaRecord(agent_id=1, name="A", persona="p")
+    resp = sub.administer(persona)
+    assert len(resp.placements) == 24
+    assert set(resp.likert_axes.keys()) == {
+        "ax_pres_extr","ax_loc_eu","ax_sci_trad","ax_ind_col","ax_short_long","ax_mkt_reg"
+    }
+
+def test_typology_runs_pca_kmeans():
+    from app.models.interview import QSortResponse
+    rng = np.random.default_rng(42)
+    responses = []
+    for aid in range(20):
+        placements = {f"st_{i+1:02d}": int(rng.integers(-3, 4)) for i in range(24)}
+        axes = {f"ax_{j}": int(rng.integers(1, 8)) for j in range(6)}
+        responses.append(QSortResponse(agent_id=aid, placements=placements, likert_axes=axes))
+    result = run_typology(responses, n_clusters=3)
+    assert "clusters" in result
+    assert len(result["clusters"]) == 3
+    assert "pca" in result
+    assert len(result["pca"]["components"]) >= 2
+
+
+def test_diversity_accepts_string_likert_values():
+    """Diversity placements + axes should accept stringified ints."""
+    from app.services.interviews.base import PersonaRecord, MemoryDigest
+    from app.services.interviews.diversity import DiversitySubagent
+    from pathlib import Path as _P
+
+    class _Mem:
+        def get_digest(self, agent_id, max_chars=2000):
+            return MemoryDigest(text="x", available=True)
+
+    buckets = [-3]*2 + [-2]*3 + [-1]*4 + [0]*6 + [1]*4 + [2]*3 + [3]*2
+
+    class _StringLLM:
+        def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
+            return {
+                "placements": {f"st_{i+1:02d}": str(buckets[i]) for i in range(24)},
+                "likert_axes": {a: "4" for a in (
+                    "ax_pres_extr","ax_loc_eu","ax_sci_trad",
+                    "ax_ind_col","ax_short_long","ax_mkt_reg")},
+            }
+
+    inst = _P(__file__).resolve().parents[2] / "scripts" / "instruments" / "diversity_v1.yaml"
+    sub = DiversitySubagent(llm=_StringLLM(), memory=_Mem(), instrument_path=inst)
+    persona = PersonaRecord(agent_id=7, name="A", persona="p")
+    resp = sub.administer(persona)
+    assert isinstance(resp.placements["st_01"], int)
+    assert isinstance(resp.likert_axes["ax_pres_extr"], int)
+    assert resp.likert_axes["ax_pres_extr"] == 4
diff --git a/backend/tests/interviews/test_instrument_loader.py b/backend/tests/interviews/test_instrument_loader.py
new file mode 100644
index 00000000..dfb0852e
--- /dev/null
+++ b/backend/tests/interviews/test_instrument_loader.py
@@ -0,0 +1,44 @@
+import pytest
+from app.services.interviews.instrument_loader import (
+    load_likert_instrument, InstrumentValidationError,
+)
+
+def _write(tmp_path, text):
+    p = tmp_path / "inst.yaml"
+    p.write_text(text, encoding="utf-8")
+    return p
+
+def test_loads_valid_likert(tmp_path):
+    p = _write(tmp_path, """
+name: longitudinal_v1
+version: "1.0"
+language_default: de
+items:
+  - item_id: stk_1
+    de: "Der westliche Dorschbestand wird sich erholen"
+    en: "Western cod stock will recover"
+    scale: 5
+    family: stocks
+""")
+    inst = load_likert_instrument(p)
+    assert inst.name == "longitudinal_v1"
+    assert len(inst.items) == 1
+
+def test_rejects_duplicate_item_id(tmp_path):
+    p = _write(tmp_path, """
+name: x
+items:
+  - {item_id: a, de: d, en: e, scale: 5}
+  - {item_id: a, de: d, en: e, scale: 5}
+""")
+    with pytest.raises(InstrumentValidationError):
+        load_likert_instrument(p)
+
+def test_rejects_missing_required_field(tmp_path):
+    p = _write(tmp_path, """
+name: x
+items:
+  - {item_id: a, de: d, scale: 5}
+""")
+    with pytest.raises(InstrumentValidationError):
+        load_likert_instrument(p)
diff --git a/backend/tests/interviews/test_lifecycle.py b/backend/tests/interviews/test_lifecycle.py
new file mode 100644
index 00000000..f8d2c952
--- /dev/null
+++ b/backend/tests/interviews/test_lifecycle.py
@@ -0,0 +1,26 @@
+"""
+Tests for interview lifecycle hook installer (Task 20).
+"""
+
+from app.services.interviews.lifecycle import install_hooks
+
+
+class _StubMgr:
+    def __init__(self):
+        self.ready = []
+        self.completed = []
+
+    def register_on_ready(self, fn):
+        self.ready.append(fn)
+
+    def register_on_completed(self, fn):
+        self.completed.append(fn)
+
+
+def test_install_hooks_registers_two_callables():
+    mgr = _StubMgr()
+    install_hooks(mgr)
+    assert len(mgr.ready) == 1
+    assert len(mgr.completed) == 1
+    assert callable(mgr.ready[0])
+    assert callable(mgr.completed[0])
diff --git a/backend/tests/interviews/test_llm_stub.py b/backend/tests/interviews/test_llm_stub.py
new file mode 100644
index 00000000..6be5ed2a
--- /dev/null
+++ b/backend/tests/interviews/test_llm_stub.py
@@ -0,0 +1,17 @@
+import json
+from app.utils.llm_client import LLMClient
+
+
+def test_stub_mode_returns_deterministic_canned_json(monkeypatch):
+    monkeypatch.setenv("LLM_STUB_MODE", "true")
+    from app.config import Config
+    monkeypatch.setattr(Config, "LLM_STUB_MODE", True, raising=False)  # restored at teardown
+    client = LLMClient(api_key="x", base_url="x", model="x")
+    messages = [
+        {"role": "system", "content": "You are persona_42. Return JSON."},
+        {"role": "user", "content": "stub_key=longitudinal:item_001"},
+    ]
+    out1 = client.chat_json(messages=messages, temperature=0.0)
+    out2 = client.chat_json(messages=messages, temperature=0.0)
+    assert out1 == out2
+    assert isinstance(out1, dict)
diff --git a/backend/tests/interviews/test_longitudinal.py b/backend/tests/interviews/test_longitudinal.py
new file mode 100644
index 00000000..006c293a
--- /dev/null
+++ b/backend/tests/interviews/test_longitudinal.py
@@ -0,0 +1,91 @@
+from pathlib import Path
+import pytest
+from app.models.interview import InterviewPhase
+from app.services.interviews.base import PersonaRecord, MemoryDigest
+from app.services.interviews.longitudinal import LongitudinalSubagent, run_aggregate
+
+
+class _FakeMem:
+    def get_digest(self, agent_id, max_chars=2000):
+        return MemoryDigest(text="x", available=True)
+
+
+class _CannedLLM:
+    def __init__(self): self.n = 0
+    def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
+        self.n += 1
+        return {
+            "responses": {
+                "stk_1": 4, "stk_2": 3, "stk_3": 5,
+                "gov_1": 3, "gov_2": 4, "gov_3": 2,
+                "mkt_1": 5, "mkt_2": 3, "mkt_3": 4,
+                "clm_1": 2, "clm_2": 4, "clm_3": 5,
+            },
+            "confidence": {
+                "stk_1": 0.8, "stk_2": 0.7, "stk_3": 0.9,
+                "gov_1": 0.6, "gov_2": 0.7, "gov_3": 0.5,
+                "mkt_1": 0.7, "mkt_2": 0.6, "mkt_3": 0.8,
+                "clm_1": 0.5, "clm_2": 0.7, "clm_3": 0.6,
+            },
+            "open_comment": "test",
+        }
+
+
+INSTRUMENT = Path(__file__).resolve().parents[2] / "scripts" / "instruments" / "longitudinal_v1.yaml"
+
+
+def test_longitudinal_administer_one_agent():
+    sub = LongitudinalSubagent(llm=_CannedLLM(), memory=_FakeMem(), instrument_path=INSTRUMENT)
+    persona = PersonaRecord(agent_id=3, name="A", persona="p")
+    resp = sub.administer(persona, phase=InterviewPhase.T0)
+    assert resp.agent_id == 3
+    assert resp.phase == InterviewPhase.T0
+    assert set(resp.responses.keys()) >= {"stk_1", "gov_1", "mkt_1", "clm_1"}
+
+
+def test_longitudinal_aggregate_delta():
+    from app.models.interview import LikertResponse
+    t0 = [LikertResponse(agent_id=i, phase=InterviewPhase.T0,
+                         responses={"stk_1": 3, "gov_1": 4},
+                         confidence={"stk_1": 0.8, "gov_1": 0.8}) for i in range(5)]
+    t1 = [LikertResponse(agent_id=i, phase=InterviewPhase.T1,
+                         responses={"stk_1": 4, "gov_1": 4},
+                         confidence={"stk_1": 0.8, "gov_1": 0.8}) for i in range(5)]
+    agg = run_aggregate(t0, t1)
+    assert agg["per_item"]["stk_1"]["mean_delta"] == 1.0
+    assert agg["per_item"]["gov_1"]["mean_delta"] == 0.0
+    assert agg["n_paired"] == 5
+
+
+def test_longitudinal_accepts_string_likert_values():
+    """Real LLMs sometimes return Likert values as JSON strings ('3' not 3).
+    The validator should coerce them rather than fail the agent."""
+    from app.models.interview import InterviewPhase
+    from app.services.interviews.base import PersonaRecord, MemoryDigest
+    from app.services.interviews.longitudinal import LongitudinalSubagent
+    from pathlib import Path as _P
+
+    class _Mem:
+        def get_digest(self, agent_id, max_chars=2000):
+            return MemoryDigest(text="x", available=True)
+
+    class _StringLLM:
+        def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
+            return {
+                "responses": {  # all strings, not ints
+                    "stk_1": "4", "stk_2": "3", "stk_3": "5",
+                    "gov_1": "3", "gov_2": "4", "gov_3": "2",
+                    "mkt_1": "5", "mkt_2": "3", "mkt_3": "4",
+                    "clm_1": "2", "clm_2": "4", "clm_3": "5",
+                },
+                "confidence": {},
+                "open_comment": "stringified",
+            }
+
+    inst = _P(__file__).resolve().parents[2] / "scripts" / "instruments" / "longitudinal_v1.yaml"
+    sub = LongitudinalSubagent(llm=_StringLLM(), memory=_Mem(), instrument_path=inst)
+    persona = PersonaRecord(agent_id=99, name="A", persona="p")
+    resp = sub.administer(persona, phase=InterviewPhase.T0)
+    assert resp.agent_id == 99
+    assert resp.responses["stk_1"] == 4
+    assert isinstance(resp.responses["stk_1"], int)
diff --git a/backend/tests/interviews/test_models.py b/backend/tests/interviews/test_models.py
new file mode 100644
index 00000000..e575d118
--- /dev/null
+++ b/backend/tests/interviews/test_models.py
@@ -0,0 +1,30 @@
+import pytest
+from pydantic import ValidationError
+from app.models.interview import (
+    LikertItem, LikertInstrument, LikertResponse,
+    InterviewPhase, SubagentKind,
+)
+
+def test_likert_item_requires_de_and_en():
+    item = LikertItem(item_id="x1", de="Frage", en="Question", scale=5)
+    assert item.scale == 5
+
+def test_likert_item_rejects_bad_scale():
+    with pytest.raises(ValidationError):
+        LikertItem(item_id="x1", de="d", en="e", scale=2)
+
+def test_likert_instrument_unique_item_ids():
+    with pytest.raises(ValidationError):
+        LikertInstrument(
+            name="t",
+            items=[LikertItem(item_id="a", de="d", en="e", scale=5),
+                   LikertItem(item_id="a", de="d", en="e", scale=5)],
+        )
+
+def test_likert_response_validates_scale_range():
+    with pytest.raises(ValidationError):
+        LikertResponse(agent_id=1, phase=InterviewPhase.T0,
+                       responses={"a": 6}, confidence={"a": 0.5})
+
+def test_subagent_kind_enum():
+    assert SubagentKind.LONGITUDINAL.value == "longitudinal"
diff --git a/backend/tests/interviews/test_orchestrator.py b/backend/tests/interviews/test_orchestrator.py
new file mode 100644
index 00000000..8d380eaf
--- /dev/null
+++ b/backend/tests/interviews/test_orchestrator.py
@@ -0,0 +1,95 @@
+from pathlib import Path
+import pytest
+from app.models.interview import InterviewPhase, SubagentKind
+from app.services.interviews.base import PersonaRecord, MemoryDigest
+from app.services.interview_orchestrator import (
+    InterviewOrchestrator, PersonaProvider,
+)
+
+INST_DIR = Path(__file__).resolve().parents[2] / "scripts" / "instruments"
+
+class _Mem:
+    def get_digest(self, agent_id, max_chars=2000):
+        return MemoryDigest(text="x", available=True)
+
+class _LLM:
+    def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
+        sys_text = next((m["content"] for m in messages if m["role"] == "system"), "")
+        if "longitudinal" in sys_text or "stk_" in (messages[-1].get("content") or ""):
+            return {
+                "responses": {k: 3 for k in ("stk_1","stk_2","stk_3","gov_1","gov_2","gov_3",
+                                             "mkt_1","mkt_2","mkt_3","clm_1","clm_2","clm_3")},
+                "confidence": {}, "open_comment": "ok",
+            }
+        return {}
+
+class _Personas(PersonaProvider):
+    def __init__(self, n=3):
+        self._items = [PersonaRecord(agent_id=i, name=f"A{i}", persona="p") for i in range(n)]
+    def all(self): return list(self._items)
+
+class _NoopZep:
+    def write_per_agent(self, *a, **kw): pass
+    def write_aggregate(self, *a, **kw): pass
+
+def test_pre_phase_runs_longitudinal_only(tmp_path):
+    orch = InterviewOrchestrator(
+        llm=_LLM(), memory=_Mem(), personas=_Personas(3),
+        instrument_dir=INST_DIR, store_root=tmp_path, sim_id="sim1",
+        zep_writer=_NoopZep(), max_workers=2,
+    )
+    result = orch.run_pre()
+    assert result["longitudinal"]["n_responded"] == 3
+    assert "diversity" not in result  # only longitudinal in pre-phase
+
+def test_partial_failure_does_not_kill_run(tmp_path):
+    class _FlakyLLM:
+        def __init__(self): self.n = 0
+        def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
+            self.n += 1
+            if self.n % 2 == 0:
+                raise RuntimeError("simulated LLM 5xx")
+            return {
+                "responses": {k: 3 for k in ("stk_1","stk_2","stk_3","gov_1","gov_2","gov_3",
+                                             "mkt_1","mkt_2","mkt_3","clm_1","clm_2","clm_3")},
+                "confidence": {}, "open_comment": "ok",
+            }
+    orch = InterviewOrchestrator(
+        llm=_FlakyLLM(), memory=_Mem(), personas=_Personas(4),
+        instrument_dir=INST_DIR, store_root=tmp_path, sim_id="sim2",
+        zep_writer=_NoopZep(), max_workers=1,
+    )
+    result = orch.run_pre()
+    assert result["longitudinal"]["n_responded"] < 4
+    assert result["longitudinal"]["n_failed"] > 0
+
+
+def test_schema_failure_audit_captures_raw_llm_output(tmp_path):
+    """When an agent's LLM output fails the schema validator twice, the audit log
+    should preserve both raw outputs so we can debug what the model actually said."""
+    bad_response = {"wrong": "shape, no responses key"}
+    class _BadLLM:
+        def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
+            return bad_response  # always fails Longitudinal validator
+    orch = InterviewOrchestrator(
+        llm=_BadLLM(), memory=_Mem(), personas=_Personas(1),
+        instrument_dir=INST_DIR, store_root=tmp_path, sim_id="sim3",
+        zep_writer=_NoopZep(), max_workers=1,
+    )
+    result = orch.run_pre()
+    assert result["longitudinal"]["n_responded"] == 0
+    assert result["longitudinal"]["n_failed"] == 1
+
+    import json as _j
+    run_dir = Path(result["longitudinal"]["run_dir"])
+    audit_path = run_dir / "audit.jsonl"
+    lines = audit_path.read_text(encoding="utf-8").splitlines()
+    assert lines, "audit.jsonl should not be empty"
+    entry = _j.loads(lines[0])
+    assert entry["event"] == "schema_validation_failure"
+    assert entry["agent_id"] == 0
+    detail = entry["detail"]
+    assert detail["label"] == "longitudinal_T0"
+    assert len(detail["attempts"]) == 2
+    assert detail["attempts"][0]["raw"] == bad_response
+    assert detail["attempts"][1]["raw"] == bad_response
diff --git a/backend/tests/interviews/test_scenario.py b/backend/tests/interviews/test_scenario.py
new file mode 100644
index 00000000..61787211
--- /dev/null
+++ b/backend/tests/interviews/test_scenario.py
@@ -0,0 +1,60 @@
+from pathlib import Path
+from app.services.interviews.base import PersonaRecord, MemoryDigest
+from app.services.interviews.scenario import ScenarioSubagent, polarity_matrix
+
+INSTRUMENT = Path(__file__).resolve().parents[2] / "scripts" / "instruments" / "scenario_v1.yaml"
+
+class _Mem:
+    def get_digest(self, agent_id, max_chars=2000):
+        return MemoryDigest(text="x", available=True)
+
+class _LLM:
+    def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
+        return {"ratings": {sid: {
+            "desirability": 4, "plausibility": 3, "impact_on_my_group": 5, "fairness": 3,
+            "if_woke_up_response": f"act-on-{sid}",
+        } for sid in ("S1", "S2", "S3", "S4")}}
+
+def test_scenario_administer():
+    sub = ScenarioSubagent(llm=_LLM(), memory=_Mem(), instrument_path=INSTRUMENT)
+    persona = PersonaRecord(agent_id=1, name="A", persona="p")
+    resp = sub.administer(persona)
+    assert set(resp.ratings.keys()) == {"S1", "S2", "S3", "S4"}
+    assert resp.ratings["S1"].desirability == 4
+
+def test_polarity_matrix():
+    from app.models.interview import ScenarioResponse, ScenarioRating
+    responses = [ScenarioResponse(agent_id=i, ratings={
+        "S1": ScenarioRating(desirability=5, plausibility=4, impact_on_my_group=5, fairness=4,
+                              if_woke_up_response="x"),
+    }) for i in range(3)]
+    m = polarity_matrix(responses)
+    assert "S1" in m
+    assert m["S1"]["mean_desirability"] == 5
+    assert m["S1"]["n"] == 3
+
+
+def test_scenario_accepts_string_likert_values():
+    """Scenario ratings should accept stringified ints across all 4 dimensions."""
+    from app.services.interviews.base import PersonaRecord, MemoryDigest
+    from app.services.interviews.scenario import ScenarioSubagent
+    from pathlib import Path as _P
+
+    class _Mem:
+        def get_digest(self, agent_id, max_chars=2000):
+            return MemoryDigest(text="x", available=True)
+
+    class _StringLLM:
+        def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
+            return {"ratings": {sid: {
+                "desirability": "4", "plausibility": "3",
+                "impact_on_my_group": "5", "fairness": "3",
+                "if_woke_up_response": f"act-{sid}",
+            } for sid in ("S1","S2","S3","S4")}}
+
+    inst = _P(__file__).resolve().parents[2] / "scripts" / "instruments" / "scenario_v1.yaml"
+    sub = ScenarioSubagent(llm=_StringLLM(), memory=_Mem(), instrument_path=inst)
+    persona = PersonaRecord(agent_id=3, name="A", persona="p")
+    resp = sub.administer(persona)
+    assert resp.ratings["S1"].desirability == 4
+    assert isinstance(resp.ratings["S1"].desirability, int)
diff --git a/backend/tests/interviews/test_simulation_hooks.py b/backend/tests/interviews/test_simulation_hooks.py
new file mode 100644
index 00000000..52852d28
--- /dev/null
+++ b/backend/tests/interviews/test_simulation_hooks.py
@@ -0,0 +1,96 @@
+"""
+Tests for SimulationManager lifecycle hooks (on_ready / on_completed).
+
+NOTE ON SHAPE DIVERGENCE vs. original plan spec:
+- SimulationState uses `simulation_id` (not `sim_id`)
+- `status` is a SimulationStatus enum, not a plain string
+- The COMPLETED transition lives in simulation_runner.py (SimulationRunner._monitor_simulation),
+  not in simulation_manager.py.  The _notify_on_completed hook is registered on SimulationManager
+  and the production insertion point for COMPLETED is documented in DONE_WITH_CONCERNS.
+
+Hooks are stored on the class (C3 fix), so each test snapshots/restores the
+registries via the autouse fixture to keep test isolation.
+"""
+
+import pytest
+
+from app.services.simulation_manager import SimulationManager, SimulationState, SimulationStatus
+
+
+@pytest.fixture(autouse=True)
+def _isolate_class_hooks():
+    saved_ready = list(SimulationManager._on_ready_hooks)
+    saved_completed = list(SimulationManager._on_completed_hooks)
+    try:
+        yield
+    finally:
+        SimulationManager._on_ready_hooks[:] = saved_ready
+        SimulationManager._on_completed_hooks[:] = saved_completed
+
+
+def test_register_post_ready_hook_invoked():
+    called = []
+    mgr = SimulationManager()
+    mgr.register_on_ready(lambda state: called.append(("ready", state.simulation_id)))
+    state = SimulationState(
+        simulation_id="abc",
+        project_id="proj1",
+        graph_id="graph1",
+        status=SimulationStatus.READY,
+    )
+    mgr._notify_on_ready(state)
+    assert called == [("ready", "abc")]
+
+
+def test_register_post_completed_hook_invoked():
+    called = []
+    mgr = SimulationManager()
+    mgr.register_on_completed(lambda state: called.append(("done", state.simulation_id)))
+    state = SimulationState(
+        simulation_id="abc",
+        project_id="proj1",
+        graph_id="graph1",
+        status=SimulationStatus.COMPLETED,
+    )
+    mgr._notify_on_completed(state)
+    assert called == [("done", "abc")]
+
+
+def test_hooks_survive_across_instances():
+    """C3: hook registries are class-level, so callbacks registered through the
+    classmethod must still fire on a freshly constructed instance.  This is
+    what makes the Flask per-request ``SimulationManager()`` pattern work
+    after ``install_hooks(SimulationManager)`` runs at app startup.
+    """
+    called: list[str] = []
+
+    # Register via the class — the production install_hooks(cls) path.
+    SimulationManager.register_on_ready(lambda s: called.append(f"ready:{s.simulation_id}"))
+    SimulationManager.register_on_completed(lambda s: called.append(f"done:{s.simulation_id}"))
+
+    # New, independently-constructed instance must still see the hooks.
+    fresh = SimulationManager()
+    state = SimulationState(
+        simulation_id="cross_instance",
+        project_id="p",
+        graph_id="g",
+        status=SimulationStatus.READY,
+    )
+    fresh._notify_on_ready(state)
+    state.status = SimulationStatus.COMPLETED
+    fresh._notify_on_completed(state)
+
+    assert "ready:cross_instance" in called
+    assert "done:cross_instance" in called
+
+
+def test_register_via_instance_also_lands_on_class():
+    """Registering through an instance must populate the class registry too —
+    backward-compatibility with code that calls ``manager.register_on_*``."""
+    def hook(state):
+        return None
+    SimulationManager().register_on_ready(hook)
+    # A second, unrelated instance must see this exact hook (not just any hook).
+    mgr2 = SimulationManager()
+    assert hook in SimulationManager._on_ready_hooks
+    assert hook in mgr2._on_ready_hooks
diff --git a/backend/tests/interviews/test_storage.py b/backend/tests/interviews/test_storage.py
new file mode 100644
index 00000000..26837e92
--- /dev/null
+++ b/backend/tests/interviews/test_storage.py
@@ -0,0 +1,37 @@
+import json
+from pathlib import Path
+from app.models.interview import (
+    LikertResponse, InterviewPhase, SubagentKind,
+)
+from app.services.interviews.storage import InterviewStore
+
+def test_run_directory_layout(tmp_path):
+    store = InterviewStore(root=tmp_path, sim_id="sim42")
+    run_dir = store.start_run(phase=InterviewPhase.T0, subagent=SubagentKind.LONGITUDINAL)
+    assert run_dir.exists()
+    assert run_dir.parent.name == "longitudinal"
+    assert run_dir.parent.parent.name == "T0"
+
+def test_append_response(tmp_path):
+    store = InterviewStore(root=tmp_path, sim_id="sim42")
+    run_dir = store.start_run(phase=InterviewPhase.T0, subagent=SubagentKind.LONGITUDINAL)
+    r = LikertResponse(agent_id=1, phase=InterviewPhase.T0,
+                       responses={"a": 3}, confidence={"a": 0.5})
+    store.append_response(run_dir, r)
+    contents = (run_dir / "responses.jsonl").read_text()
+    assert json.loads(contents.splitlines()[0])["agent_id"] == 1
+
+def test_write_aggregate_and_latest_pointer(tmp_path):
+    store = InterviewStore(root=tmp_path, sim_id="sim42")
+    run_dir = store.start_run(phase=InterviewPhase.T1, subagent=SubagentKind.SCENARIO)
+    store.write_aggregate(run_dir, {"k": 1})
+    store.mark_latest(run_dir)
+    latest = (run_dir.parent / "latest.json").read_text()
+    assert json.loads(latest)["run_dir"].endswith(run_dir.name)
+
+def test_audit_log_append(tmp_path):
+    store = InterviewStore(root=tmp_path, sim_id="sim42")
+    run_dir = store.start_run(phase=InterviewPhase.T0, subagent=SubagentKind.DELPHI)
+    store.audit(run_dir, agent_id=7, event="schema_violation", detail="missing key x")
+    audit = (run_dir / "audit.jsonl").read_text()
+    assert "schema_violation" in audit
diff --git a/backend/tests/interviews/test_synthesizer.py b/backend/tests/interviews/test_synthesizer.py
new file mode 100644
index 00000000..2a842114
--- /dev/null
+++ b/backend/tests/interviews/test_synthesizer.py
@@ -0,0 +1,32 @@
+import json
+from pathlib import Path
+from app.services.interviews.storage import InterviewStore
+from app.models.interview import InterviewPhase, SubagentKind, LikertResponse
+from app.services.interview_synthesizer import InterviewSynthesizer
+
+def _seed_minimal(tmp_path: Path) -> InterviewStore:
+    store = InterviewStore(root=tmp_path, sim_id="s1")
+    rd = store.start_run(InterviewPhase.T0, SubagentKind.LONGITUDINAL)
+    for i in range(3):
+        store.append_response(rd, LikertResponse(
+            agent_id=i, phase=InterviewPhase.T0,
+            responses={"stk_1": 3, "gov_1": 3}, confidence={"stk_1": 0.5, "gov_1": 0.5},
+        ))
+    store.write_aggregate(rd, {"per_item": {}, "n_paired": 0})
+    store.mark_latest(rd)
+    return store
+
+def test_synthesizer_runs_with_partial_data(tmp_path):
+    store = _seed_minimal(tmp_path)
+    synth = InterviewSynthesizer(store=store)
+    report = synth.run()
+    assert "limitations" in report.lower()
+    assert "stub mode" in report.lower() or "n_responded" in report.lower()
+
+def test_synthesizer_writes_files(tmp_path):
+    store = _seed_minimal(tmp_path)
+    synth = InterviewSynthesizer(store=store)
+    synth.run()
+    files = list((store.base / "synthesis").iterdir())
+    names = {f.name for f in files}
+    assert "report.md" in names
diff --git a/backend/tests/interviews/test_zep_writer.py b/backend/tests/interviews/test_zep_writer.py
new file mode 100644
index 00000000..6eaed454
--- /dev/null
+++ b/backend/tests/interviews/test_zep_writer.py
@@ -0,0 +1,77 @@
+import pytest
+
+from app.models.interview import (
+    LikertResponse, InterviewPhase, SubagentKind,
+)
+from app.services.interviews.zep_writer import InterviewZepWriter
+
+
+class _FakeMemoryUpdater:
+    """Fake mirroring the real ZepGraphMemoryUpdater contract.
+
+    Post-C4 the writer only uses ``add_text_episode(graph_id, text)`` —
+    ``add_activity`` is deliberately omitted to lock in the new behaviour and
+    catch any regression that re-introduces the broken dict-based fallback.
+    """
+
+    def __init__(self):
+        self.events: list[dict] = []
+
+    def add_text_episode(self, graph_id, text):
+        self.events.append({"graph_id": graph_id, "text": text})
+
+
+def test_per_agent_episode_text():
+    upd = _FakeMemoryUpdater()
+    w = InterviewZepWriter(memory_updater=upd, graph_id="g1")
+    r = LikertResponse(agent_id=42, phase=InterviewPhase.T1,
+                       responses={"stk_1": 4, "gov_1": 3},
+                       confidence={"stk_1": 0.8, "gov_1": 0.7})
+    w.write_per_agent(SubagentKind.LONGITUDINAL, r, agent_name="Fischer Müller")
+    assert any("Fischer Müller" in str(e) for e in upd.events)
+    assert any("longitudinal/T1" in str(e) for e in upd.events)
+    # Each event must carry the configured graph_id.
+    assert all(e["graph_id"] == "g1" for e in upd.events)
+
+
+def test_aggregate_episode():
+    upd = _FakeMemoryUpdater()
+    w = InterviewZepWriter(memory_updater=upd, graph_id="g1")
+    w.write_aggregate(SubagentKind.SCENARIO, summary="S1 mean desirability 5.2; S2 mean 2.1")
+    assert any("S1 mean" in str(e) for e in upd.events)
+
+
+def test_emit_uses_add_text_episode_with_graph_id():
+    """C4: ``_emit`` must call ``updater.add_text_episode(graph_id, text)``
+    with the constructor's graph_id and the raw text — no dict shape, no
+    ``add_activity`` fallback (the real ``add_activity`` rejects dicts).
+    """
+    upd = _FakeMemoryUpdater()
+    w = InterviewZepWriter(memory_updater=upd, graph_id="g_xyz")
+    w._emit("hello world")
+    assert upd.events == [{"graph_id": "g_xyz", "text": "hello world"}]
+
+
+def test_emit_raises_when_updater_lacks_add_text_episode():
+    """C4: a memory_updater without ``add_text_episode`` must surface a
+    RuntimeError rather than silently no-op via a broken ``add_activity``
+    fallback.
+    """
+
+    class _Broken:
+        def add_activity(self, activity):  # pragma: no cover - kept for clarity
+            raise AssertionError("must not be called")
+
+    w = InterviewZepWriter(memory_updater=_Broken(), graph_id="g1")
+    with pytest.raises(RuntimeError, match="add_text_episode"):
+        w._emit("x")
+
+
+def test_real_updater_exposes_add_text_episode():
+    """C4 sanity check: ZepGraphMemoryUpdater (the real class) must expose
+    ``add_text_episode`` so the production wiring works without falling
+    through to the broken ``add_activity(dict)`` path.
+    """
+    from app.services.zep_graph_memory_updater import ZepGraphMemoryUpdater
+
+    assert hasattr(ZepGraphMemoryUpdater, "add_text_episode")
diff --git a/backend/uv.lock b/backend/uv.lock
index 642dd9c3..612a6a31 100644
--- a/backend/uv.lock
+++ b/backend/uv.lock
@@ -994,10 +994,15 @@ dependencies = [
     { name = "charset-normalizer" },
     { name = "flask" },
     { name = "flask-cors" },
+    { name = "numpy" },
     { name = "openai" },
+    { name = "pandas" },
     { name = "pydantic" },
     { name = "pymupdf" },
     { name = "python-dotenv" },
+    { name = "pyyaml" },
+    { name = "scikit-learn" },
+    { name = "scipy" },
     { name = "zep-cloud" },
 ]
 
@@ -1022,13 +1027,18 @@ requires-dist = [
     { name = "charset-normalizer", specifier = ">=3.0.0" },
     { name = "flask", specifier = ">=3.0.0" },
     { name = "flask-cors", specifier = ">=6.0.0" },
+    { name = "numpy", specifier = ">=1.26" },
     { name = "openai", specifier = ">=1.0.0" },
+    { name = "pandas", specifier = ">=2.1" },
     { name = "pipreqs", marker = "extra == 'dev'", specifier = ">=0.5.0" },
     { name = "pydantic", specifier = ">=2.0.0" },
     { name = "pymupdf", specifier = ">=1.24.0" },
     { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.0" },
     { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.23.0" },
     { name = "python-dotenv", specifier = ">=1.0.0" },
+    { name = "pyyaml", specifier = ">=6.0" },
+    { name = "scikit-learn", specifier = ">=1.4" },
+    { name = "scipy", specifier = ">=1.12" },
     { name = "zep-cloud", specifier = "==3.13.0" },
 ]
 provides-extras = ["dev"]
diff --git a/docs/superpowers/plans/2026-05-23-stakeholder-interview-subagents.md b/docs/superpowers/plans/2026-05-23-stakeholder-interview-subagents.md
new file mode 100644
index 00000000..4de7f7c6
--- /dev/null
+++ b/docs/superpowers/plans/2026-05-23-stakeholder-interview-subagents.md
@@ -0,0 +1,3837 @@
+# Stakeholder Interview Subagents Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Build a four-subagent post-simulation interview system (Longitudinal, Diversity, Delphi, Scenario) over MiroFish-simulated stakeholders, plus a cross-method synthesiser, exposed via `/api/interview` and rendered in a new Vue Step4b.
+
+**Architecture:** Deterministic instrument runners (not ReACT). Shared `StakeholderInterviewer` base loads persona + Zep memory digest and administers per-instrument JSON-schema-validated prompts via the existing `LLMClient`. Four subagents own their own instrument YAML + output schema. `InterviewOrchestrator` fans out parallel post-sim execution; `InterviewSynthesizer` aggregates. Files: backend Python services + new Flask blueprint; frontend new Vue component with d3 viz.
+
+**Tech Stack:** Python 3.12, Flask, pydantic v2, PyYAML, scikit-learn (PCA, k-means), scipy (Wilcoxon), numpy, pytest; Vue 3, axios, d3 v7, vue-i18n.
+
+**Spec:** `docs/superpowers/specs/2026-05-23-stakeholder-interview-subagents-design.md`
+
+---
+
+## Phase 0 — Setup
+
+### Task 0: Add deps and pytest scaffold
+
+**Files:**
+- Modify: `backend/pyproject.toml`
+- Create: `backend/tests/__init__.py`
+- Create: `backend/tests/conftest.py`
+- Create: `backend/pytest.ini`
+
+- [ ] **Step 1: Add deps to `backend/pyproject.toml`**
+
+In the `dependencies` array (after `pydantic>=2.0.0`), add:
+```toml
+    "PyYAML>=6.0",
+    "scikit-learn>=1.4",
+    "scipy>=1.12",
+    "numpy>=1.26",
+    "pandas>=2.1",
+```
+
+- [ ] **Step 2: Create `backend/pytest.ini`**
+
+```ini
+[pytest]
+testpaths = tests
+python_files = test_*.py
+python_classes = Test*
+python_functions = test_*
+addopts = -ra --strict-markers
+markers =
+    integration: marks integration tests (deselect with -m 'not integration')
+```
+
+- [ ] **Step 3: Create `backend/tests/__init__.py`**
+
+Empty file.
+
+- [ ] **Step 4: Create `backend/tests/conftest.py`**
+
+```python
+import os
+import sys
+import pathlib
+import pytest
+
+ROOT = pathlib.Path(__file__).resolve().parents[1]
+sys.path.insert(0, str(ROOT))
+
+os.environ.setdefault("LLM_API_KEY", "test")
+os.environ.setdefault("LLM_BASE_URL", "https://example.invalid")
+os.environ.setdefault("LLM_MODEL_NAME", "test-model")
+os.environ.setdefault("ZEP_API_KEY", "test")
+
+@pytest.fixture
+def tmp_uploads(tmp_path, monkeypatch):
+    monkeypatch.setenv("UPLOADS_DIR", str(tmp_path))
+    return tmp_path
+```
+
+- [ ] **Step 5: Install + verify**
+
+Run: `cd backend && uv sync --python 3.12 --extra dev && uv run pytest -q`
+Expected: `no tests ran` with no errors (pytest exits with code 5 because nothing is collected yet). Confirms infrastructure works.
+
+- [ ] **Step 6: Commit**
+
+```bash
+git add backend/pyproject.toml backend/uv.lock backend/pytest.ini backend/tests/__init__.py backend/tests/conftest.py
+git commit -m "chore(interviews): add deps and pytest scaffold for interview subsystem"
+```
+
+---
+
+### Task 1: Add interview config keys
+
+**Files:**
+- Modify: `backend/app/config.py`
+
+- [ ] **Step 1: Read current config**
+
+Open `backend/app/config.py` and locate the `Config` class.
+
+- [ ] **Step 2: Add config keys**
+
+Add inside the `Config` class (preserving existing keys):
+```python
+    # Interview subsystem
+    INTERVIEW_MAX_TOKENS_PER_RUN = int(os.environ.get("INTERVIEW_MAX_TOKENS_PER_RUN", 15_000_000))
+    INTERVIEW_MAX_WORKERS = int(os.environ.get("INTERVIEW_MAX_WORKERS", 8))
+    INTERVIEW_DEFAULT_LANGUAGE = os.environ.get("INTERVIEW_DEFAULT_LANGUAGE", "de")
+    LLM_STUB_MODE = os.environ.get("LLM_STUB_MODE", "false").lower() == "true"
+```
+
+- [ ] **Step 3: Verify import**
+
+Run: `cd backend && uv run python -c "from app.config import Config; print(Config.INTERVIEW_MAX_WORKERS, Config.LLM_STUB_MODE)"`
+Expected: `8 False`
+
+- [ ] **Step 4: Commit**
+
+```bash
+git add backend/app/config.py
+git commit -m "feat(interviews): add interview config keys (token budget, workers, language, stub mode)"
+```
+
+---
+
+## Phase 1 — Foundation
+
+### Task 2: Pydantic models for instruments and responses
+
+**Files:**
+- Create: `backend/app/models/interview.py`
+- Create: `backend/tests/interviews/__init__.py`
+- Test: `backend/tests/interviews/test_models.py`
+
+- [ ] **Step 1: Write failing test**
+
+Create `backend/tests/interviews/__init__.py` (empty), then `backend/tests/interviews/test_models.py`:
+```python
+import pytest
+from pydantic import ValidationError
+from app.models.interview import (
+    LikertItem, LikertInstrument, LikertResponse,
+    InterviewPhase, SubagentKind,
+)
+
+def test_likert_item_requires_de_and_en():
+    item = LikertItem(item_id="x1", de="Frage", en="Question", scale=5)
+    assert item.scale == 5
+
+def test_likert_item_rejects_bad_scale():
+    with pytest.raises(ValidationError):
+        LikertItem(item_id="x1", de="d", en="e", scale=2)
+
+def test_likert_instrument_unique_item_ids():
+    with pytest.raises(ValidationError):
+        LikertInstrument(
+            name="t",
+            items=[LikertItem(item_id="a", de="d", en="e", scale=5),
+                   LikertItem(item_id="a", de="d", en="e", scale=5)],
+        )
+
+def test_likert_response_validates_scale_range():
+    with pytest.raises(ValidationError):
+        LikertResponse(agent_id=1, phase=InterviewPhase.T0,
+                       responses={"a": 8}, confidence={"a": 0.5})  # 8 lies outside the model's 1..7 bound
+
+def test_subagent_kind_enum():
+    assert SubagentKind.LONGITUDINAL.value == "longitudinal"
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `cd backend && uv run pytest tests/interviews/test_models.py -v`
+Expected: ImportError (module not yet created).
+
+- [ ] **Step 3: Create `backend/app/models/interview.py`**
+
+```python
+from __future__ import annotations
+from enum import Enum
+from typing import Optional
+from pydantic import BaseModel, Field, field_validator, model_validator
+
+class InterviewPhase(str, Enum):
+    T0 = "T0"
+    T1 = "T1"
+
+class SubagentKind(str, Enum):
+    LONGITUDINAL = "longitudinal"
+    DIVERSITY = "diversity"
+    DELPHI = "delphi"
+    SCENARIO = "scenario"
+
+class LikertItem(BaseModel):
+    item_id: str
+    de: str
+    en: str
+    scale: int = Field(ge=3, le=7)
+    family: Optional[str] = None
+    reverse_coded: bool = False
+
+    @field_validator("scale")
+    @classmethod
+    def odd_scale(cls, v: int) -> int:
+        if v not in (3, 5, 7):
+            raise ValueError("scale must be 3, 5, or 7")
+        return v
+
+class LikertInstrument(BaseModel):
+    name: str
+    version: str = "1.0"
+    language_default: str = "de"
+    items: list[LikertItem]
+
+    @model_validator(mode="after")
+    def unique_item_ids(self) -> "LikertInstrument":
+        ids = [i.item_id for i in self.items]
+        if len(set(ids)) != len(ids):
+            raise ValueError("duplicate item_id in instrument")
+        return self
+
+class LikertResponse(BaseModel):
+    agent_id: int
+    phase: InterviewPhase
+    responses: dict[str, int]
+    confidence: dict[str, float] = Field(default_factory=dict)
+    open_comment: Optional[str] = None
+    memory_available: bool = True
+    failed_items: list[str] = Field(default_factory=list)
+
+    @model_validator(mode="after")
+    def values_in_range(self) -> "LikertResponse":
+        for k, v in self.responses.items():
+            if not 1 <= v <= 7:
+                raise ValueError(f"response {k}={v} out of 1..7 range")
+        for k, v in self.confidence.items():
+            if not 0.0 <= v <= 1.0:
+                raise ValueError(f"confidence {k}={v} out of 0..1 range")
+        return self
+
+class QSortStatement(BaseModel):
+    statement_id: str
+    de: str
+    en: str
+
+class QSortInstrument(BaseModel):
+    name: str
+    version: str = "1.0"
+    statements: list[QSortStatement]
+    distribution: list[int]  # e.g. [2,3,4,6,4,3,2] for -3..+3
+
+class QSortResponse(BaseModel):
+    agent_id: int
+    placements: dict[str, int]  # statement_id -> bucket (-3..+3)
+    likert_axes: dict[str, int]  # axis_id -> 1..7
+
+class DelphiOpenResponse(BaseModel):
+    agent_id: int
+    round: int = 1
+    answers: dict[str, str]  # question_id -> free text
+
+class DelphiRatingResponse(BaseModel):
+    agent_id: int
+    round: int
+    ratings: dict[str, dict[str, int]]  # theme_id -> {importance, plausibility}
+    justification: Optional[str] = None
+
+class ScenarioRating(BaseModel):
+    desirability: int = Field(ge=1, le=7)
+    plausibility: int = Field(ge=1, le=7)
+    impact_on_my_group: int = Field(ge=1, le=7)
+    fairness: int = Field(ge=1, le=7)
+    if_woke_up_response: str
+
+class ScenarioResponse(BaseModel):
+    agent_id: int
+    ratings: dict[str, ScenarioRating]  # scenario_id -> rating
+```
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `cd backend && uv run pytest tests/interviews/test_models.py -v`
+Expected: 5 passed.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add backend/app/models/interview.py backend/tests/interviews/__init__.py backend/tests/interviews/test_models.py
+git commit -m "feat(interviews): add pydantic models for instruments and responses"
+```
+
+---
+
+### Task 3: YAML instrument loader + validator
+
+**Files:**
+- Create: `backend/app/services/interviews/__init__.py`
+- Create: `backend/app/services/interviews/instrument_loader.py`
+- Create: `backend/scripts/instruments/__init__.py` (empty marker file; tests locate this directory by filesystem path)
+- Test: `backend/tests/interviews/test_instrument_loader.py`
+
+- [ ] **Step 1: Write failing test**
+
+```python
+# backend/tests/interviews/test_instrument_loader.py
+import pytest
+from app.services.interviews.instrument_loader import (
+    load_likert_instrument, InstrumentValidationError,
+)
+
+def _write(tmp_path, text):
+    p = tmp_path / "inst.yaml"
+    p.write_text(text, encoding="utf-8")
+    return p
+
+def test_loads_valid_likert(tmp_path):
+    p = _write(tmp_path, """
+name: longitudinal_v1
+version: "1.0"
+language_default: de
+items:
+  - item_id: stk_1
+    de: "Der westliche Dorschbestand wird sich erholen"
+    en: "Western cod stock will recover"
+    scale: 5
+    family: stocks
+""")
+    inst = load_likert_instrument(p)
+    assert inst.name == "longitudinal_v1"
+    assert len(inst.items) == 1
+
+def test_rejects_duplicate_item_id(tmp_path):
+    p = _write(tmp_path, """
+name: x
+items:
+  - {item_id: a, de: d, en: e, scale: 5}
+  - {item_id: a, de: d, en: e, scale: 5}
+""")
+    with pytest.raises(InstrumentValidationError):
+        load_likert_instrument(p)
+
+def test_rejects_missing_required_field(tmp_path):
+    p = _write(tmp_path, """
+name: x
+items:
+  - {item_id: a, de: d, scale: 5}
+""")
+    with pytest.raises(InstrumentValidationError):
+        load_likert_instrument(p)
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `cd backend && uv run pytest tests/interviews/test_instrument_loader.py -v`
+Expected: ImportError.
+
+- [ ] **Step 3: Create loader**
+
+Create `backend/app/services/interviews/__init__.py` (empty), `backend/scripts/instruments/__init__.py` (empty), then `backend/app/services/interviews/instrument_loader.py`:
+
+```python
+from __future__ import annotations
+import hashlib
+import json
+from pathlib import Path
+import yaml
+from pydantic import ValidationError
+from app.models.interview import (
+    LikertInstrument, QSortInstrument,
+)
+
+class InstrumentValidationError(ValueError):
+    pass
+
+def _parse_yaml(path: Path) -> dict:
+    if not path.exists():
+        raise InstrumentValidationError(f"instrument file not found: {path}")
+    try:
+        with path.open("r", encoding="utf-8") as f:
+            data = yaml.safe_load(f)
+    except yaml.YAMLError as e:
+        raise InstrumentValidationError(f"YAML parse error in {path}: {e}") from e
+    if not isinstance(data, dict):
+        raise InstrumentValidationError(f"top-level YAML must be a mapping in {path}")
+    return data
+
+def load_likert_instrument(path: Path) -> LikertInstrument:
+    data = _parse_yaml(Path(path))
+    try:
+        return LikertInstrument(**data)
+    except ValidationError as e:
+        raise InstrumentValidationError(str(e)) from e
+
+def load_qsort_instrument(path: Path) -> QSortInstrument:
+    data = _parse_yaml(Path(path))
+    try:
+        return QSortInstrument(**data)
+    except ValidationError as e:
+        raise InstrumentValidationError(str(e)) from e
+
+def instrument_hash(path: Path) -> str:
+    data = Path(path).read_bytes()
+    return hashlib.sha256(data).hexdigest()[:16]
+
+def freeze_snapshot(instruments: dict[str, Path], out_path: Path) -> dict:
+    snapshot = {
+        name: {
+            "path": str(p),
+            "hash": instrument_hash(p),
+            "content": _parse_yaml(p),
+        }
+        for name, p in instruments.items()
+    }
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    out_path.write_text(json.dumps(snapshot, ensure_ascii=False, indent=2), encoding="utf-8")
+    return snapshot
+```
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `cd backend && uv run pytest tests/interviews/test_instrument_loader.py -v`
+Expected: 3 passed.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add backend/app/services/interviews/__init__.py backend/app/services/interviews/instrument_loader.py backend/scripts/instruments/__init__.py backend/tests/interviews/test_instrument_loader.py
+git commit -m "feat(interviews): YAML instrument loader with pydantic validation and hash freezing"
+```
+
+---
+
+### Task 4: LLM stub mode
+
+**Files:**
+- Modify: `backend/app/utils/llm_client.py`
+- Test: `backend/tests/interviews/test_llm_stub.py`
+
+- [ ] **Step 1: Write failing test**
+
+```python
+# backend/tests/interviews/test_llm_stub.py
+import json
+from app.utils.llm_client import LLMClient
+
+def test_stub_mode_returns_deterministic_canned_json(monkeypatch):
+    monkeypatch.setenv("LLM_STUB_MODE", "true")
+    from app.config import Config
+    monkeypatch.setattr(Config, "LLM_STUB_MODE", True)  # restored automatically, so stub mode cannot leak into other tests
+    client = LLMClient(api_key="x", base_url="x", model="x")
+    messages = [
+        {"role": "system", "content": "You are persona_42. Return JSON."},
+        {"role": "user", "content": "stub_key=longitudinal:item_001"},
+    ]
+    out1 = client.chat_json(messages=messages, temperature=0.0)
+    out2 = client.chat_json(messages=messages, temperature=0.0)
+    assert out1 == out2  # identical input must yield the identical canned payload
+    assert isinstance(out1, dict)
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `cd backend && uv run pytest tests/interviews/test_llm_stub.py -v`
+Expected: FAIL (real API call attempted or stub absent).
+
+- [ ] **Step 3: Read current `llm_client.py`**
+
+Read the file to locate `chat` and `chat_json` method bodies and where to insert the stub branch.
+
+- [ ] **Step 4: Add stub branch**
+
+At the top of `LLMClient.chat` (before the OpenAI call), insert:
+```python
+        from app.config import Config
+        if getattr(Config, "LLM_STUB_MODE", False):
+            return self._stub_response(messages)
+```
+
+And at the top of `LLMClient.chat_json` (before delegating), insert the same guard returning a parsed dict via `self._stub_response_json(messages)`.
+
+Add these methods to `LLMClient`:
+```python
+    def _stub_key(self, messages: list[dict]) -> str:
+        user_msg = next((m["content"] for m in reversed(messages) if m.get("role") == "user"), "")
+        sys_msg = next((m["content"] for m in messages if m.get("role") == "system"), "")
+        # Allow callers to embed an explicit stub_key=... token
+        for chunk in user_msg.split():
+            if chunk.startswith("stub_key="):
+                return chunk[len("stub_key="):]
+        import hashlib
+        return hashlib.sha256((sys_msg + "|" + user_msg).encode("utf-8")).hexdigest()[:12]
+
+    def _stub_response(self, messages: list[dict]) -> str:
+        import json as _json
+        return _json.dumps(self._stub_response_json(messages), ensure_ascii=False)
+
+    def _stub_response_json(self, messages: list[dict]) -> dict:
+        key = self._stub_key(messages)
+        # Deterministic centered Likert + plausible open text
+        digit = sum(ord(c) for c in key) % 5 + 1
+        return {
+            "stub_key": key,
+            "responses": {"item_001": digit, "item_002": digit, "item_003": (digit % 5) + 1},
+            "confidence": {"item_001": 0.7, "item_002": 0.7, "item_003": 0.6},
+            "open_comment": f"stub:{key}",
+        }
+```
+
+- [ ] **Step 5: Run test to verify it passes**
+
+Run: `cd backend && uv run pytest tests/interviews/test_llm_stub.py -v`
+Expected: 1 passed.
+
+- [ ] **Step 6: Commit**
+
+```bash
+git add backend/app/utils/llm_client.py backend/tests/interviews/test_llm_stub.py
+git commit -m "feat(interviews): LLM stub mode for deterministic CI tests"
+```
+
+---
+
+### Task 5: StakeholderInterviewer base class
+
+**Files:**
+- Create: `backend/app/services/interviews/base.py`
+- Test: `backend/tests/interviews/test_base_interviewer.py`
+
+- [ ] **Step 1: Write failing test**
+
+```python
+# backend/tests/interviews/test_base_interviewer.py
+import json
+import pytest
+from app.services.interviews.base import StakeholderInterviewer, MemoryDigest, PersonaRecord
+
+class _FakeLLM:
+    def __init__(self, responses):
+        self.responses = list(responses)
+        self.calls = []
+    def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
+        self.calls.append(messages)
+        return self.responses.pop(0)
+
+class _FakeMemory:
+    def get_digest(self, agent_id, max_chars=2000):
+        return MemoryDigest(text=f"digest-for-{agent_id}", available=True)
+
+def test_in_character_prompt_includes_persona_and_memory():
+    llm = _FakeLLM([{"x": 1}])
+    mem = _FakeMemory()
+    interviewer = StakeholderInterviewer(llm=llm, memory=mem)
+    persona = PersonaRecord(agent_id=7, name="A", persona="I am a small-scale Baltic fisher.")
+    out = interviewer.ask_in_character(persona, user_prompt="Q?", schema_hint="{...}")
+    assert out == {"x": 1}
+    sys_msg = llm.calls[0][0]["content"]
+    assert "small-scale Baltic fisher" in sys_msg
+    assert "digest-for-7" in sys_msg
+
+def test_schema_retry_on_first_failure():
+    bad_then_good = [{}, {"responses": {"a": 3}}]
+    llm = _FakeLLM(bad_then_good)
+    mem = _FakeMemory()
+    interviewer = StakeholderInterviewer(llm=llm, memory=mem)
+    def validator(d): 
+        return d if "responses" in d else None
+    persona = PersonaRecord(agent_id=1, name="A", persona="p")
+    out = interviewer.ask_in_character(persona, user_prompt="Q?", schema_hint="x", validate=validator)
+    assert out == {"responses": {"a": 3}}
+    assert len(llm.calls) == 2
+
+def test_two_failures_raise():
+    llm = _FakeLLM([{}, {}])
+    mem = _FakeMemory()
+    interviewer = StakeholderInterviewer(llm=llm, memory=mem)
+    persona = PersonaRecord(agent_id=1, name="A", persona="p")
+    with pytest.raises(ValueError):
+        interviewer.ask_in_character(persona, user_prompt="Q?", schema_hint="x",
+                                     validate=lambda d: d if "responses" in d else None)
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `cd backend && uv run pytest tests/interviews/test_base_interviewer.py -v`
+Expected: ImportError.
+
+- [ ] **Step 3: Implement base**
+
+`backend/app/services/interviews/base.py`:
+```python
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Any, Callable, Optional, Protocol
+
+@dataclass
+class PersonaRecord:
+    agent_id: int
+    name: str
+    persona: str
+    profession: Optional[str] = None
+    bio: Optional[str] = None
+
+@dataclass
+class MemoryDigest:
+    text: str
+    available: bool = True
+
+class MemoryProvider(Protocol):
+    def get_digest(self, agent_id: int, max_chars: int = 2000) -> MemoryDigest: ...
+
+class StakeholderInterviewer:
+    def __init__(self, llm, memory: MemoryProvider, language: str = "de"):
+        self.llm = llm
+        self.memory = memory
+        self.language = language
+
+    def _system_prompt(self, persona: PersonaRecord, digest: MemoryDigest, schema_hint: str) -> str:
+        memory_block = digest.text if digest.available else "[no simulation memory available]"
+        lang_note = "Antworte ausschließlich auf Deutsch." if self.language == "de" else "Answer in English."
+        return (
+            f"You are {persona.name}. {persona.persona}\n\n"
+            "You are answering a survey about the future of German fisheries. "
+            "Answer strictly in character based on your background, values, and what you experienced "
+            "during the simulated social media discourse summarised below.\n\n"
+            f"--- simulation memory digest ---\n{memory_block}\n--- end ---\n\n"
+            f"{lang_note} Return JSON ONLY matching this schema:\n{schema_hint}"
+        )
+
+    def ask_in_character(
+        self,
+        persona: PersonaRecord,
+        user_prompt: str,
+        schema_hint: str,
+        *,
+        temperature: float = 0.3,
+        max_tokens: Optional[int] = None,
+        validate: Optional[Callable[[dict], Optional[dict]]] = None,
+    ) -> dict:
+        digest = self.memory.get_digest(persona.agent_id)
+        messages = [
+            {"role": "system", "content": self._system_prompt(persona, digest, schema_hint)},
+            {"role": "user", "content": user_prompt},
+        ]
+        out = self.llm.chat_json(messages=messages, temperature=temperature, max_tokens=max_tokens)
+        if validate is not None:
+            validated = validate(out)
+            if validated is not None:
+                return validated
+            messages.append({"role": "assistant", "content": str(out)})
+            messages.append({"role": "user", "content":
+                "Your previous response did not match the required schema. "
+                f"Return ONLY valid JSON matching: {schema_hint}"})
+            out = self.llm.chat_json(messages=messages, temperature=0.0, max_tokens=max_tokens)
+            validated = validate(out)
+            if validated is None:
+                raise ValueError(f"agent {persona.agent_id}: schema violation after retry")
+            return validated
+        return out
+```
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `cd backend && uv run pytest tests/interviews/test_base_interviewer.py -v`
+Expected: 3 passed.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add backend/app/services/interviews/base.py backend/tests/interviews/test_base_interviewer.py
+git commit -m "feat(interviews): StakeholderInterviewer base with in-character prompting and schema retry"
+```
+
+---
+
+## Phase 2 — Subagents
+
+### Task 6: Longitudinal subagent + instrument YAML
+
+**Files:**
+- Create: `backend/scripts/instruments/longitudinal_v1.yaml`
+- Create: `backend/app/services/interviews/longitudinal.py`
+- Test: `backend/tests/interviews/test_longitudinal.py`
+
+- [ ] **Step 1: Write failing test**
+
+```python
+# backend/tests/interviews/test_longitudinal.py
+from pathlib import Path
+import pytest
+from app.models.interview import InterviewPhase
+from app.services.interviews.base import PersonaRecord, MemoryDigest
+from app.services.interviews.longitudinal import LongitudinalSubagent, run_aggregate
+
+class _FakeMem:
+    """Memory stub: returns the same one-character, available digest for any agent."""
+    def get_digest(self, agent_id, max_chars=2000):
+        return MemoryDigest(text="x", available=True)
+
+class _CannedLLM:
+    def __init__(self): self.n = 0
+    def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
+        self.n += 1
+        return {
+            "responses": {"stk_1": 4, "gov_1": 3, "mkt_1": 5, "clm_1": 2},
+            "confidence": {"stk_1": 0.8, "gov_1": 0.6, "mkt_1": 0.7, "clm_1": 0.5},
+            "open_comment": "test",
+        }
+
+INSTRUMENT = Path(__file__).resolve().parents[2] / "scripts" / "instruments" / "longitudinal_v1.yaml"
+
+def test_longitudinal_administer_one_agent():
+    sub = LongitudinalSubagent(llm=_CannedLLM(), memory=_FakeMem(), instrument_path=INSTRUMENT)
+    persona = PersonaRecord(agent_id=3, name="A", persona="p")
+    resp = sub.administer(persona, phase=InterviewPhase.T0)
+    assert resp.agent_id == 3
+    assert resp.phase == InterviewPhase.T0
+    assert set(resp.responses.keys()) >= {"stk_1", "gov_1", "mkt_1", "clm_1"}
+
+def test_longitudinal_aggregate_delta():
+    from app.models.interview import LikertResponse
+    t0 = [LikertResponse(agent_id=i, phase=InterviewPhase.T0,
+                         responses={"stk_1": 3, "gov_1": 4},
+                         confidence={"stk_1": 0.8, "gov_1": 0.8}) for i in range(5)]
+    t1 = [LikertResponse(agent_id=i, phase=InterviewPhase.T1,
+                         responses={"stk_1": 4, "gov_1": 4},
+                         confidence={"stk_1": 0.8, "gov_1": 0.8}) for i in range(5)]
+    agg = run_aggregate(t0, t1)
+    assert agg["per_item"]["stk_1"]["mean_delta"] == 1.0
+    assert agg["per_item"]["gov_1"]["mean_delta"] == 0.0
+    assert agg["n_paired"] == 5
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `cd backend && uv run pytest tests/interviews/test_longitudinal.py -v`
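+Expected: collection fails with an ImportError (`app.services.interviews.longitudinal` does not exist yet; the instrument YAML is also still missing at this point).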
+
+- [ ] **Step 3: Create instrument YAML**
+
+`backend/scripts/instruments/longitudinal_v1.yaml`:
+```yaml
+name: longitudinal_v1
+version: "1.0"
+language_default: de
+items:
+  # Stock status & recovery
+  - {item_id: stk_1, family: stocks, scale: 5,
+     de: "Der westliche Dorschbestand wird sich bis 2035 erholen.",
+     en: "The Western Baltic cod stock will recover by 2035."}
+  - {item_id: stk_2, family: stocks, scale: 5,
+     de: "Der Heringsbestand in der westlichen Ostsee ist nicht mehr zu retten.",
+     en: "The Western Baltic herring stock can no longer be saved.",
+     reverse_coded: true}
+  - {item_id: stk_3, family: stocks, scale: 5,
+     de: "Wissenschaftliche Bestandsschätzungen sind generell zuverlässig.",
+     en: "Scientific stock assessments are generally reliable."}
+  # Governance & CFP
+  - {item_id: gov_1, family: governance, scale: 5,
+     de: "Die Gemeinsame Fischereipolitik der EU scheitert beim Schutz der Ostseefische.",
+     en: "The EU Common Fisheries Policy fails to protect Baltic fish.",
+     reverse_coded: true}
+  - {item_id: gov_2, family: governance, scale: 5,
+     de: "Entscheidungen über Fangquoten sollten stärker lokal getroffen werden.",
+     en: "Decisions on catch quotas should be taken more locally."}
+  - {item_id: gov_3, family: governance, scale: 5,
+     de: "Die deutsche Bundesregierung handelt entschlossen bei Fischereifragen.",
+     en: "The German federal government acts decisively on fisheries issues."}
+  # Market & MSC
+  - {item_id: mkt_1, family: market, scale: 5,
+     de: "Nur MSC-zertifizierter Fisch sollte verkauft werden dürfen.",
+     en: "Only MSC-certified fish should be allowed for sale."}
+  - {item_id: mkt_2, family: market, scale: 5,
+     de: "Importierter Fisch verdrängt die deutsche Kleinfischerei.",
+     en: "Imported fish displaces German small-scale fisheries."}
+  - {item_id: mkt_3, family: market, scale: 5,
+     de: "Verbraucher zahlen gerne mehr für nachhaltigen Ostseefisch.",
+     en: "Consumers gladly pay more for sustainable Baltic fish."}
+  # Climate & adaptation
+  - {item_id: clm_1, family: climate, scale: 5,
+     de: "Der Klimawandel macht traditionelle Ostseefischerei unmöglich.",
+     en: "Climate change makes traditional Baltic fisheries impossible.",
+     reverse_coded: true}
+  - {item_id: clm_2, family: climate, scale: 5,
+     de: "Aquakultur ist die Zukunft der deutschen Fischwirtschaft.",
+     en: "Aquaculture is the future of the German fishing industry."}
+  - {item_id: clm_3, family: climate, scale: 5,
+     de: "Die Fischerei muss sich grundlegend an neue Arten anpassen.",
+     en: "Fisheries must fundamentally adapt to new species."}
+```
+
+- [ ] **Step 4: Implement subagent**
+
+`backend/app/services/interviews/longitudinal.py`:
+```python
+from __future__ import annotations
+import json
+import math
+from pathlib import Path
+from typing import Optional
+from app.models.interview import (
+    LikertInstrument, LikertResponse, InterviewPhase,
+)
+from app.services.interviews.base import StakeholderInterviewer, PersonaRecord
+from app.services.interviews.instrument_loader import load_likert_instrument
+
+class LongitudinalSubagent:
+    def __init__(self, llm, memory, instrument_path: Path, language: str = "de"):
+        self.instrument: LikertInstrument = load_likert_instrument(Path(instrument_path))
+        self.interviewer = StakeholderInterviewer(llm=llm, memory=memory, language=language)
+        self.language = language
+
+    def _schema_hint(self) -> str:
+        ids = [i.item_id for i in self.instrument.items]
+        return json.dumps({
+            "responses": {k: "<int 1-5>" for k in ids},
+            "confidence": {k: "<float 0-1>" for k in ids},
+            "open_comment": "<string, optional>",
+        }, ensure_ascii=False)
+
+    def _user_prompt(self) -> str:
+        lines = ["Bitte bewerten Sie die folgenden Aussagen auf einer Skala von 1 (lehne stark ab) bis 5 (stimme stark zu)." if self.language == "de"
+                 else "Please rate the following statements on a scale from 1 (strongly disagree) to 5 (strongly agree)."]
+        for it in self.instrument.items:
+            txt = it.de if self.language == "de" else it.en
+            lines.append(f"- [{it.item_id}] {txt}")
+        return "\n".join(lines)
+
+    def _validator(self, raw: dict) -> Optional[dict]:
+        if not isinstance(raw, dict): return None
+        resp = raw.get("responses")
+        if not isinstance(resp, dict): return None
+        required = {it.item_id for it in self.instrument.items}
+        if not required.issubset(resp.keys()): return None
+        for k, v in resp.items():
+            if not isinstance(v, int) or not 1 <= v <= 5: return None
+        return raw
+
+    def administer(self, persona: PersonaRecord, phase: InterviewPhase) -> LikertResponse:
+        raw = self.interviewer.ask_in_character(
+            persona,
+            user_prompt=self._user_prompt(),
+            schema_hint=self._schema_hint(),
+            validate=self._validator,
+        )
+        return LikertResponse(
+            agent_id=persona.agent_id,
+            phase=phase,
+            responses={k: int(v) for k, v in raw["responses"].items()},
+            confidence={k: float(v) for k, v in raw.get("confidence", {}).items()},
+            open_comment=raw.get("open_comment"),
+        )
+
+def run_aggregate(t0: list[LikertResponse], t1: list[LikertResponse]) -> dict:
+    by_t0 = {r.agent_id: r for r in t0}
+    by_t1 = {r.agent_id: r for r in t1}
+    paired = sorted(set(by_t0) & set(by_t1))
+    items: set[str] = set()
+    for r in t0 + t1:
+        items.update(r.responses.keys())
+    per_item: dict[str, dict] = {}
+    for it in sorted(items):
+        deltas = []
+        for aid in paired:
+            v0 = by_t0[aid].responses.get(it)
+            v1 = by_t1[aid].responses.get(it)
+            if v0 is None or v1 is None: continue
+            deltas.append(v1 - v0)
+        if not deltas:
+            per_item[it] = {"mean_delta": None, "n": 0}
+            continue
+        m = sum(deltas) / len(deltas)
+        var = sum((d - m) ** 2 for d in deltas) / max(len(deltas) - 1, 1)
+        per_item[it] = {
+            "mean_delta": m,
+            "sd_delta": math.sqrt(var),
+            "n": len(deltas),
+            "n_positive": sum(1 for d in deltas if d > 0),
+            "n_negative": sum(1 for d in deltas if d < 0),
+        }
+    per_agent: dict[int, dict] = {}
+    for aid in paired:
+        r0 = by_t0[aid].responses
+        r1 = by_t1[aid].responses
+        common = set(r0) & set(r1)
+        total = sum(abs(r1[k] - r0[k]) for k in common)
+        per_agent[aid] = {"total_abs_drift": total, "n_items": len(common)}
+    return {
+        "n_paired": len(paired),
+        "n_t0_only": len(set(by_t0) - set(by_t1)),
+        "n_t1_only": len(set(by_t1) - set(by_t0)),
+        "per_item": per_item,
+        "per_agent": per_agent,
+    }
+```
+
+- [ ] **Step 5: Run test to verify it passes**
+
+Run: `cd backend && uv run pytest tests/interviews/test_longitudinal.py -v`
+Expected: 2 passed.
+
+- [ ] **Step 6: Commit**
+
+```bash
+git add backend/scripts/instruments/longitudinal_v1.yaml backend/app/services/interviews/longitudinal.py backend/tests/interviews/test_longitudinal.py
+git commit -m "feat(interviews): longitudinal subagent + 12-item Likert instrument"
+```
+
+---
+
+### Task 7: Diversity subagent + Q-sort instrument
+
+**Files:**
+- Create: `backend/scripts/instruments/diversity_v1.yaml`
+- Create: `backend/app/services/interviews/diversity.py`
+- Test: `backend/tests/interviews/test_diversity.py`
+
+- [ ] **Step 1: Write failing test**
+
+```python
+# backend/tests/interviews/test_diversity.py
+from pathlib import Path
+import numpy as np
+from app.services.interviews.base import PersonaRecord, MemoryDigest
+from app.services.interviews.diversity import (
+    DiversitySubagent, run_typology,
+)
+
+class _Mem:
+    """Memory stub: returns the same one-character, available digest for any agent."""
+    def get_digest(self, agent_id, max_chars=2000):
+        return MemoryDigest(text="x", available=True)
+
+class _CannedLLM:
+    def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
+        # Place all 24 statements into legal buckets per the forced distribution
+        placements = {}
+        buckets = [-3]*2 + [-2]*3 + [-1]*4 + [0]*6 + [1]*4 + [2]*3 + [3]*2
+        for i in range(24):
+            placements[f"st_{i+1:02d}"] = buckets[i]
+        return {
+            "placements": placements,
+            "likert_axes": {"ax_pres_extr": 5, "ax_loc_eu": 3, "ax_sci_trad": 4,
+                            "ax_ind_col": 4, "ax_short_long": 5, "ax_mkt_reg": 3},
+        }
+
+INSTRUMENT = Path(__file__).resolve().parents[2] / "scripts" / "instruments" / "diversity_v1.yaml"
+
+def test_diversity_administer():
+    sub = DiversitySubagent(llm=_CannedLLM(), memory=_Mem(), instrument_path=INSTRUMENT)
+    persona = PersonaRecord(agent_id=1, name="A", persona="p")
+    resp = sub.administer(persona)
+    assert len(resp.placements) == 24
+    assert set(resp.likert_axes.keys()) == {
+        "ax_pres_extr","ax_loc_eu","ax_sci_trad","ax_ind_col","ax_short_long","ax_mkt_reg"
+    }
+
+def test_typology_runs_pca_kmeans():
+    from app.models.interview import QSortResponse
+    rng = np.random.default_rng(42)
+    responses = []
+    for aid in range(20):
+        placements = {f"st_{i+1:02d}": int(rng.integers(-3, 4)) for i in range(24)}
+        axes = {f"ax_{j}": int(rng.integers(1, 8)) for j in range(6)}
+        responses.append(QSortResponse(agent_id=aid, placements=placements, likert_axes=axes))
+    result = run_typology(responses, n_clusters=3)
+    assert "clusters" in result
+    assert len(result["clusters"]) == 3
+    assert "pca" in result
+    assert len(result["pca"]["components"]) >= 2
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `cd backend && uv run pytest tests/interviews/test_diversity.py -v`
+Expected: ImportError.
+
+- [ ] **Step 3: Create instrument YAML**
+
+`backend/scripts/instruments/diversity_v1.yaml`:
+```yaml
+name: diversity_v1
+version: "1.0"
+language_default: de
+distribution: [2, 3, 4, 6, 4, 3, 2]   # buckets from -3 to +3, total 24
+statements:
+  - {statement_id: st_01, de: "Die Ostsee gehört den Fischern, die hier seit Generationen leben.", en: "The Baltic belongs to fishers who have lived here for generations."}
+  - {statement_id: st_02, de: "MSC-Zertifizierung schützt vor allem große Konzerne.", en: "MSC certification mainly protects large corporations."}
+  - {statement_id: st_03, de: "Wissenschaftliche Quoten sind die einzige Grundlage für Politik.", en: "Scientific quotas are the only legitimate basis for policy."}
+  - {statement_id: st_04, de: "Aquakultur kann Ostseefischerei ersetzen.", en: "Aquaculture can replace Baltic fisheries."}
+  - {statement_id: st_05, de: "Sportfischer schaden den Beständen mehr als die Berufsfischer.", en: "Recreational anglers harm stocks more than commercial fishers."}
+  - {statement_id: st_06, de: "Die EU-Fischereipolitik kennt die Ostsee nicht.", en: "EU fisheries policy doesn't understand the Baltic."}
+  - {statement_id: st_07, de: "Großtechnische Fischerei ist effizienter und damit nachhaltiger.", en: "Industrial fisheries are more efficient and therefore more sustainable."}
+  - {statement_id: st_08, de: "Wer Fisch isst, sollte mehr dafür bezahlen.", en: "Those who eat fish should pay more for it."}
+  - {statement_id: st_09, de: "Die Kleinfischerei muss subventioniert werden.", en: "Small-scale fisheries must be subsidised."}
+  - {statement_id: st_10, de: "Marine Schutzgebiete sind reine Symbolpolitik.", en: "Marine protected areas are mere symbolism."}
+  - {statement_id: st_11, de: "Russlands Krieg ändert alles in der Ostsee.", en: "Russia's war changes everything in the Baltic."}
+  - {statement_id: st_12, de: "Nur drastische Reduktion der Fangmengen rettet die Bestände.", en: "Only drastic catch reductions will save the stocks."}
+  - {statement_id: st_13, de: "NGOs übertreiben die Krise systematisch.", en: "NGOs systematically exaggerate the crisis."}
+  - {statement_id: st_14, de: "Klimawandel ist das eigentliche Problem, nicht die Fischerei.", en: "Climate change is the real problem, not fisheries."}
+  - {statement_id: st_15, de: "Tradition zählt mehr als kurzfristige Bestandszahlen.", en: "Tradition matters more than short-term stock numbers."}
+  - {statement_id: st_16, de: "Verbraucher entscheiden über die Zukunft des Fisches.", en: "Consumers decide the future of fish."}
+  - {statement_id: st_17, de: "Ohne Generalstreik der Fischer ändert sich nichts.", en: "Without a fishers' general strike, nothing will change."}
+  - {statement_id: st_18, de: "Die Bundesregierung sollte Kutter aufkaufen und stilllegen.", en: "The federal government should buy out and decommission boats."}
+  - {statement_id: st_19, de: "Die Dorschkrise ist Folge gescheiterter Politik.", en: "The cod crisis is the result of policy failure."}
+  - {statement_id: st_20, de: "Ostsee-Aquakultur ist ökologisch problematisch.", en: "Baltic aquaculture is ecologically problematic."}
+  - {statement_id: st_21, de: "Junge Menschen werden keinen Fischereibetrieb mehr übernehmen.", en: "Young people will no longer take over fishing businesses."}
+  - {statement_id: st_22, de: "Markt regelt sich selbst, auch beim Fisch.", en: "The market regulates itself, also for fish."}
+  - {statement_id: st_23, de: "Lokale Genossenschaften sind die Lösung.", en: "Local cooperatives are the solution."}
+  - {statement_id: st_24, de: "In 20 Jahren gibt es keine deutsche Ostseefischerei mehr.", en: "In 20 years there will be no German Baltic fisheries left."}
+likert_axes:
+  - {axis_id: ax_pres_extr, scale: 7, de: "Bewahrung (1) vs. Nutzung (7)", en: "Preservation (1) vs. Extraction (7)"}
+  - {axis_id: ax_loc_eu,    scale: 7, de: "Lokal (1) vs. EU-zentral (7)",  en: "Local (1) vs. EU-central (7)"}
+  - {axis_id: ax_sci_trad,  scale: 7, de: "Wissenschaft (1) vs. Tradition (7)", en: "Science-led (1) vs. Tradition-led (7)"}
+  - {axis_id: ax_ind_col,   scale: 7, de: "Individuum (1) vs. Kollektiv (7)", en: "Individual (1) vs. Collective (7)"}
+  - {axis_id: ax_short_long,scale: 7, de: "Kurzfristig (1) vs. Langfristig (7)", en: "Short-term (1) vs. Long-term (7)"}
+  - {axis_id: ax_mkt_reg,   scale: 7, de: "Markt (1) vs. Regulierung (7)", en: "Market (1) vs. Regulation (7)"}
+```
+
+- [ ] **Step 4: Implement subagent**
+
+`backend/app/services/interviews/diversity.py`:
+```python
+from __future__ import annotations
+import json
+from pathlib import Path
+from typing import Optional
+import numpy as np
+from sklearn.decomposition import PCA
+from sklearn.cluster import KMeans
+import yaml
+from app.models.interview import QSortResponse
+from app.services.interviews.base import StakeholderInterviewer, PersonaRecord
+from app.services.interviews.instrument_loader import InstrumentValidationError
+
+class DiversitySubagent:
+    def __init__(self, llm, memory, instrument_path: Path, language: str = "de"):
+        self.instrument = self._load(Path(instrument_path))
+        self.interviewer = StakeholderInterviewer(llm=llm, memory=memory, language=language)
+        self.language = language
+
+    def _load(self, path: Path) -> dict:
+        with path.open("r", encoding="utf-8") as f:
+            data = yaml.safe_load(f)
+        if not isinstance(data, dict) or "statements" not in data or "distribution" not in data:
+            raise InstrumentValidationError(f"invalid diversity instrument: {path}")
+        if sum(data["distribution"]) != len(data["statements"]):
+            raise InstrumentValidationError("distribution sum must equal number of statements")
+        return data
+
+    def _schema_hint(self) -> str:
+        return json.dumps({
+            "placements": {s["statement_id"]: "<int in -3..+3>" for s in self.instrument["statements"]},
+            "likert_axes": {a["axis_id"]: "<int 1-7>" for a in self.instrument["likert_axes"]},
+        }, ensure_ascii=False)
+
+    def _user_prompt(self) -> str:
+        dist = self.instrument["distribution"]
+        buckets = list(range(-3, 4))
+        bucket_desc = ", ".join(f"{b}:{n}" for b, n in zip(buckets, dist))
+        lines = [
+            ("Ordnen Sie jede Aussage genau einer Box von -3 (lehne stark ab) bis +3 (stimme stark zu) zu. "
+             f"Die Verteilung ist erzwungen: {bucket_desc}.") if self.language == "de" else
+            ("Place every statement into exactly one box from -3 (strongly disagree) to +3 (strongly agree). "
+             f"The distribution is forced: {bucket_desc}."),
+            "",
+            "Statements:",
+        ]
+        for s in self.instrument["statements"]:
+            txt = s["de"] if self.language == "de" else s["en"]
+            lines.append(f"- [{s['statement_id']}] {txt}")
+        lines += ["", "Then rate each axis from 1 to 7:"]
+        for a in self.instrument["likert_axes"]:
+            txt = a["de"] if self.language == "de" else a["en"]
+            lines.append(f"- [{a['axis_id']}] {txt}")
+        return "\n".join(lines)
+
+    def _validator(self, raw: dict) -> Optional[dict]:
+        if not isinstance(raw, dict): return None
+        placements = raw.get("placements", {})
+        axes = raw.get("likert_axes", {})
+        statements = {s["statement_id"] for s in self.instrument["statements"]}
+        if set(placements.keys()) != statements: return None
+        dist = self.instrument["distribution"]
+        target = {b: n for b, n in zip(range(-3, 4), dist)}
+        got: dict[int, int] = {}
+        for v in placements.values():
+            if not isinstance(v, int) or not -3 <= v <= 3: return None
+            got[v] = got.get(v, 0) + 1
+        if got != target: return None
+        for a in self.instrument["likert_axes"]:
+            v = axes.get(a["axis_id"])
+            if not isinstance(v, int) or not 1 <= v <= 7: return None
+        return raw
+
+    def administer(self, persona: PersonaRecord) -> QSortResponse:
+        raw = self.interviewer.ask_in_character(
+            persona,
+            user_prompt=self._user_prompt(),
+            schema_hint=self._schema_hint(),
+            validate=self._validator,
+        )
+        return QSortResponse(
+            agent_id=persona.agent_id,
+            placements={k: int(v) for k, v in raw["placements"].items()},
+            likert_axes={k: int(v) for k, v in raw["likert_axes"].items()},
+        )
+
+def _vectorize(r: QSortResponse, statements: list[str], axes: list[str]) -> np.ndarray:
+    return np.array(
+        [r.placements.get(s, 0) for s in statements] +
+        [r.likert_axes.get(a, 4) for a in axes],
+        dtype=float,
+    )
+
+def run_typology(responses: list[QSortResponse], n_clusters: int = 4) -> dict:
+    if not responses:
+        return {"n": 0, "clusters": [], "pca": {"components": [], "explained_variance": []}}
+    statements = sorted({k for r in responses for k in r.placements})
+    axes = sorted({k for r in responses for k in r.likert_axes})
+    X = np.vstack([_vectorize(r, statements, axes) for r in responses])
+    n_clusters = min(n_clusters, len(responses))
+    pca = PCA(n_components=min(5, X.shape[1], X.shape[0]))
+    pcs = pca.fit_transform(X)
+    km = KMeans(n_clusters=n_clusters, n_init=10, random_state=0)
+    labels = km.fit_predict(X)
+    clusters = []
+    for c in range(n_clusters):
+        members = [responses[i].agent_id for i in range(len(responses)) if labels[i] == c]
+        centroid = km.cluster_centers_[c]
+        clusters.append({
+            "cluster_id": int(c),
+            "n": len(members),
+            "agent_ids": members,
+            "top_loadings": {
+                statements[i] if i < len(statements) else axes[i - len(statements)]: float(centroid[i])
+                for i in np.argsort(np.abs(centroid))[::-1][:8].tolist()
+            },
+        })
+    return {
+        "n": len(responses),
+        "clusters": clusters,
+        "pca": {
+            "components": pcs.tolist(),
+            "explained_variance": pca.explained_variance_ratio_.tolist(),
+            "agent_ids": [r.agent_id for r in responses],
+        },
+    }
+```
+
+- [ ] **Step 5: Run test to verify it passes**
+
+Run: `cd backend && uv run pytest tests/interviews/test_diversity.py -v`
+Expected: 2 passed.
+
+- [ ] **Step 6: Commit**
+
+```bash
+git add backend/scripts/instruments/diversity_v1.yaml backend/app/services/interviews/diversity.py backend/tests/interviews/test_diversity.py
+git commit -m "feat(interviews): diversity subagent with Q-sort + 6 Likert axes + PCA/k-means typology"
+```
+
+---
+
+### Task 8: Delphi subagent (three rounds)
+
+**Files:**
+- Create: `backend/scripts/instruments/delphi_v1.yaml`
+- Create: `backend/app/services/interviews/delphi.py`
+- Test: `backend/tests/interviews/test_delphi.py`
+
+- [ ] **Step 1: Write failing test**
+
+```python
+# backend/tests/interviews/test_delphi.py
+from pathlib import Path
+from app.services.interviews.base import PersonaRecord, MemoryDigest
+from app.services.interviews.delphi import (
+    DelphiSubagent, extract_themes, convergence_metrics,
+)
+
+INSTRUMENT = Path(__file__).resolve().parents[2] / "scripts" / "instruments" / "delphi_v1.yaml"
+
+class _Mem:
+    """Memory stub: returns the same one-character, available digest for any agent."""
+    def get_digest(self, agent_id, max_chars=2000):
+        return MemoryDigest(text="x", available=True)
+
+class _R1LLM:
+    def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
+        return {"answers": {
+            "q1": "Klimawandel, Quoten, Generationswechsel",
+            "q2": "MSC, Aquakultur",
+            "q3": "Russland, EU-Politik",
+            "q4": "Verbraucherpreise",
+        }}
+
+class _R2LLM:
+    def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
+        return {"ratings": {f"theme_{i}": {"importance": 4, "plausibility": 3} for i in range(5)}}
+
+class _ExtractLLM:
+    def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
+        return {"themes": [
+            {"theme_id": "theme_0", "label": "Klimawandel"},
+            {"theme_id": "theme_1", "label": "Quoten"},
+            {"theme_id": "theme_2", "label": "MSC"},
+            {"theme_id": "theme_3", "label": "EU-Politik"},
+            {"theme_id": "theme_4", "label": "Generationswechsel"},
+        ]}
+
+def test_delphi_round1_open():
+    sub = DelphiSubagent(llm=_R1LLM(), memory=_Mem(), instrument_path=INSTRUMENT)
+    persona = PersonaRecord(agent_id=2, name="A", persona="p")
+    resp = sub.administer_round1(persona)
+    assert resp.round == 1
+    assert len(resp.answers) == 4
+
+def test_extract_themes_aggregates():
+    from app.models.interview import DelphiOpenResponse
+    r1 = [DelphiOpenResponse(agent_id=i, answers={"q1": "Klimawandel", "q2": "MSC"}) for i in range(3)]
+    themes = extract_themes(r1, llm=_ExtractLLM())
+    assert len(themes) == 5
+    assert all("theme_id" in t for t in themes)
+
+def test_convergence_metrics():
+    from app.models.interview import DelphiRatingResponse
+    r2 = [DelphiRatingResponse(agent_id=i, round=2,
+            ratings={"t1": {"importance": 3, "plausibility": 3}}) for i in range(5)]
+    r3 = [DelphiRatingResponse(agent_id=i, round=3,
+            ratings={"t1": {"importance": 4, "plausibility": 4}}) for i in range(5)]
+    conv = convergence_metrics(r2, r3)
+    assert "t1" in conv
+    assert conv["t1"]["delta_iqr_importance"] is not None
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `cd backend && uv run pytest tests/interviews/test_delphi.py -v`
+Expected: ImportError.
+
+- [ ] **Step 3: Create instrument YAML**
+
+`backend/scripts/instruments/delphi_v1.yaml`:
+```yaml
+name: delphi_v1
+version: "1.0"
+language_default: de
+rounds: 3
+questions:
+  - {question_id: q1, de: "Welche drei Faktoren werden die deutsche Fischerei bis 2040 am stärksten prägen?", en: "Which three factors will most shape German fisheries by 2040?"}
+  - {question_id: q2, de: "Welche Akteurinnen und Akteure sind heute entscheidend, werden aber unterschätzt?", en: "Which actors are decisive today but underestimated?"}
+  - {question_id: q3, de: "Was sollte sich in den nächsten fünf Jahren ändern, damit die Fischerei eine Zukunft hat?", en: "What should change in the next five years for fisheries to have a future?"}
+  - {question_id: q4, de: "Welcher Trend macht Ihnen am meisten Hoffnung – und welcher am meisten Sorge?", en: "Which trend gives you most hope — and which most concern?"}
+```
+
+- [ ] **Step 4: Implement subagent**
+
+`backend/app/services/interviews/delphi.py`:
+```python
+from __future__ import annotations
+import json
+import statistics
+from pathlib import Path
+from typing import Optional
+import yaml
+from app.models.interview import (
+    DelphiOpenResponse, DelphiRatingResponse,
+)
+from app.services.interviews.base import StakeholderInterviewer, PersonaRecord
+
+class DelphiSubagent:
+    def __init__(self, llm, memory, instrument_path: Path, language: str = "de"):
+        with Path(instrument_path).open("r", encoding="utf-8") as f:
+            self.instrument = yaml.safe_load(f)
+        self.interviewer = StakeholderInterviewer(llm=llm, memory=memory, language=language)
+        self.llm = llm
+        self.language = language
+
+    # --- Round 1: open questions ---
+    def _r1_schema(self) -> str:
+        return json.dumps({
+            "answers": {q["question_id"]: "<string>" for q in self.instrument["questions"]}
+        }, ensure_ascii=False)
+
+    def _r1_prompt(self) -> str:
+        lines = ["Bitte beantworten Sie offen:" if self.language == "de" else "Please answer openly:"]
+        for q in self.instrument["questions"]:
+            txt = q["de"] if self.language == "de" else q["en"]
+            lines.append(f"[{q['question_id']}] {txt}")
+        return "\n".join(lines)
+
+    def _r1_validate(self, raw: dict) -> Optional[dict]:
+        if not isinstance(raw, dict): return None
+        ans = raw.get("answers")
+        if not isinstance(ans, dict): return None
+        required = {q["question_id"] for q in self.instrument["questions"]}
+        if not required.issubset(ans.keys()): return None
+        return raw
+
+    def administer_round1(self, persona: PersonaRecord) -> DelphiOpenResponse:
+        raw = self.interviewer.ask_in_character(
+            persona, user_prompt=self._r1_prompt(),
+            schema_hint=self._r1_schema(), validate=self._r1_validate,
+        )
+        return DelphiOpenResponse(agent_id=persona.agent_id, round=1,
+                                  answers={k: str(v) for k, v in raw["answers"].items()})
+
+    # --- Round 2: rate themes ---
+    def _r2_schema(self, theme_ids: list[str]) -> str:
+        return json.dumps({
+            "ratings": {tid: {"importance": "<int 1-5>", "plausibility": "<int 1-5>"} for tid in theme_ids}
+        }, ensure_ascii=False)
+
+    def _r2_prompt(self, themes: list[dict]) -> str:
+        head = "Bewerten Sie jedes Thema nach Wichtigkeit (1-5) und Plausibilität (1-5):" if self.language == "de" \
+               else "Rate each theme on importance (1-5) and plausibility (1-5):"
+        body = [f"- [{t['theme_id']}] {t['label']}" for t in themes]
+        return head + "\n" + "\n".join(body)
+
+    def _r2_validate(self, theme_ids: list[str]):
+        def v(raw: dict) -> Optional[dict]:
+            if not isinstance(raw, dict): return None
+            ratings = raw.get("ratings", {})
+            if set(ratings.keys()) != set(theme_ids): return None
+            for tid, r in ratings.items():
+                if not isinstance(r, dict): return None
+                for key in ("importance", "plausibility"):
+                    if not isinstance(r.get(key), int) or not 1 <= r[key] <= 5: return None
+            return raw
+        return v
+
+    def administer_round2(self, persona: PersonaRecord, themes: list[dict]) -> DelphiRatingResponse:
+        theme_ids = [t["theme_id"] for t in themes]
+        raw = self.interviewer.ask_in_character(
+            persona, user_prompt=self._r2_prompt(themes),
+            schema_hint=self._r2_schema(theme_ids), validate=self._r2_validate(theme_ids),
+        )
+        return DelphiRatingResponse(agent_id=persona.agent_id, round=2,
+                                    ratings={k: dict(v) for k, v in raw["ratings"].items()})
+
+    # --- Round 3: revise after seeing group stats ---
+    def administer_round3(
+        self, persona: PersonaRecord, themes: list[dict], group_stats: dict, own_r2: DelphiRatingResponse
+    ) -> DelphiRatingResponse:
+        theme_ids = [t["theme_id"] for t in themes]
+        head = ("Sie sehen unten die anonymisierten Gruppenwerte (Median, IQR). "
+                "Bitte überarbeiten Sie Ihre Bewertungen, wenn Sie möchten, und begründen Sie kurz.") \
+               if self.language == "de" else \
+               ("Below are the anonymised group values (median, IQR). "
+                "Please revise your ratings if you wish and add a short justification.")
+        ctx_lines = []
+        for t in themes:
+            tid = t["theme_id"]
+            gs = group_stats.get(tid, {})
+            own = own_r2.ratings.get(tid, {})
+            ctx_lines.append(
+                f"[{tid}] {t['label']} — group importance median={gs.get('imp_median')}, "
+                f"IQR={gs.get('imp_iqr')}; plausibility median={gs.get('plaus_median')}, "
+                f"IQR={gs.get('plaus_iqr')}. Your R2: imp={own.get('importance')}, plaus={own.get('plausibility')}."
+            )
+        prompt = head + "\n\n" + "\n".join(ctx_lines)
+        schema = json.dumps({
+            "ratings": {tid: {"importance": "<int 1-5>", "plausibility": "<int 1-5>"} for tid in theme_ids},
+            "justification": "<string>",
+        }, ensure_ascii=False)
+        def validate(raw):
+            if not isinstance(raw, dict): return None
+            ratings = raw.get("ratings", {})
+            if set(ratings.keys()) != set(theme_ids): return None
+            for r in ratings.values():
+                if not isinstance(r, dict): return None
+                for key in ("importance", "plausibility"):
+                    if not isinstance(r.get(key), int) or not 1 <= r[key] <= 5: return None
+            return raw
+        raw = self.interviewer.ask_in_character(persona, user_prompt=prompt,
+                                                schema_hint=schema, validate=validate)
+        return DelphiRatingResponse(
+            agent_id=persona.agent_id, round=3,
+            ratings={k: dict(v) for k, v in raw["ratings"].items()},
+            justification=raw.get("justification"),
+        )
+
+def extract_themes(round1: list[DelphiOpenResponse], llm) -> list[dict]:
+    text_blocks = []
+    for r in round1:
+        for qid, ans in r.answers.items():
+            text_blocks.append(f"[agent {r.agent_id} {qid}] {ans}")
+    schema = json.dumps({"themes": [{"theme_id": "<string>", "label": "<short string>"}]}, ensure_ascii=False)
+    messages = [
+        {"role": "system", "content":
+            "You extract distinct thematic codes from open-ended German fisheries survey responses. "
+            f"Return JSON ONLY matching: {schema}. Use stable theme_ids of form theme_0, theme_1, …"},
+        {"role": "user", "content": "Responses:\n" + "\n".join(text_blocks) + "\n\nReturn up to 12 distinct themes."},
+    ]
+    raw = llm.chat_json(messages=messages, temperature=0.0)
+    themes = raw.get("themes", []) if isinstance(raw, dict) else []
+    out = []
+    for i, t in enumerate(themes):
+        if isinstance(t, dict) and "label" in t:
+            out.append({"theme_id": t.get("theme_id") or f"theme_{i}", "label": str(t["label"])})
+    return out
+
+def _iqr(xs: list[float]) -> float:
+    if not xs: return 0.0
+    xs = sorted(xs)
+    q1 = statistics.quantiles(xs, n=4)[0] if len(xs) >= 4 else xs[0]
+    q3 = statistics.quantiles(xs, n=4)[2] if len(xs) >= 4 else xs[-1]
+    return q3 - q1
+
+def convergence_metrics(r2: list[DelphiRatingResponse], r3: list[DelphiRatingResponse]) -> dict:
+    by_r2 = {r.agent_id: r for r in r2}
+    by_r3 = {r.agent_id: r for r in r3}
+    themes: set[str] = set()
+    for r in r2 + r3:
+        themes.update(r.ratings.keys())
+    out: dict[str, dict] = {}
+    for t in sorted(themes):
+        imp_r2 = [by_r2[a].ratings[t]["importance"] for a in by_r2 if t in by_r2[a].ratings]
+        imp_r3 = [by_r3[a].ratings[t]["importance"] for a in by_r3 if t in by_r3[a].ratings]
+        plaus_r2 = [by_r2[a].ratings[t]["plausibility"] for a in by_r2 if t in by_r2[a].ratings]
+        plaus_r3 = [by_r3[a].ratings[t]["plausibility"] for a in by_r3 if t in by_r3[a].ratings]
+        out[t] = {
+            "imp_median_r2": statistics.median(imp_r2) if imp_r2 else None,
+            "imp_median_r3": statistics.median(imp_r3) if imp_r3 else None,
+            "imp_iqr_r2": _iqr(imp_r2),
+            "imp_iqr_r3": _iqr(imp_r3),
+            "delta_iqr_importance": _iqr(imp_r3) - _iqr(imp_r2),
+            "plaus_iqr_r2": _iqr(plaus_r2),
+            "plaus_iqr_r3": _iqr(plaus_r3),
+            "delta_iqr_plausibility": _iqr(plaus_r3) - _iqr(plaus_r2),
+        }
+    return out
+
+def group_stats_from_r2(r2: list[DelphiRatingResponse]) -> dict:
+    themes: set[str] = set()
+    for r in r2: themes.update(r.ratings.keys())
+    stats: dict[str, dict] = {}
+    for t in themes:
+        imps = [r.ratings[t]["importance"] for r in r2 if t in r.ratings]
+        plauss = [r.ratings[t]["plausibility"] for r in r2 if t in r.ratings]
+        stats[t] = {
+            "imp_median": statistics.median(imps) if imps else None,
+            "imp_iqr": _iqr(imps),
+            "plaus_median": statistics.median(plauss) if plauss else None,
+            "plaus_iqr": _iqr(plauss),
+        }
+    return stats
+```
+
+- [ ] **Step 5: Run test to verify it passes**
+
+Run: `cd backend && uv run pytest tests/interviews/test_delphi.py -v`
+Expected: 3 passed.
+
+- [ ] **Step 6: Commit**
+
+```bash
+git add backend/scripts/instruments/delphi_v1.yaml backend/app/services/interviews/delphi.py backend/tests/interviews/test_delphi.py
+git commit -m "feat(interviews): Delphi subagent (3 rounds: open, rate, revise) + convergence metrics"
+```
+
+---
+
+### Task 9: Scenario subagent
+
+**Files:**
+- Create: `backend/scripts/instruments/scenario_v1.yaml`
+- Create: `backend/app/services/interviews/scenario.py`
+- Test: `backend/tests/interviews/test_scenario.py`
+
+- [ ] **Step 1: Write failing test**
+
+```python
+# backend/tests/interviews/test_scenario.py
+from pathlib import Path
+from app.services.interviews.base import PersonaRecord, MemoryDigest
+from app.services.interviews.scenario import ScenarioSubagent, polarity_matrix
+
+INSTRUMENT = Path(__file__).resolve().parents[2] / "scripts" / "instruments" / "scenario_v1.yaml"
+
+class _Mem:
+    """Memory stub: hands back the same available one-character digest for any agent."""
+    def get_digest(self, agent_id, max_chars=2000):
+        return MemoryDigest(text="x", available=True)
+
+class _LLM:
+    """LLM stub: answers every ``chat_json`` call with the same ratings for S1-S4."""
+    def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
+        # The four scores are identical for each scenario; only the free-text
+        # answer embeds the scenario id.
+        return {"ratings": {sid: {
+            "desirability": 4, "plausibility": 3, "impact_on_my_group": 5, "fairness": 3,
+            "if_woke_up_response": f"act-on-{sid}",
+        } for sid in ("S1", "S2", "S3", "S4")}}
+
+def test_scenario_administer():
+    """administer() returns one rating per scenario S1-S4 with the stubbed scores."""
+    sub = ScenarioSubagent(llm=_LLM(), memory=_Mem(), instrument_path=INSTRUMENT)
+    persona = PersonaRecord(agent_id=1, name="A", persona="p")
+    resp = sub.administer(persona)
+    assert set(resp.ratings.keys()) == {"S1", "S2", "S3", "S4"}
+    assert resp.ratings["S1"].desirability == 4
+
+def test_polarity_matrix():
+    """polarity_matrix() reports the mean desirability and the respondent count per scenario."""
+    from app.models.interview import ScenarioResponse, ScenarioRating
+    # Three agents give identical ratings for S1, so the mean equals each single rating.
+    responses = [ScenarioResponse(agent_id=i, ratings={
+        "S1": ScenarioRating(desirability=5, plausibility=4, impact_on_my_group=5, fairness=4,
+                              if_woke_up_response="x"),
+    }) for i in range(3)]
+    m = polarity_matrix(responses)
+    assert "S1" in m
+    assert m["S1"]["mean_desirability"] == 5
+    assert m["S1"]["n"] == 3
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `cd backend && uv run pytest tests/interviews/test_scenario.py -v`
+Expected: ImportError.
+
+- [ ] **Step 3: Create instrument YAML**
+
+`backend/scripts/instruments/scenario_v1.yaml`:
+```yaml
+name: scenario_v1
+version: "1.0"
+language_default: de
+scenarios:
+  - scenario_id: S1
+    label_de: "Erholung 2040"
+    label_en: "Recovery 2040"
+    description_de: |
+      Bis 2040 haben sich Dorsch- und Heringsbestände in der westlichen Ostsee
+      deutlich erholt. MSC-Zertifizierung ist branchenweit Standard. Die kleine
+      Küstenfischerei hat sich stabilisiert; die Politik gilt als erfolgreich.
+    description_en: |
+      By 2040, Western Baltic cod and herring stocks have substantially recovered.
+      MSC certification is industry-wide standard. Small-scale coastal fisheries
+      have stabilised; policy is regarded as successful.
+  - scenario_id: S2
+    label_de: "Kollaps 2040"
+    label_en: "Collapse 2040"
+    description_de: |
+      Bis 2040 sind Dorsch- und Heringsbestände zusammengebrochen. Die Flotte
+      ist halbiert, Aquakultur dominiert den Markt, Häfen veröden.
+    description_en: |
+      By 2040, cod and herring stocks have collapsed. The fleet is halved,
+      aquaculture dominates the market, harbour towns decline.
+  - scenario_id: S3
+    label_de: "Festung Europa 2040"
+    label_en: "Fortress Europe 2040"
+    description_de: |
+      Bis 2040 verfolgt die EU eine protektionistische Politik mit hohen Importzöllen,
+      Meeresschutzgebiete bedecken 30% der Ostsee, Sportfischerei ist stark eingeschränkt.
+    description_en: |
+      By 2040, the EU pursues a protectionist policy with high import tariffs,
+      MPAs cover 30% of the Baltic, recreational fishing is strongly curtailed.
+  - scenario_id: S4
+    label_de: "Privatisierung 2040"
+    label_en: "Privatisation 2040"
+    description_de: |
+      Bis 2040 sind Fangrechte als handelbare Quoten (ITQs) etabliert. Die Branche
+      hat sich konsolidiert; nur große, kapitalstarke Unternehmen sind übrig.
+    description_en: |
+      By 2040, fishing rights are tradable quotas (ITQs). The industry has
+      consolidated; only large, well-capitalised firms remain.
+dimensions:
+  - {dimension_id: desirability, scale: 7,
+     de: "Wie wünschenswert ist dieses Szenario?", en: "How desirable is this scenario?"}
+  - {dimension_id: plausibility, scale: 7,
+     de: "Wie plausibel ist dieses Szenario?",   en: "How plausible is this scenario?"}
+  - {dimension_id: impact_on_my_group, scale: 7,
+     de: "Wie stark trifft es Ihre Gruppe?",     en: "How strongly does it affect your group?"}
+  - {dimension_id: fairness, scale: 7,
+     de: "Wie fair ist dieses Szenario?",        en: "How fair is this scenario?"}
+```
+
+- [ ] **Step 4: Implement subagent**
+
+`backend/app/services/interviews/scenario.py`:
+```python
+from __future__ import annotations
+import json
+import statistics
+from pathlib import Path
+from typing import Optional
+import yaml
+from app.models.interview import ScenarioRating, ScenarioResponse
+from app.services.interviews.base import StakeholderInterviewer, PersonaRecord
+
+class ScenarioSubagent:
+    def __init__(self, llm, memory, instrument_path: Path, language: str = "de"):
+        with Path(instrument_path).open("r", encoding="utf-8") as f:
+            self.instrument = yaml.safe_load(f)
+        self.interviewer = StakeholderInterviewer(llm=llm, memory=memory, language=language)
+        self.language = language
+
+    def _schema_hint(self) -> str:
+        sids = [s["scenario_id"] for s in self.instrument["scenarios"]]
+        return json.dumps({
+            "ratings": {sid: {
+                "desirability": "<int 1-7>",
+                "plausibility": "<int 1-7>",
+                "impact_on_my_group": "<int 1-7>",
+                "fairness": "<int 1-7>",
+                "if_woke_up_response": "<string>",
+            } for sid in sids}
+        }, ensure_ascii=False)
+
+    def _user_prompt(self) -> str:
+        head = ("Bewerten Sie jedes der folgenden Szenarien auf vier Dimensionen (1-7) "
+                "und beantworten Sie kurz, was Sie tun würden, wenn Sie in dieser Welt aufwachten.") \
+               if self.language == "de" else \
+               ("Rate each of the following scenarios on four dimensions (1-7) "
+                "and briefly answer what you would do if you woke up in this world.")
+        blocks = []
+        for s in self.instrument["scenarios"]:
+            label = s["label_de"] if self.language == "de" else s["label_en"]
+            desc = s["description_de"] if self.language == "de" else s["description_en"]
+            blocks.append(f"--- {s['scenario_id']}: {label} ---\n{desc}")
+        return head + "\n\n" + "\n\n".join(blocks)
+
+    def _validate(self, raw: dict) -> Optional[dict]:
+        if not isinstance(raw, dict): return None
+        sids = {s["scenario_id"] for s in self.instrument["scenarios"]}
+        ratings = raw.get("ratings", {})
+        if set(ratings.keys()) != sids: return None
+        for v in ratings.values():
+            if not isinstance(v, dict): return None
+            for k in ("desirability", "plausibility", "impact_on_my_group", "fairness"):
+                if not isinstance(v.get(k), int) or not 1 <= v[k] <= 7: return None
+            if not isinstance(v.get("if_woke_up_response", ""), str): return None
+        return raw
+
+    def administer(self, persona: PersonaRecord) -> ScenarioResponse:
+        raw = self.interviewer.ask_in_character(
+            persona, user_prompt=self._user_prompt(),
+            schema_hint=self._schema_hint(), validate=self._validate,
+        )
+        ratings = {sid: ScenarioRating(**v) for sid, v in raw["ratings"].items()}
+        return ScenarioResponse(agent_id=persona.agent_id, ratings=ratings)
+
+def polarity_matrix(responses: list[ScenarioResponse]) -> dict:
+    matrix: dict[str, dict] = {}
+    sids: set[str] = set()
+    for r in responses: sids.update(r.ratings.keys())
+    for sid in sorted(sids):
+        vals = [r.ratings[sid] for r in responses if sid in r.ratings]
+        if not vals:
+            matrix[sid] = {"n": 0}
+            continue
+        matrix[sid] = {
+            "n": len(vals),
+            "mean_desirability": statistics.mean(v.desirability for v in vals),
+            "mean_plausibility": statistics.mean(v.plausibility for v in vals),
+            "mean_impact": statistics.mean(v.impact_on_my_group for v in vals),
+            "mean_fairness": statistics.mean(v.fairness for v in vals),
+            "sd_desirability": statistics.pstdev([v.desirability for v in vals]) if len(vals) > 1 else 0.0,
+            "sd_plausibility": statistics.pstdev([v.plausibility for v in vals]) if len(vals) > 1 else 0.0,
+        }
+    return matrix
+```
+
+- [ ] **Step 5: Run test to verify it passes**
+
+Run: `cd backend && uv run pytest tests/interviews/test_scenario.py -v`
+Expected: 2 passed.
+
+- [ ] **Step 6: Commit**
+
+```bash
+git add backend/scripts/instruments/scenario_v1.yaml backend/app/services/interviews/scenario.py backend/tests/interviews/test_scenario.py
+git commit -m "feat(interviews): scenario subagent with 4 futures × 4 dimensions + polarity matrix"
+```
+
+---
+
+## Phase 3 — Storage and Zep
+
+### Task 10: Interview storage layout writer
+
+**Files:**
+- Create: `backend/app/services/interviews/storage.py`
+- Test: `backend/tests/interviews/test_storage.py`
+
+- [ ] **Step 1: Write failing test**
+
+```python
+# backend/tests/interviews/test_storage.py
+import json
+from pathlib import Path
+from app.models.interview import (
+    LikertResponse, InterviewPhase, SubagentKind,
+)
+from app.services.interviews.storage import InterviewStore
+
+def test_run_directory_layout(tmp_path):
+    """start_run() creates the run directory nested under the phase and subagent directories."""
+    store = InterviewStore(root=tmp_path, sim_id="sim42")
+    run_dir = store.start_run(phase=InterviewPhase.T0, subagent=SubagentKind.LONGITUDINAL)
+    assert run_dir.exists()
+    assert run_dir.parent.name == "longitudinal"
+    assert run_dir.parent.parent.name == "T0"
+
+def test_append_response(tmp_path):
+    """append_response() writes the model as one JSON line into responses.jsonl."""
+    store = InterviewStore(root=tmp_path, sim_id="sim42")
+    run_dir = store.start_run(phase=InterviewPhase.T0, subagent=SubagentKind.LONGITUDINAL)
+    r = LikertResponse(agent_id=1, phase=InterviewPhase.T0,
+                       responses={"a": 3}, confidence={"a": 0.5})
+    store.append_response(run_dir, r)
+    contents = (run_dir / "responses.jsonl").read_text()
+    assert json.loads(contents.splitlines()[0])["agent_id"] == 1
+
+def test_write_aggregate_and_latest_pointer(tmp_path):
+    """mark_latest() writes latest.json next to the run directory, pointing at that run."""
+    store = InterviewStore(root=tmp_path, sim_id="sim42")
+    run_dir = store.start_run(phase=InterviewPhase.T1, subagent=SubagentKind.SCENARIO)
+    store.write_aggregate(run_dir, {"k": 1})
+    store.mark_latest(run_dir)
+    latest = (run_dir.parent / "latest.json").read_text()
+    assert json.loads(latest)["run_dir"].endswith(run_dir.name)
+
+def test_audit_log_append(tmp_path):
+    """audit() records the event name in the run's audit.jsonl."""
+    store = InterviewStore(root=tmp_path, sim_id="sim42")
+    run_dir = store.start_run(phase=InterviewPhase.T0, subagent=SubagentKind.DELPHI)
+    store.audit(run_dir, agent_id=7, event="schema_violation", detail="missing key x")
+    audit = (run_dir / "audit.jsonl").read_text()
+    assert "schema_violation" in audit
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `cd backend && uv run pytest tests/interviews/test_storage.py -v`
+Expected: ImportError.
+
+- [ ] **Step 3: Implement storage**
+
+`backend/app/services/interviews/storage.py`:
+```python
+from __future__ import annotations
+import json
+import time
+import uuid
+from pathlib import Path
+from typing import Any
+from pydantic import BaseModel
+from app.models.interview import InterviewPhase, SubagentKind
+
+class InterviewStore:
+    def __init__(self, root: Path, sim_id: str):
+        self.base = Path(root) / "simulations" / sim_id / "interviews"
+        self.base.mkdir(parents=True, exist_ok=True)
+
+    def start_run(self, phase: InterviewPhase, subagent: SubagentKind) -> Path:
+        run_id = time.strftime("%Y%m%dT%H%M%S") + "-" + uuid.uuid4().hex[:6]
+        run_dir = self.base / phase.value / subagent.value / run_id
+        run_dir.mkdir(parents=True, exist_ok=True)
+        meta = {"run_id": run_id, "phase": phase.value, "subagent": subagent.value,
+                "created_at": time.time()}
+        (run_dir / "run.json").write_text(json.dumps(meta, indent=2), encoding="utf-8")
+        return run_dir
+
+    def append_response(self, run_dir: Path, model: BaseModel) -> None:
+        path = run_dir / "responses.jsonl"
+        with path.open("a", encoding="utf-8") as f:
+            f.write(model.model_dump_json() + "\n")
+
+    def append_jsonl(self, run_dir: Path, filename: str, payload: dict | BaseModel) -> None:
+        path = run_dir / filename
+        with path.open("a", encoding="utf-8") as f:
+            if isinstance(payload, BaseModel):
+                f.write(payload.model_dump_json() + "\n")
+            else:
+                f.write(json.dumps(payload, ensure_ascii=False) + "\n")
+
+    def read_responses(self, run_dir: Path, filename: str = "responses.jsonl") -> list[dict]:
+        path = run_dir / filename
+        if not path.exists(): return []
+        return [json.loads(line) for line in path.read_text(encoding="utf-8").splitlines() if line.strip()]
+
+    def write_aggregate(self, run_dir: Path, payload: dict) -> None:
+        (run_dir / "aggregate.json").write_text(
+            json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
+
+    def write_named(self, run_dir: Path, name: str, payload: Any) -> None:
+        (run_dir / name).write_text(
+            json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
+
+    def audit(self, run_dir: Path, agent_id: int | None, event: str, detail: str = "") -> None:
+        entry = {"ts": time.time(), "agent_id": agent_id, "event": event, "detail": detail}
+        with (run_dir / "audit.jsonl").open("a", encoding="utf-8") as f:
+            f.write(json.dumps(entry, ensure_ascii=False) + "\n")
+
+    def mark_latest(self, run_dir: Path) -> None:
+        pointer = run_dir.parent / "latest.json"
+        pointer.write_text(json.dumps({
+            "run_dir": str(run_dir.relative_to(self.base)),
+        }), encoding="utf-8")
+
+    def latest_run(self, phase: InterviewPhase, subagent: SubagentKind) -> Path | None:
+        pointer = self.base / phase.value / subagent.value / "latest.json"
+        if not pointer.exists(): return None
+        rel = json.loads(pointer.read_text())["run_dir"]
+        path = self.base / rel
+        return path if path.exists() else None
+```
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `cd backend && uv run pytest tests/interviews/test_storage.py -v`
+Expected: 4 passed.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add backend/app/services/interviews/storage.py backend/tests/interviews/test_storage.py
+git commit -m "feat(interviews): JSONL/JSON storage layout with run_id directories and latest pointer"
+```
+
+---
+
+### Task 11: Zep episode writer for interviews
+
+**Files:**
+- Create: `backend/app/services/interviews/zep_writer.py`
+- Test: `backend/tests/interviews/test_zep_writer.py`
+
+- [ ] **Step 1: Write failing test**
+
+```python
+# backend/tests/interviews/test_zep_writer.py
+from app.models.interview import (
+    LikertResponse, InterviewPhase, SubagentKind,
+)
+from app.services.interviews.zep_writer import InterviewZepWriter
+
+class _FakeMemoryUpdater:
+    """Recording stub that offers both updater entry points and stores whatever it receives."""
+    def __init__(self):
+        self.events = []
+    def add_activity(self, activity):
+        self.events.append(activity)
+    def add_text_episode(self, graph_id, text):
+        self.events.append({"graph_id": graph_id, "text": text})
+
+def test_per_agent_episode_text():
+    """A per-agent episode mentions the agent name and the subagent/phase path."""
+    upd = _FakeMemoryUpdater()
+    w = InterviewZepWriter(memory_updater=upd, graph_id="g1")
+    r = LikertResponse(agent_id=42, phase=InterviewPhase.T1,
+                       responses={"stk_1": 4, "gov_1": 3},
+                       confidence={"stk_1": 0.8, "gov_1": 0.7})
+    w.write_per_agent(SubagentKind.LONGITUDINAL, r, agent_name="Fischer Müller")
+    assert any("Fischer Müller" in str(e) for e in upd.events)
+    assert any("longitudinal/T1" in str(e) for e in upd.events)
+
+def test_aggregate_episode():
+    """An aggregate episode carries the summary text passed in."""
+    upd = _FakeMemoryUpdater()
+    w = InterviewZepWriter(memory_updater=upd, graph_id="g1")
+    w.write_aggregate(SubagentKind.SCENARIO, summary="S1 mean desirability 5.2; S2 mean 2.1")
+    assert any("S1 mean" in str(e) for e in upd.events)
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `cd backend && uv run pytest tests/interviews/test_zep_writer.py -v`
+Expected: ImportError.
+
+- [ ] **Step 3: Implement writer**
+
+`backend/app/services/interviews/zep_writer.py`:
+```python
+from __future__ import annotations
+from typing import Any, Optional
+from app.models.interview import (
+    LikertResponse, QSortResponse, DelphiRatingResponse, ScenarioResponse, SubagentKind,
+)
+
+class InterviewZepWriter:
+    """Mirrors `ZepGraphMemoryUpdater.add_activity` usage but for interview episodes.
+
+    The real `ZepGraphMemoryUpdater` may expose `add_activity` (preferred) or a lower-level
+    text-episode method; this writer adapts to either via duck typing.
+    """
+    def __init__(self, memory_updater, graph_id: str):
+        self.updater = memory_updater
+        self.graph_id = graph_id
+
+    def _emit(self, text: str) -> None:
+        if hasattr(self.updater, "add_text_episode"):
+            self.updater.add_text_episode(self.graph_id, text)
+        elif hasattr(self.updater, "add_activity"):
+            self.updater.add_activity({"graph_id": self.graph_id, "text": text})
+        else:
+            raise RuntimeError("memory_updater has neither add_text_episode nor add_activity")
+
+    def _summarize_likert(self, r: LikertResponse) -> str:
+        mean_v = sum(r.responses.values()) / max(len(r.responses), 1)
+        top = sorted(r.responses.items(), key=lambda kv: -kv[1])[:3]
+        bot = sorted(r.responses.items(), key=lambda kv: kv[1])[:3]
+        return (f"mean={mean_v:.2f}; agrees with {[k for k,_ in top]}; "
+                f"disagrees with {[k for k,_ in bot]}")
+
+    def _summarize_qsort(self, r: QSortResponse) -> str:
+        plus = [k for k, v in r.placements.items() if v >= 2]
+        minus = [k for k, v in r.placements.items() if v <= -2]
+        return f"+strongly:{plus}; -strongly:{minus}"
+
+    def _summarize_scenario(self, r: ScenarioResponse) -> str:
+        parts = [f"{sid}: des={rt.desirability} plaus={rt.plausibility}"
+                 for sid, rt in r.ratings.items()]
+        return "; ".join(parts)
+
+    def write_per_agent(
+        self, subagent: SubagentKind, response: Any, agent_name: str,
+        phase: Optional[str] = None,
+    ) -> None:
+        if isinstance(response, LikertResponse):
+            phase = phase or response.phase.value
+            summary = self._summarize_likert(response)
+        elif isinstance(response, QSortResponse):
+            phase = phase or "T1"
+            summary = self._summarize_qsort(response)
+        elif isinstance(response, ScenarioResponse):
+            phase = phase or "T1"
+            summary = self._summarize_scenario(response)
+        elif isinstance(response, DelphiRatingResponse):
+            phase = phase or f"T1/R{response.round}"
+            summary = f"round={response.round}; {len(response.ratings)} themes rated"
+        else:
+            phase = phase or "T1"
+            summary = str(response)[:200]
+        text = f"Agent {agent_name} (interview/{subagent.value}/{phase}): {summary}"
+        self._emit(text)
+
+    def write_aggregate(self, subagent: SubagentKind, summary: str) -> None:
+        self._emit(f"Interview aggregate ({subagent.value}): {summary}")
+```
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `cd backend && uv run pytest tests/interviews/test_zep_writer.py -v`
+Expected: 2 passed.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add backend/app/services/interviews/zep_writer.py backend/tests/interviews/test_zep_writer.py
+git commit -m "feat(interviews): Zep writer adapts add_activity/add_text_episode for per-agent + aggregate episodes"
+```
+
+---
+
+## Phase 4 — Orchestrator, lifecycle, synthesiser
+
+### Task 12: InterviewOrchestrator (parallel fan-out)
+
+**Files:**
+- Create: `backend/app/services/interview_orchestrator.py`
+- Test: `backend/tests/interviews/test_orchestrator.py`
+
+- [ ] **Step 1: Write failing test**
+
+```python
+# backend/tests/interviews/test_orchestrator.py
+from pathlib import Path
+import pytest
+from app.models.interview import InterviewPhase, SubagentKind
+from app.services.interviews.base import PersonaRecord, MemoryDigest
+from app.services.interview_orchestrator import (
+    InterviewOrchestrator, PersonaProvider,
+)
+
+INST_DIR = Path(__file__).resolve().parents[2] / "scripts" / "instruments"
+
+class _Mem:
+    """Memory stub: hands back the same available one-character digest for any agent."""
+    def get_digest(self, agent_id, max_chars=2000):
+        return MemoryDigest(text="x", available=True)
+
+class _LLM:
+    """LLM stub: answers longitudinal prompts with a neutral 3 on all twelve items, else ``{}``."""
+    def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
+        sys_text = next((m["content"] for m in messages if m["role"] == "system"), "")
+        # A call counts as longitudinal when the system prompt says so or the
+        # last message mentions an "stk_" item id.
+        if "longitudinal" in sys_text or "stk_" in (messages[-1].get("content") or ""):
+            return {
+                "responses": {k: 3 for k in ("stk_1","stk_2","stk_3","gov_1","gov_2","gov_3",
+                                             "mkt_1","mkt_2","mkt_3","clm_1","clm_2","clm_3")},
+                "confidence": {}, "open_comment": "ok",
+            }
+        return {}
+
+class _Personas(PersonaProvider):
+    """Persona provider stub holding ``n`` generated personas with ids ``0..n-1``."""
+    def __init__(self, n=3):
+        self._items = [PersonaRecord(agent_id=i, name=f"A{i}", persona="p") for i in range(n)]
+    # Returns a copy so callers cannot mutate the stub's own list.
+    def all(self): return list(self._items)
+
+class _NoopZep:
+    """Zep writer stub: accepts both write calls and does nothing."""
+    def write_per_agent(self, *a, **kw): pass
+    def write_aggregate(self, *a, **kw): pass
+
+def test_pre_phase_runs_longitudinal_only(tmp_path):
+    """run_pre() interviews all three personas and reports only the longitudinal subagent."""
+    orch = InterviewOrchestrator(
+        llm=_LLM(), memory=_Mem(), personas=_Personas(3),
+        instrument_dir=INST_DIR, store_root=tmp_path, sim_id="sim1",
+        zep_writer=_NoopZep(), max_workers=2,
+    )
+    result = orch.run_pre()
+    assert result["longitudinal"]["n_responded"] == 3
+    assert "diversity" not in result  # only longitudinal in pre-phase
+
+def test_partial_failure_does_not_kill_run(tmp_path):
+    """An LLM failing on every second call yields fewer than four responses and at least one recorded failure."""
+    class _FlakyLLM:
+        """LLM stub that raises on every even-numbered call and answers normally otherwise."""
+        def __init__(self): self.n = 0
+        def chat_json(self, messages, temperature=0.0, max_tokens=None, **kw):
+            self.n += 1
+            if self.n % 2 == 0:
+                raise RuntimeError("simulated LLM 5xx")
+            return {
+                "responses": {k: 3 for k in ("stk_1","stk_2","stk_3","gov_1","gov_2","gov_3",
+                                             "mkt_1","mkt_2","mkt_3","clm_1","clm_2","clm_3")},
+                "confidence": {}, "open_comment": "ok",
+            }
+    # A single worker is used, so the stub's call counter is never touched concurrently.
+    orch = InterviewOrchestrator(
+        llm=_FlakyLLM(), memory=_Mem(), personas=_Personas(4),
+        instrument_dir=INST_DIR, store_root=tmp_path, sim_id="sim2",
+        zep_writer=_NoopZep(), max_workers=1,
+    )
+    result = orch.run_pre()
+    assert result["longitudinal"]["n_responded"] < 4
+    assert result["longitudinal"]["n_failed"] > 0
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `cd backend && uv run pytest tests/interviews/test_orchestrator.py -v`
+Expected: ImportError.
+
+- [ ] **Step 3: Implement orchestrator**
+
+`backend/app/services/interview_orchestrator.py`:
+```python
+from __future__ import annotations
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
+from typing import Protocol
+from app.models.interview import (
+    InterviewPhase, SubagentKind, LikertResponse, QSortResponse,
+    DelphiOpenResponse, DelphiRatingResponse, ScenarioResponse,
+)
+from app.services.interviews.base import PersonaRecord
+from app.services.interviews.longitudinal import LongitudinalSubagent, run_aggregate as longitudinal_aggregate
+from app.services.interviews.diversity import DiversitySubagent, run_typology
+from app.services.interviews.delphi import (
+    DelphiSubagent, extract_themes, convergence_metrics, group_stats_from_r2,
+)
+from app.services.interviews.scenario import ScenarioSubagent, polarity_matrix
+from app.services.interviews.storage import InterviewStore
+from app.services.interviews.instrument_loader import freeze_snapshot
+
+class PersonaProvider(Protocol):
+    """Structural interface for any object that can list the personas to interview."""
+    def all(self) -> list[PersonaRecord]: ...
+
+class InterviewOrchestrator:
+    def __init__(
+        self, llm, memory, personas: PersonaProvider,
+        instrument_dir: Path, store_root: Path, sim_id: str,
+        zep_writer, max_workers: int = 8, language: str = "de",
+    ):
+        self.llm = llm
+        self.memory = memory
+        self.personas = personas
+        self.instrument_dir = Path(instrument_dir)
+        self.store = InterviewStore(root=store_root, sim_id=sim_id)
+        self.zep_writer = zep_writer
+        self.max_workers = max_workers
+        self.language = language
+        # Freeze snapshot once per orchestrator lifetime
+        freeze_snapshot(
+            instruments={
+                "longitudinal": self.instrument_dir / "longitudinal_v1.yaml",
+                "diversity":    self.instrument_dir / "diversity_v1.yaml",
+                "delphi":       self.instrument_dir / "delphi_v1.yaml",
+                "scenario":     self.instrument_dir / "scenario_v1.yaml",
+            },
+            out_path=self.store.base / "instruments_used.json",
+        )
+
+    # --- Generic per-agent runner ---
+    def _fan_out(self, run_dir, agent_fn, personas, audit_label):
+        ok: list = []
+        failed: list[int] = []
+        with ThreadPoolExecutor(max_workers=self.max_workers) as pool:
+            futures = {pool.submit(agent_fn, p): p for p in personas}
+            for fut in as_completed(futures):
+                p = futures[fut]
+                try:
+                    out = fut.result()
+                    ok.append(out)
+                    self.store.append_response(run_dir, out)
+                except Exception as e:
+                    failed.append(p.agent_id)
+                    self.store.audit(run_dir, agent_id=p.agent_id,
+                                     event="agent_failed", detail=f"{audit_label}: {e!r}")
+        return ok, failed
+
+    # --- Pre-phase (T0) ---
+    def run_pre(self) -> dict:
+        sub = LongitudinalSubagent(self.llm, self.memory,
+                                   self.instrument_dir / "longitudinal_v1.yaml",
+                                   language=self.language)
+        run_dir = self.store.start_run(InterviewPhase.T0, SubagentKind.LONGITUDINAL)
+        ok, failed = self._fan_out(
+            run_dir, lambda p: sub.administer(p, phase=InterviewPhase.T0),
+            self.personas.all(), audit_label="longitudinal_T0",
+        )
+        for r in ok:
+            persona = next(p for p in self.personas.all() if p.agent_id == r.agent_id)
+            try: self.zep_writer.write_per_agent(SubagentKind.LONGITUDINAL, r, persona.name)
+            except Exception: pass
+        self.store.mark_latest(run_dir)
+        return {"longitudinal": {"n_responded": len(ok), "n_failed": len(failed),
+                                 "run_dir": str(run_dir)}}
+
+    # --- Post-phase (T1) ---
+    def run_post(self) -> dict:
+        personas = self.personas.all()
+        out: dict = {}
+        with ThreadPoolExecutor(max_workers=4) as pool:
+            futures = {
+                "longitudinal": pool.submit(self._post_longitudinal, personas),
+                "diversity":    pool.submit(self._post_diversity, personas),
+                "scenario":     pool.submit(self._post_scenario, personas),
+            }
+            for name, fut in futures.items():
+                try: out[name] = fut.result()
+                except Exception as e: out[name] = {"error": repr(e)}
+        # Delphi runs sequentially (R1 → R2 → R3) and uses the LLM for theme extraction
+        try: out["delphi"] = self._post_delphi(personas)
+        except Exception as e: out["delphi"] = {"error": repr(e)}
+        return out
+
+    def _post_longitudinal(self, personas) -> dict:
+        sub = LongitudinalSubagent(self.llm, self.memory,
+                                   self.instrument_dir / "longitudinal_v1.yaml",
+                                   language=self.language)
+        run_dir = self.store.start_run(InterviewPhase.T1, SubagentKind.LONGITUDINAL)
+        ok, failed = self._fan_out(
+            run_dir, lambda p: sub.administer(p, phase=InterviewPhase.T1),
+            personas, audit_label="longitudinal_T1",
+        )
+        # Aggregate using T0 + T1
+        t0_path = self.store.latest_run(InterviewPhase.T0, SubagentKind.LONGITUDINAL)
+        t0_raw = self.store.read_responses(t0_path) if t0_path else []
+        t0 = [LikertResponse(**d) for d in t0_raw]
+        agg = longitudinal_aggregate(t0, ok)
+        self.store.write_aggregate(run_dir, agg)
+        for r in ok:
+            persona = next(p for p in personas if p.agent_id == r.agent_id)
+            try: self.zep_writer.write_per_agent(SubagentKind.LONGITUDINAL, r, persona.name)
+            except Exception: pass
+        try: self.zep_writer.write_aggregate(SubagentKind.LONGITUDINAL,
+                                             f"n_paired={agg['n_paired']}")
+        except Exception: pass
+        self.store.mark_latest(run_dir)
+        return {"n_responded": len(ok), "n_failed": len(failed), "run_dir": str(run_dir)}
+
+    def _post_diversity(self, personas) -> dict:
+        sub = DiversitySubagent(self.llm, self.memory,
+                                self.instrument_dir / "diversity_v1.yaml",
+                                language=self.language)
+        run_dir = self.store.start_run(InterviewPhase.T1, SubagentKind.DIVERSITY)
+        ok, failed = self._fan_out(
+            run_dir, lambda p: sub.administer(p), personas, audit_label="diversity",
+        )
+        typology = run_typology(ok)
+        self.store.write_named(run_dir, "typology.json", typology)
+        self.store.write_aggregate(run_dir, {"n": len(ok), "n_failed": len(failed),
+                                             "clusters": typology["clusters"]})
+        for r in ok:
+            persona = next(p for p in personas if p.agent_id == r.agent_id)
+            try: self.zep_writer.write_per_agent(SubagentKind.DIVERSITY, r, persona.name)
+            except Exception: pass
+        self.store.mark_latest(run_dir)
+        return {"n_responded": len(ok), "n_failed": len(failed), "run_dir": str(run_dir)}
+
+    def _post_scenario(self, personas) -> dict:
+        sub = ScenarioSubagent(self.llm, self.memory,
+                               self.instrument_dir / "scenario_v1.yaml",
+                               language=self.language)
+        run_dir = self.store.start_run(InterviewPhase.T1, SubagentKind.SCENARIO)
+        ok, failed = self._fan_out(
+            run_dir, lambda p: sub.administer(p), personas, audit_label="scenario",
+        )
+        matrix = polarity_matrix(ok)
+        self.store.write_named(run_dir, "polarity_matrix.json", matrix)
+        self.store.write_aggregate(run_dir, {"n": len(ok), "n_failed": len(failed),
+                                             "polarity": matrix})
+        for r in ok:
+            persona = next(p for p in personas if p.agent_id == r.agent_id)
+            try: self.zep_writer.write_per_agent(SubagentKind.SCENARIO, r, persona.name)
+            except Exception: pass
+        self.store.mark_latest(run_dir)
+        return {"n_responded": len(ok), "n_failed": len(failed), "run_dir": str(run_dir)}
+
+    def _post_delphi(self, personas) -> dict:  # three Delphi rounds at T1: R1 replies -> R2 ratings -> R3 revisions
+        sub = DelphiSubagent(self.llm, self.memory,
+                             self.instrument_dir / "delphi_v1.yaml",
+                             language=self.language)
+        run_dir = self.store.start_run(InterviewPhase.T1, SubagentKind.DELPHI)
+        # Round 1: every persona is asked; _fan_out also stores each reply via store.append_response
+        r1_ok, r1_failed = self._fan_out(
+            run_dir, lambda p: sub.administer_round1(p), personas, audit_label="delphi_r1",
+        )
+        # Copy (not move) the R1 replies into a per-round file; the copies written by _fan_out stay in place
+        for r in r1_ok: self.store.append_jsonl(run_dir, "round1_themes.jsonl", r)
+        # Extract themes from the successful R1 replies (the LLM client is passed to the extractor)
+        themes = extract_themes(r1_ok, llm=self.llm)
+        self.store.write_named(run_dir, "themes.json", {"themes": themes})
+        # Round 2: only personas with a successful R1 reply are asked, and they receive the extracted themes
+        r2_ok, r2_failed = self._fan_out(
+            run_dir, lambda p: sub.administer_round2(p, themes),
+            [p for p in personas if p.agent_id in {r.agent_id for r in r1_ok}],
+            audit_label="delphi_r2",
+        )
+        for r in r2_ok: self.store.append_jsonl(run_dir, "round2_ratings.jsonl", r)
+        gstats = group_stats_from_r2(r2_ok)
+        # Round 3: only personas with an R2 reply; each call gets the themes, group stats and that agent's own R2 reply
+        r2_by = {r.agent_id: r for r in r2_ok}
+        r3_personas = [p for p in personas if p.agent_id in r2_by]
+        def r3_call(p): return sub.administer_round3(p, themes, gstats, r2_by[p.agent_id])
+        r3_ok, r3_failed = self._fan_out(run_dir, r3_call, r3_personas, audit_label="delphi_r3")
+        for r in r3_ok: self.store.append_jsonl(run_dir, "round3_revisions.jsonl", r)
+        # Convergence metrics computed from the R2 and R3 replies
+        conv = convergence_metrics(r2_ok, r3_ok)
+        self.store.write_named(run_dir, "convergence.json", conv)
+        self.store.write_aggregate(run_dir, {
+            "n_r1": len(r1_ok), "n_r2": len(r2_ok), "n_r3": len(r3_ok),
+            "n_failed_r1": len(r1_failed), "n_failed_r2": len(r2_failed), "n_failed_r3": len(r3_failed),
+            "themes": themes,
+        })
+        for r in r3_ok:
+            persona = next(p for p in personas if p.agent_id == r.agent_id)
+            try: self.zep_writer.write_per_agent(SubagentKind.DELPHI, r, persona.name)
+            except Exception: pass
+        self.store.mark_latest(run_dir)
+        return {"n_r1": len(r1_ok), "n_r2": len(r2_ok), "n_r3": len(r3_ok),
+                "run_dir": str(run_dir)}
+
+    # --- Re-run a single subagent ---
+    def rerun(self, subagent: SubagentKind) -> dict:
+        personas = self.personas.all()
+        if subagent == SubagentKind.LONGITUDINAL: return {"longitudinal": self._post_longitudinal(personas)}
+        if subagent == SubagentKind.DIVERSITY:    return {"diversity":    self._post_diversity(personas)}
+        if subagent == SubagentKind.SCENARIO:     return {"scenario":     self._post_scenario(personas)}
+        if subagent == SubagentKind.DELPHI:       return {"delphi":       self._post_delphi(personas)}
+        raise ValueError(f"unknown subagent {subagent}")
+```
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `cd backend && uv run pytest tests/interviews/test_orchestrator.py -v`
+Expected: 2 passed.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add backend/app/services/interview_orchestrator.py backend/tests/interviews/test_orchestrator.py
+git commit -m "feat(interviews): orchestrator with two-phase lifecycle, parallel fan-out, isolated failures"
+```
+
+---
+
+### Task 13: Simulation manager lifecycle hooks
+
+**Files:**
+- Modify: `backend/app/services/simulation_manager.py`
+- Test: `backend/tests/interviews/test_simulation_hooks.py`
+
+- [ ] **Step 1: Write failing test**
+
+```python
+# backend/tests/interviews/test_simulation_hooks.py
+from app.services.simulation_manager import SimulationManager, SimulationState
+
+def test_register_post_ready_hook_invoked(monkeypatch):
+    called = []
+    mgr = SimulationManager()
+    mgr.register_on_ready(lambda state: called.append(("ready", state.sim_id)))
+    state = SimulationState(sim_id="abc", status="ready")
+    mgr._notify_on_ready(state)
+    assert called == [("ready", "abc")]
+
+def test_register_post_completed_hook_invoked():
+    called = []
+    mgr = SimulationManager()
+    mgr.register_on_completed(lambda state: called.append(("done", state.sim_id)))
+    state = SimulationState(sim_id="abc", status="completed")
+    mgr._notify_on_completed(state)
+    assert called == [("done", "abc")]
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `cd backend && uv run pytest tests/interviews/test_simulation_hooks.py -v`
+Expected: AttributeError on `register_on_ready` / `register_on_completed`.
+
+- [ ] **Step 3: Add hook registry to SimulationManager**
+
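+In `backend/app/services/simulation_manager.py`, find the `SimulationManager` class. Add to `__init__` (preserving the existing init body). Note that these lists are per-instance: hooks registered on one `SimulationManager()` are not seen by another, so register them on the instance that actually performs the state transitions (or keep the lists on the class if managers are constructed per request):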
+```python
+        self._on_ready_hooks: list = []
+        self._on_completed_hooks: list = []
+```
+
+Add methods to the class:
+```python
+    def register_on_ready(self, fn) -> None:
+        self._on_ready_hooks.append(fn)
+
+    def register_on_completed(self, fn) -> None:
+        self._on_completed_hooks.append(fn)
+
+    def _notify_on_ready(self, state) -> None:
+        for fn in list(self._on_ready_hooks):
+            try: fn(state)
+            except Exception as e:
+                from app.utils.logger import get_logger
+                get_logger(__name__).warning(f"on_ready hook failed: {e!r}")
+
+    def _notify_on_completed(self, state) -> None:
+        for fn in list(self._on_completed_hooks):
+            try: fn(state)
+            except Exception as e:
+                from app.utils.logger import get_logger
+                get_logger(__name__).warning(f"on_completed hook failed: {e!r}")
+```
+
+Locate the existing code that transitions state to `ready` (after `prepare_simulation` completes) and to `completed` (after simulation finishes). Insert calls to `self._notify_on_ready(state)` and `self._notify_on_completed(state)` immediately after each transition. If `SimulationState` is not a simple dataclass with `sim_id` and `status` attributes, adjust the test fixture to match the actual class shape (read the file first).
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `cd backend && uv run pytest tests/interviews/test_simulation_hooks.py -v`
+Expected: 2 passed.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add backend/app/services/simulation_manager.py backend/tests/interviews/test_simulation_hooks.py
+git commit -m "feat(interviews): on_ready / on_completed hook registry on SimulationManager"
+```
+
+---
+
+### Task 14: InterviewSynthesizer
+
+**Files:**
+- Create: `backend/app/services/interview_synthesizer.py`
+- Test: `backend/tests/interviews/test_synthesizer.py`
+
+- [ ] **Step 1: Write failing test**
+
+```python
+# backend/tests/interviews/test_synthesizer.py
+import json
+from pathlib import Path
+from app.services.interviews.storage import InterviewStore
+from app.models.interview import InterviewPhase, SubagentKind, LikertResponse
+from app.services.interview_synthesizer import InterviewSynthesizer
+
+def _seed_minimal(tmp_path: Path) -> InterviewStore:
+    store = InterviewStore(root=tmp_path, sim_id="s1")
+    rd = store.start_run(InterviewPhase.T0, SubagentKind.LONGITUDINAL)
+    for i in range(3):
+        store.append_response(rd, LikertResponse(
+            agent_id=i, phase=InterviewPhase.T0,
+            responses={"stk_1": 3, "gov_1": 3}, confidence={"stk_1": 0.5, "gov_1": 0.5},
+        ))
+    store.write_aggregate(rd, {"per_item": {}, "n_paired": 0})
+    store.mark_latest(rd)
+    return store
+
+def test_synthesizer_runs_with_partial_data(tmp_path):
+    store = _seed_minimal(tmp_path)
+    synth = InterviewSynthesizer(store=store)
+    report = synth.run()
+    assert "limitations" in report.lower()
+    assert "stub mode" in report.lower() or "n_responded" in report.lower()
+
+def test_synthesizer_writes_files(tmp_path):
+    store = _seed_minimal(tmp_path)
+    synth = InterviewSynthesizer(store=store)
+    synth.run()
+    files = list((store.base / "synthesis").iterdir())
+    names = {f.name for f in files}
+    assert "report.md" in names
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `cd backend && uv run pytest tests/interviews/test_synthesizer.py -v`
+Expected: ImportError.
+
+- [ ] **Step 3: Implement synthesiser**
+
+`backend/app/services/interview_synthesizer.py`:
+```python
+from __future__ import annotations
+import csv
+import json
+from pathlib import Path
+from app.models.interview import InterviewPhase, SubagentKind
+from app.services.interviews.storage import InterviewStore
+
+class InterviewSynthesizer:
+    def __init__(self, store: InterviewStore):
+        self.store = store
+
+    def _maybe(self, phase: InterviewPhase, sub: SubagentKind) -> dict | None:
+        run = self.store.latest_run(phase, sub)
+        if run is None: return None
+        agg = run / "aggregate.json"
+        if not agg.exists(): return None
+        return {"run_dir": str(run), "aggregate": json.loads(agg.read_text(encoding="utf-8"))}
+
+    def _instrument_hashes(self) -> dict:
+        snap = self.store.base / "instruments_used.json"
+        if not snap.exists(): return {}
+        try: data = json.loads(snap.read_text(encoding="utf-8"))
+        except Exception: return {}
+        return {k: v.get("hash") for k, v in data.items()}
+
+    def _limitations_text(self, present: dict[str, bool]) -> str:
+        lines = [
+            "## Limitations",
+            "- **Simulated, not real stakeholders.** Responses reflect how the seed-document discourse "
+            "and the LLM jointly encode each stakeholder type, not what an actual fisher or NGO "
+            "staffer would say. The instrument measures the *model of the stakeholder*, not the stakeholder.",
+            "- **Memory digest is lossy.** Each agent's experience of OASIS is summarised to bounded length; "
+            "agents do not have full episodic recall.",
+            "- **LLM acquiescence and centrality bias.** Likert scales with LLM respondents skew toward 3–4 "
+            "of 5; check per-item distribution shape before drawing conclusions.",
+            "- **N is what it is.** `n_responded` and `n_failed` are printed verbatim per subagent; no smoothing.",
+            "- **Instrument provenance.** Hashes of frozen instruments are listed below; an identical run "
+            "is reproducible from these snapshots.",
+        ]
+        for k, ok in present.items():
+            if not ok:
+                lines.append(f"- *{k}* subagent results are missing for this run.")
+        return "\n".join(lines)
+
+    def run(self) -> str:
+        """Assemble the Markdown synthesis, write report.md and the tidy CSV under synthesis/, and return the text."""
+        sections: list[str] = ["# Stakeholder Interview Synthesis\n"]
+
+        long_t1 = self._maybe(InterviewPhase.T1, SubagentKind.LONGITUDINAL)
+        if long_t1:
+            agg = long_t1["aggregate"]
+            sections.append("## Longitudinal opinion drift (T0 → T1)")
+            sections.append(f"- N paired: {agg.get('n_paired', 'NA')}")
+            per_item = agg.get("per_item", {})
+            top = sorted(per_item.items(),
+                         key=lambda kv: abs(kv[1].get("mean_delta") or 0), reverse=True)[:5]
+            sections.append("- Largest mean shifts:")
+            for k, v in top:
+                delta = v.get("mean_delta")  # may be None (the sort key above tolerates it); ':+0.2f' on None raises
+                shown = "NA" if delta is None else f"{delta:+0.2f}"
+                sections.append(f"  - `{k}`: Δ̄ = {shown}  (n={v.get('n')})")
+
+        diversity = self._maybe(InterviewPhase.T1, SubagentKind.DIVERSITY)
+        if diversity:
+            clusters = diversity["aggregate"].get("clusters", [])
+            sections.append("## Stakeholder typology")
+            sections.append(f"- N agents: {diversity['aggregate'].get('n', 'NA')}")
+            sections.append(f"- Clusters: {len(clusters)}")
+            for c in clusters:
+                sections.append(f"  - cluster {c['cluster_id']}: n={c['n']}, "
+                                f"top loadings = {list(c['top_loadings'].keys())[:5]}")
+
+        delphi = self._maybe(InterviewPhase.T1, SubagentKind.DELPHI)
+        if delphi:
+            agg = delphi["aggregate"]
+            sections.append("## Delphi consensus")
+            sections.append(f"- Rounds completed: R1={agg.get('n_r1')}, R2={agg.get('n_r2')}, R3={agg.get('n_r3')}")
+            themes = agg.get("themes", [])
+            sections.append(f"- Themes: {[t.get('label') for t in themes]}")
+
+        scenario = self._maybe(InterviewPhase.T1, SubagentKind.SCENARIO)
+        if scenario:
+            pol = scenario["aggregate"].get("polarity", {})
+            sections.append("## Scenario evaluation")
+            for sid in sorted(pol):
+                v = pol[sid]
+                if v.get("n", 0) == 0: continue
+                sections.append(
+                    f"- **{sid}**: n={v['n']}, desirability {v['mean_desirability']:.2f}, "
+                    f"plausibility {v['mean_plausibility']:.2f}, impact {v['mean_impact']:.2f}, "
+                    f"fairness {v['mean_fairness']:.2f}")
+
+        sections.append("")
+        sections.append(self._limitations_text({
+            "longitudinal": bool(long_t1),
+            "diversity":    bool(diversity),
+            "delphi":       bool(delphi),
+            "scenario":     bool(scenario),
+        }))
+        sections.extend(["", "### Instrument provenance"])
+        for name, h in self._instrument_hashes().items():
+            sections.append(f"- `{name}`: hash `{h}`")
+
+        report = "\n\n".join(sections)
+        out_dir = self.store.base / "synthesis"
+        out_dir.mkdir(parents=True, exist_ok=True)
+        (out_dir / "report.md").write_text(report, encoding="utf-8")
+        self._write_tidy_csv(out_dir / "exports" / "all_responses.csv")
+        return report
+
+    def _write_tidy_csv(self, csv_path: Path) -> None:
+        """Write every stored response as long-format rows (one row per agent and key) to `csv_path`."""
+        csv_path.parent.mkdir(parents=True, exist_ok=True)
+        rows: list[dict] = []
+        for phase in (InterviewPhase.T0, InterviewPhase.T1):
+            for sub in SubagentKind:
+                run = self.store.latest_run(phase, sub)
+                if run is None: continue
+                files = ["responses.jsonl", "round1_themes.jsonl",
+                         "round2_ratings.jsonl", "round3_revisions.jsonl"]
+                for fname in files:
+                    for rec in self.store.read_responses(run, fname):
+                        flat = self._flatten(rec, phase=phase.value, subagent=sub.value)
+                        rows.extend(flat)
+        # One fixed column order for empty and non-empty exports alike, so positional readers see a
+        # stable header; an empty export is a header-only file with csv's default "\r\n" line ending.
+        fieldnames = ["phase", "subagent", "agent_id", "key", "value"]
+        with csv_path.open("w", encoding="utf-8", newline="") as f:
+            w = csv.DictWriter(f, fieldnames=fieldnames)
+            w.writeheader()
+            w.writerows(rows)
+
+    def _flatten(self, rec: dict, *, phase: str, subagent: str) -> list[dict]:
+        out: list[dict] = []
+        aid = rec.get("agent_id")
+        for key, val in rec.items():
+            if key == "agent_id": continue
+            if isinstance(val, dict):
+                for k2, v2 in val.items():
+                    if isinstance(v2, dict):
+                        for k3, v3 in v2.items():
+                            out.append({"phase": phase, "subagent": subagent, "agent_id": aid,
+                                        "key": f"{key}.{k2}.{k3}", "value": v3})
+                    else:
+                        out.append({"phase": phase, "subagent": subagent, "agent_id": aid,
+                                    "key": f"{key}.{k2}", "value": v2})
+            else:
+                out.append({"phase": phase, "subagent": subagent, "agent_id": aid,
+                            "key": key, "value": val})
+        return out
+```
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `cd backend && uv run pytest tests/interviews/test_synthesizer.py -v`
+Expected: 2 passed.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add backend/app/services/interview_synthesizer.py backend/tests/interviews/test_synthesizer.py
+git commit -m "feat(interviews): synthesiser emits cross-method report + tidy CSV + limitations section"
+```
+
+---
+
+## Phase 5 — Adapters and API
+
+### Task 15: Persona + memory adapters
+
+**Files:**
+- Create: `backend/app/services/interviews/adapters.py`
+- Test: `backend/tests/interviews/test_adapters.py`
+
+- [ ] **Step 1: Write failing test**
+
+```python
+# backend/tests/interviews/test_adapters.py
+import csv
+import json
+from pathlib import Path
+from app.services.interviews.adapters import (
+    FileSystemPersonaProvider, ZepMemoryProvider,
+)
+
+def _write_reddit_profiles(tmp_path: Path):
+    data = [
+        {"user_id": 0, "user_name": "fischer1", "name": "Fischer Müller",
+         "persona": "I am a small-scale Baltic fisher.", "profession": "fisher", "bio": ""},
+        {"user_id": 1, "user_name": "ngo1", "name": "Ines NGO",
+         "persona": "I work for an environmental NGO.", "profession": "ngo_staff", "bio": ""},
+    ]
+    p = tmp_path / "reddit_profiles.json"
+    p.write_text(json.dumps(data), encoding="utf-8")
+    return p
+
+def test_file_system_persona_provider_reads_reddit_json(tmp_path):
+    p = _write_reddit_profiles(tmp_path)
+    provider = FileSystemPersonaProvider(reddit_path=p, twitter_path=None)
+    personas = provider.all()
+    assert len(personas) == 2
+    assert personas[0].name == "Fischer Müller"
+    assert personas[0].agent_id == 0
+
+def test_zep_memory_provider_returns_empty_when_unavailable():
+    class _BrokenReader:
+        def get_entity_with_context(self, *a, **kw):
+            raise RuntimeError("offline")
+    prov = ZepMemoryProvider(entity_reader=_BrokenReader(), graph_id="g1",
+                             agent_to_entity={0: "uuid-zero"})
+    d = prov.get_digest(0)
+    assert d.available is False
+    assert d.text != ""
+
+def test_zep_memory_provider_truncates_to_max_chars():
+    class _R:
+        def get_entity_with_context(self, *a, **kw):
+            class _Ctx:
+                name = "X"; summary = "Y"
+                related_edges = [{"fact": "very long fact " * 200}]
+            return _Ctx()
+    prov = ZepMemoryProvider(entity_reader=_R(), graph_id="g1",
+                             agent_to_entity={5: "uuid-five"})
+    d = prov.get_digest(5, max_chars=300)
+    assert d.available is True
+    assert len(d.text) <= 300
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `cd backend && uv run pytest tests/interviews/test_adapters.py -v`
+Expected: ImportError.
+
+- [ ] **Step 3: Implement adapters**
+
+`backend/app/services/interviews/adapters.py`:
+```python
+from __future__ import annotations
+import csv
+import json
+from pathlib import Path
+from typing import Optional
+from app.services.interviews.base import PersonaRecord, MemoryDigest
+
+class FileSystemPersonaProvider:
+    """Reads OASIS profiles from the simulation's `reddit_profiles.json` and/or `twitter_profiles.csv`.
+
+    If both are present, agents from `reddit_profiles.json` take precedence; twitter-only agents are appended.
+    """
+    def __init__(self, reddit_path: Optional[Path], twitter_path: Optional[Path]):
+        self.reddit_path = Path(reddit_path) if reddit_path else None
+        self.twitter_path = Path(twitter_path) if twitter_path else None
+
+    def _load_reddit(self) -> list[PersonaRecord]:
+        """Parse the reddit profiles JSON into PersonaRecords; rows without a `user_id` are skipped."""
+        if not self.reddit_path or not self.reddit_path.exists(): return []
+        out = []
+        for row in json.loads(self.reddit_path.read_text(encoding="utf-8")):
+            if row.get("user_id") is None: continue  # `is None`, not falsiness: agent id 0 is a valid id
+            out.append(PersonaRecord(
+                agent_id=int(row["user_id"]),
+                name=str(row.get("name") or row.get("user_name") or f"agent_{row['user_id']}"),
+                persona=str(row.get("persona") or row.get("bio") or ""),
+                profession=row.get("profession"), bio=row.get("bio"),
+            ))
+        return out
+
+    def _load_twitter(self) -> list[PersonaRecord]:
+        if not self.twitter_path or not self.twitter_path.exists(): return []
+        out = []
+        with self.twitter_path.open("r", encoding="utf-8", newline="") as f:
+            for row in csv.DictReader(f):
+                if not row.get("user_id"): continue
+                out.append(PersonaRecord(
+                    agent_id=int(row["user_id"]),
+                    name=str(row.get("name") or row.get("user_name") or f"agent_{row['user_id']}"),
+                    persona=str(row.get("persona") or row.get("bio") or ""),
+                    profession=row.get("profession"),
+                    bio=row.get("bio"),
+                ))
+        return out
+
+    def all(self) -> list[PersonaRecord]:
+        reddit = self._load_reddit()
+        seen = {p.agent_id for p in reddit}
+        twitter = [p for p in self._load_twitter() if p.agent_id not in seen]
+        return reddit + twitter
+
+class ZepMemoryProvider:
+    """Builds a bounded memory digest per agent from Zep entity context.
+
+    Maps `agent_id` (OASIS user_id) to a Zep entity UUID; falls back to the agent_id as a string.
+    """
+    def __init__(self, entity_reader, graph_id: str, agent_to_entity: dict[int, str] | None = None):
+        self.reader = entity_reader
+        self.graph_id = graph_id
+        self.map = dict(agent_to_entity or {})
+
+    def get_digest(self, agent_id: int, max_chars: int = 2000) -> MemoryDigest:
+        entity_uuid = self.map.get(agent_id) or str(agent_id)
+        try:
+            ctx = self.reader.get_entity_with_context(self.graph_id, entity_uuid)
+        except Exception:
+            return MemoryDigest(text=f"[no memory for agent {agent_id}]", available=False)
+        parts: list[str] = []
+        name = getattr(ctx, "name", None)
+        summary = getattr(ctx, "summary", None)
+        if name: parts.append(f"Name: {name}")
+        if summary: parts.append(f"Summary: {summary}")
+        edges = getattr(ctx, "related_edges", []) or []
+        for e in edges[:20]:
+            fact = e.get("fact") if isinstance(e, dict) else getattr(e, "fact", None)
+            if fact: parts.append(f"- {fact}")
+        text = "\n".join(parts)
+        if len(text) > max_chars: text = text[: max_chars - 1] + "…"
+        return MemoryDigest(text=text or f"[empty memory for agent {agent_id}]", available=True)
+```
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `cd backend && uv run pytest tests/interviews/test_adapters.py -v`
+Expected: 3 passed.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add backend/app/services/interviews/adapters.py backend/tests/interviews/test_adapters.py
+git commit -m "feat(interviews): persona + Zep memory adapters bridging existing services to interview subsystem"
+```
+
+---
+
+### Task 16: /api/interview Flask blueprint
+
+**Files:**
+- Create: `backend/app/api/interview.py`
+- Modify: `backend/app/api/__init__.py`
+- Test: `backend/tests/interviews/test_api_interview.py`
+
+- [ ] **Step 1: Write failing test**
+
+```python
+# backend/tests/interviews/test_api_interview.py
+import json
+import os
+from pathlib import Path
+import pytest
+
+@pytest.fixture
+def client(tmp_path, monkeypatch):
+    monkeypatch.setenv("LLM_STUB_MODE", "true")
+    monkeypatch.setenv("UPLOADS_DIR", str(tmp_path))
+    from app.config import Config
+    monkeypatch.setattr(Config, "LLM_STUB_MODE", True, raising=False)  # undone after the test, unlike plain assignment
+    monkeypatch.setattr(Config, "UPLOADS_DIR", str(tmp_path), raising=False)  # raising=False: attr may not exist yet
+    # Seed a minimal reddit_profiles.json
+    sim_dir = tmp_path / "simulations" / "sim_test"
+    sim_dir.mkdir(parents=True)
+    profiles = [{"user_id": i, "user_name": f"u{i}", "name": f"A{i}",
+                 "persona": "p", "profession": "fisher"} for i in range(3)]
+    (sim_dir / "reddit_profiles.json").write_text(json.dumps(profiles), encoding="utf-8")
+    from flask import Flask
+    from app.api import register_blueprints
+    app = Flask(__name__)
+    register_blueprints(app)
+    return app.test_client()
+
+def test_post_pre_returns_task_id(client):
+    res = client.post("/api/interview/sim_test/pre")
+    assert res.status_code == 200
+    body = res.get_json()
+    assert body["success"] is True
+    assert "task_id" in body["data"]
+
+def test_status_endpoint_returns_progress(client):
+    res = client.post("/api/interview/sim_test/pre")
+    task_id = res.get_json()["data"]["task_id"]
+    res2 = client.get(f"/api/interview/sim_test/status?task_id={task_id}")
+    assert res2.status_code == 200
+    assert "status" in res2.get_json()["data"]
+
+def test_unknown_subagent_returns_400(client):
+    res = client.post("/api/interview/sim_test/rerun",
+                      json={"subagent": "nonsense"})
+    assert res.status_code == 400
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `cd backend && uv run pytest tests/interviews/test_api_interview.py -v`
+Expected: ImportError / 404.
+
+- [ ] **Step 3: Check current `api/__init__.py`**
+
+Read `backend/app/api/__init__.py` and identify how `graph_bp`, `simulation_bp`, `report_bp` are registered. The test expects a `register_blueprints(app)` helper — if one doesn't exist, add it.
+
+- [ ] **Step 4: Modify `api/__init__.py`**
+
+Replace contents (preserving existing blueprint imports — adjust to match actual file):
+```python
+from flask import Flask
+from .graph import graph_bp
+from .simulation import simulation_bp
+from .report import report_bp
+from .interview import interview_bp
+
+def register_blueprints(app: Flask) -> None:
+    app.register_blueprint(graph_bp, url_prefix="/api/graph")
+    app.register_blueprint(simulation_bp, url_prefix="/api/simulation")
+    app.register_blueprint(report_bp, url_prefix="/api/report")
+    app.register_blueprint(interview_bp, url_prefix="/api/interview")
+```
+
+If the existing app factory in `app/__init__.py` already registers the blueprints manually (one `app.register_blueprint(...)` call per blueprint), replace those calls with a single `register_blueprints(app)` call.
+
+- [ ] **Step 5: Implement blueprint**
+
+`backend/app/api/interview.py`:
+```python
+from __future__ import annotations
+import threading
+import traceback
+import uuid
+from pathlib import Path
+from flask import Blueprint, jsonify, request, send_file
+from app.config import Config
+from app.models.interview import SubagentKind, InterviewPhase
+from app.services.interviews.adapters import FileSystemPersonaProvider, ZepMemoryProvider
+from app.services.interviews.zep_writer import InterviewZepWriter
+from app.services.interview_orchestrator import InterviewOrchestrator
+from app.services.interview_synthesizer import InterviewSynthesizer
+from app.services.interviews.storage import InterviewStore
+from app.utils.llm_client import LLMClient
+
+interview_bp = Blueprint("interview", __name__)
+_TASKS: dict[str, dict] = {}
+_LOCK = threading.Lock()
+
+INSTRUMENT_DIR = Path(__file__).resolve().parents[2] / "scripts" / "instruments"
+
+def _uploads_root() -> Path:
+    return Path(getattr(Config, "UPLOADS_DIR", "uploads"))
+
+def _build_orchestrator(sim_id: str) -> InterviewOrchestrator:
+    sim_dir = _uploads_root() / "simulations" / sim_id
+    reddit = sim_dir / "reddit_profiles.json"
+    twitter = sim_dir / "twitter_profiles.csv"
+    personas = FileSystemPersonaProvider(reddit_path=reddit if reddit.exists() else None,
+                                         twitter_path=twitter if twitter.exists() else None)
+    # Zep memory + writer: best-effort; in stub/test mode the writer no-ops on exceptions
+    class _NullUpdater:
+        def add_text_episode(self, *a, **kw): return None
+    try:
+        from app.services.zep_entity_reader import ZepEntityReader
+        from app.services.zep_graph_memory_updater import ZepGraphMemoryUpdater
+        graph_id = (sim_dir / "graph_id.txt").read_text().strip() if (sim_dir / "graph_id.txt").exists() else ""
+        reader = ZepEntityReader()
+        updater = ZepGraphMemoryUpdater()
+        memory = ZepMemoryProvider(reader, graph_id=graph_id)
+        zep_writer = InterviewZepWriter(memory_updater=updater, graph_id=graph_id)
+    except Exception:
+        class _Mem:
+            def get_digest(self, agent_id, max_chars=2000):
+                from app.services.interviews.base import MemoryDigest
+                return MemoryDigest(text="[memory unavailable]", available=False)
+        memory = _Mem()
+        zep_writer = InterviewZepWriter(memory_updater=_NullUpdater(), graph_id="")
+    llm = LLMClient(api_key=Config.LLM_API_KEY, base_url=Config.LLM_BASE_URL,
+                    model=Config.LLM_MODEL_NAME)
+    return InterviewOrchestrator(
+        llm=llm, memory=memory, personas=personas,
+        instrument_dir=INSTRUMENT_DIR, store_root=_uploads_root(), sim_id=sim_id,
+        zep_writer=zep_writer, max_workers=Config.INTERVIEW_MAX_WORKERS,
+        language=Config.INTERVIEW_DEFAULT_LANGUAGE,
+    )
+
+def _run_task(task_id: str, fn) -> None:
+    with _LOCK:
+        _TASKS[task_id] = {"status": "running", "progress": {}, "result": None, "error": None}
+    try:
+        result = fn(task_id)
+        with _LOCK:
+            _TASKS[task_id]["status"] = "completed"; _TASKS[task_id]["result"] = result
+    except Exception as e:
+        with _LOCK:
+            _TASKS[task_id]["status"] = "failed"
+            _TASKS[task_id]["error"] = repr(e)
+            _TASKS[task_id]["traceback"] = traceback.format_exc()
+
+def _start_task(fn) -> str:
+    task_id = uuid.uuid4().hex[:12]
+    with _LOCK:
+        _TASKS[task_id] = {"status": "queued", "progress": {}, "result": None, "error": None}
+    threading.Thread(target=_run_task, args=(task_id, fn), daemon=True).start()
+    return task_id
+
+def _envelope(data=None, error=None, status: int = 200):
+    body = {"success": error is None, "data": data or {}, "error": error}
+    return jsonify(body), status
+
+@interview_bp.route("/<sim_id>/pre", methods=["POST"])
+def post_pre(sim_id: str):
+    orch = _build_orchestrator(sim_id)
+    task_id = _start_task(lambda tid: orch.run_pre())
+    return _envelope({"task_id": task_id})
+
+@interview_bp.route("/<sim_id>/post", methods=["POST"])
+def post_post(sim_id: str):
+    orch = _build_orchestrator(sim_id)
+    def run(tid):
+        out = orch.run_post()
+        synth = InterviewSynthesizer(store=orch.store)
+        out["synthesis"] = synth.run()[:1000]  # short preview
+        return out
+    task_id = _start_task(run)
+    return _envelope({"task_id": task_id})
+
+@interview_bp.route("/<sim_id>/rerun", methods=["POST"])
+def post_rerun(sim_id: str):
+    body = request.get_json(silent=True) or {}
+    sub = body.get("subagent")
+    try: subagent = SubagentKind(sub)
+    except ValueError: return _envelope(error=f"unknown subagent {sub!r}", status=400)
+    orch = _build_orchestrator(sim_id)
+    task_id = _start_task(lambda tid: orch.rerun(subagent))
+    return _envelope({"task_id": task_id})
+
+@interview_bp.route("/<sim_id>/status", methods=["GET"])
+def get_status(sim_id: str):
+    task_id = request.args.get("task_id")
+    with _LOCK:
+        task = _TASKS.get(task_id)
+    if task is None: return _envelope(error="unknown task_id", status=404)
+    return _envelope({"status": task["status"], "progress": task.get("progress", {}),
+                      "result": task.get("result"), "error": task.get("error")})
+
+@interview_bp.route("/<sim_id>/results/<subagent>", methods=["GET"])
+def get_results(sim_id: str, subagent: str):
+    try: sub = SubagentKind(subagent)
+    except ValueError: return _envelope(error=f"unknown subagent {subagent!r}", status=400)
+    store = InterviewStore(root=_uploads_root(), sim_id=sim_id)
+    phase = InterviewPhase.T1 if sub != SubagentKind.LONGITUDINAL else InterviewPhase.T1
+    run = store.latest_run(phase, sub)
+    if run is None: return _envelope(error="no results yet", status=404)
+    agg = (run / "aggregate.json")
+    if not agg.exists(): return _envelope(error="aggregate missing", status=404)
+    import json as _j
+    return _envelope({"aggregate": _j.loads(agg.read_text(encoding="utf-8")),
+                      "run_dir": str(run)})
+
+@interview_bp.route("/<sim_id>/results/synthesis", methods=["GET"])
+def get_synthesis(sim_id: str):
+    store = InterviewStore(root=_uploads_root(), sim_id=sim_id)
+    report = store.base / "synthesis" / "report.md"
+    if not report.exists():
+        synth = InterviewSynthesizer(store=store)
+        synth.run()
+    return _envelope({"report_markdown": report.read_text(encoding="utf-8")})
+
+@interview_bp.route("/<sim_id>/export.csv", methods=["GET"])
+def get_export_csv(sim_id: str):
+    store = InterviewStore(root=_uploads_root(), sim_id=sim_id)
+    csv_path = store.base / "synthesis" / "exports" / "all_responses.csv"
+    if not csv_path.exists():
+        InterviewSynthesizer(store=store).run()
+    return send_file(csv_path, mimetype="text/csv", as_attachment=True,
+                     download_name=f"{sim_id}_interviews.csv")
+```
+
+- [ ] **Step 6: Run test to verify it passes**
+
+Run: `cd backend && uv run pytest tests/interviews/test_api_interview.py -v`
+Expected: 3 passed.
+
+- [ ] **Step 7: Commit**
+
+```bash
+git add backend/app/api/__init__.py backend/app/api/interview.py backend/tests/interviews/test_api_interview.py
+git commit -m "feat(interviews): Flask blueprint /api/interview with task-based async + CSV export"
+```
+
+---
+
+## Phase 6 — Integration
+
+### Task 17: End-to-end pipeline test (stub LLM)
+
+**Files:**
+- Create: `backend/tests/integration/__init__.py`
+- Test: `backend/tests/integration/test_interview_pipeline.py`
+
+- [ ] **Step 1: Write failing test**
+
+Create `backend/tests/integration/__init__.py` (empty), then:
+
+```python
+# backend/tests/integration/test_interview_pipeline.py
+import json
+import pytest
+from pathlib import Path
+from app.config import Config
+from app.models.interview import SubagentKind, InterviewPhase
+from app.services.interviews.adapters import FileSystemPersonaProvider
+from app.services.interviews.base import MemoryDigest
+from app.services.interviews.zep_writer import InterviewZepWriter
+from app.services.interview_orchestrator import InterviewOrchestrator
+from app.services.interview_synthesizer import InterviewSynthesizer
+from app.utils.llm_client import LLMClient
+
+pytestmark = pytest.mark.integration
+
+INST_DIR = Path(__file__).resolve().parents[2] / "scripts" / "instruments"
+
+class _NullUpdater:
+    def __init__(self): self.events = []
+    def add_text_episode(self, graph_id, text): self.events.append(text)
+
+class _StaticMem:
+    def get_digest(self, agent_id, max_chars=2000):
+        return MemoryDigest(text=f"agent {agent_id} memory snippet", available=True)
+
+@pytest.fixture
+def seeded_uploads(tmp_path, monkeypatch):
+    monkeypatch.setenv("LLM_STUB_MODE", "true")
+    Config.LLM_STUB_MODE = True
+    sim_dir = tmp_path / "simulations" / "intg_sim"
+    sim_dir.mkdir(parents=True)
+    profiles = [{"user_id": i, "user_name": f"u{i}", "name": f"A{i}",
+                 "persona": "stakeholder p", "profession": "fisher"} for i in range(5)]
+    (sim_dir / "reddit_profiles.json").write_text(json.dumps(profiles), encoding="utf-8")
+    return tmp_path
+
+def _make_orch(tmp_path):
+    sim_dir = tmp_path / "simulations" / "intg_sim"
+    personas = FileSystemPersonaProvider(
+        reddit_path=sim_dir / "reddit_profiles.json", twitter_path=None,
+    )
+    llm = LLMClient(api_key="x", base_url="x", model="x")
+    updater = _NullUpdater()
+    writer = InterviewZepWriter(memory_updater=updater, graph_id="g")
+    return InterviewOrchestrator(
+        llm=llm, memory=_StaticMem(), personas=personas,
+        instrument_dir=INST_DIR, store_root=tmp_path, sim_id="intg_sim",
+        zep_writer=writer, max_workers=2, language="de",
+    )
+
+def test_pipeline_runs_pre_then_post_then_synthesis(seeded_uploads):
+    tmp = seeded_uploads
+    orch = _make_orch(tmp)
+
+    pre = orch.run_pre()
+    assert pre["longitudinal"]["n_responded"] >= 1
+
+    post = orch.run_post()
+    assert "longitudinal" in post
+    assert "diversity" in post
+    assert "scenario" in post
+    assert "delphi" in post
+
+    synth = InterviewSynthesizer(store=orch.store)
+    report = synth.run()
+    assert "Stakeholder Interview Synthesis" in report
+    assert "Limitations" in report
+
+    csv_path = orch.store.base / "synthesis" / "exports" / "all_responses.csv"
+    assert csv_path.exists()
+    lines = csv_path.read_text().splitlines()
+    assert lines[0].startswith("agent_id,") or "agent_id" in lines[0]
+
+def test_idempotent_rerun_creates_new_run_id(seeded_uploads):
+    tmp = seeded_uploads
+    orch = _make_orch(tmp)
+    orch.run_pre()
+    first = orch.run_post()
+    second = orch.rerun(SubagentKind.SCENARIO)
+    first_scn = first["scenario"]["run_dir"]
+    second_scn = second["scenario"]["run_dir"]
+    assert first_scn != second_scn
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `cd backend && uv run pytest tests/integration/test_interview_pipeline.py -v -m integration`
+Expected: most likely ValidationError from the stub LLM's canned JSON not satisfying every subagent's strict validator (forced Q-sort distribution, scenarios, Delphi). This is the signal to enrich the stub.
+
+- [ ] **Step 3: Enrich `_stub_response_json` in `LLMClient` to satisfy each subagent**
+
+Read the current `_stub_response_json` (Task 4). Replace its body with content-aware stubs by inspecting the user message text. In `backend/app/utils/llm_client.py`, replace `_stub_response_json` with:
+
+```python
+    def _stub_response_json(self, messages: list[dict]) -> dict:
+        import hashlib, json as _json
+        sys_msg = next((m["content"] for m in messages if m.get("role") == "system"), "")
+        usr_msg = next((m["content"] for m in reversed(messages) if m.get("role") == "user"), "")
+        h = hashlib.sha256((sys_msg + "|" + usr_msg).encode("utf-8")).hexdigest()
+        seed = int(h[:8], 16)
+        rng = (seed % 5) + 1
+
+        # Longitudinal Likert (12 items)
+        if all(tok in usr_msg for tok in ("stk_1", "gov_1", "mkt_1", "clm_1")):
+            ids = ["stk_1","stk_2","stk_3","gov_1","gov_2","gov_3",
+                   "mkt_1","mkt_2","mkt_3","clm_1","clm_2","clm_3"]
+            return {"responses": {k: ((seed >> (i*3)) % 5) + 1 for i, k in enumerate(ids)},
+                    "confidence": {k: 0.6 for k in ids},
+                    "open_comment": f"stub:{h[:8]}"}
+
+        # Diversity Q-sort: 24 statements + 6 axes, forced distribution 2,3,4,6,4,3,2
+        if "st_01" in usr_msg and "ax_pres_extr" in usr_msg:
+            buckets = [-3]*2 + [-2]*3 + [-1]*4 + [0]*6 + [1]*4 + [2]*3 + [3]*2
+            stmts = [f"st_{i+1:02d}" for i in range(24)]
+            # shuffle deterministically
+            order = sorted(range(24), key=lambda i: (h[i % len(h)], i))
+            placements = {stmts[i]: buckets[order.index(i)] for i in range(24)}
+            return {
+                "placements": placements,
+                "likert_axes": {a: ((seed >> (j*3)) % 7) + 1 for j, a in enumerate(
+                    ["ax_pres_extr","ax_loc_eu","ax_sci_trad",
+                     "ax_ind_col","ax_short_long","ax_mkt_reg"])},
+            }
+
+        # Scenario: S1..S4 × 4 dims
+        if all(s in usr_msg for s in ("S1:", "S2:", "S3:", "S4:")):
+            return {"ratings": {sid: {
+                "desirability": ((seed >> (i*3)) % 7) + 1,
+                "plausibility": ((seed >> (i*3+1)) % 7) + 1,
+                "impact_on_my_group": ((seed >> (i*3+2)) % 7) + 1,
+                "fairness": ((seed >> (i*3+4)) % 7) + 1,
+                "if_woke_up_response": f"act-{sid}-{h[:4]}",
+            } for i, sid in enumerate(["S1","S2","S3","S4"])}}
+
+        # Delphi R1: q1..q4 free text
+        if "q1" in usr_msg and "q2" in usr_msg and "Bewerten" not in usr_msg and "Sie sehen" not in usr_msg:
+            return {"answers": {qid: f"stub-themes-{qid}-{h[:4]}" for qid in ("q1","q2","q3","q4")}}
+
+        # Delphi theme extraction (no in-character system prompt)
+        if "extract distinct thematic codes" in sys_msg:
+            return {"themes": [{"theme_id": f"theme_{i}", "label": f"Thema {i}"} for i in range(5)]}
+
+        # Delphi R2 (rate) or R3 (revise)
+        if "Bewerten Sie jedes Thema" in usr_msg or "Sie sehen unten" in usr_msg \
+           or "Rate each theme" in usr_msg or "Below are the anonymised" in usr_msg:
+            theme_ids = [f"theme_{i}" for i in range(5)]
+            out = {"ratings": {tid: {"importance": ((seed >> (i*2)) % 5) + 1,
+                                     "plausibility": ((seed >> (i*2+1)) % 5) + 1}
+                               for i, tid in enumerate(theme_ids)}}
+            if "Sie sehen unten" in usr_msg or "Below are the anonymised" in usr_msg:
+                out["justification"] = "stub-revision"
+            return out
+
+        # Fallback
+        return {"stub_key": h[:12], "value": rng}
+```
+
+- [ ] **Step 4: Run test to verify it passes**
+
+Run: `cd backend && uv run pytest tests/integration/test_interview_pipeline.py -v -m integration`
+Expected: 2 passed.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add backend/app/utils/llm_client.py backend/tests/integration/__init__.py backend/tests/integration/test_interview_pipeline.py
+git commit -m "test(interviews): end-to-end pipeline test + content-aware LLM stubs for all 4 subagents"
+```
+
+---
+
+## Phase 7 — Frontend
+
+Note: this project has no frontend test framework. Tasks below use the build (`npm run build`) plus a manual smoke check via `npm run dev` as the verification gate. Commit after each task once the build is green.
+
+### Task 18: Step4bInterviews.vue scaffold + tab shell
+
+**Files:**
+- Create: `frontend/src/components/Step4bInterviews.vue`
+- Create: `frontend/src/api/interview.js`
+- Modify: `frontend/src/App.vue` (or the parent that orchestrates Step1..Step5 — locate and adjust)
+
+- [ ] **Step 1: Add API client module**
+
+`frontend/src/api/interview.js`:
+```javascript
+import { api } from "./index"
+
+export async function startPre(simId) {
+  const r = await api.post(`/api/interview/${simId}/pre`)
+  return r.data
+}
+export async function startPost(simId) {
+  const r = await api.post(`/api/interview/${simId}/post`)
+  return r.data
+}
+export async function rerun(simId, subagent) {
+  const r = await api.post(`/api/interview/${simId}/rerun`, { subagent })
+  return r.data
+}
+export async function getStatus(simId, taskId) {
+  const r = await api.get(`/api/interview/${simId}/status`, { params: { task_id: taskId } })
+  return r.data
+}
+export async function getResults(simId, subagent) {
+  const r = await api.get(`/api/interview/${simId}/results/${subagent}`)
+  return r.data
+}
+export async function getSynthesis(simId) {
+  const r = await api.get(`/api/interview/${simId}/results/synthesis`)
+  return r.data
+}
+export function exportCsvUrl(simId) {
+  return `/api/interview/${simId}/export.csv`
+}
+```
+
+- [ ] **Step 2: Implement Step4bInterviews.vue scaffold**
+
+`frontend/src/components/Step4bInterviews.vue`:
+```vue
+<template>
+  <section class="step4b">
+    <header>
+      <h2>{{ t('interview.title') }}</h2>
+      <p class="subtitle">{{ t('interview.subtitle') }}</p>
+    </header>
+
+    <div class="actions">
+      <button :disabled="busy" @click="startPostRun">{{ t('interview.runAll') }}</button>
+      <a :href="csvUrl" target="_blank" rel="noopener">{{ t('interview.downloadCsv') }}</a>
+    </div>
+
+    <!-- The loop variable is named `tab` so it does not shadow the i18n
+         helper `t()` used elsewhere in this template. -->
+    <nav class="tabs">
+      <button v-for="tab in tabs" :key="tab.id"
+              :class="{ active: active === tab.id }"
+              @click="active = tab.id">
+        {{ tab.label }}
+      </button>
+    </nav>
+
+    <component :is="currentPanel" :sim-id="simId" :status="status" />
+  </section>
+</template>
+
+<script setup>
+import { computed, onMounted, ref } from 'vue'
+import { useI18n } from 'vue-i18n'
+import LongitudinalPanel from './interviews/LongitudinalPanel.vue'
+import DiversityPanel from './interviews/DiversityPanel.vue'
+import DelphiPanel from './interviews/DelphiPanel.vue'
+import ScenarioPanel from './interviews/ScenarioPanel.vue'
+import SynthesisPanel from './interviews/SynthesisPanel.vue'
+import { startPost, getStatus, exportCsvUrl } from '../api/interview'
+
+const props = defineProps({ simId: { type: String, required: true } })
+const { t } = useI18n()
+const tabs = [
+  { id: 'longitudinal', label: t('interview.tab.longitudinal') },
+  { id: 'diversity',    label: t('interview.tab.diversity') },
+  { id: 'delphi',       label: t('interview.tab.delphi') },
+  { id: 'scenario',     label: t('interview.tab.scenario') },
+  { id: 'synthesis',    label: t('interview.tab.synthesis') },
+]
+const active = ref('longitudinal')
+const status = ref({ status: 'idle' })
+const busy = ref(false)
+const csvUrl = computed(() => exportCsvUrl(props.simId))
+
+const panels = {
+  longitudinal: LongitudinalPanel, diversity: DiversityPanel,
+  delphi: DelphiPanel, scenario: ScenarioPanel, synthesis: SynthesisPanel,
+}
+const currentPanel = computed(() => panels[active.value])
+
+async function startPostRun() {
+  busy.value = true
+  try {
+    const res = await startPost(props.simId)
+    if (!res.success) throw new Error(res.error || 'failed to start')
+    await poll(res.data.task_id)
+  } finally { busy.value = false }
+}
+
+async function poll(taskId) {
+  while (true) {
+    const r = await getStatus(props.simId, taskId)
+    status.value = r.data
+    if (['completed', 'failed'].includes(r.data.status)) break
+    await new Promise(r => setTimeout(r, 1500))
+  }
+}
+</script>
+
+<style scoped>
+.step4b { padding: 1rem; }
+.tabs { display: flex; gap: .5rem; margin: 1rem 0; }
+.tabs button.active { font-weight: 700; border-bottom: 2px solid #333; }
+.actions { display: flex; gap: 1rem; align-items: center; }
+</style>
+```
+
+- [ ] **Step 3: Create placeholder panel components (to be filled in Task 19)**
+
+Create five empty-but-renderable Vue components so the scaffold compiles:
+
+`frontend/src/components/interviews/LongitudinalPanel.vue`:
+```vue
+<template><div class="panel">Longitudinal: results will appear here.</div></template>
+<script setup>
+defineProps({ simId: String, status: Object })
+</script>
+```
+
+Repeat the same pattern (changing only the inner text) for `DiversityPanel.vue`, `DelphiPanel.vue`, `ScenarioPanel.vue`, `SynthesisPanel.vue` in `frontend/src/components/interviews/`.
+
+- [ ] **Step 4: Wire Step4b into parent navigation**
+
+Read `frontend/src/App.vue` (or wherever Step1..Step5 are rendered). Locate the routing/visibility logic. Add a Step4b state between Step4 and Step5, and import `Step4bInterviews` from `./components/Step4bInterviews.vue`. Pass `:sim-id="currentSimId"` where the others receive the sim id. Add i18n keys to `locales/en.json`, `locales/de.json`, `locales/zh.json`:
+```json
+"interview": {
+  "title": "Stakeholder interviews",
+  "subtitle": "Four independent surveys of the simulated stakeholder population.",
+  "runAll": "Run all post-simulation interviews",
+  "downloadCsv": "Download CSV",
+  "tab": {
+    "longitudinal": "Longitudinal (Δ)",
+    "diversity": "Diversity",
+    "delphi": "Delphi",
+    "scenario": "Scenarios",
+    "synthesis": "Synthesis"
+  }
+}
+```
+
+- [ ] **Step 5: Build to verify it compiles**
+
+Run: `cd frontend && npm run build`
+Expected: build succeeds with no errors.
+
+- [ ] **Step 6: Commit**
+
+```bash
+git add frontend/src/api/interview.js frontend/src/components/Step4bInterviews.vue \
+        frontend/src/components/interviews/*.vue frontend/src/App.vue \
+        locales/*.json
+git commit -m "feat(interviews): Step4b Vue scaffold with five-tab navigation, API client, i18n keys"
+```
+
+---
+
+### Task 19: Per-tab d3 visualisations
+
+**Files:**
+- Modify: `frontend/src/components/interviews/LongitudinalPanel.vue`
+- Modify: `frontend/src/components/interviews/DiversityPanel.vue`
+- Modify: `frontend/src/components/interviews/DelphiPanel.vue`
+- Modify: `frontend/src/components/interviews/ScenarioPanel.vue`
+- Modify: `frontend/src/components/interviews/SynthesisPanel.vue`
+
+For each panel, fetch the relevant data via the API on mount, then render it (with d3 for the four chart panels; the synthesis panel shows the report as plain text). The five implementations follow the same structure; the full content of each is shown below.
+
+- [ ] **Step 1: Longitudinal panel — diverging bar chart of mean Δ per item (bars coloured by sign and size)**
+
+`frontend/src/components/interviews/LongitudinalPanel.vue`:
+```vue
+<template>
+  <div class="panel">
+    <h3>Longitudinal Δ (T0 → T1)</h3>
+    <div v-if="loading">Loading…</div>
+    <div v-else-if="error">{{ error }}</div>
+    <!-- v-show (not v-else) keeps the <svg> mounted while loading, so the
+         `chart` ref is non-null when draw() runs inside load(). -->
+    <svg v-show="!loading && !error" ref="chart" :width="width" :height="height"></svg>
+  </div>
+</template>
+
+<script setup>
+import { onMounted, ref, watch } from 'vue'
+import * as d3 from 'd3'
+import { getResults } from '../../api/interview'
+
+const props = defineProps({ simId: String, status: Object })
+const chart = ref(null)
+const loading = ref(true)
+const error = ref(null)
+const width = 640
+const height = 360
+
+watch(() => props.status?.status, (s) => { if (s === 'completed') load() })
+onMounted(load)
+
+async function load() {
+  loading.value = true; error.value = null
+  try {
+    const r = await getResults(props.simId, 'longitudinal')
+    if (!r.success) { error.value = r.error; return }
+    draw(r.data.aggregate)
+  } catch (e) { error.value = String(e) }
+  finally { loading.value = false }
+}
+
+function draw(agg) {
+  const items = Object.entries(agg.per_item || {})
+  if (items.length === 0) return
+  const svg = d3.select(chart.value)
+  svg.selectAll('*').remove()
+  const margin = { top: 20, right: 20, bottom: 60, left: 80 }
+  const w = width - margin.left - margin.right
+  const h = height - margin.top - margin.bottom
+  const g = svg.append('g').attr('transform', `translate(${margin.left},${margin.top})`)
+  const x = d3.scaleBand().domain(items.map(([k]) => k)).range([0, w]).padding(0.1)
+  const y = d3.scaleLinear().domain([-4, 4]).range([h, 0])
+  const color = d3.scaleDiverging(d3.interpolateRdBu).domain([-4, 0, 4])
+  g.selectAll('rect').data(items).enter().append('rect')
+    .attr('x', d => x(d[0]))
+    .attr('y', d => y(Math.max(0, d[1].mean_delta || 0)))
+    .attr('width', x.bandwidth())
+    .attr('height', d => Math.abs(y(d[1].mean_delta || 0) - y(0)))
+    .attr('fill', d => color(d[1].mean_delta || 0))
+  g.append('g').attr('transform', `translate(0,${y(0)})`)
+    .call(d3.axisBottom(x)).selectAll('text')
+    .attr('transform', 'rotate(-40)').attr('text-anchor', 'end')
+  g.append('g').call(d3.axisLeft(y))
+}
+</script>
+
+<style scoped>
+.panel { padding: .5rem; }
+</style>
+```
+
+- [ ] **Step 2: Diversity panel — scatter coloured by cluster (v1 uses a placeholder radial layout, not real PCA coordinates)**
+
+`frontend/src/components/interviews/DiversityPanel.vue`:
+```vue
+<template>
+  <div class="panel">
+    <h3>Stakeholder typology (PCA)</h3>
+    <div v-if="loading">Loading…</div>
+    <div v-else-if="error">{{ error }}</div>
+    <!-- v-show (not v-else) keeps the <svg> mounted while loading, so the
+         `chart` ref is non-null when draw() runs inside load(). -->
+    <svg v-show="!loading && !error" ref="chart" :width="width" :height="height"></svg>
+  </div>
+</template>
+
+<script setup>
+import { onMounted, ref, watch } from 'vue'
+import * as d3 from 'd3'
+import { getResults } from '../../api/interview'
+
+const props = defineProps({ simId: String, status: Object })
+const chart = ref(null); const loading = ref(true); const error = ref(null)
+const width = 640, height = 480
+
+watch(() => props.status?.status, (s) => { if (s === 'completed') load() })
+onMounted(load)
+
+async function load() {
+  loading.value = true; error.value = null
+  try {
+    const r = await getResults(props.simId, 'diversity')
+    if (!r.success) { error.value = r.error; return }
+    draw(r.data.aggregate)
+  } catch (e) { error.value = String(e) } finally { loading.value = false }
+}
+
+function draw(agg) {
+  // The /results endpoint returns aggregate.json which contains clusters + agent_ids
+  // PCA components live in typology.json (separate file). For v1 use clusters only,
+  // distributing them across a notional 2D layout from their cluster centroid hashes.
+  const clusters = agg.clusters || []
+  if (!clusters.length) return
+  const svg = d3.select(chart.value); svg.selectAll('*').remove()
+  const margin = { top: 20, right: 20, bottom: 30, left: 30 }
+  const w = width - margin.left - margin.right
+  const h = height - margin.top - margin.bottom
+  const g = svg.append('g').attr('transform', `translate(${margin.left},${margin.top})`)
+  const points = []
+  clusters.forEach((c, i) => {
+    (c.agent_ids || []).forEach((aid, k) => {
+      const angle = (i / clusters.length) * 2 * Math.PI
+      const radius = (k % 5 + 1) * 0.15 + 0.2
+      points.push({ x: 0.5 + Math.cos(angle) * radius, y: 0.5 + Math.sin(angle) * radius,
+                    cluster: c.cluster_id, agent_id: aid })
+    })
+  })
+  const x = d3.scaleLinear().domain([0, 1]).range([0, w])
+  const y = d3.scaleLinear().domain([0, 1]).range([h, 0])
+  const color = d3.scaleOrdinal(d3.schemeCategory10)
+  g.selectAll('circle').data(points).enter().append('circle')
+    .attr('cx', d => x(d.x)).attr('cy', d => y(d.y)).attr('r', 5)
+    .attr('fill', d => color(d.cluster)).attr('opacity', .7)
+    .append('title').text(d => `agent ${d.agent_id} · cluster ${d.cluster}`)
+}
+</script>
+
+<style scoped>
+.panel { padding: .5rem; }
+</style>
+```
+
+- [ ] **Step 3: Delphi panel — per-theme bar chart (v1 plots the R3 respondent count for each theme; the R2-vs-R3 IQR convergence view needs a follow-up endpoint)**
+
+`frontend/src/components/interviews/DelphiPanel.vue`:
+```vue
+<template>
+  <div class="panel">
+    <h3>Delphi convergence (IQR shift R2 → R3)</h3>
+    <div v-if="loading">Loading…</div>
+    <div v-else-if="error">{{ error }}</div>
+    <!-- v-show (not v-else) keeps the <svg> mounted while loading, so the
+         `chart` ref is non-null when draw() runs inside load(). -->
+    <svg v-show="!loading && !error" ref="chart" :width="width" :height="height"></svg>
+  </div>
+</template>
+
+<script setup>
+import { onMounted, ref, watch } from 'vue'
+import * as d3 from 'd3'
+import { api } from '../../api/index'
+
+const props = defineProps({ simId: String, status: Object })
+const chart = ref(null); const loading = ref(true); const error = ref(null)
+const width = 640, height = 420
+
+watch(() => props.status?.status, (s) => { if (s === 'completed') load() })
+onMounted(load)
+
+async function load() {
+  loading.value = true; error.value = null
+  try {
+    const r = await api.get(`/api/interview/${props.simId}/results/delphi`)
+    if (!r.data.success) { error.value = r.data.error; return }
+    // For richer detail, also fetch the per-theme convergence.json directly via a follow-up endpoint.
+    // v1: render aggregate.themes + agg.n_r1/r2/r3.
+    draw(r.data.data.aggregate)
+  } catch (e) { error.value = String(e) } finally { loading.value = false }
+}
+
+function draw(agg) {
+  const themes = agg.themes || []
+  if (!themes.length) return
+  const svg = d3.select(chart.value); svg.selectAll('*').remove()
+  const margin = { top: 20, right: 20, bottom: 80, left: 60 }
+  const w = width - margin.left - margin.right
+  const h = height - margin.top - margin.bottom
+  const g = svg.append('g').attr('transform', `translate(${margin.left},${margin.top})`)
+  const x = d3.scaleBand().domain(themes.map(t => t.theme_id)).range([0, w]).padding(0.15)
+  const y = d3.scaleLinear().domain([0, agg.n_r1 || 1]).range([h, 0])
+  const bars = themes.map((t, i) => ({
+    theme: t.theme_id, label: t.label,
+    nr1: agg.n_r1, nr2: agg.n_r2, nr3: agg.n_r3,
+  }))
+  g.selectAll('rect').data(bars).enter().append('rect')
+    .attr('x', d => x(d.theme)).attr('y', d => y(d.nr3))
+    .attr('width', x.bandwidth()).attr('height', d => h - y(d.nr3))
+    .attr('fill', d3.schemeCategory10[2])
+  g.append('g').attr('transform', `translate(0,${h})`).call(d3.axisBottom(x))
+    .selectAll('text').attr('transform', 'rotate(-30)').attr('text-anchor', 'end')
+  g.append('g').call(d3.axisLeft(y))
+}
+</script>
+
+<style scoped>
+.panel { padding: .5rem; }
+</style>
+```
+
+- [ ] **Step 4: Scenario panel — polarity quadrant (desirability × plausibility)**
+
+`frontend/src/components/interviews/ScenarioPanel.vue`:
+```vue
+<template>
+  <div class="panel">
+    <h3>Scenarios: desirability × plausibility</h3>
+    <div v-if="loading">Loading…</div>
+    <div v-else-if="error">{{ error }}</div>
+    <!-- v-show (not v-else) keeps the <svg> mounted while loading, so the
+         `chart` ref is non-null when draw() runs inside load(). -->
+    <svg v-show="!loading && !error" ref="chart" :width="width" :height="height"></svg>
+  </div>
+</template>
+
+<script setup>
+import { onMounted, ref, watch } from 'vue'
+import * as d3 from 'd3'
+import { getResults } from '../../api/interview'
+
+const props = defineProps({ simId: String, status: Object })
+const chart = ref(null); const loading = ref(true); const error = ref(null)
+const width = 520, height = 520
+
+watch(() => props.status?.status, (s) => { if (s === 'completed') load() })
+onMounted(load)
+
+async function load() {
+  loading.value = true; error.value = null
+  try {
+    const r = await getResults(props.simId, 'scenario')
+    if (!r.success) { error.value = r.error; return }
+    draw(r.data.aggregate.polarity || {})
+  } catch (e) { error.value = String(e) } finally { loading.value = false }
+}
+
+function draw(polarity) {
+  const pts = Object.entries(polarity)
+    .filter(([, v]) => v && v.n > 0)
+    .map(([sid, v]) => ({
+      sid, x: v.mean_plausibility, y: v.mean_desirability,
+      n: v.n, sdx: v.sd_plausibility, sdy: v.sd_desirability,
+    }))
+  if (!pts.length) return
+  const svg = d3.select(chart.value); svg.selectAll('*').remove()
+  const margin = { top: 20, right: 20, bottom: 40, left: 40 }
+  const w = width - margin.left - margin.right
+  const h = height - margin.top - margin.bottom
+  const g = svg.append('g').attr('transform', `translate(${margin.left},${margin.top})`)
+  const x = d3.scaleLinear().domain([1, 7]).range([0, w])
+  const y = d3.scaleLinear().domain([1, 7]).range([h, 0])
+  g.append('line').attr('x1', 0).attr('x2', w).attr('y1', y(4)).attr('y2', y(4)).attr('stroke', '#ccc')
+  g.append('line').attr('x1', x(4)).attr('x2', x(4)).attr('y1', 0).attr('y2', h).attr('stroke', '#ccc')
+  g.selectAll('circle').data(pts).enter().append('circle')
+    .attr('cx', d => x(d.x)).attr('cy', d => y(d.y))
+    .attr('r', d => 6 + Math.sqrt(d.n))
+    .attr('fill', d3.schemeCategory10[1]).attr('opacity', .7)
+  g.selectAll('text.lbl').data(pts).enter().append('text')
+    .attr('class', 'lbl').attr('x', d => x(d.x) + 8).attr('y', d => y(d.y))
+    .text(d => `${d.sid} (n=${d.n})`)
+  g.append('g').attr('transform', `translate(0,${h})`).call(d3.axisBottom(x))
+  g.append('g').call(d3.axisLeft(y))
+  g.append('text').attr('x', w/2).attr('y', h+34).attr('text-anchor', 'middle').text('plausibility')
+  g.append('text').attr('transform', `rotate(-90)`).attr('x', -h/2).attr('y', -28)
+    .attr('text-anchor', 'middle').text('desirability')
+}
+</script>
+
+<style scoped>
+.panel { padding: .5rem; }
+</style>
+```
+
+- [ ] **Step 5: Synthesis panel — render markdown report**
+
+`frontend/src/components/interviews/SynthesisPanel.vue`:
+```vue
+<template>
+  <div class="panel">
+    <h3>Synthesis</h3>
+    <div v-if="loading">Loading…</div>
+    <div v-else-if="error">{{ error }}</div>
+    <pre v-else class="report">{{ report }}</pre>
+  </div>
+</template>
+
+<script setup>
+import { onMounted, ref, watch } from 'vue'
+import { getSynthesis } from '../../api/interview'
+
+const props = defineProps({ simId: String, status: Object })
+const loading = ref(true); const error = ref(null); const report = ref('')
+
+watch(() => props.status?.status, (s) => { if (s === 'completed') load() })
+onMounted(load)
+
+async function load() {  // fetch the synthesis envelope, then show its report or its error
+  loading.value = true; error.value = null
+  try {
+    const res = await getSynthesis(props.simId)
+    if (res.success) report.value = res.data.report_markdown
+    else error.value = res.error
+  } catch (err) { error.value = String(err) } finally { loading.value = false }
+}
+</script>
+
+<style scoped>
+.panel { padding: .5rem; }
+.report { white-space: pre-wrap; font-family: ui-monospace, monospace; line-height: 1.4; }
+</style>
+```
+
+- [ ] **Step 6: Build + smoke test**
+
+Run: `cd frontend && npm run build`
+Expected: build succeeds. Then `cd .. && npm run dev` and manually visit Step4b for a completed `sim_id` — verify all five tabs render without console errors.
+
+- [ ] **Step 7: Commit**
+
+```bash
+git add frontend/src/components/interviews/*.vue
+git commit -m "feat(interviews): d3 visualisations for longitudinal Δ, diversity PCA, Delphi, scenario polarity, synthesis"
+```
+
+---
+
+### Task 20: Auto-trigger pre-survey on simulation `ready` and post-simulation interviews on `completed`
+
+**Files:**
+- Create: `backend/app/services/interviews/lifecycle.py`
+- Modify: `backend/app/__init__.py` (app factory) to install the hook
+
+- [ ] **Step 1: Write failing test**
+
+```python
+# backend/tests/interviews/test_lifecycle.py
+from app.services.interviews.lifecycle import install_hooks
+
+class _StubMgr:
+    def __init__(self): self.ready = []; self.completed = []
+    def register_on_ready(self, fn): self.ready.append(fn)
+    def register_on_completed(self, fn): self.completed.append(fn)
+
+def test_install_hooks_registers_two_callables():
+    mgr = _StubMgr()
+    install_hooks(mgr)
+    assert len(mgr.ready) == 1
+    assert len(mgr.completed) == 1
+    assert callable(mgr.ready[0])
+    assert callable(mgr.completed[0])
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+Run: `cd backend && uv run pytest tests/interviews/test_lifecycle.py -v`
+Expected: ImportError.
+
+- [ ] **Step 3: Implement lifecycle hook installer**
+
+`backend/app/services/interviews/lifecycle.py`:
+```python
+from __future__ import annotations
+import threading
+from app.utils.logger import get_logger
+
+logger = get_logger(__name__)
+
+def install_hooks(manager) -> None:
+    """Attach interview lifecycle callbacks to a SimulationManager.
+
+    on_ready  → spawn T0 longitudinal in a background thread
+    on_completed → spawn full post-sim batch in a background thread
+    Hooks are best-effort; failures only log.
+    """
+    def _on_ready(state) -> None:
+        sim_id = getattr(state, "sim_id", None) or getattr(state, "id", None)
+        if not sim_id: return
+        threading.Thread(target=_run_pre, args=(sim_id,), daemon=True).start()
+
+    def _on_completed(state) -> None:
+        sim_id = getattr(state, "sim_id", None) or getattr(state, "id", None)
+        if not sim_id: return
+        threading.Thread(target=_run_post, args=(sim_id,), daemon=True).start()
+
+    manager.register_on_ready(_on_ready)
+    manager.register_on_completed(_on_completed)
+
+def _run_pre(sim_id: str) -> None:
+    try:
+        from app.api.interview import _build_orchestrator
+        orch = _build_orchestrator(sim_id)
+        orch.run_pre()
+    except Exception as e:
+        logger.warning(f"auto pre-survey failed for {sim_id}: {e!r}")
+
+def _run_post(sim_id: str) -> None:
+    try:
+        from app.api.interview import _build_orchestrator
+        from app.services.interview_synthesizer import InterviewSynthesizer
+        orch = _build_orchestrator(sim_id)
+        orch.run_post()
+        InterviewSynthesizer(store=orch.store).run()
+    except Exception as e:
+        logger.warning(f"auto post-survey failed for {sim_id}: {e!r}")
+```
+
+- [ ] **Step 4: Wire into app factory**
+
+Read `backend/app/__init__.py`. Locate where `SimulationManager` (or its singleton) is instantiated. Add:
+```python
+    from app.services.interviews.lifecycle import install_hooks
+    install_hooks(simulation_manager)
+```
+immediately after the manager is constructed. If `simulation_manager` is module-level in `simulation_manager.py`, attach the hooks at the bottom of that module instead — the goal is "install once on app startup".
+
+- [ ] **Step 5: Run test to verify it passes**
+
+Run: `cd backend && uv run pytest tests/interviews/test_lifecycle.py -v`
+Expected: 1 passed.
+
+- [ ] **Step 6: Full backend test suite**
+
+Run: `cd backend && uv run pytest -m "not integration" -q`
+Expected: all unit tests pass.
+
+Run: `cd backend && uv run pytest -m integration -q`
+Expected: integration tests pass.
+
+- [ ] **Step 7: Commit**
+
+```bash
+git add backend/app/services/interviews/lifecycle.py backend/app/__init__.py backend/tests/interviews/test_lifecycle.py
+git commit -m "feat(interviews): auto-trigger pre and post interviews via SimulationManager lifecycle hooks"
+```
+
+---
+
+## Final verification
+
+- [ ] **Run full backend test suite**
+
+Run: `cd backend && uv run pytest -q`
+Expected: every test passes.
+
+- [ ] **Run frontend build**
+
+Run: `cd frontend && npm run build`
+Expected: build succeeds with no errors.
+
+- [ ] **Smoke test the running app**
+
+Run: `npm run dev` from project root. With an existing completed simulation:
+1. Navigate to Step4b in the UI
+2. Click "Run all post-simulation interviews"
+3. Wait for status to reach `completed`
+4. Verify each of the five tabs renders without console errors
+5. Click "Download CSV" and confirm the file downloads
+
+- [ ] **Verify spec coverage**
+
+Re-open `docs/superpowers/specs/2026-05-23-stakeholder-interview-subagents-design.md` and confirm every section in the spec has a corresponding task:
+
+- §3 architectural approach (deterministic runners) → Tasks 5–9
+- §4 file structure + lifecycle hooks → Tasks 2–14, 20
+- §5.1–5.4 four instruments → Tasks 6, 7, 8, 9
+- §5.5 in-character prompting + structured output + cost guardrails → Tasks 4, 5
+- §6.1 storage layout → Task 10
+- §6.2 Zep integration → Task 11
+- §6.3 API surface (all 7 endpoints) → Task 16
+- §6.4 parallelism + token guard → Task 12 (parallelism); token guard sits in `Config.INTERVIEW_MAX_TOKENS_PER_RUN` from Task 1 — *open: enforcement not implemented in v1; flag if you want it added*
+- §6.5 frontend Step4b + per-tab viz → Tasks 18, 19
+- §7 error handling (per-agent isolation, schema retry, idempotency) → Tasks 5, 10, 12
+- §8 validation (schema, instrument, plausibility flags) → Tasks 2, 3 (schema + instrument); plausibility-flags currently sit inside synthesiser §10 — *check that flagged thresholds in §8 plausibility checks match what synthesiser currently emits*
+- §9 testing (unit per subagent + integration + stub mode) → Tasks 4, 6–9, 12, 17
+- §10 methodological caveats in synthesis → Task 14
+- §11 defaults — already encoded in Task 1 config keys and instrument YAML
+
+If §6.4 token-guard enforcement is needed for v1, add a small follow-up task that computes a projected-token estimate before `run_post` and returns 400 with that estimate unless the request passes `confirm=true`. The spec treats this as a guard rather than a release blocker, so enforcement can ship in v1.1.
+
+---
+
+**Plan complete and saved to `docs/superpowers/plans/2026-05-23-stakeholder-interview-subagents.md`. Two execution options:**
+
+**1. Subagent-Driven (recommended)** — I dispatch a fresh subagent per task, review between tasks, fast iteration.
+
+**2. Inline Execution** — Execute tasks in this session using executing-plans, batch execution with checkpoints.
+
+**Which approach?**
+
diff --git a/docs/superpowers/specs/2026-05-23-stakeholder-interview-subagents-design.md b/docs/superpowers/specs/2026-05-23-stakeholder-interview-subagents-design.md
new file mode 100644
index 00000000..f82a7ec7
--- /dev/null
+++ b/docs/superpowers/specs/2026-05-23-stakeholder-interview-subagents-design.md
@@ -0,0 +1,280 @@
+# Stakeholder Interview Subagents — Design Spec
+
+- **Date:** 2026-05-23
+- **Project:** MiroFish (multi-agent simulation engine for German fisheries discourse)
+- **Author:** Christian Möllmann (with Claude Code)
+- **Status:** Approved design — pending implementation plan
+
+## 1. Purpose
+
+After the OASIS Twitter + Reddit simulation produces a population of in-character stakeholder agents (fishers, NGOs, policy actors, scientists, consumers, etc.) grounded in a German fisheries discourse knowledge graph, we want to interrogate each agent individually with a structured questionnaire about the future of German fisheries.
+
+Four methodologies run as independent subagents over the same agent population:
+
+1. **Longitudinal** — pre/post Likert to measure opinion drift induced by simulated peer interaction
+2. **Diversity** — Q-sort + multi-dim Likert to map the value space and derive a stakeholder typology
+3. **Delphi** — three-round consensus probing to identify where stakeholder views converge vs. stay polarised
+4. **Scenario** — rating of 4 pre-defined 2040 scenarios on desirability, plausibility, group-impact, fairness
+
+A synthesiser combines the four outputs into a single cross-method report.
+
+## 2. Non-goals (v1)
+
+- Real-time WebSocket streaming of interview progress (polling suffices)
+- Adaptive instruments / IRT calibration
+- Web UI for editing instruments (YAML + restart is fine)
+- Cross-simulation comparison endpoints (CSV exports support this externally)
+- Multi-language support beyond DE / EN
+
+## 3. Architectural approach
+
+**Chosen approach: Deterministic instrument runners.** Each subagent is a fixed protocol, not a ReACT loop. Rationale: fisheries futures methodology favours instrument fidelity (every stakeholder sees the same scale) over agent autonomy; results must be directly tabularisable for downstream analysis in pandas/R.
+
+Rejected:
+- *ReACT-style subagents* — non-deterministic, ~3–10× cost, can't guarantee every agent answered every item
+- *Single InterviewService with mode enum* — couples four distinct methodologies (especially multi-round Delphi and two-phase Longitudinal) into one growing class
+
+## 4. System architecture
+
+```
+                    InterviewOrchestrator
+                          │
+   ┌──────────────┬───────┴───────┬──────────────┐
+   ▼              ▼               ▼              ▼
+Longitudinal   Diversity        Delphi       Scenario
+Subagent       Subagent         Subagent     Subagent
+   │              │               │              │
+   └──────────────┴──────┬────────┴──────────────┘
+                         ▼
+              StakeholderInterviewer (base)
+                         │
+       ┌─────────────────┼─────────────────┐
+       ▼                 ▼                 ▼
+   LLMClient        ZepEntityReader   ProfileLoader
+   (in-character)   (memory digest)   (reddit/twitter)
+                         │
+                         ▼
+       uploads/.../interviews/    +    Zep episodes
+```
+
+### 4.1 New files
+
+| Path | Purpose |
+|---|---|
+| `backend/app/services/interviews/base.py` | `StakeholderInterviewer` — persona+memory loading, in-character prompting, retry/validation |
+| `backend/app/services/interviews/longitudinal.py` | Pre/post Likert |
+| `backend/app/services/interviews/diversity.py` | Q-sort + multi-dim value-space mapping |
+| `backend/app/services/interviews/delphi.py` | Three-round consensus |
+| `backend/app/services/interviews/scenario.py` | Scenario rating |
+| `backend/app/services/interview_orchestrator.py` | Fan-out, parallel execution, two-phase lifecycle |
+| `backend/app/services/interview_synthesizer.py` | Cross-method narrative report |
+| `backend/app/api/interview.py` | New Flask blueprint `/api/interview/*` |
+| `backend/app/models/interview.py` | Pydantic schemas for instruments + responses |
+| `backend/scripts/instruments/*.yaml` | Editable instrument definitions (one YAML per subagent) |
+| `frontend/src/components/Step4bInterviews.vue` | Four tabs + synthesis tab |
+| `backend/tests/interviews/` | Unit tests per subagent + base + orchestrator + synthesiser |
+| `tests/integration/test_interview_pipeline.py` | End-to-end with stub LLM + disposable Zep graph |
+
+### 4.2 Lifecycle integration
+
+Two hooks added to `backend/app/services/simulation_manager.py`:
+
+- `on_ready()` — automatically triggers Longitudinal T0 (pre-simulation baseline)
+- `on_completed()` — queues a `task_id` running Longitudinal T1 + Diversity + Delphi + Scenario in parallel, then Synthesiser
+
+The two-phase split is **non-negotiable**: Longitudinal needs T0 captured before OASIS exposes agents to peer-generated content, otherwise drift is unmeasurable.
+
+## 5. Instrument design
+
+All instruments live in `backend/scripts/instruments/*.yaml` so content is editable without redeploying. Items default to German, translatable via existing locale system.
+
+### 5.1 Longitudinal — opinion drift
+
+- 12–15 item 5-point Likert ("lehne stark ab" → "stimme stark zu")
+- Administered at T0 (post-persona, pre-OASIS) and T1 (post-OASIS)
+- Item families (3–4 each): stock status & recovery; governance & CFP; market & MSC; climate & adaptation
+- Per-agent output: response value + LLM self-reported confidence per item + one open comment
+- Aggregate: Δ-matrix (N × M items), per-item Wilcoxon signed-rank, per-agent total drift magnitude
+
+### 5.2 Diversity — typology mapping
+
+- One-shot, post-simulation only
+- **Part A (Q-sort lite):** 24 statements sorted onto forced quasi-normal distribution from −3 to +3
+- **Part B:** 6 multi-dim Likert axes (preservation↔extraction, local↔EU, science-led↔tradition-led, individual↔collective, short-term↔long-term, market↔regulation)
+- Per-agent output: vector ∈ ℝ^30
+- Aggregate: PCA + k-means → 3–5 stakeholder clusters with archetype descriptions + cluster-membership probabilities
+
+### 5.3 Delphi — consensus probing
+
+- Three rounds, fully automated
+- **R1 (open):** 4 open questions; LLM extracts thematic codes from responses
+- **R2 (rate):** Agent sees anonymised list of all unique themes; rates each on importance (1–5) + plausibility (1–5)
+- **R3 (revise):** Agent sees group median + IQR per theme; can revise own ratings; free-text justification
+- Aggregate: per-theme convergence (Δ-IQR R2→R3), persistent disagreements (IQR > 2), ranked consensus statements
+
+### 5.4 Scenario — futures evaluation
+
+Four 2040 scenarios (YAML-editable):
+
+- **S1 "Erholung"** — cod and herring recover, MSC ubiquitous, small-scale fleet stabilises
+- **S2 "Kollaps"** — both stocks collapse, fleet halved, aquaculture dominant
+- **S3 "Festung Europa"** — protectionist EU policy, MPAs cover 30%, recreational fishing curtailed
+- **S4 "Privatisierung"** — ITQs, consolidation, large operators only
+
+Each agent rates each scenario on 4 dimensions (1–7 Likert): desirability, plausibility, impact-on-my-group, fairness. Plus one open question per scenario: "If you woke up in this 2040, what would you do?"
+
+Aggregate: 4 × 4 per-agent matrix + open-text corpus → polarity charts (desirability × plausibility by stakeholder type), narrative themes.
+
+### 5.5 Cross-cutting
+
+**In-character prompting.** Every LLM call uses a system prompt of the form:
+
+> You are [persona_text]. You are answering a survey about the future of German fisheries. Answer strictly in character based on your background, values, and what you experienced during the simulated social media discourse summarised below: [Zep memory digest]. Return JSON only.
+
+Memory digest comes from `ZepEntityReader.get_entity_with_context()`.
+
+**Structured output enforced.** Every response goes through `LLMClient.chat_json()` with a per-instrument JSON schema. One auto-retry on schema violation; agent flagged in audit log on second failure.
+
+**Cost guardrails.** Longitudinal × 2 phases + Delphi × 3 rounds is heaviest. For N=50 agents and ~100 LLM calls per agent across all 4 subagents, budget ~5k calls / 5–10M tokens per simulation. Persona system prompts stay constant within a subagent run → cacheable.
+
+## 6. Data flow and storage
+
+### 6.1 Storage layout
+
+```
+uploads/simulations/{sim_id}/interviews/
+├── instruments_used.json          # frozen snapshot of YAML at run-time
+├── T0/
+│   └── longitudinal/
+│       ├── responses.jsonl
+│       ├── audit.jsonl            # raw LLM I/O, retries, validation failures
+│       └── aggregate.json
+├── T1/
+│   ├── longitudinal/{same structure}
+│   ├── diversity/
+│   │   ├── responses.jsonl
+│   │   ├── typology.json
+│   │   └── pca.json
+│   ├── delphi/
+│   │   ├── round1_themes.jsonl
+│   │   ├── round2_ratings.jsonl
+│   │   ├── round3_revisions.jsonl
+│   │   └── convergence.json
+│   └── scenario/
+│       ├── responses.jsonl
+│       └── polarity_matrix.json
+└── synthesis/
+    ├── report.md
+    └── exports/
+        ├── all_responses.csv      # tidy long format
+        └── codebook.json
+```
+
+JSONL for raw responses (append-safe, streams cleanly); JSON for aggregates; CSV for analysis hand-off. `instruments_used.json` snapshot is critical for reproducibility when YAML is later edited.
+
+### 6.2 Zep integration
+
+Two write patterns, both reusing `ZepGraphMemoryUpdater.add_activity()`:
+
+- **Per-agent episode** — after each subagent finishes for an agent, write one episode: `"Agent {name} (interview/{subagent}/{phase}): {short summary of stance}"`. The existing ReportAgent can retrieve interview content via its current `panorama_search` / `insight_forge` tools without changes.
+- **Aggregate episodes** — after each subagent's aggregate step, write one summary episode per cluster / theme / scenario.
+
+No new Zep schemas. No new entity types. Interviews are just more episodes — append-only, safe.
+
+### 6.3 API surface
+
+New blueprint `/api/interview`:
+
+| Method | Path | Purpose |
+|---|---|---|
+| `POST` | `/api/interview/{sim_id}/pre` | Trigger T0 longitudinal (auto on READY, manual for re-runs) |
+| `POST` | `/api/interview/{sim_id}/post` | Trigger all 4 post-sim subagents; returns `task_id` |
+| `GET`  | `/api/interview/{sim_id}/status?task_id=...` | Per-subagent progress |
+| `GET`  | `/api/interview/{sim_id}/results/{subagent}` | Aggregate JSON for one subagent |
+| `GET`  | `/api/interview/{sim_id}/results/synthesis` | Full synthesis report |
+| `GET`  | `/api/interview/{sim_id}/export.csv` | Tidy long-format CSV across all 4 subagents |
+| `POST` | `/api/interview/{sim_id}/rerun` | Re-run one subagent (e.g. after editing YAML) |
+
+All responses follow the existing `{success, data, error}` envelope. Polling reuses `models/task.py`.
+
+### 6.4 Parallelism
+
+- Within a subagent: `ThreadPoolExecutor(max_workers=8)` for per-agent LLM calls
+- Across the 4 post-sim subagents: all four run in parallel with each other; only Delphi's three rounds are sequential internally
+- Synthesiser waits for all four
+- Token budget guard: `Config.INTERVIEW_MAX_TOKENS_PER_RUN`; if projected cost exceeds, API returns 400 with dry-run estimate and `confirm=true` override
+
+### 6.5 Frontend
+
+New `Step4bInterviews.vue` between current Step4 (report) and Step5 (interaction). Four tabs (one per subagent) + a synthesis tab. Each tab shows progress bar during run, then results: Likert heatmap (longitudinal Δ), PCA scatter (diversity), convergence chart (Delphi), polarity quadrants (scenario). Download button per tab pulls the CSV export.
+
+## 7. Error handling
+
+**Per-agent failures are isolated.** If agent 17 times out or fails JSON validation twice, agent 17 is marked `failed` in `audit.jsonl`; the rest of the run continues. Aggregates report `n_responded` / `n_total` honestly.
+
+| Failure | Handling |
+|---|---|
+| LLM timeout / 5xx | Exponential-backoff retry (3 attempts) via existing `LLMClient`; then mark agent failed |
+| JSON schema violation | One auto-retry with explicit corrective instruction; then mark failed |
+| Likert out-of-range / missing items | Re-ask only the bad items; if still bad, item-level missing |
+| Zep memory fetch fails | Run without memory digest; flag in audit (`memory_available: false`); down-weight in drift analysis |
+| Whole-subagent crash | Other 3 continue; synthesiser runs on what completed and flags the gap |
+| Token budget exceeded mid-run (after the §6.4 pre-flight check passed) | Pause, write partial results, return 503 with `resume_token` |
+
+**Idempotency.** Every subagent run is keyed by `(sim_id, subagent, phase, run_id)`. Re-runs write a new `run_id` directory; never overwrite. A `latest.json` pointer tracks the canonical run.
+
+## 8. Validation
+
+Three layers:
+
+1. **Schema validation** — pydantic models for every response; JSONL files validated on write
+2. **Instrument validation** — `validate_instrument(yaml)` pre-flight: required fields, scale coherence, no duplicate item_ids, DE+EN both present if i18n enabled
+3. **Plausibility checks** on aggregates (flag, don't kill):
+   - Longitudinal: >80% zero drift on every item OR >80% flip — likely a prompting bug or acquiescence bias
+   - Diversity: first two PCA components explain <30% of variance — instrument not discriminating
+   - Delphi: R3 ratings identical to R2 for >90% of agents — no engagement with anonymised feedback
+   - Scenario: all agents rate all scenarios identically on `desirability` — instrument failure
+
+Flags surface in the synthesis report under "instrument health" so the user can decide whether data is publishable.
+
+## 9. Testing
+
+**Unit tests** (`backend/tests/interviews/`):
+
+- `test_instruments.py` — every YAML parses and validates
+- `test_base_interviewer.py` — persona+memory loading, in-character prompt construction, schema-retry logic (mock `LLMClient`)
+- One file per subagent — happy path + each failure mode in §7
+- `test_orchestrator.py` — fan-out, partial failures, two-phase ordering (T0 before T1)
+- `test_synthesizer.py` — missing-subagent handling, stable output shape
+
+**Integration test** (`tests/integration/test_interview_pipeline.py`):
+
+End-to-end with N=5 agents against a recorded LLM cassette. Verifies T0 at READY, T1 + 3 others at COMPLETED, CSV export well-formed, Zep episodes written.
+
+**Stub LLM mode** (`Config.LLM_STUB_MODE=true`) returns deterministic canned responses keyed by `(subagent, item_id, persona_hash)`. Full pipeline exercisable in CI for free.
+
+**Zep**: disposable graph in integration tests (consistent with project conventions); unit tests stub.
+
+## 10. Methodological caveats (auto-emitted in synthesis)
+
+The synthesiser **always** emits a "Limitations" section, programmatically generated from run metadata:
+
+- **Simulated, not real stakeholders.** Responses reflect how the seed-document discourse + LLM jointly encode each stakeholder type, not what actual fishers / NGO staff would say. The instrument measures the *model of the stakeholder*, not the stakeholder.
+- **Memory digest is lossy.** Each agent's "experience" of OASIS is summarised to bounded length; agents do not have full episodic recall.
+- **LLM acquiescence and centrality bias.** Likert with LLM respondents skews toward 3–4 of 5; per-item distribution shape statistics are reported.
+- **N is what it is.** `n_total` and `n_responded` printed verbatim; no rounding, no smoothing.
+- **Instrument provenance.** Hash of `instruments_used.json` printed so future-you can rebuild the exact instrument.
+
+This section is load-bearing for any publication: it makes the system intellectually defensible rather than a black box.
+
+## 11. Defaulted decisions (revisit later if needed)
+
+- **N agents:** assumed 50, driven from existing simulation config; if you typically run more/fewer, cost guardrail threshold needs adjusting
+- **Default instrument language:** German with English fallback in YAML
+- **Delphi rounds = 3:** classic Delphi can run more; 3 is the methodological floor and the cost ceiling here
+
+## 12. Open questions for implementation phase
+
+- Whether to write a separate `instruments_changelog.md` per run, or embed change tracking in `instruments_used.json` metadata
+- Whether the synthesiser should write into Zep as a single mega-episode or stay file-only (current design: file-only, plus the per-agent + per-aggregate episodes from each subagent)
+- Whether `Step4bInterviews.vue` should sit strictly after Step4 (current design) or render in parallel — interviews depend on the simulation having reached `completed` (Step3 output) and on the `graph_id` (created in Step1); they do not depend on Step4's ReportAgent run, so a parallel layout is technically possible
diff --git a/frontend/src/api/interview.js b/frontend/src/api/interview.js
new file mode 100644
index 00000000..0f5cdbf5
--- /dev/null
+++ b/frontend/src/api/interview.js
@@ -0,0 +1,29 @@
+import service from './index'
+
+export async function startPre(simId) {
+  const r = await service.post(`/api/interview/${simId}/pre`)
+  return r
+}
+export async function startPost(simId) {
+  const r = await service.post(`/api/interview/${simId}/post`)
+  return r
+}
+export async function rerun(simId, subagent) {
+  const r = await service.post(`/api/interview/${simId}/rerun`, { subagent })
+  return r
+}
+export async function getStatus(simId, taskId) {
+  const r = await service.get(`/api/interview/${simId}/status`, { params: { task_id: taskId } })
+  return r
+}
+export async function getResults(simId, subagent) {
+  const r = await service.get(`/api/interview/${simId}/results/${subagent}`)
+  return r
+}
+export async function getSynthesis(simId) {
+  const r = await service.get(`/api/interview/${simId}/results/synthesis`)
+  return r
+}
+export function exportCsvUrl(simId) {
+  return `/api/interview/${simId}/export.csv`
+}
diff --git a/frontend/src/components/Step4bInterviews.vue b/frontend/src/components/Step4bInterviews.vue
new file mode 100644
index 00000000..d2aed844
--- /dev/null
+++ b/frontend/src/components/Step4bInterviews.vue
@@ -0,0 +1,79 @@
+<template>
+  <section class="step4b">
+    <header>
+      <h2>{{ t('interview.title') }}</h2>
+      <p class="subtitle">{{ t('interview.subtitle') }}</p>
+    </header>
+
+    <div class="actions">
+      <button :disabled="busy" @click="startPostRun">{{ t('interview.runAll') }}</button>
+      <a :href="csvUrl" target="_blank" rel="noopener">{{ t('interview.downloadCsv') }}</a>
+    </div>
+
+    <nav class="tabs">
+      <button v-for="tab in tabs" :key="tab.id"
+              :class="{ active: active === tab.id }"
+              @click="active = tab.id">
+        {{ tab.label }}
+      </button>
+    </nav>
+
+    <component :is="currentPanel" :sim-id="simId" :status="status" />
+  </section>
+</template>
+
+<script setup>
+import { computed, onBeforeUnmount, ref } from 'vue'
+import { useI18n } from 'vue-i18n'
+import LongitudinalPanel from './interviews/LongitudinalPanel.vue'
+import DiversityPanel from './interviews/DiversityPanel.vue'
+import DelphiPanel from './interviews/DelphiPanel.vue'
+import ScenarioPanel from './interviews/ScenarioPanel.vue'
+import SynthesisPanel from './interviews/SynthesisPanel.vue'
+import { startPost, getStatus, exportCsvUrl } from '../api/interview'
+
+const props = defineProps({ simId: { type: String, required: true } })
+const { t } = useI18n()
+const tabs = computed(() => [  // computed so the labels follow locale changes, like the template's t() calls
+  { id: 'longitudinal', label: t('interview.tab.longitudinal') },
+  { id: 'diversity',    label: t('interview.tab.diversity') },
+  { id: 'delphi',       label: t('interview.tab.delphi') },
+  { id: 'scenario',     label: t('interview.tab.scenario') },
+  { id: 'synthesis',    label: t('interview.tab.synthesis') },
+])
+const active = ref('longitudinal'); const busy = ref(false)
+const status = ref({ status: 'idle' })
+let alive = true; onBeforeUnmount(() => { alive = false })  // lets poll() stop once the component is gone
+const csvUrl = computed(() => exportCsvUrl(props.simId))
+const panels = {
+  longitudinal: LongitudinalPanel, diversity: DiversityPanel,
+  delphi: DelphiPanel, scenario: ScenarioPanel, synthesis: SynthesisPanel,
+}
+const currentPanel = computed(() => panels[active.value])
+// Starts the post-simulation run and follows it to a terminal status; a failure is recorded in `status`, then re-thrown.
+async function startPostRun() {
+  busy.value = true
+  try {
+    const res = await startPost(props.simId)
+    if (!res.success) throw new Error(res.error || 'failed to start')
+    await poll(res.data.task_id)
+  } catch (e) { status.value = { status: 'failed', error: String(e) }; throw e } finally { busy.value = false }
+}
+// Polls every 1.5 s until 'completed'/'failed'; stops when unmounted and throws if the envelope carries no data.
+async function poll(taskId) {
+  while (alive) {
+    const r = await getStatus(props.simId, taskId)
+    if (!r.success || !r.data) throw new Error(r.error || 'status unavailable')
+    status.value = r.data
+    if (['completed', 'failed'].includes(r.data.status)) break
+    await new Promise(resolve => setTimeout(resolve, 1500))
+  }
+}
+</script>
+
+<style scoped>
+.step4b { padding: 1rem; }
+.tabs { display: flex; gap: .5rem; margin: 1rem 0; }
+.tabs button.active { font-weight: 700; border-bottom: 2px solid #333; }
+.actions { display: flex; gap: 1rem; align-items: center; }
+</style>
diff --git a/frontend/src/components/interviews/DelphiPanel.vue b/frontend/src/components/interviews/DelphiPanel.vue
new file mode 100644
index 00000000..c111d0d3
--- /dev/null
+++ b/frontend/src/components/interviews/DelphiPanel.vue
@@ -0,0 +1,58 @@
+<template>
+  <div class="panel">
+    <h3>Delphi convergence (R1→R3)</h3>
+    <div v-if="loading">Loading…</div>
+    <div v-else-if="error">{{ error }}</div>
+    <svg v-else ref="chart" :width="width" :height="height"></svg>
+  </div>
+</template>
+
+<script setup>
+import { nextTick, onMounted, ref, watch } from 'vue'
+import * as d3 from 'd3'
+import { getResults } from '../../api/interview'
+
+const props = defineProps({ simId: String, status: Object })
+const chart = ref(null); const loading = ref(true); const error = ref(null)
+const width = 640, height = 420
+
+// Load once on mount, and again whenever the status passed in by the parent becomes 'completed'.
+watch(() => props.status?.status, (s) => { if (s === 'completed') load() })
+onMounted(load)
+
+async function load() {
+  loading.value = true; error.value = null
+  try {
+    // service interceptor returns the envelope {success, data, error} directly
+    const r = await getResults(props.simId, 'delphi')
+    if (!r.success) { error.value = r.error; return }
+    // <svg> is in the v-else branch: unhide it and let Vue patch the DOM before draw() uses the ref
+    loading.value = false; await nextTick(); draw(r.data.aggregate)
+  } catch (e) { error.value = String(e) } finally { loading.value = false }
+}
+
+function draw(agg) {
+  // One bar per theme. NOTE(review): every bar uses the run-level n_r3, not a per-theme value — confirm intent.
+  const themes = agg?.themes || []
+  const svg = d3.select(chart.value); svg.selectAll('*').remove()  // clear first so an empty result leaves no stale chart
+  if (!themes.length) return
+  const margin = { top: 20, right: 20, bottom: 80, left: 60 }
+  const w = width - margin.left - margin.right
+  const h = height - margin.top - margin.bottom
+  const g = svg.append('g').attr('transform', `translate(${margin.left},${margin.top})`)
+  const x = d3.scaleBand().domain(themes.map(t => t.theme_id)).range([0, w]).padding(0.15)
+  const y = d3.scaleLinear().domain([0, agg.n_r1 || 1]).range([h, 0])
+  const bars = themes.map((t) => ({ theme: t.theme_id, label: t.label, nr3: agg.n_r3 || 0 }))
+  g.selectAll('rect').data(bars).enter().append('rect')
+    .attr('x', d => x(d.theme)).attr('y', d => y(d.nr3))
+    .attr('width', x.bandwidth()).attr('height', d => h - y(d.nr3))
+    .attr('fill', d3.schemeCategory10[2])
+  g.append('g').attr('transform', `translate(0,${h})`).call(d3.axisBottom(x))
+    .selectAll('text').attr('transform', 'rotate(-30)').attr('text-anchor', 'end')
+  g.append('g').call(d3.axisLeft(y))
+}
+</script>
+
+<style scoped>
+.panel { padding: .5rem; }
+</style>
diff --git a/frontend/src/components/interviews/DiversityPanel.vue b/frontend/src/components/interviews/DiversityPanel.vue
new file mode 100644
index 00000000..558d8526
--- /dev/null
+++ b/frontend/src/components/interviews/DiversityPanel.vue
@@ -0,0 +1,63 @@
+<template>
+  <div class="panel">
+    <h3>Stakeholder typology (PCA)</h3>
+    <div v-if="loading">Loading…</div>
+    <div v-else-if="error">{{ error }}</div>
+    <svg v-else ref="chart" :width="width" :height="height"></svg>
+  </div>
+</template>
+
+<script setup>
+import { onMounted, ref, watch } from 'vue'
+import * as d3 from 'd3'
+import { getResults } from '../../api/interview'
+
+const props = defineProps({ simId: String, status: Object })
+const chart = ref(null); const loading = ref(true); const error = ref(null)
+const width = 640, height = 480
+
+watch(() => props.status?.status, (s) => { if (s === 'completed') load() })
+onMounted(load)
+
+async function load() {
+  loading.value = true; error.value = null
+  try {
+    // service interceptor returns the envelope {success, data, error} directly
+    const r = await getResults(props.simId, 'diversity')
+    if (!r.success) { error.value = r.error; return }
+    draw(r.data.aggregate)
+  } catch (e) { error.value = String(e) } finally { loading.value = false }
+}
+
+function draw(agg) {
+  // The /results endpoint returns aggregate.json which contains clusters + agent_ids.
+  // For v1 use clusters only, distributing them across a notional 2D layout per cluster.
+  const clusters = agg.clusters || []
+  if (!clusters.length) return
+  const svg = d3.select(chart.value); svg.selectAll('*').remove()
+  const margin = { top: 20, right: 20, bottom: 30, left: 30 }
+  const w = width - margin.left - margin.right
+  const h = height - margin.top - margin.bottom
+  const g = svg.append('g').attr('transform', `translate(${margin.left},${margin.top})`)
+  const points = []
+  clusters.forEach((c, i) => {
+    (c.agent_ids || []).forEach((aid, k) => {
+      const angle = (i / clusters.length) * 2 * Math.PI
+      const radius = (k % 5 + 1) * 0.15 + 0.2
+      points.push({ x: 0.5 + Math.cos(angle) * radius, y: 0.5 + Math.sin(angle) * radius,
+                    cluster: c.cluster_id, agent_id: aid })
+    })
+  })
+  const x = d3.scaleLinear().domain([0, 1]).range([0, w])
+  const y = d3.scaleLinear().domain([0, 1]).range([h, 0])
+  const color = d3.scaleOrdinal(d3.schemeCategory10)
+  g.selectAll('circle').data(points).enter().append('circle')
+    .attr('cx', d => x(d.x)).attr('cy', d => y(d.y)).attr('r', 5)
+    .attr('fill', d => color(d.cluster)).attr('opacity', .7)
+    .append('title').text(d => `agent ${d.agent_id} · cluster ${d.cluster}`)
+}
+</script>
+
+<style scoped>
+.panel { padding: .5rem; }
+</style>
diff --git a/frontend/src/components/interviews/LongitudinalPanel.vue b/frontend/src/components/interviews/LongitudinalPanel.vue
new file mode 100644
index 00000000..1596e93b
--- /dev/null
+++ b/frontend/src/components/interviews/LongitudinalPanel.vue
@@ -0,0 +1,63 @@
+<template>
+  <div class="panel">
+    <h3>Longitudinal Δ (T0 → T1)</h3>
+    <div v-if="loading">Loading…</div>
+    <div v-else-if="error">{{ error }}</div>
+    <svg v-else ref="chart" :width="width" :height="height"></svg>
+  </div>
+</template>
+
+<script setup>
+import { onMounted, ref, watch } from 'vue'
+import * as d3 from 'd3'
+import { getResults } from '../../api/interview'
+
+const props = defineProps({ simId: String, status: Object })
+const chart = ref(null)
+const loading = ref(true)
+const error = ref(null)
+const width = 640
+const height = 360
+
+watch(() => props.status?.status, (s) => { if (s === 'completed') load() })
+onMounted(load)
+
+async function load() {
+  loading.value = true; error.value = null
+  try {
+    // service interceptor returns the envelope {success, data, error} directly
+    const r = await getResults(props.simId, 'longitudinal')
+    if (!r.success) { error.value = r.error; return }
+    draw(r.data.aggregate)
+  } catch (e) { error.value = String(e) }
+  finally { loading.value = false }
+}
+
+function draw(agg) {
+  const items = Object.entries(agg.per_item || {})
+  if (items.length === 0) return
+  const svg = d3.select(chart.value)
+  svg.selectAll('*').remove()
+  const margin = { top: 20, right: 20, bottom: 60, left: 80 }
+  const w = width - margin.left - margin.right
+  const h = height - margin.top - margin.bottom
+  const g = svg.append('g').attr('transform', `translate(${margin.left},${margin.top})`)
+  const x = d3.scaleBand().domain(items.map(([k]) => k)).range([0, w]).padding(0.1)
+  const y = d3.scaleLinear().domain([-4, 4]).range([h, 0])
+  const color = d3.scaleDiverging(d3.interpolateRdBu).domain([-4, 0, 4])
+  g.selectAll('rect').data(items).enter().append('rect')
+    .attr('x', d => x(d[0]))
+    .attr('y', d => y(Math.max(0, d[1].mean_delta || 0)))
+    .attr('width', x.bandwidth())
+    .attr('height', d => Math.abs(y(d[1].mean_delta || 0) - y(0)))
+    .attr('fill', d => color(d[1].mean_delta || 0))
+  g.append('g').attr('transform', `translate(0,${y(0)})`)
+    .call(d3.axisBottom(x)).selectAll('text')
+    .attr('transform', 'rotate(-40)').attr('text-anchor', 'end')
+  g.append('g').call(d3.axisLeft(y))
+}
+</script>
+
+<style scoped>
+.panel { padding: .5rem; }
+</style>
diff --git a/frontend/src/components/interviews/ScenarioPanel.vue b/frontend/src/components/interviews/ScenarioPanel.vue
new file mode 100644
index 00000000..ddc85b2b
--- /dev/null
+++ b/frontend/src/components/interviews/ScenarioPanel.vue
@@ -0,0 +1,66 @@
+<template>
+  <div class="panel">
+    <h3>Scenarios: desirability × plausibility</h3>
+    <div v-if="loading">Loading…</div>
+    <div v-else-if="error">{{ error }}</div>
+    <svg v-else ref="chart" :width="width" :height="height"></svg>
+  </div>
+</template>
+
+<script setup>
+import { onMounted, ref, watch } from 'vue'
+import * as d3 from 'd3'
+import { getResults } from '../../api/interview'
+
+const props = defineProps({ simId: String, status: Object })
+const chart = ref(null); const loading = ref(true); const error = ref(null)
+const width = 520, height = 520
+
+watch(() => props.status?.status, (s) => { if (s === 'completed') load() })
+onMounted(load)
+
+async function load() {
+  loading.value = true; error.value = null
+  try {
+    // service interceptor returns the envelope {success, data, error} directly
+    const r = await getResults(props.simId, 'scenario')
+    if (!r.success) { error.value = r.error; return }
+    draw(r.data.aggregate.polarity || {})
+  } catch (e) { error.value = String(e) } finally { loading.value = false }
+}
+
+function draw(polarity) {
+  const pts = Object.entries(polarity)
+    .filter(([, v]) => v && v.n > 0)
+    .map(([sid, v]) => ({
+      sid, x: v.mean_plausibility, y: v.mean_desirability,
+      n: v.n, sdx: v.sd_plausibility, sdy: v.sd_desirability,
+    }))
+  if (!pts.length) return
+  const svg = d3.select(chart.value); svg.selectAll('*').remove()
+  const margin = { top: 20, right: 20, bottom: 40, left: 40 }
+  const w = width - margin.left - margin.right
+  const h = height - margin.top - margin.bottom
+  const g = svg.append('g').attr('transform', `translate(${margin.left},${margin.top})`)
+  const x = d3.scaleLinear().domain([1, 7]).range([0, w])
+  const y = d3.scaleLinear().domain([1, 7]).range([h, 0])
+  g.append('line').attr('x1', 0).attr('x2', w).attr('y1', y(4)).attr('y2', y(4)).attr('stroke', '#ccc')
+  g.append('line').attr('x1', x(4)).attr('x2', x(4)).attr('y1', 0).attr('y2', h).attr('stroke', '#ccc')
+  g.selectAll('circle').data(pts).enter().append('circle')
+    .attr('cx', d => x(d.x)).attr('cy', d => y(d.y))
+    .attr('r', d => 6 + Math.sqrt(d.n))
+    .attr('fill', d3.schemeCategory10[1]).attr('opacity', .7)
+  g.selectAll('text.lbl').data(pts).enter().append('text')
+    .attr('class', 'lbl').attr('x', d => x(d.x) + 8).attr('y', d => y(d.y))
+    .text(d => `${d.sid} (n=${d.n})`)
+  g.append('g').attr('transform', `translate(0,${h})`).call(d3.axisBottom(x))
+  g.append('g').call(d3.axisLeft(y))
+  g.append('text').attr('x', w/2).attr('y', h+34).attr('text-anchor', 'middle').text('plausibility')
+  g.append('text').attr('transform', `rotate(-90)`).attr('x', -h/2).attr('y', -28)
+    .attr('text-anchor', 'middle').text('desirability')
+}
+</script>
+
+<style scoped>
+.panel { padding: .5rem; }
+</style>
diff --git a/frontend/src/components/interviews/SynthesisPanel.vue b/frontend/src/components/interviews/SynthesisPanel.vue
new file mode 100644
index 00000000..e435b4d2
--- /dev/null
+++ b/frontend/src/components/interviews/SynthesisPanel.vue
@@ -0,0 +1,34 @@
+<template>
+  <div class="panel">
+    <h3>Synthesis</h3>
+    <div v-if="loading">Loading…</div>
+    <div v-else-if="error">{{ error }}</div>
+    <pre v-else class="report">{{ report }}</pre>
+  </div>
+</template>
+
+<script setup>
+import { onMounted, ref, watch } from 'vue'
+import { getSynthesis } from '../../api/interview'
+
+const props = defineProps({ simId: String, status: Object })
+const loading = ref(true); const error = ref(null); const report = ref('')
+
+watch(() => props.status?.status, (s) => { if (s === 'completed') load() })
+onMounted(load)
+
+async function load() {
+  loading.value = true; error.value = null
+  try {
+    // service interceptor returns the envelope {success, data, error} directly
+    const r = await getSynthesis(props.simId)
+    if (!r.success) { error.value = r.error; return }
+    report.value = r.data.report_markdown
+  } catch (e) { error.value = String(e) } finally { loading.value = false }
+}
+</script>
+
+<style scoped>
+.panel { padding: .5rem; }
+.report { white-space: pre-wrap; font-family: ui-monospace, monospace; line-height: 1.4; }
+</style>
diff --git a/frontend/src/router/index.js b/frontend/src/router/index.js
index 62d23201..30b072b8 100644
--- a/frontend/src/router/index.js
+++ b/frontend/src/router/index.js
@@ -4,6 +4,7 @@ import Process from '../views/MainView.vue'
 import SimulationView from '../views/SimulationView.vue'
 import SimulationRunView from '../views/SimulationRunView.vue'
 import ReportView from '../views/ReportView.vue'
+import InterviewView from '../views/InterviewView.vue'
 import InteractionView from '../views/InteractionView.vue'
 
 const routes = [
@@ -36,6 +37,12 @@ const routes = [
     component: ReportView,
     props: true
   },
+  {
+    path: '/interview/:simulationId',
+    name: 'Interview',
+    component: InterviewView,
+    props: true
+  },
   {
     path: '/interaction/:reportId',
     name: 'Interaction',
diff --git a/frontend/src/views/InterviewView.vue b/frontend/src/views/InterviewView.vue
new file mode 100644
index 00000000..767ac9b7
--- /dev/null
+++ b/frontend/src/views/InterviewView.vue
@@ -0,0 +1,192 @@
+<template>
+  <div class="main-view">
+    <!-- Header -->
+    <header class="app-header">
+      <div class="header-left">
+        <div class="brand" @click="router.push('/')">MIROFISH</div>
+      </div>
+
+      <div class="header-center">
+        <div class="view-switcher">
+          <button
+            v-for="mode in ['graph', 'split', 'workbench']"
+            :key="mode"
+            class="switch-btn"
+            :class="{ active: viewMode === mode }"
+            @click="viewMode = mode"
+          >
+            {{ { graph: $t('main.layoutGraph'), split: $t('main.layoutSplit'), workbench: $t('main.layoutWorkbench') }[mode] }}
+          </button>
+        </div>
+      </div>
+
+      <div class="header-right">
+        <LanguageSwitcher />
+        <div class="step-divider"></div>
+        <div class="workflow-step">
+          <span class="step-num">Step 4b/5</span>
+          <span class="step-name">{{ $t('interview.title') }}</span>
+        </div>
+        <div class="step-divider"></div>
+        <span class="status-indicator idle">
+          <span class="dot"></span>
+          {{ $t('common.ready') }}
+        </span>
+      </div>
+    </header>
+
+    <!-- Main Content Area -->
+    <main class="content-area">
+      <!-- Right Panel fills workbench mode -->
+      <div class="panel-wrapper right" :style="rightPanelStyle">
+        <Step4bInterviews :sim-id="currentSimId" />
+      </div>
+    </main>
+  </div>
+</template>
+
+<script setup>
+import { ref, computed } from 'vue'
+import { useRoute, useRouter } from 'vue-router'
+import LanguageSwitcher from '../components/LanguageSwitcher.vue'
+import Step4bInterviews from '../components/Step4bInterviews.vue'
+
+const route = useRoute()
+const router = useRouter()
+
+const currentSimId = ref(route.params.simulationId)
+const viewMode = ref('workbench')
+
+const rightPanelStyle = computed(() => {
+  if (viewMode.value === 'workbench') return { width: '100%', opacity: 1, transform: 'translateX(0)' }
+  if (viewMode.value === 'graph') return { width: '0%', opacity: 0, transform: 'translateX(20px)' }
+  return { width: '50%', opacity: 1, transform: 'translateX(0)' }
+})
+</script>
+
+<style scoped>
+.main-view {
+  display: flex;
+  flex-direction: column;
+  height: 100vh;
+  overflow: hidden;
+  font-family: 'JetBrains Mono', 'Space Grotesk', 'Noto Sans SC', monospace;
+}
+
+.app-header {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  padding: 0 24px;
+  height: 56px;
+  background: #000;
+  color: #fff;
+  flex-shrink: 0;
+  z-index: 10;
+}
+
+.brand {
+  font-size: 1rem;
+  font-weight: 700;
+  letter-spacing: 0.1em;
+  cursor: pointer;
+  transition: opacity 0.2s;
+}
+
+.brand:hover { opacity: 0.8; }
+
+.header-center {
+  position: absolute;
+  left: 50%;
+  transform: translateX(-50%);
+}
+
+.view-switcher {
+  display: flex;
+  gap: 2px;
+  background: #1a1a1a;
+  padding: 3px;
+  border-radius: 4px;
+}
+
+.switch-btn {
+  padding: 4px 12px;
+  font-size: 0.75rem;
+  background: transparent;
+  border: none;
+  color: #666;
+  cursor: pointer;
+  border-radius: 2px;
+  transition: all 0.15s;
+  font-family: inherit;
+}
+
+.switch-btn.active {
+  background: #fff;
+  color: #000;
+}
+
+.header-right {
+  display: flex;
+  align-items: center;
+  gap: 12px;
+}
+
+.step-divider {
+  width: 1px;
+  height: 20px;
+  background: #333;
+}
+
+.workflow-step {
+  display: flex;
+  flex-direction: column;
+  align-items: flex-end;
+}
+
+.step-num {
+  font-size: 0.65rem;
+  color: #666;
+  letter-spacing: 0.05em;
+}
+
+.step-name {
+  font-size: 0.75rem;
+  color: #fff;
+}
+
+.status-indicator {
+  display: flex;
+  align-items: center;
+  gap: 6px;
+  font-size: 0.75rem;
+  color: #999;
+}
+
+.dot {
+  width: 6px;
+  height: 6px;
+  border-radius: 50%;
+  background: #666;
+}
+
+.status-indicator.idle .dot { background: #666; }
+
+.content-area {
+  flex: 1;
+  display: flex;
+  overflow: hidden;
+  position: relative;
+}
+
+.panel-wrapper {
+  overflow: hidden;
+  transition: width 0.35s cubic-bezier(0.4, 0, 0.2, 1),
+              opacity 0.3s ease,
+              transform 0.3s ease;
+}
+
+.panel-wrapper.right {
+  overflow-y: auto;
+}
+</style>
diff --git a/locales/de.json b/locales/de.json
new file mode 100644
index 00000000..4032d4db
--- /dev/null
+++ b/locales/de.json
@@ -0,0 +1,15 @@
+{
+  "interview": {
+    "title": "Stakeholder-Interviews",
+    "subtitle": "Vier unabhängige Befragungen der simulierten Stakeholder-Population.",
+    "runAll": "Alle Post-Simulations-Interviews starten",
+    "downloadCsv": "CSV herunterladen",
+    "tab": {
+      "longitudinal": "Längsschnitt (Δ)",
+      "diversity": "Diversität",
+      "delphi": "Delphi",
+      "scenario": "Szenarien",
+      "synthesis": "Synthese"
+    }
+  }
+}
diff --git a/locales/en.json b/locales/en.json
index 544c68b1..d22cf64f 100644
--- a/locales/en.json
+++ b/locales/en.json
@@ -661,5 +661,18 @@
     "llmSelectAgentFailed": "LLM agent selection failed, using default selection: {error}",
     "generateInterviewQuestionsFailed": "Failed to generate interview questions: {error}",
     "generateInterviewSummaryFailed": "Failed to generate interview summary: {error}"
+  },
+  "interview": {
+    "title": "Stakeholder interviews",
+    "subtitle": "Four independent surveys of the simulated stakeholder population.",
+    "runAll": "Run all post-simulation interviews",
+    "downloadCsv": "Download CSV",
+    "tab": {
+      "longitudinal": "Longitudinal (Δ)",
+      "diversity": "Diversity",
+      "delphi": "Delphi",
+      "scenario": "Scenarios",
+      "synthesis": "Synthesis"
+    }
   }
 }
diff --git a/locales/zh.json b/locales/zh.json
index cd747e2f..71ed6c4b 100644
--- a/locales/zh.json
+++ b/locales/zh.json
@@ -661,5 +661,18 @@
     "llmSelectAgentFailed": "LLM选择Agent失败，使用默认选择: {error}",
     "generateInterviewQuestionsFailed": "生成采访问题失败: {error}",
     "generateInterviewSummaryFailed": "生成采访摘要失败: {error}"
+  },
+  "interview": {
+    "title": "利益相关者访谈",
+    "subtitle": "对模拟利益相关者群体进行的四项独立调查。",
+    "runAll": "运行所有模拟后访谈",
+    "downloadCsv": "下载 CSV",
+    "tab": {
+      "longitudinal": "纵向分析 (Δ)",
+      "diversity": "多样性",
+      "delphi": "德尔菲法",
+      "scenario": "情景分析",
+      "synthesis": "综合分析"
+    }
   }
 }