feat(report): add POST /api/report/<id>/signal — extract miro_signal from simulation report

Adds a canonical machine-readable probability signal endpoint that distils a completed simulation report into a structured prediction thesis.

New: backend/app/services/signal_extractor.py
- SignalExtractor.extract() calls chat_json() (3 attempts, temp 0.1, step 0.05) against the report markdown and returns a validated MiroSignal dataclass
- Validates and normalises all fields: p_yes clamped to [0.01, 0.99], confidence/action enums enforced, action recomputed from p_yes when invalid
- _trim_report() keeps the tail (conclusions) for long reports to stay within token limits
- _salvage() fallback_parser recovers a minimal signal from partial LLM output using regex probability extraction

New endpoint: POST /api/report/<report_id>/signal
- 404 if report not found
- 400 if report not yet completed or content is empty
- 422 if LLM fails after all retry attempts
- Returns canonical signal with thesis.{p_yes, confidence, action, regime, summary, drivers, invalidators} — schema_version 1.1

New: backend/tests/services/test_signal_extractor.py
- 27 tests covering happy path, field validation/normalisation, report trimming, _salvage fallback, and LLM failure propagation
- No real API calls — LLMClient fully mocked

Closes #277

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-23 16:18:50 +01:00 · 2026-03-23 16:18:50 +01:00 · 00a2150365
parent 8fe735c1c9
commit 00a2150365
4 changed files with 565 additions and 0 deletions
--- a/backend/app/api/report.py
+++ b/backend/app/api/report.py
@ -11,6 +11,7 @@ from flask import request, jsonify, send_file
 from . import report_bp
 from ..config import Config
 from ..services.report_agent import ReportAgent, ReportManager, ReportStatus
+from ..services.signal_extractor import SignalExtractor
 from ..services.simulation_manager import SimulationManager
 from ..models.project import ProjectManager
 from ..models.task import TaskManager, TaskStatus
@ -925,6 +926,89 @@ def stream_console_log(report_id: str):
        }), 500


+# ============== 预测信号接口 ==============
+
+@report_bp.route('/<report_id>/signal', methods=['POST'])
+def extract_signal(report_id: str):
+    """
+    Extract a structured prediction signal (miro_signal) from a completed report.
+
+    Runs a single LLM extraction pass over the report's markdown content and
+    returns a normalised probability signal that external prediction-market
+    pipelines can consume directly.
+
+    Responses:
+        200: {"success": true, "data": {...}} where data looks like
+            {
+                "signal_id": "uuid",
+                "schema_version": "1.1",
+                "report_id": "report_xxxx",
+                "simulation_id": "sim_xxxx",
+                "generated_at": "2026-...",
+                "thesis": {
+                    "p_yes": 0.73,
+                    "confidence": "high",
+                    "action": "buy_yes",
+                    "regime": "consensus_forming",
+                    "summary": "...",
+                    "drivers": ["...", "..."],
+                    "invalidators": ["...", "..."]
+                }
+            }
+        404: the report does not exist.
+        400: the report is not completed yet, or its markdown content is empty.
+        422: a ValueError was raised while handling the request.
+        500: any other exception; the body also carries the traceback.
+    """
+    def _failure(message: str, status_code: int):
+        # Every non-500 failure shares the same {"success", "error"} envelope.
+        return jsonify({"success": False, "error": message}), status_code
+
+    try:
+        report = ReportManager.get_report(report_id)
+
+        # Guard clauses: reject anything that cannot be turned into a signal.
+        if not report:
+            return _failure(f"报告不存在: {report_id}", 404)
+
+        if report.status != ReportStatus.COMPLETED:
+            return _failure(
+                f"报告尚未完成 (status={report.status.value})，无法提取信号", 400
+            )
+
+        if not report.markdown_content:
+            return _failure("报告内容为空，无法提取信号", 400)
+
+        signal = SignalExtractor().extract(
+            report_id=report_id,
+            simulation_id=report.simulation_id,
+            markdown_content=report.markdown_content,
+            simulation_requirement=report.simulation_requirement,
+        )
+
+        logger.info(f"信号提取完成: report={report_id} p_yes={signal.p_yes} action={signal.action}")
+
+        return jsonify({"success": True, "data": signal.to_dict()})
+
+    except ValueError as e:
+        logger.error(f"信号提取失败 (LLM): {str(e)}")
+        return _failure(str(e), 422)
+
+    except Exception as e:
+        logger.error(f"信号提取失败: {str(e)}")
+        return jsonify({
+            "success": False,
+            "error": str(e),
+            "traceback": traceback.format_exc()
+        }), 500
+
+
 # ============== 工具调用接口（供调试使用）==============

@report_bp.route('/tools/search', methods=['POST'])
--- a/backend/app/services/signal_extractor.py
+++ b/backend/app/services/signal_extractor.py
@ -0,0 +1,245 @@
+"""
+Miro Signal Extractor
+Distils a completed simulation report into a canonical machine-readable
+probability signal that external pipelines (e.g. prediction-market bots)
+can consume directly.
+"""
+
+from __future__ import annotations
+
+import math
+import re
+import uuid
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from typing import List, Optional
+
+from ..utils.llm_client import LLMClient
+from ..utils.logger import get_logger
+
+# Module-level logger for the signal extraction service.
+logger = get_logger('mirofish.signal_extractor')
+
+# Schema version stamped into every MiroSignal built by this module.
+SCHEMA_VERSION = "1.1"
+
+# System prompt sent as the first message of the extraction call.
+# The action thresholds it states (0.55 / 0.45) are the same ones the
+# extractor applies when it has to recompute an invalid action itself.
+# NOTE: the prompt only asks for p_yes strictly inside (0, 1); the extractor
+# additionally clamps the value to [0.01, 0.99] after parsing.
+_SYSTEM_PROMPT = """\
+You are a structured-signal extractor. You will be given the full markdown text
+of a social-simulation analysis report and the original simulation requirement
+(the prediction question). Your job is to distil the report into a concise,
+machine-readable probability signal.
+
+Rules:
+- p_yes must be a float strictly between 0.0 and 1.0 (never exactly 0 or 1).
+- confidence must be one of: "high", "medium", "low".
+- action must be one of: "buy_yes", "buy_no", "hold".
+  Use "buy_yes" when p_yes > 0.55, "buy_no" when p_yes < 0.45, else "hold".
+- regime describes the dominant social dynamic observed in the simulation,
+  e.g. "consensus_forming", "contested", "uncertain", "momentum_shift",
+  "echo_chamber", "fragmented".
+- summary is one sentence (≤ 30 words).
+- drivers is a list of 2–4 short strings (key factors supporting the thesis).
+- invalidators is a list of 2–4 short strings (key risks or counter-factors).
+- Do not reproduce large sections of the report. Be concise.
+- Respond ONLY with valid JSON matching the schema below — no prose, no fences.
+
+Required JSON schema:
+{
+  "p_yes": <float 0.0–1.0>,
+  "confidence": "high" | "medium" | "low",
+  "action": "buy_yes" | "buy_no" | "hold",
+  "regime": <string>,
+  "summary": <string>,
+  "drivers": [<string>, ...],
+  "invalidators": [<string>, ...]
+}
+"""
+
+
+@dataclass
+class MiroSignal:
+    """Canonical prediction signal extracted from a simulation report.
+
+    The first five fields identify the signal and where it came from; the
+    remaining fields form the prediction thesis and are nested under the
+    ``"thesis"`` key when serialised with :meth:`to_dict`.
+    """
+
+    # Identity / provenance
+    signal_id: str
+    schema_version: str
+    report_id: str
+    simulation_id: str
+    generated_at: str
+
+    # Prediction thesis
+    p_yes: float
+    confidence: str          # one of: high, medium, low
+    action: str              # one of: buy_yes, buy_no, hold
+    regime: str
+    summary: str
+    drivers: List[str] = field(default_factory=list)
+    invalidators: List[str] = field(default_factory=list)
+
+    def to_dict(self) -> dict:
+        """Return the signal as a plain dict with the thesis fields nested."""
+        envelope_keys = (
+            "signal_id",
+            "schema_version",
+            "report_id",
+            "simulation_id",
+            "generated_at",
+        )
+        thesis_keys = (
+            "p_yes",
+            "confidence",
+            "action",
+            "regime",
+            "summary",
+            "drivers",
+            "invalidators",
+        )
+        # Tuple order fixes the key order of the resulting dicts.
+        payload = {key: getattr(self, key) for key in envelope_keys}
+        payload["thesis"] = {key: getattr(self, key) for key in thesis_keys}
+        return payload
+
+
+class SignalExtractor:
+    """Extract a MiroSignal from a completed report's markdown content.
+
+    The LLM reply is never trusted as-is: every field is validated and
+    normalised in ``_build_signal`` so callers always receive a well-formed
+    signal, and ``_salvage`` offers a regex-based fallback for replies that
+    are not valid JSON.
+    """
+
+    _VALID_CONFIDENCE = {"high", "medium", "low"}
+    _VALID_ACTIONS = {"buy_yes", "buy_no", "hold"}
+
+    # Neutral prior used whenever p_yes is missing or unusable.
+    _DEFAULT_P_YES = 0.5
+    # p_yes is clamped into this closed interval so it is never exactly 0 or 1.
+    _P_YES_MIN = 0.01
+    _P_YES_MAX = 0.99
+
+    # A probability attached to an explicit p_yes key, e.g. in truncated JSON.
+    _KEYED_PROB_RE = re.compile(
+        r"""p_yes["']?\s*[:=]\s*["']?(\d*\.?\d+)""", re.IGNORECASE
+    )
+    # A stand-alone probability literal. The look-arounds reject digits that
+    # belong to a larger token ("12.0", "v1.0", "0.5.1") while still accepting
+    # a value followed by sentence punctuation ("... is 0.68.").
+    _BARE_PROB_RE = re.compile(r"(?<![\w.])(0\.\d+|1\.0+|0)(?!\.?\d)(?!\w)")
+
+    def __init__(self, llm_client: Optional[LLMClient] = None):
+        """Use *llm_client* if given, otherwise create a default LLMClient."""
+        self._client = llm_client or LLMClient()
+
+    def extract(
+        self,
+        report_id: str,
+        simulation_id: str,
+        markdown_content: str,
+        simulation_requirement: str,
+    ) -> MiroSignal:
+        """
+        Distil *markdown_content* into a MiroSignal.
+
+        Args:
+            report_id: The report this signal is derived from.
+            simulation_id: Parent simulation ID.
+            markdown_content: Full report text (may be long).
+            simulation_requirement: The original prediction question / goal.
+
+        Returns:
+            MiroSignal with validated fields.
+
+        Raises:
+            ValueError: If the LLM fails to produce a valid signal after
+                retries, or returns something that is not a JSON object.
+        """
+        # Trim to avoid token limits while keeping the most analytical content.
+        # Reports can exceed 30 k chars; the last third is usually the conclusion.
+        body = self._trim_report(markdown_content)
+
+        messages = [
+            {"role": "system", "content": _SYSTEM_PROMPT},
+            {
+                "role": "user",
+                "content": (
+                    f"Simulation requirement (prediction question):\n{simulation_requirement}\n\n"
+                    f"Report:\n{body}"
+                ),
+            },
+        ]
+
+        raw = self._client.chat_json(
+            messages=messages,
+            temperature=0.1,
+            max_tokens=512,
+            max_attempts=3,
+            temperature_step=0.05,
+            fallback_parser=self._salvage,
+        )
+
+        return self._build_signal(raw, report_id, simulation_id)
+
+    # ------------------------------------------------------------------
+    # Internal helpers
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _trim_report(content: str, max_chars: int = 12_000) -> str:
+        """Keep the tail of the report (conclusions) if it is very long.
+
+        Reports of at most *max_chars* characters are returned unchanged.
+        Longer ones are cut to their last *max_chars* characters and prefixed
+        with a truncation marker (so the result is slightly longer than
+        *max_chars*).
+        """
+        if len(content) <= max_chars:
+            return content
+        # content[-0:] is the whole string, so a non-positive budget must be
+        # handled explicitly to really yield an empty tail.
+        tail = content[-max_chars:] if max_chars > 0 else ""
+        return "…[report truncated for signal extraction]\n\n" + tail
+
+    @staticmethod
+    def _action_for(p_yes: float) -> str:
+        """Map a probability to the trading action stated in the system prompt."""
+        if p_yes > 0.55:
+            return "buy_yes"
+        if p_yes < 0.45:
+            return "buy_no"
+        return "hold"
+
+    @staticmethod
+    def _clean_text(value, default: str) -> str:
+        """Return *value* as a stripped string, or *default* if missing/blank.
+
+        ``None`` (JSON null) is treated as missing so it does not turn into
+        the literal string "None".
+        """
+        if value is None:
+            return default
+        return str(value).strip() or default
+
+    @staticmethod
+    def _as_str_list(value) -> List[str]:
+        """Coerce an LLM-provided list field into a list of non-empty strings.
+
+        ``None`` and unsupported types give an empty list; a bare string is
+        kept as a single item instead of being split into characters.
+        """
+        if value is None:
+            return []
+        if isinstance(value, str):
+            value = [value]
+        elif not isinstance(value, (list, tuple)):
+            return []
+        cleaned = (str(item).strip() for item in value if item)
+        return [text for text in cleaned if text]
+
+    def _build_signal(
+        self, raw: dict, report_id: str, simulation_id: str
+    ) -> MiroSignal:
+        """Validate and normalise the raw LLM dict into a MiroSignal.
+
+        Raises:
+            ValueError: If *raw* is not a dict.
+        """
+        if not isinstance(raw, dict):
+            raise ValueError(
+                f"LLM returned {type(raw).__name__}, expected a JSON object"
+            )
+
+        # p_yes — fall back to the neutral prior on missing / unparsable input.
+        try:
+            p_yes = float(raw.get("p_yes", self._DEFAULT_P_YES))
+        except (TypeError, ValueError):
+            p_yes = self._DEFAULT_P_YES
+        # NaN compares false with everything, so min()/max() would silently
+        # turn it into the upper bound; treat it as "no usable value" instead.
+        if math.isnan(p_yes):
+            p_yes = self._DEFAULT_P_YES
+        p_yes = max(self._P_YES_MIN, min(self._P_YES_MAX, p_yes))
+
+        # confidence
+        confidence = self._clean_text(raw.get("confidence"), "medium").lower()
+        if confidence not in self._VALID_CONFIDENCE:
+            confidence = "medium"
+
+        # action — recompute from p_yes if missing or invalid
+        action = self._clean_text(raw.get("action"), "").lower()
+        if action not in self._VALID_ACTIONS:
+            action = self._action_for(p_yes)
+
+        return MiroSignal(
+            signal_id=str(uuid.uuid4()),
+            schema_version=SCHEMA_VERSION,
+            report_id=report_id,
+            simulation_id=simulation_id,
+            generated_at=datetime.now(timezone.utc).isoformat(),
+            p_yes=p_yes,
+            confidence=confidence,
+            action=action,
+            regime=self._clean_text(raw.get("regime"), "uncertain"),
+            summary=self._clean_text(raw.get("summary"), ""),
+            drivers=self._as_str_list(raw.get("drivers")),
+            invalidators=self._as_str_list(raw.get("invalidators")),
+        )
+
+    @staticmethod
+    def _salvage(raw_text: str) -> Optional[dict]:
+        """
+        Last-resort fallback: recover a minimal signal dict from free text.
+
+        A value attached to an explicit ``p_yes`` key wins if it lies in
+        [0, 1]; otherwise the first stand-alone probability literal is used.
+        Confidence is taken from the whole words "high" / "low" (default
+        "medium").
+
+        Returns:
+            A dict with the same keys as the LLM schema, or None when no
+            probability can be found.
+        """
+        if not raw_text:
+            return None
+
+        p = None
+        keyed = SignalExtractor._KEYED_PROB_RE.search(raw_text)
+        if keyed:
+            candidate = float(keyed.group(1))
+            if 0.0 <= candidate <= 1.0:
+                p = candidate
+
+        if p is None:
+            bare = SignalExtractor._BARE_PROB_RE.search(raw_text)
+            if not bare:
+                return None
+            p = float(bare.group(1))
+
+        # Whole-word match so "highlight" / "follow" / "below" do not count.
+        text_lower = raw_text.lower()
+        if re.search(r"\bhigh\b", text_lower):
+            confidence = "high"
+        elif re.search(r"\blow\b", text_lower):
+            confidence = "low"
+        else:
+            confidence = "medium"
+
+        return {
+            "p_yes": p,
+            "confidence": confidence,
+            "action": SignalExtractor._action_for(p),
+            "regime": "uncertain",
+            "summary": "Signal salvaged from partial LLM output.",
+            "drivers": [],
+            "invalidators": [],
+        }
--- a/backend/tests/services/__init__.py
+++ b/backend/tests/services/__init__.py
--- a/backend/tests/services/test_signal_extractor.py
+++ b/backend/tests/services/test_signal_extractor.py
@ -0,0 +1,236 @@
+"""
+Tests for SignalExtractor — no real API calls, LLMClient fully mocked.
+"""
+
+import pytest
+from unittest.mock import MagicMock, patch
+
+from app.services.signal_extractor import SignalExtractor, MiroSignal, SCHEMA_VERSION
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_extractor(chat_json_return=None, chat_json_side_effect=None):
+    """Build a (SignalExtractor, mock LLM client) pair for a test.
+
+    When *chat_json_side_effect* is given it is installed as the mock's
+    side effect; otherwise ``chat_json`` returns *chat_json_return*
+    (an empty dict when that is falsy).
+    """
+    client = MagicMock()
+    if chat_json_side_effect is None:
+        client.chat_json.return_value = chat_json_return if chat_json_return else {}
+    else:
+        client.chat_json.side_effect = chat_json_side_effect
+    extractor = SignalExtractor(llm_client=client)
+    return extractor, client
+
+
+# Small markdown report used as the extraction input in most tests.
+_SAMPLE_REPORT = """
+## Executive Summary
+The simulation shows strong consensus forming around a YES outcome.
+Seventy-three percent of agents expressed optimism.
+
+## Key Findings
+- Social momentum is strongly positive.
+- Counter-narratives remain marginal.
+
+## Conclusion
+The dominant dynamic is consensus formation with high confidence.
+"""
+
+# Prediction question passed alongside the report.
+_SAMPLE_REQUIREMENT = "Will the proposal pass by end of Q2 2026?"
+
+# A fully valid LLM reply; individual tests override single keys of it.
+_GOOD_LLM_RESPONSE = {
+    "p_yes": 0.73,
+    "confidence": "high",
+    "action": "buy_yes",
+    "regime": "consensus_forming",
+    "summary": "Strong agent consensus supports a YES outcome with high confidence.",
+    "drivers": ["70%+ agent agreement", "positive social momentum"],
+    "invalidators": ["marginal counter-narrative", "low information diversity"],
+}
+
+
+# ---------------------------------------------------------------------------
+# Happy path
+# ---------------------------------------------------------------------------
+
+class TestExtractHappyPath:
+    """Extraction when the mocked LLM returns a fully valid response."""
+
+    @staticmethod
+    def _run(report_id="r1", simulation_id="s1", requirement=_SAMPLE_REQUIREMENT):
+        """Extract from the sample report; return (signal, mock client)."""
+        extractor, client = _make_extractor(_GOOD_LLM_RESPONSE)
+        signal = extractor.extract(report_id, simulation_id, _SAMPLE_REPORT, requirement)
+        return signal, client
+
+    def test_returns_miro_signal(self):
+        signal, _ = self._run()
+        assert isinstance(signal, MiroSignal)
+
+    def test_fields_match_llm_output(self):
+        signal, _ = self._run()
+        assert signal.p_yes == pytest.approx(0.73)
+        assert signal.confidence == "high"
+        assert signal.action == "buy_yes"
+        assert signal.regime == "consensus_forming"
+        assert "YES" in signal.summary or "consensus" in signal.summary.lower()
+        assert len(signal.drivers) == 2
+        assert len(signal.invalidators) == 2
+
+    def test_metadata_fields(self):
+        signal, _ = self._run(report_id="report_abc", simulation_id="sim_xyz")
+        assert signal.report_id == "report_abc"
+        assert signal.simulation_id == "sim_xyz"
+        assert signal.schema_version == SCHEMA_VERSION
+        assert signal.signal_id  # non-empty UUID
+        assert signal.generated_at  # non-empty ISO timestamp
+
+    def test_to_dict_structure(self):
+        signal, _ = self._run()
+        payload = signal.to_dict()
+        assert "thesis" in payload
+        expected_keys = {
+            "p_yes", "confidence", "action", "regime",
+            "summary", "drivers", "invalidators",
+        }
+        assert set(payload["thesis"].keys()) == expected_keys
+
+    def test_llm_called_with_low_temperature(self):
+        _, client = self._run()
+        kwargs = client.chat_json.call_args.kwargs
+        assert kwargs["temperature"] <= 0.2
+
+    def test_llm_called_with_retries(self):
+        _, client = self._run()
+        kwargs = client.chat_json.call_args.kwargs
+        assert kwargs.get("max_attempts", 1) >= 2
+
+    def test_simulation_requirement_in_messages(self):
+        question = "Will the referendum pass?"
+        _, client = self._run(requirement=question)
+        messages = client.chat_json.call_args.kwargs["messages"]
+        user_content = next(m["content"] for m in messages if m["role"] == "user")
+        assert question in user_content
+
+
+# ---------------------------------------------------------------------------
+# Field validation and normalisation
+# ---------------------------------------------------------------------------
+
+class TestFieldValidation:
+    """Normalisation of out-of-range, invalid, or missing LLM fields."""
+
+    @staticmethod
+    def _signal_for(llm_response):
+        """Run extraction on the sample report with *llm_response* as the LLM reply."""
+        extractor, _ = _make_extractor(llm_response)
+        return extractor.extract("r1", "s1", _SAMPLE_REPORT, _SAMPLE_REQUIREMENT)
+
+    def test_p_yes_clamped_below_zero(self):
+        sig = self._signal_for({**_GOOD_LLM_RESPONSE, "p_yes": -0.5})
+        # Pin the exact lower bound: ">= 0.01" would also pass if the value
+        # were replaced by something unrelated such as 0.5.
+        assert sig.p_yes == pytest.approx(0.01)
+
+    def test_p_yes_clamped_above_one(self):
+        sig = self._signal_for({**_GOOD_LLM_RESPONSE, "p_yes": 1.5})
+        # Pin the exact upper bound for the same reason as above.
+        assert sig.p_yes == pytest.approx(0.99)
+
+    def test_invalid_confidence_falls_back_to_medium(self):
+        sig = self._signal_for({**_GOOD_LLM_RESPONSE, "confidence": "very_sure"})
+        assert sig.confidence == "medium"
+
+    def test_invalid_action_recomputed_from_p_yes_buy_yes(self):
+        sig = self._signal_for({**_GOOD_LLM_RESPONSE, "p_yes": 0.8, "action": "INVALID"})
+        assert sig.action == "buy_yes"
+
+    def test_invalid_action_recomputed_from_p_yes_buy_no(self):
+        sig = self._signal_for({**_GOOD_LLM_RESPONSE, "p_yes": 0.2, "action": "INVALID"})
+        assert sig.action == "buy_no"
+
+    def test_invalid_action_recomputed_from_p_yes_hold(self):
+        sig = self._signal_for({**_GOOD_LLM_RESPONSE, "p_yes": 0.5, "action": "INVALID"})
+        assert sig.action == "hold"
+
+    def test_missing_regime_defaults_to_uncertain(self):
+        resp = {k: v for k, v in _GOOD_LLM_RESPONSE.items() if k != "regime"}
+        sig = self._signal_for(resp)
+        assert sig.regime == "uncertain"
+
+    def test_empty_drivers_list_accepted(self):
+        sig = self._signal_for({**_GOOD_LLM_RESPONSE, "drivers": []})
+        assert sig.drivers == []
+
+    def test_non_list_drivers_handled(self):
+        sig = self._signal_for({**_GOOD_LLM_RESPONSE, "drivers": "some string"})
+        # Must not crash; the only contract checked here is that the field
+        # still comes back as a list.
+        assert isinstance(sig.drivers, list)
+
+
+# ---------------------------------------------------------------------------
+# Report trimming
+# ---------------------------------------------------------------------------
+
+class TestReportTrimming:
+    """Behaviour of SignalExtractor._trim_report for short and long inputs."""
+
+    def test_short_report_unchanged(self):
+        text = "Short report content."
+        assert SignalExtractor._trim_report(text, max_chars=100) == text
+
+    def test_long_report_trimmed(self):
+        original_length = 20_000
+        trimmed = SignalExtractor._trim_report("x" * original_length, max_chars=12_000)
+        assert len(trimmed) < original_length
+        assert "truncated" in trimmed
+
+    def test_trimmed_report_keeps_tail(self):
+        # The tail (conclusion) is most important for signal extraction
+        body = "".join(["A" * 10_000, "CONCLUSION"])
+        trimmed = SignalExtractor._trim_report(body, max_chars=100)
+        assert "CONCLUSION" in trimmed
+
+
+# ---------------------------------------------------------------------------
+# Fallback (_salvage)
+# ---------------------------------------------------------------------------
+
+class TestSalvage:
+    """Behaviour of the regex-based fallback parser SignalExtractor._salvage."""
+
+    @staticmethod
+    def _field(text, key):
+        """Salvage *text* and return a single field of the resulting dict."""
+        return SignalExtractor._salvage(text)[key]
+
+    def test_salvage_extracts_probability(self):
+        salvaged = SignalExtractor._salvage("The probability is 0.68 for YES outcome.")
+        assert salvaged is not None
+        assert salvaged["p_yes"] == pytest.approx(0.68)
+
+    def test_salvage_returns_none_when_no_probability(self):
+        salvaged = SignalExtractor._salvage("no numbers here at all")
+        assert salvaged is None
+
+    def test_salvage_sets_action_buy_yes(self):
+        assert self._field("probability 0.80", "action") == "buy_yes"
+
+    def test_salvage_sets_action_buy_no(self):
+        assert self._field("probability 0.20", "action") == "buy_no"
+
+    def test_salvage_sets_action_hold(self):
+        assert self._field("probability 0.50", "action") == "hold"
+
+    def test_salvage_detects_high_confidence(self):
+        assert self._field("high confidence, p=0.72", "confidence") == "high"
+
+    def test_salvage_detects_low_confidence(self):
+        assert self._field("low certainty, p=0.30", "confidence") == "low"
+
+
+# ---------------------------------------------------------------------------
+# LLM failure propagates as ValueError
+# ---------------------------------------------------------------------------
+
+class TestLLMFailure:
+    """A ValueError raised by the LLM client must reach the caller of extract()."""
+
+    def test_raises_value_error_on_llm_failure(self):
+        failure = ValueError("LLM返回的JSON格式无效: ...")
+        extractor, _ = _make_extractor(chat_json_side_effect=failure)
+        with pytest.raises(ValueError):
+            extractor.extract("r1", "s1", _SAMPLE_REPORT, _SAMPLE_REQUIREMENT)