feat(report): add POST /api/report/<id>/signal — extract miro_signal from simulation report

Adds a canonical machine-readable probability signal endpoint that distils a completed simulation report into a structured prediction thesis.

New: backend/app/services/signal_extractor.py
- SignalExtractor.extract() calls chat_json() (3 attempts, temp 0.1, step 0.05) against the report markdown and returns a validated MiroSignal dataclass
- Validates and normalises all fields: p_yes clamped to [0.01, 0.99], confidence/action enums enforced, action recomputed from p_yes when invalid
- _trim_report() keeps the tail (conclusions) for long reports to stay within token limits
- _salvage() fallback_parser recovers a minimal signal from partial LLM output using regex probability extraction

New endpoint: POST /api/report/<report_id>/signal
- 404 if report not found
- 400 if report not yet completed or content is empty
- 422 if LLM fails after all retry attempts
- Returns canonical signal with thesis.{p_yes, confidence, action, regime, summary, drivers, invalidators} — schema_version 1.1

New: backend/tests/services/test_signal_extractor.py
- 27 tests covering happy path, field validation/normalisation, report trimming, _salvage fallback, and LLM failure propagation
- No real API calls — LLMClient fully mocked

Closes #277

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-23 16:18:50 +01:00 · 2026-03-23 16:18:50 +01:00 · 00a2150365
parent 8fe735c1c9
commit 00a2150365
4 changed files with 565 additions and 0 deletions
--- a/backend/app/api/report.py
+++ b/backend/app/api/report.py
@ -11,6 +11,7 @@ from flask import request, jsonify, send_file
 from . import report_bp
 from ..config import Config
 from ..services.report_agent import ReportAgent, ReportManager, ReportStatus
 from ..services.signal_extractor import SignalExtractor
 from ..services.simulation_manager import SimulationManager
 from ..models.project import ProjectManager
 from ..models.task import TaskManager, TaskStatus
@ -925,6 +926,89 @@ def stream_console_log(report_id: str):
        }), 500
 # ============== 预测信号接口 ==============
@report_bp.route('/<report_id>/signal', methods=['POST'])
def extract_signal(report_id: str):
    """
    Extract a structured prediction signal (miro_signal) from a completed report.

    Runs one SignalExtractor pass over the report's markdown content and
    returns a normalised probability signal intended for direct consumption
    by external prediction-market pipelines.

    Status codes:
        200 - signal extracted
        404 - no report exists for ``report_id``
        400 - the report is not COMPLETED, or its markdown content is empty
        422 - a ValueError was raised while handling the request
        500 - any other exception (the payload then also carries a traceback)

    Success payload:
        {
            "success": true,
            "data": {
                "signal_id": "uuid",
                "schema_version": "1.1",
                "report_id": "report_xxxx",
                "simulation_id": "sim_xxxx",
                "generated_at": "2026-...",
                "thesis": {
                    "p_yes": 0.73,
                    "confidence": "high",
                    "action": "buy_yes",
                    "regime": "consensus_forming",
                    "summary": "...",
                    "drivers": ["...", "..."],
                    "invalidators": ["...", "..."]
                }
            }
        }
    """
    def _rejected(message: str, status_code: int):
        # Every non-500 failure shares this two-key payload.
        return jsonify({"success": False, "error": message}), status_code

    try:
        report = ReportManager.get_report(report_id)

        # Guard clauses: bail out before any LLM call is made.
        if not report:
            return _rejected(f"报告不存在: {report_id}", 404)
        if report.status != ReportStatus.COMPLETED:
            return _rejected(
                f"报告尚未完成 (status={report.status.value})，无法提取信号",
                400,
            )
        markdown = report.markdown_content
        if not markdown:
            return _rejected("报告内容为空，无法提取信号", 400)

        signal = SignalExtractor().extract(
            report_id=report_id,
            simulation_id=report.simulation_id,
            markdown_content=markdown,
            simulation_requirement=report.simulation_requirement,
        )
        logger.info(f"信号提取完成: report={report_id} p_yes={signal.p_yes} action={signal.action}")
        return jsonify({"success": True, "data": signal.to_dict()})

    except ValueError as e:
        logger.error(f"信号提取失败 (LLM): {str(e)}")
        return _rejected(str(e), 422)

    except Exception as e:
        logger.error(f"信号提取失败: {str(e)}")
        return jsonify({
            "success": False,
            "error": str(e),
            "traceback": traceback.format_exc()
        }), 500
 # ============== 工具调用接口（供调试使用）==============
@report_bp.route('/tools/search', methods=['POST'])
--- a/backend/app/services/signal_extractor.py
+++ b/backend/app/services/signal_extractor.py
@ -0,0 +1,245 @@
 """
 Miro Signal Extractor
 Distils a completed simulation report into a canonical machine-readable
 probability signal that external pipelines (e.g. prediction-market bots)
 can consume directly.
 """
 from __future__ import annotations
 import uuid
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
 from typing import List, Optional
 from ..utils.llm_client import LLMClient
 from ..utils.logger import get_logger
 # Logger for this module, registered under the name 'mirofish.signal_extractor'.
 logger = get_logger('mirofish.signal_extractor')
 # Schema version string; _build_signal() copies it into every MiroSignal it creates.
 SCHEMA_VERSION = "1.1"
 # System-role message that extract() sends with every request. It lists the
 # per-field rules and the JSON object the model must answer with. The
 # 0.55 / 0.45 action thresholds quoted in the text are the same ones the Python
 # code applies when it has to derive an action from p_yes itself.
 _SYSTEM_PROMPT = """\
 You are a structured-signal extractor. You will be given the full markdown text
 of a social-simulation analysis report and the original simulation requirement
 (the prediction question). Your job is to distil the report into a concise,
 machine-readable probability signal.
 Rules:
 - p_yes must be a float strictly between 0.0 and 1.0 (never exactly 0 or 1).
 - confidence must be one of: "high", "medium", "low".
 - action must be one of: "buy_yes", "buy_no", "hold".
  Use "buy_yes" when p_yes > 0.55, "buy_no" when p_yes < 0.45, else "hold".
 - regime describes the dominant social dynamic observed in the simulation,
  e.g. "consensus_forming", "contested", "uncertain", "momentum_shift",
  "echo_chamber", "fragmented".
 - summary is one sentence (≤ 30 words).
 - drivers is a list of 2–4 short strings (key factors supporting the thesis).
 - invalidators is a list of 2–4 short strings (key risks or counter-factors).
 - Do not reproduce large sections of the report. Be concise.
 - Respond ONLY with valid JSON matching the schema below — no prose, no fences.
 Required JSON schema:
 {
  "p_yes": <float 0.0–1.0>,
  "confidence": "high" | "medium" | "low",
  "action": "buy_yes" | "buy_no" | "hold",
  "regime": <string>,
  "summary": <string>,
  "drivers": [<string>, ...],
  "invalidators": [<string>, ...]
 }
 """
@dataclass
class MiroSignal:
    """Canonical prediction signal extracted from a simulation report.

    The first five fields identify the signal and where it came from; the
    remaining ones form the thesis that ``to_dict`` nests under ``"thesis"``.
    """

    # Envelope / provenance
    signal_id: str
    schema_version: str
    report_id: str
    simulation_id: str
    generated_at: str

    # Thesis
    p_yes: float
    confidence: str          # one of: high, medium, low
    action: str              # one of: buy_yes, buy_no, hold
    regime: str
    summary: str
    drivers: List[str] = field(default_factory=list)
    invalidators: List[str] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return the signal as a plain dict with the thesis fields nested."""
        envelope_fields = (
            "signal_id",
            "schema_version",
            "report_id",
            "simulation_id",
            "generated_at",
        )
        thesis_fields = (
            "p_yes",
            "confidence",
            "action",
            "regime",
            "summary",
            "drivers",
            "invalidators",
        )
        payload = {name: getattr(self, name) for name in envelope_fields}
        payload["thesis"] = {name: getattr(self, name) for name in thesis_fields}
        return payload
 class SignalExtractor:
     """Extracts a MiroSignal from a completed report's markdown content.

     The LLM is asked for a JSON object; every field of the reply is then
     validated and normalised in Python, so a returned signal is always
     well-formed regardless of what the model produced.
     """

     _VALID_CONFIDENCE = {"high", "medium", "low"}
     _VALID_ACTIONS = {"buy_yes", "buy_no", "hold"}

     def __init__(self, llm_client: Optional[LLMClient] = None):
         """Use *llm_client* when given, otherwise construct a default LLMClient."""
         self._client = llm_client or LLMClient()

     def extract(
         self,
         report_id: str,
         simulation_id: str,
         markdown_content: str,
         simulation_requirement: str,
     ) -> MiroSignal:
         """
         Distil *markdown_content* into a MiroSignal.

         Args:
             report_id: The report this signal is derived from.
             simulation_id: Parent simulation ID.
             markdown_content: Full report text (may be long).
             simulation_requirement: The original prediction question / goal.

         Returns:
             MiroSignal with validated fields.

         Raises:
             ValueError: If the LLM fails to produce a valid signal after
                 retries, or replies with something other than a JSON object.
         """
         # Long reports are cut down to their tail, where the conclusions are
         # expected to be, so the request stays within token limits.
         body = self._trim_report(markdown_content)
         messages = [
             {"role": "system", "content": _SYSTEM_PROMPT},
             {
                 "role": "user",
                 "content": (
                     f"Simulation requirement (prediction question):\n{simulation_requirement}\n\n"
                     f"Report:\n{body}"
                 ),
             },
         ]
         raw = self._client.chat_json(
             messages=messages,
             temperature=0.1,
             max_tokens=512,
             max_attempts=3,
             temperature_step=0.05,
             fallback_parser=self._salvage,
         )
         return self._build_signal(raw, report_id, simulation_id)

     # ------------------------------------------------------------------
     # Internal helpers
     # ------------------------------------------------------------------
     @staticmethod
     def _trim_report(content: str, max_chars: int = 12_000) -> str:
         """Keep the tail of the report (conclusions) if it is very long.

         Content of at most *max_chars* characters is returned unchanged.
         Otherwise the last *max_chars* characters are returned behind a
         one-line truncation marker.
         """
         if len(content) <= max_chars:
             return content
         # Slice from an explicit start index: ``content[-max_chars:]`` would
         # return the WHOLE string when max_chars is 0.
         tail = content[len(content) - max_chars:] if max_chars > 0 else ""
         return "…[report truncated for signal extraction]\n\n" + tail

     @staticmethod
     def _action_for(p_yes: float) -> str:
         """Map a probability onto buy_yes (> 0.55), buy_no (< 0.45) or hold."""
         if p_yes > 0.55:
             return "buy_yes"
         if p_yes < 0.45:
             return "buy_no"
         return "hold"

     @staticmethod
     def _coerce_probability(value) -> float:
         """Turn *value* into a float clamped to [0.01, 0.99].

         Missing, non-numeric and NaN values all become the neutral 0.5.
         """
         import math

         try:
             p_yes = float(value)
         except (TypeError, ValueError, OverflowError):
             return 0.5
         # NaN compares false with everything, so the clamp below would turn
         # it into 0.99; treat it as "no usable value" instead.
         if math.isnan(p_yes):
             return 0.5
         return max(0.01, min(0.99, p_yes))

     @staticmethod
     def _clean_text(value) -> str:
         """Return *value* as a stripped string; ``None`` (JSON null) becomes ''."""
         return "" if value is None else str(value).strip()

     @staticmethod
     def _as_str_list(value) -> List[str]:
         """Normalise a list-like LLM field into a list of non-empty strings.

         ``None`` gives an empty list, a bare string is kept as ONE entry
         rather than being split into characters, and a non-iterable scalar
         becomes a single-entry list.
         """
         if value is None:
             return []
         if isinstance(value, str):
             candidates = [value]
         else:
             try:
                 candidates = list(value)
             except TypeError:
                 candidates = [value]
         cleaned = (str(item).strip() for item in candidates if item)
         return [text for text in cleaned if text]

     def _build_signal(
         self, raw: dict, report_id: str, simulation_id: str
     ) -> MiroSignal:
         """Validate and normalise the raw LLM dict into a MiroSignal.

         Raises:
             ValueError: If *raw* is not a dict.
         """
         if not isinstance(raw, dict):
             raise ValueError(
                 f"LLM returned {type(raw).__name__} instead of a JSON object"
             )

         p_yes = self._coerce_probability(raw.get("p_yes"))

         confidence = self._clean_text(raw.get("confidence")).lower()
         if confidence not in self._VALID_CONFIDENCE:
             confidence = "medium"

         # A valid action from the model is kept; otherwise derive it from p_yes.
         action = self._clean_text(raw.get("action")).lower()
         if action not in self._VALID_ACTIONS:
             action = self._action_for(p_yes)

         return MiroSignal(
             signal_id=str(uuid.uuid4()),
             schema_version=SCHEMA_VERSION,
             report_id=report_id,
             simulation_id=simulation_id,
             generated_at=datetime.now(timezone.utc).isoformat(),
             p_yes=p_yes,
             confidence=confidence,
             action=action,
             regime=self._clean_text(raw.get("regime")) or "uncertain",
             summary=self._clean_text(raw.get("summary")),
             drivers=self._as_str_list(raw.get("drivers")),
             invalidators=self._as_str_list(raw.get("invalidators")),
         )

     @staticmethod
     def _salvage(raw_text: str) -> Optional[dict]:
         """
         Last-resort fallback: recover a minimal signal dict from free text.

         A number written right after a ``p_yes`` key wins; otherwise the
         first standalone number shaped like a probability (``0``, ``0.x`` or
         ``1.0``) is used. Returns ``None`` when no such number exists.
         """
         import re

         if not raw_text or not isinstance(raw_text, str):
             return None

         # The look-arounds stop the pattern from matching inside a larger
         # token, e.g. the trailing "0" of "2.0" or the "0.5" of "3.0.5".
         number = r'(?<![\w.])(0\.\d+|1\.0+|0)(?!\w|\.\d)'
         found = (
             re.search(r'p_yes["\']?\s*[:=]\s*["\']?' + number, raw_text)
             or re.search(number, raw_text)
         )
         if not found:
             return None
         p = float(found.group(1))

         text_lower = raw_text.lower()
         keyed = re.search(
             r'confidence["\']?\s*[:=]\s*["\']?(high|medium|low)\b', text_lower
         )
         if keyed:
             confidence = keyed.group(1)
         # Whole words only: "following" or "highlight" must not count.
         elif re.search(r'\bhigh\b', text_lower):
             confidence = "high"
         elif re.search(r'\blow\b', text_lower):
             confidence = "low"
         else:
             confidence = "medium"

         return {
             "p_yes": p,
             "confidence": confidence,
             "action": SignalExtractor._action_for(p),
             "regime": "uncertain",
             "summary": "Signal salvaged from partial LLM output.",
             "drivers": [],
             "invalidators": [],
         }
--- a/backend/tests/services/init.py
+++ b/backend/tests/services/init.py
--- a/backend/tests/services/test_signal_extractor.py
+++ b/backend/tests/services/test_signal_extractor.py
@ -0,0 +1,236 @@
 """
 Tests for SignalExtractor — no real API calls, LLMClient fully mocked.
 """
 import pytest
 from unittest.mock import MagicMock, patch
 from app.services.signal_extractor import SignalExtractor, MiroSignal, SCHEMA_VERSION
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
 def _make_extractor(chat_json_return=None, chat_json_side_effect=None):
     """Build a SignalExtractor around a MagicMock client.

     ``chat_json_side_effect`` takes precedence when supplied; otherwise
     ``chat_json`` returns ``chat_json_return`` (an empty dict if that is
     falsy). Returns the pair ``(extractor, mock_client)``.
     """
     llm = MagicMock()
     if chat_json_side_effect is None:
         llm.chat_json.return_value = chat_json_return or {}
     else:
         llm.chat_json.side_effect = chat_json_side_effect
     extractor = SignalExtractor(llm_client=llm)
     return extractor, llm
 # Small markdown report used as the ``markdown_content`` argument in the tests below.
 _SAMPLE_REPORT = """
 ## Executive Summary
 The simulation shows strong consensus forming around a YES outcome.
 Seventy-three percent of agents expressed optimism.
 ## Key Findings
 - Social momentum is strongly positive.
 - Counter-narratives remain marginal.
 ## Conclusion
 The dominant dynamic is consensus formation with high confidence.
 """
 # Prediction question passed as ``simulation_requirement``.
 _SAMPLE_REQUIREMENT = "Will the proposal pass by end of Q2 2026?"
 # A fully valid reply for the mocked chat_json(); individual tests override
 # single keys of it to exercise the normalisation rules.
 _GOOD_LLM_RESPONSE = {
    "p_yes": 0.73,
    "confidence": "high",
    "action": "buy_yes",
    "regime": "consensus_forming",
    "summary": "Strong agent consensus supports a YES outcome with high confidence.",
    "drivers": ["70%+ agent agreement", "positive social momentum"],
    "invalidators": ["marginal counter-narrative", "low information diversity"],
 }
 # ---------------------------------------------------------------------------
 # Happy path
 # ---------------------------------------------------------------------------
 class TestExtractHappyPath:
     """extract() fed a fully valid LLM reply."""

     @staticmethod
     def _run(report_id="r1", simulation_id="s1", requirement=_SAMPLE_REQUIREMENT):
         """Run one extraction against the good reply; return (signal, mock)."""
         subject, llm = _make_extractor(_GOOD_LLM_RESPONSE)
         signal = subject.extract(report_id, simulation_id, _SAMPLE_REPORT, requirement)
         return signal, llm

     def test_returns_miro_signal(self):
         outcome, _ = self._run()
         assert isinstance(outcome, MiroSignal)

     def test_fields_match_llm_output(self):
         signal, _ = self._run()
         assert signal.p_yes == pytest.approx(0.73)
         assert signal.confidence == "high"
         assert signal.action == "buy_yes"
         assert signal.regime == "consensus_forming"
         summary = signal.summary
         assert "YES" in summary or "consensus" in summary.lower()
         assert len(signal.drivers) == 2
         assert len(signal.invalidators) == 2

     def test_metadata_fields(self):
         signal, _ = self._run(report_id="report_abc", simulation_id="sim_xyz")
         assert signal.report_id == "report_abc"
         assert signal.simulation_id == "sim_xyz"
         assert signal.schema_version == SCHEMA_VERSION
         # Both are generated at build time; only their presence is checked.
         assert signal.signal_id
         assert signal.generated_at

     def test_to_dict_structure(self):
         signal, _ = self._run()
         payload = signal.to_dict()
         assert "thesis" in payload
         expected_keys = {
             "p_yes", "confidence", "action", "regime",
             "summary", "drivers", "invalidators",
         }
         assert set(payload["thesis"].keys()) == expected_keys

     def test_llm_called_with_low_temperature(self):
         _, llm = self._run()
         sent = llm.chat_json.call_args.kwargs
         assert sent["temperature"] <= 0.2

     def test_llm_called_with_retries(self):
         _, llm = self._run()
         sent = llm.chat_json.call_args.kwargs
         assert sent.get("max_attempts", 1) >= 2

     def test_simulation_requirement_in_messages(self):
         question = "Will the referendum pass?"
         _, llm = self._run(requirement=question)
         sent_messages = llm.chat_json.call_args.kwargs["messages"]
         user_text = next(
             msg["content"] for msg in sent_messages if msg["role"] == "user"
         )
         assert question in user_text
 # ---------------------------------------------------------------------------
 # Field validation and normalisation
 # ---------------------------------------------------------------------------
 class TestFieldValidation:
     """Normalisation applied to out-of-spec LLM replies."""

     @staticmethod
     def _signal_for(reply):
         """Extract a signal from a mocked client that answers with *reply*."""
         subject, _ = _make_extractor(reply)
         return subject.extract("r1", "s1", _SAMPLE_REPORT, _SAMPLE_REQUIREMENT)

     def test_p_yes_clamped_below_zero(self):
         signal = self._signal_for({**_GOOD_LLM_RESPONSE, "p_yes": -0.5})
         assert signal.p_yes >= 0.01

     def test_p_yes_clamped_above_one(self):
         signal = self._signal_for({**_GOOD_LLM_RESPONSE, "p_yes": 1.5})
         assert signal.p_yes <= 0.99

     def test_invalid_confidence_falls_back_to_medium(self):
         signal = self._signal_for({**_GOOD_LLM_RESPONSE, "confidence": "very_sure"})
         assert signal.confidence == "medium"

     def test_invalid_action_recomputed_from_p_yes_buy_yes(self):
         reply = {**_GOOD_LLM_RESPONSE, "p_yes": 0.8, "action": "INVALID"}
         assert self._signal_for(reply).action == "buy_yes"

     def test_invalid_action_recomputed_from_p_yes_buy_no(self):
         reply = {**_GOOD_LLM_RESPONSE, "p_yes": 0.2, "action": "INVALID"}
         assert self._signal_for(reply).action == "buy_no"

     def test_invalid_action_recomputed_from_p_yes_hold(self):
         reply = {**_GOOD_LLM_RESPONSE, "p_yes": 0.5, "action": "INVALID"}
         assert self._signal_for(reply).action == "hold"

     def test_missing_regime_defaults_to_uncertain(self):
         reply = dict(_GOOD_LLM_RESPONSE)
         del reply["regime"]
         assert self._signal_for(reply).regime == "uncertain"

     def test_empty_drivers_list_accepted(self):
         signal = self._signal_for({**_GOOD_LLM_RESPONSE, "drivers": []})
         assert signal.drivers == []

     def test_non_list_drivers_handled(self):
         signal = self._signal_for({**_GOOD_LLM_RESPONSE, "drivers": "some string"})
         # Must not crash. Only the container type is pinned here, not how a
         # bare string ends up being represented inside it.
         assert isinstance(signal.drivers, list)
 # ---------------------------------------------------------------------------
 # Report trimming
 # ---------------------------------------------------------------------------
 class TestReportTrimming:
     """Behaviour of SignalExtractor._trim_report."""

     def test_short_report_unchanged(self):
         text = "Short report content."
         assert SignalExtractor._trim_report(text, max_chars=100) == text

     def test_long_report_trimmed(self):
         oversized = "x" * 20_000
         trimmed = SignalExtractor._trim_report(oversized, max_chars=12_000)
         assert len(trimmed) < 20_000
         assert "truncated" in trimmed

     def test_trimmed_report_keeps_tail(self):
         # The end of the report is the part that has to survive trimming.
         padded = "".join(["A" * 10_000, "CONCLUSION"])
         trimmed = SignalExtractor._trim_report(padded, max_chars=100)
         assert "CONCLUSION" in trimmed
 # ---------------------------------------------------------------------------
 # Fallback (_salvage)
 # ---------------------------------------------------------------------------
 class TestSalvage:
     """Behaviour of the SignalExtractor._salvage fallback parser."""

     def test_salvage_extracts_probability(self):
         recovered = SignalExtractor._salvage("The probability is 0.68 for YES outcome.")
         assert recovered is not None
         assert recovered["p_yes"] == pytest.approx(0.68)

     def test_salvage_returns_none_when_no_probability(self):
         recovered = SignalExtractor._salvage("no numbers here at all")
         assert recovered is None

     def test_salvage_sets_action_buy_yes(self):
         recovered = SignalExtractor._salvage("probability 0.80")
         assert recovered["action"] == "buy_yes"

     def test_salvage_sets_action_buy_no(self):
         recovered = SignalExtractor._salvage("probability 0.20")
         assert recovered["action"] == "buy_no"

     def test_salvage_sets_action_hold(self):
         recovered = SignalExtractor._salvage("probability 0.50")
         assert recovered["action"] == "hold"

     def test_salvage_detects_high_confidence(self):
         recovered = SignalExtractor._salvage("high confidence, p=0.72")
         assert recovered["confidence"] == "high"

     def test_salvage_detects_low_confidence(self):
         recovered = SignalExtractor._salvage("low certainty, p=0.30")
         assert recovered["confidence"] == "low"
 # ---------------------------------------------------------------------------
 # LLM failure propagates as ValueError
 # ---------------------------------------------------------------------------
 class TestLLMFailure:
     """A ValueError raised by chat_json must reach the caller of extract()."""

     def test_raises_value_error_on_llm_failure(self):
         failure = ValueError("LLM返回的JSON格式无效: ...")
         subject, _ = _make_extractor(chat_json_side_effect=failure)
         with pytest.raises(ValueError):
             subject.extract("r1", "s1", _SAMPLE_REPORT, _SAMPLE_REQUIREMENT)