feat(report): add POST /api/report/<id>/signal — extract miro_signal from simulation report

Adds a canonical machine-readable probability signal endpoint that distils
a completed simulation report into a structured prediction thesis.

New: backend/app/services/signal_extractor.py
- SignalExtractor.extract() calls chat_json() (3 attempts, temp 0.1, step 0.05)
  against the report markdown and returns a validated MiroSignal dataclass
- Validates and normalises all fields: p_yes clamped to [0.01, 0.99],
  confidence/action enums enforced, action recomputed from p_yes when invalid
- _trim_report() keeps the tail (conclusions) for long reports to stay within
  token limits
- _salvage() fallback_parser recovers a minimal signal from partial LLM output
  using regex probability extraction

New endpoint: POST /api/report/<report_id>/signal
- 404 if report not found
- 400 if report not yet completed or content is empty
- 422 if LLM fails after all retry attempts
- Returns canonical signal with thesis.{p_yes, confidence, action, regime,
  summary, drivers, invalidators} — schema_version 1.1

New: backend/tests/services/test_signal_extractor.py
- 27 tests covering happy path, field validation/normalisation, report
  trimming, _salvage fallback, and LLM failure propagation
- No real API calls — LLMClient fully mocked

Closes #277

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
LoryGlory 2026-03-23 16:18:50 +01:00
parent 8fe735c1c9
commit 00a2150365
4 changed files with 565 additions and 0 deletions

View File

@ -11,6 +11,7 @@ from flask import request, jsonify, send_file
from . import report_bp
from ..config import Config
from ..services.report_agent import ReportAgent, ReportManager, ReportStatus
from ..services.signal_extractor import SignalExtractor
from ..services.simulation_manager import SimulationManager
from ..models.project import ProjectManager
from ..models.task import TaskManager, TaskStatus
@ -925,6 +926,89 @@ def stream_console_log(report_id: str):
}), 500
# ============== 预测信号接口 ==============
@report_bp.route('/<report_id>/signal', methods=['POST'])
def extract_signal(report_id: str):
    """
    Extract a structured prediction signal (miro_signal) from a completed report.

    Loads the report, checks that it is completed and has markdown content,
    then hands the content to ``SignalExtractor.extract`` and returns the
    resulting signal so that external prediction-market pipelines can
    consume it directly.

    Status codes:
        200: signal extracted; payload is ``signal.to_dict()``.
        404: no report with this ID.
        400: report not completed yet, or its markdown content is empty.
        422: a ``ValueError`` was raised while extracting (LLM failure).
        500: any other exception (response includes the traceback).

    Success response shape:
        {
            "success": true,
            "data": {
                "signal_id": "uuid",
                "schema_version": "1.1",
                "report_id": "report_xxxx",
                "simulation_id": "sim_xxxx",
                "generated_at": "2026-...",
                "thesis": {
                    "p_yes": 0.73,
                    "confidence": "high",
                    "action": "buy_yes",
                    "regime": "consensus_forming",
                    "summary": "...",
                    "drivers": ["...", "..."],
                    "invalidators": ["...", "..."]
                }
            }
        }
    """

    def _failure(message, status_code, **extra):
        # Every error response shares the same envelope; only the message,
        # the status code and (for 500s) the traceback differ.
        return jsonify({"success": False, "error": message, **extra}), status_code

    try:
        report = ReportManager.get_report(report_id)

        # Guard clauses: bail out before doing any LLM work.
        if not report:
            return _failure(f"报告不存在: {report_id}", 404)

        if report.status != ReportStatus.COMPLETED:
            return _failure(
                f"报告尚未完成 (status={report.status.value})，无法提取信号",
                400,
            )

        if not report.markdown_content:
            return _failure("报告内容为空，无法提取信号", 400)

        signal = SignalExtractor().extract(
            report_id=report_id,
            simulation_id=report.simulation_id,
            markdown_content=report.markdown_content,
            simulation_requirement=report.simulation_requirement,
        )
        logger.info(f"信号提取完成: report={report_id} p_yes={signal.p_yes} action={signal.action}")

        payload = {"success": True, "data": signal.to_dict()}
        return jsonify(payload)

    except ValueError as exc:
        # The extractor signals an unusable LLM result with ValueError.
        logger.error(f"信号提取失败 (LLM): {str(exc)}")
        return _failure(str(exc), 422)

    except Exception as exc:
        logger.error(f"信号提取失败: {str(exc)}")
        return _failure(str(exc), 500, traceback=traceback.format_exc())
# ============== 工具调用接口(供调试使用)==============
@report_bp.route('/tools/search', methods=['POST'])

View File

@ -0,0 +1,245 @@
"""
Miro Signal Extractor
Distils a completed simulation report into a canonical machine-readable
probability signal that external pipelines (e.g. prediction-market bots)
can consume directly.
"""
from __future__ import annotations
import uuid
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import List, Optional
from ..utils.llm_client import LLMClient
from ..utils.logger import get_logger
logger = get_logger('mirofish.signal_extractor')
SCHEMA_VERSION = "1.1"
_SYSTEM_PROMPT = """\
You are a structured-signal extractor. You will be given the full markdown text
of a social-simulation analysis report and the original simulation requirement
(the prediction question). Your job is to distil the report into a concise,
machine-readable probability signal.
Rules:
- p_yes must be a float strictly between 0.0 and 1.0 (never exactly 0 or 1).
- confidence must be one of: "high", "medium", "low".
- action must be one of: "buy_yes", "buy_no", "hold".
Use "buy_yes" when p_yes > 0.55, "buy_no" when p_yes < 0.45, else "hold".
- regime describes the dominant social dynamic observed in the simulation,
e.g. "consensus_forming", "contested", "uncertain", "momentum_shift",
"echo_chamber", "fragmented".
- summary is one sentence ( 30 words).
- drivers is a list of 24 short strings (key factors supporting the thesis).
- invalidators is a list of 24 short strings (key risks or counter-factors).
- Do not reproduce large sections of the report. Be concise.
- Respond ONLY with valid JSON matching the schema below no prose, no fences.
Required JSON schema:
{
"p_yes": <float 0.01.0>,
"confidence": "high" | "medium" | "low",
"action": "buy_yes" | "buy_no" | "hold",
"regime": <string>,
"summary": <string>,
"drivers": [<string>, ...],
"invalidators": [<string>, ...]
}
"""
@dataclass
class MiroSignal:
    """Canonical prediction signal extracted from a simulation report.

    The first five fields identify the signal and where it came from; the
    remaining fields form the prediction thesis and are nested under the
    ``"thesis"`` key when serialised with :meth:`to_dict`.
    """

    # Envelope / provenance
    signal_id: str
    schema_version: str
    report_id: str
    simulation_id: str
    generated_at: str

    # Core thesis fields
    p_yes: float
    confidence: str  # high | medium | low
    action: str  # buy_yes | buy_no | hold
    regime: str
    summary: str
    drivers: List[str] = field(default_factory=list)
    invalidators: List[str] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return the signal as a plain dict with the thesis nested inside."""
        thesis = {
            "p_yes": self.p_yes,
            "confidence": self.confidence,
            "action": self.action,
            "regime": self.regime,
            "summary": self.summary,
            "drivers": self.drivers,
            "invalidators": self.invalidators,
        }
        envelope = {
            "signal_id": self.signal_id,
            "schema_version": self.schema_version,
            "report_id": self.report_id,
            "simulation_id": self.simulation_id,
            "generated_at": self.generated_at,
        }
        envelope["thesis"] = thesis
        return envelope
class SignalExtractor:
    """Extracts a MiroSignal from a completed report's markdown content.

    The extractor sends the (possibly trimmed) report plus the original
    prediction question to the LLM client's ``chat_json`` and then validates
    and normalises whatever comes back, so that callers always receive a
    signal whose enum fields are valid and whose probability is a finite
    number inside ``[0.01, 0.99]``.
    """

    _VALID_CONFIDENCE = {"high", "medium", "low"}
    _VALID_ACTIONS = {"buy_yes", "buy_no", "hold"}

    # Neutral probability used whenever the model's p_yes is missing/unusable.
    _DEFAULT_P_YES = 0.5
    # Clamp bounds: a signal never claims certainty.
    _MIN_P_YES = 0.01
    _MAX_P_YES = 0.99

    def __init__(self, llm_client: Optional[LLMClient] = None):
        """Use *llm_client* if given, otherwise create a default LLMClient."""
        self._client = llm_client or LLMClient()

    def extract(
        self,
        report_id: str,
        simulation_id: str,
        markdown_content: str,
        simulation_requirement: str,
    ) -> MiroSignal:
        """
        Distil *markdown_content* into a MiroSignal.

        Args:
            report_id: The report this signal is derived from.
            simulation_id: Parent simulation ID.
            markdown_content: Full report text (may be long).
            simulation_requirement: The original prediction question / goal.

        Returns:
            MiroSignal with validated fields.

        Raises:
            ValueError: If the LLM fails to produce a valid signal after
                retries (raised by the client), or if the client returns
                something other than a JSON object.
        """
        # Trim to avoid token limits while keeping the most analytical content.
        # Reports can exceed 30 k chars; the last third is usually the conclusion.
        body = self._trim_report(markdown_content)

        user_prompt = (
            f"Simulation requirement (prediction question):\n{simulation_requirement}\n\n"
            f"Report:\n{body}"
        )
        messages = [
            {"role": "system", "content": _SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt},
        ]

        raw = self._client.chat_json(
            messages=messages,
            temperature=0.1,
            max_tokens=512,
            max_attempts=3,
            temperature_step=0.05,
            fallback_parser=self._salvage,
        )

        # A JSON array/scalar is syntactically valid JSON but not a signal;
        # surface it as the documented ValueError rather than an
        # AttributeError from ``raw.get`` further down.
        if not isinstance(raw, dict):
            raise ValueError(
                f"LLM returned {type(raw).__name__} instead of a JSON object"
            )

        return self._build_signal(raw, report_id, simulation_id)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _trim_report(content: str, max_chars: int = 12_000) -> str:
        """Keep the tail of the report (conclusions) if it is very long.

        Reports of at most *max_chars* characters are returned unchanged.
        Longer ones are cut to their last *max_chars* characters and prefixed
        with a truncation marker (so the result is slightly longer than
        *max_chars*).
        """
        if len(content) <= max_chars:
            return content
        # Slice from an explicit start index: ``content[-0:]`` would return
        # the *whole* string when max_chars is 0.
        keep = max(max_chars, 0)
        tail = content[len(content) - keep:]
        return "…[report truncated for signal extraction]\n\n" + tail

    @staticmethod
    def _action_for(p_yes: float) -> str:
        """Map a probability to the trading action defined in the prompt."""
        if p_yes > 0.55:
            return "buy_yes"
        if p_yes < 0.45:
            return "buy_no"
        return "hold"

    @classmethod
    def _coerce_probability(cls, value: object) -> float:
        """Turn an arbitrary JSON value into a probability in [0.01, 0.99].

        Missing, non-numeric, boolean and NaN values fall back to the neutral
        default of 0.5; everything else is clamped into range.
        """
        import math

        # bool is a subclass of int; ``true`` is not a probability.
        if isinstance(value, bool):
            return cls._DEFAULT_P_YES
        try:
            p_yes = float(value)
        except (TypeError, ValueError):
            return cls._DEFAULT_P_YES
        # NaN slips through min()/max() clamping (every comparison is False)
        # and would come out as 0.99, i.e. a confident buy_yes.
        if math.isnan(p_yes):
            return cls._DEFAULT_P_YES
        return max(cls._MIN_P_YES, min(cls._MAX_P_YES, p_yes))

    @staticmethod
    def _coerce_text(value: object, default: str = "") -> str:
        """Return *value* as a stripped string, or *default* if null/blank."""
        if value is None:
            return default
        return str(value).strip() or default

    @staticmethod
    def _coerce_str_list(value: object) -> List[str]:
        """Normalise a JSON value into a list of non-empty strings.

        ``null`` and other non-sequence values become ``[]``; a bare string is
        treated as a single item rather than being split into characters.
        """
        if isinstance(value, str):
            value = [value]
        elif not isinstance(value, (list, tuple)):
            return []
        return [str(item) for item in value if item]

    def _build_signal(
        self, raw: dict, report_id: str, simulation_id: str
    ) -> MiroSignal:
        """Validate and normalise the raw LLM dict into a MiroSignal."""
        p_yes = self._coerce_probability(raw.get("p_yes"))

        confidence = self._coerce_text(raw.get("confidence")).lower()
        if confidence not in self._VALID_CONFIDENCE:
            confidence = "medium"

        # action — recompute from p_yes if missing or invalid
        action = self._coerce_text(raw.get("action")).lower()
        if action not in self._VALID_ACTIONS:
            action = self._action_for(p_yes)

        return MiroSignal(
            signal_id=str(uuid.uuid4()),
            schema_version=SCHEMA_VERSION,
            report_id=report_id,
            simulation_id=simulation_id,
            generated_at=datetime.now(timezone.utc).isoformat(),
            p_yes=p_yes,
            confidence=confidence,
            action=action,
            regime=self._coerce_text(raw.get("regime"), default="uncertain"),
            summary=self._coerce_text(raw.get("summary")),
            drivers=self._coerce_str_list(raw.get("drivers")),
            invalidators=self._coerce_str_list(raw.get("invalidators")),
        )

    @staticmethod
    def _salvage(raw_text: str) -> Optional[dict]:
        """
        Last-resort fallback: recover a minimal signal dict from partial or
        malformed LLM output.

        The probability is taken from an explicit ``p_yes`` key/assignment if
        one is present, otherwise from the first number in the text that looks
        like a probability. Confidence is inferred from the standalone words
        "high" / "low" (defaulting to "medium").

        Returns:
            A dict with the same keys as a regular LLM response, or ``None``
            when no probability-like number can be found.
        """
        import re

        if not raw_text:
            return None

        number = r"(0\.\d+|1\.0+|0)\b"
        # Prefer the value actually labelled p_yes; the first number in the
        # text may belong to some other field.
        found = re.search(
            r"p_yes[\"']?\s*[:=]\s*" + number, raw_text, re.IGNORECASE
        ) or re.search(r"\b" + number, raw_text)
        if not found:
            return None
        # The pattern only matches valid float literals, so this cannot raise.
        p = float(found.group(1))

        # Match whole words only, so that "below"/"follow"/"highlight" do not
        # masquerade as a confidence level.
        lowered = raw_text.lower()
        if re.search(r"(?<![a-z])high(?![a-z])", lowered):
            confidence = "high"
        elif re.search(r"(?<![a-z])low(?![a-z])", lowered):
            confidence = "low"
        else:
            confidence = "medium"

        return {
            "p_yes": p,
            "confidence": confidence,
            "action": SignalExtractor._action_for(p),
            "regime": "uncertain",
            "summary": "Signal salvaged from partial LLM output.",
            "drivers": [],
            "invalidators": [],
        }

View File

View File

@ -0,0 +1,236 @@
"""
Tests for SignalExtractor — no real API calls; LLMClient is fully mocked.
"""
import pytest
from unittest.mock import MagicMock, patch
from app.services.signal_extractor import SignalExtractor, MiroSignal, SCHEMA_VERSION
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _make_extractor(chat_json_return=None, chat_json_side_effect=None):
    """Build a SignalExtractor wired to a MagicMock LLM client.

    If *chat_json_side_effect* is given it is installed as the side effect of
    ``chat_json``; otherwise ``chat_json`` returns *chat_json_return* (or an
    empty dict when that is falsy).

    Returns:
        ``(extractor, mock_client)`` so tests can inspect the recorded call.
    """
    llm = MagicMock()
    if chat_json_side_effect is None:
        llm.chat_json.return_value = chat_json_return or {}
    else:
        llm.chat_json.side_effect = chat_json_side_effect
    extractor = SignalExtractor(llm_client=llm)
    return extractor, llm
_SAMPLE_REPORT = """
## Executive Summary
The simulation shows strong consensus forming around a YES outcome.
Seventy-three percent of agents expressed optimism.
## Key Findings
- Social momentum is strongly positive.
- Counter-narratives remain marginal.
## Conclusion
The dominant dynamic is consensus formation with high confidence.
"""
_SAMPLE_REQUIREMENT = "Will the proposal pass by end of Q2 2026?"
_GOOD_LLM_RESPONSE = {
"p_yes": 0.73,
"confidence": "high",
"action": "buy_yes",
"regime": "consensus_forming",
"summary": "Strong agent consensus supports a YES outcome with high confidence.",
"drivers": ["70%+ agent agreement", "positive social momentum"],
"invalidators": ["marginal counter-narrative", "low information diversity"],
}
# ---------------------------------------------------------------------------
# Happy path
# ---------------------------------------------------------------------------
class TestExtractHappyPath:
    """extract() with a well-formed LLM payload."""

    @staticmethod
    def _run(report_id="r1", simulation_id="s1", requirement=_SAMPLE_REQUIREMENT):
        """Run one extraction against the good payload; return (signal, mock)."""
        extractor, llm = _make_extractor(_GOOD_LLM_RESPONSE)
        signal = extractor.extract(report_id, simulation_id, _SAMPLE_REPORT, requirement)
        return signal, llm

    def test_returns_miro_signal(self):
        signal, _ = self._run()
        assert isinstance(signal, MiroSignal)

    def test_fields_match_llm_output(self):
        signal, _ = self._run()
        assert signal.p_yes == pytest.approx(0.73)
        assert signal.confidence == "high"
        assert signal.action == "buy_yes"
        assert signal.regime == "consensus_forming"
        assert "YES" in signal.summary or "consensus" in signal.summary.lower()
        assert len(signal.drivers) == 2
        assert len(signal.invalidators) == 2

    def test_metadata_fields(self):
        signal, _ = self._run(report_id="report_abc", simulation_id="sim_xyz")
        assert signal.report_id == "report_abc"
        assert signal.simulation_id == "sim_xyz"
        assert signal.schema_version == SCHEMA_VERSION
        assert signal.signal_id  # non-empty UUID
        assert signal.generated_at  # non-empty ISO timestamp

    def test_to_dict_structure(self):
        signal, _ = self._run()
        payload = signal.to_dict()
        assert "thesis" in payload
        expected_keys = {
            "p_yes", "confidence", "action", "regime",
            "summary", "drivers", "invalidators",
        }
        assert set(payload["thesis"].keys()) == expected_keys

    def test_llm_called_with_low_temperature(self):
        _, llm = self._run()
        kwargs = llm.chat_json.call_args.kwargs
        assert kwargs["temperature"] <= 0.2

    def test_llm_called_with_retries(self):
        _, llm = self._run()
        kwargs = llm.chat_json.call_args.kwargs
        assert kwargs.get("max_attempts", 1) >= 2

    def test_simulation_requirement_in_messages(self):
        question = "Will the referendum pass?"
        _, llm = self._run(requirement=question)
        sent = llm.chat_json.call_args.kwargs["messages"]
        user_content = next(m["content"] for m in sent if m["role"] == "user")
        assert question in user_content
# ---------------------------------------------------------------------------
# Field validation and normalisation
# ---------------------------------------------------------------------------
class TestFieldValidation:
    """Normalisation of out-of-range, invalid or missing LLM fields."""

    @staticmethod
    def _extract(payload):
        """Run extract() with *payload* as the mocked LLM response."""
        extractor, _ = _make_extractor(payload)
        return extractor.extract("r1", "s1", _SAMPLE_REPORT, _SAMPLE_REQUIREMENT)

    def test_p_yes_clamped_below_zero(self):
        signal = self._extract({**_GOOD_LLM_RESPONSE, "p_yes": -0.5})
        assert signal.p_yes >= 0.01

    def test_p_yes_clamped_above_one(self):
        signal = self._extract({**_GOOD_LLM_RESPONSE, "p_yes": 1.5})
        assert signal.p_yes <= 0.99

    def test_invalid_confidence_falls_back_to_medium(self):
        signal = self._extract({**_GOOD_LLM_RESPONSE, "confidence": "very_sure"})
        assert signal.confidence == "medium"

    def test_invalid_action_recomputed_from_p_yes_buy_yes(self):
        signal = self._extract({**_GOOD_LLM_RESPONSE, "p_yes": 0.8, "action": "INVALID"})
        assert signal.action == "buy_yes"

    def test_invalid_action_recomputed_from_p_yes_buy_no(self):
        signal = self._extract({**_GOOD_LLM_RESPONSE, "p_yes": 0.2, "action": "INVALID"})
        assert signal.action == "buy_no"

    def test_invalid_action_recomputed_from_p_yes_hold(self):
        signal = self._extract({**_GOOD_LLM_RESPONSE, "p_yes": 0.5, "action": "INVALID"})
        assert signal.action == "hold"

    def test_missing_regime_defaults_to_uncertain(self):
        without_regime = {
            key: value
            for key, value in _GOOD_LLM_RESPONSE.items()
            if key != "regime"
        }
        signal = self._extract(without_regime)
        assert signal.regime == "uncertain"

    def test_empty_drivers_list_accepted(self):
        signal = self._extract({**_GOOD_LLM_RESPONSE, "drivers": []})
        assert signal.drivers == []

    def test_non_list_drivers_handled(self):
        signal = self._extract({**_GOOD_LLM_RESPONSE, "drivers": "some string"})
        # Must not crash; however the string is coerced, the field stays a list.
        assert isinstance(signal.drivers, list)
# ---------------------------------------------------------------------------
# Report trimming
# ---------------------------------------------------------------------------
class TestReportTrimming:
    """_trim_report() leaves short reports alone and keeps the tail of long ones."""

    def test_short_report_unchanged(self):
        text = "Short report content."
        assert SignalExtractor._trim_report(text, max_chars=100) == text

    def test_long_report_trimmed(self):
        oversized = "x" * 20_000
        trimmed = SignalExtractor._trim_report(oversized, max_chars=12_000)
        assert len(trimmed) < 20_000
        assert "truncated" in trimmed

    def test_trimmed_report_keeps_tail(self):
        # The tail (conclusion) is most important for signal extraction
        oversized = "A" * 10_000 + "CONCLUSION"
        trimmed = SignalExtractor._trim_report(oversized, max_chars=100)
        assert "CONCLUSION" in trimmed
# ---------------------------------------------------------------------------
# Fallback (_salvage)
# ---------------------------------------------------------------------------
class TestSalvage:
    """_salvage() recovers a minimal signal dict from free-form text."""

    def test_salvage_extracts_probability(self):
        salvaged = SignalExtractor._salvage("The probability is 0.68 for YES outcome.")
        assert salvaged is not None
        assert salvaged["p_yes"] == pytest.approx(0.68)

    def test_salvage_returns_none_when_no_probability(self):
        salvaged = SignalExtractor._salvage("no numbers here at all")
        assert salvaged is None

    def test_salvage_sets_action_buy_yes(self):
        salvaged = SignalExtractor._salvage("probability 0.80")
        assert salvaged["action"] == "buy_yes"

    def test_salvage_sets_action_buy_no(self):
        salvaged = SignalExtractor._salvage("probability 0.20")
        assert salvaged["action"] == "buy_no"

    def test_salvage_sets_action_hold(self):
        salvaged = SignalExtractor._salvage("probability 0.50")
        assert salvaged["action"] == "hold"

    def test_salvage_detects_high_confidence(self):
        salvaged = SignalExtractor._salvage("high confidence, p=0.72")
        assert salvaged["confidence"] == "high"

    def test_salvage_detects_low_confidence(self):
        salvaged = SignalExtractor._salvage("low certainty, p=0.30")
        assert salvaged["confidence"] == "low"
# ---------------------------------------------------------------------------
# LLM failure propagates as ValueError
# ---------------------------------------------------------------------------
class TestLLMFailure:
    """A ValueError raised by the LLM client must reach the caller unchanged."""

    def test_raises_value_error_on_llm_failure(self):
        failure = ValueError("LLM返回的JSON格式无效: ...")
        extractor, _ = _make_extractor(chat_json_side_effect=failure)
        with pytest.raises(ValueError):
            extractor.extract("r1", "s1", _SAMPLE_REPORT, _SAMPLE_REQUIREMENT)