feat(report): add POST /api/report/<id>/signal — extract miro_signal from simulation report
Adds a canonical machine-readable probability signal endpoint that distils
a completed simulation report into a structured prediction thesis.
New: backend/app/services/signal_extractor.py
- SignalExtractor.extract() calls chat_json() (3 attempts, temp 0.1, step 0.05)
against the report markdown and returns a validated MiroSignal dataclass
- Validates and normalises all fields: p_yes clamped to [0.01, 0.99],
confidence/action enums enforced, action recomputed from p_yes when invalid
- _trim_report() keeps the tail (conclusions) for long reports to stay within
token limits
- _salvage(), passed to chat_json() as fallback_parser, recovers a minimal
signal from partial LLM output using regex probability extraction
New endpoint: POST /api/report/<report_id>/signal
- 404 if report not found
- 400 if report not yet completed or content is empty
- 422 if LLM fails after all retry attempts
- Returns canonical signal with thesis.{p_yes, confidence, action, regime,
summary, drivers, invalidators} — schema_version 1.1
New: backend/tests/services/test_signal_extractor.py
- 27 tests covering happy path, field validation/normalisation, report
trimming, _salvage fallback, and LLM failure propagation
- No real API calls — LLMClient fully mocked
Closes #277
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
8fe735c1c9
commit
00a2150365
|
|
@ -11,6 +11,7 @@ from flask import request, jsonify, send_file
|
||||||
from . import report_bp
|
from . import report_bp
|
||||||
from ..config import Config
|
from ..config import Config
|
||||||
from ..services.report_agent import ReportAgent, ReportManager, ReportStatus
|
from ..services.report_agent import ReportAgent, ReportManager, ReportStatus
|
||||||
|
from ..services.signal_extractor import SignalExtractor
|
||||||
from ..services.simulation_manager import SimulationManager
|
from ..services.simulation_manager import SimulationManager
|
||||||
from ..models.project import ProjectManager
|
from ..models.project import ProjectManager
|
||||||
from ..models.task import TaskManager, TaskStatus
|
from ..models.task import TaskManager, TaskStatus
|
||||||
|
|
@ -925,6 +926,89 @@ def stream_console_log(report_id: str):
|
||||||
}), 500
|
}), 500
|
||||||
|
|
||||||
|
|
||||||
|
# ============== 预测信号接口 ==============
|
||||||
|
|
||||||
|
@report_bp.route('/<report_id>/signal', methods=['POST'])
def extract_signal(report_id: str):
    """
    Extract a structured prediction signal (miro_signal) from a completed report.

    Runs one LLM extraction over the report's markdown content and returns a
    normalised probability signal intended for direct consumption by external
    prediction-market pipelines.

    Status codes:
        200 - extraction succeeded; body is {"success": true, "data": {...}}
        404 - no report was found for ``report_id``
        400 - the report is not COMPLETED, or its markdown content is empty
        422 - the extractor raised ValueError
        500 - any other exception (body carries the message and a traceback)

    Shape of a successful response:
        {
            "success": true,
            "data": {
                "signal_id": "uuid",
                "schema_version": "1.1",
                "report_id": "report_xxxx",
                "simulation_id": "sim_xxxx",
                "generated_at": "2026-...",
                "thesis": {
                    "p_yes": 0.73,
                    "confidence": "high",
                    "action": "buy_yes",
                    "regime": "consensus_forming",
                    "summary": "...",
                    "drivers": ["...", "..."],
                    "invalidators": ["...", "..."]
                }
            }
        }
    """
    def _failure(message: str, status_code: int):
        # Uniform {"success": False, "error": ...} envelope for client errors.
        return jsonify({"success": False, "error": message}), status_code

    try:
        report = ReportManager.get_report(report_id)
        if not report:
            return _failure(f"报告不存在: {report_id}", 404)

        # Only finished reports carry content worth distilling.
        if report.status != ReportStatus.COMPLETED:
            return _failure(
                f"报告尚未完成 (status={report.status.value}),无法提取信号",
                400,
            )

        if not report.markdown_content:
            return _failure("报告内容为空,无法提取信号", 400)

        signal = SignalExtractor().extract(
            report_id=report_id,
            simulation_id=report.simulation_id,
            markdown_content=report.markdown_content,
            simulation_requirement=report.simulation_requirement,
        )
        logger.info(f"信号提取完成: report={report_id} p_yes={signal.p_yes} action={signal.action}")

        payload = {"success": True, "data": signal.to_dict()}
        return jsonify(payload)

    except ValueError as e:
        # The extractor signals an unusable LLM result with ValueError.
        logger.error(f"信号提取失败 (LLM): {str(e)}")
        return _failure(str(e), 422)

    except Exception as e:
        logger.error(f"信号提取失败: {str(e)}")
        error_body = {
            "success": False,
            "error": str(e),
            "traceback": traceback.format_exc(),
        }
        return jsonify(error_body), 500
|
||||||
|
|
||||||
|
|
||||||
# ============== 工具调用接口(供调试使用)==============
|
# ============== 工具调用接口(供调试使用)==============
|
||||||
|
|
||||||
@report_bp.route('/tools/search', methods=['POST'])
|
@report_bp.route('/tools/search', methods=['POST'])
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,245 @@
|
||||||
|
"""
|
||||||
|
Miro Signal Extractor
|
||||||
|
Distils a completed simulation report into a canonical machine-readable
|
||||||
|
probability signal that external pipelines (e.g. prediction-market bots)
|
||||||
|
can consume directly.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import uuid
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from ..utils.llm_client import LLMClient
|
||||||
|
from ..utils.logger import get_logger
|
||||||
|
|
||||||
|
# Module-level logger for the signal extractor.
logger = get_logger('mirofish.signal_extractor')

# Version string stamped into every MiroSignal as ``schema_version``.
SCHEMA_VERSION = "1.1"

# System prompt sent with every extraction request. It states the field rules
# (probability range, allowed enum values, action thresholds, list sizes) and
# the JSON shape the model is asked to return.
_SYSTEM_PROMPT = """\
You are a structured-signal extractor. You will be given the full markdown text
of a social-simulation analysis report and the original simulation requirement
(the prediction question). Your job is to distil the report into a concise,
machine-readable probability signal.

Rules:
- p_yes must be a float strictly between 0.0 and 1.0 (never exactly 0 or 1).
- confidence must be one of: "high", "medium", "low".
- action must be one of: "buy_yes", "buy_no", "hold".
Use "buy_yes" when p_yes > 0.55, "buy_no" when p_yes < 0.45, else "hold".
- regime describes the dominant social dynamic observed in the simulation,
e.g. "consensus_forming", "contested", "uncertain", "momentum_shift",
"echo_chamber", "fragmented".
- summary is one sentence (≤ 30 words).
- drivers is a list of 2–4 short strings (key factors supporting the thesis).
- invalidators is a list of 2–4 short strings (key risks or counter-factors).
- Do not reproduce large sections of the report. Be concise.
- Respond ONLY with valid JSON matching the schema below — no prose, no fences.

Required JSON schema:
{
"p_yes": <float 0.0–1.0>,
"confidence": "high" | "medium" | "low",
"action": "buy_yes" | "buy_no" | "hold",
"regime": <string>,
"summary": <string>,
"drivers": [<string>, ...],
"invalidators": [<string>, ...]
}
"""
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class MiroSignal:
    """Prediction signal distilled from a simulation report.

    The first five fields identify the signal and where it came from; the
    remaining fields make up the thesis that ``to_dict`` nests under the
    ``"thesis"`` key.
    """

    # Identification / provenance
    signal_id: str
    schema_version: str
    report_id: str
    simulation_id: str
    generated_at: str

    # Thesis
    p_yes: float
    confidence: str  # high | medium | low
    action: str  # buy_yes | buy_no | hold
    regime: str
    summary: str
    drivers: List[str] = field(default_factory=list)
    invalidators: List[str] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return the signal as a plain dict with the thesis fields nested."""
        thesis = {
            "p_yes": self.p_yes,
            "confidence": self.confidence,
            "action": self.action,
            "regime": self.regime,
            "summary": self.summary,
            "drivers": self.drivers,
            "invalidators": self.invalidators,
        }
        document = {
            "signal_id": self.signal_id,
            "schema_version": self.schema_version,
            "report_id": self.report_id,
            "simulation_id": self.simulation_id,
            "generated_at": self.generated_at,
        }
        document["thesis"] = thesis
        return document
|
||||||
|
|
||||||
|
|
||||||
|
class SignalExtractor:
    """Extracts a MiroSignal from a completed report's markdown content.

    ``extract`` sends the (possibly trimmed) report plus the prediction
    question to the LLM client's ``chat_json`` and normalises the returned
    dict into a ``MiroSignal``. ``_salvage`` is handed to ``chat_json`` as its
    ``fallback_parser``.
    """

    _VALID_CONFIDENCE = {"high", "medium", "low"}
    _VALID_ACTIONS = {"buy_yes", "buy_no", "hold"}

    def __init__(self, llm_client: Optional[LLMClient] = None):
        """Use *llm_client* if given, otherwise construct a default LLMClient."""
        self._client = llm_client or LLMClient()

    def extract(
        self,
        report_id: str,
        simulation_id: str,
        markdown_content: str,
        simulation_requirement: str,
    ) -> MiroSignal:
        """
        Distil *markdown_content* into a MiroSignal.

        Args:
            report_id: The report this signal is derived from.
            simulation_id: Parent simulation ID.
            markdown_content: Full report text (may be long).
            simulation_requirement: The original prediction question / goal.

        Returns:
            MiroSignal with validated fields.

        Raises:
            ValueError: If the LLM fails to produce a valid signal after
                retries, or the returned payload is not a JSON object.
        """
        # Trim to avoid token limits while keeping the most analytical content.
        # Reports can exceed 30 k chars; the last third is usually the conclusion.
        body = self._trim_report(markdown_content)

        # A missing requirement must not be rendered as the literal "None".
        question = simulation_requirement or "(not provided)"

        messages = [
            {"role": "system", "content": _SYSTEM_PROMPT},
            {
                "role": "user",
                "content": (
                    f"Simulation requirement (prediction question):\n{question}\n\n"
                    f"Report:\n{body}"
                ),
            },
        ]

        raw = self._client.chat_json(
            messages=messages,
            temperature=0.1,
            max_tokens=512,
            max_attempts=3,
            temperature_step=0.05,
            fallback_parser=self._salvage,
        )

        return self._build_signal(raw, report_id, simulation_id)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _trim_report(content: str, max_chars: int = 12_000) -> str:
        """Keep the tail of the report (conclusions) if it is very long.

        Content of at most *max_chars* characters is returned unchanged.
        Longer content is cut to its last *max_chars* characters and prefixed
        with a truncation marker, so the result is slightly longer than
        *max_chars*.
        """
        if len(content) <= max_chars:
            return content
        return "…[report truncated for signal extraction]\n\n" + content[-max_chars:]

    @staticmethod
    def _action_for(p_yes: float) -> str:
        """Map a probability to an action using the thresholds in the prompt."""
        if p_yes > 0.55:
            return "buy_yes"
        if p_yes < 0.45:
            return "buy_no"
        return "hold"

    @staticmethod
    def _coerce_probability(value: object, default: float = 0.5) -> float:
        """Convert *value* to a probability clamped to [0.01, 0.99].

        Values that cannot be converted to float, and NaN, yield *default*.
        NaN needs its own check: ``min(0.99, nan)`` returns 0.99, so plain
        clamping would turn "no usable number" into a strong YES.
        """
        import math

        try:
            p = float(value)
        except (TypeError, ValueError, OverflowError):
            return default
        if math.isnan(p):
            return default
        return max(0.01, min(0.99, p))

    @staticmethod
    def _as_str_list(value: object) -> List[str]:
        """Normalise a list-like LLM field into a list of strings.

        - list / tuple: falsy items are dropped, the rest converted with str()
        - str: kept as a single item (not split into characters); blank -> []
        - anything else, including None: []
        """
        if isinstance(value, str):
            text = value.strip()
            return [text] if text else []
        if isinstance(value, (list, tuple)):
            return [str(item) for item in value if item]
        return []

    def _build_signal(
        self, raw: dict, report_id: str, simulation_id: str
    ) -> MiroSignal:
        """Validate and normalise the raw LLM dict into a MiroSignal.

        Raises:
            ValueError: If *raw* is not a dict (e.g. the model returned a JSON
                array or scalar).
        """
        if not isinstance(raw, dict):
            raise ValueError(
                f"LLM signal payload must be a JSON object, got {type(raw).__name__}"
            )

        p_yes = self._coerce_probability(raw.get("p_yes", 0.5))

        # confidence — unknown values fall back to "medium"
        confidence = str(raw.get("confidence", "medium")).strip().lower()
        if confidence not in self._VALID_CONFIDENCE:
            confidence = "medium"

        # action — recompute from p_yes if missing or invalid
        action = str(raw.get("action", "")).strip().lower()
        if action not in self._VALID_ACTIONS:
            action = self._action_for(p_yes)

        # `or` also covers an explicit JSON null, which str() would otherwise
        # turn into the literal text "None".
        regime = str(raw.get("regime") or "uncertain").strip() or "uncertain"
        summary = str(raw.get("summary") or "").strip()

        return MiroSignal(
            signal_id=str(uuid.uuid4()),
            schema_version=SCHEMA_VERSION,
            report_id=report_id,
            simulation_id=simulation_id,
            generated_at=datetime.now(timezone.utc).isoformat(),
            p_yes=p_yes,
            confidence=confidence,
            action=action,
            regime=regime,
            summary=summary,
            drivers=self._as_str_list(raw.get("drivers")),
            invalidators=self._as_str_list(raw.get("invalidators")),
        )

    @staticmethod
    def _salvage(raw_text: str) -> Optional[dict]:
        """
        Last-resort fallback: build a minimal signal dict from free text.

        The probability is looked up in order of reliability:
          1. a value explicitly keyed as ``p_yes`` (e.g. truncated JSON),
          2. the first standalone decimal of the form 0.x or 1.0,
          3. a standalone ``0``.
        Digits that are part of a longer number or dotted version string
        (such as "10.5" or "2.0.3") are ignored.

        Confidence is taken from a keyed ``confidence`` value if present,
        otherwise from the whole words "high" / "low"; the default is
        "medium".

        Returns:
            A dict with the same keys the LLM is asked for, or None when no
            probability can be found.
        """
        import re

        if not isinstance(raw_text, str) or not raw_text:
            return None

        prob_match = (
            re.search(
                r'\bp_yes["\']?\s*[:=]\s*(0(?:\.\d+)?|1(?:\.0+)?)(?!\w|\.\d)',
                raw_text,
                re.IGNORECASE,
            )
            or re.search(r'(?<![\w.])(0\.\d+|1\.0+)(?!\w|\.\d)', raw_text)
            or re.search(r'(?<![\w.])(0)(?!\w|\.\d)', raw_text)
        )
        if not prob_match:
            return None
        p = float(prob_match.group(1))

        keyed_confidence = re.search(
            r'\bconfidence["\']?\s*[:=]\s*["\']?(high|medium|low)\b',
            raw_text,
            re.IGNORECASE,
        )
        if keyed_confidence:
            confidence = keyed_confidence.group(1).lower()
        elif re.search(r'\bhigh\b', raw_text, re.IGNORECASE):
            confidence = "high"
        elif re.search(r'\blow\b', raw_text, re.IGNORECASE):
            confidence = "low"
        else:
            confidence = "medium"

        return {
            "p_yes": p,
            "confidence": confidence,
            "action": SignalExtractor._action_for(p),
            "regime": "uncertain",
            "summary": "Signal salvaged from partial LLM output.",
            "drivers": [],
            "invalidators": [],
        }
|
||||||
|
|
@ -0,0 +1,236 @@
|
||||||
|
"""
|
||||||
|
Tests for SignalExtractor — no real API calls, LLMClient fully mocked.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
from app.services.signal_extractor import SignalExtractor, MiroSignal, SCHEMA_VERSION
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _make_extractor(chat_json_return=None, chat_json_side_effect=None):
    """Build a SignalExtractor around a MagicMock client.

    When *chat_json_side_effect* is given it is installed as the mock's
    ``chat_json`` side effect; otherwise ``chat_json`` returns
    *chat_json_return*, or an empty dict when that is falsy.

    Returns:
        An ``(extractor, mock_client)`` tuple.
    """
    llm = MagicMock()
    if chat_json_side_effect is None:
        llm.chat_json.return_value = chat_json_return or {}
    else:
        llm.chat_json.side_effect = chat_json_side_effect
    extractor = SignalExtractor(llm_client=llm)
    return extractor, llm
|
||||||
|
|
||||||
|
|
||||||
|
# Short markdown report passed as ``markdown_content`` in the extract() tests.
_SAMPLE_REPORT = """
## Executive Summary
The simulation shows strong consensus forming around a YES outcome.
Seventy-three percent of agents expressed optimism.

## Key Findings
- Social momentum is strongly positive.
- Counter-narratives remain marginal.

## Conclusion
The dominant dynamic is consensus formation with high confidence.
"""

# Prediction question passed as ``simulation_requirement``.
_SAMPLE_REQUIREMENT = "Will the proposal pass by end of Q2 2026?"

# Well-formed payload returned by the mocked chat_json(); individual tests
# override single keys of it to exercise validation.
_GOOD_LLM_RESPONSE = {
    "p_yes": 0.73,
    "confidence": "high",
    "action": "buy_yes",
    "regime": "consensus_forming",
    "summary": "Strong agent consensus supports a YES outcome with high confidence.",
    "drivers": ["70%+ agent agreement", "positive social momentum"],
    "invalidators": ["marginal counter-narrative", "low information diversity"],
}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Happy path
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestExtractHappyPath:
    """extract() with a well-formed LLM payload."""

    def test_returns_miro_signal(self):
        extractor, _client = _make_extractor(_GOOD_LLM_RESPONSE)
        assert isinstance(
            extractor.extract("r1", "s1", _SAMPLE_REPORT, _SAMPLE_REQUIREMENT),
            MiroSignal,
        )

    def test_fields_match_llm_output(self):
        extractor, _client = _make_extractor(_GOOD_LLM_RESPONSE)
        signal = extractor.extract("r1", "s1", _SAMPLE_REPORT, _SAMPLE_REQUIREMENT)

        assert signal.p_yes == pytest.approx(0.73)
        assert signal.confidence == "high"
        assert signal.action == "buy_yes"
        assert signal.regime == "consensus_forming"
        assert "YES" in signal.summary or "consensus" in signal.summary.lower()
        assert len(signal.drivers) == 2
        assert len(signal.invalidators) == 2

    def test_metadata_fields(self):
        extractor, _client = _make_extractor(_GOOD_LLM_RESPONSE)
        signal = extractor.extract(
            "report_abc", "sim_xyz", _SAMPLE_REPORT, _SAMPLE_REQUIREMENT
        )

        assert signal.report_id == "report_abc"
        assert signal.simulation_id == "sim_xyz"
        assert signal.schema_version == SCHEMA_VERSION
        assert signal.signal_id  # non-empty UUID
        assert signal.generated_at  # non-empty ISO timestamp

    def test_to_dict_structure(self):
        extractor, _client = _make_extractor(_GOOD_LLM_RESPONSE)
        payload = extractor.extract(
            "r1", "s1", _SAMPLE_REPORT, _SAMPLE_REQUIREMENT
        ).to_dict()

        expected_thesis_keys = {
            "p_yes", "confidence", "action", "regime",
            "summary", "drivers", "invalidators",
        }
        assert "thesis" in payload
        assert set(payload["thesis"].keys()) == expected_thesis_keys

    def test_llm_called_with_low_temperature(self):
        extractor, client = _make_extractor(_GOOD_LLM_RESPONSE)
        extractor.extract("r1", "s1", _SAMPLE_REPORT, _SAMPLE_REQUIREMENT)

        kwargs = client.chat_json.call_args.kwargs
        assert kwargs["temperature"] <= 0.2

    def test_llm_called_with_retries(self):
        extractor, client = _make_extractor(_GOOD_LLM_RESPONSE)
        extractor.extract("r1", "s1", _SAMPLE_REPORT, _SAMPLE_REQUIREMENT)

        kwargs = client.chat_json.call_args.kwargs
        assert kwargs.get("max_attempts", 1) >= 2

    def test_simulation_requirement_in_messages(self):
        question = "Will the referendum pass?"
        extractor, client = _make_extractor(_GOOD_LLM_RESPONSE)
        extractor.extract("r1", "s1", _SAMPLE_REPORT, question)

        sent = client.chat_json.call_args.kwargs["messages"]
        user_content = next(
            msg["content"] for msg in sent if msg["role"] == "user"
        )
        assert question in user_content
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Field validation and normalisation
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestFieldValidation:
    """Normalisation of out-of-range, invalid or missing LLM fields."""

    @staticmethod
    def _extract_with(**overrides):
        """Run extract() on the good payload with *overrides* applied."""
        extractor, _client = _make_extractor({**_GOOD_LLM_RESPONSE, **overrides})
        return extractor.extract("r1", "s1", _SAMPLE_REPORT, _SAMPLE_REQUIREMENT)

    def test_p_yes_clamped_below_zero(self):
        # Pin the exact lower bound: a bare ">= 0.01" would also pass if the
        # value silently fell back to the 0.5 default.
        assert self._extract_with(p_yes=-0.5).p_yes == pytest.approx(0.01)

    def test_p_yes_clamped_above_one(self):
        # Pin the exact upper bound for the same reason.
        assert self._extract_with(p_yes=1.5).p_yes == pytest.approx(0.99)

    def test_invalid_confidence_falls_back_to_medium(self):
        assert self._extract_with(confidence="very_sure").confidence == "medium"

    def test_invalid_action_recomputed_from_p_yes_buy_yes(self):
        assert self._extract_with(p_yes=0.8, action="INVALID").action == "buy_yes"

    def test_invalid_action_recomputed_from_p_yes_buy_no(self):
        assert self._extract_with(p_yes=0.2, action="INVALID").action == "buy_no"

    def test_invalid_action_recomputed_from_p_yes_hold(self):
        assert self._extract_with(p_yes=0.5, action="INVALID").action == "hold"

    def test_missing_regime_defaults_to_uncertain(self):
        payload = {k: v for k, v in _GOOD_LLM_RESPONSE.items() if k != "regime"}
        extractor, _client = _make_extractor(payload)
        sig = extractor.extract("r1", "s1", _SAMPLE_REPORT, _SAMPLE_REQUIREMENT)
        assert sig.regime == "uncertain"

    def test_empty_drivers_list_accepted(self):
        assert self._extract_with(drivers=[]).drivers == []

    def test_non_list_drivers_handled(self):
        sig = self._extract_with(drivers="some string")
        # Must not crash; however the string is coerced, the result has to be
        # a list of strings.
        assert isinstance(sig.drivers, list)
        assert all(isinstance(item, str) for item in sig.drivers)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Report trimming
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestReportTrimming:
    """Behaviour of SignalExtractor._trim_report."""

    def test_short_report_unchanged(self):
        text = "Short report content."
        assert SignalExtractor._trim_report(text, max_chars=100) == text

    def test_long_report_trimmed(self):
        original_length = 20_000
        trimmed = SignalExtractor._trim_report("x" * original_length, max_chars=12_000)
        assert len(trimmed) < original_length
        assert "truncated" in trimmed

    def test_trimmed_report_keeps_tail(self):
        # The closing section carries the conclusion, so it must survive trimming.
        report = "A" * 10_000 + "CONCLUSION"
        trimmed = SignalExtractor._trim_report(report, max_chars=100)
        assert "CONCLUSION" in trimmed
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Fallback (_salvage)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestSalvage:
    """Behaviour of the SignalExtractor._salvage fallback parser."""

    def test_salvage_extracts_probability(self):
        salvaged = SignalExtractor._salvage("The probability is 0.68 for YES outcome.")
        assert salvaged is not None
        assert salvaged["p_yes"] == pytest.approx(0.68)

    def test_salvage_returns_none_when_no_probability(self):
        salvaged = SignalExtractor._salvage("no numbers here at all")
        assert salvaged is None

    def test_salvage_sets_action_buy_yes(self):
        salvaged = SignalExtractor._salvage("probability 0.80")
        assert salvaged["action"] == "buy_yes"

    def test_salvage_sets_action_buy_no(self):
        salvaged = SignalExtractor._salvage("probability 0.20")
        assert salvaged["action"] == "buy_no"

    def test_salvage_sets_action_hold(self):
        salvaged = SignalExtractor._salvage("probability 0.50")
        assert salvaged["action"] == "hold"

    def test_salvage_detects_high_confidence(self):
        salvaged = SignalExtractor._salvage("high confidence, p=0.72")
        assert salvaged["confidence"] == "high"

    def test_salvage_detects_low_confidence(self):
        salvaged = SignalExtractor._salvage("low certainty, p=0.30")
        assert salvaged["confidence"] == "low"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# LLM failure propagates as ValueError
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestLLMFailure:
    """A ValueError raised by chat_json must propagate out of extract()."""

    def test_raises_value_error_on_llm_failure(self):
        failure = ValueError("LLM返回的JSON格式无效: ...")
        extractor, _client = _make_extractor(chat_json_side_effect=failure)
        with pytest.raises(ValueError):
            extractor.extract("r1", "s1", _SAMPLE_REPORT, _SAMPLE_REQUIREMENT)
|
||||||
Loading…
Reference in New Issue