MicroFish/backend/app/utils/llm_client.py

394 lines
13 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
LLM客户端封装
统一使用OpenAI格式调用
支持三层容错机制:
1. 截断检测finish_reason == 'length'
2. JSON修复尝试关闭未闭合的括号
3. 级联回退(自动切换到 Boost LLM
"""
import json
import logging
import re
from typing import Optional, Dict, Any, List, Tuple
from openai import OpenAI
from ..config import Config
logger = logging.getLogger(__name__)
def repair_truncated_json(text: str) -> Optional[Dict[str, Any]]:
"""
尝试修复被截断的JSON字符串。
两阶段策略:
1. 精确修复:找到最后一个结构完整的安全截断点,关闭括号
2. 激进修复:剥离末尾不完整的字符串/值,关闭所有括号
Args:
text: 被截断的JSON字符串
Returns:
修复后的字典,如果无法修复则返回 None
"""
if not text or not text.strip():
return None
text = text.strip()
# 清理 markdown 代码块标记
text = re.sub(r'^```(?:json)?\s*\n?', '', text, flags=re.IGNORECASE)
text = re.sub(r'\n?```\s*$', '', text)
text = text.strip()
# 先尝试直接解析也许已经是有效JSON
try:
return json.loads(text)
except json.JSONDecodeError:
pass
# === 阶段1精确安全点修复 ===
# 扫描结构,找到 }, ] 或顶层逗号作为安全截断点
safe_points = []
depth_brace = 0
depth_bracket = 0
in_string = False
escape_next = False
for i, ch in enumerate(text):
if escape_next:
escape_next = False
continue
if ch == '\\' and in_string:
escape_next = True
continue
if ch == '"' and not escape_next:
in_string = not in_string
continue
if in_string:
continue
if ch == '{':
depth_brace += 1
elif ch == '}':
depth_brace -= 1
safe_points.append(i + 1)
elif ch == '[':
depth_bracket += 1
elif ch == ']':
depth_bracket -= 1
safe_points.append(i + 1)
elif ch == ',' and depth_brace >= 1:
safe_points.append(i)
# 从最后一个安全点开始尝试
for point in reversed(safe_points):
candidate = text[:point].rstrip().rstrip(',')
result = _try_close_and_parse(candidate)
if result is not None:
logger.info(f"JSON repair (phase 1) succeeded at position {point}/{len(text)}")
return result
# === 阶段2激进修复 ===
# 处理截断发生在字符串值中间的情况(如 "description": "A
# 策略:从末尾向前找到最后一个完整的 }, 然后关闭括号
# 先尝试关闭可能未闭合的字符串
# 用正则找到最后一个看起来像截断字符串值的位置
# 模式:找最后一个 "key": "...(未闭合的字符串),截断到前一个完整的 }
# 逐步从末尾剥离,找到能解析的子串
for strip_len in range(1, min(len(text), 500)):
candidate = text[:len(text) - strip_len]
# 尝试在最后一个完整对象/数组闭合符处截断
# 找最后一个 } 或 ]
last_close = max(candidate.rfind('}'), candidate.rfind(']'))
if last_close < 0:
continue
truncated = candidate[:last_close + 1].rstrip().rstrip(',')
result = _try_close_and_parse(truncated)
if result is not None:
logger.info(f"JSON repair (phase 2) succeeded, stripped {strip_len + len(text) - last_close - 1} chars")
return result
logger.warning("JSON repair failed: no recoverable structure found")
return None
def _try_close_and_parse(candidate: str) -> Optional[Dict[str, Any]]:
"""
使用栈追踪未闭合的括号,按正确顺序关闭它们,然后尝试解析。
JSON 关闭顺序很重要:{[{ }]} 而不是 {[{ ]}}
Returns:
解析后的字典,或 None
"""
stack = [] # 记录开启的括号类型,用于按正确顺序关闭
in_str = False
esc = False
for ch in candidate:
if esc:
esc = False
continue
if ch == '\\' and in_str:
esc = True
continue
if ch == '"':
in_str = not in_str
continue
if in_str:
continue
if ch == '{':
stack.append('}')
elif ch == '[':
stack.append(']')
elif ch in ('}', ']'):
if stack and stack[-1] == ch:
stack.pop()
# 如果字符串未闭合,不尝试此候选
if in_str:
return None
# 按栈逆序关闭LIFO
closing = ''.join(reversed(stack))
repaired = candidate + closing
try:
return json.loads(repaired)
except json.JSONDecodeError:
return None
class LLMClient:
"""LLM客户端支持级联回退"""
def __init__(
self,
api_key: Optional[str] = None,
base_url: Optional[str] = None,
model: Optional[str] = None
):
self.api_key = api_key or Config.LLM_API_KEY
self.base_url = base_url or Config.LLM_BASE_URL
self.model = model or Config.LLM_MODEL_NAME
if not self.api_key:
raise ValueError("LLM_API_KEY 未配置")
self.client = OpenAI(
api_key=self.api_key,
base_url=self.base_url
)
# 检查是否有 Boost LLM 配置可用于回退
self._has_boost = bool(Config.LLM_BOOST_API_KEY)
def _chat_raw(
self,
messages: List[Dict[str, str]],
temperature: float = 0.7,
max_tokens: int = 4096,
response_format: Optional[Dict] = None,
client: Optional[OpenAI] = None,
model: Optional[str] = None
) -> Tuple[str, str]:
"""
底层聊天请求,返回 (content, finish_reason) 元组。
Args:
messages: 消息列表
temperature: 温度参数
max_tokens: 最大token数
response_format: 响应格式
client: 可选的替代客户端(用于 Boost 回退)
model: 可选的替代模型名
Returns:
(content, finish_reason) 元组
"""
use_client = client or self.client
use_model = model or self.model
kwargs = {
"model": use_model,
"messages": messages,
"temperature": temperature,
"max_tokens": max_tokens,
}
if response_format:
kwargs["response_format"] = response_format
response = use_client.chat.completions.create(**kwargs)
content = response.choices[0].message.content or ""
finish_reason = response.choices[0].finish_reason or "unknown"
# 部分模型如MiniMax M2.5会在content中包含<think>思考内容,需要移除
content = re.sub(r'<think>[\s\S]*?</think>', '', content).strip()
return content, finish_reason
def chat(
self,
messages: List[Dict[str, str]],
temperature: float = 0.7,
max_tokens: int = 4096,
response_format: Optional[Dict] = None
) -> str:
"""
发送聊天请求
Args:
messages: 消息列表
temperature: 温度参数
max_tokens: 最大token数
response_format: 响应格式如JSON模式
Returns:
模型响应文本
"""
content, _ = self._chat_raw(
messages=messages,
temperature=temperature,
max_tokens=max_tokens,
response_format=response_format
)
return content
def _create_boost_client(self) -> Tuple[OpenAI, str]:
"""创建 Boost LLM 客户端(按需创建,不缓存)"""
return (
OpenAI(
api_key=Config.LLM_BOOST_API_KEY,
base_url=Config.LLM_BOOST_BASE_URL
),
Config.LLM_BOOST_MODEL_NAME
)
def chat_json(
self,
messages: List[Dict[str, str]],
temperature: float = 0.3,
max_tokens: int = 4096
) -> Dict[str, Any]:
"""
发送聊天请求并返回JSON支持三层容错
1. 截断检测 + JSON修复
2. 级联回退到 Boost LLM
Args:
messages: 消息列表
temperature: 温度参数
max_tokens: 最大token数
Returns:
解析后的JSON对象
"""
# === 第一层:尝试主 LLM ===
try:
content, finish_reason = self._chat_raw(
messages=messages,
temperature=temperature,
max_tokens=max_tokens,
response_format={"type": "json_object"}
)
# 清理 markdown 代码块标记
cleaned = self._clean_json_response(content)
# 正常完成 → 尝试解析
if finish_reason == "stop":
try:
return json.loads(cleaned)
except json.JSONDecodeError:
logger.warning("Primary LLM returned invalid JSON despite finish_reason=stop, attempting repair")
repaired = repair_truncated_json(content)
if repaired is not None:
return repaired
# 回退到 Boost
# 截断 → 尝试修复
elif finish_reason == "length":
logger.warning(f"Primary LLM response truncated (finish_reason=length, {len(content)} chars)")
repaired = repair_truncated_json(content)
if repaired is not None:
logger.info("Truncated JSON repaired successfully from primary LLM")
return repaired
logger.warning("JSON repair failed, falling back to Boost LLM")
else:
logger.warning(f"Unexpected finish_reason='{finish_reason}', attempting parse")
try:
return json.loads(cleaned)
except json.JSONDecodeError:
pass
except Exception as e:
logger.warning(f"Primary LLM failed: {type(e).__name__}: {e}")
# === 第二层:回退到 Boost LLM ===
if not self._has_boost:
raise ValueError(
f"Primary LLM failed and no Boost LLM configured. "
f"Set LLM_BOOST_API_KEY, LLM_BOOST_BASE_URL, LLM_BOOST_MODEL_NAME in .env"
)
logger.info(f"Falling back to Boost LLM: {Config.LLM_BOOST_BASE_URL} / {Config.LLM_BOOST_MODEL_NAME}")
try:
boost_client, boost_model = self._create_boost_client()
content, finish_reason = self._chat_raw(
messages=messages,
temperature=temperature,
max_tokens=max_tokens,
response_format={"type": "json_object"},
client=boost_client,
model=boost_model
)
cleaned = self._clean_json_response(content)
if finish_reason == "stop":
try:
return json.loads(cleaned)
except json.JSONDecodeError:
repaired = repair_truncated_json(content)
if repaired is not None:
logger.info("Boost LLM JSON repaired successfully")
return repaired
raise ValueError(f"Boost LLM returned invalid JSON: {cleaned[:200]}...")
elif finish_reason == "length":
logger.warning(f"Boost LLM also truncated ({len(content)} chars), attempting repair")
repaired = repair_truncated_json(content)
if repaired is not None:
logger.info("Truncated JSON from Boost LLM repaired successfully")
return repaired
raise ValueError(f"Boost LLM response truncated and repair failed: {cleaned[:200]}...")
else:
try:
return json.loads(cleaned)
except json.JSONDecodeError:
raise ValueError(f"Boost LLM returned unparseable response: {cleaned[:200]}...")
except ValueError:
raise
except Exception as e:
raise ValueError(f"Both primary and Boost LLM failed. Boost error: {type(e).__name__}: {e}")
@staticmethod
def _clean_json_response(content: str) -> str:
"""清理 LLM 响应中的 markdown 代码块标记"""
cleaned = content.strip()
cleaned = re.sub(r'^```(?:json)?\s*\n?', '', cleaned, flags=re.IGNORECASE)
cleaned = re.sub(r'\n?```\s*$', '', cleaned)
return cleaned.strip()