feat(utils): implement robust LLM client with cascading fallback and JSON repair
This commit is contained in:
parent
fa0f6519b1
commit
15bd1142fc
|
|
@ -57,4 +57,7 @@ backend/logs/
|
||||||
backend/uploads/
|
backend/uploads/
|
||||||
|
|
||||||
# Docker 数据
|
# Docker 数据
|
||||||
data/
|
data/
|
||||||
|
# Personal configuration
|
||||||
|
CLAUDE.md
|
||||||
|
skills/
|
||||||
|
|
|
||||||
|
|
@ -32,9 +32,20 @@ class Config:
|
||||||
LLM_BASE_URL = os.environ.get('LLM_BASE_URL', 'https://api.openai.com/v1')
|
LLM_BASE_URL = os.environ.get('LLM_BASE_URL', 'https://api.openai.com/v1')
|
||||||
LLM_MODEL_NAME = os.environ.get('LLM_MODEL_NAME', 'gpt-4o-mini')
|
LLM_MODEL_NAME = os.environ.get('LLM_MODEL_NAME', 'gpt-4o-mini')
|
||||||
|
|
||||||
|
# Boost/Fallback LLM配置(可选,主 LLM 失败时自动回退)
|
||||||
|
LLM_BOOST_API_KEY = os.environ.get('LLM_BOOST_API_KEY')
|
||||||
|
LLM_BOOST_BASE_URL = os.environ.get('LLM_BOOST_BASE_URL')
|
||||||
|
LLM_BOOST_MODEL_NAME = os.environ.get('LLM_BOOST_MODEL_NAME')
|
||||||
|
|
||||||
# Zep配置
|
# Zep配置
|
||||||
ZEP_API_KEY = os.environ.get('ZEP_API_KEY')
|
ZEP_API_KEY = os.environ.get('ZEP_API_KEY')
|
||||||
|
|
||||||
|
# Zep 速率限制配置(可通过 .env 调整,升级付费计划后放宽)
|
||||||
|
ZEP_RATE_LIMIT = int(os.environ.get('ZEP_RATE_LIMIT', '5')) # 每个窗口期允许的请求数
|
||||||
|
ZEP_RATE_LIMIT_WINDOW = int(os.environ.get('ZEP_RATE_LIMIT_WINDOW', '60')) # 窗口期(秒)
|
||||||
|
ZEP_CACHE_TTL = int(os.environ.get('ZEP_CACHE_TTL', '30')) # graph data 缓存时间(秒),0=不缓存
|
||||||
|
ZEP_GRAPH_POLL_INTERVAL = int(os.environ.get('ZEP_GRAPH_POLL_INTERVAL', '0')) # 前端自动轮询间隔(秒),0=仅手动刷新
|
||||||
|
|
||||||
# 文件上传配置
|
# 文件上传配置
|
||||||
MAX_CONTENT_LENGTH = 50 * 1024 * 1024 # 50MB
|
MAX_CONTENT_LENGTH = 50 * 1024 * 1024 # 50MB
|
||||||
UPLOAD_FOLDER = os.path.join(os.path.dirname(__file__), '../uploads')
|
UPLOAD_FOLDER = os.path.join(os.path.dirname(__file__), '../uploads')
|
||||||
|
|
|
||||||
|
|
@ -1,18 +1,173 @@
|
||||||
"""
|
"""
|
||||||
LLM客户端封装
|
LLM客户端封装
|
||||||
统一使用OpenAI格式调用
|
统一使用OpenAI格式调用
|
||||||
|
|
||||||
|
支持三层容错机制:
|
||||||
|
1. 截断检测(finish_reason == 'length')
|
||||||
|
2. JSON修复(尝试关闭未闭合的括号)
|
||||||
|
3. 级联回退(自动切换到 Boost LLM)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
import re
|
import re
|
||||||
from typing import Optional, Dict, Any, List
|
from typing import Optional, Dict, Any, List, Tuple
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
|
|
||||||
from ..config import Config
|
from ..config import Config
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def repair_truncated_json(text: str) -> Optional[Dict[str, Any]]:
    """
    Try to recover a JSON value from text that was cut off mid-stream.

    The text is first stripped of a surrounding Markdown code fence and parsed
    as-is. If that fails, the text is scanned once to collect "safe points":
    the position just after every closing ``}`` / ``]`` and the position of
    every comma that sits inside at least one object, all outside string
    literals. Starting from the last safe point, the text is cut there, a
    trailing comma is dropped, and ``_try_close_and_parse`` closes whatever
    brackets are still open. The first candidate that parses wins; everything
    after the chosen safe point is discarded.

    Args:
        text: Possibly truncated JSON text, optionally wrapped in a code fence.

    Returns:
        The parsed value, or None when the input is empty/blank or no
        candidate parses. The result of ``json.loads`` is returned unchanged,
        so a payload whose top level is not an object yields a non-dict value.
    """
    if not text or not text.strip():
        return None

    # Drop a leading ```/```json fence and a trailing ``` fence, if present.
    text = text.strip()
    text = re.sub(r'^```(?:json)?\s*\n?', '', text, flags=re.IGNORECASE)
    text = re.sub(r'\n?```\s*$', '', text)
    text = text.strip()

    # The text may already be valid JSON; nothing to repair in that case.
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass

    # Collect safe cut positions, ignoring anything inside string literals.
    safe_points = []
    depth_brace = 0
    in_string = False
    escape_next = False

    for i, ch in enumerate(text):
        if escape_next:
            # Character following a backslash inside a string: never structural.
            escape_next = False
            continue
        if in_string:
            if ch == '\\':
                escape_next = True
            elif ch == '"':
                in_string = False
            continue

        if ch == '"':
            in_string = True
        elif ch == '{':
            depth_brace += 1
        elif ch == '}':
            depth_brace -= 1
            safe_points.append(i + 1)
        elif ch == ']':
            safe_points.append(i + 1)
        elif ch == ',' and depth_brace >= 1:
            # Cutting at a comma drops the (possibly incomplete) member after it.
            safe_points.append(i)

    # Prefer the latest safe point so as much data as possible is kept.
    for point in reversed(safe_points):
        candidate = text[:point].rstrip().rstrip(',')
        result = _try_close_and_parse(candidate)
        if result is not None:
            logger.info(
                "JSON repair (phase 1) succeeded at position %d/%d",
                point,
                len(text),
            )
            return result

    # NOTE: an earlier second pass re-cut the text at the last '}' / ']' found
    # with str.rfind while stripping up to 500 trailing characters. Every such
    # cut is either one of the safe points tried above or lies inside a string
    # literal (which _try_close_and_parse rejects), so that pass could never
    # succeed and only added rescans on the failure path.
    logger.warning("JSON repair failed: no recoverable structure found")
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _try_close_and_parse(candidate: str) -> Optional[Dict[str, Any]]:
    """
    Append the closers for any brackets left open in ``candidate`` and parse it.

    Open ``{`` / ``[`` are tracked on a stack so the closers are emitted
    innermost-first; closing order matters ({[{ }]} rather than {[{ ]}}).

    Returns:
        The value produced by ``json.loads`` on the completed text, or None if
        the candidate ends inside a string literal or still fails to parse.
    """
    closer_for = {'{': '}', '[': ']'}
    pending = []  # closers still owed, innermost last
    inside_string = False
    skip_escaped = False

    for char in candidate:
        if skip_escaped:
            # This character was escaped by a preceding backslash.
            skip_escaped = False
        elif inside_string:
            if char == '\\':
                skip_escaped = True
            elif char == '"':
                inside_string = False
        elif char == '"':
            inside_string = True
        elif char in closer_for:
            pending.append(closer_for[char])
        elif pending and char == pending[-1]:
            # A closer matching the innermost open bracket; mismatched closers
            # are left alone and will make the parse below fail.
            pending.pop()

    # A candidate that stops in the middle of a string is not attempted.
    if inside_string:
        return None

    completed = candidate + ''.join(reversed(pending))
    try:
        return json.loads(completed)
    except json.JSONDecodeError:
        return None
|
||||||
|
|
||||||
|
|
||||||
class LLMClient:
|
class LLMClient:
|
||||||
"""LLM客户端"""
|
"""LLM客户端,支持级联回退"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
|
|
@ -31,6 +186,54 @@ class LLMClient:
|
||||||
api_key=self.api_key,
|
api_key=self.api_key,
|
||||||
base_url=self.base_url
|
base_url=self.base_url
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# 检查是否有 Boost LLM 配置可用于回退
|
||||||
|
self._has_boost = bool(Config.LLM_BOOST_API_KEY)
|
||||||
|
|
||||||
|
def _chat_raw(
    self,
    messages: List[Dict[str, str]],
    temperature: float = 0.7,
    max_tokens: int = 4096,
    response_format: Optional[Dict] = None,
    client: Optional[OpenAI] = None,
    model: Optional[str] = None
) -> Tuple[str, str]:
    """
    Send one chat-completion request and return its text and finish reason.

    Args:
        messages: Message list passed through to the API call.
        temperature: Sampling temperature.
        max_tokens: Maximum number of tokens to generate.
        response_format: Optional response-format dict; only sent when truthy.
        client: Alternative client to use instead of ``self.client``
            (used for the Boost fallback).
        model: Alternative model name to use instead of ``self.model``.

    Returns:
        A ``(content, finish_reason)`` tuple. Missing content becomes ``""``
        and a missing finish reason becomes ``"unknown"``.
    """
    active_client = client or self.client
    active_model = model or self.model

    request = dict(
        model=active_model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
    )
    if response_format:
        request["response_format"] = response_format

    completion = active_client.chat.completions.create(**request)
    first_choice = completion.choices[0]
    content = first_choice.message.content or ""
    finish_reason = first_choice.finish_reason or "unknown"

    # Some models (e.g. MiniMax M2.5) embed <think>...</think> reasoning in the
    # content; strip it before handing the text back.
    content = re.sub(r'<think>[\s\S]*?</think>', '', content).strip()

    return content, finish_reason
|
||||||
|
|
||||||
def chat(
|
def chat(
|
||||||
self,
|
self,
|
||||||
|
|
@ -51,22 +254,24 @@ class LLMClient:
|
||||||
Returns:
|
Returns:
|
||||||
模型响应文本
|
模型响应文本
|
||||||
"""
|
"""
|
||||||
kwargs = {
|
content, _ = self._chat_raw(
|
||||||
"model": self.model,
|
messages=messages,
|
||||||
"messages": messages,
|
temperature=temperature,
|
||||||
"temperature": temperature,
|
max_tokens=max_tokens,
|
||||||
"max_tokens": max_tokens,
|
response_format=response_format
|
||||||
}
|
)
|
||||||
|
|
||||||
if response_format:
|
|
||||||
kwargs["response_format"] = response_format
|
|
||||||
|
|
||||||
response = self.client.chat.completions.create(**kwargs)
|
|
||||||
content = response.choices[0].message.content
|
|
||||||
# 部分模型(如MiniMax M2.5)会在content中包含<think>思考内容,需要移除
|
|
||||||
content = re.sub(r'<think>[\s\S]*?</think>', '', content).strip()
|
|
||||||
return content
|
return content
|
||||||
|
|
||||||
|
def _create_boost_client(self) -> Tuple[OpenAI, str]:
    """
    Build the Boost (fallback) LLM client together with its model name.

    A new client is constructed on every call from the ``LLM_BOOST_*`` values
    on ``Config``; nothing is cached on the instance.

    Returns:
        A ``(client, model_name)`` tuple.
    """
    boost_client = OpenAI(
        api_key=Config.LLM_BOOST_API_KEY,
        base_url=Config.LLM_BOOST_BASE_URL,
    )
    boost_model = Config.LLM_BOOST_MODEL_NAME
    return boost_client, boost_model
|
||||||
|
|
||||||
def chat_json(
|
def chat_json(
|
||||||
self,
|
self,
|
||||||
messages: List[Dict[str, str]],
|
messages: List[Dict[str, str]],
|
||||||
|
|
@ -74,7 +279,9 @@ class LLMClient:
|
||||||
max_tokens: int = 4096
|
max_tokens: int = 4096
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
发送聊天请求并返回JSON
|
发送聊天请求并返回JSON,支持三层容错:
|
||||||
|
1. 截断检测 + JSON修复
|
||||||
|
2. 级联回退到 Boost LLM
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
messages: 消息列表
|
messages: 消息列表
|
||||||
|
|
@ -84,20 +291,103 @@ class LLMClient:
|
||||||
Returns:
|
Returns:
|
||||||
解析后的JSON对象
|
解析后的JSON对象
|
||||||
"""
|
"""
|
||||||
response = self.chat(
|
# === 第一层:尝试主 LLM ===
|
||||||
messages=messages,
|
|
||||||
temperature=temperature,
|
|
||||||
max_tokens=max_tokens,
|
|
||||||
response_format={"type": "json_object"}
|
|
||||||
)
|
|
||||||
# 清理markdown代码块标记
|
|
||||||
cleaned_response = response.strip()
|
|
||||||
cleaned_response = re.sub(r'^```(?:json)?\s*\n?', '', cleaned_response, flags=re.IGNORECASE)
|
|
||||||
cleaned_response = re.sub(r'\n?```\s*$', '', cleaned_response)
|
|
||||||
cleaned_response = cleaned_response.strip()
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return json.loads(cleaned_response)
|
content, finish_reason = self._chat_raw(
|
||||||
except json.JSONDecodeError:
|
messages=messages,
|
||||||
raise ValueError(f"LLM返回的JSON格式无效: {cleaned_response}")
|
temperature=temperature,
|
||||||
|
max_tokens=max_tokens,
|
||||||
|
response_format={"type": "json_object"}
|
||||||
|
)
|
||||||
|
|
||||||
|
# 清理 markdown 代码块标记
|
||||||
|
cleaned = self._clean_json_response(content)
|
||||||
|
|
||||||
|
# 正常完成 → 尝试解析
|
||||||
|
if finish_reason == "stop":
|
||||||
|
try:
|
||||||
|
return json.loads(cleaned)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
logger.warning("Primary LLM returned invalid JSON despite finish_reason=stop, attempting repair")
|
||||||
|
repaired = repair_truncated_json(content)
|
||||||
|
if repaired is not None:
|
||||||
|
return repaired
|
||||||
|
# 回退到 Boost
|
||||||
|
|
||||||
|
# 截断 → 尝试修复
|
||||||
|
elif finish_reason == "length":
|
||||||
|
logger.warning(f"Primary LLM response truncated (finish_reason=length, {len(content)} chars)")
|
||||||
|
repaired = repair_truncated_json(content)
|
||||||
|
if repaired is not None:
|
||||||
|
logger.info("Truncated JSON repaired successfully from primary LLM")
|
||||||
|
return repaired
|
||||||
|
logger.warning("JSON repair failed, falling back to Boost LLM")
|
||||||
|
|
||||||
|
else:
|
||||||
|
logger.warning(f"Unexpected finish_reason='{finish_reason}', attempting parse")
|
||||||
|
try:
|
||||||
|
return json.loads(cleaned)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Primary LLM failed: {type(e).__name__}: {e}")
|
||||||
|
|
||||||
|
# === 第二层:回退到 Boost LLM ===
|
||||||
|
if not self._has_boost:
|
||||||
|
raise ValueError(
|
||||||
|
f"Primary LLM failed and no Boost LLM configured. "
|
||||||
|
f"Set LLM_BOOST_API_KEY, LLM_BOOST_BASE_URL, LLM_BOOST_MODEL_NAME in .env"
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(f"Falling back to Boost LLM: {Config.LLM_BOOST_BASE_URL} / {Config.LLM_BOOST_MODEL_NAME}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
boost_client, boost_model = self._create_boost_client()
|
||||||
|
content, finish_reason = self._chat_raw(
|
||||||
|
messages=messages,
|
||||||
|
temperature=temperature,
|
||||||
|
max_tokens=max_tokens,
|
||||||
|
response_format={"type": "json_object"},
|
||||||
|
client=boost_client,
|
||||||
|
model=boost_model
|
||||||
|
)
|
||||||
|
|
||||||
|
cleaned = self._clean_json_response(content)
|
||||||
|
|
||||||
|
if finish_reason == "stop":
|
||||||
|
try:
|
||||||
|
return json.loads(cleaned)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
repaired = repair_truncated_json(content)
|
||||||
|
if repaired is not None:
|
||||||
|
logger.info("Boost LLM JSON repaired successfully")
|
||||||
|
return repaired
|
||||||
|
raise ValueError(f"Boost LLM returned invalid JSON: {cleaned[:200]}...")
|
||||||
|
|
||||||
|
elif finish_reason == "length":
|
||||||
|
logger.warning(f"Boost LLM also truncated ({len(content)} chars), attempting repair")
|
||||||
|
repaired = repair_truncated_json(content)
|
||||||
|
if repaired is not None:
|
||||||
|
logger.info("Truncated JSON from Boost LLM repaired successfully")
|
||||||
|
return repaired
|
||||||
|
raise ValueError(f"Boost LLM response truncated and repair failed: {cleaned[:200]}...")
|
||||||
|
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
return json.loads(cleaned)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
raise ValueError(f"Boost LLM returned unparseable response: {cleaned[:200]}...")
|
||||||
|
|
||||||
|
except ValueError:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
raise ValueError(f"Both primary and Boost LLM failed. Boost error: {type(e).__name__}: {e}")
|
||||||
|
|
||||||
|
@staticmethod
def _clean_json_response(content: str) -> str:
    """Strip a surrounding Markdown code fence (``` or ```json) from an LLM reply."""
    without_opening = re.sub(
        r'^```(?:json)?\s*\n?', '', content.strip(), flags=re.IGNORECASE
    )
    without_fences = re.sub(r'\n?```\s*$', '', without_opening)
    return without_fences.strip()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue