From 15bd1142fc475800b83684e5dfbc488c47589a71 Mon Sep 17 00:00:00 2001 From: Armando Maynez Date: Sun, 3 May 2026 02:18:06 -0600 Subject: [PATCH] feat(utils): implement robust LLM client with cascading fallback and JSON repair --- .gitignore | 5 +- backend/app/config.py | 11 + backend/app/utils/llm_client.py | 356 +++++++++++++++++++++++++++++--- 3 files changed, 338 insertions(+), 34 deletions(-) diff --git a/.gitignore b/.gitignore index 55d3ef19..fb82def4 100644 --- a/.gitignore +++ b/.gitignore @@ -57,4 +57,7 @@ backend/logs/ backend/uploads/ # Docker 数据 -data/ \ No newline at end of file +data/ +# Personal configuration +CLAUDE.md +skills/ diff --git a/backend/app/config.py b/backend/app/config.py index 953dfa50..026378d9 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -32,9 +32,20 @@ class Config: LLM_BASE_URL = os.environ.get('LLM_BASE_URL', 'https://api.openai.com/v1') LLM_MODEL_NAME = os.environ.get('LLM_MODEL_NAME', 'gpt-4o-mini') + # Boost/Fallback LLM配置(可选,主 LLM 失败时自动回退) + LLM_BOOST_API_KEY = os.environ.get('LLM_BOOST_API_KEY') + LLM_BOOST_BASE_URL = os.environ.get('LLM_BOOST_BASE_URL') + LLM_BOOST_MODEL_NAME = os.environ.get('LLM_BOOST_MODEL_NAME') + # Zep配置 ZEP_API_KEY = os.environ.get('ZEP_API_KEY') + # Zep 速率限制配置(可通过 .env 调整,升级付费计划后放宽) + ZEP_RATE_LIMIT = int(os.environ.get('ZEP_RATE_LIMIT', '5')) # 每个窗口期允许的请求数 + ZEP_RATE_LIMIT_WINDOW = int(os.environ.get('ZEP_RATE_LIMIT_WINDOW', '60')) # 窗口期(秒) + ZEP_CACHE_TTL = int(os.environ.get('ZEP_CACHE_TTL', '30')) # graph data 缓存时间(秒),0=不缓存 + ZEP_GRAPH_POLL_INTERVAL = int(os.environ.get('ZEP_GRAPH_POLL_INTERVAL', '0')) # 前端自动轮询间隔(秒),0=仅手动刷新 + # 文件上传配置 MAX_CONTENT_LENGTH = 50 * 1024 * 1024 # 50MB UPLOAD_FOLDER = os.path.join(os.path.dirname(__file__), '../uploads') diff --git a/backend/app/utils/llm_client.py b/backend/app/utils/llm_client.py index 6c1a81f4..be58f3ee 100644 --- a/backend/app/utils/llm_client.py +++ b/backend/app/utils/llm_client.py @@ -1,18 +1,173 @@ """ LLM客户端封装 统一使用OpenAI格式调用 + +支持三层容错机制: +1. 截断检测(finish_reason == 'length') +2. JSON修复(尝试关闭未闭合的括号) +3. 级联回退(自动切换到 Boost LLM) """ import json +import logging import re -from typing import Optional, Dict, Any, List +from typing import Optional, Dict, Any, List, Tuple from openai import OpenAI from ..config import Config +logger = logging.getLogger(__name__) + + +def repair_truncated_json(text: str) -> Optional[Dict[str, Any]]: + """ + 尝试修复被截断的JSON字符串。 + + 两阶段策略: + 1. 精确修复:找到最后一个结构完整的安全截断点,关闭括号 + 2. 激进修复:剥离末尾不完整的字符串/值,关闭所有括号 + + Args: + text: 被截断的JSON字符串 + + Returns: + 修复后的字典,如果无法修复则返回 None + """ + if not text or not text.strip(): + return None + + text = text.strip() + + # 清理 markdown 代码块标记 + text = re.sub(r'^```(?:json)?\s*\n?', '', text, flags=re.IGNORECASE) + text = re.sub(r'\n?```\s*$', '', text) + text = text.strip() + + # 先尝试直接解析(也许已经是有效JSON) + try: + return json.loads(text) + except json.JSONDecodeError: + pass + + # === 阶段1:精确安全点修复 === + # 扫描结构,找到 }, ] 或顶层逗号作为安全截断点 + safe_points = [] + depth_brace = 0 + depth_bracket = 0 + in_string = False + escape_next = False + + for i, ch in enumerate(text): + if escape_next: + escape_next = False + continue + if ch == '\\' and in_string: + escape_next = True + continue + if ch == '"' and not escape_next: + in_string = not in_string + continue + if in_string: + continue + + if ch == '{': + depth_brace += 1 + elif ch == '}': + depth_brace -= 1 + safe_points.append(i + 1) + elif ch == '[': + depth_bracket += 1 + elif ch == ']': + depth_bracket -= 1 + safe_points.append(i + 1) + elif ch == ',' and depth_brace >= 1: + safe_points.append(i) + + # 从最后一个安全点开始尝试 + for point in reversed(safe_points): + candidate = text[:point].rstrip().rstrip(',') + result = _try_close_and_parse(candidate) + if result is not None: + logger.info(f"JSON repair (phase 1) succeeded at position {point}/{len(text)}") + return result + + # === 阶段2:激进修复 === + # 处理截断发生在字符串值中间的情况(如 "description": "A) + # 策略:从末尾向前找到最后一个完整的 }, 然后关闭括号 + + # 先尝试关闭可能未闭合的字符串 + # 用正则找到最后一个看起来像截断字符串值的位置 + # 模式:找最后一个 "key": "...(未闭合的字符串),截断到前一个完整的 } + + # 逐步从末尾剥离,找到能解析的子串 + for strip_len in range(1, min(len(text), 500)): + candidate = text[:len(text) - strip_len] + + # 尝试在最后一个完整对象/数组闭合符处截断 + # 找最后一个 } 或 ] + last_close = max(candidate.rfind('}'), candidate.rfind(']')) + if last_close < 0: + continue + + truncated = candidate[:last_close + 1].rstrip().rstrip(',') + result = _try_close_and_parse(truncated) + if result is not None: + logger.info(f"JSON repair (phase 2) succeeded, stripped {strip_len + len(text) - last_close - 1} chars") + return result + + logger.warning("JSON repair failed: no recoverable structure found") + return None + + +def _try_close_and_parse(candidate: str) -> Optional[Dict[str, Any]]: + """ + 使用栈追踪未闭合的括号,按正确顺序关闭它们,然后尝试解析。 + + JSON 关闭顺序很重要:{[{ }]} 而不是 {[{ ]}} + + Returns: + 解析后的字典,或 None + """ + stack = [] # 记录开启的括号类型,用于按正确顺序关闭 + in_str = False + esc = False + + for ch in candidate: + if esc: + esc = False + continue + if ch == '\\' and in_str: + esc = True + continue + if ch == '"': + in_str = not in_str + continue + if in_str: + continue + if ch == '{': + stack.append('}') + elif ch == '[': + stack.append(']') + elif ch in ('}', ']'): + if stack and stack[-1] == ch: + stack.pop() + + # 如果字符串未闭合,不尝试此候选 + if in_str: + return None + + # 按栈逆序关闭(LIFO) + closing = ''.join(reversed(stack)) + repaired = candidate + closing + + try: + return json.loads(repaired) + except json.JSONDecodeError: + return None + class LLMClient: - """LLM客户端""" + """LLM客户端,支持级联回退""" def __init__( self, @@ -31,6 +186,54 @@ class LLMClient: api_key=self.api_key, base_url=self.base_url ) + + # 检查是否有 Boost LLM 配置可用于回退 + self._has_boost = bool(Config.LLM_BOOST_API_KEY) + + def _chat_raw( + self, + messages: List[Dict[str, str]], + temperature: float = 0.7, + max_tokens: int = 4096, + response_format: Optional[Dict] = None, + client: Optional[OpenAI] = None, + model: Optional[str] = None + ) -> Tuple[str, str]: + """ + 底层聊天请求,返回 (content, finish_reason) 元组。 + + Args: + messages: 消息列表 + temperature: 温度参数 + max_tokens: 最大token数 + response_format: 响应格式 + client: 可选的替代客户端(用于 Boost 回退) + model: 可选的替代模型名 + + Returns: + (content, finish_reason) 元组 + """ + use_client = client or self.client + use_model = model or self.model + + kwargs = { + "model": use_model, + "messages": messages, + "temperature": temperature, + "max_tokens": max_tokens, + } + + if response_format: + kwargs["response_format"] = response_format + + response = use_client.chat.completions.create(**kwargs) + content = response.choices[0].message.content or "" + finish_reason = response.choices[0].finish_reason or "unknown" + + # 部分模型(如MiniMax M2.5)会在content中包含思考内容,需要移除 + content = re.sub(r'[\s\S]*?', '', content).strip() + + return content, finish_reason def chat( self, @@ -51,22 +254,24 @@ class LLMClient: Returns: 模型响应文本 """ - kwargs = { - "model": self.model, - "messages": messages, - "temperature": temperature, - "max_tokens": max_tokens, - } - - if response_format: - kwargs["response_format"] = response_format - - response = self.client.chat.completions.create(**kwargs) - content = response.choices[0].message.content - # 部分模型(如MiniMax M2.5)会在content中包含思考内容,需要移除 - content = re.sub(r'[\s\S]*?', '', content).strip() + content, _ = self._chat_raw( + messages=messages, + temperature=temperature, + max_tokens=max_tokens, + response_format=response_format + ) return content + def _create_boost_client(self) -> Tuple[OpenAI, str]: + """创建 Boost LLM 客户端(按需创建,不缓存)""" + return ( + OpenAI( + api_key=Config.LLM_BOOST_API_KEY, + base_url=Config.LLM_BOOST_BASE_URL + ), + Config.LLM_BOOST_MODEL_NAME + ) + def chat_json( self, messages: List[Dict[str, str]], @@ -74,7 +279,9 @@ class LLMClient: max_tokens: int = 4096 ) -> Dict[str, Any]: """ - 发送聊天请求并返回JSON + 发送聊天请求并返回JSON,支持三层容错: + 1. 截断检测 + JSON修复 + 2. 级联回退到 Boost LLM Args: messages: 消息列表 @@ -84,20 +291,103 @@ class LLMClient: Returns: 解析后的JSON对象 """ - response = self.chat( - messages=messages, - temperature=temperature, - max_tokens=max_tokens, - response_format={"type": "json_object"} - ) - # 清理markdown代码块标记 - cleaned_response = response.strip() - cleaned_response = re.sub(r'^```(?:json)?\s*\n?', '', cleaned_response, flags=re.IGNORECASE) - cleaned_response = re.sub(r'\n?```\s*$', '', cleaned_response) - cleaned_response = cleaned_response.strip() - + # === 第一层:尝试主 LLM === try: - return json.loads(cleaned_response) - except json.JSONDecodeError: - raise ValueError(f"LLM返回的JSON格式无效: {cleaned_response}") - + content, finish_reason = self._chat_raw( + messages=messages, + temperature=temperature, + max_tokens=max_tokens, + response_format={"type": "json_object"} + ) + + # 清理 markdown 代码块标记 + cleaned = self._clean_json_response(content) + + # 正常完成 → 尝试解析 + if finish_reason == "stop": + try: + return json.loads(cleaned) + except json.JSONDecodeError: + logger.warning("Primary LLM returned invalid JSON despite finish_reason=stop, attempting repair") + repaired = repair_truncated_json(content) + if repaired is not None: + return repaired + # 回退到 Boost + + # 截断 → 尝试修复 + elif finish_reason == "length": + logger.warning(f"Primary LLM response truncated (finish_reason=length, {len(content)} chars)") + repaired = repair_truncated_json(content) + if repaired is not None: + logger.info("Truncated JSON repaired successfully from primary LLM") + return repaired + logger.warning("JSON repair failed, falling back to Boost LLM") + + else: + logger.warning(f"Unexpected finish_reason='{finish_reason}', attempting parse") + try: + return json.loads(cleaned) + except json.JSONDecodeError: + pass + + except Exception as e: + logger.warning(f"Primary LLM failed: {type(e).__name__}: {e}") + + # === 第二层:回退到 Boost LLM === + if not self._has_boost: + raise ValueError( + f"Primary LLM failed and no Boost LLM configured. " + f"Set LLM_BOOST_API_KEY, LLM_BOOST_BASE_URL, LLM_BOOST_MODEL_NAME in .env" + ) + + logger.info(f"Falling back to Boost LLM: {Config.LLM_BOOST_BASE_URL} / {Config.LLM_BOOST_MODEL_NAME}") + + try: + boost_client, boost_model = self._create_boost_client() + content, finish_reason = self._chat_raw( + messages=messages, + temperature=temperature, + max_tokens=max_tokens, + response_format={"type": "json_object"}, + client=boost_client, + model=boost_model + ) + + cleaned = self._clean_json_response(content) + + if finish_reason == "stop": + try: + return json.loads(cleaned) + except json.JSONDecodeError: + repaired = repair_truncated_json(content) + if repaired is not None: + logger.info("Boost LLM JSON repaired successfully") + return repaired + raise ValueError(f"Boost LLM returned invalid JSON: {cleaned[:200]}...") + + elif finish_reason == "length": + logger.warning(f"Boost LLM also truncated ({len(content)} chars), attempting repair") + repaired = repair_truncated_json(content) + if repaired is not None: + logger.info("Truncated JSON from Boost LLM repaired successfully") + return repaired + raise ValueError(f"Boost LLM response truncated and repair failed: {cleaned[:200]}...") + + else: + try: + return json.loads(cleaned) + except json.JSONDecodeError: + raise ValueError(f"Boost LLM returned unparseable response: {cleaned[:200]}...") + + except ValueError: + raise + except Exception as e: + raise ValueError(f"Both primary and Boost LLM failed. Boost error: {type(e).__name__}: {e}") + + @staticmethod + def _clean_json_response(content: str) -> str: + """清理 LLM 响应中的 markdown 代码块标记""" + cleaned = content.strip() + cleaned = re.sub(r'^```(?:json)?\s*\n?', '', cleaned, flags=re.IGNORECASE) + cleaned = re.sub(r'\n?```\s*$', '', cleaned) + return cleaned.strip()