MicroFish/backend/app/utils/llm_client.py

224 lines
7.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
LLM客户端封装
统一使用OpenAI格式调用
"""
import json
import re
import time
from typing import Optional, Dict, Any, List
from openai import OpenAI
from ..config import Config
from .logger import get_logger
from .llm_gate import main_llm_slot
# Module-level logger used by LLMClient for retry, recovery, and failure messages.
logger = get_logger('mirofish.llm')
class LLMClient:
"""LLM客户端"""
def __init__(
self,
api_key: Optional[str] = None,
base_url: Optional[str] = None,
model: Optional[str] = None
):
self.api_key = api_key or Config.LLM_API_KEY
self.base_url = base_url or Config.LLM_BASE_URL
self.model = model or Config.LLM_MODEL_NAME
if not self.api_key:
raise ValueError("LLM_API_KEY 未配置")
self.client = OpenAI(
api_key=self.api_key,
base_url=self.base_url
)
def chat(
self,
messages: List[Dict[str, str]],
temperature: float = 0.7,
max_tokens: int = 4096,
response_format: Optional[Dict] = None
) -> str:
"""
发送聊天请求
Args:
messages: 消息列表
temperature: 温度参数
max_tokens: 最大token数
response_format: 响应格式如JSON模式
Returns:
模型响应文本
"""
kwargs = {
"model": self.model,
"messages": messages,
"temperature": temperature,
"max_tokens": max_tokens,
}
if response_format:
kwargs["response_format"] = response_format
with main_llm_slot():
response = self.client.chat.completions.create(**kwargs)
content = response.choices[0].message.content
# 部分模型如MiniMax M2.5会在content中包含<think>思考内容,需要移除
content = re.sub(r'<think>[\s\S]*?</think>', '', content).strip()
content = re.sub(r'^Thinking Process:[\s\S]*?(?=\{|\[)', '', content).strip()
return content
def chat_json(
self,
messages: List[Dict[str, str]],
temperature: float = 0.3,
max_tokens: int = 4096,
max_retries: Optional[int] = None
) -> Dict[str, Any]:
"""
发送聊天请求并返回JSON
Args:
messages: 消息列表
temperature: 温度参数
max_tokens: 最大token数
Returns:
解析后的JSON对象
"""
retries = Config.LLM_JSON_MAX_RETRIES if max_retries is None else max_retries
last_error: Exception | None = None
last_response = ""
for attempt in range(retries + 1):
attempt_messages = list(messages)
if attempt > 0:
attempt_messages.append({
"role": "user",
"content": (
"The previous answer was not valid JSON. Return exactly one valid JSON object. "
"Do not include markdown fences, explanation, comments, or thinking text."
)
})
try:
response = self.chat(
messages=attempt_messages,
temperature=temperature,
max_tokens=max_tokens,
response_format={"type": "json_object"}
)
last_response = response
parsed = self._parse_json_lenient(response)
if parsed is not None:
if attempt > 0:
logger.info("LLM JSON recovered after retry %s/%s", attempt, retries)
return parsed
last_error = ValueError("LLM returned unparsable JSON")
logger.warning(
"LLM returned invalid JSON on attempt %s/%s: %s",
attempt + 1,
retries + 1,
self._cleanup_response(response)[:1000],
)
except Exception as exc:
last_error = exc
logger.warning(
"LLM JSON call failed on attempt %s/%s: %s",
attempt + 1,
retries + 1,
str(exc)[:1000],
)
if attempt < retries:
time.sleep(min(2.0, 0.5 * (attempt + 1)))
cleaned = self._cleanup_response(last_response)
if cleaned:
logger.error("LLM returned invalid JSON after retries: %s", cleaned[:2000])
raise ValueError(f"LLM返回的JSON格式无效: {cleaned[:1000]}")
raise last_error or ValueError("LLM JSON call failed")
def _parse_json_lenient(self, text: str) -> Dict[str, Any] | None:
cleaned = self._cleanup_response(text)
candidates = [cleaned]
for block in re.findall(r"```(?:json)?\s*([\s\S]*?)```", text or "", flags=re.IGNORECASE):
candidates.append(self._cleanup_response(block))
candidates.extend(self._balanced_json_objects(cleaned))
start = cleaned.find('{')
end = cleaned.rfind('}')
if start >= 0 and end > start:
candidates.append(cleaned[start:end + 1])
for candidate in candidates:
candidate = candidate.strip()
if not candidate:
continue
repaired = self._repair_json(candidate)
try:
parsed = json.loads(repaired)
return parsed if isinstance(parsed, dict) else None
except json.JSONDecodeError:
pass
return None
def _cleanup_response(self, text: str) -> str:
cleaned = text or ""
cleaned = cleaned.replace("\ufeff", "").replace("\u200b", "")
cleaned = re.sub(r"<think>[\s\S]*?</think>", "", cleaned).strip()
cleaned = re.sub(r"^Thinking Process:[\s\S]*?(?=\{|\[)", "", cleaned).strip()
cleaned = re.sub(r"^```(?:json)?\s*\n?", "", cleaned, flags=re.IGNORECASE)
cleaned = re.sub(r"\n?```\s*$", "", cleaned)
return cleaned.strip()
def _repair_json(self, text: str) -> str:
repaired = text.strip()
repaired = re.sub(r",(\s*[}\]])", r"\1", repaired)
repaired = re.sub(r"\bNone\b", "null", repaired)
repaired = re.sub(r"\bTrue\b", "true", repaired)
repaired = re.sub(r"\bFalse\b", "false", repaired)
return repaired
def _balanced_json_objects(self, text: str) -> List[str]:
objects: List[str] = []
start = None
depth = 0
in_string = False
escape = False
for index, char in enumerate(text or ""):
if in_string:
if escape:
escape = False
elif char == "\\":
escape = True
elif char == '"':
in_string = False
continue
if char == '"':
in_string = True
elif char == "{":
if depth == 0:
start = index
depth += 1
elif char == "}" and depth:
depth -= 1
if depth == 0 and start is not None:
objects.append(text[start:index + 1])
start = None
# Prefer larger objects first; noisy LLM text may contain small examples.
return sorted(objects, key=len, reverse=True)