Fix HTTP 500 errors with GPT-5 family models

2026-04-08 11:29:37 -03:00 · 2026-04-08 11:29:37 -03:00 · 7e44b9cfff
parent fa0f6519b1
commit 7e44b9cfff
5 changed files with 196 additions and 22 deletions
--- a/backend/app/services/oasis_profile_generator.py
+++ b/backend/app/services/oasis_profile_generator.py
@ -21,6 +21,7 @@ from zep_cloud.client import Zep
 from ..config import Config
 from ..utils.logger import get_logger
 from ..utils.locale import get_language_instruction, get_locale, set_locale, t
+from ..utils.openai_chat_compat import create_chat_completion, extract_chat_completion_text
 from .zep_entity_reader import EntityNode, ZepEntityReader

 logger = get_logger('mirofish.oasis_profile')
@ -527,18 +528,19 @@ class OasisProfileGenerator:
        
        for attempt in range(max_attempts):
            try:
-                response = self.client.chat.completions.create(
+                response = create_chat_completion(
+                    self.client,
                    model=self.model_name,
                    messages=[
                        {"role": "system", "content": self._get_system_prompt(is_individual)},
                        {"role": "user", "content": prompt}
                    ],
                    response_format={"type": "json_object"},
-                    temperature=0.7 - (attempt * 0.1)  # 每次重试降低温度
+                    temperature=0.7 - (attempt * 0.1),  # 每次重试降低温度
                    # 不设置max_tokens，让LLM自由发挥
                )
                
-                content = response.choices[0].message.content
+                content = extract_chat_completion_text(response)
                
                # 检查是否被截断（finish_reason不是'stop'）
                finish_reason = response.choices[0].finish_reason
--- a/backend/app/services/simulation_config_generator.py
+++ b/backend/app/services/simulation_config_generator.py
@ -21,6 +21,7 @@ from openai import OpenAI
 from ..config import Config
 from ..utils.logger import get_logger
 from ..utils.locale import get_language_instruction, t
+from ..utils.openai_chat_compat import create_chat_completion, extract_chat_completion_text
 from .zep_entity_reader import EntityNode, ZepEntityReader

 logger = get_logger('mirofish.simulation_config')
@ -440,18 +441,19 @@ class SimulationConfigGenerator:
        
        for attempt in range(max_attempts):
            try:
-                response = self.client.chat.completions.create(
+                response = create_chat_completion(
+                    self.client,
                    model=self.model_name,
                    messages=[
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": prompt}
                    ],
                    response_format={"type": "json_object"},
-                    temperature=0.7 - (attempt * 0.1)  # 每次重试降低温度
+                    temperature=0.7 - (attempt * 0.1),  # 每次重试降低温度
                    # 不设置max_tokens，让LLM自由发挥
                )
                
-                content = response.choices[0].message.content
+                content = extract_chat_completion_text(response)
                finish_reason = response.choices[0].finish_reason
                
                # 检查是否被截断
--- a/backend/app/utils/llm_client.py
+++ b/backend/app/utils/llm_client.py
@ -9,6 +9,7 @@ from typing import Optional, Dict, Any, List
 from openai import OpenAI

 from ..config import Config
+from .openai_chat_compat import create_chat_completion, extract_chat_completion_text


 class LLMClient:
@ -51,18 +52,15 @@ class LLMClient:
        Returns:
            模型响应文本
        """
-        kwargs = {
-            "model": self.model,
-            "messages": messages,
-            "temperature": temperature,
-            "max_tokens": max_tokens,
-        }
-        
-        if response_format:
-            kwargs["response_format"] = response_format
-        
-        response = self.client.chat.completions.create(**kwargs)
-        content = response.choices[0].message.content
+        response = create_chat_completion(
+            self.client,
+            model=self.model,
+            messages=messages,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            response_format=response_format,
+        )
+        content = extract_chat_completion_text(response)
        # 部分模型（如MiniMax M2.5）会在content中包含<think>思考内容，需要移除
        content = re.sub(r'<think>[\s\S]*?</think>', '', content).strip()
        return content
--- a/backend/app/utils/openai_chat_compat.py
+++ b/backend/app/utils/openai_chat_compat.py
@ -0,0 +1,176 @@
+"""
+OpenAI Chat Completions compatibility helpers.
+
+This module keeps existing behavior for legacy models/providers while
+gracefully adapting request parameters for GPT-5 family models.
+"""
+
+from __future__ import annotations
+
+from typing import Any, Dict, List, Optional
+
+
+# Lowercase phrases that mark an API error as a "parameter not accepted"
+# failure. `_is_unsupported_param_error` looks for any of them as a substring
+# of the lowercased error message.
+UNSUPPORTED_PARAM_HINTS = (
+    "unsupported",
+    "not supported",
+    "does not support",
+    "unknown parameter",
+    "unexpected keyword",
+    "extra fields",
+    "only supported",
+)
+
+
+def is_gpt5_family(model: Optional[str]) -> bool:
+    """Tell whether ``model`` names a GPT-5 family alias or snapshot.
+
+    The check is a case-insensitive ``gpt-5`` prefix match on the name with
+    surrounding whitespace removed; ``None`` and the empty string yield False.
+    """
+    normalized = (model or "").strip().lower()
+    return normalized.startswith("gpt-5")
+
+
+def _is_unsupported_param_error(message: str, param_name: str) -> bool:
+    """Decide whether an error message says ``param_name`` is not accepted.
+
+    Both the parameter name and one of ``UNSUPPORTED_PARAM_HINTS`` must occur
+    in the message; matching is case-insensitive and substring based. A falsy
+    message never matches.
+    """
+    lowered = (message or "").lower()
+    mentions_param = param_name.lower() in lowered
+    return mentions_param and any(hint in lowered for hint in UNSUPPORTED_PARAM_HINTS)
+
+
+def _extract_error_message(error: Exception) -> str:
+    """Return the text used to classify ``error``: its ``str()`` form."""
+    # Deliberately generic: the string form of openai.BadRequestError usually
+    # carries the API message already.
+    message = str(error)
+    return message
+
+
+def create_chat_completion(
+    client: Any,
+    *,
+    model: str,
+    messages: List[Dict[str, Any]],
+    temperature: Optional[float] = None,
+    max_tokens: Optional[int] = None,
+    response_format: Optional[Dict[str, Any]] = None,
+    extra_params: Optional[Dict[str, Any]] = None,
+    max_attempts: int = 4,
+) -> Any:
+    """
+    Create a chat completion with adaptive parameter fallback.
+
+    Compatibility strategy:
+    - For GPT-5 family, avoid sending temperature by default.
+    - For token limit, try `max_completion_tokens` on GPT-5, `max_tokens` otherwise.
+    - On parameter-support errors, adapt and retry without changing caller behavior.
+
+    Args:
+        client: Object exposing ``chat.completions.create(**kwargs)``.
+        model: Model name; also selects the GPT-5 specific defaults.
+        messages: Chat messages, passed through unchanged.
+        temperature: Sampling temperature. Omitted when None or when the
+            model is in the GPT-5 family.
+        max_tokens: Output token limit. Sent as ``max_completion_tokens`` for
+            GPT-5 family models and as ``max_tokens`` otherwise.
+        response_format: Passed through when not None.
+        extra_params: Extra request keyword arguments. They are applied last
+            and therefore override the values derived above.
+        max_attempts: Upper bound on the number of requests issued.
+
+    Returns:
+        Whatever ``client.chat.completions.create`` returns.
+
+    Raises:
+        Exception: The client's own error, re-raised when it is not a
+            recognised parameter-support error for a parameter that was sent,
+            or when the attempts are exhausted.
+        RuntimeError: When no request was attempted (``max_attempts`` < 1).
+    """
+    kwargs: Dict[str, Any] = {
+        "model": model,
+        "messages": messages,
+    }
+
+    if response_format is not None:
+        kwargs["response_format"] = response_format
+
+    # GPT-5 family rejects temperature unless reasoning effort is explicitly `none`.
+    if temperature is not None and not is_gpt5_family(model):
+        kwargs["temperature"] = temperature
+
+    if max_tokens is not None:
+        if is_gpt5_family(model):
+            kwargs["max_completion_tokens"] = max_tokens
+        else:
+            kwargs["max_tokens"] = max_tokens
+
+    if extra_params:
+        kwargs.update(extra_params)
+
+    attempted_signatures = set()
+    unsupported_params = set()
+    last_error: Optional[Exception] = None
+
+    for _ in range(max_attempts):
+        # An attempt is identified by its parameter names only; seeing the
+        # same set twice means the fallbacks are going in circles.
+        signature = tuple(sorted(kwargs))
+        if signature in attempted_signatures:
+            break
+        attempted_signatures.add(signature)
+
+        try:
+            return client.chat.completions.create(**kwargs)
+        except Exception as error:
+            last_error = error
+            error_msg = _extract_error_message(error)
+
+            # Decide which parameters were rejected from what THIS request
+            # actually sent, before applying any fallback. An error such as
+            # "'max_tokens' is not supported ... Use 'max_completion_tokens'
+            # instead" names both token parameters; judging against kwargs
+            # while mutating it would strip the freshly added replacement
+            # and silently drop the caller's token limit.
+            rejected = {
+                name
+                for name in (
+                    "temperature",
+                    "response_format",
+                    "max_tokens",
+                    "max_completion_tokens",
+                )
+                if name in kwargs and _is_unsupported_param_error(error_msg, name)
+            }
+
+            if not rejected:
+                # Not a parameter-support problem we know how to adapt to.
+                raise
+
+            for name in ("temperature", "response_format"):
+                if name in rejected:
+                    kwargs.pop(name)
+                    unsupported_params.add(name)
+
+            # Token limits have two spellings: move the value to the other
+            # one unless that spelling has itself been rejected already.
+            for name, alternative in (
+                ("max_tokens", "max_completion_tokens"),
+                ("max_completion_tokens", "max_tokens"),
+            ):
+                if name in rejected:
+                    token_value = kwargs.pop(name)
+                    unsupported_params.add(name)
+                    if alternative not in unsupported_params:
+                        kwargs[alternative] = token_value
+
+    if last_error:
+        raise last_error
+    raise RuntimeError("Chat completion failed with unknown error.")
+
+
+def extract_chat_completion_text(response: Any) -> str:
+    """Pull the first choice's message text out of a chat completion response.
+
+    String content is returned as-is. List content is flattened: for each
+    part (a dict or an attribute-style object) the ``text`` field is used,
+    unwrapping ``{"value": ...}`` when it is a dict, with a string ``content``
+    field as the fallback; the pieces are concatenated and stripped. A
+    response without choices or without a message yields ``""``; any other
+    content is passed through ``str()``, with falsy values becoming ``""``.
+    """
+    choices = getattr(response, "choices", None) or []
+    message = getattr(choices[0], "message", None) if choices else None
+    if message is None:
+        return ""
+
+    content = getattr(message, "content", "")
+    if isinstance(content, str):
+        return content
+    if not isinstance(content, list):
+        return str(content or "")
+
+    def read_field(part: Any, key: str) -> Any:
+        # Parts may be plain dicts or SDK objects; read both the same way.
+        if isinstance(part, dict):
+            return part.get(key)
+        return getattr(part, key, None)
+
+    pieces: List[str] = []
+    for part in content:
+        candidate = read_field(part, "text")
+        if isinstance(candidate, dict):
+            candidate = candidate.get("value")
+        if not isinstance(candidate, str):
+            # No usable text field: fall back to the part's content field.
+            candidate = read_field(part, "content")
+        if isinstance(candidate, str):
+            pieces.append(candidate)
+
+    return "".join(pieces).strip()
--- a/frontend/package-lock.json
+++ b/frontend/package-lock.json
@ -1435,7 +1435,6 @@
      "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz",
      "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==",
      "license": "ISC",
-      "peer": true,
      "engines": {
        "node": ">=12"
      }
@ -1913,7 +1912,6 @@
      "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
      "dev": true,
      "license": "MIT",
-      "peer": true,
      "engines": {
        "node": ">=12"
      },
@ -2053,7 +2051,6 @@
      "integrity": "sha512-ITcnkFeR3+fI8P1wMgItjGrR10170d8auB4EpMLPqmx6uxElH3a/hHGQabSHKdqd4FXWO1nFIp9rRn7JQ34ACQ==",
      "dev": true,
      "license": "MIT",
-      "peer": true,
      "dependencies": {
        "esbuild": "^0.25.0",
        "fdir": "^6.5.0",
@ -2128,7 +2125,6 @@
      "resolved": "https://registry.npmjs.org/vue/-/vue-3.5.25.tgz",
      "integrity": "sha512-YLVdgv2K13WJ6n+kD5owehKtEXwdwXuj2TTyJMsO7pSeKw2bfRNZGjhB7YzrpbMYj5b5QsUebHpOqR3R3ziy/g==",
      "license": "MIT",
-      "peer": true,
      "dependencies": {
        "@vue/compiler-dom": "3.5.25",
        "@vue/compiler-sfc": "3.5.25",