diff --git a/backend/app/services/oasis_profile_generator.py b/backend/app/services/oasis_profile_generator.py index 7704a627..5847bcd1 100644 --- a/backend/app/services/oasis_profile_generator.py +++ b/backend/app/services/oasis_profile_generator.py @@ -21,6 +21,7 @@ from zep_cloud.client import Zep from ..config import Config from ..utils.logger import get_logger from ..utils.locale import get_language_instruction, get_locale, set_locale, t +from ..utils.openai_chat_compat import create_chat_completion, extract_chat_completion_text from .zep_entity_reader import EntityNode, ZepEntityReader logger = get_logger('mirofish.oasis_profile') @@ -527,18 +528,19 @@ class OasisProfileGenerator: for attempt in range(max_attempts): try: - response = self.client.chat.completions.create( + response = create_chat_completion( + self.client, model=self.model_name, messages=[ {"role": "system", "content": self._get_system_prompt(is_individual)}, {"role": "user", "content": prompt} ], response_format={"type": "json_object"}, - temperature=0.7 - (attempt * 0.1) # 每次重试降低温度 + temperature=0.7 - (attempt * 0.1), # 每次重试降低温度 # 不设置max_tokens,让LLM自由发挥 ) - content = response.choices[0].message.content + content = extract_chat_completion_text(response) # 检查是否被截断(finish_reason不是'stop') finish_reason = response.choices[0].finish_reason diff --git a/backend/app/services/simulation_config_generator.py b/backend/app/services/simulation_config_generator.py index cb77f6b6..2c6fbbf1 100644 --- a/backend/app/services/simulation_config_generator.py +++ b/backend/app/services/simulation_config_generator.py @@ -21,6 +21,7 @@ from openai import OpenAI from ..config import Config from ..utils.logger import get_logger from ..utils.locale import get_language_instruction, t +from ..utils.openai_chat_compat import create_chat_completion, extract_chat_completion_text from .zep_entity_reader import EntityNode, ZepEntityReader logger = get_logger('mirofish.simulation_config') @@ -440,18 +441,19 @@ class SimulationConfigGenerator: for attempt in range(max_attempts): try: - response = self.client.chat.completions.create( + response = create_chat_completion( + self.client, model=self.model_name, messages=[ {"role": "system", "content": system_prompt}, {"role": "user", "content": prompt} ], response_format={"type": "json_object"}, - temperature=0.7 - (attempt * 0.1) # 每次重试降低温度 + temperature=0.7 - (attempt * 0.1), # 每次重试降低温度 # 不设置max_tokens,让LLM自由发挥 ) - content = response.choices[0].message.content + content = extract_chat_completion_text(response) finish_reason = response.choices[0].finish_reason # 检查是否被截断 diff --git a/backend/app/utils/llm_client.py b/backend/app/utils/llm_client.py index 6c1a81f4..fc316fce 100644 --- a/backend/app/utils/llm_client.py +++ b/backend/app/utils/llm_client.py @@ -9,6 +9,7 @@ from typing import Optional, Dict, Any, List from openai import OpenAI from ..config import Config +from .openai_chat_compat import create_chat_completion, extract_chat_completion_text class LLMClient: @@ -51,18 +52,15 @@ class LLMClient: Returns: 模型响应文本 """ - kwargs = { - "model": self.model, - "messages": messages, - "temperature": temperature, - "max_tokens": max_tokens, - } - - if response_format: - kwargs["response_format"] = response_format - - response = self.client.chat.completions.create(**kwargs) - content = response.choices[0].message.content + response = create_chat_completion( + self.client, + model=self.model, + messages=messages, + temperature=temperature, + max_tokens=max_tokens, + response_format=response_format, + ) + content = extract_chat_completion_text(response) # 部分模型(如MiniMax M2.5)会在content中包含思考内容,需要移除 content = re.sub(r'[\s\S]*?', '', content).strip() return content diff --git a/backend/app/utils/openai_chat_compat.py b/backend/app/utils/openai_chat_compat.py new file mode 100644 index 00000000..44ee941a --- /dev/null +++ b/backend/app/utils/openai_chat_compat.py @@ -0,0 +1,176 @@ +""" +OpenAI Chat Completions compatibility helpers. + +This module keeps existing behavior for legacy models/providers while +gracefully adapting request parameters for GPT-5 family models. +""" + +from __future__ import annotations + +from typing import Any, Dict, List, Optional + + +UNSUPPORTED_PARAM_HINTS = ( + "unsupported", + "not supported", + "does not support", + "unknown parameter", + "unexpected keyword", + "extra fields", + "only supported", +) + + +def is_gpt5_family(model: Optional[str]) -> bool: + """Return True when model belongs to GPT-5 family aliases/snapshots.""" + if not model: + return False + return model.strip().lower().startswith("gpt-5") + + +def _is_unsupported_param_error(message: str, param_name: str) -> bool: + msg = (message or "").lower() + if param_name.lower() not in msg: + return False + return any(hint in msg for hint in UNSUPPORTED_PARAM_HINTS) + + +def _extract_error_message(error: Exception) -> str: + # openai.BadRequestError string usually includes the API message; keep it generic + return str(error) + + +def create_chat_completion( + client: Any, + *, + model: str, + messages: List[Dict[str, Any]], + temperature: Optional[float] = None, + max_tokens: Optional[int] = None, + response_format: Optional[Dict[str, Any]] = None, + extra_params: Optional[Dict[str, Any]] = None, + max_attempts: int = 4, +) -> Any: + """ + Create a chat completion with adaptive parameter fallback. + + Compatibility strategy: + - For GPT-5 family, avoid sending temperature by default. + - For token limit, try `max_completion_tokens` on GPT-5, `max_tokens` otherwise. + - On parameter-support errors, adapt and retry without changing caller behavior. + """ + kwargs: Dict[str, Any] = { + "model": model, + "messages": messages, + } + + if response_format is not None: + kwargs["response_format"] = response_format + + # GPT-5 family rejects temperature unless reasoning effort is explicitly `none`. + if temperature is not None and not is_gpt5_family(model): + kwargs["temperature"] = temperature + + if max_tokens is not None: + if is_gpt5_family(model): + kwargs["max_completion_tokens"] = max_tokens + else: + kwargs["max_tokens"] = max_tokens + + if extra_params: + kwargs.update(extra_params) + + attempted_signatures = set() + unsupported_params = set() + last_error: Optional[Exception] = None + + for _ in range(max_attempts): + signature = tuple(sorted(kwargs.keys())) + if signature in attempted_signatures: + break + attempted_signatures.add(signature) + + try: + return client.chat.completions.create(**kwargs) + except Exception as error: + last_error = error + error_msg = _extract_error_message(error) + changed = False + + if _is_unsupported_param_error(error_msg, "temperature") and "temperature" in kwargs: + kwargs.pop("temperature", None) + unsupported_params.add("temperature") + changed = True + + if _is_unsupported_param_error(error_msg, "response_format") and "response_format" in kwargs: + kwargs.pop("response_format", None) + unsupported_params.add("response_format") + changed = True + + if _is_unsupported_param_error(error_msg, "max_tokens") and "max_tokens" in kwargs: + token_value = kwargs.pop("max_tokens") + unsupported_params.add("max_tokens") + if "max_completion_tokens" not in unsupported_params: + kwargs["max_completion_tokens"] = token_value + changed = True + + if ( + _is_unsupported_param_error(error_msg, "max_completion_tokens") + and "max_completion_tokens" in kwargs + ): + token_value = kwargs.pop("max_completion_tokens") + unsupported_params.add("max_completion_tokens") + if "max_tokens" not in unsupported_params: + kwargs["max_tokens"] = token_value + changed = True + + if not changed: + raise + + if last_error: + raise last_error + raise RuntimeError("Chat completion failed with unknown error.") + + +def extract_chat_completion_text(response: Any) -> str: + """Extract plain text from chat completion response across SDK content shapes.""" + choices = getattr(response, "choices", None) or [] + if not choices: + return "" + + message = getattr(choices[0], "message", None) + if message is None: + return "" + + content = getattr(message, "content", "") + + if isinstance(content, str): + return content + + if isinstance(content, list): + chunks: List[str] = [] + for item in content: + if isinstance(item, dict): + text_obj = item.get("text") + if isinstance(text_obj, dict): + text_obj = text_obj.get("value") + if isinstance(text_obj, str): + chunks.append(text_obj) + elif isinstance(item.get("content"), str): + chunks.append(item["content"]) + continue + + text_obj = getattr(item, "text", None) + if isinstance(text_obj, dict): + text_obj = text_obj.get("value") + if isinstance(text_obj, str): + chunks.append(text_obj) + continue + + content_obj = getattr(item, "content", None) + if isinstance(content_obj, str): + chunks.append(content_obj) + + return "".join(chunks).strip() + + return str(content or "") diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 3e56d752..fdab7ac4 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -1435,7 +1435,6 @@ "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz", "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==", "license": "ISC", - "peer": true, "engines": { "node": ">=12" } @@ -1913,7 +1912,6 @@ "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -2053,7 +2051,6 @@ "integrity": "sha512-ITcnkFeR3+fI8P1wMgItjGrR10170d8auB4EpMLPqmx6uxElH3a/hHGQabSHKdqd4FXWO1nFIp9rRn7JQ34ACQ==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "esbuild": "^0.25.0", "fdir": "^6.5.0", @@ -2128,7 +2125,6 @@ "resolved": "https://registry.npmjs.org/vue/-/vue-3.5.25.tgz", "integrity": "sha512-YLVdgv2K13WJ6n+kD5owehKtEXwdwXuj2TTyJMsO7pSeKw2bfRNZGjhB7YzrpbMYj5b5QsUebHpOqR3R3ziy/g==", "license": "MIT", - "peer": true, "dependencies": { "@vue/compiler-dom": "3.5.25", "@vue/compiler-sfc": "3.5.25",