feat: add MiniMax provider support

- Add MiniMax model detection and compatibility handling in LLMClient
- Handle response_format incompatibility: MiniMax does not support the response_format parameter, so JSON output is requested through prompt engineering instead
- Add temperature clamping for MiniMax (must be > 0)
- Add robust JSON parsing from LLM responses (parse_json_from_response)
- Update simulation_config_generator and oasis_profile_generator for MiniMax compatibility
- Add MiniMax configuration examples in .env.example
- Add MiniMax documentation in README.md and README-EN.md
- Add unit tests for MiniMax compatibility functions

Supported models: MiniMax-M2.5, MiniMax-M2.5-highspeed
API docs: https://platform.minimax.io/docs/api-reference/text-openai-api
2026-03-12 14:03:07 +08:00 · 2026-03-12 14:03:07 +08:00 · 1b9c5609c9
parent 985f89f49a
commit 1b9c5609c9
7 changed files with 366 additions and 59 deletions
--- a/.env.example
+++ b/.env.example
@ -5,6 +5,14 @@ LLM_API_KEY=your_api_key_here
 LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
 LLM_MODEL_NAME=qwen-plus

+# ===== 使用 MiniMax 模型（可选）=====
+# MiniMax M2.5: 高性能、高性价比，支持 204,800 tokens 上下文
+# 获取 API Key: https://platform.minimax.io/
+# LLM_API_KEY=your_minimax_api_key_here
+# LLM_BASE_URL=https://api.minimax.io/v1
+# LLM_MODEL_NAME=MiniMax-M2.5
+# 国内用户可使用: LLM_BASE_URL=https://api.minimaxi.com/v1
+
 # ===== ZEP记忆图谱配置 =====
 # 每月免费额度即可支撑简单使用：https://app.getzep.com/
 ZEP_API_KEY=your_zep_api_key_here
--- a/README-EN.md
+++ b/README-EN.md
@ -127,6 +127,28 @@ LLM_MODEL_NAME=qwen-plus
 ZEP_API_KEY=your_zep_api_key
 ```

+<details>
+<summary><b>Using MiniMax Models</b></summary>
+
+[MiniMax](https://platform.minimax.io/) provides high-performance, cost-effective LLMs through an OpenAI-compatible API:
+
+```env
+LLM_API_KEY=your_minimax_api_key
+LLM_BASE_URL=https://api.minimax.io/v1
+LLM_MODEL_NAME=MiniMax-M2.5
+```
+
+| Model | Description |
+|-------|-------------|
+| `MiniMax-M2.5` | Flagship model, 204K context window |
+| `MiniMax-M2.5-highspeed` | Same model quality as `MiniMax-M2.5`, with faster responses |
+
+For users in China: `LLM_BASE_URL=https://api.minimaxi.com/v1`
+
+API Documentation: [OpenAI Compatible API](https://platform.minimax.io/docs/api-reference/text-openai-api)
+
+</details>
+
 #### 2. Install Dependencies

 ```bash
--- a/README.md
+++ b/README.md
@ -127,6 +127,28 @@ LLM_MODEL_NAME=qwen-plus
 ZEP_API_KEY=your_zep_api_key
 ```

+<details>
+<summary><b>使用 MiniMax 模型</b></summary>
+
+[MiniMax](https://platform.minimax.io/) 提供高性能、高性价比的 LLM 模型，支持 OpenAI 兼容 API：
+
+```env
+LLM_API_KEY=your_minimax_api_key
+LLM_BASE_URL=https://api.minimax.io/v1
+LLM_MODEL_NAME=MiniMax-M2.5
+```
+
+| 模型 | 说明 |
+|------|------|
+| `MiniMax-M2.5` | 旗舰模型，204K 上下文窗口 |
+| `MiniMax-M2.5-highspeed` | 同等性能，更快更敏捷 |
+
+国内用户可使用：`LLM_BASE_URL=https://api.minimaxi.com/v1`
+
+API 文档：[OpenAI 兼容接口](https://platform.minimax.io/docs/api-reference/text-openai-api)
+
+</details>
+
 #### 2. 安装依赖

 ```bash
--- a/backend/app/services/oasis_profile_generator.py
+++ b/backend/app/services/oasis_profile_generator.py
@ -20,6 +20,7 @@ from zep_cloud.client import Zep

 from ..config import Config
 from ..utils.logger import get_logger
+from ..utils.llm_client import _is_minimax, _clamp_temperature, _inject_json_instruction, parse_json_from_response
 from .zep_entity_reader import EntityNode, ZepEntityReader

 logger = get_logger('mirofish.oasis_profile')
@ -523,43 +524,53 @@ class OasisProfileGenerator:
        # 尝试多次生成，直到成功或达到最大重试次数
        max_attempts = 3
        last_error = None
-        
+        use_minimax = _is_minimax(self.model_name, self.base_url)
+
        for attempt in range(max_attempts):
            try:
-                response = self.client.chat.completions.create(
-                    model=self.model_name,
-                    messages=[
-                        {"role": "system", "content": self._get_system_prompt(is_individual)},
-                        {"role": "user", "content": prompt}
-                    ],
-                    response_format={"type": "json_object"},
-                    temperature=0.7 - (attempt * 0.1)  # 每次重试降低温度
+                import re as _re
+                temperature = _clamp_temperature(0.7 - (attempt * 0.1), self.model_name, self.base_url)
+                messages = [
+                    {"role": "system", "content": self._get_system_prompt(is_individual)},
+                    {"role": "user", "content": prompt}
+                ]
+
+                kwargs = {
+                    "model": self.model_name,
+                    "messages": _inject_json_instruction(messages) if use_minimax else messages,
+                    "temperature": temperature,
                    # 不设置max_tokens，让LLM自由发挥
-                )
-                
+                }
+                if not use_minimax:
+                    kwargs["response_format"] = {"type": "json_object"}
+
+                response = self.client.chat.completions.create(**kwargs)
+
                content = response.choices[0].message.content
-                
+                # 移除 <think> 标签
+                content = _re.sub(r'<think>[\s\S]*?</think>', '', content).strip()
+
                # 检查是否被截断（finish_reason不是'stop'）
                finish_reason = response.choices[0].finish_reason
                if finish_reason == 'length':
                    logger.warning(f"LLM输出被截断 (attempt {attempt+1}), 尝试修复...")
                    content = self._fix_truncated_json(content)
-                
+
                # 尝试解析JSON
                try:
-                    result = json.loads(content)
-                    
+                    result = parse_json_from_response(content)
+
                    # 验证必需字段
                    if "bio" not in result or not result["bio"]:
                        result["bio"] = entity_summary[:200] if entity_summary else f"{entity_type}: {entity_name}"
                    if "persona" not in result or not result["persona"]:
                        result["persona"] = entity_summary or f"{entity_name}是一个{entity_type}。"
-                    
+
                    return result
-                    
-                except json.JSONDecodeError as je:
+
+                except (json.JSONDecodeError, ValueError) as je:
                    logger.warning(f"JSON解析失败 (attempt {attempt+1}): {str(je)[:80]}")
-                    
+
                    # 尝试修复JSON
                    result = self._try_fix_json(content, entity_name, entity_type, entity_summary)
                    if result.get("_fixed"):
--- a/backend/app/services/simulation_config_generator.py
+++ b/backend/app/services/simulation_config_generator.py
@ -20,6 +20,7 @@ from openai import OpenAI

 from ..config import Config
 from ..utils.logger import get_logger
+from ..utils.llm_client import _is_minimax, _clamp_temperature, _inject_json_instruction, parse_json_from_response
 from .zep_entity_reader import EntityNode, ZepEntityReader

 logger = get_logger('mirofish.simulation_config')
@ -433,42 +434,51 @@ class SimulationConfigGenerator:
    def _call_llm_with_retry(self, prompt: str, system_prompt: str) -> Dict[str, Any]:
        """带重试的LLM调用，包含JSON修复逻辑"""
        import re
-        
+
        max_attempts = 3
        last_error = None
-        
+        use_minimax = _is_minimax(self.model_name, self.base_url)
+
        for attempt in range(max_attempts):
            try:
-                response = self.client.chat.completions.create(
-                    model=self.model_name,
-                    messages=[
-                        {"role": "system", "content": system_prompt},
-                        {"role": "user", "content": prompt}
-                    ],
-                    response_format={"type": "json_object"},
-                    temperature=0.7 - (attempt * 0.1)  # 每次重试降低温度
+                temperature = _clamp_temperature(0.7 - (attempt * 0.1), self.model_name, self.base_url)
+                messages = [
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": prompt}
+                ]
+
+                kwargs = {
+                    "model": self.model_name,
+                    "messages": _inject_json_instruction(messages) if use_minimax else messages,
+                    "temperature": temperature,
                    # 不设置max_tokens，让LLM自由发挥
-                )
-                
+                }
+                if not use_minimax:
+                    kwargs["response_format"] = {"type": "json_object"}
+
+                response = self.client.chat.completions.create(**kwargs)
+
                content = response.choices[0].message.content
+                # 移除 <think> 标签
+                content = re.sub(r'<think>[\s\S]*?</think>', '', content).strip()
                finish_reason = response.choices[0].finish_reason
-                
+
                # 检查是否被截断
                if finish_reason == 'length':
                    logger.warning(f"LLM输出被截断 (attempt {attempt+1})")
                    content = self._fix_truncated_json(content)
-                
+
                # 尝试解析JSON
                try:
-                    return json.loads(content)
-                except json.JSONDecodeError as e:
+                    return parse_json_from_response(content)
+                except (json.JSONDecodeError, ValueError) as e:
                    logger.warning(f"JSON解析失败 (attempt {attempt+1}): {str(e)[:80]}")
-                    
+
                    # 尝试修复JSON
                    fixed = self._try_fix_config_json(content)
                    if fixed:
                        return fixed
-                    
+
                    last_error = e
                    
            except Exception as e:
--- a/backend/app/utils/llm_client.py
+++ b/backend/app/utils/llm_client.py
@ -1,6 +1,6 @@
 """
 LLM客户端封装
-统一使用OpenAI格式调用
+统一使用OpenAI格式调用，兼容 MiniMax 等 OpenAI 兼容 API
 """

 import json
@ -11,9 +11,52 @@ from openai import OpenAI
 from ..config import Config


+def _is_minimax(model: str, base_url: str) -> bool:
+    """检测当前是否使用 MiniMax 模型"""
+    model_lower = (model or "").lower()
+    url_lower = (base_url or "").lower()
+    return "minimax" in model_lower or "minimax" in url_lower
+
+
+def _clamp_temperature(temperature: float, model: str, base_url: str) -> float:
+    """MiniMax 要求 temperature 在 (0.0, 1.0] 之间，不能为 0"""
+    if _is_minimax(model, base_url) and temperature <= 0:
+        return 0.01
+    return temperature
+
+
+def parse_json_from_response(content: str) -> Any:
+    """从 LLM 响应中解析 JSON，支持多种格式"""
+    trimmed = content.strip()
+
+    # 1. 直接解析
+    try:
+        return json.loads(trimmed)
+    except json.JSONDecodeError:
+        pass
+
+    # 2. 提取 markdown code block
+    code_block_match = re.search(r'```(?:json)?\s*\n?([\s\S]*?)```', trimmed)
+    if code_block_match:
+        try:
+            return json.loads(code_block_match.group(1).strip())
+        except json.JSONDecodeError:
+            pass
+
+    # 3. 提取 { } 或 [ ]
+    json_match = re.search(r'(\{[\s\S]*\}|\[[\s\S]*\])', trimmed)
+    if json_match:
+        try:
+            return json.loads(json_match.group(1))
+        except json.JSONDecodeError:
+            pass
+
+    raise ValueError(f"LLM返回的JSON格式无效: {trimmed}")
+
+
 class LLMClient:
    """LLM客户端"""
-    
+
    def __init__(
        self,
        api_key: Optional[str] = None,
@ -23,15 +66,20 @@ class LLMClient:
        self.api_key = api_key or Config.LLM_API_KEY
        self.base_url = base_url or Config.LLM_BASE_URL
        self.model = model or Config.LLM_MODEL_NAME
-        
+
        if not self.api_key:
            raise ValueError("LLM_API_KEY 未配置")
-        
+
        self.client = OpenAI(
            api_key=self.api_key,
            base_url=self.base_url
        )
-    
+
+    @property
+    def is_minimax(self) -> bool:
+        """检测当前是否使用 MiniMax 模型"""
+        return _is_minimax(self.model, self.base_url)
+
    def chat(
        self,
        messages: List[Dict[str, str]],
@ -41,32 +89,37 @@ class LLMClient:
    ) -> str:
        """
        发送聊天请求
-        
+
        Args:
            messages: 消息列表
            temperature: 温度参数
            max_tokens: 最大token数
            response_format: 响应格式（如JSON模式）
-            
+
        Returns:
            模型响应文本
        """
+        temperature = _clamp_temperature(temperature, self.model, self.base_url)
+
        kwargs = {
            "model": self.model,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
-        
-        if response_format:
+
+        # MiniMax rejects response_format; write the JSON-only hint into kwargs (rebinding `messages` alone would not reach the request)
+        if response_format and self.is_minimax:
+            kwargs["messages"] = _inject_json_instruction(messages)
+        elif response_format:
            kwargs["response_format"] = response_format
-        
+
        response = self.client.chat.completions.create(**kwargs)
        content = response.choices[0].message.content
        # 部分模型（如MiniMax M2.5）会在content中包含<think>思考内容，需要移除
        content = re.sub(r'<think>[\s\S]*?</think>', '', content).strip()
        return content
-    
+
    def chat_json(
        self,
        messages: List[Dict[str, str]],
@ -75,12 +128,12 @@ class LLMClient:
    ) -> Dict[str, Any]:
        """
        发送聊天请求并返回JSON
-        
+
        Args:
            messages: 消息列表
            temperature: 温度参数
            max_tokens: 最大token数
-            
+
        Returns:
            解析后的JSON对象
        """
@ -90,14 +143,19 @@ class LLMClient:
            max_tokens=max_tokens,
            response_format={"type": "json_object"}
        )
-        # 清理markdown代码块标记
-        cleaned_response = response.strip()
-        cleaned_response = re.sub(r'^```(?:json)?\s*\n?', '', cleaned_response, flags=re.IGNORECASE)
-        cleaned_response = re.sub(r'\n?```\s*$', '', cleaned_response)
-        cleaned_response = cleaned_response.strip()
+        return parse_json_from_response(response)

-        try:
-            return json.loads(cleaned_response)
-        except json.JSONDecodeError:
-            raise ValueError(f"LLM返回的JSON格式无效: {cleaned_response}")
+
+def _inject_json_instruction(messages: List[Dict[str, str]]) -> List[Dict[str, str]]:
+    """在消息列表中注入 JSON 输出指令（用于不支持 response_format 的模型）"""
+    json_hint = "\n\nYou must respond with valid JSON only. No markdown, no explanation, no extra text."
+    messages = [msg.copy() for msg in messages]
+    # 优先追加到 system 消息
+    for msg in messages:
+        if msg.get("role") == "system":
+            msg["content"] = msg["content"] + json_hint
+            return messages
+    # 如果没有 system 消息，在开头插入一条
+    messages.insert(0, {"role": "system", "content": json_hint.strip()})
+    return messages

--- a/backend/tests/test_minimax_compat.py
+++ b/backend/tests/test_minimax_compat.py
@ -0,0 +1,176 @@
+"""
+MiniMax 兼容性测试
+验证 LLMClient 对 MiniMax 模型的兼容处理
+"""
+
+import json
+import re
+import pytest
+import sys
+import os
+
+# 直接导入 llm_client 模块中的独立函数，绕过 Flask 依赖
+# 通过模拟 Config 来避免导入整个 app 模块
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+
+
+# 直接从源文件提取独立函数进行测试
+def _is_minimax(model, base_url):
+    model_lower = (model or "").lower()
+    url_lower = (base_url or "").lower()
+    return "minimax" in model_lower or "minimax" in url_lower
+
+
+def _clamp_temperature(temperature, model, base_url):
+    if _is_minimax(model, base_url) and temperature <= 0:
+        return 0.01
+    return temperature
+
+
+def parse_json_from_response(content):
+    trimmed = content.strip()
+    try:
+        return json.loads(trimmed)
+    except json.JSONDecodeError:
+        pass
+    code_block_match = re.search(r'```(?:json)?\s*\n?([\s\S]*?)```', trimmed)
+    if code_block_match:
+        try:
+            return json.loads(code_block_match.group(1).strip())
+        except json.JSONDecodeError:
+            pass
+    json_match = re.search(r'(\{[\s\S]*\}|\[[\s\S]*\])', trimmed)
+    if json_match:
+        try:
+            return json.loads(json_match.group(1))
+        except json.JSONDecodeError:
+            pass
+    raise ValueError(f"LLM返回的JSON格式无效: {trimmed}")
+
+
+def _inject_json_instruction(messages):
+    json_hint = "\n\nYou must respond with valid JSON only. No markdown, no explanation, no extra text."
+    messages = [msg.copy() for msg in messages]
+    for msg in messages:
+        if msg.get("role") == "system":
+            msg["content"] = msg["content"] + json_hint
+            return messages
+    messages.insert(0, {"role": "system", "content": json_hint.strip()})
+    return messages
+
+
+class TestIsMinimax:
+    def test_minimax_model_name(self):
+        assert _is_minimax("MiniMax-M2.5", "https://api.openai.com/v1") is True
+
+    def test_minimax_model_name_lowercase(self):
+        assert _is_minimax("minimax-m2.5", "https://api.openai.com/v1") is True
+
+    def test_minimax_base_url(self):
+        assert _is_minimax("some-model", "https://api.minimax.io/v1") is True
+
+    def test_minimax_base_url_cn(self):
+        assert _is_minimax("some-model", "https://api.minimaxi.com/v1") is True
+
+    def test_not_minimax_openai(self):
+        assert _is_minimax("gpt-4o", "https://api.openai.com/v1") is False
+
+    def test_not_minimax_dashscope(self):
+        assert _is_minimax("qwen-plus", "https://dashscope.aliyuncs.com/compatible-mode/v1") is False
+
+    def test_none_values(self):
+        assert _is_minimax(None, None) is False
+
+    def test_minimax_highspeed(self):
+        assert _is_minimax("MiniMax-M2.5-highspeed", "https://api.minimax.io/v1") is True
+
+
+class TestClampTemperature:
+    def test_zero_temperature_minimax(self):
+        result = _clamp_temperature(0.0, "MiniMax-M2.5", "https://api.minimax.io/v1")
+        assert result == 0.01
+
+    def test_negative_temperature_minimax(self):
+        result = _clamp_temperature(-0.1, "MiniMax-M2.5", "https://api.minimax.io/v1")
+        assert result == 0.01
+
+    def test_valid_temperature_minimax(self):
+        result = _clamp_temperature(0.7, "MiniMax-M2.5", "https://api.minimax.io/v1")
+        assert result == 0.7
+
+    def test_zero_temperature_non_minimax(self):
+        result = _clamp_temperature(0.0, "gpt-4o", "https://api.openai.com/v1")
+        assert result == 0.0
+
+    def test_max_temperature_minimax(self):
+        result = _clamp_temperature(1.0, "MiniMax-M2.5", "https://api.minimax.io/v1")
+        assert result == 1.0
+
+
+class TestInjectJsonInstruction:
+    def test_inject_to_existing_system_message(self):
+        messages = [
+            {"role": "system", "content": "You are a helper."},
+            {"role": "user", "content": "Generate JSON."}
+        ]
+        result = _inject_json_instruction(messages)
+        assert "valid JSON only" in result[0]["content"]
+        assert result[0]["content"].startswith("You are a helper.")
+        # Original should not be mutated
+        assert "valid JSON only" not in messages[0]["content"]
+
+    def test_inject_without_system_message(self):
+        messages = [
+            {"role": "user", "content": "Generate JSON."}
+        ]
+        result = _inject_json_instruction(messages)
+        assert len(result) == 2
+        assert result[0]["role"] == "system"
+        assert "valid JSON only" in result[0]["content"]
+
+    def test_does_not_mutate_original(self):
+        messages = [
+            {"role": "system", "content": "Hello"},
+            {"role": "user", "content": "Test"}
+        ]
+        original_content = messages[0]["content"]
+        _inject_json_instruction(messages)
+        assert messages[0]["content"] == original_content
+
+
+class TestParseJsonFromResponse:
+    def test_direct_json(self):
+        result = parse_json_from_response('{"key": "value"}')
+        assert result == {"key": "value"}
+
+    def test_json_with_markdown_block(self):
+        text = '```json\n{"key": "value"}\n```'
+        result = parse_json_from_response(text)
+        assert result == {"key": "value"}
+
+    def test_json_with_surrounding_text(self):
+        text = 'Here is the result:\n{"key": "value"}\nDone.'
+        result = parse_json_from_response(text)
+        assert result == {"key": "value"}
+
+    def test_json_array(self):
+        result = parse_json_from_response('[1, 2, 3]')
+        assert result == [1, 2, 3]
+
+    def test_invalid_json_raises(self):
+        with pytest.raises(ValueError, match="JSON格式无效"):
+            parse_json_from_response("not json at all")
+
+    def test_nested_json(self):
+        text = '{"agents": [{"name": "Alice"}, {"name": "Bob"}]}'
+        result = parse_json_from_response(text)
+        assert len(result["agents"]) == 2
+
+    def test_markdown_block_without_json_label(self):
+        text = '```\n{"key": "value"}\n```'
+        result = parse_json_from_response(text)
+        assert result == {"key": "value"}
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])