feat: add MiniMax provider support

- Add MiniMax model detection and compatibility handling in LLMClient
- Handle response_format incompatibility: MiniMax does not support the
  response_format parameter, so prompt engineering is used for JSON output
- Add temperature clamping for MiniMax (must be > 0)
- Add robust JSON parsing from LLM responses (parse_json_from_response)
- Update simulation_config_generator and oasis_profile_generator for
  MiniMax compatibility
- Add MiniMax configuration examples in .env.example
- Add MiniMax documentation in README.md and README-EN.md
- Add unit tests for MiniMax compatibility functions

Supported models: MiniMax-M2.5, MiniMax-M2.5-highspeed
API docs: https://platform.minimax.io/docs/api-reference/text-openai-api
This commit is contained in:
PR Bot 2026-03-12 14:03:07 +08:00
parent 985f89f49a
commit 1b9c5609c9
7 changed files with 366 additions and 59 deletions

View File

@ -5,6 +5,14 @@ LLM_API_KEY=your_api_key_here
LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
LLM_MODEL_NAME=qwen-plus
# ===== 使用 MiniMax 模型(可选)=====
# MiniMax M2.5: 高性能、高性价比,支持 204,800 tokens 上下文
# 获取 API Key: https://platform.minimax.io/
# LLM_API_KEY=your_minimax_api_key_here
# LLM_BASE_URL=https://api.minimax.io/v1
# LLM_MODEL_NAME=MiniMax-M2.5
# 国内用户可使用: LLM_BASE_URL=https://api.minimaxi.com/v1
# ===== ZEP记忆图谱配置 =====
# 每月免费额度即可支撑简单使用：https://app.getzep.com/
ZEP_API_KEY=your_zep_api_key_here

View File

@ -127,6 +127,28 @@ LLM_MODEL_NAME=qwen-plus
ZEP_API_KEY=your_zep_api_key
```
<details>
<summary><b>Using MiniMax Models</b></summary>
[MiniMax](https://platform.minimax.io/) provides high-performance, cost-effective LLMs with an OpenAI-compatible API:
```env
LLM_API_KEY=your_minimax_api_key
LLM_BASE_URL=https://api.minimax.io/v1
LLM_MODEL_NAME=MiniMax-M2.5
```
| Model | Description |
|-------|-------------|
| `MiniMax-M2.5` | Flagship model, 204K context window |
| `MiniMax-M2.5-highspeed` | Same performance, faster and more agile |
For users in China: `LLM_BASE_URL=https://api.minimaxi.com/v1`
API Documentation: [OpenAI Compatible API](https://platform.minimax.io/docs/api-reference/text-openai-api)
</details>
#### 2. Install Dependencies
```bash

View File

@ -127,6 +127,28 @@ LLM_MODEL_NAME=qwen-plus
ZEP_API_KEY=your_zep_api_key
```
<details>
<summary><b>使用 MiniMax 模型</b></summary>
[MiniMax](https://platform.minimax.io/) 提供高性能、高性价比的 LLM 模型,支持 OpenAI 兼容 API：
```env
LLM_API_KEY=your_minimax_api_key
LLM_BASE_URL=https://api.minimax.io/v1
LLM_MODEL_NAME=MiniMax-M2.5
```
| 模型 | 说明 |
|------|------|
| `MiniMax-M2.5` | 旗舰模型，204K 上下文窗口 |
| `MiniMax-M2.5-highspeed` | 同等性能,更快更敏捷 |
国内用户可使用:`LLM_BASE_URL=https://api.minimaxi.com/v1`
API 文档:[OpenAI 兼容接口](https://platform.minimax.io/docs/api-reference/text-openai-api)
</details>
#### 2. 安装依赖
```bash

View File

@ -20,6 +20,7 @@ from zep_cloud.client import Zep
from ..config import Config
from ..utils.logger import get_logger
from ..utils.llm_client import _is_minimax, _clamp_temperature, _inject_json_instruction, parse_json_from_response
from .zep_entity_reader import EntityNode, ZepEntityReader
logger = get_logger('mirofish.oasis_profile')
@ -523,43 +524,53 @@ class OasisProfileGenerator:
# 尝试多次生成,直到成功或达到最大重试次数
max_attempts = 3
last_error = None
use_minimax = _is_minimax(self.model_name, self.base_url)
for attempt in range(max_attempts):
try:
response = self.client.chat.completions.create(
model=self.model_name,
messages=[
{"role": "system", "content": self._get_system_prompt(is_individual)},
{"role": "user", "content": prompt}
],
response_format={"type": "json_object"},
temperature=0.7 - (attempt * 0.1) # 每次重试降低温度
import re as _re
temperature = _clamp_temperature(0.7 - (attempt * 0.1), self.model_name, self.base_url)
messages = [
{"role": "system", "content": self._get_system_prompt(is_individual)},
{"role": "user", "content": prompt}
]
kwargs = {
"model": self.model_name,
"messages": _inject_json_instruction(messages) if use_minimax else messages,
"temperature": temperature,
# 不设置max_tokens让LLM自由发挥
)
}
if not use_minimax:
kwargs["response_format"] = {"type": "json_object"}
response = self.client.chat.completions.create(**kwargs)
content = response.choices[0].message.content
# 移除 <think> 标签
content = _re.sub(r'<think>[\s\S]*?</think>', '', content).strip()
# 检查是否被截断finish_reason不是'stop'
finish_reason = response.choices[0].finish_reason
if finish_reason == 'length':
logger.warning(f"LLM输出被截断 (attempt {attempt+1}), 尝试修复...")
content = self._fix_truncated_json(content)
# 尝试解析JSON
try:
result = json.loads(content)
result = parse_json_from_response(content)
# 验证必需字段
if "bio" not in result or not result["bio"]:
result["bio"] = entity_summary[:200] if entity_summary else f"{entity_type}: {entity_name}"
if "persona" not in result or not result["persona"]:
result["persona"] = entity_summary or f"{entity_name}是一个{entity_type}"
return result
except json.JSONDecodeError as je:
except (json.JSONDecodeError, ValueError) as je:
logger.warning(f"JSON解析失败 (attempt {attempt+1}): {str(je)[:80]}")
# 尝试修复JSON
result = self._try_fix_json(content, entity_name, entity_type, entity_summary)
if result.get("_fixed"):

View File

@ -20,6 +20,7 @@ from openai import OpenAI
from ..config import Config
from ..utils.logger import get_logger
from ..utils.llm_client import _is_minimax, _clamp_temperature, _inject_json_instruction, parse_json_from_response
from .zep_entity_reader import EntityNode, ZepEntityReader
logger = get_logger('mirofish.simulation_config')
@ -433,42 +434,51 @@ class SimulationConfigGenerator:
def _call_llm_with_retry(self, prompt: str, system_prompt: str) -> Dict[str, Any]:
"""带重试的LLM调用包含JSON修复逻辑"""
import re
max_attempts = 3
last_error = None
use_minimax = _is_minimax(self.model_name, self.base_url)
for attempt in range(max_attempts):
try:
response = self.client.chat.completions.create(
model=self.model_name,
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": prompt}
],
response_format={"type": "json_object"},
temperature=0.7 - (attempt * 0.1) # 每次重试降低温度
temperature = _clamp_temperature(0.7 - (attempt * 0.1), self.model_name, self.base_url)
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": prompt}
]
kwargs = {
"model": self.model_name,
"messages": _inject_json_instruction(messages) if use_minimax else messages,
"temperature": temperature,
# 不设置max_tokens让LLM自由发挥
)
}
if not use_minimax:
kwargs["response_format"] = {"type": "json_object"}
response = self.client.chat.completions.create(**kwargs)
content = response.choices[0].message.content
# 移除 <think> 标签
content = re.sub(r'<think>[\s\S]*?</think>', '', content).strip()
finish_reason = response.choices[0].finish_reason
# 检查是否被截断
if finish_reason == 'length':
logger.warning(f"LLM输出被截断 (attempt {attempt+1})")
content = self._fix_truncated_json(content)
# 尝试解析JSON
try:
return json.loads(content)
except json.JSONDecodeError as e:
return parse_json_from_response(content)
except (json.JSONDecodeError, ValueError) as e:
logger.warning(f"JSON解析失败 (attempt {attempt+1}): {str(e)[:80]}")
# 尝试修复JSON
fixed = self._try_fix_config_json(content)
if fixed:
return fixed
last_error = e
except Exception as e:

View File

@ -1,6 +1,6 @@
"""
LLM客户端封装
统一使用OpenAI格式调用
统一使用OpenAI格式调用，兼容 MiniMax OpenAI 兼容 API
"""
import json
@ -11,9 +11,52 @@ from openai import OpenAI
from ..config import Config
def _is_minimax(model: str, base_url: str) -> bool:
"""检测当前是否使用 MiniMax 模型"""
model_lower = (model or "").lower()
url_lower = (base_url or "").lower()
return "minimax" in model_lower or "minimax" in url_lower
def _clamp_temperature(temperature: float, model: str, base_url: str) -> float:
    """Clamp ``temperature`` into the range MiniMax accepts.

    MiniMax requires the temperature to lie in (0.0, 1.0]. For a MiniMax
    model or endpoint, values at or below zero are raised to 0.01 and values
    above 1.0 are lowered to 1.0. For every other provider the value is
    returned unchanged.

    Args:
        temperature: Requested sampling temperature.
        model: Model name used for MiniMax detection.
        base_url: API base URL used for MiniMax detection.

    Returns:
        The temperature to send to the API.
    """
    if not _is_minimax(model, base_url):
        return temperature
    if temperature <= 0:
        # Zero is outside the open lower bound; use a small positive value.
        return 0.01
    if temperature > 1.0:
        # Enforce the documented upper bound as well as the lower one.
        return 1.0
    return temperature
def parse_json_from_response(content: str) -> Any:
    """Parse JSON out of an LLM response, tolerating common wrappers.

    Strategies are tried in order and the first one that yields valid JSON
    wins:

    1. Parse the whole (stripped) response.
    2. Parse the body of the first Markdown code fence.
    3. Parse the greedy span from an opening ``{`` to the last ``}`` (or
       from ``[`` to the last ``]``).
    4. Scan for the first ``{`` or ``[`` at which a complete JSON value
       starts and decode only that value, ignoring any trailing text.

    Args:
        content: Raw response text. ``None`` is treated as an empty response.

    Returns:
        The decoded JSON value.

    Raises:
        ValueError: If none of the strategies produces valid JSON.
    """
    trimmed = (content or "").strip()

    # 1. Direct parse.
    try:
        return json.loads(trimmed)
    except json.JSONDecodeError:
        pass

    # 2. Body of a Markdown code fence.
    code_block_match = re.search(r'```(?:json)?\s*\n?([\s\S]*?)```', trimmed)
    if code_block_match:
        try:
            return json.loads(code_block_match.group(1).strip())
        except json.JSONDecodeError:
            pass

    # 3. Greedy { ... } or [ ... ] span.
    json_match = re.search(r'(\{[\s\S]*\}|\[[\s\S]*\])', trimmed)
    if json_match:
        try:
            return json.loads(json_match.group(1))
        except json.JSONDecodeError:
            pass

    # 4. The greedy span breaks when stray braces or a second JSON value
    #    follow the payload; decode the first complete value instead.
    decoder = json.JSONDecoder()
    for opener in re.finditer(r'[{\[]', trimmed):
        try:
            value, _end = decoder.raw_decode(trimmed, opener.start())
        except json.JSONDecodeError:
            continue
        return value

    raise ValueError(f"LLM返回的JSON格式无效: {trimmed}")
class LLMClient:
"""LLM客户端"""
def __init__(
self,
api_key: Optional[str] = None,
@ -23,15 +66,20 @@ class LLMClient:
self.api_key = api_key or Config.LLM_API_KEY
self.base_url = base_url or Config.LLM_BASE_URL
self.model = model or Config.LLM_MODEL_NAME
if not self.api_key:
raise ValueError("LLM_API_KEY 未配置")
self.client = OpenAI(
api_key=self.api_key,
base_url=self.base_url
)
@property
def is_minimax(self) -> bool:
"""检测当前是否使用 MiniMax 模型"""
return _is_minimax(self.model, self.base_url)
def chat(
self,
messages: List[Dict[str, str]],
@ -41,32 +89,37 @@ class LLMClient:
) -> str:
"""
发送聊天请求
Args:
messages: 消息列表
temperature: 温度参数
max_tokens: 最大token数
response_format: 响应格式如JSON模式
Returns:
模型响应文本
"""
temperature = _clamp_temperature(temperature, self.model, self.base_url)
kwargs = {
"model": self.model,
"messages": messages,
"temperature": temperature,
"max_tokens": max_tokens,
}
if response_format:
# MiniMax 不支持 response_format改用 prompt 引导 JSON 输出
if response_format and self.is_minimax:
messages = _inject_json_instruction(messages)
elif response_format:
kwargs["response_format"] = response_format
response = self.client.chat.completions.create(**kwargs)
content = response.choices[0].message.content
# 部分模型如MiniMax M2.5会在content中包含<think>思考内容,需要移除
content = re.sub(r'<think>[\s\S]*?</think>', '', content).strip()
return content
def chat_json(
self,
messages: List[Dict[str, str]],
@ -75,12 +128,12 @@ class LLMClient:
) -> Dict[str, Any]:
"""
发送聊天请求并返回JSON
Args:
messages: 消息列表
temperature: 温度参数
max_tokens: 最大token数
Returns:
解析后的JSON对象
"""
@ -90,14 +143,19 @@ class LLMClient:
max_tokens=max_tokens,
response_format={"type": "json_object"}
)
# 清理markdown代码块标记
cleaned_response = response.strip()
cleaned_response = re.sub(r'^```(?:json)?\s*\n?', '', cleaned_response, flags=re.IGNORECASE)
cleaned_response = re.sub(r'\n?```\s*$', '', cleaned_response)
cleaned_response = cleaned_response.strip()
return parse_json_from_response(response)
try:
return json.loads(cleaned_response)
except json.JSONDecodeError:
raise ValueError(f"LLM返回的JSON格式无效: {cleaned_response}")
def _inject_json_instruction(messages: List[Dict[str, str]]) -> List[Dict[str, str]]:
"""在消息列表中注入 JSON 输出指令(用于不支持 response_format 的模型)"""
json_hint = "\n\nYou must respond with valid JSON only. No markdown, no explanation, no extra text."
messages = [msg.copy() for msg in messages]
# 优先追加到 system 消息
for msg in messages:
if msg.get("role") == "system":
msg["content"] = msg["content"] + json_hint
return messages
# 如果没有 system 消息,在开头插入一条
messages.insert(0, {"role": "system", "content": json_hint.strip()})
return messages

View File

@ -0,0 +1,176 @@
"""
MiniMax 兼容性测试
验证 LLMClient 对 MiniMax 模型的兼容处理
"""
import json
import re
import pytest
import sys
import os
# 直接导入 llm_client 模块中的独立函数,绕过 Flask 依赖
# 通过模拟 Config 来避免导入整个 app 模块
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
# 直接从源文件提取独立函数进行测试
def _is_minimax(model, base_url):
model_lower = (model or "").lower()
url_lower = (base_url or "").lower()
return "minimax" in model_lower or "minimax" in url_lower
def _clamp_temperature(temperature, model, base_url):
    """Clamp ``temperature`` into MiniMax's accepted range (0.0, 1.0].

    For a MiniMax model or endpoint, values at or below zero become 0.01 and
    values above 1.0 become 1.0. Other providers get the value unchanged.
    """
    if not _is_minimax(model, base_url):
        return temperature
    if temperature <= 0:
        # Zero is outside the open lower bound; use a small positive value.
        return 0.01
    if temperature > 1.0:
        # Enforce the upper bound as well as the lower one.
        return 1.0
    return temperature
def parse_json_from_response(content):
    """Parse JSON out of an LLM response, tolerating common wrappers.

    Tries, in order: the whole stripped text, the body of the first Markdown
    code fence, the greedy ``{...}`` / ``[...]`` span, and finally the first
    complete JSON object or array found by scanning (ignoring trailing text).
    ``None`` is treated as an empty response.

    Raises:
        ValueError: If no strategy produces valid JSON.
    """
    trimmed = (content or "").strip()

    # 1. Direct parse.
    try:
        return json.loads(trimmed)
    except json.JSONDecodeError:
        pass

    # 2. Body of a Markdown code fence.
    code_block_match = re.search(r'```(?:json)?\s*\n?([\s\S]*?)```', trimmed)
    if code_block_match:
        try:
            return json.loads(code_block_match.group(1).strip())
        except json.JSONDecodeError:
            pass

    # 3. Greedy { ... } or [ ... ] span.
    json_match = re.search(r'(\{[\s\S]*\}|\[[\s\S]*\])', trimmed)
    if json_match:
        try:
            return json.loads(json_match.group(1))
        except json.JSONDecodeError:
            pass

    # 4. The greedy span breaks when stray braces or a second JSON value
    #    follow the payload; decode the first complete value instead.
    decoder = json.JSONDecoder()
    for opener in re.finditer(r'[{\[]', trimmed):
        try:
            value, _end = decoder.raw_decode(trimmed, opener.start())
        except json.JSONDecodeError:
            continue
        return value

    raise ValueError(f"LLM返回的JSON格式无效: {trimmed}")
def _inject_json_instruction(messages):
json_hint = "\n\nYou must respond with valid JSON only. No markdown, no explanation, no extra text."
messages = [msg.copy() for msg in messages]
for msg in messages:
if msg.get("role") == "system":
msg["content"] = msg["content"] + json_hint
return messages
messages.insert(0, {"role": "system", "content": json_hint.strip()})
return messages
class TestIsMinimax:
    """MiniMax detection from the model name or the base URL."""

    def test_minimax_model_name(self):
        detected = _is_minimax("MiniMax-M2.5", "https://api.openai.com/v1")
        assert detected is True

    def test_minimax_model_name_lowercase(self):
        detected = _is_minimax("minimax-m2.5", "https://api.openai.com/v1")
        assert detected is True

    def test_minimax_base_url(self):
        detected = _is_minimax("some-model", "https://api.minimax.io/v1")
        assert detected is True

    def test_minimax_base_url_cn(self):
        detected = _is_minimax("some-model", "https://api.minimaxi.com/v1")
        assert detected is True

    def test_not_minimax_openai(self):
        detected = _is_minimax("gpt-4o", "https://api.openai.com/v1")
        assert detected is False

    def test_not_minimax_dashscope(self):
        detected = _is_minimax(
            "qwen-plus",
            "https://dashscope.aliyuncs.com/compatible-mode/v1",
        )
        assert detected is False

    def test_none_values(self):
        detected = _is_minimax(None, None)
        assert detected is False

    def test_minimax_highspeed(self):
        detected = _is_minimax("MiniMax-M2.5-highspeed", "https://api.minimax.io/v1")
        assert detected is True
class TestClampTemperature:
    """Temperature clamping for MiniMax and pass-through for other providers."""

    def test_zero_temperature_minimax(self):
        assert _clamp_temperature(0.0, "MiniMax-M2.5", "https://api.minimax.io/v1") == 0.01

    def test_negative_temperature_minimax(self):
        assert _clamp_temperature(-0.1, "MiniMax-M2.5", "https://api.minimax.io/v1") == 0.01

    def test_valid_temperature_minimax(self):
        assert _clamp_temperature(0.7, "MiniMax-M2.5", "https://api.minimax.io/v1") == 0.7

    def test_zero_temperature_non_minimax(self):
        assert _clamp_temperature(0.0, "gpt-4o", "https://api.openai.com/v1") == 0.0

    def test_max_temperature_minimax(self):
        assert _clamp_temperature(1.0, "MiniMax-M2.5", "https://api.minimax.io/v1") == 1.0
class TestInjectJsonInstruction:
    """JSON-instruction injection into chat message lists."""

    def test_inject_to_existing_system_message(self):
        original = [
            {"role": "system", "content": "You are a helper."},
            {"role": "user", "content": "Generate JSON."}
        ]
        injected = _inject_json_instruction(original)
        system_text = injected[0]["content"]
        assert "valid JSON only" in system_text
        assert system_text.startswith("You are a helper.")
        # The caller's list must be left untouched.
        assert "valid JSON only" not in original[0]["content"]

    def test_inject_without_system_message(self):
        original = [
            {"role": "user", "content": "Generate JSON."}
        ]
        injected = _inject_json_instruction(original)
        assert len(injected) == 2
        leading = injected[0]
        assert leading["role"] == "system"
        assert "valid JSON only" in leading["content"]

    def test_does_not_mutate_original(self):
        original = [
            {"role": "system", "content": "Hello"},
            {"role": "user", "content": "Test"}
        ]
        content_before = original[0]["content"]
        _inject_json_instruction(original)
        assert original[0]["content"] == content_before
class TestParseJsonFromResponse:
    """JSON extraction from bare, fenced, and text-wrapped responses."""

    def test_direct_json(self):
        assert parse_json_from_response('{"key": "value"}') == {"key": "value"}

    def test_json_with_markdown_block(self):
        fenced = '```json\n{"key": "value"}\n```'
        assert parse_json_from_response(fenced) == {"key": "value"}

    def test_json_with_surrounding_text(self):
        wrapped = 'Here is the result:\n{"key": "value"}\nDone.'
        assert parse_json_from_response(wrapped) == {"key": "value"}

    def test_json_array(self):
        assert parse_json_from_response('[1, 2, 3]') == [1, 2, 3]

    def test_invalid_json_raises(self):
        with pytest.raises(ValueError, match="JSON格式无效"):
            parse_json_from_response("not json at all")

    def test_nested_json(self):
        payload = '{"agents": [{"name": "Alice"}, {"name": "Bob"}]}'
        parsed = parse_json_from_response(payload)
        assert len(parsed["agents"]) == 2

    def test_markdown_block_without_json_label(self):
        fenced = '```\n{"key": "value"}\n```'
        assert parse_json_from_response(fenced) == {"key": "value"}
if __name__ == "__main__":
    # Allow running this file directly; verbose output lists each test.
    pytest.main(args=[__file__, "-v"])