Merge PR #463: multi-provider LLM support via Prompture
# Conflicts: # .env.example # backend/app/utils/llm_client.py
This commit is contained in:
commit
842bfd4335
51
.env.example
51
.env.example
|
|
@ -1,19 +1,44 @@
|
||||||
# LLM API配置(支持 OpenAI SDK 格式的任意 LLM API)
|
# ===== LLM API Configuration =====
|
||||||
# 推荐使用阿里百炼平台qwen-plus模型:https://bailian.console.aliyun.com/
|
# Default: any OpenAI-compatible API
|
||||||
# 注意消耗较大,可先进行小于40轮的模拟尝试
|
# With Prompture installed (pip install prompture): 12+ providers supported
|
||||||
|
#
|
||||||
|
# ── OpenAI-compatible (default, no Prompture needed) ──
|
||||||
LLM_API_KEY=your_api_key_here
|
LLM_API_KEY=your_api_key_here
|
||||||
LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
|
LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
|
||||||
LLM_MODEL_NAME=qwen-plus
|
LLM_MODEL_NAME=qwen-plus
|
||||||
|
#
|
||||||
|
# ── With Prompture: use "provider/model" format ──
|
||||||
|
# LM Studio (free, local):
|
||||||
|
# LLM_MODEL_NAME=lmstudio/local-model
|
||||||
|
# LLM_BASE_URL=http://localhost:1234/v1
|
||||||
|
# LLM_API_KEY=lm-studio
|
||||||
|
#
|
||||||
|
# Ollama (free, local):
|
||||||
|
# LLM_MODEL_NAME=ollama/llama3.1:8b
|
||||||
|
#
|
||||||
|
# Kimi / Moonshot:
|
||||||
|
# LLM_MODEL_NAME=moonshot/moonshot-v1-8k
|
||||||
|
# LLM_API_KEY=your_moonshot_key
|
||||||
|
#
|
||||||
|
# Claude:
|
||||||
|
# LLM_MODEL_NAME=claude/claude-sonnet-4-20250514
|
||||||
|
# LLM_API_KEY=sk-ant-...
|
||||||
|
#
|
||||||
|
# Groq (fast, free tier):
|
||||||
|
# LLM_MODEL_NAME=groq/llama-3.1-70b-versatile
|
||||||
|
# LLM_API_KEY=gsk_...
|
||||||
|
#
|
||||||
|
# See all providers: https://github.com/jhd3197/prompture#providers
|
||||||
|
|
||||||
# ===== ZEP记忆图谱配置 =====
|
# ===== ZEP Memory Graph =====
|
||||||
# 每月免费额度即可支撑简单使用:https://app.getzep.com/
|
# Free monthly quota: https://app.getzep.com/
|
||||||
ZEP_API_KEY=your_zep_api_key_here
|
ZEP_API_KEY=your_zep_api_key_here
|
||||||
|
|
||||||
# ===== 加速 LLM 配置(可选)=====
|
# ===== Boost LLM (optional) =====
|
||||||
# 注意如果不使用加速配置,env文件中就不要出现下面的配置项
|
# LLM_BOOST_API_KEY=your_api_key_here
|
||||||
LLM_BOOST_API_KEY=your_api_key_here
|
# LLM_BOOST_BASE_URL=your_base_url_here
|
||||||
LLM_BOOST_BASE_URL=your_base_url_here
|
# LLM_BOOST_MODEL_NAME=your_model_name_here
|
||||||
LLM_BOOST_MODEL_NAME=your_model_name_here
|
|
||||||
# ===== 前端API超时配置(可选)=====
|
# ===== Frontend API timeout (optional) =====
|
||||||
# 本地大模型响应较慢时可以增加此值(毫秒)
|
# Increase this value for slow local LLMs (milliseconds)
|
||||||
# VITE_API_TIMEOUT=600000 # 10分钟
|
# VITE_API_TIMEOUT=600000 # 10 minutes
|
||||||
|
|
|
||||||
23
README.md
23
README.md
|
|
@ -127,6 +127,29 @@ LLM_MODEL_NAME=qwen-plus
|
||||||
ZEP_API_KEY=your_zep_api_key
|
ZEP_API_KEY=your_zep_api_key
|
||||||
```
|
```
|
||||||
|
|
||||||
|
#### Multi-Provider Support (Optional)
|
||||||
|
|
||||||
|
Install [Prompture](https://github.com/jhd3197/prompture) to unlock 12+ LLM providers beyond OpenAI-compatible APIs:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install prompture
|
||||||
|
```
|
||||||
|
|
||||||
|
Then set `LLM_MODEL_NAME` in your `.env` using the `provider/model` format:
|
||||||
|
|
||||||
|
| Provider | `LLM_MODEL_NAME` | Cost |
|
||||||
|
|---|---|---|
|
||||||
|
| LM Studio | `lmstudio/local-model` | Free (local) |
|
||||||
|
| Ollama | `ollama/llama3.1:8b` | Free (local) |
|
||||||
|
| OpenAI | `openai/gpt-4o` | Paid |
|
||||||
|
| Claude | `claude/claude-sonnet-4-20250514` | Paid |
|
||||||
|
| Kimi / Moonshot | `moonshot/moonshot-v1-8k` | Paid |
|
||||||
|
| Groq | `groq/llama-3.1-70b-versatile` | Free tier |
|
||||||
|
| Google | `google/gemini-1.5-pro` | Paid |
|
||||||
|
| OpenRouter | `openrouter/anthropic/claude-2` | Paid |
|
||||||
|
|
||||||
|
> Without Prompture, the original OpenAI SDK backend works as before — no changes needed.
|
||||||
|
|
||||||
#### 2. Install Dependencies
|
#### 2. Install Dependencies
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|
|
||||||
|
|
@ -1,104 +1,209 @@
|
||||||
"""
|
"""
|
||||||
LLM客户端封装
|
LLM客户端封装
|
||||||
统一使用OpenAI格式调用
|
Supports two backends:
|
||||||
|
1. Prompture (used automatically whenever it is importable) — 12+ providers: LM Studio, Ollama, Claude, Groq, Kimi, etc.
|
||||||
|
2. OpenAI SDK (default fallback) — any OpenAI-compatible API
|
||||||
|
Install Prompture for multi-provider support: pip install prompture
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
from typing import Optional, Dict, Any, List
|
from typing import Optional, Dict, Any, List
|
||||||
from openai import OpenAI
|
|
||||||
|
|
||||||
from ..config import Config
|
from ..config import Config
|
||||||
|
|
||||||
|
# Try to import Prompture; fall back to OpenAI SDK if not installed
|
||||||
|
try:
|
||||||
|
from prompture.agents import Conversation
|
||||||
|
from prompture.infra.provider_env import ProviderEnvironment
|
||||||
|
from prompture.extraction.tools import strip_think_tags, clean_json_text
|
||||||
|
_HAS_PROMPTURE = True
|
||||||
|
except ImportError:
|
||||||
|
_HAS_PROMPTURE = False
|
||||||
|
|
||||||
|
if not _HAS_PROMPTURE:
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
|
||||||
|
# Provider name → name of the ProviderEnvironment field that carries its API key.
# Providers missing from this map (e.g. lmstudio, ollama) get no key injected.
_KEY_MAP = {
    "openai": "openai_api_key",
    "claude": "claude_api_key",
    "google": "google_api_key",
    "groq": "groq_api_key",
    "grok": "grok_api_key",
    "openrouter": "openrouter_api_key",
    "moonshot": "moonshot_api_key",
}


class LLMClient:
    """LLM client with two interchangeable backends.

    When Prompture is importable it is always used, and ``model`` accepts the
    ``"provider/model"`` format for multi-provider support::

        "lmstudio/local-model"             → LM Studio (free, local)
        "ollama/llama3.1:8b"               → Ollama (free, local)
        "openai/gpt-4o"                    → OpenAI
        "claude/claude-sonnet-4-20250514"  → Anthropic
        "moonshot/moonshot-v1-8k"          → Kimi / Moonshot
        "groq/llama-3.1-70b-versatile"     → Groq

    A model name without a ``/`` is treated as belonging to the ``openai``
    provider when choosing where to put the API key.

    Without Prompture, the original OpenAI SDK backend is used (any
    OpenAI-compatible API via LLM_BASE_URL).
    """

    # Some models emit their reasoning inside <think>...</think>; it is removed
    # from every reply on the OpenAI path.
    _THINK_TAG_RE = re.compile(r'<think>[\s\S]*?</think>')
    # Leading/trailing markdown code fence around a JSON payload.
    _FENCE_OPEN_RE = re.compile(r'^```(?:json)?\s*\n?', re.IGNORECASE)
    _FENCE_CLOSE_RE = re.compile(r'\n?```\s*$')

    def __init__(
        self,
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
        model: Optional[str] = None,
    ):
        """Create a client; any argument left empty falls back to ``Config``.

        Args:
            api_key: API key (defaults to ``Config.LLM_API_KEY``).
            base_url: API base URL (defaults to ``Config.LLM_BASE_URL``).
            model: Model name (defaults to ``Config.LLM_MODEL_NAME``).

        Raises:
            ValueError: On the OpenAI path, when no API key is configured.
        """
        self.api_key = api_key or Config.LLM_API_KEY
        self.base_url = base_url or Config.LLM_BASE_URL
        self.model = model or Config.LLM_MODEL_NAME

        if _HAS_PROMPTURE:
            self._init_prompture()
        else:
            self._init_openai()

    # ── Shared helpers ─────────────────────────────────────────────

    @staticmethod
    def _provider_of(model: Optional[str]) -> str:
        """Return the provider prefix of ``"provider/model"``, or ``"openai"``.

        A missing/empty model name also yields ``"openai"`` instead of raising.
        """
        if model and "/" in model:
            return model.split("/", 1)[0]
        return "openai"

    @classmethod
    def _env_kwargs(cls, model: Optional[str], api_key: Optional[str]) -> Dict[str, Any]:
        """Build the ProviderEnvironment kwargs that carry ``api_key``.

        Returns an empty dict when there is no key or the provider has no
        entry in ``_KEY_MAP``.
        """
        if not api_key:
            return {}
        env_field = _KEY_MAP.get(cls._provider_of(model))
        return {env_field: api_key} if env_field else {}

    @classmethod
    def _strip_think(cls, text: Optional[str]) -> str:
        """Remove ``<think>...</think>`` spans and trim; ``None`` becomes ``""``."""
        return cls._THINK_TAG_RE.sub('', text or '').strip()

    @classmethod
    def _clean_json_fallback(cls, text: Optional[str]) -> str:
        """Strip think tags and a surrounding markdown code fence from ``text``."""
        cleaned = cls._strip_think(text)
        cleaned = cls._FENCE_OPEN_RE.sub('', cleaned)
        cleaned = cls._FENCE_CLOSE_RE.sub('', cleaned)
        return cleaned.strip()

    # ── Prompture backend ──────────────────────────────────────────

    def _init_prompture(self):
        """Prepare the provider environment and driver options for Prompture."""
        env_kwargs = self._env_kwargs(self.model, self.api_key)

        self._env = ProviderEnvironment(**env_kwargs) if env_kwargs else None
        self._driver_options: Dict[str, Any] = {}
        if self.base_url:
            self._driver_options["base_url"] = self.base_url

    def _make_conversation(self, temperature: float, max_tokens: int) -> "Conversation":
        """Create a fresh Prompture ``Conversation`` for a single request."""
        opts: Dict[str, Any] = {
            "temperature": temperature,
            "max_tokens": max_tokens,
            **self._driver_options,
        }
        return Conversation(self.model, options=opts, env=self._env)

    # ── OpenAI fallback backend ────────────────────────────────────

    def _init_openai(self):
        """Create the OpenAI SDK client.

        Raises:
            ValueError: When no API key is configured.
        """
        if not self.api_key:
            raise ValueError("LLM_API_KEY 未配置")
        self.client = OpenAI(api_key=self.api_key, base_url=self.base_url)

    # ── Public API ─────────────────────────────────────────────────

    def chat(
        self,
        messages: List[Dict[str, str]],
        temperature: float = 0.7,
        max_tokens: int = 4096,
        response_format: Optional[Dict] = None,
    ) -> str:
        """Send a chat request and return the reply text.

        Args:
            messages: Message list (dicts with ``role`` and ``content``).
            temperature: Sampling temperature.
            max_tokens: Maximum number of tokens to generate.
            response_format: Response format (e.g. JSON mode). Only forwarded
                on the OpenAI path; the Prompture path does not receive it.

        Returns:
            The model's reply with ``<think>`` content removed.
        """
        if _HAS_PROMPTURE:
            content = self._chat_prompture(messages, temperature, max_tokens)
            return strip_think_tags(content)

        content = self._chat_openai(messages, temperature, max_tokens, response_format)
        # Fallback: strip think tags with regex when Prompture is not available
        return self._strip_think(content)

    def chat_json(
        self,
        messages: List[Dict[str, str]],
        temperature: float = 0.3,
        max_tokens: int = 4096,
    ) -> Dict[str, Any]:
        """Send a chat request and parse the reply as JSON.

        No ``response_format`` is sent; the prompt itself must ask for JSON,
        and markdown fences / think tags are cleaned before parsing.

        Args:
            messages: Message list (dicts with ``role`` and ``content``).
            temperature: Sampling temperature.
            max_tokens: Maximum number of tokens to generate.

        Returns:
            The parsed JSON value.

        Raises:
            ValueError: When the cleaned reply is not valid JSON.
        """
        if _HAS_PROMPTURE:
            response = self._chat_prompture(messages, temperature, max_tokens)
            # Per the original author's note, clean_json_text strips think tags
            # and markdown fences.
            cleaned = clean_json_text(response)
        else:
            response = self._chat_openai(messages, temperature, max_tokens)
            cleaned = self._clean_json_fallback(response)

        try:
            return json.loads(cleaned)
        except json.JSONDecodeError as exc:
            # Chain the decode error so the failing position is not lost.
            raise ValueError(f"LLM返回的JSON格式无效: {cleaned}") from exc

    # ── Private: Prompture path ────────────────────────────────────

    def _chat_prompture(
        self,
        messages: List[Dict[str, str]],
        temperature: float,
        max_tokens: int,
    ) -> str:
        """Replay ``messages`` into a new Conversation and ask the last turn.

        All system messages are merged into one, earlier non-system turns are
        appended as history, and the final non-system message is sent as the
        prompt (an empty prompt when there is none).
        """
        conv = self._make_conversation(temperature, max_tokens)

        # NOTE(review): this writes to Conversation's private ``_messages`` list;
        # confirm against Prompture's public API before upgrading the library.
        system_parts = [m["content"] for m in messages if m["role"] == "system"]
        if system_parts:
            conv._messages.append({"role": "system", "content": "\n".join(system_parts)})

        # Replay prior turns
        non_system = [m for m in messages if m["role"] != "system"]
        for msg in non_system[:-1]:
            conv._messages.append({"role": msg["role"], "content": msg["content"]})

        prompt = non_system[-1]["content"] if non_system else ""
        return conv.ask(prompt)

    # ── Private: OpenAI fallback path ──────────────────────────────

    def _chat_openai(
        self,
        messages: List[Dict[str, str]],
        temperature: float,
        max_tokens: int,
        response_format: Optional[Dict] = None,
    ) -> str:
        """Call the chat-completions endpoint and return the raw reply text."""
        kwargs = {
            "model": self.model,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }

        if response_format:
            kwargs["response_format"] = response_format

        response = self.client.chat.completions.create(**kwargs)
        # ``content`` can be None (e.g. an empty or tool-only reply); callers
        # run string operations on the result, so normalise it to "".
        return response.choices[0].message.content or ""
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -10,9 +10,14 @@ flask>=3.0.0
|
||||||
flask-cors>=6.0.0
|
flask-cors>=6.0.0
|
||||||
|
|
||||||
# ============= LLM 相关 =============
|
# ============= LLM 相关 =============
|
||||||
# OpenAI SDK(统一使用 OpenAI 格式调用 LLM)
|
# OpenAI SDK(默认 LLM 后端)
|
||||||
openai>=1.0.0
|
openai>=1.0.0
|
||||||
|
|
||||||
|
# Prompture(可选)— 多供应商 LLM 支持:LM Studio, Ollama, Claude, Groq, Kimi 等
|
||||||
|
# Install for multi-provider support: pip install prompture
|
||||||
|
# https://github.com/jhd3197/prompture
|
||||||
|
# prompture>=0.1.0
|
||||||
|
|
||||||
# ============= Zep Cloud =============
|
# ============= Zep Cloud =============
|
||||||
zep-cloud==3.13.0
|
zep-cloud==3.13.0
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,68 @@
|
||||||
|
"""
|
||||||
|
Quick test: MiroFish LLMClient → LM Studio via Prompture
|
||||||
|
"""
|
||||||
|
import os
import sys

# Put the backend directory (parent of this script's folder) first on the
# import path so that `app.*` modules can be imported.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

# Point the client at a local LM Studio server. This must run before the app
# modules are imported (per the original note, before Config loads).
os.environ.update({
    "LLM_MODEL_NAME": "lmstudio/deepseek/deepseek-r1-0528-qwen3-8b",
    "LLM_BASE_URL": "http://localhost:1234/v1",
    "LLM_API_KEY": "lm-studio",
})
# Keep an existing ZEP key if one is set; otherwise use a placeholder so
# Config.validate() won't complain.
os.environ.setdefault("ZEP_API_KEY", "dummy")

from app.utils.llm_client import LLMClient
|
||||||
|
|
||||||
|
def test_basic_chat():
    """Send one system + user exchange and require a non-empty text reply."""
    from app.utils.llm_client import _HAS_PROMPTURE

    print("=== Test 1: Basic chat ===")
    client = LLMClient()
    print(f" Backend: Prompture={_HAS_PROMPTURE}")
    print(f" Model: {client.model}")
    response = client.chat([
        {"role": "system", "content": "You are a helpful assistant. Reply in one sentence."},
        {"role": "user", "content": "What is social media simulation?"},
    ], temperature=0.5, max_tokens=256)
    print(f" Response: {response[:300]}")
    # Printing alone can never fail; make an empty or non-string reply an error.
    assert isinstance(response, str) and response.strip(), "chat() returned no text"
    print()
|
||||||
|
|
||||||
|
def test_json_chat():
    """Ask for a small JSON object and check the parsed result's shape."""
    print("=== Test 2: JSON response ===")
    client = LLMClient()
    result = client.chat_json([
        {"role": "system", "content": "You are a JSON-only assistant. Always respond with valid JSON."},
        {"role": "user", "content": 'Return a JSON object with keys "platform" and "agents" (an integer). Example: {"platform":"twitter","agents":5}'},
    ], temperature=0.2, max_tokens=256)
    print(f" Parsed JSON: {result}")
    print(f" Type: {type(result)}")
    # chat_json() only guarantees valid JSON; verify we got the object we asked for.
    assert isinstance(result, dict), f"expected a JSON object, got {type(result).__name__}"
    missing = {"platform", "agents"} - result.keys()
    assert not missing, f"JSON reply is missing keys: {sorted(missing)}"
    print()
|
||||||
|
|
||||||
|
def test_multi_turn():
    """Check that earlier turns passed in ``messages`` reach the model.

    The client is stateless between calls, so the second call resends the
    first exchange as history and then asks about it.
    """
    print("=== Test 3: Multi-turn conversation ===")
    client = LLMClient()
    opening = {"role": "user", "content": "My name is MiroFish. Remember it."}

    r1 = client.chat([opening], max_tokens=128)
    print(f" Turn 1: {r1[:200]}")

    r2 = client.chat([
        opening,
        {"role": "assistant", "content": r1},
        {"role": "user", "content": "What is my name?"},
    ], max_tokens=128)
    print(f" Turn 2: {r2[:200]}")
    # If history replay is broken the model cannot know the name.
    assert "mirofish" in r2.lower(), "second reply does not recall the name from turn 1"
    print()
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    import traceback

    from app.utils.llm_client import _HAS_PROMPTURE

    # Report the backend and endpoint actually in use rather than fixed text.
    print(f"Prompture installed: {_HAS_PROMPTURE}")
    print(f"LM Studio endpoint: {os.environ['LLM_BASE_URL']}\n")
    try:
        test_basic_chat()
        test_json_chat()
        test_multi_turn()
    except Exception as e:
        print(f"ERROR: {e}")
        traceback.print_exc()
        # Exit non-zero so a failed run is visible to shells and CI.
        sys.exit(1)
    print("All tests passed!")
|
||||||
Loading…
Reference in New Issue