feat: multi-provider LLM support via Prompture

Add optional Prompture integration for 12+ LLM providers (LM Studio,
Ollama, Claude, Groq, Kimi/Moonshot, etc.) as a drop-in backend.
Zero breaking changes — falls back to the existing OpenAI SDK client
when Prompture is not installed.

- Rewrite llm_client.py with dual-backend architecture
- Update .env.example with provider/model format examples
- Add multi-provider table to README Quick Start section
- Add prompture as optional dependency in requirements.txt

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Juan Denis 2026-04-04 01:18:04 -04:00
parent fa0f6519b1
commit 79edc61563
4 changed files with 225 additions and 67 deletions

View File

@ -1,16 +1,40 @@
# LLM API配置支持 OpenAI SDK 格式的任意 LLM API # ===== LLM API Configuration =====
# 推荐使用阿里百炼平台qwen-plus模型https://bailian.console.aliyun.com/ # Default: any OpenAI-compatible API
# 注意消耗较大可先进行小于40轮的模拟尝试 # With Prompture installed (pip install prompture): 12+ providers supported
#
# ── OpenAI-compatible (default, no Prompture needed) ──
LLM_API_KEY=your_api_key_here LLM_API_KEY=your_api_key_here
LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1 LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
LLM_MODEL_NAME=qwen-plus LLM_MODEL_NAME=qwen-plus
#
# ── With Prompture: use "provider/model" format ──
# LM Studio (free, local):
# LLM_MODEL_NAME=lmstudio/local-model
# LLM_BASE_URL=http://localhost:1234/v1
# LLM_API_KEY=lm-studio
#
# Ollama (free, local):
# LLM_MODEL_NAME=ollama/llama3.1:8b
#
# Kimi / Moonshot:
# LLM_MODEL_NAME=moonshot/moonshot-v1-8k
# LLM_API_KEY=your_moonshot_key
#
# Claude:
# LLM_MODEL_NAME=claude/claude-sonnet-4-20250514
# LLM_API_KEY=sk-ant-...
#
# Groq (fast, free tier):
# LLM_MODEL_NAME=groq/llama-3.1-70b-versatile
# LLM_API_KEY=gsk_...
#
# See all providers: https://github.com/jhd3197/prompture#providers
# ===== ZEP记忆图谱配置 ===== # ===== ZEP Memory Graph =====
# 每月免费额度即可支撑简单使用https://app.getzep.com/ # Free monthly quota: https://app.getzep.com/
ZEP_API_KEY=your_zep_api_key_here ZEP_API_KEY=your_zep_api_key_here
# ===== 加速 LLM 配置(可选)===== # ===== Boost LLM (optional) =====
# 注意如果不使用加速配置env文件中就不要出现下面的配置项 # LLM_BOOST_API_KEY=your_api_key_here
LLM_BOOST_API_KEY=your_api_key_here # LLM_BOOST_BASE_URL=your_base_url_here
LLM_BOOST_BASE_URL=your_base_url_here # LLM_BOOST_MODEL_NAME=your_model_name_here
LLM_BOOST_MODEL_NAME=your_model_name_here

View File

@ -127,6 +127,29 @@ LLM_MODEL_NAME=qwen-plus
ZEP_API_KEY=your_zep_api_key ZEP_API_KEY=your_zep_api_key
``` ```
#### Multi-Provider Support (Optional)
Install [Prompture](https://github.com/jhd3197/prompture) to unlock 12+ LLM providers beyond OpenAI-compatible APIs:
```bash
pip install prompture
```
Then use `"provider/model"` format in your `.env`:
| Provider | `LLM_MODEL_NAME` | Cost |
|---|---|---|
| LM Studio | `lmstudio/local-model` | Free (local) |
| Ollama | `ollama/llama3.1:8b` | Free (local) |
| OpenAI | `openai/gpt-4o` | Paid |
| Claude | `claude/claude-sonnet-4-20250514` | Paid |
| Kimi / Moonshot | `moonshot/moonshot-v1-8k` | Paid |
| Groq | `groq/llama-3.1-70b-versatile` | Free tier |
| Google | `google/gemini-1.5-pro` | Paid |
| OpenRouter | `openrouter/anthropic/claude-2` | Paid |
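> Without Prompture, the original OpenAI SDK backend works as before — no changes needed. Once Prompture is installed, every request is routed through it, and an `LLM_MODEL_NAME` without a `provider/` prefix is treated as provider `openai` when the API key is assigned.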
#### 2. Install Dependencies #### 2. Install Dependencies
```bash ```bash

View File

@ -1,43 +1,111 @@
""" """
LLM客户端封装 LLM客户端封装
统一使用OpenAI格式调用 Supports two backends:
1. Prompture (optional) — 12+ providers: LM Studio, Ollama, Claude, Groq, Kimi, etc.
2. OpenAI SDK (default fallback) — any OpenAI-compatible API
Install Prompture for multi-provider support: pip install prompture
""" """
import json import json
import re import re
from typing import Optional, Dict, Any, List from typing import Optional, Dict, Any, List
from openai import OpenAI
from ..config import Config from ..config import Config
# Try to import Prompture; fall back to OpenAI SDK if not installed
try:
from prompture.agents import Conversation
from prompture.infra.provider_env import ProviderEnvironment
_HAS_PROMPTURE = True
except ImportError:
_HAS_PROMPTURE = False
if not _HAS_PROMPTURE:
from openai import OpenAI
# Provider name → ProviderEnvironment field name
# Each provider listed here keeps its key in a field named
# "<provider>_api_key", so the table is derived from the provider names.
_KEY_MAP = {
    provider: f"{provider}_api_key"
    for provider in (
        "openai",
        "claude",
        "google",
        "groq",
        "grok",
        "openrouter",
        "moonshot",
    )
}
class LLMClient: class LLMClient:
"""LLM客户端""" """LLM客户端
When Prompture is installed, ``model`` accepts the ``"provider/model"``
format for multi-provider support::
"lmstudio/local-model" LM Studio (free, local)
"ollama/llama3.1:8b" Ollama (free, local)
"openai/gpt-4o" OpenAI
"claude/claude-sonnet-4-20250514" Anthropic
"moonshot/moonshot-v1-8k" Kimi / Moonshot
"groq/llama-3.1-70b-versatile" Groq
Without Prompture, the original OpenAI SDK backend is used (any
OpenAI-compatible API via LLM_BASE_URL).
"""
def __init__( def __init__(
self, self,
api_key: Optional[str] = None, api_key: Optional[str] = None,
base_url: Optional[str] = None, base_url: Optional[str] = None,
model: Optional[str] = None model: Optional[str] = None,
): ):
self.api_key = api_key or Config.LLM_API_KEY self.api_key = api_key or Config.LLM_API_KEY
self.base_url = base_url or Config.LLM_BASE_URL self.base_url = base_url or Config.LLM_BASE_URL
self.model = model or Config.LLM_MODEL_NAME self.model = model or Config.LLM_MODEL_NAME
if _HAS_PROMPTURE:
self._init_prompture()
else:
self._init_openai()
# ── Prompture backend ──────────────────────────────────────────
def _init_prompture(self):
    """Prepare the per-instance state used by the Prompture backend.

    Sets ``self._env`` to a ``ProviderEnvironment`` that carries the API key
    under the field ``_KEY_MAP`` lists for the model's provider, or to
    ``None`` when no key is configured or the provider has no entry in
    ``_KEY_MAP``. Sets ``self._driver_options`` to the extra driver options,
    which is only ``base_url`` when one is configured.
    """
    credentials: Dict[str, Any] = {}
    if self.api_key:
        # The provider is the text before the first "/" of the model name;
        # a model name without "/" is treated as provider "openai".
        if "/" in self.model:
            provider_name = self.model.split("/", 1)[0]
        else:
            provider_name = "openai"
        field_name = _KEY_MAP.get(provider_name)
        if field_name:
            credentials[field_name] = self.api_key

    if credentials:
        self._env = ProviderEnvironment(**credentials)
    else:
        self._env = None

    self._driver_options: Dict[str, Any] = {}
    if self.base_url:
        self._driver_options["base_url"] = self.base_url
def _make_conversation(self, temperature: float, max_tokens: int) -> "Conversation":
    """Create a Prompture ``Conversation`` for ``self.model``.

    The sampling settings are passed as driver options together with
    ``self._driver_options``; on a key clash the latter wins. ``self._env``
    is passed through as the conversation's ``env``.
    """
    driver_opts: Dict[str, Any] = {
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    # Applied second so instance-level options override the per-call ones.
    driver_opts.update(self._driver_options)
    return Conversation(self.model, options=driver_opts, env=self._env)
# ── OpenAI fallback backend ────────────────────────────────────
def _init_openai(self):
if not self.api_key: if not self.api_key:
raise ValueError("LLM_API_KEY 未配置") raise ValueError("LLM_API_KEY 未配置")
self.client = OpenAI(api_key=self.api_key, base_url=self.base_url)
self.client = OpenAI( # ── Public API ─────────────────────────────────────────────────
api_key=self.api_key,
base_url=self.base_url
)
def chat( def chat(
self, self,
messages: List[Dict[str, str]], messages: List[Dict[str, str]],
temperature: float = 0.7, temperature: float = 0.7,
max_tokens: int = 4096, max_tokens: int = 4096,
response_format: Optional[Dict] = None response_format: Optional[Dict] = None,
) -> str: ) -> str:
""" """
发送聊天请求 发送聊天请求
@ -51,18 +119,11 @@ class LLMClient:
Returns: Returns:
模型响应文本 模型响应文本
""" """
kwargs = { if _HAS_PROMPTURE:
"model": self.model, content = self._chat_prompture(messages, temperature, max_tokens)
"messages": messages, else:
"temperature": temperature, content = self._chat_openai(messages, temperature, max_tokens, response_format)
"max_tokens": max_tokens,
}
if response_format:
kwargs["response_format"] = response_format
response = self.client.chat.completions.create(**kwargs)
content = response.choices[0].message.content
# 部分模型如MiniMax M2.5会在content中包含<think>思考内容,需要移除 # 部分模型如MiniMax M2.5会在content中包含<think>思考内容,需要移除
content = re.sub(r'<think>[\s\S]*?</think>', '', content).strip() content = re.sub(r'<think>[\s\S]*?</think>', '', content).strip()
return content return content
@ -71,7 +132,7 @@ class LLMClient:
self, self,
messages: List[Dict[str, str]], messages: List[Dict[str, str]],
temperature: float = 0.3, temperature: float = 0.3,
max_tokens: int = 4096 max_tokens: int = 4096,
) -> Dict[str, Any]: ) -> Dict[str, Any]:
""" """
发送聊天请求并返回JSON 发送聊天请求并返回JSON
@ -84,20 +145,65 @@ class LLMClient:
Returns: Returns:
解析后的JSON对象 解析后的JSON对象
""" """
response = self.chat( if _HAS_PROMPTURE:
messages=messages, response = self._chat_prompture(messages, temperature, max_tokens)
temperature=temperature, else:
max_tokens=max_tokens, response = self._chat_openai(
response_format={"type": "json_object"} messages, temperature, max_tokens,
) response_format={"type": "json_object"},
)
# 清理markdown代码块标记 # 清理markdown代码块标记
cleaned_response = response.strip() cleaned = response.strip()
cleaned_response = re.sub(r'^```(?:json)?\s*\n?', '', cleaned_response, flags=re.IGNORECASE) cleaned = re.sub(r'^```(?:json)?\s*\n?', '', cleaned, flags=re.IGNORECASE)
cleaned_response = re.sub(r'\n?```\s*$', '', cleaned_response) cleaned = re.sub(r'\n?```\s*$', '', cleaned)
cleaned_response = cleaned_response.strip() cleaned = cleaned.strip()
try: try:
return json.loads(cleaned_response) return json.loads(cleaned)
except json.JSONDecodeError: except json.JSONDecodeError:
raise ValueError(f"LLM返回的JSON格式无效: {cleaned_response}") raise ValueError(f"LLM返回的JSON格式无效: {cleaned}")
# ── Private: Prompture path ────────────────────────────────────
def _chat_prompture(
    self,
    messages: List[Dict[str, str]],
    temperature: float,
    max_tokens: int,
) -> str:
    """Send an OpenAI-style message list through Prompture and return the reply.

    All system messages are merged (newline-joined) into a single system
    entry, every non-system message except the last is replayed as history,
    and the last non-system message is submitted as the prompt. An empty
    prompt is submitted when there is no non-system message.
    """
    conversation = self._make_conversation(temperature, max_tokens)

    # Split the input in one pass, keeping the original relative order.
    system_texts: List[str] = []
    turns: List[Dict[str, str]] = []
    for message in messages:
        if message["role"] == "system":
            system_texts.append(message["content"])
        else:
            turns.append(message)

    # NOTE(review): history is seeded through the conversation's private
    # ``_messages`` list — confirm this is still valid when upgrading Prompture.
    if system_texts:
        conversation._messages.append(
            {"role": "system", "content": "\n".join(system_texts)}
        )
    for earlier in turns[:-1]:
        conversation._messages.append(
            {"role": earlier["role"], "content": earlier["content"]}
        )

    if turns:
        final_prompt = turns[-1]["content"]
    else:
        final_prompt = ""
    return conversation.ask(final_prompt)
# ── Private: OpenAI fallback path ──────────────────────────────
def _chat_openai(
    self,
    messages: List[Dict[str, str]],
    temperature: float,
    max_tokens: int,
    response_format: Optional[Dict] = None,
) -> str:
    """Send a chat-completion request through the OpenAI SDK client.

    Args:
        messages: Chat messages in OpenAI format.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion token limit forwarded to the API.
        response_format: Optional ``response_format`` payload; it is only
            sent when truthy.

    Returns:
        The text of the first choice, or an empty string when the response
        carries no text content.
    """
    request: Dict[str, Any] = {
        "model": self.model,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    if response_format:
        request["response_format"] = response_format

    response = self.client.chat.completions.create(**request)
    # ``message.content`` can be None; normalise it to "" so the declared
    # ``str`` return type holds and callers' string operations do not fail
    # with a TypeError/AttributeError.
    return response.choices[0].message.content or ""

View File

@ -10,9 +10,14 @@ flask>=3.0.0
flask-cors>=6.0.0 flask-cors>=6.0.0
# ============= LLM 相关 ============= # ============= LLM 相关 =============
# OpenAI SDK统一使用 OpenAI 格式调用 LLM # OpenAI SDK默认 LLM 后端
openai>=1.0.0 openai>=1.0.0
# Prompture（可选）— 多供应商 LLM 支持（LM Studio, Ollama, Claude, Groq, Kimi 等）
# Install for multi-provider support: pip install prompture
# https://github.com/jhd3197/prompture
# prompture>=0.1.0
# ============= Zep Cloud ============= # ============= Zep Cloud =============
zep-cloud==3.13.0 zep-cloud==3.13.0