Merge PR #463: multi-provider LLM support via Prompture

# Conflicts:
#	.env.example
#	backend/app/utils/llm_client.py
This commit is contained in:
Lucas Ercolano 2026-04-10 16:58:04 -03:00
commit 842bfd4335
5 changed files with 297 additions and 71 deletions

View File

@ -1,19 +1,44 @@
# LLM API配置支持 OpenAI SDK 格式的任意 LLM API # ===== LLM API Configuration =====
# 推荐使用阿里百炼平台qwen-plus模型https://bailian.console.aliyun.com/ # Default: any OpenAI-compatible API
# 注意消耗较大可先进行小于40轮的模拟尝试 # With Prompture installed (pip install prompture): 12+ providers supported
#
# ── OpenAI-compatible (default, no Prompture needed) ──
LLM_API_KEY=your_api_key_here LLM_API_KEY=your_api_key_here
LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1 LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
LLM_MODEL_NAME=qwen-plus LLM_MODEL_NAME=qwen-plus
#
# ── With Prompture: use "provider/model" format ──
# LM Studio (free, local):
# LLM_MODEL_NAME=lmstudio/local-model
# LLM_BASE_URL=http://localhost:1234/v1
# LLM_API_KEY=lm-studio
#
# Ollama (free, local):
# LLM_MODEL_NAME=ollama/llama3.1:8b
#
# Kimi / Moonshot:
# LLM_MODEL_NAME=moonshot/moonshot-v1-8k
# LLM_API_KEY=your_moonshot_key
#
# Claude:
# LLM_MODEL_NAME=claude/claude-sonnet-4-20250514
# LLM_API_KEY=sk-ant-...
#
# Groq (fast, free tier):
# LLM_MODEL_NAME=groq/llama-3.1-70b-versatile
# LLM_API_KEY=gsk_...
#
# See all providers: https://github.com/jhd3197/prompture#providers
# ===== ZEP记忆图谱配置 ===== # ===== ZEP Memory Graph =====
# 每月免费额度即可支撑简单使用https://app.getzep.com/ # Free monthly quota: https://app.getzep.com/
ZEP_API_KEY=your_zep_api_key_here ZEP_API_KEY=your_zep_api_key_here
# ===== 加速 LLM 配置(可选)===== # ===== Boost LLM (optional) =====
# 注意如果不使用加速配置env文件中就不要出现下面的配置项 # LLM_BOOST_API_KEY=your_api_key_here
LLM_BOOST_API_KEY=your_api_key_here # LLM_BOOST_BASE_URL=your_base_url_here
LLM_BOOST_BASE_URL=your_base_url_here # LLM_BOOST_MODEL_NAME=your_model_name_here
LLM_BOOST_MODEL_NAME=your_model_name_here
# ===== 前端API超时配置可选===== # ===== Frontend API timeout (optional) =====
# 本地大模型响应较慢时可以增加此值(毫秒) # Increase this value for slow local LLMs (milliseconds)
# VITE_API_TIMEOUT=600000 # 10分钟 # VITE_API_TIMEOUT=600000 # 10 minutes

View File

@ -127,6 +127,29 @@ LLM_MODEL_NAME=qwen-plus
ZEP_API_KEY=your_zep_api_key ZEP_API_KEY=your_zep_api_key
``` ```
#### Multi-Provider Support (Optional)
Install [Prompture](https://github.com/jhd3197/prompture) to unlock 12+ LLM providers beyond OpenAI-compatible APIs:
```bash
pip install prompture
```
Then set `LLM_MODEL_NAME` in your `.env` using the `provider/model` format:
| Provider | `LLM_MODEL_NAME` | Cost |
|---|---|---|
| LM Studio | `lmstudio/local-model` | Free (local) |
| Ollama | `ollama/llama3.1:8b` | Free (local) |
| OpenAI | `openai/gpt-4o` | Paid |
| Claude | `claude/claude-sonnet-4-20250514` | Paid |
| Kimi / Moonshot | `moonshot/moonshot-v1-8k` | Paid |
| Groq | `groq/llama-3.1-70b-versatile` | Free tier |
| Google | `google/gemini-1.5-pro` | Paid |
| OpenRouter | `openrouter/anthropic/claude-2` | Paid |
> Without Prompture, the original OpenAI SDK backend works as before — no changes needed.
#### 2. Install Dependencies #### 2. Install Dependencies
```bash ```bash

View File

@ -1,43 +1,112 @@
""" """
LLM客户端封装 LLM客户端封装
统一使用OpenAI格式调用 Supports two backends:
1. Prompture (optional): 12+ providers such as LM Studio, Ollama, Claude, Groq, Kimi, etc.
2. OpenAI SDK (default fallback): any OpenAI-compatible API
Install Prompture for multi-provider support: pip install prompture
""" """
import json import json
import re import re
from typing import Optional, Dict, Any, List from typing import Optional, Dict, Any, List
from openai import OpenAI
from ..config import Config from ..config import Config
# Try to import Prompture; fall back to OpenAI SDK if not installed
try:
from prompture.agents import Conversation
from prompture.infra.provider_env import ProviderEnvironment
from prompture.extraction.tools import strip_think_tags, clean_json_text
_HAS_PROMPTURE = True
except ImportError:
_HAS_PROMPTURE = False
if not _HAS_PROMPTURE:
from openai import OpenAI
# Maps the provider prefix of a "provider/model" model name to the
# ProviderEnvironment keyword argument that carries that provider's API key.
# LLMClient._init_prompture looks the prefix up here; a provider that is not
# listed gets no API key forwarded to ProviderEnvironment.
_KEY_MAP = {
    "openai": "openai_api_key",
    "claude": "claude_api_key",
    "google": "google_api_key",
    "groq": "groq_api_key",
    "grok": "grok_api_key",
    "openrouter": "openrouter_api_key",
    "moonshot": "moonshot_api_key",
}
class LLMClient: class LLMClient:
"""LLM客户端""" """LLM客户端
When Prompture is installed, ``model`` accepts the ``"provider/model"``
format for multi-provider support::
    "lmstudio/local-model" LM Studio (free, local)
    "ollama/llama3.1:8b" Ollama (free, local)
    "openai/gpt-4o" OpenAI
    "claude/claude-sonnet-4-20250514" Anthropic
    "moonshot/moonshot-v1-8k" Kimi / Moonshot
    "groq/llama-3.1-70b-versatile" Groq
Without Prompture, the original OpenAI SDK backend is used (any
OpenAI-compatible API via LLM_BASE_URL).
"""
def __init__( def __init__(
self, self,
api_key: Optional[str] = None, api_key: Optional[str] = None,
base_url: Optional[str] = None, base_url: Optional[str] = None,
model: Optional[str] = None model: Optional[str] = None,
): ):
self.api_key = api_key or Config.LLM_API_KEY self.api_key = api_key or Config.LLM_API_KEY
self.base_url = base_url or Config.LLM_BASE_URL self.base_url = base_url or Config.LLM_BASE_URL
self.model = model or Config.LLM_MODEL_NAME self.model = model or Config.LLM_MODEL_NAME
if _HAS_PROMPTURE:
self._init_prompture()
else:
self._init_openai()
# ── Prompture backend ──────────────────────────────────────────
def _init_prompture(self):
env_kwargs: Dict[str, Any] = {}
if self.api_key:
provider = self.model.split("/")[0] if "/" in self.model else "openai"
env_field = _KEY_MAP.get(provider)
if env_field:
env_kwargs[env_field] = self.api_key
self._env = ProviderEnvironment(**env_kwargs) if env_kwargs else None
self._driver_options: Dict[str, Any] = {}
if self.base_url:
self._driver_options["base_url"] = self.base_url
def _make_conversation(self, temperature: float, max_tokens: int) -> "Conversation":
    """Create a fresh Prompture ``Conversation`` for ``self.model``.

    The options passed to the conversation are the sampling settings
    (``temperature``, ``max_tokens``) followed by ``self._driver_options``,
    so a driver option with the same key overrides the sampling value.
    ``self._env`` is passed through as the conversation's ``env``.
    """
    options: Dict[str, Any] = dict(temperature=temperature, max_tokens=max_tokens)
    options.update(self._driver_options)
    return Conversation(self.model, options=options, env=self._env)
# ── OpenAI fallback backend ────────────────────────────────────
def _init_openai(self):
if not self.api_key: if not self.api_key:
raise ValueError("LLM_API_KEY 未配置") raise ValueError("LLM_API_KEY 未配置")
self.client = OpenAI(api_key=self.api_key, base_url=self.base_url)
self.client = OpenAI( # ── Public API ─────────────────────────────────────────────────
api_key=self.api_key,
base_url=self.base_url
)
def chat( def chat(
self, self,
messages: List[Dict[str, str]], messages: List[Dict[str, str]],
temperature: float = 0.7, temperature: float = 0.7,
max_tokens: int = 4096, max_tokens: int = 4096,
response_format: Optional[Dict] = None response_format: Optional[Dict] = None,
) -> str: ) -> str:
""" """
发送聊天请求 发送聊天请求
@ -51,27 +120,19 @@ class LLMClient:
Returns: Returns:
模型响应文本 模型响应文本
""" """
kwargs = { if _HAS_PROMPTURE:
"model": self.model, content = self._chat_prompture(messages, temperature, max_tokens)
"messages": messages, return strip_think_tags(content)
"temperature": temperature, else:
"max_tokens": max_tokens, content = self._chat_openai(messages, temperature, max_tokens, response_format)
} # Fallback: strip think tags with regex when Prompture is not available
return re.sub(r'<think>[\s\S]*?</think>', '', content).strip()
if response_format:
kwargs["response_format"] = response_format
response = self.client.chat.completions.create(**kwargs)
content = response.choices[0].message.content
# 部分模型如MiniMax M2.5会在content中包含<think>思考内容,需要移除
content = re.sub(r'<think>[\s\S]*?</think>', '', content).strip()
return content
def chat_json( def chat_json(
self, self,
messages: List[Dict[str, str]], messages: List[Dict[str, str]],
temperature: float = 0.3, temperature: float = 0.3,
max_tokens: int = 4096 max_tokens: int = 4096,
) -> Dict[str, Any]: ) -> Dict[str, Any]:
""" """
发送聊天请求并返回JSON 发送聊天请求并返回JSON
@ -84,21 +145,65 @@ class LLMClient:
Returns: Returns:
解析后的JSON对象 解析后的JSON对象
""" """
response = self.chat( if _HAS_PROMPTURE:
messages=messages, response = self._chat_prompture(messages, temperature, max_tokens)
temperature=temperature, # Prompture's clean_json_text strips think tags + markdown fences
max_tokens=max_tokens, cleaned = clean_json_text(response)
# 不設 response_format 以相容 LM Studio / Ollama 等本地模型 else:
# 依賴 prompt 中的 JSON 指示 + 下方的 markdown 清理邏輯 response = self._chat_openai(
) messages, temperature, max_tokens
# 清理markdown代码块标记 )
cleaned_response = response.strip() # Fallback cleaning when Prompture is not available
cleaned_response = re.sub(r'^```(?:json)?\s*\n?', '', cleaned_response, flags=re.IGNORECASE) cleaned = re.sub(r'<think>[\s\S]*?</think>', '', response).strip()
cleaned_response = re.sub(r'\n?```\s*$', '', cleaned_response) cleaned = re.sub(r'^```(?:json)?\s*\n?', '', cleaned, flags=re.IGNORECASE)
cleaned_response = cleaned_response.strip() cleaned = re.sub(r'\n?```\s*$', '', cleaned)
cleaned = cleaned.strip()
try: try:
return json.loads(cleaned_response) return json.loads(cleaned)
except json.JSONDecodeError: except json.JSONDecodeError:
raise ValueError(f"LLM返回的JSON格式无效: {cleaned_response}") raise ValueError(f"LLM返回的JSON格式无效: {cleaned}")
# ── Private: Prompture path ────────────────────────────────────
def _chat_prompture(
self,
messages: List[Dict[str, str]],
temperature: float,
max_tokens: int,
) -> str:
conv = self._make_conversation(temperature, max_tokens)
# Inject system prompt
system_parts = [m["content"] for m in messages if m["role"] == "system"]
if system_parts:
conv._messages.append({"role": "system", "content": "\n".join(system_parts)})
# Replay prior turns
non_system = [m for m in messages if m["role"] != "system"]
for msg in non_system[:-1]:
conv._messages.append({"role": msg["role"], "content": msg["content"]})
prompt = non_system[-1]["content"] if non_system else ""
return conv.ask(prompt)
# ── Private: OpenAI fallback path ──────────────────────────────
def _chat_openai(
self,
messages: List[Dict[str, str]],
temperature: float,
max_tokens: int,
response_format: Optional[Dict] = None,
) -> str:
kwargs = {
"model": self.model,
"messages": messages,
"temperature": temperature,
"max_tokens": max_tokens,
}
if response_format:
kwargs["response_format"] = response_format
response = self.client.chat.completions.create(**kwargs)
return response.choices[0].message.content

View File

@ -10,9 +10,14 @@ flask>=3.0.0
flask-cors>=6.0.0 flask-cors>=6.0.0
# ============= LLM 相关 ============= # ============= LLM 相关 =============
# OpenAI SDK统一使用 OpenAI 格式调用 LLM # OpenAI SDK默认 LLM 后端
openai>=1.0.0 openai>=1.0.0
# Prompture可选— 多供应商 LLM 支持LM Studio, Ollama, Claude, Groq, Kimi 等
# Install for multi-provider support: pip install prompture
# https://github.com/jhd3197/prompture
# prompture>=0.1.0
# ============= Zep Cloud ============= # ============= Zep Cloud =============
zep-cloud==3.13.0 zep-cloud==3.13.0

View File

@ -0,0 +1,68 @@
"""
Quick smoke test: MiroFish LLMClient talking to LM Studio via Prompture.
"""
import os
import sys

# Put the backend directory (parent of this file's directory) first on the
# import path so the ``app`` package can be imported.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

# Point the client at a local LM Studio server. These overrides must be in
# the environment before the app modules (and their Config) are imported.
os.environ.update(
    {
        "LLM_MODEL_NAME": "lmstudio/deepseek/deepseek-r1-0528-qwen3-8b",
        "LLM_BASE_URL": "http://localhost:1234/v1",
        "LLM_API_KEY": "lm-studio",
    }
)
# Placeholder ZEP key so Config.validate() won't complain; an already-set
# value is left untouched.
os.environ.setdefault("ZEP_API_KEY", "dummy")

from app.utils.llm_client import LLMClient
def test_basic_chat():
    """Smoke-test a single-turn chat with a system and a user message.

    Prints the active backend, the model name and the first 300 characters
    of the reply.

    Raises:
        AssertionError: If ``chat`` returns something other than a
            non-empty string.
    """
    from app.utils.llm_client import _HAS_PROMPTURE

    print("=== Test 1: Basic chat ===")
    client = LLMClient()
    print(f" Backend: Prompture={_HAS_PROMPTURE}")
    print(f" Model: {client.model}")
    response = client.chat(
        [
            {"role": "system", "content": "You are a helpful assistant. Reply in one sentence."},
            {"role": "user", "content": "What is social media simulation?"},
        ],
        temperature=0.5,
        max_tokens=256,
    )
    # Without a check, an empty reply would still be reported as a pass.
    assert isinstance(response, str) and response.strip(), "chat() returned an empty response"
    print(f" Response: {response[:300]}")
    print()
def test_json_chat():
    """Smoke-test ``chat_json`` by requesting a small JSON object.

    Prints the parsed result and its type.

    Raises:
        AssertionError: If the parsed result is not a dict or lacks the
            ``platform`` / ``agents`` keys requested in the prompt.
    """
    print("=== Test 2: JSON response ===")
    client = LLMClient()
    result = client.chat_json(
        [
            {"role": "system", "content": "You are a JSON-only assistant. Always respond with valid JSON."},
            {"role": "user", "content": 'Return a JSON object with keys "platform" and "agents" (an integer). Example: {"platform":"twitter","agents":5}'},
        ],
        temperature=0.2,
        max_tokens=256,
    )
    print(f" Parsed JSON: {result}")
    print(f" Type: {type(result)}")
    # json.loads can yield any JSON type; the prompt asks for an object.
    assert isinstance(result, dict), f"expected a JSON object, got {type(result).__name__}"
    missing = {"platform", "agents"} - result.keys()
    assert not missing, f"JSON response is missing keys: {sorted(missing)}"
    print()
def test_multi_turn():
    """Smoke-test that prior turns passed in ``messages`` reach the model.

    Sends a first message introducing a name, then replays that exchange
    together with a follow-up question, printing the first 200 characters
    of each reply.

    Raises:
        AssertionError: If either reply is not a non-empty string.
    """
    print("=== Test 3: Multi-turn conversation ===")
    client = LLMClient()
    intro = {"role": "user", "content": "My name is MiroFish. Remember it."}

    r1 = client.chat([intro], max_tokens=128)
    assert isinstance(r1, str) and r1.strip(), "turn 1 returned an empty response"
    print(f" Turn 1: {r1[:200]}")

    r2 = client.chat(
        [
            intro,
            {"role": "assistant", "content": r1},
            {"role": "user", "content": "What is my name?"},
        ],
        max_tokens=128,
    )
    assert isinstance(r2, str) and r2.strip(), "turn 2 returned an empty response"
    print(f" Turn 2: {r2[:200]}")
    # Model output is not deterministic, so a missing name is reported
    # rather than treated as a hard failure.
    if "mirofish" not in r2.lower():
        print(" WARNING: turn 2 does not mention the name; history may not have been replayed")
    print()
if __name__ == "__main__":
    # Script entry point: run the three smoke tests in order and exit
    # non-zero on the first failure so shells and CI can detect it.
    import traceback

    from app.utils.llm_client import _HAS_PROMPTURE

    # Report the backend and endpoint actually in use, not fixed text.
    print(f"Prompture installed: {_HAS_PROMPTURE}")
    print(f"LM Studio endpoint: {os.environ['LLM_BASE_URL']}\n")
    try:
        test_basic_chat()
        test_json_chat()
        test_multi_turn()
    except Exception as e:
        print(f"ERROR: {e}")
        traceback.print_exc()
        sys.exit(1)
    else:
        print("All tests passed!")