diff --git a/.env.example b/.env.example
index 8892299a..cde7ba52 100644
--- a/.env.example
+++ b/.env.example
@@ -1,19 +1,44 @@
-# LLM API配置(支持 OpenAI SDK 格式的任意 LLM API)
-# 推荐使用阿里百炼平台qwen-plus模型:https://bailian.console.aliyun.com/
-# 注意消耗较大,可先进行小于40轮的模拟尝试
+# ===== LLM API Configuration =====
+# Default: any OpenAI-compatible API
+# With Prompture installed (pip install prompture): 12+ providers supported
+#
+# ── OpenAI-compatible (default, no Prompture needed) ──
LLM_API_KEY=your_api_key_here
LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
LLM_MODEL_NAME=qwen-plus
+#
+# ── With Prompture: use "provider/model" format ──
+# LM Studio (free, local):
+# LLM_MODEL_NAME=lmstudio/local-model
+# LLM_BASE_URL=http://localhost:1234/v1
+# LLM_API_KEY=lm-studio
+#
+# Ollama (free, local):
+# LLM_MODEL_NAME=ollama/llama3.1:8b
+#
+# Kimi / Moonshot:
+# LLM_MODEL_NAME=moonshot/moonshot-v1-8k
+# LLM_API_KEY=your_moonshot_key
+#
+# Claude:
+# LLM_MODEL_NAME=claude/claude-sonnet-4-20250514
+# LLM_API_KEY=sk-ant-...
+#
+# Groq (fast, free tier):
+# LLM_MODEL_NAME=groq/llama-3.1-70b-versatile
+# LLM_API_KEY=gsk_...
+#
+# See all providers: https://github.com/jhd3197/prompture#providers
-# ===== ZEP记忆图谱配置 =====
-# 每月免费额度即可支撑简单使用:https://app.getzep.com/
+# ===== ZEP Memory Graph =====
+# Free monthly quota: https://app.getzep.com/
ZEP_API_KEY=your_zep_api_key_here
-# ===== 加速 LLM 配置(可选)=====
-# 注意如果不使用加速配置,env文件中就不要出现下面的配置项
-LLM_BOOST_API_KEY=your_api_key_here
-LLM_BOOST_BASE_URL=your_base_url_here
-LLM_BOOST_MODEL_NAME=your_model_name_here
-# ===== 前端API超时配置(可选)=====
-# 本地大模型响应较慢时可以增加此值(毫秒)
-# VITE_API_TIMEOUT=600000 # 10分钟
+# ===== Boost LLM (optional) =====
+# LLM_BOOST_API_KEY=your_api_key_here
+# LLM_BOOST_BASE_URL=your_base_url_here
+# LLM_BOOST_MODEL_NAME=your_model_name_here
+
+# ===== Frontend API timeout (optional) =====
+# Increase this value for slow local LLMs (milliseconds)
+# VITE_API_TIMEOUT=600000 # 10 minutes
diff --git a/README.md b/README.md
index 4b8369f4..6f733128 100644
--- a/README.md
+++ b/README.md
@@ -127,6 +127,29 @@ LLM_MODEL_NAME=qwen-plus
ZEP_API_KEY=your_zep_api_key
```
+#### Multi-Provider Support (Optional)
+
+Install [Prompture](https://github.com/jhd3197/prompture) to unlock 12+ LLM providers beyond OpenAI-compatible APIs:
+
+```bash
+pip install prompture
+```
+
+Then use `"provider/model"` format in your `.env`:
+
+| Provider | `LLM_MODEL_NAME` | Cost |
+|---|---|---|
+| LM Studio | `lmstudio/local-model` | Free (local) |
+| Ollama | `ollama/llama3.1:8b` | Free (local) |
+| OpenAI | `openai/gpt-4o` | Paid |
+| Claude | `claude/claude-sonnet-4-20250514` | Paid |
+| Kimi / Moonshot | `moonshot/moonshot-v1-8k` | Paid |
+| Groq | `groq/llama-3.1-70b-versatile` | Free tier |
+| Google | `google/gemini-1.5-pro` | Paid |
+| OpenRouter | `openrouter/anthropic/claude-2` | Paid |
+
+> Without Prompture, the original OpenAI SDK backend works as before — no changes needed.
+
#### 2. Install Dependencies
```bash
diff --git a/backend/app/utils/llm_client.py b/backend/app/utils/llm_client.py
index e332eebb..b2a68507 100644
--- a/backend/app/utils/llm_client.py
+++ b/backend/app/utils/llm_client.py
@@ -1,104 +1,209 @@
"""
LLM客户端封装
-统一使用OpenAI格式调用
+Supports two backends:
+ 1. Prompture (optional) — 12+ providers: LM Studio, Ollama, Claude, Groq, Kimi, etc.
+ 2. OpenAI SDK (default fallback) — any OpenAI-compatible API
+Install Prompture for multi-provider support: pip install prompture
"""
import json
import re
from typing import Optional, Dict, Any, List
-from openai import OpenAI
from ..config import Config
+# Try to import Prompture; fall back to OpenAI SDK if not installed
+try:
+ from prompture.agents import Conversation
+ from prompture.infra.provider_env import ProviderEnvironment
+ from prompture.extraction.tools import strip_think_tags, clean_json_text
+ _HAS_PROMPTURE = True
+except ImportError:
+ _HAS_PROMPTURE = False
+
+if not _HAS_PROMPTURE:
+ from openai import OpenAI
+
+
+# Provider name → ProviderEnvironment field name
+_KEY_MAP = {
+ "openai": "openai_api_key",
+ "claude": "claude_api_key",
+ "google": "google_api_key",
+ "groq": "groq_api_key",
+ "grok": "grok_api_key",
+ "openrouter": "openrouter_api_key",
+ "moonshot": "moonshot_api_key",
+}
+
class LLMClient:
- """LLM客户端"""
-
+ """LLM客户端
+
+ When Prompture is installed, ``model`` accepts the ``"provider/model"``
+ format for multi-provider support::
+
+ "lmstudio/local-model" → LM Studio (free, local)
+ "ollama/llama3.1:8b" → Ollama (free, local)
+ "openai/gpt-4o" → OpenAI
+ "claude/claude-sonnet-4-20250514" → Anthropic
+ "moonshot/moonshot-v1-8k" → Kimi / Moonshot
+        "groq/llama-3.1-70b-versatile"    → Groq
+
+ Without Prompture, the original OpenAI SDK backend is used (any
+ OpenAI-compatible API via LLM_BASE_URL).
+ """
+
def __init__(
self,
api_key: Optional[str] = None,
base_url: Optional[str] = None,
- model: Optional[str] = None
+ model: Optional[str] = None,
):
self.api_key = api_key or Config.LLM_API_KEY
self.base_url = base_url or Config.LLM_BASE_URL
self.model = model or Config.LLM_MODEL_NAME
-
+
+ if _HAS_PROMPTURE:
+ self._init_prompture()
+ else:
+ self._init_openai()
+
+ # ── Prompture backend ──────────────────────────────────────────
+
+    def _init_prompture(self):
+        """Prepare the ProviderEnvironment and driver options for Prompture."""
+        env_field = None
+        if self.api_key:
+            # A model name without a "provider/" prefix is treated as OpenAI.
+            provider = self.model.split("/")[0] if "/" in self.model else "openai"
+            env_field = _KEY_MAP.get(provider)
+        # Providers missing from _KEY_MAP get no environment (env=None).
+        self._env = ProviderEnvironment(**{env_field: self.api_key}) if env_field else None
+        self._driver_options: Dict[str, Any] = (
+            {"base_url": self.base_url} if self.base_url else {}
+        )
+
+    def _make_conversation(self, temperature: float, max_tokens: int) -> "Conversation":
+        """Build a new Prompture Conversation carrying this request's options."""
+        opts: Dict[str, Any] = {"temperature": temperature, "max_tokens": max_tokens}
+        # Driver options are merged last, so a same-named key there overrides
+        # the per-request sampling values above.
+        opts.update(self._driver_options)
+        return Conversation(self.model, options=opts, env=self._env)
+
+ # ── OpenAI fallback backend ────────────────────────────────────
+
+ def _init_openai(self):
if not self.api_key:
raise ValueError("LLM_API_KEY 未配置")
-
- self.client = OpenAI(
- api_key=self.api_key,
- base_url=self.base_url
- )
-
+ self.client = OpenAI(api_key=self.api_key, base_url=self.base_url)
+
+ # ── Public API ─────────────────────────────────────────────────
+
def chat(
self,
messages: List[Dict[str, str]],
temperature: float = 0.7,
max_tokens: int = 4096,
- response_format: Optional[Dict] = None
+ response_format: Optional[Dict] = None,
) -> str:
"""
发送聊天请求
-
+
Args:
messages: 消息列表
temperature: 温度参数
max_tokens: 最大token数
response_format: 响应格式(如JSON模式)
-
+
Returns:
模型响应文本
"""
+        if _HAS_PROMPTURE:
+            content = self._chat_prompture(messages, temperature, max_tokens)
+            return strip_think_tags(content)
+        else:
+            content = self._chat_openai(messages, temperature, max_tokens, response_format)
+            # Fallback: strip <think>...</think> blocks with regex when Prompture is not available
+            return re.sub(r'<think>[\s\S]*?</think>', '', content or '').strip()
+
+    def chat_json(
+        self,
+        messages: List[Dict[str, str]],
+        temperature: float = 0.3,
+        max_tokens: int = 4096,
+    ) -> Dict[str, Any]:
+        """
+        Send a chat request and parse the reply as JSON.
+
+        Args:
+            messages: Chat messages, each a dict with "role" and "content".
+            temperature: Sampling temperature.
+            max_tokens: Maximum number of tokens to generate.
+
+        Returns:
+            The parsed JSON value.
+
+        Raises:
+            ValueError: If the cleaned reply is not valid JSON.
+        """
+        if _HAS_PROMPTURE:
+            response = self._chat_prompture(messages, temperature, max_tokens)
+            # Prompture's clean_json_text strips think tags + markdown fences
+            cleaned = clean_json_text(response)
+        else:
+            # No response_format is sent; JSON comes from the prompt instructions.
+            response = self._chat_openai(messages, temperature, max_tokens) or ""
+            # Fallback cleaning: drop <think> blocks, then markdown code fences.
+            cleaned = re.sub(r'<think>[\s\S]*?</think>', '', response).strip()
+            cleaned = re.sub(r'^```(?:json)?\s*\n?', '', cleaned, flags=re.IGNORECASE)
+            cleaned = re.sub(r'\n?```\s*$', '', cleaned).strip()
+        try:
+            return json.loads(cleaned)
+        except json.JSONDecodeError as exc:
+            raise ValueError(f"LLM返回的JSON格式无效: {cleaned}") from exc
+
+ # ── Private: Prompture path ────────────────────────────────────
+
+    def _chat_prompture(
+        self,
+        messages: List[Dict[str, str]],
+        temperature: float,
+        max_tokens: int,
+    ) -> str:
+        """Run one request through a fresh Prompture Conversation and return its reply."""
+        conv = self._make_conversation(temperature, max_tokens)
+        system_texts = [m["content"] for m in messages if m["role"] == "system"]
+        if system_texts:
+            # All system messages are merged into a single leading system entry.
+            conv._messages.append({"role": "system", "content": "\n".join(system_texts)})
+
+        # NOTE(review): history is seeded through the private ``_messages`` list —
+        # confirm Prompture exposes no public API for replaying prior turns.
+        turns = [m for m in messages if m["role"] != "system"]
+        conv._messages.extend({"role": m["role"], "content": m["content"]} for m in turns[:-1])
+
+        # The last non-system message is the prompt; an empty string when there is none.
+        return conv.ask(turns[-1]["content"] if turns else "")
+
+ # ── Private: OpenAI fallback path ──────────────────────────────
+
+ def _chat_openai(
+ self,
+ messages: List[Dict[str, str]],
+ temperature: float,
+ max_tokens: int,
+ response_format: Optional[Dict] = None,
+ ) -> str:
kwargs = {
"model": self.model,
"messages": messages,
"temperature": temperature,
"max_tokens": max_tokens,
}
-
if response_format:
kwargs["response_format"] = response_format
-
+
response = self.client.chat.completions.create(**kwargs)
- content = response.choices[0].message.content
- # 部分模型(如MiniMax M2.5)会在content中包含思考内容,需要移除
-        content = re.sub(r'<think>[\s\S]*?</think>', '', content).strip()
- return content
-
- def chat_json(
- self,
- messages: List[Dict[str, str]],
- temperature: float = 0.3,
- max_tokens: int = 4096
- ) -> Dict[str, Any]:
- """
- 发送聊天请求并返回JSON
-
- Args:
- messages: 消息列表
- temperature: 温度参数
- max_tokens: 最大token数
-
- Returns:
- 解析后的JSON对象
- """
- response = self.chat(
- messages=messages,
- temperature=temperature,
- max_tokens=max_tokens,
- # 不設 response_format 以相容 LM Studio / Ollama 等本地模型
- # 依賴 prompt 中的 JSON 指示 + 下方的 markdown 清理邏輯
- )
- # 清理markdown代码块标记
- cleaned_response = response.strip()
- cleaned_response = re.sub(r'^```(?:json)?\s*\n?', '', cleaned_response, flags=re.IGNORECASE)
- cleaned_response = re.sub(r'\n?```\s*$', '', cleaned_response)
- cleaned_response = cleaned_response.strip()
-
- try:
- return json.loads(cleaned_response)
- except json.JSONDecodeError:
- raise ValueError(f"LLM返回的JSON格式无效: {cleaned_response}")
-
+ return response.choices[0].message.content
diff --git a/backend/requirements.txt b/backend/requirements.txt
index 4f146296..d6777ea0 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -10,9 +10,14 @@ flask>=3.0.0
flask-cors>=6.0.0
# ============= LLM 相关 =============
-# OpenAI SDK(统一使用 OpenAI 格式调用 LLM)
+# OpenAI SDK(默认 LLM 后端)
openai>=1.0.0
+# Prompture(可选)— 多供应商 LLM 支持:LM Studio, Ollama, Claude, Groq, Kimi 等
+# Install for multi-provider support: pip install prompture
+# https://github.com/jhd3197/prompture
+# prompture>=0.1.0
+
# ============= Zep Cloud =============
zep-cloud==3.13.0
diff --git a/backend/scripts/test_lmstudio.py b/backend/scripts/test_lmstudio.py
new file mode 100644
index 00000000..dd47163a
--- /dev/null
+++ b/backend/scripts/test_lmstudio.py
@@ -0,0 +1,68 @@
+"""
+Quick test: MiroFish LLMClient → LM Studio via Prompture
+"""
+import sys, os
+
+# Add backend to path so we can import app modules
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+
+# Override env vars for LM Studio before Config loads
+os.environ["LLM_MODEL_NAME"] = "lmstudio/deepseek/deepseek-r1-0528-qwen3-8b"
+os.environ["LLM_BASE_URL"] = "http://localhost:1234/v1"
+os.environ["LLM_API_KEY"] = "lm-studio"
+# Provide a dummy ZEP key so Config.validate() won't complain
+os.environ.setdefault("ZEP_API_KEY", "dummy")
+
+from app.utils.llm_client import LLMClient
+
+def test_basic_chat():
+ print("=== Test 1: Basic chat ===")
+ client = LLMClient()
+ from app.utils.llm_client import _HAS_PROMPTURE
+ print(f" Backend: Prompture={_HAS_PROMPTURE}")
+ print(f" Model: {client.model}")
+ response = client.chat([
+ {"role": "system", "content": "You are a helpful assistant. Reply in one sentence."},
+ {"role": "user", "content": "What is social media simulation?"},
+ ], temperature=0.5, max_tokens=256)
+ print(f" Response: {response[:300]}")
+ print()
+
+def test_json_chat():
+ print("=== Test 2: JSON response ===")
+ client = LLMClient()
+ result = client.chat_json([
+ {"role": "system", "content": "You are a JSON-only assistant. Always respond with valid JSON."},
+ {"role": "user", "content": 'Return a JSON object with keys "platform" and "agents" (an integer). Example: {"platform":"twitter","agents":5}'},
+ ], temperature=0.2, max_tokens=256)
+ print(f" Parsed JSON: {result}")
+ print(f" Type: {type(result)}")
+ print()
+
+def test_multi_turn():
+ print("=== Test 3: Multi-turn conversation ===")
+ client = LLMClient()
+ r1 = client.chat([
+ {"role": "user", "content": "My name is MiroFish. Remember it."},
+ ], max_tokens=128)
+ print(f" Turn 1: {r1[:200]}")
+
+ r2 = client.chat([
+ {"role": "user", "content": "My name is MiroFish. Remember it."},
+ {"role": "assistant", "content": r1},
+ {"role": "user", "content": "What is my name?"},
+ ], max_tokens=128)
+ print(f" Turn 2: {r2[:200]}")
+ print()
+
+if __name__ == "__main__":
+    from app.utils.llm_client import _HAS_PROMPTURE
+    print(f"Prompture installed: {_HAS_PROMPTURE}")  # report the real backend, not a constant
+    print(f"LM Studio endpoint: http://localhost:1234/v1\n")
+    try:
+        for test in (test_basic_chat, test_json_chat, test_multi_turn):
+            test()
+        print("All tests passed!")
+    except Exception as e:  # top-level boundary of a manual smoke script
+        import traceback; traceback.print_exc()
+        sys.exit(f"ERROR: {e}")  # message goes to stderr; exit status is 1