From 79edc615630126a286f854b580641759375b6d38 Mon Sep 17 00:00:00 2001
From: Juan Denis <13461850+jhd3197@users.noreply.github.com>
Date: Sat, 4 Apr 2026 01:18:04 -0400
Subject: [PATCH] feat: multi-provider LLM support via Prompture
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add optional Prompture integration for 12+ LLM providers (LM Studio,
Ollama, Claude, Groq, Kimi/Moonshot, etc.) as a drop-in backend.
Zero breaking changes — falls back to the existing OpenAI SDK client
when Prompture is not installed.

- Rewrite llm_client.py with dual-backend architecture
- Update .env.example with provider/model format examples
- Add multi-provider table to README Quick Start section
- Add prompture as optional dependency in requirements.txt

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .env.example                    |  44 +++++--
 README.md                       |  23 ++++
 backend/app/utils/llm_client.py | 218 ++++++++++++++++++++++++--------
 backend/requirements.txt        |   7 +-
 4 files changed, 225 insertions(+), 67 deletions(-)

diff --git a/.env.example b/.env.example
index 78a3b72c..8cfe1c32 100644
--- a/.env.example
+++ b/.env.example
@@ -1,16 +1,40 @@
-# LLM API配置（支持 OpenAI SDK 格式的任意 LLM API）
-# 推荐使用阿里百炼平台qwen-plus模型：https://bailian.console.aliyun.com/
-# 注意消耗较大，可先进行小于40轮的模拟尝试
+# ===== LLM API Configuration =====
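+# Default: any OpenAI-compatible API (qwen-plus on Alibaba Bailian is recommended:
+# https://bailian.console.aliyun.com/). Token usage can be high; try runs under 40 rounds first.
+# With Prompture installed (pip install prompture): 12+ providers supported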
+# ── OpenAI-compatible (default, no Prompture needed) ──
 LLM_API_KEY=your_api_key_here
 LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
 LLM_MODEL_NAME=qwen-plus
+#
+# ── With Prompture: use "provider/model" format ──
+# LM Studio (free, local):
+#   LLM_MODEL_NAME=lmstudio/local-model
+#   LLM_BASE_URL=http://localhost:1234/v1
+#   LLM_API_KEY=lm-studio
+#
+# Ollama (free, local):
+#   LLM_MODEL_NAME=ollama/llama3.1:8b
+#
+# Kimi / Moonshot:
+#   LLM_MODEL_NAME=moonshot/moonshot-v1-8k
+#   LLM_API_KEY=your_moonshot_key
+#
+# Claude:
+#   LLM_MODEL_NAME=claude/claude-sonnet-4-20250514
+#   LLM_API_KEY=sk-ant-...
+#
+# Groq (fast, free tier):
+#   LLM_MODEL_NAME=groq/llama-3.1-70b-versatile
+#   LLM_API_KEY=gsk_...
+#
+# See all providers: https://github.com/jhd3197/prompture#providers
 
-# ===== ZEP记忆图谱配置 =====
-# 每月免费额度即可支撑简单使用：https://app.getzep.com/
+# ===== ZEP Memory Graph =====
+# Free monthly quota: https://app.getzep.com/
 ZEP_API_KEY=your_zep_api_key_here
 
-# ===== 加速 LLM 配置（可选）=====
-# 注意如果不使用加速配置，env文件中就不要出现下面的配置项
-LLM_BOOST_API_KEY=your_api_key_here
-LLM_BOOST_BASE_URL=your_base_url_here
-LLM_BOOST_MODEL_NAME=your_model_name_here
\ No newline at end of file
+# ===== Boost LLM (optional) =====
+# LLM_BOOST_API_KEY=your_api_key_here
+# LLM_BOOST_BASE_URL=your_base_url_here
+# LLM_BOOST_MODEL_NAME=your_model_name_here
diff --git a/README.md b/README.md
index 4b8369f4..6f733128 100644
--- a/README.md
+++ b/README.md
@@ -127,6 +127,29 @@ LLM_MODEL_NAME=qwen-plus
 ZEP_API_KEY=your_zep_api_key
 ```
 
+#### Multi-Provider Support (Optional)
+
+Install [Prompture](https://github.com/jhd3197/prompture) to unlock 12+ LLM providers beyond OpenAI-compatible APIs:
+
+```bash
+pip install prompture
+```
+
+Then set `LLM_MODEL_NAME` in your `.env` using the `provider/model` format:
+
+| Provider | `LLM_MODEL_NAME` | Cost |
+|---|---|---|
+| LM Studio | `lmstudio/local-model` | Free (local) |
+| Ollama | `ollama/llama3.1:8b` | Free (local) |
+| OpenAI | `openai/gpt-4o` | Paid |
+| Claude | `claude/claude-sonnet-4-20250514` | Paid |
+| Kimi / Moonshot | `moonshot/moonshot-v1-8k` | Paid |
+| Groq | `groq/llama-3.1-70b-versatile` | Free tier |
+| Google | `google/gemini-1.5-pro` | Paid |
+| OpenRouter | `openrouter/anthropic/claude-2` | Paid |
+
+> Without Prompture, the original OpenAI SDK backend works as before — no changes needed.
+
 #### 2. Install Dependencies
 
 ```bash
diff --git a/backend/app/utils/llm_client.py b/backend/app/utils/llm_client.py
index 6c1a81f4..13f77fdc 100644
--- a/backend/app/utils/llm_client.py
+++ b/backend/app/utils/llm_client.py
@@ -1,103 +1,209 @@
 """
 LLM客户端封装
-统一使用OpenAI格式调用
+Supports two backends:
+  1. Prompture (optional) — 12+ providers: LM Studio, Ollama, Claude, Groq, Kimi, etc.
+  2. OpenAI SDK (default fallback) — any OpenAI-compatible API
+Install Prompture for multi-provider support: pip install prompture
 """
 
 import json
 import re
 from typing import Optional, Dict, Any, List
-from openai import OpenAI
 
 from ..config import Config
 
+# Try to import Prompture; fall back to OpenAI SDK if not installed
+try:
+    from prompture.agents import Conversation
+    from prompture.infra.provider_env import ProviderEnvironment
+    _HAS_PROMPTURE = True
+except ImportError:
+    _HAS_PROMPTURE = False
+
+if not _HAS_PROMPTURE:
+    from openai import OpenAI
+
+
+# Provider name → ProviderEnvironment field name
+_KEY_MAP = {
+    "openai": "openai_api_key",
+    "claude": "claude_api_key",
+    "google": "google_api_key",
+    "groq": "groq_api_key",
+    "grok": "grok_api_key",
+    "openrouter": "openrouter_api_key",
+    "moonshot": "moonshot_api_key",
+}
+
 
 class LLMClient:
-    """LLM客户端"""
-    
+    """LLM客户端
+
+    When Prompture is installed, ``model`` accepts the ``"provider/model"``
+    format for multi-provider support::
+
+        "lmstudio/local-model"        → LM Studio (free, local)
+        "ollama/llama3.1:8b"          → Ollama (free, local)
+        "openai/gpt-4o"               → OpenAI
+        "claude/claude-sonnet-4-20250514"     → Anthropic
+        "moonshot/moonshot-v1-8k"     → Kimi / Moonshot
+        "groq/llama-3.1-70b-versatile"  → Groq
+
+    Without Prompture, the original OpenAI SDK backend is used (any
+    OpenAI-compatible API via LLM_BASE_URL).
+    """
+
     def __init__(
         self,
         api_key: Optional[str] = None,
         base_url: Optional[str] = None,
-        model: Optional[str] = None
+        model: Optional[str] = None,
     ):
         self.api_key = api_key or Config.LLM_API_KEY
         self.base_url = base_url or Config.LLM_BASE_URL
         self.model = model or Config.LLM_MODEL_NAME
-        
+
+        if _HAS_PROMPTURE:
+            self._init_prompture()
+        else:
+            self._init_openai()
+
+    # ── Prompture backend ──────────────────────────────────────────
+
+    def _init_prompture(self):
+        """Derive the Prompture environment and driver options from the client settings."""
+        key_field = None
+        if self.api_key:
+            # Without a "provider/" prefix the key is looked up under "openai".
+            provider = self.model.partition("/")[0] if "/" in self.model else "openai"
+            key_field = _KEY_MAP.get(provider)
+
+        # Providers missing from _KEY_MAP get no explicit key, hence no environment object.
+        self._env = ProviderEnvironment(**{key_field: self.api_key}) if key_field else None
+        # Only a configured base_url is forwarded as a driver option.
+        self._driver_options: Dict[str, Any] = {"base_url": self.base_url} if self.base_url else {}
+
+    def _make_conversation(self, temperature: float, max_tokens: int) -> "Conversation":
+        """Create a Conversation for ``self.model`` with the given sampling options."""
+        options: Dict[str, Any] = dict(temperature=temperature, max_tokens=max_tokens)
+        # Driver options (e.g. base_url) are applied after the sampling options.
+        options.update(self._driver_options)
+        conversation = Conversation(self.model, options=options, env=self._env)
+        return conversation
+
+    # ── OpenAI fallback backend ────────────────────────────────────
+
+    def _init_openai(self):
         if not self.api_key:
             raise ValueError("LLM_API_KEY 未配置")
-        
-        self.client = OpenAI(
-            api_key=self.api_key,
-            base_url=self.base_url
-        )
-    
+        self.client = OpenAI(api_key=self.api_key, base_url=self.base_url)
+
+    # ── Public API ─────────────────────────────────────────────────
+
     def chat(
         self,
         messages: List[Dict[str, str]],
         temperature: float = 0.7,
         max_tokens: int = 4096,
-        response_format: Optional[Dict] = None
+        response_format: Optional[Dict] = None,
     ) -> str:
         """
         发送聊天请求
-        
+
         Args:
             messages: 消息列表
             temperature: 温度参数
             max_tokens: 最大token数
             response_format: 响应格式（如JSON模式）
-            
+
         Returns:
             模型响应文本
         """
+        if _HAS_PROMPTURE:
+            content = self._chat_prompture(messages, temperature, max_tokens)
+        else:
+            content = self._chat_openai(messages, temperature, max_tokens, response_format)
+
+        # 部分模型（如MiniMax M2.5）会在content中包含<think>思考内容，需要移除
+        content = re.sub(r'<think>[\s\S]*?</think>', '', content).strip()
+        return content
+
+    def chat_json(
+        self,
+        messages: List[Dict[str, str]],
+        temperature: float = 0.3,
+        max_tokens: int = 4096,
+    ) -> Dict[str, Any]:
+        """
+        Send a chat request and parse the reply as JSON.
+
+        Args:
+            messages: Chat messages to send.
+            temperature: Sampling temperature.
+            max_tokens: Maximum number of tokens to generate.
+
+        Returns:
+            The parsed JSON value.
+
+        Raises:
+            ValueError: If the reply is not valid JSON.
+        """
+        # Delegate to chat() so <think>...</think> blocks are stripped before parsing.
+        response = self.chat(
+            messages, temperature, max_tokens,
+            response_format={"type": "json_object"},
+        )
+        # Strip markdown code-fence markers.
+        cleaned = response.strip()
+        cleaned = re.sub(r'^```(?:json)?\s*\n?', '', cleaned, flags=re.IGNORECASE)
+        cleaned = re.sub(r'\n?```\s*$', '', cleaned)
+        cleaned = cleaned.strip()
+
+        try:
+            return json.loads(cleaned)
+        except json.JSONDecodeError as err:
+            raise ValueError(f"LLM返回的JSON格式无效: {cleaned}") from err
+
+    # ── Private: Prompture path ────────────────────────────────────
+
+    def _chat_prompture(
+        self,
+        messages: List[Dict[str, str]],
+        temperature: float,
+        max_tokens: int,
+    ) -> str:
+        """Run ``messages`` through a new Prompture conversation and return the reply."""
+        session = self._make_conversation(temperature, max_tokens)
+
+        # Merge every system message into a single leading system entry.
+        system_texts = [msg["content"] for msg in messages if msg["role"] == "system"]
+        if system_texts:
+            session._messages.append({"role": "system", "content": "\n".join(system_texts)})
+
+        # Seed earlier turns as history; the final turn becomes the prompt for ask().
+        *earlier, latest = [msg for msg in messages if msg["role"] != "system"] or [None]
+        for turn in earlier:
+            session._messages.append({"role": turn["role"], "content": turn["content"]})
+
+        return session.ask(latest["content"] if latest is not None else "")
+
+    # ── Private: OpenAI fallback path ──────────────────────────────
+
+    def _chat_openai(
+        self,
+        messages: List[Dict[str, str]],
+        temperature: float,
+        max_tokens: int,
+        response_format: Optional[Dict] = None,
+    ) -> str:
         kwargs = {
             "model": self.model,
             "messages": messages,
             "temperature": temperature,
             "max_tokens": max_tokens,
         }
-        
         if response_format:
             kwargs["response_format"] = response_format
-        
+
         response = self.client.chat.completions.create(**kwargs)
-        content = response.choices[0].message.content
-        # 部分模型（如MiniMax M2.5）会在content中包含<think>思考内容，需要移除
-        content = re.sub(r'<think>[\s\S]*?</think>', '', content).strip()
-        return content
-    
-    def chat_json(
-        self,
-        messages: List[Dict[str, str]],
-        temperature: float = 0.3,
-        max_tokens: int = 4096
-    ) -> Dict[str, Any]:
-        """
-        发送聊天请求并返回JSON
-        
-        Args:
-            messages: 消息列表
-            temperature: 温度参数
-            max_tokens: 最大token数
-            
-        Returns:
-            解析后的JSON对象
-        """
-        response = self.chat(
-            messages=messages,
-            temperature=temperature,
-            max_tokens=max_tokens,
-            response_format={"type": "json_object"}
-        )
-        # 清理markdown代码块标记
-        cleaned_response = response.strip()
-        cleaned_response = re.sub(r'^```(?:json)?\s*\n?', '', cleaned_response, flags=re.IGNORECASE)
-        cleaned_response = re.sub(r'\n?```\s*$', '', cleaned_response)
-        cleaned_response = cleaned_response.strip()
-
-        try:
-            return json.loads(cleaned_response)
-        except json.JSONDecodeError:
-            raise ValueError(f"LLM返回的JSON格式无效: {cleaned_response}")
-
+        return response.choices[0].message.content
diff --git a/backend/requirements.txt b/backend/requirements.txt
index 4f146296..d6777ea0 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -10,9 +10,14 @@ flask>=3.0.0
 flask-cors>=6.0.0
 
 # ============= LLM 相关 =============
-# OpenAI SDK（统一使用 OpenAI 格式调用 LLM）
+# OpenAI SDK（默认 LLM 后端）
 openai>=1.0.0
 
+# Prompture（可选）— 多供应商 LLM 支持：LM Studio, Ollama, Claude, Groq, Kimi 等
+# Install for multi-provider support: pip install prompture
+# https://github.com/jhd3197/prompture
+# prompture>=0.1.0
+
 # ============= Zep Cloud =============
 zep-cloud==3.13.0