Replace Zep with local graph backend

2026-04-22 17:14:42 +08:00 · 2026-04-22 17:14:42 +08:00 · d0a65067b4
parent fa0f6519b1
commit d0a65067b4
43 changed files with 4622 additions and 217 deletions
--- a/.env.example
+++ b/.env.example
@ -4,13 +4,54 @@
 LLM_API_KEY=your_api_key_here
 LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
 LLM_MODEL_NAME=qwen-plus
+LLM_CONTEXT_WINDOW=8192
+LLM_MAX_CONCURRENCY=1
+LLM_JSON_MAX_RETRIES=2
+ONTOLOGY_MAX_OUTPUT_TOKENS=2048
+ONTOLOGY_PROMPT_MARGIN_TOKENS=512
+ONTOLOGY_MAX_CHUNKS=8
+LOCAL_ZEP_EXTRACT_MAX_OUTPUT_TOKENS=2048
+LOCAL_ZEP_EXTRACT_MAX_RETRIES=2

-# ===== ZEP记忆图谱配置 =====
-# 每月免费额度即可支撑简单使用：https://app.getzep.com/
-ZEP_API_KEY=your_zep_api_key_here
+# ===== Local Zep / Embeddings 配置 =====
+# 使用 OpenAI-compatible embeddings 接口，vLLM 可作为首选后端
+EMBEDDING_API_KEY=local-embedding-key
+EMBEDDING_BASE_URL=http://localhost:8001/v1
+EMBEDDING_MODEL_NAME=your_embedding_model_here
+
+# 可选：cross_encoder reranker 接口；不配置时本地图谱会自动回退到 RRF
+RERANKER_API_KEY=local-reranker-key
+RERANKER_BASE_URL=http://localhost:8002/v1
+RERANKER_MODEL_NAME=your_reranker_model_here
+LOCAL_ZEP_RERANK_TOP_K=50
+
+# 本地图谱 SQLite 路径（可选）
+LOCAL_ZEP_DB_PATH=backend/data/local_zep.sqlite3
+
+# Optional: Python executable for the original OASIS/CAMEL simulation runner.
+# Use this when the backend runs on Python 3.13 but OASIS is installed in a Python 3.11 venv.
+# OASIS_PYTHON=/absolute/path/to/oasis-venv/bin/python
+
+# ===== Tailscale / Remote Access（可选）=====
+# 前端开发模式默认会通过 /api 代理访问后端，适合直接用 Tailscale 访问 http://<tailnet-host>:3000
+VITE_API_BASE_URL=
+VITE_DEV_HOST=0.0.0.0
+VITE_DEV_PORT=3000
+VITE_DEV_PROXY_TARGET=http://127.0.0.1:5001
+VITE_ALLOWED_HOSTS=localhost,127.0.0.1,.ts.net,.beta.tailscale.net
+
+# 如果通过 MagicDNS 访问时 HMR 需要显式指定，可取消注释
+# VITE_HMR_HOST=your-machine.your-tailnet.ts.net
+# VITE_HMR_CLIENT_PORT=3000
+
+FLASK_HOST=0.0.0.0
+FLASK_PORT=5001
+ENABLE_PROXY_FIX=false
+PUBLIC_BASE_URL=
+TAILSCALE_URL=

 # ===== 加速 LLM 配置（可选）=====
 # 注意如果不使用加速配置，env文件中就不要出现下面的配置项
 LLM_BOOST_API_KEY=your_api_key_here
 LLM_BOOST_BASE_URL=your_base_url_here
-LLM_BOOST_MODEL_NAME=your_model_name_here
+LLM_BOOST_MODEL_NAME=your_model_name_here
--- a/.gitignore
+++ b/.gitignore
@ -9,6 +9,7 @@ Thumbs.db
 .env.development
 .env.test
 .env.production
+start.sh

 # Python
 __pycache__/
@ -18,6 +19,7 @@ __pycache__/
 .Python
 .venv/
 venv/
+venv11/
 ENV/
 .eggs/
 *.egg-info/
@ -57,4 +59,4 @@ backend/logs/
 backend/uploads/

 # Docker 数据
-data/
+data/
--- a/README-ZH.md
+++ b/README-ZH.md
@ -100,8 +100,8 @@ MiroFish 致力于打造映射现实的群体智能镜像，通过捕捉个体
 | 工具 | 版本要求 | 说明 | 安装检查 |
 |------|---------|------|---------|
 | **Node.js** | 18+ | 前端运行环境，包含 npm | `node -v` |
-| **Python** | ≥3.11, ≤3.12 | 后端运行环境 | `python --version` |
-| **uv** | 最新版 | Python 包管理器 | `uv --version` |
+| **Python** | ≥3.11 | 后端运行环境。API 与本地图谱后端支持 Python 3.13。 | `python --version` |
+| **Python 3.11** | 3.11.x | 可选但推荐给 OASIS/CAMEL 使用，因为 `camel-oasis==0.2.5` 声明 Python `<3.12`。 | `python3.11 --version` |

 #### 1. 配置环境变量

@ -121,10 +121,31 @@ cp .env.example .env
 LLM_API_KEY=your_api_key
 LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
 LLM_MODEL_NAME=qwen-plus
+LLM_CONTEXT_WINDOW=8192
+LLM_MAX_CONCURRENCY=1
+LLM_JSON_MAX_RETRIES=2
+ONTOLOGY_MAX_OUTPUT_TOKENS=2048
+ONTOLOGY_PROMPT_MARGIN_TOKENS=512
+LOCAL_ZEP_EXTRACT_MAX_OUTPUT_TOKENS=2048
+LOCAL_ZEP_EXTRACT_MAX_RETRIES=2

-# Zep Cloud 配置
-# 每月免费额度即可支撑简单使用：https://app.getzep.com/
-ZEP_API_KEY=your_zep_api_key
+# 本地图谱 / Embeddings 配置
+# 使用 OpenAI-compatible embeddings 接口，推荐优先接 vLLM
+EMBEDDING_API_KEY=local-embedding-key
+EMBEDDING_BASE_URL=http://localhost:8001/v1
+EMBEDDING_MODEL_NAME=your_embedding_model
+
+# 可选 cross-encoder reranker 接口；不配置时本地图谱搜索自动回退到 RRF
+RERANKER_API_KEY=local-reranker-key
+RERANKER_BASE_URL=http://localhost:8002/v1
+RERANKER_MODEL_NAME=your_reranker_model
+LOCAL_ZEP_RERANK_TOP_K=50
+
+# 可选：本地图谱 SQLite 存储路径
+LOCAL_ZEP_DB_PATH=backend/data/local_zep.sqlite3
+
+# 可选：为原版 OASIS/CAMEL 脚本指定独立 Python 3.11 venv
+OASIS_PYTHON=/absolute/path/to/venv11/bin/python
 ```

 #### 2. 安装依赖
@ -144,6 +165,23 @@ npm run setup
 npm run setup:backend
 ```

+如果系统默认 `python3` 低于 3.11，可以手动用目标解释器创建后端 venv：
+
+```bash
+python3.13 -m venv venv
+./venv/bin/python -m pip install --upgrade pip
+./venv/bin/python -m pip install -r backend/requirements.txt
+```
+
+如果需要运行原版 OASIS/CAMEL 模拟引擎，建议单独创建 Python 3.11 venv，并在 `.env` 中设置 `OASIS_PYTHON`：
+
+```bash
+python3.11 -m venv venv11
+./venv11/bin/python -m pip install --upgrade pip
+./venv11/bin/python -m pip install -r backend/requirements.txt
+echo "OASIS_PYTHON=$(pwd)/venv11/bin/python" >> .env
+```
+
 #### 3. 启动服务

 ```bash
@ -162,6 +200,124 @@ npm run backend   # 仅启动后端
 npm run frontend  # 仅启动前端
 ```

+### 本地模型运行与参数
+
+MiroFish 不再依赖 Zep Cloud。本地图谱模块会把图数据存入 SQLite，并使用你提供的 OpenAI-compatible embedding 接口。Reranker 是可选项，只在图谱搜索请求 `cross_encoder` 时用于重排候选结果；未配置时会自动回退到本地 hybrid ranking / RRF。
+
+本地端点示例：
+
+```bash
+# 主 LLM，llama.cpp 示例
+llama-server \
+  -m /path/to/model.gguf \
+  --alias Qwen3.5-9B-VL \
+  --host 127.0.0.1 \
+  --port 8000 \
+  -c 16384 \
+  --jinja
+
+# Embeddings，vLLM 示例
+vllm serve /path/to/Qwen3-Embedding-0.6B \
+  --host 127.0.0.1 \
+  --port 8001 \
+  --runner pooling \
+  --max-model-len 8192
+
+# 可选 reranker，vLLM 示例
+vllm serve /path/to/Qwen3-Reranker-0.6B \
+  --host 127.0.0.1 \
+  --port 8002 \
+  --runner pooling \
+  --max-model-len 8192
+```
+
+已验证可用于 24GB 显存 GPU（RTX 3090 / RTX 4090 级别）的配置。路径已做匿名化处理：
+
+```bash
+# 主 llama.cpp LLM 端点
+./llama-server \
+  --mmproj /path/to/models/Qwen3.5-9B-gguf/mmproj-Qwen3.5-9B-BF16.gguf \
+  --alias Qwen3.5-9B-VL \
+  --host 127.0.0.1 \
+  --port 8000 \
+  -c 280000 \
+  -ngl auto \
+  --temp 0.7 \
+  --top-p 0.8 \
+  --top-k 20 \
+  --min-p 0.0 \
+  --jinja \
+  -m /path/to/models/Qwen3.5-9B-gguf/Qwen3.5-9B-Q6_K.gguf
+
+# Embedding 端点，实测峰值约 2-3GB 显存
+vllm serve /path/to/models/embedding/Qwen3-Embedding-0.6B \
+  --host 127.0.0.1 \
+  --port 8001 \
+  --runner pooling \
+  --gpu-memory-utilization 0.08 \
+  --max-model-len 8192 \
+  --quantization fp8 \
+  --kv-cache-dtype fp8
+
+# Reranker 端点，实测峰值约 2-3GB 显存
+vllm serve /path/to/models/embedding/Qwen3-Reranker-0.6B \
+  --host 127.0.0.1 \
+  --port 8002 \
+  --runner pooling \
+  --gpu-memory-utilization 0.09 \
+  --max-model-len 8192 \
+  --quantization fp8 \
+  --kv-cache-dtype fp8
+```
+
+对应 `.env`：
+
+```env
+LLM_BASE_URL=http://127.0.0.1:8000/v1
+LLM_MODEL_NAME=Qwen3.5-9B-VL
+LLM_CONTEXT_WINDOW=280000
+LLM_MAX_CONCURRENCY=1
+ONTOLOGY_MAX_OUTPUT_TOKENS=8192
+LOCAL_ZEP_EXTRACT_MAX_OUTPUT_TOKENS=8192
+
+EMBEDDING_BASE_URL=http://127.0.0.1:8001/v1
+EMBEDDING_MODEL_NAME=/path/to/Qwen3-Embedding-0.6B
+
+RERANKER_BASE_URL=http://127.0.0.1:8002/v1
+RERANKER_MODEL_NAME=/path/to/Qwen3-Reranker-0.6B
+LOCAL_ZEP_RERANK_TOP_K=50
+```
+
+参数规则：
+- `LLM_CONTEXT_WINDOW` 必须和主 LLM 服务暴露的上下文长度一致，确保 `prompt tokens + max output tokens <= LLM_CONTEXT_WINDOW`。
+- 使用上面的 24GB 配置时，`LLM_CONTEXT_WINDOW=280000` 应与 `-c 280000` 保持一致。16k 上下文模型建议 `ONTOLOGY_MAX_OUTPUT_TOKENS=8192`，大约保留一半窗口给输出；8k 端点建议使用 `2048` 到 `4096`。
+- 单个 llama.cpp/vLLM 主端点建议 `LLM_MAX_CONCURRENCY=1`，除非确认服务端可以稳定并发处理 chat completions。
+- Embedding 维度不需要手动配置。本地图谱会记录模型返回的向量；同一个图谱数据库应保持使用同一个 embedding 模型。
+- Reranker 与 embedding 是不同模型。Embedding 负责图谱索引和语义检索；reranker 只负责对候选搜索结果重排。
+- `.env`、`start.sh`、`venv/`、`venv11/`、上传的模拟数据和本地图谱数据库都会被 git 忽略。
+
+### Tailscale 访问
+
+前端现在默认走同源 `/api`，不再默认写死 `http://localhost:5001`。因此你从另一台 Tailscale 设备访问 Vite 开发服务器时，前端请求会继续命中宿主机后端，而不会错误地访问调用方自己的 localhost。
+
+建议在项目根目录 `.env` 中加入：
+
+```env
+VITE_DEV_HOST=0.0.0.0
+VITE_DEV_PORT=3000
+VITE_DEV_PROXY_TARGET=http://127.0.0.1:5001
+VITE_ALLOWED_HOSTS=localhost,127.0.0.1,.ts.net,.beta.tailscale.net
+FLASK_HOST=0.0.0.0
+FLASK_PORT=5001
+```
+
+然后从另一台 Tailscale 设备访问：
+
+- 前端：`http://<你的 tailnet 主机名>:3000`
+- 后端：`http://<你的 tailnet 主机名>:5001`
+
+如果你通过 `tailscale serve` 或 `tailscale funnel` 暴露后端，建议设置 `ENABLE_PROXY_FIX=true`。如果通过 MagicDNS 访问时 HMR 连接不稳定，再额外设置 `VITE_HMR_HOST` 为你的 Tailscale 主机名。
+
 ### 二、Docker 部署

 ```bash
--- a/README.md
+++ b/README.md
@ -100,8 +100,8 @@ Click the image to watch MiroFish's deep prediction of the lost ending based on
 | Tool | Version | Description | Check Installation |
 |------|---------|-------------|-------------------|
 | **Node.js** | 18+ | Frontend runtime, includes npm | `node -v` |
-| **Python** | ≥3.11, ≤3.12 | Backend runtime | `python --version` |
-| **uv** | Latest | Python package manager | `uv --version` |
+| **Python** | ≥3.11 | Backend runtime. Python 3.13 is supported for the API and local graph backend. | `python --version` |
+| **Python 3.11** | 3.11.x | Optional but recommended for OASIS/CAMEL, because `camel-oasis==0.2.5` declares Python `<3.12`. | `python3.11 --version` |

 #### 1. Configure Environment Variables

@ -121,10 +121,31 @@ cp .env.example .env
 LLM_API_KEY=your_api_key
 LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
 LLM_MODEL_NAME=qwen-plus
+LLM_CONTEXT_WINDOW=8192
+LLM_MAX_CONCURRENCY=1
+LLM_JSON_MAX_RETRIES=2
+ONTOLOGY_MAX_OUTPUT_TOKENS=2048
+ONTOLOGY_PROMPT_MARGIN_TOKENS=512
+LOCAL_ZEP_EXTRACT_MAX_OUTPUT_TOKENS=2048
+LOCAL_ZEP_EXTRACT_MAX_RETRIES=2

-# Zep Cloud Configuration
-# Free monthly quota is sufficient for simple usage: https://app.getzep.com/
-ZEP_API_KEY=your_zep_api_key
+# Local Graph / Embeddings Configuration
+# Use an OpenAI-compatible embeddings endpoint. vLLM is a good first option.
+EMBEDDING_API_KEY=local-embedding-key
+EMBEDDING_BASE_URL=http://localhost:8001/v1
+EMBEDDING_MODEL_NAME=your_embedding_model
+
+# Optional cross-encoder reranker endpoint. If omitted, local graph search falls back to RRF.
+RERANKER_API_KEY=local-reranker-key
+RERANKER_BASE_URL=http://localhost:8002/v1
+RERANKER_MODEL_NAME=your_reranker_model
+LOCAL_ZEP_RERANK_TOP_K=50
+
+# Optional: choose where the local graph SQLite file is stored
+LOCAL_ZEP_DB_PATH=backend/data/local_zep.sqlite3
+
+# Optional: use a separate Python 3.11 venv for original OASIS/CAMEL scripts
+OASIS_PYTHON=/absolute/path/to/venv11/bin/python
 ```

 #### 2. Install Dependencies
@ -144,6 +165,23 @@ npm run setup
 npm run setup:backend
 ```

+If your default `python3` is older than 3.11, create the backend venv manually with the desired interpreter before running the backend:
+
+```bash
+python3.13 -m venv venv
+./venv/bin/python -m pip install --upgrade pip
+./venv/bin/python -m pip install -r backend/requirements.txt
+```
+
+For the original OASIS/CAMEL simulation engine, use a Python 3.11 venv and point `OASIS_PYTHON` at it:
+
+```bash
+python3.11 -m venv venv11
+./venv11/bin/python -m pip install --upgrade pip
+./venv11/bin/python -m pip install -r backend/requirements.txt
+echo "OASIS_PYTHON=$(pwd)/venv11/bin/python" >> .env
+```
+
 #### 3. Start Services

 ```bash
@ -162,6 +200,124 @@ npm run backend   # Start backend only
 npm run frontend  # Start frontend only
 ```

+### Local Model Runtime and Parameters
+
+MiroFish no longer requires Zep Cloud. The local graph module stores graph data in SQLite and uses your OpenAI-compatible embedding endpoint. A reranker endpoint is optional and is used only when graph search requests `cross_encoder`; otherwise search falls back to local hybrid ranking/RRF.
+
+Example local endpoints:
+
+```bash
+# Main LLM endpoint, llama.cpp example
+llama-server \
+  -m /path/to/model.gguf \
+  --alias Qwen3.5-9B-VL \
+  --host 127.0.0.1 \
+  --port 8000 \
+  -c 16384 \
+  --jinja
+
+# Embeddings endpoint, vLLM example
+vllm serve /path/to/Qwen3-Embedding-0.6B \
+  --host 127.0.0.1 \
+  --port 8001 \
+  --runner pooling \
+  --max-model-len 8192
+
+# Optional reranker endpoint, vLLM example
+vllm serve /path/to/Qwen3-Reranker-0.6B \
+  --host 127.0.0.1 \
+  --port 8002 \
+  --runner pooling \
+  --max-model-len 8192
+```
+
+Tested 24GB GPU profile (RTX 3090 / RTX 4090 class). Paths are intentionally anonymized:
+
+```bash
+# Main llama.cpp LLM endpoint
+./llama-server \
+  --mmproj /path/to/models/Qwen3.5-9B-gguf/mmproj-Qwen3.5-9B-BF16.gguf \
+  --alias Qwen3.5-9B-VL \
+  --host 127.0.0.1 \
+  --port 8000 \
+  -c 280000 \
+  -ngl auto \
+  --temp 0.7 \
+  --top-p 0.8 \
+  --top-k 20 \
+  --min-p 0.0 \
+  --jinja \
+  -m /path/to/models/Qwen3.5-9B-gguf/Qwen3.5-9B-Q6_K.gguf
+
+# Embedding endpoint. Observed peak is about 2-3GB VRAM.
+vllm serve /path/to/models/embedding/Qwen3-Embedding-0.6B \
+  --host 127.0.0.1 \
+  --port 8001 \
+  --runner pooling \
+  --gpu-memory-utilization 0.08 \
+  --max-model-len 8192 \
+  --quantization fp8 \
+  --kv-cache-dtype fp8
+
+# Reranker endpoint. Observed peak is about 2-3GB VRAM.
+vllm serve /path/to/models/embedding/Qwen3-Reranker-0.6B \
+  --host 127.0.0.1 \
+  --port 8002 \
+  --runner pooling \
+  --gpu-memory-utilization 0.09 \
+  --max-model-len 8192 \
+  --quantization fp8 \
+  --kv-cache-dtype fp8
+```
+
+Matching `.env` settings:
+
+```env
+LLM_BASE_URL=http://127.0.0.1:8000/v1
+LLM_MODEL_NAME=Qwen3.5-9B-VL
+LLM_CONTEXT_WINDOW=280000
+LLM_MAX_CONCURRENCY=1
+ONTOLOGY_MAX_OUTPUT_TOKENS=8192
+LOCAL_ZEP_EXTRACT_MAX_OUTPUT_TOKENS=8192
+
+EMBEDDING_BASE_URL=http://127.0.0.1:8001/v1
+EMBEDDING_MODEL_NAME=/path/to/Qwen3-Embedding-0.6B
+
+RERANKER_BASE_URL=http://127.0.0.1:8002/v1
+RERANKER_MODEL_NAME=/path/to/Qwen3-Reranker-0.6B
+LOCAL_ZEP_RERANK_TOP_K=50
+```
+
+Parameter rules:
+- `LLM_CONTEXT_WINDOW` must match the context length exposed by the main LLM server. Keep `prompt tokens + max output tokens <= LLM_CONTEXT_WINDOW`.
+- For the 24GB profile above, set `LLM_CONTEXT_WINDOW=280000` to match `-c 280000`. For a `16k` context model, `ONTOLOGY_MAX_OUTPUT_TOKENS=8192` leaves roughly half the window for output. For an `8k` endpoint, use `2048` to `4096`.
+- Set `LLM_MAX_CONCURRENCY=1` for single llama.cpp/vLLM main endpoints unless you know the server can handle parallel chat completions.
+- Embedding dimension is not configured manually. The local graph records vectors from the embedding model; keep the same embedding model for one graph database.
+- The reranker is different from the embedding model. Embeddings are required for graph indexing and semantic search. The reranker is optional and only reorders candidate search results.
+- `.env`, `start.sh`, `venv/`, `venv11/`, uploaded simulations, and local graph databases are intentionally ignored by git.
+
+### Tailscale Access
+
+The frontend now defaults to same-origin `/api` instead of `http://localhost:5001`, so opening the Vite dev server from another device over Tailscale will still reach the backend running on the host machine.
+
+Recommended root `.env` settings:
+
+```env
+VITE_DEV_HOST=0.0.0.0
+VITE_DEV_PORT=3000
+VITE_DEV_PROXY_TARGET=http://127.0.0.1:5001
+VITE_ALLOWED_HOSTS=localhost,127.0.0.1,.ts.net,.beta.tailscale.net
+FLASK_HOST=0.0.0.0
+FLASK_PORT=5001
+```
+
+Then access from another Tailscale device at:
+
+- Frontend: `http://<your-tailnet-hostname>:3000`
+- Backend: `http://<your-tailnet-hostname>:5001`
+
+If you expose the backend through `tailscale serve` or `tailscale funnel`, set `ENABLE_PROXY_FIX=true`. If HMR does not reconnect correctly over MagicDNS, set `VITE_HMR_HOST` to your Tailscale hostname.
+
 ### Option 2: Docker Deployment

 ```bash
@ -200,4 +356,4 @@ MiroFish's simulation engine is powered by **[OASIS (Open Agent Social Interacti
   <source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=666ghj/MiroFish&type=date&legend=top-left" />
   <img alt="Star History Chart" src="https://api.star-history.com/svg?repos=666ghj/MiroFish&type=date&legend=top-left" />
 </picture>
-</a>
+</a>
--- a/backend/app/init.py
+++ b/backend/app/init.py
@ -20,6 +20,10 @@ def create_app(config_class=Config):
    """Flask应用工厂函数"""
    app = Flask(__name__)
    app.config.from_object(config_class)
+
+    if app.config.get('ENABLE_PROXY_FIX'):
+        from werkzeug.middleware.proxy_fix import ProxyFix
+        app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1, x_port=1)
    
    # 设置JSON编码：确保中文直接显示（而不是 \uXXXX 格式）
    # Flask >= 2.3 使用 app.json.ensure_ascii，旧版本使用 JSON_AS_ASCII 配置
@ -38,6 +42,8 @@ def create_app(config_class=Config):
        logger.info("=" * 50)
        logger.info("MiroFish Backend 启动中...")
        logger.info("=" * 50)
+        if app.config.get('ENABLE_PROXY_FIX'):
+            logger.info("已启用 ProxyFix（适用于 Tailscale Serve/Funnel 等反向代理）")
    
    # 启用CORS
    CORS(app, resources={r"/api/*": {"origins": "*"}})
@ -77,4 +83,3 @@ def create_app(config_class=Config):
        logger.info("MiroFish Backend 启动完成")
    
    return app
-
--- a/backend/app/api/graph.py
+++ b/backend/app/api/graph.py
@ -283,17 +283,6 @@ def build_graph():
    try:
        logger.info("=== 开始构建图谱 ===")
        
-        # 检查配置
-        errors = []
-        if not Config.ZEP_API_KEY:
-            errors.append(t('api.zepApiKeyMissing'))
-        if errors:
-            logger.error(f"配置错误: {errors}")
-            return jsonify({
-                "success": False,
-                "error": t('api.configError', details="; ".join(errors))
-            }), 500
-        
        # 解析请求
        data = request.get_json() or {}
        project_id = data.get('project_id')
@ -440,7 +429,7 @@ def build_graph():
                episode_uuids = builder.add_text_batches(
                    graph_id, 
                    chunks,
-                    batch_size=3,
+                    batch_size=1,
                    progress_callback=add_progress_callback
                )
                
@ -572,12 +561,6 @@ def get_graph_data(graph_id: str):
    获取图谱数据（节点和边）
    """
    try:
-        if not Config.ZEP_API_KEY:
-            return jsonify({
-                "success": False,
-                "error": t('api.zepApiKeyMissing')
-            }), 500
-        
        builder = GraphBuilderService(api_key=Config.ZEP_API_KEY)
        graph_data = builder.get_graph_data(graph_id)
        
@ -600,12 +583,6 @@ def delete_graph(graph_id: str):
    删除Zep图谱
    """
    try:
-        if not Config.ZEP_API_KEY:
-            return jsonify({
-                "success": False,
-                "error": t('api.zepApiKeyMissing')
-            }), 500
-        
        builder = GraphBuilderService(api_key=Config.ZEP_API_KEY)
        builder.delete_graph(graph_id)
        
--- a/backend/app/api/simulation.py
+++ b/backend/app/api/simulation.py
@ -57,12 +57,6 @@ def get_graph_entities(graph_id: str):
        enrich: 是否获取相关边信息（默认true）
    """
    try:
-        if not Config.ZEP_API_KEY:
-            return jsonify({
-                "success": False,
-                "error": t('api.zepApiKeyMissing')
-            }), 500
-        
        entity_types_str = request.args.get('entity_types', '')
        entity_types = [t.strip() for t in entity_types_str.split(',') if t.strip()] if entity_types_str else None
        enrich = request.args.get('enrich', 'true').lower() == 'true'
@ -94,12 +88,6 @@ def get_graph_entities(graph_id: str):
 def get_entity_detail(graph_id: str, entity_uuid: str):
    """获取单个实体的详细信息"""
    try:
-        if not Config.ZEP_API_KEY:
-            return jsonify({
-                "success": False,
-                "error": t('api.zepApiKeyMissing')
-            }), 500
-        
        reader = ZepEntityReader()
        entity = reader.get_entity_with_context(graph_id, entity_uuid)
        
@ -127,12 +115,6 @@ def get_entity_detail(graph_id: str, entity_uuid: str):
 def get_entities_by_type(graph_id: str, entity_type: str):
    """获取指定类型的所有实体"""
    try:
-        if not Config.ZEP_API_KEY:
-            return jsonify({
-                "success": False,
-                "error": t('api.zepApiKeyMissing')
-            }), 500
-        
        enrich = request.args.get('enrich', 'true').lower() == 'true'
        
        reader = ZepEntityReader()
@ -466,7 +448,7 @@ def prepare_simulation():
        
        entity_types_list = data.get('entity_types')
        use_llm_for_profiles = data.get('use_llm_for_profiles', True)
-        parallel_profile_count = data.get('parallel_profile_count', 5)
+        parallel_profile_count = data.get('parallel_profile_count', 1)
        
        # ========== 同步获取实体数量（在后台任务启动前） ==========
        # 这样前端在调用prepare后立即就能获取到预期Agent总数
--- a/backend/app/config.py
+++ b/backend/app/config.py
@ -31,9 +31,31 @@ class Config:
    LLM_API_KEY = os.environ.get('LLM_API_KEY')
    LLM_BASE_URL = os.environ.get('LLM_BASE_URL', 'https://api.openai.com/v1')
    LLM_MODEL_NAME = os.environ.get('LLM_MODEL_NAME', 'gpt-4o-mini')
+    LLM_CONTEXT_WINDOW = int(os.environ.get('LLM_CONTEXT_WINDOW', '8192'))
+    LLM_MAX_CONCURRENCY = int(os.environ.get('LLM_MAX_CONCURRENCY', '1'))
+    LLM_JSON_MAX_RETRIES = int(os.environ.get('LLM_JSON_MAX_RETRIES', '2'))
+    ONTOLOGY_MAX_OUTPUT_TOKENS = int(os.environ.get('ONTOLOGY_MAX_OUTPUT_TOKENS', '2048'))
+    ONTOLOGY_PROMPT_MARGIN_TOKENS = int(os.environ.get('ONTOLOGY_PROMPT_MARGIN_TOKENS', '512'))
+    ONTOLOGY_MAX_CHUNKS = int(os.environ.get('ONTOLOGY_MAX_CHUNKS', '8'))
+    LOCAL_ZEP_EXTRACT_MAX_RETRIES = int(os.environ.get('LOCAL_ZEP_EXTRACT_MAX_RETRIES', '2'))
+    LOCAL_ZEP_EXTRACT_MAX_OUTPUT_TOKENS = int(os.environ.get('LOCAL_ZEP_EXTRACT_MAX_OUTPUT_TOKENS', '2048'))
    
-    # Zep配置
-    ZEP_API_KEY = os.environ.get('ZEP_API_KEY')
+    # Local graph / embeddings 配置
+    ZEP_API_KEY = os.environ.get('ZEP_API_KEY')  # deprecated, ignored by the local graph backend
+    EMBEDDING_API_KEY = os.environ.get('EMBEDDING_API_KEY', 'local-embedding-key')
+    EMBEDDING_BASE_URL = os.environ.get('EMBEDDING_BASE_URL')
+    EMBEDDING_MODEL_NAME = os.environ.get('EMBEDDING_MODEL_NAME')
+    RERANKER_API_KEY = os.environ.get('RERANKER_API_KEY', 'local-reranker-key')
+    RERANKER_BASE_URL = os.environ.get('RERANKER_BASE_URL')
+    RERANKER_MODEL_NAME = os.environ.get('RERANKER_MODEL_NAME')
+    LOCAL_ZEP_RERANK_TOP_K = int(os.environ.get('LOCAL_ZEP_RERANK_TOP_K', '50'))
+    LOCAL_ZEP_DB_PATH = os.environ.get(
+        'LOCAL_ZEP_DB_PATH',
+        os.path.join(os.path.dirname(__file__), '../data/local_zep.sqlite3')
+    )
+    PUBLIC_BASE_URL = os.environ.get('PUBLIC_BASE_URL')
+    TAILSCALE_URL = os.environ.get('TAILSCALE_URL')
+    ENABLE_PROXY_FIX = os.environ.get('ENABLE_PROXY_FIX', 'False').lower() == 'true'
    
    # 文件上传配置
    MAX_CONTENT_LENGTH = 50 * 1024 * 1024  # 50MB
@ -47,6 +69,7 @@ class Config:
    # OASIS模拟配置
    OASIS_DEFAULT_MAX_ROUNDS = int(os.environ.get('OASIS_DEFAULT_MAX_ROUNDS', '10'))
    OASIS_SIMULATION_DATA_DIR = os.path.join(os.path.dirname(__file__), '../uploads/simulations')
+    OASIS_PYTHON = os.environ.get('OASIS_PYTHON')
    
    # OASIS平台可用动作配置
    OASIS_TWITTER_ACTIONS = [
@ -69,7 +92,8 @@ class Config:
        errors = []
        if not cls.LLM_API_KEY:
            errors.append("LLM_API_KEY 未配置")
-        if not cls.ZEP_API_KEY:
-            errors.append("ZEP_API_KEY 未配置")
+        if not cls.EMBEDDING_BASE_URL:
+            errors.append("EMBEDDING_BASE_URL 未配置")
+        if not cls.EMBEDDING_MODEL_NAME:
+            errors.append("EMBEDDING_MODEL_NAME 未配置")
        return errors
-
--- a/backend/app/services/graph_builder.py
+++ b/backend/app/services/graph_builder.py
@ -45,9 +45,6 @@ class GraphBuilderService:
    
    def __init__(self, api_key: Optional[str] = None):
        self.api_key = api_key or Config.ZEP_API_KEY
-        if not self.api_key:
-            raise ValueError("ZEP_API_KEY 未配置")
-        
        self.client = Zep(api_key=self.api_key)
        self.task_manager = TaskManager()
    
@ -503,4 +500,3 @@ class GraphBuilderService:
    def delete_graph(self, graph_id: str):
        """删除图谱"""
        self.client.graph.delete(graph_id=graph_id)
-
--- a/backend/app/services/oasis_profile_generator.py
+++ b/backend/app/services/oasis_profile_generator.py
@ -20,6 +20,7 @@ from zep_cloud.client import Zep

 from ..config import Config
 from ..utils.logger import get_logger
+from ..utils.llm_gate import main_llm_slot
 from ..utils.locale import get_language_instruction, get_locale, set_locale, t
 from .zep_entity_reader import EntityNode, ZepEntityReader

@ -203,11 +204,10 @@ class OasisProfileGenerator:
        self.zep_client = None
        self.graph_id = graph_id
        
-        if self.zep_api_key:
-            try:
-                self.zep_client = Zep(api_key=self.zep_api_key)
-            except Exception as e:
-                logger.warning(f"Zep客户端初始化失败: {e}")
+        try:
+            self.zep_client = Zep(api_key=self.zep_api_key)
+        except Exception as e:
+            logger.warning(f"Zep客户端初始化失败: {e}")
    
    def generate_profile_from_entity(
        self, 
@ -527,16 +527,17 @@ class OasisProfileGenerator:
        
        for attempt in range(max_attempts):
            try:
-                response = self.client.chat.completions.create(
-                    model=self.model_name,
-                    messages=[
-                        {"role": "system", "content": self._get_system_prompt(is_individual)},
-                        {"role": "user", "content": prompt}
-                    ],
-                    response_format={"type": "json_object"},
-                    temperature=0.7 - (attempt * 0.1)  # 每次重试降低温度
-                    # 不设置max_tokens，让LLM自由发挥
-                )
+                with main_llm_slot():
+                    response = self.client.chat.completions.create(
+                        model=self.model_name,
+                        messages=[
+                            {"role": "system", "content": self._get_system_prompt(is_individual)},
+                            {"role": "user", "content": prompt}
+                        ],
+                        response_format={"type": "json_object"},
+                        temperature=0.7 - (attempt * 0.1)  # 每次重试降低温度
+                        # 不设置max_tokens，让LLM自由发挥
+                    )
                
                content = response.choices[0].message.content
                
@ -950,6 +951,7 @@ class OasisProfileGenerator:
                )
                return idx, fallback_profile, str(e)
        
+        parallel_count = max(1, min(parallel_count, Config.LLM_MAX_CONCURRENCY))
        logger.info(f"开始并行生成 {total} 个Agent人设（并行数: {parallel_count}）...")
        print(f"\n{'='*60}")
        print(f"开始生成Agent人设 - 共 {total} 个实体，并行数: {parallel_count}")
@ -1202,4 +1204,3 @@ class OasisProfileGenerator:
        """[已废弃] 请使用 save_profiles() 方法"""
        logger.warning("save_profiles_to_json已废弃，请使用save_profiles方法")
        self.save_profiles(profiles, file_path, platform)
-
--- a/backend/app/services/ontology_generator.py
+++ b/backend/app/services/ontology_generator.py
@ -4,13 +4,24 @@
 """

 import json
-import logging
+import math
 import re
+import traceback
 from typing import Dict, Any, List, Optional
 from ..utils.llm_client import LLMClient
 from ..utils.locale import get_language_instruction
+from ..utils.logger import get_logger
+from ..config import Config

-logger = logging.getLogger(__name__)
+logger = get_logger('mirofish.ontology')
+
+
+def _estimate_tokens(text: str) -> int:
+    """Conservative token estimate for local vLLM context budgeting."""
+    text = text or ""
+    cjk_chars = len(re.findall(r'[\u3400-\u9fff\uf900-\ufaff]', text))
+    non_cjk_chars = len(text) - cjk_chars
+    return cjk_chars + math.ceil(non_cjk_chars / 4)


 def _to_pascal_case(name: str) -> str:
@ -199,51 +210,499 @@ class OntologyGenerator:
        Returns:
            本体定义（entity_types, edge_types等）
        """
-        # 构建用户消息
-        user_message = self._build_user_message(
-            document_texts, 
-            simulation_requirement,
-            additional_context
-        )
-        
        lang_instruction = get_language_instruction()
        system_prompt = f"{ONTOLOGY_SYSTEM_PROMPT}\n\n{lang_instruction}\nIMPORTANT: Entity type names MUST be in English PascalCase (e.g., 'PersonEntity', 'MediaOrganization'). Relationship type names MUST be in English UPPER_SNAKE_CASE (e.g., 'WORKS_FOR'). Attribute names MUST be in English snake_case. Only description fields and analysis_summary should use the specified language above."
-        messages = [
-            {"role": "system", "content": system_prompt},
-            {"role": "user", "content": user_message}
-        ]
-        
-        # 调用LLM
-        result = self.llm_client.chat_json(
-            messages=messages,
-            temperature=0.3,
-            max_tokens=4096
+
+        chunks = self._build_document_chunks(
+            document_texts=document_texts,
+            simulation_requirement=simulation_requirement,
+            additional_context=additional_context,
+            system_prompt=system_prompt,
        )
+        fallback_ontology = self._document_aware_fallback(document_texts, simulation_requirement)
+        logger.info("Ontology generation split into %s LLM chunk(s)", len(chunks))
+
+        partial_results = []
+        for index, chunk in enumerate(chunks, start=1):
+            user_message = self._build_user_message(
+                [chunk],
+                simulation_requirement,
+                additional_context,
+                chunk_index=index,
+                chunk_count=len(chunks),
+            )
+            messages = [
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_message}
+            ]
+
+            try:
+                raw_result = self.llm_client.chat_json(
+                    messages=messages,
+                    temperature=0.3,
+                    max_tokens=Config.ONTOLOGY_MAX_OUTPUT_TOKENS,
+                    max_retries=Config.LLM_JSON_MAX_RETRIES,
+                )
+                result = self._coerce_ontology_result(raw_result)
+                if self._has_usable_ontology(result):
+                    partial_results.append(result)
+                else:
+                    logger.warning(
+                        "Ontology LLM chunk %s/%s returned JSON without usable entity_types/edge_types: %s",
+                        index,
+                        len(chunks),
+                        str(raw_result)[:1000],
+                    )
+            except Exception as exc:
+                logger.error("Ontology LLM chunk %s/%s failed: %s", index, len(chunks), exc)
+                logger.debug(traceback.format_exc())
+
+        if partial_results:
+            result = self._merge_ontologies(partial_results, simulation_requirement)
+        else:
+            logger.error("All ontology LLM chunks failed, using document-aware fallback ontology")
+            result = fallback_ontology
        
        # 验证和后处理
-        result = self._validate_and_process(result)
+        result = self._validate_and_process(result, fill_ontology=fallback_ontology)
        
        return result
+
+    def _coerce_ontology_result(self, result: Dict[str, Any]) -> Dict[str, Any]:
+        """Accept common local-LLM schema variants and normalize to our shape."""
+        if not isinstance(result, dict):
+            return {}
+
+        for wrapper_key in ("ontology", "data", "result"):
+            wrapped = result.get(wrapper_key)
+            if isinstance(wrapped, dict):
+                result = wrapped
+                break
+
+        normalized = dict(result)
+        if "entity_types" not in normalized:
+            for key in ("entities", "entityTypes", "node_types", "nodeTypes", "nodes"):
+                if isinstance(normalized.get(key), list):
+                    normalized["entity_types"] = normalized[key]
+                    break
+
+        if "edge_types" not in normalized:
+            for key in ("relationships", "relations", "edges", "edgeTypes", "relation_types", "relationship_types"):
+                if isinstance(normalized.get(key), list):
+                    normalized["edge_types"] = normalized[key]
+                    break
+
+        normalized["entity_types"] = [
+            self._coerce_entity_type(item)
+            for item in normalized.get("entity_types", [])
+            if isinstance(item, dict)
+        ]
+        normalized["edge_types"] = [
+            self._coerce_edge_type(item)
+            for item in normalized.get("edge_types", [])
+            if isinstance(item, dict)
+        ]
+        return normalized
+
+    def _coerce_entity_type(self, item: Dict[str, Any]) -> Dict[str, Any]:
+        name = item.get("name") or item.get("type") or item.get("label")
+        attributes = item.get("attributes") if isinstance(item.get("attributes"), list) else []
+        return {
+            "name": name,
+            "description": item.get("description") or item.get("summary") or f"{name} entity.",
+            "attributes": attributes,
+            "examples": item.get("examples") if isinstance(item.get("examples"), list) else [],
+        }
+
+    def _coerce_edge_type(self, item: Dict[str, Any]) -> Dict[str, Any]:
+        name = item.get("name") or item.get("type") or item.get("relation")
+        source_targets = item.get("source_targets")
+        if not isinstance(source_targets, list):
+            source = item.get("source") or item.get("source_type")
+            target = item.get("target") or item.get("target_type")
+            source_targets = [{"source": source or "Person", "target": target or "Organization"}]
+        return {
+            "name": name,
+            "description": item.get("description") or item.get("summary") or f"{name} relation.",
+            "source_targets": source_targets,
+            "attributes": item.get("attributes") if isinstance(item.get("attributes"), list) else [],
+        }
+
+    def _has_usable_ontology(self, result: Dict[str, Any]) -> bool:
+        return bool(result.get("entity_types")) and bool(result.get("edge_types"))
+
+    def _document_aware_fallback(
+        self,
+        document_texts: List[str],
+        simulation_requirement: str,
+    ) -> Dict[str, Any]:
+        corpus = "\n".join(document_texts or [])[:50000]
+        lower = corpus.lower()
+        fate_markers = [
+            "fate/grand order",
+            "lostbelt",
+            "阿瓦隆",
+            "勒",
+            "妖精",
+            "摩根",
+            "迦勒底",
+            "不列顛",
+            "奧伯龍",
+            "科爾努諾斯",
+        ]
+        if any(marker in lower or marker in corpus for marker in fate_markers):
+            return {
+                "entity_types": [
+                    {
+                        "name": "FictionalCharacter",
+                        "description": "Named story character or role in the Lostbelt narrative.",
+                        "attributes": [{"name": "role", "type": "text", "description": "Narrative role"}],
+                        "examples": ["Morgan", "Artoria Caster"],
+                    },
+                    {
+                        "name": "Faction",
+                        "description": "Political, military, or social group in the setting.",
+                        "attributes": [{"name": "alignment", "type": "text", "description": "Faction alignment"}],
+                        "examples": ["Chaldea", "Round Table"],
+                    },
+                    {
+                        "name": "FairyClan",
+                        "description": "Fairy clan or species group in Britain.",
+                        "attributes": [{"name": "clan_role", "type": "text", "description": "Clan role"}],
+                        "examples": ["Wind clan", "Fang clan"],
+                    },
+                    {
+                        "name": "Kingdom",
+                        "description": "Realm, court, or governing power.",
+                        "attributes": [{"name": "ruler", "type": "text", "description": "Known ruler"}],
+                        "examples": ["Camelot", "Fairy Britain"],
+                    },
+                    {
+                        "name": "Deity",
+                        "description": "Godlike or mythic entity affecting events.",
+                        "attributes": [{"name": "domain", "type": "text", "description": "Mythic domain"}],
+                        "examples": ["Cernunnos"],
+                    },
+                    {
+                        "name": "Location",
+                        "description": "Named place or region in the chronology.",
+                        "attributes": [{"name": "region_type", "type": "text", "description": "Place category"}],
+                        "examples": ["Avalon", "Britain"],
+                    },
+                    {
+                        "name": "NarrativeEvent",
+                        "description": "Major battle, calamity, or turning point.",
+                        "attributes": [{"name": "era", "type": "text", "description": "Era or time marker"}],
+                        "examples": ["Great Calamity", "Queen Morgan battle"],
+                    },
+                    {
+                        "name": "SourceMaterial",
+                        "description": "Official or community source cited by the report.",
+                        "attributes": [{"name": "source_type", "type": "text", "description": "Source category"}],
+                        "examples": ["Road to 7", "official soundtrack"],
+                    },
+                    {
+                        "name": "Person",
+                        "description": "Any individual not fitting other specific person types.",
+                        "attributes": [{"name": "full_name", "type": "text", "description": "Full name"}],
+                        "examples": ["writer", "commentator"],
+                    },
+                    {
+                        "name": "Organization",
+                        "description": "Any organization not fitting other specific organization types.",
+                        "attributes": [{"name": "org_name", "type": "text", "description": "Organization name"}],
+                        "examples": ["publisher", "studio"],
+                    },
+                ],
+                "edge_types": [
+                    {
+                        "name": "APPEARS_IN",
+                        "description": "Entity appears in a source, era, or event.",
+                        "source_targets": [{"source": "FictionalCharacter", "target": "NarrativeEvent"}],
+                        "attributes": [],
+                    },
+                    {
+                        "name": "RULES",
+                        "description": "Character or power rules a realm or group.",
+                        "source_targets": [{"source": "FictionalCharacter", "target": "Kingdom"}],
+                        "attributes": [],
+                    },
+                    {
+                        "name": "ALLIED_WITH",
+                        "description": "Entity is allied or cooperating with another.",
+                        "source_targets": [{"source": "Faction", "target": "Faction"}, {"source": "FictionalCharacter", "target": "Faction"}],
+                        "attributes": [],
+                    },
+                    {
+                        "name": "OPPOSES",
+                        "description": "Entity opposes another entity or faction.",
+                        "source_targets": [{"source": "FictionalCharacter", "target": "FictionalCharacter"}, {"source": "Faction", "target": "Faction"}],
+                        "attributes": [],
+                    },
+                    {
+                        "name": "LOCATED_IN",
+                        "description": "Entity or event is located in a place.",
+                        "source_targets": [{"source": "NarrativeEvent", "target": "Location"}, {"source": "Kingdom", "target": "Location"}],
+                        "attributes": [],
+                    },
+                    {
+                        "name": "CAUSES",
+                        "description": "Entity or event causes another event.",
+                        "source_targets": [{"source": "NarrativeEvent", "target": "NarrativeEvent"}, {"source": "Deity", "target": "NarrativeEvent"}],
+                        "attributes": [],
+                    },
+                    {
+                        "name": "DOCUMENTS",
+                        "description": "Source material documents an entity or event.",
+                        "source_targets": [{"source": "SourceMaterial", "target": "NarrativeEvent"}, {"source": "SourceMaterial", "target": "FictionalCharacter"}],
+                        "attributes": [],
+                    },
+                    {
+                        "name": "TRANSFORMS_INTO",
+                        "description": "Entity changes form, role, or state.",
+                        "source_targets": [{"source": "FictionalCharacter", "target": "FictionalCharacter"}, {"source": "NarrativeEvent", "target": "NarrativeEvent"}],
+                        "attributes": [],
+                    },
+                ],
+                "analysis_summary": f"Document-aware fallback ontology generated for Fate/Lostbelt content: {simulation_requirement[:200]}",
+            }
+
+        return self._fallback_ontology(simulation_requirement)
+
+    def _fallback_ontology(self, simulation_requirement: str) -> Dict[str, Any]:
+        """Deterministic ontology used when a local LLM fails JSON generation."""
+        return {
+            "entity_types": [
+                {
+                    "name": "Journalist",
+                    "description": "Reporter or editor participating in public discourse.",
+                    "attributes": [{"name": "role", "type": "text", "description": "Media role"}],
+                    "examples": ["reporter", "editor"],
+                },
+                {
+                    "name": "MediaOutlet",
+                    "description": "Media organization publishing news or commentary.",
+                    "attributes": [{"name": "org_name", "type": "text", "description": "Outlet name"}],
+                    "examples": ["newspaper", "online media"],
+                },
+                {
+                    "name": "Company",
+                    "description": "Business organization involved in the issue.",
+                    "attributes": [{"name": "industry", "type": "text", "description": "Industry"}],
+                    "examples": ["company", "platform"],
+                },
+                {
+                    "name": "GovernmentAgency",
+                    "description": "Government or regulator relevant to the event.",
+                    "attributes": [{"name": "jurisdiction", "type": "text", "description": "Jurisdiction"}],
+                    "examples": ["regulator", "department"],
+                },
+                {
+                    "name": "Official",
+                    "description": "Public official or authority figure.",
+                    "attributes": [{"name": "title", "type": "text", "description": "Official title"}],
+                    "examples": ["mayor", "spokesperson"],
+                },
+                {
+                    "name": "Expert",
+                    "description": "Analyst, scholar, or professional commentator.",
+                    "attributes": [{"name": "specialty", "type": "text", "description": "Expertise"}],
+                    "examples": ["researcher", "lawyer"],
+                },
+                {
+                    "name": "CommunityGroup",
+                    "description": "Grassroots group or collective actor.",
+                    "attributes": [{"name": "focus", "type": "text", "description": "Group focus"}],
+                    "examples": ["local group", "advocacy group"],
+                },
+                {
+                    "name": "Influencer",
+                    "description": "Online personality with audience influence.",
+                    "attributes": [{"name": "platform", "type": "text", "description": "Main platform"}],
+                    "examples": ["blogger", "creator"],
+                },
+                {
+                    "name": "Person",
+                    "description": "Any individual person not fitting specific person types.",
+                    "attributes": [{"name": "full_name", "type": "text", "description": "Full name"}],
+                    "examples": ["ordinary citizen", "witness"],
+                },
+                {
+                    "name": "Organization",
+                    "description": "Any organization not fitting specific organization types.",
+                    "attributes": [{"name": "org_name", "type": "text", "description": "Organization name"}],
+                    "examples": ["association", "small organization"],
+                },
+            ],
+            "edge_types": [
+                {
+                    "name": "WORKS_FOR",
+                    "description": "Employment or affiliation relationship.",
+                    "source_targets": [{"source": "Person", "target": "Organization"}, {"source": "Journalist", "target": "MediaOutlet"}],
+                    "attributes": [],
+                },
+                {
+                    "name": "REPORTS_ON",
+                    "description": "Publishes or reports about an actor.",
+                    "source_targets": [{"source": "MediaOutlet", "target": "Organization"}, {"source": "Journalist", "target": "Person"}],
+                    "attributes": [],
+                },
+                {
+                    "name": "RESPONDS_TO",
+                    "description": "Publicly responds to another actor.",
+                    "source_targets": [{"source": "Person", "target": "Person"}, {"source": "Organization", "target": "Organization"}],
+                    "attributes": [],
+                },
+                {
+                    "name": "SUPPORTS",
+                    "description": "Expresses support for another actor.",
+                    "source_targets": [{"source": "Person", "target": "Organization"}, {"source": "Organization", "target": "Person"}],
+                    "attributes": [],
+                },
+                {
+                    "name": "OPPOSES",
+                    "description": "Expresses opposition to another actor.",
+                    "source_targets": [{"source": "Person", "target": "Organization"}, {"source": "Organization", "target": "Person"}],
+                    "attributes": [],
+                },
+                {
+                    "name": "COLLABORATES_WITH",
+                    "description": "Cooperates with another actor.",
+                    "source_targets": [{"source": "Organization", "target": "Organization"}, {"source": "Person", "target": "Person"}],
+                    "attributes": [],
+                },
+                {
+                    "name": "INFLUENCES",
+                    "description": "Influences opinions or decisions.",
+                    "source_targets": [{"source": "Influencer", "target": "Person"}, {"source": "MediaOutlet", "target": "Person"}],
+                    "attributes": [],
+                },
+                {
+                    "name": "REGULATES",
+                    "description": "Regulatory or oversight relation.",
+                    "source_targets": [{"source": "GovernmentAgency", "target": "Company"}, {"source": "Official", "target": "Organization"}],
+                    "attributes": [],
+                },
+            ],
+            "analysis_summary": f"Fallback ontology generated for: {simulation_requirement[:200]}",
+        }
    
-    # 传给 LLM 的文本最大长度（5万字）
-    MAX_TEXT_LENGTH_FOR_LLM = 50000
+    def _context_input_budget(
+        self,
+        system_prompt: str,
+        simulation_requirement: str,
+        additional_context: Optional[str],
+    ) -> int:
+        empty_user_message = self._build_user_message(
+            [""],
+            simulation_requirement,
+            additional_context,
+            chunk_index=1,
+            chunk_count=1,
+        )
+        reserved = (
+            _estimate_tokens(system_prompt)
+            + _estimate_tokens(empty_user_message)
+            + Config.ONTOLOGY_MAX_OUTPUT_TOKENS
+            + Config.ONTOLOGY_PROMPT_MARGIN_TOKENS
+        )
+        return max(512, Config.LLM_CONTEXT_WINDOW - reserved)
+
+    def _build_document_chunks(
+        self,
+        document_texts: List[str],
+        simulation_requirement: str,
+        additional_context: Optional[str],
+        system_prompt: str,
+    ) -> List[str]:
+        budget = self._context_input_budget(system_prompt, simulation_requirement, additional_context)
+        chunks: List[str] = []
+
+        for text in document_texts:
+            normalized = text or ""
+            current_parts: List[str] = []
+            current_tokens = 0
+            for part in self._iter_text_parts(normalized, budget):
+                part_tokens = _estimate_tokens(part)
+                if current_parts and current_tokens + part_tokens > budget:
+                    chunks.append("\n\n".join(current_parts))
+                    current_parts = []
+                    current_tokens = 0
+                current_parts.append(part)
+                current_tokens += part_tokens
+            if current_parts:
+                chunks.append("\n\n".join(current_parts))
+
+        if not chunks:
+            return [""]
+
+        max_chunks = max(1, Config.ONTOLOGY_MAX_CHUNKS)
+        if len(chunks) > max_chunks:
+            logger.warning(
+                "Ontology input produced %s chunks; keeping first %s to avoid endpoint overload",
+                len(chunks),
+                max_chunks,
+            )
+            chunks = chunks[:max_chunks]
+
+        return chunks
+
+    def _iter_text_parts(self, text: str, budget: int):
+        paragraphs = [part.strip() for part in re.split(r'\n{2,}', text) if part.strip()]
+        if not paragraphs:
+            paragraphs = [text.strip()] if text.strip() else [""]
+
+        for paragraph in paragraphs:
+            if _estimate_tokens(paragraph) <= budget:
+                yield paragraph
+                continue
+
+            sentences = [part.strip() for part in re.split(r'(?<=[。！？.!?])\s*', paragraph) if part.strip()]
+            if not sentences:
+                sentences = [paragraph]
+
+            current = ""
+            for sentence in sentences:
+                if _estimate_tokens(sentence) > budget:
+                    if current:
+                        yield current
+                        current = ""
+                    yield from self._split_oversize_text(sentence, budget)
+                    continue
+                candidate = f"{current}\n{sentence}".strip() if current else sentence
+                if current and _estimate_tokens(candidate) > budget:
+                    yield current
+                    current = sentence
+                else:
+                    current = candidate
+            if current:
+                yield current
+
+    def _split_oversize_text(self, text: str, budget: int):
+        # Conservative char window: CJK can be close to one token per char.
+        window = max(800, min(len(text), budget * 2))
+        start = 0
+        while start < len(text):
+            end = min(len(text), start + window)
+            chunk = text[start:end]
+            while _estimate_tokens(chunk) > budget and len(chunk) > 500:
+                end = start + max(500, (end - start) // 2)
+                chunk = text[start:end]
+            yield chunk
+            start = end
    
    def _build_user_message(
        self,
        document_texts: List[str],
        simulation_requirement: str,
-        additional_context: Optional[str]
+        additional_context: Optional[str],
+        chunk_index: int = 1,
+        chunk_count: int = 1,
    ) -> str:
        """构建用户消息"""
        
        # 合并文本
        combined_text = "\n\n---\n\n".join(document_texts)
-        original_length = len(combined_text)
-        
-        # 如果文本超过5万字，截断（仅影响传给LLM的内容，不影响图谱构建）
-        if len(combined_text) > self.MAX_TEXT_LENGTH_FOR_LLM:
-            combined_text = combined_text[:self.MAX_TEXT_LENGTH_FOR_LLM]
-            combined_text += f"\n\n...(原文共{original_length}字，已截取前{self.MAX_TEXT_LENGTH_FOR_LLM}字用于本体分析)..."
        
        message = f"""## 模拟需求

@ -271,10 +730,55 @@ class OntologyGenerator:
 4. 所有实体类型必须是现实中可以发声的主体，不能是抽象概念
 5. 属性名不能使用 name、uuid、group_id 等保留字，用 full_name、org_name 等替代
 """
+        if chunk_count > 1:
+            message += f"\n当前是第 {chunk_index}/{chunk_count} 个文本分片。请只基于当前分片生成候选本体，后续系统会合并去重。\n"
        
        return message
+
+    def _merge_ontologies(self, results: List[Dict[str, Any]], simulation_requirement: str) -> Dict[str, Any]:
+        merged = {
+            "entity_types": [],
+            "edge_types": [],
+            "analysis_summary": "",
+        }
+        seen_entities = set()
+        seen_edges = set()
+        summaries = []
+
+        for result in results:
+            for entity in result.get("entity_types", []):
+                name = _to_pascal_case(str(entity.get("name", "")))
+                if not name or name in seen_entities:
+                    continue
+                seen_entities.add(name)
+                entity = dict(entity)
+                entity["name"] = name
+                merged["entity_types"].append(entity)
+
+            for edge in result.get("edge_types", []):
+                name = str(edge.get("name", "")).upper()
+                if not name or name in seen_edges:
+                    continue
+                seen_edges.add(name)
+                edge = dict(edge)
+                edge["name"] = name
+                merged["edge_types"].append(edge)
+
+            summary = str(result.get("analysis_summary", "")).strip()
+            if summary:
+                summaries.append(summary)
+
+        if not merged["entity_types"] or not merged["edge_types"]:
+            return self._fallback_ontology(simulation_requirement)
+
+        merged["analysis_summary"] = " ".join(summaries[:3]) or f"Ontology generated for: {simulation_requirement[:200]}"
+        return merged
    
-    def _validate_and_process(self, result: Dict[str, Any]) -> Dict[str, Any]:
+    def _validate_and_process(
+        self,
+        result: Dict[str, Any],
+        fill_ontology: Optional[Dict[str, Any]] = None,
+    ) -> Dict[str, Any]:
        """验证和后处理结果"""
        
        # 确保必要字段存在
@ -387,6 +891,21 @@ class OntologyGenerator:
            
            # 添加兜底类型
            result["entity_types"].extend(fallbacks_to_add)
+
+        # Local LLMs sometimes return too few types. Fill from deterministic
+        # social-simulation defaults so downstream graph setup always has a
+        # usable ontology instead of failing later.
+        fill_ontology = fill_ontology or self._fallback_ontology("")
+
+        if len(result["entity_types"]) < MAX_ENTITY_TYPES:
+            entity_names = {e["name"] for e in result["entity_types"]}
+            for fallback_entity in fill_ontology.get("entity_types", []):
+                if len(result["entity_types"]) >= MAX_ENTITY_TYPES:
+                    break
+                if fallback_entity["name"] in entity_names:
+                    continue
+                result["entity_types"].append(fallback_entity)
+                entity_names.add(fallback_entity["name"])
        
        # 最终确保不超过限制（防御性编程）
        if len(result["entity_types"]) > MAX_ENTITY_TYPES:
@ -394,6 +913,16 @@ class OntologyGenerator:
        
        if len(result["edge_types"]) > MAX_EDGE_TYPES:
            result["edge_types"] = result["edge_types"][:MAX_EDGE_TYPES]
+
+        if len(result["edge_types"]) < 6:
+            edge_names = {edge.get("name") for edge in result["edge_types"]}
+            for fallback_edge in fill_ontology.get("edge_types", []):
+                if len(result["edge_types"]) >= 6:
+                    break
+                if fallback_edge["name"] in edge_names:
+                    continue
+                result["edge_types"].append(fallback_edge)
+                edge_names.add(fallback_edge["name"])
        
        return result
    
@ -503,4 +1032,3 @@ class OntologyGenerator:
        code_lines.append('}')
        
        return '\n'.join(code_lines)
-
--- a/backend/app/services/simulation_config_generator.py
+++ b/backend/app/services/simulation_config_generator.py
@ -20,6 +20,7 @@ from openai import OpenAI

 from ..config import Config
 from ..utils.logger import get_logger
+from ..utils.llm_gate import main_llm_slot
 from ..utils.locale import get_language_instruction, t
 from .zep_entity_reader import EntityNode, ZepEntityReader

@ -440,16 +441,17 @@ class SimulationConfigGenerator:
        
        for attempt in range(max_attempts):
            try:
-                response = self.client.chat.completions.create(
-                    model=self.model_name,
-                    messages=[
-                        {"role": "system", "content": system_prompt},
-                        {"role": "user", "content": prompt}
-                    ],
-                    response_format={"type": "json_object"},
-                    temperature=0.7 - (attempt * 0.1)  # 每次重试降低温度
-                    # 不设置max_tokens，让LLM自由发挥
-                )
+                with main_llm_slot():
+                    response = self.client.chat.completions.create(
+                        model=self.model_name,
+                        messages=[
+                            {"role": "system", "content": system_prompt},
+                            {"role": "user", "content": prompt}
+                        ],
+                        response_format={"type": "json_object"},
+                        temperature=0.7 - (attempt * 0.1)  # 每次重试降低温度
+                        # 不设置max_tokens，让LLM自由发挥
+                    )
                
                content = response.choices[0].message.content
                finish_reason = response.choices[0].finish_reason
@ -988,4 +990,3 @@ class SimulationConfigGenerator:
                "influence_weight": 1.0
            }
    
-
--- a/backend/app/services/simulation_manager.py
+++ b/backend/app/services/simulation_manager.py
@ -235,7 +235,7 @@ class SimulationManager:
        defined_entity_types: Optional[List[str]] = None,
        use_llm_for_profiles: bool = True,
        progress_callback: Optional[callable] = None,
-        parallel_profile_count: int = 3
+        parallel_profile_count: int = 1
    ) -> SimulationState:
        """
        准备模拟环境（全程自动化）
--- a/backend/app/services/simulation_runner.py
+++ b/backend/app/services/simulation_runner.py
@ -414,7 +414,7 @@ class SimulationRunner:
            #   simulation.log        - 主进程日志
            
            cmd = [
-                sys.executable,  # Python解释器
+                Config.OASIS_PYTHON or sys.executable,  # Python解释器
                script_path,
                "--config", config_path,  # 使用完整配置文件路径
            ]
@ -1765,4 +1765,3 @@ class SimulationRunner:
            results = results[:limit]
        
        return results
-
--- a/backend/app/services/zep_entity_reader.py
+++ b/backend/app/services/zep_entity_reader.py
@ -80,9 +80,6 @@ class ZepEntityReader:
    
    def __init__(self, api_key: Optional[str] = None):
        self.api_key = api_key or Config.ZEP_API_KEY
-        if not self.api_key:
-            raise ValueError("ZEP_API_KEY 未配置")
-        
        self.client = Zep(api_key=self.api_key)
    
    def _call_with_retry(
@ -434,4 +431,3 @@ class ZepEntityReader:
        )
        return result.entities

-
--- a/backend/app/services/zep_graph_memory_updater.py
+++ b/backend/app/services/zep_graph_memory_updater.py
@ -239,10 +239,6 @@ class ZepGraphMemoryUpdater:
        """
        self.graph_id = graph_id
        self.api_key = api_key or Config.ZEP_API_KEY
-        
-        if not self.api_key:
-            raise ValueError("ZEP_API_KEY未配置")
-        
        self.client = Zep(api_key=self.api_key)
        
        # 活动队列
--- a/backend/app/services/zep_tools.py
+++ b/backend/app/services/zep_tools.py
@ -424,9 +424,6 @@ class ZepToolsService:
    
    def __init__(self, api_key: Optional[str] = None, llm_client: Optional[LLMClient] = None):
        self.api_key = api_key or Config.ZEP_API_KEY
-        if not self.api_key:
-            raise ValueError("ZEP_API_KEY 未配置")
-        
        self.client = Zep(api_key=self.api_key)
        # LLM客户端用于InsightForge生成子问题
        self._llm_client = llm_client
--- a/backend/app/utils/llm_client.py
+++ b/backend/app/utils/llm_client.py
@ -5,10 +5,15 @@ LLM客户端封装

 import json
 import re
+import time
 from typing import Optional, Dict, Any, List
 from openai import OpenAI

 from ..config import Config
+from .logger import get_logger
+from .llm_gate import main_llm_slot
+
+logger = get_logger('mirofish.llm')


 class LLMClient:
@ -61,17 +66,20 @@ class LLMClient:
        if response_format:
            kwargs["response_format"] = response_format
        
-        response = self.client.chat.completions.create(**kwargs)
+        with main_llm_slot():
+            response = self.client.chat.completions.create(**kwargs)
        content = response.choices[0].message.content
        # 部分模型（如MiniMax M2.5）会在content中包含<think>思考内容，需要移除
        content = re.sub(r'<think>[\s\S]*?</think>', '', content).strip()
+        content = re.sub(r'^Thinking Process:[\s\S]*?(?=\{|\[)', '', content).strip()
        return content
    
    def chat_json(
        self,
        messages: List[Dict[str, str]],
        temperature: float = 0.3,
-        max_tokens: int = 4096
+        max_tokens: int = 4096,
+        max_retries: Optional[int] = None
    ) -> Dict[str, Any]:
        """
        发送聊天请求并返回JSON
@ -84,20 +92,132 @@ class LLMClient:
        Returns:
            解析后的JSON对象
        """
-        response = self.chat(
-            messages=messages,
-            temperature=temperature,
-            max_tokens=max_tokens,
-            response_format={"type": "json_object"}
-        )
-        # 清理markdown代码块标记
-        cleaned_response = response.strip()
-        cleaned_response = re.sub(r'^```(?:json)?\s*\n?', '', cleaned_response, flags=re.IGNORECASE)
-        cleaned_response = re.sub(r'\n?```\s*$', '', cleaned_response)
-        cleaned_response = cleaned_response.strip()
+        retries = Config.LLM_JSON_MAX_RETRIES if max_retries is None else max_retries
+        last_error: Exception | None = None
+        last_response = ""

-        try:
-            return json.loads(cleaned_response)
-        except json.JSONDecodeError:
-            raise ValueError(f"LLM返回的JSON格式无效: {cleaned_response}")
+        for attempt in range(retries + 1):
+            attempt_messages = list(messages)
+            if attempt > 0:
+                attempt_messages.append({
+                    "role": "user",
+                    "content": (
+                        "The previous answer was not valid JSON. Return exactly one valid JSON object. "
+                        "Do not include markdown fences, explanation, comments, or thinking text."
+                    )
+                })

+            try:
+                response = self.chat(
+                    messages=attempt_messages,
+                    temperature=temperature,
+                    max_tokens=max_tokens,
+                    response_format={"type": "json_object"}
+                )
+                last_response = response
+                parsed = self._parse_json_lenient(response)
+                if parsed is not None:
+                    if attempt > 0:
+                        logger.info("LLM JSON recovered after retry %s/%s", attempt, retries)
+                    return parsed
+
+                last_error = ValueError("LLM returned unparsable JSON")
+                logger.warning(
+                    "LLM returned invalid JSON on attempt %s/%s: %s",
+                    attempt + 1,
+                    retries + 1,
+                    self._cleanup_response(response)[:1000],
+                )
+            except Exception as exc:
+                last_error = exc
+                logger.warning(
+                    "LLM JSON call failed on attempt %s/%s: %s",
+                    attempt + 1,
+                    retries + 1,
+                    str(exc)[:1000],
+                )
+
+            if attempt < retries:
+                time.sleep(min(2.0, 0.5 * (attempt + 1)))
+
+        cleaned = self._cleanup_response(last_response)
+        if cleaned:
+            logger.error("LLM returned invalid JSON after retries: %s", cleaned[:2000])
+            raise ValueError(f"LLM返回的JSON格式无效: {cleaned[:1000]}")
+        raise last_error or ValueError("LLM JSON call failed")
+
+    def _parse_json_lenient(self, text: str) -> Dict[str, Any] | None:
+        cleaned = self._cleanup_response(text)
+        candidates = [cleaned]
+
+        for block in re.findall(r"```(?:json)?\s*([\s\S]*?)```", text or "", flags=re.IGNORECASE):
+            candidates.append(self._cleanup_response(block))
+
+        candidates.extend(self._balanced_json_objects(cleaned))
+
+        start = cleaned.find('{')
+        end = cleaned.rfind('}')
+        if start >= 0 and end > start:
+            candidates.append(cleaned[start:end + 1])
+
+        for candidate in candidates:
+            candidate = candidate.strip()
+            if not candidate:
+                continue
+            repaired = self._repair_json(candidate)
+            try:
+                parsed = json.loads(repaired)
+                return parsed if isinstance(parsed, dict) else None
+            except json.JSONDecodeError:
+                pass
+
+        return None
+
+    def _cleanup_response(self, text: str) -> str:
+        cleaned = text or ""
+        cleaned = cleaned.replace("\ufeff", "").replace("\u200b", "")
+        cleaned = re.sub(r"<think>[\s\S]*?</think>", "", cleaned).strip()
+        cleaned = re.sub(r"^Thinking Process:[\s\S]*?(?=\{|\[)", "", cleaned).strip()
+        cleaned = re.sub(r"^```(?:json)?\s*\n?", "", cleaned, flags=re.IGNORECASE)
+        cleaned = re.sub(r"\n?```\s*$", "", cleaned)
+        return cleaned.strip()
+
+    def _repair_json(self, text: str) -> str:
+        repaired = text.strip()
+        repaired = re.sub(r",(\s*[}\]])", r"\1", repaired)
+        repaired = re.sub(r"\bNone\b", "null", repaired)
+        repaired = re.sub(r"\bTrue\b", "true", repaired)
+        repaired = re.sub(r"\bFalse\b", "false", repaired)
+        return repaired
+
+    def _balanced_json_objects(self, text: str) -> List[str]:
+        objects: List[str] = []
+        start = None
+        depth = 0
+        in_string = False
+        escape = False
+
+        for index, char in enumerate(text or ""):
+            if in_string:
+                if escape:
+                    escape = False
+                elif char == "\\":
+                    escape = True
+                elif char == '"':
+                    in_string = False
+                continue
+
+            if char == '"':
+                in_string = True
+            elif char == "{":
+                if depth == 0:
+                    start = index
+                depth += 1
+            elif char == "}" and depth:
+                depth -= 1
+                if depth == 0 and start is not None:
+                    objects.append(text[start:index + 1])
+                    start = None
+
+        # Prefer larger objects first; noisy LLM text may contain small examples.
+        return sorted(objects, key=len, reverse=True)
--- a/backend/app/utils/llm_gate.py
+++ b/backend/app/utils/llm_gate.py
@ -0,0 +1,19 @@
+from __future__ import annotations
+
+from contextlib import contextmanager
+from threading import BoundedSemaphore
+
+from ..config import Config
+
+
+_MAIN_LLM_GATE = BoundedSemaphore(max(1, Config.LLM_MAX_CONCURRENCY))
+
+
+@contextmanager
+def main_llm_slot():
+    """Limit concurrent calls to the local main LLM endpoint."""
+    _MAIN_LLM_GATE.acquire()
+    try:
+        yield
+    finally:
+        _MAIN_LLM_GATE.release()
--- a/backend/local_zep/init.py
+++ b/backend/local_zep/init.py
@ -0,0 +1,9 @@
+from .client import Zep
+from .models import EpisodeData, EntityEdgeSourceTarget, InternalServerError
+
+__all__ = [
+    "EntityEdgeSourceTarget",
+    "EpisodeData",
+    "InternalServerError",
+    "Zep",
+]
--- a/backend/local_zep/client.py
+++ b/backend/local_zep/client.py
@ -0,0 +1,218 @@
+from __future__ import annotations
+
+from typing import Any
+
+from .models import EntityEdgeSourceTarget, EpisodeData, GraphSearchResults
+from .store import LocalZepStore
+
+
+_DEFAULT_STORE: LocalZepStore | None = None
+
+
+def get_default_store() -> LocalZepStore:
+    global _DEFAULT_STORE
+    if _DEFAULT_STORE is None:
+        _DEFAULT_STORE = LocalZepStore()
+    return _DEFAULT_STORE
+
+
+def _model_fields_to_attributes(model_cls: Any) -> list[dict[str, str]]:
+    fields = getattr(model_cls, "model_fields", {}) or {}
+    result = []
+    for field_name, field_info in fields.items():
+        result.append(
+            {
+                "name": field_name,
+                "description": getattr(field_info, "description", None) or field_name,
+            }
+        )
+    return result
+
+
+def _compat_ontology_from_models(
+    entities: dict[str, Any] | None,
+    edges: dict[str, Any] | None,
+) -> dict[str, Any]:
+    ontology = {"entity_types": [], "edge_types": []}
+
+    for entity_name, entity_cls in (entities or {}).items():
+        ontology["entity_types"].append(
+            {
+                "name": entity_name,
+                "description": getattr(entity_cls, "__doc__", "") or f"A {entity_name} entity.",
+                "attributes": _model_fields_to_attributes(entity_cls),
+            }
+        )
+
+    for edge_name, edge_value in (edges or {}).items():
+        edge_cls, source_targets = edge_value
+        formatted_targets = []
+        for pair in source_targets or []:
+            if isinstance(pair, EntityEdgeSourceTarget):
+                formatted_targets.append({"source": pair.source, "target": pair.target})
+            else:
+                formatted_targets.append(
+                    {
+                        "source": getattr(pair, "source", "Entity"),
+                        "target": getattr(pair, "target", "Entity"),
+                    }
+                )
+        ontology["edge_types"].append(
+            {
+                "name": edge_name,
+                "description": getattr(edge_cls, "__doc__", "") or f"A {edge_name} relationship.",
+                "attributes": _model_fields_to_attributes(edge_cls),
+                "source_targets": formatted_targets,
+            }
+        )
+
+    return ontology
+
+
+class EpisodeManager:
+    def __init__(self, store: LocalZepStore) -> None:
+        self.store = store
+
+    def get(self, uuid_: str):
+        return self.store.get_episode(uuid_)
+
+    def get_by_graph_id(self, graph_id: str, lastn: int | None = None):
+        return self.store.get_episodes_by_graph_id(graph_id=graph_id, lastn=lastn)
+
+
+class NodeManager:
+    def __init__(self, store: LocalZepStore) -> None:
+        self.store = store
+
+    def get_by_graph_id(self, graph_id: str, limit: int = 100, uuid_cursor: str | None = None):
+        return self.store.get_nodes_page(graph_id=graph_id, limit=limit, uuid_cursor=uuid_cursor)
+
+    def get_by_user_id(self, user_id: str, limit: int = 100, uuid_cursor: str | None = None):
+        return self.get_by_graph_id(graph_id=user_id, limit=limit, uuid_cursor=uuid_cursor)
+
+    def get(self, uuid_: str):
+        return self.store.get_node(uuid_)
+
+    def get_entity_edges(self, node_uuid: str):
+        return self.store.get_entity_edges(node_uuid)
+
+    def get_edges(self, node_uuid: str):
+        return self.get_entity_edges(node_uuid)
+
+
+class EdgeManager:
+    def __init__(self, store: LocalZepStore) -> None:
+        self.store = store
+
+    def get_by_graph_id(self, graph_id: str, limit: int = 100, uuid_cursor: str | None = None):
+        return self.store.get_edges_page(graph_id=graph_id, limit=limit, uuid_cursor=uuid_cursor)
+
+    def get_by_user_id(self, user_id: str, limit: int = 100, uuid_cursor: str | None = None):
+        return self.get_by_graph_id(graph_id=user_id, limit=limit, uuid_cursor=uuid_cursor)
+
+    def get(self, uuid_: str):
+        return self.store.get_edge(uuid_)
+
+
+class GraphManager:
+    def __init__(self, store: LocalZepStore) -> None:
+        self.store = store
+        self.node = NodeManager(store)
+        self.edge = EdgeManager(store)
+        self.episode = EpisodeManager(store)
+
+    def create(self, graph_id: str, name: str = "", description: str = ""):
+        return self.store.create_graph(graph_id=graph_id, name=name, description=description)
+
+    def get(self, graph_id: str):
+        return self.store.get_graph(graph_id=graph_id)
+
+    def delete(self, graph_id: str):
+        self.store.delete_graph(graph_id=graph_id)
+
+    def set_ontology(
+        self,
+        graph_ids: list[str] | None = None,
+        user_ids: list[str] | None = None,
+        entities: dict[str, Any] | None = None,
+        edges: dict[str, Any] | None = None,
+        ontology: dict[str, Any] | None = None,
+        **_: Any,
+    ):
+        graph_ids = graph_ids or user_ids or []
+        parsed = ontology or _compat_ontology_from_models(entities, edges)
+        for graph_id in graph_ids:
+            self.store.set_ontology(graph_id=graph_id, ontology=parsed)
+
+    def add(
+        self,
+        graph_id: str | None = None,
+        data: str = "",
+        type: str = "text",
+        user_id: str | None = None,
+        created_at: str | None = None,
+        metadata: dict[str, Any] | None = None,
+        source_description: str | None = None,
+        **extra: Any,
+    ):
+        graph_id = graph_id or user_id or extra.get("graphId") or extra.get("userId")
+        if not graph_id:
+            raise ValueError("graph_id or user_id is required")
+        return self.store.add(
+            graph_id=graph_id,
+            data=data,
+            type_=type,
+            created_at=created_at or extra.get("createdAt"),
+            metadata=metadata,
+            source_description=source_description or extra.get("sourceDescription"),
+        )
+
+    def add_batch(self, graph_id: str | None = None, episodes: list[EpisodeData] | None = None, user_id: str | None = None, **extra: Any):
+        graph_id = graph_id or user_id or extra.get("graphId") or extra.get("userId")
+        if not graph_id:
+            raise ValueError("graph_id or user_id is required")
+        return self.store.add_batch(graph_id=graph_id, episodes=episodes or [])
+
+    def search(
+        self,
+        graph_id: str | None = None,
+        query: str = "",
+        limit: int = 10,
+        scope: str = "edges",
+        user_id: str | None = None,
+        reranker: str = "rrf",
+        mmr_lambda: float | None = None,
+        center_node_uuid: str | None = None,
+        search_filters: Any = None,
+        bfs_origin_node_uuids: list[str] | None = None,
+        **extra: Any,
+    ) -> GraphSearchResults:
+        graph_id = graph_id or user_id or extra.get("graphId") or extra.get("userId")
+        if not graph_id:
+            return GraphSearchResults()
+        if mmr_lambda is None:
+            mmr_lambda = extra.get("mmrLambda")
+        if center_node_uuid is None:
+            center_node_uuid = extra.get("centerNodeUuid")
+        if search_filters is None:
+            search_filters = extra.get("searchFilters")
+        if bfs_origin_node_uuids is None:
+            bfs_origin_node_uuids = extra.get("bfsOriginNodeUuids")
+        return self.store.search(
+            graph_id=graph_id,
+            query=query,
+            limit=limit,
+            scope=scope,
+            reranker=reranker,
+            mmr_lambda=mmr_lambda,
+            center_node_uuid=center_node_uuid,
+            search_filters=search_filters,
+            bfs_origin_node_uuids=bfs_origin_node_uuids,
+        )
+
+
+class Zep:
+    def __init__(self, api_key: str | None = None, **_: Any) -> None:
+        del api_key
+        self._store = get_default_store()
+        self.graph = GraphManager(self._store)
--- a/backend/local_zep/embeddings.py
+++ b/backend/local_zep/embeddings.py
@ -0,0 +1,65 @@
+from __future__ import annotations
+
+import math
+from typing import Iterable
+
+from .settings import settings
+
+
+def cosine_similarity(left: list[float], right: list[float]) -> float:
+    if not left or not right or len(left) != len(right):
+        return 0.0
+
+    left_norm = math.sqrt(sum(value * value for value in left))
+    right_norm = math.sqrt(sum(value * value for value in right))
+    if left_norm == 0 or right_norm == 0:
+        return 0.0
+
+    dot = sum(a * b for a, b in zip(left, right))
+    return dot / (left_norm * right_norm)
+
+
+class EmbeddingClient:
+    """OpenAI-compatible embeddings client with vLLM-friendly defaults."""
+
+    def __init__(
+        self,
+        api_key: str | None = None,
+        base_url: str | None = None,
+        model_name: str | None = None,
+    ) -> None:
+        self.api_key = api_key or settings.embedding_api_key or "local-embedding-key"
+        self.base_url = (base_url or settings.embedding_base_url or "").strip()
+        self.model_name = (model_name or settings.embedding_model_name or "").strip()
+
+        if not self.base_url:
+            raise ValueError("EMBEDDING_BASE_URL 未配置")
+        if not self.model_name:
+            raise ValueError("EMBEDDING_MODEL_NAME 未配置")
+
+        from openai import OpenAI
+
+        self.client = OpenAI(
+            api_key=self.api_key,
+            base_url=self.base_url,
+        )
+
+    def embed_texts(self, texts: Iterable[str], batch_size: int = 32) -> list[list[float]]:
+        normalized = [text.strip() if text else "" for text in texts]
+        if not normalized:
+            return []
+
+        results: list[list[float]] = []
+        for start in range(0, len(normalized), batch_size):
+            batch = normalized[start:start + batch_size]
+            response = self.client.embeddings.create(
+                model=self.model_name,
+                input=batch,
+            )
+            ordered = sorted(response.data, key=lambda item: item.index)
+            results.extend([list(item.embedding) for item in ordered])
+        return results
+
+    def embed_text(self, text: str) -> list[float]:
+        embeddings = self.embed_texts([text])
+        return embeddings[0] if embeddings else []
--- a/backend/local_zep/extraction.py
+++ b/backend/local_zep/extraction.py
@ -0,0 +1,339 @@
+from __future__ import annotations
+
+import json
+import re
+import time
+from typing import Any
+
+from .settings import settings
+
+try:
+    from app.utils.logger import get_logger
+    from app.utils.llm_gate import main_llm_slot
+    logger = get_logger("mirofish.local_zep.extraction")
+except Exception:  # pragma: no cover - fallback for direct package use
+    import logging
+    from contextlib import nullcontext
+    main_llm_slot = nullcontext
+    logger = logging.getLogger(__name__)
+
+
+def _clean_string(value: Any) -> str:
+    if value is None:
+        return ""
+    return str(value).strip()
+
+
+def _coerce_mapping(value: Any) -> dict[str, str]:
+    if isinstance(value, dict):
+        result = {}
+        for key, item in value.items():
+            cleaned = _clean_string(item)
+            if cleaned:
+                result[str(key)] = cleaned
+        return result
+    return {}
+
+
+def _normalize_type_name(value: str) -> str:
+    cleaned = _clean_string(value)
+    if not cleaned:
+        return "Entity"
+    return re.sub(r"\s+", "", cleaned)
+
+
+def _clean_timestamp(value: Any) -> str | None:
+    cleaned = _clean_string(value)
+    return cleaned or None
+
+
+def _cleanup_model_json(text: str) -> str:
+    cleaned = text or ""
+    cleaned = cleaned.replace("\ufeff", "").replace("\u200b", "")
+    cleaned = re.sub(r"<think>[\s\S]*?</think>", "", cleaned).strip()
+    cleaned = re.sub(r"^Thinking Process:[\s\S]*?(?=\{|\[)", "", cleaned).strip()
+    cleaned = re.sub(r"^```(?:json)?\s*\n?", "", cleaned, flags=re.IGNORECASE)
+    cleaned = re.sub(r"\n?```\s*$", "", cleaned)
+    return cleaned.strip()
+
+
+def _repair_json(text: str) -> str:
+    repaired = text.strip()
+    repaired = re.sub(r",(\s*[}\]])", r"\1", repaired)
+    repaired = re.sub(r"\bNone\b", "null", repaired)
+    repaired = re.sub(r"\bTrue\b", "true", repaired)
+    repaired = re.sub(r"\bFalse\b", "false", repaired)
+    return repaired
+
+
+def _balanced_json_objects(text: str) -> list[str]:
+    objects: list[str] = []
+    start = None
+    depth = 0
+    in_string = False
+    escape = False
+
+    for index, char in enumerate(text or ""):
+        if in_string:
+            if escape:
+                escape = False
+            elif char == "\\":
+                escape = True
+            elif char == '"':
+                in_string = False
+            continue
+
+        if char == '"':
+            in_string = True
+        elif char == "{":
+            if depth == 0:
+                start = index
+            depth += 1
+        elif char == "}" and depth:
+            depth -= 1
+            if depth == 0 and start is not None:
+                objects.append(text[start:index + 1])
+                start = None
+
+    return sorted(objects, key=len, reverse=True)
+
+
+def _extract_balanced_array_after_key(text: str, key: str) -> list[Any] | None:
+    match = re.search(rf'"{re.escape(key)}"\s*:\s*\[', text)
+    if not match:
+        return None
+
+    start = match.end() - 1
+    depth = 0
+    in_string = False
+    escape = False
+    for index in range(start, len(text)):
+        char = text[index]
+        if in_string:
+            if escape:
+                escape = False
+            elif char == "\\":
+                escape = True
+            elif char == '"':
+                in_string = False
+            continue
+
+        if char == '"':
+            in_string = True
+        elif char == "[":
+            depth += 1
+        elif char == "]" and depth:
+            depth -= 1
+            if depth == 0:
+                try:
+                    parsed = json.loads(_repair_json(text[start:index + 1]))
+                    return parsed if isinstance(parsed, list) else None
+                except json.JSONDecodeError:
+                    return None
+
+    return None
+
+
+def _parse_payload_lenient(text: str) -> dict[str, Any] | None:
+    cleaned = _cleanup_model_json(text)
+    candidates = [cleaned]
+    candidates.extend(_cleanup_model_json(block) for block in re.findall(r"```(?:json)?\s*([\s\S]*?)```", text or "", flags=re.IGNORECASE))
+    candidates.extend(_balanced_json_objects(cleaned))
+
+    start = cleaned.find("{")
+    end = cleaned.rfind("}")
+    if start >= 0 and end > start:
+        candidates.append(cleaned[start:end + 1])
+
+    for candidate in candidates:
+        candidate = candidate.strip()
+        if not candidate:
+            continue
+        try:
+            parsed = json.loads(_repair_json(candidate))
+            if isinstance(parsed, dict):
+                return parsed
+        except json.JSONDecodeError:
+            pass
+
+    loose: dict[str, Any] = {}
+    entities = _extract_balanced_array_after_key(cleaned, "entities")
+    edges = _extract_balanced_array_after_key(cleaned, "edges")
+    if entities is not None:
+        loose["entities"] = entities
+    if edges is not None:
+        loose["edges"] = edges
+    return loose or None
+
+
+class GraphExtractor:
+    """LLM-backed entity and relation extraction constrained by ontology."""
+
+    def __init__(
+        self,
+        api_key: str | None = None,
+        base_url: str | None = None,
+        model_name: str | None = None,
+    ) -> None:
+        self.api_key = api_key or settings.llm_api_key
+        self.base_url = base_url or settings.llm_base_url
+        self.model_name = model_name or settings.llm_model_name
+
+        if not self.api_key:
+            raise ValueError("LLM_API_KEY 未配置")
+
+        from openai import OpenAI
+
+        self.client = OpenAI(api_key=self.api_key, base_url=self.base_url)
+
+    def extract(self, text: str, ontology: dict[str, Any] | None) -> dict[str, list[dict[str, Any]]]:
+        if not text or not text.strip():
+            return {"entities": [], "edges": []}
+
+        ontology = ontology or {"entity_types": [], "edge_types": []}
+        entity_types = ontology.get("entity_types", [])
+        edge_types = ontology.get("edge_types", [])
+
+        entity_type_names = [item.get("name", "Entity") for item in entity_types]
+        edge_type_names = [item.get("name", "RELATED_TO") for item in edge_types]
+
+        messages = [
+            {
+                "role": "system",
+                "content": (
+                    "You are an information extraction engine for a local temporal knowledge graph. "
+                    "Return strict JSON with keys entities and edges. "
+                    "Only use ontology entity types and edge types provided by the user. "
+                    "Do not invent unsupported types. Keep summaries concise."
+                ),
+            },
+            {
+                "role": "user",
+                "content": (
+                    "Extract graph updates from the text below.\n\n"
+                    "Ontology:\n"
+                    f"{ontology}\n\n"
+                    "JSON schema:\n"
+                    "{\n"
+                    '  "entities": [\n'
+                    '    {"name": "entity name", "type": "one of ontology entity types", '
+                    '"summary": "short summary", "attributes": {"attr": "value"}}\n'
+                    "  ],\n"
+                    '  "edges": [\n'
+                    '    {"name": "one of ontology edge types", "source": "entity name", '
+                    '"target": "entity name", "fact": "atomic factual sentence", '
+                    '"attributes": {"attr": "value"}, "valid_at": "optional RFC3339 time"}\n'
+                    "  ]\n"
+                    "}\n\n"
+                    "Rules:\n"
+                    f"- Allowed entity types: {entity_type_names or ['Entity']}\n"
+                    f"- Allowed edge types: {edge_type_names or ['RELATED_TO']}\n"
+                    "- Use exact entity names from the text when possible.\n"
+                    "- Omit any item you are not reasonably confident about.\n"
+                    "- Every edge source and target must reference an entity name.\n\n"
+                    "- If a fact includes an explicit real-world start time, put it in valid_at. "
+                    "Otherwise omit valid_at and the episode created_at will be used.\n\n"
+                    "Text:\n"
+                    f"{text}"
+                ),
+            },
+        ]
+
+        payload: dict[str, Any] | None = None
+        last_content = ""
+        max_retries = max(0, settings.local_zep_extract_max_retries)
+        for attempt in range(max_retries + 1):
+            attempt_messages = list(messages)
+            if attempt > 0:
+                attempt_messages.append({
+                    "role": "user",
+                    "content": (
+                        "Retry: return only compact valid JSON with top-level keys entities and edges. "
+                        "No markdown, no comments, no thinking text. Keep at most 20 entities and 20 edges."
+                    ),
+                })
+
+            try:
+                with main_llm_slot():
+                    response = self.client.chat.completions.create(
+                        model=self.model_name,
+                        messages=attempt_messages,
+                        temperature=0.1,
+                        max_tokens=settings.local_zep_extract_max_output_tokens,
+                        response_format={"type": "json_object"},
+                    )
+                last_content = response.choices[0].message.content or "{}"
+                payload = _parse_payload_lenient(last_content)
+                if payload is not None:
+                    if attempt > 0:
+                        logger.info("Graph extraction JSON recovered after retry %s/%s", attempt, max_retries)
+                    break
+                logger.warning(
+                    "Graph extraction returned invalid JSON on attempt %s/%s: %s",
+                    attempt + 1,
+                    max_retries + 1,
+                    _cleanup_model_json(last_content)[:1000],
+                )
+            except Exception as exc:
+                logger.warning(
+                    "Graph extraction LLM call failed on attempt %s/%s: %s",
+                    attempt + 1,
+                    max_retries + 1,
+                    str(exc)[:1000],
+                )
+
+            if attempt < max_retries:
+                time.sleep(min(2.0, 0.5 * (attempt + 1)))
+
+        if payload is None:
+            logger.error(
+                "Graph extraction failed after retries; skipping text chunk. Last response: %s",
+                _cleanup_model_json(last_content)[:2000],
+            )
+            return {"entities": [], "edges": []}
+
+        entities = []
+        for raw_entity in payload.get("entities", []):
+            if not isinstance(raw_entity, dict):
+                continue
+            name = _clean_string(raw_entity.get("name"))
+            entity_type = _normalize_type_name(raw_entity.get("type"))
+            if not name:
+                continue
+            if entity_types and entity_type not in entity_type_names:
+                continue
+            entities.append(
+                {
+                    "name": name,
+                    "type": entity_type,
+                    "summary": _clean_string(raw_entity.get("summary")),
+                    "attributes": _coerce_mapping(raw_entity.get("attributes")),
+                }
+            )
+
+        edges = []
+        for raw_edge in payload.get("edges", []):
+            if not isinstance(raw_edge, dict):
+                continue
+            name = _normalize_type_name(raw_edge.get("name"))
+            source = _clean_string(raw_edge.get("source"))
+            target = _clean_string(raw_edge.get("target"))
+            fact = _clean_string(raw_edge.get("fact"))
+            if not name or not source or not target:
+                continue
+            if edge_types and name not in edge_type_names:
+                continue
+            if not fact:
+                fact = f"{source} {name} {target}"
+            edges.append(
+                {
+                    "name": name,
+                    "source": source,
+                    "target": target,
+                    "fact": fact,
+                    "attributes": _coerce_mapping(raw_edge.get("attributes")),
+                    "valid_at": _clean_timestamp(raw_edge.get("valid_at")),
+                }
+            )
+
+        return {"entities": entities, "edges": edges}
--- a/backend/local_zep/models.py
+++ b/backend/local_zep/models.py
@ -0,0 +1,112 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any
+
+
+class InternalServerError(RuntimeError):
+    """Compatibility error used by paging helpers."""
+
+
+@dataclass
+class EpisodeData:
+    data: str
+    type: str = "text"
+    created_at: str | None = None
+    metadata: dict[str, Any] = field(default_factory=dict)
+    source_description: str | None = None
+
+
+@dataclass
+class EntityEdgeSourceTarget:
+    source: str = "Entity"
+    target: str = "Entity"
+
+
+@dataclass
+class GraphRecord:
+    graph_id: str
+    name: str
+    description: str = ""
+    created_at: str | None = None
+
+
+@dataclass
+class GraphNode:
+    uuid_: str
+    graph_id: str
+    name: str
+    labels: list[str] = field(default_factory=list)
+    summary: str = ""
+    attributes: dict[str, Any] = field(default_factory=dict)
+    created_at: str | None = None
+    score: float | None = None
+    relevance: float | None = None
+
+    @property
+    def uuid(self) -> str:
+        return self.uuid_
+
+
+@dataclass
+class GraphEdge:
+    uuid_: str
+    graph_id: str
+    name: str
+    fact: str
+    source_node_uuid: str
+    target_node_uuid: str
+    attributes: dict[str, Any] = field(default_factory=dict)
+    created_at: str | None = None
+    valid_at: str | None = None
+    invalid_at: str | None = None
+    expired_at: str | None = None
+    episodes: list[str] = field(default_factory=list)
+    score: float | None = None
+    relevance: float | None = None
+
+    @property
+    def uuid(self) -> str:
+        return self.uuid_
+
+    @property
+    def episode_ids(self) -> list[str]:
+        return self.episodes
+
+
+@dataclass
+class GraphEpisode:
+    uuid_: str
+    graph_id: str
+    data: str
+    type: str = "text"
+    processed: bool = False
+    created_at: str | None = None
+    error: str | None = None
+    metadata: dict[str, Any] = field(default_factory=dict)
+    source_description: str | None = None
+    role: str | None = None
+    role_type: str | None = None
+    thread_id: str | None = None
+    task_id: str | None = None
+    score: float | None = None
+    relevance: float | None = None
+
+    @property
+    def uuid(self) -> str:
+        return self.uuid_
+
+    @property
+    def content(self) -> str:
+        return self.data
+
+    @property
+    def source(self) -> str:
+        return self.type
+
+
+@dataclass
+class GraphSearchResults:
+    nodes: list[GraphNode] = field(default_factory=list)
+    edges: list[GraphEdge] = field(default_factory=list)
+    episodes: list[GraphEpisode] = field(default_factory=list)
--- a/backend/local_zep/reranker.py
+++ b/backend/local_zep/reranker.py
@ -0,0 +1,109 @@
+from __future__ import annotations
+
+import json
+import logging
+import urllib.error
+import urllib.request
+from typing import Any
+
+from .settings import settings
+
+
+logger = logging.getLogger("mirofish.local_zep")
+
+
+class RerankerClient:
+    """Small OpenAI/vLLM-friendly reranker client.
+
+    The local graph uses this only for Zep's ``cross_encoder`` reranker. If no
+    reranker endpoint is configured, graph search falls back to local hybrid
+    ranking rather than failing the application request.
+    """
+
+    def __init__(
+        self,
+        api_key: str | None = None,
+        base_url: str | None = None,
+        model_name: str | None = None,
+    ) -> None:
+        self.api_key = api_key or settings.reranker_api_key or "local-reranker-key"
+        self.base_url = (base_url or settings.reranker_base_url or "").rstrip("/")
+        self.model_name = (model_name or settings.reranker_model_name or "").strip()
+
+    @property
+    def is_configured(self) -> bool:
+        return bool(self.base_url and self.model_name)
+
+    def rerank(self, query: str, documents: list[str]) -> list[float] | None:
+        if not self.is_configured or not query or not documents:
+            return None
+
+        payload = {
+            "model": self.model_name,
+            "query": query,
+            "documents": documents,
+            "top_n": len(documents),
+            "return_documents": False,
+        }
+
+        for url in self._candidate_urls():
+            try:
+                response = self._post_json(url, payload)
+                scores = self._extract_scores(response, len(documents))
+                if scores is not None:
+                    return scores
+            except (OSError, urllib.error.URLError, json.JSONDecodeError, ValueError) as exc:
+                logger.warning("Reranker request failed for %s: %s", url, exc)
+
+        return None
+
+    def _candidate_urls(self) -> list[str]:
+        if self.base_url.endswith("/v1"):
+            return [f"{self.base_url}/rerank"]
+        return [f"{self.base_url}/v1/rerank", f"{self.base_url}/rerank"]
+
+    def _post_json(self, url: str, payload: dict[str, Any]) -> dict[str, Any]:
+        body = json.dumps(payload).encode("utf-8")
+        request = urllib.request.Request(
+            url,
+            data=body,
+            headers={
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {self.api_key}",
+            },
+            method="POST",
+        )
+        with urllib.request.urlopen(request, timeout=30) as response:
+            return json.loads(response.read().decode("utf-8"))
+
+    def _extract_scores(self, response: dict[str, Any], expected_count: int) -> list[float] | None:
+        if isinstance(response.get("scores"), list):
+            scores = [float(value) for value in response["scores"]]
+            return scores[:expected_count] + [0.0] * max(0, expected_count - len(scores))
+
+        rows = response.get("results")
+        if not isinstance(rows, list):
+            rows = response.get("data")
+        if not isinstance(rows, list):
+            return None
+
+        scores = [0.0] * expected_count
+        found = False
+        for rank, item in enumerate(rows):
+            if not isinstance(item, dict):
+                continue
+            index = item.get("index", item.get("document_index", item.get("documentIndex", rank)))
+            try:
+                index = int(index)
+            except (TypeError, ValueError):
+                continue
+            if index < 0 or index >= expected_count:
+                continue
+            score = item.get("relevance_score", item.get("relevanceScore", item.get("score", item.get("relevance"))))
+            try:
+                scores[index] = float(score)
+            except (TypeError, ValueError):
+                continue
+            found = True
+
+        return scores if found else None
--- a/backend/local_zep/settings.py
+++ b/backend/local_zep/settings.py
@ -0,0 +1,52 @@
+from __future__ import annotations
+
+import os
+from dataclasses import dataclass
+from pathlib import Path
+
+_BACKEND_ROOT = Path(__file__).resolve().parents[1]
+_PROJECT_ROOT = _BACKEND_ROOT.parent
+_ENV_PATH = _PROJECT_ROOT / ".env"
+
+try:
+    from dotenv import load_dotenv
+except ImportError:
+    load_dotenv = None
+
+if load_dotenv is not None:
+    if _ENV_PATH.exists():
+        load_dotenv(_ENV_PATH, override=True)
+    else:
+        load_dotenv(override=True)
+
+
+def _project_path(value: str) -> str:
+    path = Path(value)
+    if path.is_absolute():
+        return str(path)
+    return str((_PROJECT_ROOT / path).resolve())
+
+
+@dataclass(frozen=True)
+class LocalZepSettings:
+    llm_api_key: str | None = os.environ.get("LLM_API_KEY")
+    llm_base_url: str = os.environ.get("LLM_BASE_URL", "https://api.openai.com/v1")
+    llm_model_name: str = os.environ.get("LLM_MODEL_NAME", "gpt-4o-mini")
+    embedding_api_key: str = os.environ.get("EMBEDDING_API_KEY", "local-embedding-key")
+    embedding_base_url: str | None = os.environ.get("EMBEDDING_BASE_URL")
+    embedding_model_name: str | None = os.environ.get("EMBEDDING_MODEL_NAME")
+    reranker_api_key: str = os.environ.get("RERANKER_API_KEY", "local-reranker-key")
+    reranker_base_url: str | None = os.environ.get("RERANKER_BASE_URL")
+    reranker_model_name: str | None = os.environ.get("RERANKER_MODEL_NAME")
+    local_zep_rerank_top_k: int = int(os.environ.get("LOCAL_ZEP_RERANK_TOP_K", "50"))
+    local_zep_extract_max_retries: int = int(os.environ.get("LOCAL_ZEP_EXTRACT_MAX_RETRIES", "2"))
+    local_zep_extract_max_output_tokens: int = int(os.environ.get("LOCAL_ZEP_EXTRACT_MAX_OUTPUT_TOKENS", "2048"))
+    local_zep_db_path: str = _project_path(
+        os.environ.get(
+            "LOCAL_ZEP_DB_PATH",
+            str(_BACKEND_ROOT / "data" / "local_zep.sqlite3"),
+        )
+    )
+
+
+settings = LocalZepSettings()
--- a/backend/local_zep/store.py
+++ b/backend/local_zep/store.py
--- a/backend/pyproject.toml
+++ b/backend/pyproject.toml
@ -16,12 +16,10 @@ dependencies = [
    # LLM 相关
    "openai>=1.0.0",
    
-    # Zep Cloud
-    "zep-cloud==3.13.0",
-    
-    # OASIS 社交媒体模拟
-    "camel-oasis==0.2.5",
-    "camel-ai==0.2.78",
+    # OASIS 社交媒体模拟.
+    # These packages currently publish metadata for Python <3.12 only.
+    "camel-oasis==0.2.5; python_version < '3.12'",
+    "camel-ai==0.2.78; python_version < '3.12'",
    
    # 文件处理
    "PyMuPDF>=1.24.0",
@ -35,6 +33,10 @@ dependencies = [
 ]

 [project.optional-dependencies]
+oasis = [
+    "camel-oasis==0.2.5; python_version < '3.12'",
+    "camel-ai==0.2.78; python_version < '3.12'",
+]
 dev = [
    "pytest>=8.0.0",
    "pytest-asyncio>=0.23.0",
@ -52,4 +54,4 @@ dev = [
 ]

 [tool.hatch.build.targets.wheel]
-packages = ["app"]
+packages = ["app", "local_zep", "zep_cloud"]
--- a/backend/requirements-oasis.txt
+++ b/backend/requirements-oasis.txt
@ -0,0 +1,7 @@
+# Optional OASIS runtime.
+#
+# These third-party packages currently declare Python <3.12 support.
+# Use a Python 3.11 venv if you need the original OASIS simulation scripts.
+
+camel-oasis==0.2.5; python_version < "3.12"
+camel-ai==0.2.78; python_version < "3.12"
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@ -1,7 +1,7 @@
 # ===========================================
 # MiroFish Backend Dependencies
 # ===========================================
-# Python 3.11+ required
+# Python 3.11+ required. Python 3.13 is supported for the API/local-Zep backend.
 # Install: pip install -r requirements.txt
 # ===========================================

@ -13,13 +13,11 @@ flask-cors>=6.0.0
 # OpenAI SDK（统一使用 OpenAI 格式调用 LLM）
 openai>=1.0.0

-# ============= Zep Cloud =============
-zep-cloud==3.13.0
-
 # ============= OASIS 社交媒体模拟 =============
-# OASIS 社交模拟框架
-camel-oasis==0.2.5
-camel-ai==0.2.78
+# camel-oasis 0.2.5 declares Python <3.12, so keep it optional on newer Python.
+# The backend server and local-Zep graph work on Python 3.13 without these packages.
+camel-oasis==0.2.5; python_version < "3.12"
+camel-ai==0.2.78; python_version < "3.12"

 # ============= 文件处理 =============
 PyMuPDF>=1.24.0
--- a/backend/run.py
+++ b/backend/run.py
@ -40,6 +40,11 @@ def main():
    host = os.environ.get('FLASK_HOST', '0.0.0.0')
    port = int(os.environ.get('FLASK_PORT', 5001))
    debug = Config.DEBUG
+
+    if Config.PUBLIC_BASE_URL:
+        print(f"Public backend URL: {Config.PUBLIC_BASE_URL}")
+    if Config.TAILSCALE_URL:
+        print(f"Tailscale URL: {Config.TAILSCALE_URL}")
    
    # 启动服务
    app.run(host=host, port=port, debug=debug, threaded=True)
@ -47,4 +52,3 @@ def main():

 if __name__ == '__main__':
    main()
-
--- a/backend/scripts/fake_openai_server.py
+++ b/backend/scripts/fake_openai_server.py
@ -0,0 +1,364 @@
+"""
+Minimal OpenAI-compatible fake server for local graph E2E tests.
+
+Implements:
+- POST /v1/chat/completions
+- POST /v1/embeddings
+- POST /v1/rerank
+"""
+
+from __future__ import annotations
+
+import hashlib
+import json
+import threading
+from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
+
+
+FIXED_ONTOLOGY = {
+    "entity_types": [
+        {
+            "name": "Journalist",
+            "description": "A news reporter or editor active in public discourse.",
+            "attributes": [{"name": "role", "type": "text", "description": "Journalism role"}],
+            "examples": ["Alice"],
+        },
+        {
+            "name": "MediaOutlet",
+            "description": "A media organization publishing reports and commentary.",
+            "attributes": [{"name": "org_name", "type": "text", "description": "Media brand"}],
+            "examples": ["DailyNews"],
+        },
+        {
+            "name": "Ngo",
+            "description": "A civil society or advocacy organization.",
+            "attributes": [{"name": "mission", "type": "text", "description": "Primary mission"}],
+            "examples": ["GreenFuture"],
+        },
+        {
+            "name": "Company",
+            "description": "A commercial company participating in the issue.",
+            "attributes": [{"name": "industry", "type": "text", "description": "Industry sector"}],
+            "examples": ["PolluteCorp"],
+        },
+        {
+            "name": "Official",
+            "description": "A government or public official making responses.",
+            "attributes": [{"name": "title", "type": "text", "description": "Official title"}],
+            "examples": ["Mayor Lee"],
+        },
+        {
+            "name": "Citizen",
+            "description": "A member of the public commenting on the issue.",
+            "attributes": [{"name": "location", "type": "text", "description": "Location"}],
+            "examples": ["Concerned resident"],
+        },
+        {
+            "name": "Expert",
+            "description": "An expert or scholar adding analysis.",
+            "attributes": [{"name": "specialty", "type": "text", "description": "Area of expertise"}],
+            "examples": ["Policy researcher"],
+        },
+        {
+            "name": "CommunityGroup",
+            "description": "A community or activist group involved in the issue.",
+            "attributes": [{"name": "focus", "type": "text", "description": "Group focus"}],
+            "examples": ["Local advocates"],
+        },
+        {
+            "name": "Person",
+            "description": "Any individual person not fitting other specific person types.",
+            "attributes": [{"name": "full_name", "type": "text", "description": "Full name"}],
+            "examples": ["ordinary citizen"],
+        },
+        {
+            "name": "Organization",
+            "description": "Any organization not fitting other specific organization types.",
+            "attributes": [{"name": "org_name", "type": "text", "description": "Organization name"}],
+            "examples": ["generic organization"],
+        },
+    ],
+    "edge_types": [
+        {
+            "name": "WORKS_FOR",
+            "description": "Employment or affiliation relation.",
+            "source_targets": [{"source": "Journalist", "target": "MediaOutlet"}],
+            "attributes": [],
+        },
+        {
+            "name": "REPORTS_ON",
+            "description": "Coverage relation for public reporting.",
+            "source_targets": [
+                {"source": "MediaOutlet", "target": "Ngo"},
+                {"source": "MediaOutlet", "target": "Company"},
+                {"source": "Journalist", "target": "Ngo"},
+            ],
+            "attributes": [],
+        },
+        {
+            "name": "SUPPORTS",
+            "description": "Supportive stance relation.",
+            "source_targets": [
+                {"source": "Journalist", "target": "Ngo"},
+                {"source": "Citizen", "target": "Ngo"},
+                {"source": "Official", "target": "Organization"},
+            ],
+            "attributes": [],
+        },
+        {
+            "name": "OPPOSES",
+            "description": "Opposing stance relation.",
+            "source_targets": [
+                {"source": "Company", "target": "Ngo"},
+                {"source": "Organization", "target": "Organization"},
+            ],
+            "attributes": [],
+        },
+        {
+            "name": "RESPONDS_TO",
+            "description": "A public response to another actor.",
+            "source_targets": [
+                {"source": "Official", "target": "Journalist"},
+                {"source": "Organization", "target": "MediaOutlet"},
+            ],
+            "attributes": [],
+        },
+        {
+            "name": "COLLABORATES_WITH",
+            "description": "Cooperation relation between actors.",
+            "source_targets": [
+                {"source": "Organization", "target": "Organization"},
+                {"source": "Ngo", "target": "CommunityGroup"},
+            ],
+            "attributes": [],
+        },
+    ],
+    "analysis_summary": "Fake ontology for local graph integration testing.",
+}
+
+
+def _embedding_for_text(text: str) -> list[float]:
+    digest = hashlib.sha256((text or "").encode("utf-8")).digest()
+    values = []
+    for index in range(8):
+        chunk = digest[index * 4:(index + 1) * 4]
+        raw = int.from_bytes(chunk, "big")
+        values.append((raw % 1000) / 1000.0)
+    return values
+
+
+def _rerank_score(query: str, document: str) -> float:
+    query_tokens = set((query or "").lower().replace(".", " ").replace(",", " ").split())
+    document_tokens = set((document or "").lower().replace(".", " ").replace(",", " ").split())
+    if not query_tokens or not document_tokens:
+        return 0.0
+    overlap = len(query_tokens.intersection(document_tokens)) / len(query_tokens)
+    exact_bonus = 0.4 if (query or "").lower() in (document or "").lower() else 0.0
+    return min(1.0, overlap + exact_bonus)
+
+
+def _build_extraction_payload(text: str) -> dict:
+    normalized = (text or "").lower()
+    entities = []
+    edges = []
+
+    if "alice" in normalized:
+        entities.append({
+            "name": "Alice",
+            "type": "Journalist",
+            "summary": "Journalist covering the environmental dispute.",
+            "attributes": {"role": "journalist"},
+        })
+    if "dailynews" in normalized:
+        entities.append({
+            "name": "DailyNews",
+            "type": "MediaOutlet",
+            "summary": "Media outlet reporting on the issue.",
+            "attributes": {"org_name": "DailyNews"},
+        })
+    if "greenfuture" in normalized:
+        entities.append({
+            "name": "GreenFuture",
+            "type": "Ngo",
+            "summary": "Environmental organization active in the conflict.",
+            "attributes": {"mission": "environmental advocacy"},
+        })
+    if "pollutecorp" in normalized:
+        entities.append({
+            "name": "PolluteCorp",
+            "type": "Company",
+            "summary": "Company opposing GreenFuture's campaign.",
+            "attributes": {"industry": "manufacturing"},
+        })
+    if "mayor lee" in normalized:
+        entities.append({
+            "name": "Mayor Lee",
+            "type": "Official",
+            "summary": "Public official responding to media coverage.",
+            "attributes": {"title": "Mayor"},
+        })
+
+    if {"alice", "dailynews"} <= set(normalized.replace(".", " ").replace(",", " ").split()):
+        edges.append({
+            "name": "WORKS_FOR",
+            "source": "Alice",
+            "target": "DailyNews",
+            "fact": "Alice works for DailyNews.",
+            "attributes": {},
+        })
+    if "dailynews" in normalized and "greenfuture" in normalized:
+        edges.append({
+            "name": "REPORTS_ON",
+            "source": "DailyNews",
+            "target": "GreenFuture",
+            "fact": "DailyNews reports on GreenFuture.",
+            "attributes": {},
+        })
+    if "alice supports greenfuture" in normalized or ("alice" in normalized and "supports greenfuture" in normalized):
+        edges.append({
+            "name": "SUPPORTS",
+            "source": "Alice",
+            "target": "GreenFuture",
+            "fact": "Alice supports GreenFuture.",
+            "attributes": {},
+        })
+    if "alice opposes greenfuture" in normalized:
+        edges.append({
+            "name": "OPPOSES",
+            "source": "Alice",
+            "target": "GreenFuture",
+            "fact": "Alice opposes GreenFuture.",
+            "attributes": {},
+        })
+    if "pollutecorp opposes greenfuture" in normalized or ("pollutecorp" in normalized and "opposes greenfuture" in normalized):
+        edges.append({
+            "name": "OPPOSES",
+            "source": "PolluteCorp",
+            "target": "GreenFuture",
+            "fact": "PolluteCorp opposes GreenFuture.",
+            "attributes": {},
+        })
+    if "mayor lee responds to alice" in normalized or ("mayor lee" in normalized and "responds to alice" in normalized):
+        edges.append({
+            "name": "RESPONDS_TO",
+            "source": "Mayor Lee",
+            "target": "Alice",
+            "fact": "Mayor Lee responds to Alice.",
+            "attributes": {},
+        })
+
+    return {"entities": entities, "edges": edges}
+
+
+def _chat_response(messages: list[dict]) -> str:
+    system = "\n".join(message.get("content", "") for message in messages if message.get("role") == "system")
+    user = "\n".join(message.get("content", "") for message in messages if message.get("role") == "user")
+
+    if "本体设计专家" in system or "正好输出10个实体类型" in user:
+        return json.dumps(FIXED_ONTOLOGY, ensure_ascii=False)
+
+    if "Extract graph updates from the text below" in user:
+        return json.dumps(_build_extraction_payload(user), ensure_ascii=False)
+
+    return json.dumps({"message": "fake-server-default-response"}, ensure_ascii=False)
+
+
+class FakeOpenAIHandler(BaseHTTPRequestHandler):
+    server_version = "FakeOpenAI/0.1"
+
+    def do_POST(self):  # noqa: N802
+        length = int(self.headers.get("Content-Length", "0"))
+        payload = json.loads(self.rfile.read(length).decode("utf-8") or "{}")
+
+        if self.path == "/v1/chat/completions":
+            response = {
+                "id": "chatcmpl-fake",
+                "object": "chat.completion",
+                "choices": [
+                    {
+                        "index": 0,
+                        "message": {
+                            "role": "assistant",
+                            "content": _chat_response(payload.get("messages", [])),
+                        },
+                        "finish_reason": "stop",
+                    }
+                ],
+            }
+            self._write_json(200, response)
+            return
+
+        if self.path == "/v1/embeddings":
+            inputs = payload.get("input", [])
+            if not isinstance(inputs, list):
+                inputs = [inputs]
+            response = {
+                "object": "list",
+                "data": [
+                    {
+                        "object": "embedding",
+                        "index": index,
+                        "embedding": _embedding_for_text(text),
+                    }
+                    for index, text in enumerate(inputs)
+                ],
+                "model": payload.get("model", "fake-embedding-model"),
+            }
+            self._write_json(200, response)
+            return
+
+        if self.path == "/v1/rerank":
+            documents = payload.get("documents", [])
+            if not isinstance(documents, list):
+                documents = [str(documents)]
+            query = payload.get("query", "")
+            scored = [
+                {
+                    "index": index,
+                    "relevance_score": _rerank_score(query, document),
+                }
+                for index, document in enumerate(documents)
+            ]
+            scored.sort(key=lambda item: item["relevance_score"], reverse=True)
+            response = {
+                "model": payload.get("model", "fake-reranker-model"),
+                "results": scored[: int(payload.get("top_n") or len(scored))],
+            }
+            self._write_json(200, response)
+            return
+
+        self._write_json(404, {"error": f"Unknown path: {self.path}"})
+
+    def log_message(self, format, *args):  # noqa: A003
+        return
+
+    def _write_json(self, status: int, payload: dict) -> None:
+        body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
+        self.send_response(status)
+        self.send_header("Content-Type", "application/json; charset=utf-8")
+        self.send_header("Content-Length", str(len(body)))
+        self.end_headers()
+        self.wfile.write(body)
+
+
+def start_server(host: str = "127.0.0.1", port: int = 18080) -> tuple[ThreadingHTTPServer, threading.Thread]:
+    server = ThreadingHTTPServer((host, port), FakeOpenAIHandler)
+    thread = threading.Thread(target=server.serve_forever, daemon=True)
+    thread.start()
+    return server, thread
+
+
+def stop_server(server: ThreadingHTTPServer, thread: threading.Thread | None = None) -> None:
+    server.shutdown()
+    server.server_close()
+    if thread is not None:
+        thread.join(timeout=2)
+
+
+if __name__ == "__main__":
+    httpd, worker = start_server()
+    print("fake_openai_server listening on http://127.0.0.1:18080/v1")
+    try:
+        worker.join()
+    except KeyboardInterrupt:
+        stop_server(httpd, worker)
--- a/backend/scripts/test_local_zep_e2e.py
+++ b/backend/scripts/test_local_zep_e2e.py
@ -0,0 +1,419 @@
+"""
+Zero-dependency end-to-end test for the local graph replacement.
+
+This test:
+1. Starts a fake OpenAI-compatible server
+2. Injects minimal runtime shims for missing third-party packages
+3. Runs ontology generation -> graph build -> entity read -> graph search
+"""
+
+from __future__ import annotations
+
+import importlib.util
+import json
+import os
+import shutil
+import sys
+import tempfile
+import time
+import types
+import urllib.request
+from pathlib import Path
+from types import SimpleNamespace
+
+from fake_openai_server import start_server, stop_server
+
+
+ROOT = Path(__file__).resolve().parents[2]
+BACKEND_ROOT = ROOT / "backend"
+APP_ROOT = BACKEND_ROOT / "app"
+
+SAMPLE_TEXT = (
+    "Alice is a journalist at DailyNews. "
+    "DailyNews reports on GreenFuture, an environmental organization. "
+    "Alice supports GreenFuture. "
+    "PolluteCorp opposes GreenFuture. "
+    "Mayor Lee responds to Alice."
+)
+
+
+def _to_namespace(value):
+    if isinstance(value, dict):
+        return SimpleNamespace(**{key: _to_namespace(item) for key, item in value.items()})
+    if isinstance(value, list):
+        return [_to_namespace(item) for item in value]
+    return value
+
+
+def install_runtime_shims() -> None:
+    flask_module = types.ModuleType("flask")
+    flask_module.request = SimpleNamespace(headers={})
+    flask_module.has_request_context = lambda: False
+    flask_module.Flask = type("Flask", (), {})
+    sys.modules["flask"] = flask_module
+
+    flask_cors_module = types.ModuleType("flask_cors")
+    flask_cors_module.CORS = lambda *args, **kwargs: None
+    sys.modules["flask_cors"] = flask_cors_module
+
+    dotenv_module = types.ModuleType("dotenv")
+    dotenv_module.load_dotenv = lambda *args, **kwargs: False
+    sys.modules["dotenv"] = dotenv_module
+
+    pydantic_module = types.ModuleType("pydantic")
+
+    class FieldInfo:
+        def __init__(self, default=None, description=None):
+            self.default = default
+            self.description = description
+
+    def Field(*, default=None, description=None):
+        return FieldInfo(default=default, description=description)
+
+    class ConfigDict(dict):
+        pass
+
+    class BaseModel:
+        model_fields = {}
+
+        def __init_subclass__(cls, **kwargs):
+            super().__init_subclass__(**kwargs)
+            annotations = getattr(cls, "__annotations__", {})
+            model_fields = {}
+            for name in annotations:
+                value = getattr(cls, name, None)
+                if isinstance(value, FieldInfo):
+                    model_fields[name] = value
+            cls.model_fields = model_fields
+
+    pydantic_module.Field = Field
+    pydantic_module.BaseModel = BaseModel
+    pydantic_module.ConfigDict = ConfigDict
+    sys.modules["pydantic"] = pydantic_module
+
+    openai_module = types.ModuleType("openai")
+
+    class _ChatCompletions:
+        def __init__(self, client):
+            self.client = client
+
+        def create(self, **kwargs):
+            return self.client._post("/chat/completions", kwargs)
+
+    class _Embeddings:
+        def __init__(self, client):
+            self.client = client
+
+        def create(self, **kwargs):
+            return self.client._post("/embeddings", kwargs)
+
+    class OpenAI:
+        def __init__(self, api_key=None, base_url=None):
+            self.api_key = api_key or ""
+            self.base_url = (base_url or "").rstrip("/")
+            self.chat = SimpleNamespace(completions=_ChatCompletions(self))
+            self.embeddings = _Embeddings(self)
+
+        def _post(self, path: str, payload: dict):
+            body = json.dumps(payload).encode("utf-8")
+            request = urllib.request.Request(
+                self.base_url + path,
+                data=body,
+                headers={
+                    "Content-Type": "application/json",
+                    "Authorization": f"Bearer {self.api_key}",
+                },
+                method="POST",
+            )
+            with urllib.request.urlopen(request, timeout=10) as response:
+                data = json.loads(response.read().decode("utf-8"))
+            return _to_namespace(data)
+
+    openai_module.OpenAI = OpenAI
+    sys.modules["openai"] = openai_module
+
+
+def ensure_package(name: str, path: Path) -> None:
+    module = types.ModuleType(name)
+    module.__path__ = [str(path)]
+    sys.modules[name] = module
+
+
+def load_module(name: str, file_path: Path):
+    spec = importlib.util.spec_from_file_location(name, file_path)
+    module = importlib.util.module_from_spec(spec)
+    sys.modules[name] = module
+    assert spec.loader is not None
+    spec.loader.exec_module(module)
+    return module
+
+
+def load_backend_modules(temp_upload_root: Path):
+    sys.path.insert(0, str(BACKEND_ROOT))
+
+    ensure_package("app", APP_ROOT)
+    ensure_package("app.utils", APP_ROOT / "utils")
+    ensure_package("app.models", APP_ROOT / "models")
+    ensure_package("app.services", APP_ROOT / "services")
+
+    config_module = load_module("app.config", APP_ROOT / "config.py")
+    config_module.Config.UPLOAD_FOLDER = str(temp_upload_root)
+    config_module.Config.OASIS_SIMULATION_DATA_DIR = str(temp_upload_root / "simulations")
+
+    load_module("app.utils.logger", APP_ROOT / "utils" / "logger.py")
+    load_module("app.utils.locale", APP_ROOT / "utils" / "locale.py")
+    load_module("app.utils.file_parser", APP_ROOT / "utils" / "file_parser.py")
+    load_module("app.utils.llm_client", APP_ROOT / "utils" / "llm_client.py")
+    load_module("app.utils.zep_paging", APP_ROOT / "utils" / "zep_paging.py")
+    load_module("app.models.task", APP_ROOT / "models" / "task.py")
+    load_module("app.models.project", APP_ROOT / "models" / "project.py")
+    load_module("app.services.text_processor", APP_ROOT / "services" / "text_processor.py")
+    load_module("app.services.ontology_generator", APP_ROOT / "services" / "ontology_generator.py")
+    load_module("app.services.graph_builder", APP_ROOT / "services" / "graph_builder.py")
+    load_module("app.services.zep_entity_reader", APP_ROOT / "services" / "zep_entity_reader.py")
+    load_module("app.services.zep_tools", APP_ROOT / "services" / "zep_tools.py")
+
+    return {
+        "config": sys.modules["app.config"],
+        "file_parser": sys.modules["app.utils.file_parser"],
+        "project": sys.modules["app.models.project"],
+        "task": sys.modules["app.models.task"],
+        "text_processor": sys.modules["app.services.text_processor"],
+        "ontology": sys.modules["app.services.ontology_generator"],
+        "graph_builder": sys.modules["app.services.graph_builder"],
+        "entity_reader": sys.modules["app.services.zep_entity_reader"],
+        "zep_tools": sys.modules["app.services.zep_tools"],
+    }
+
+
+def wait_for_task(task_manager, task_id: str, timeout: int = 20):
+    deadline = time.time() + timeout
+    while time.time() < deadline:
+        task = task_manager.get_task(task_id)
+        if task and task.status.value in {"completed", "failed"}:
+            return task
+        time.sleep(0.2)
+    raise TimeoutError(f"Timed out waiting for task {task_id}")
+
+
+class FakeFileStorage:
+    def __init__(self, source_path: Path):
+        self.source_path = source_path
+
+    def save(self, destination: str) -> None:
+        shutil.copyfile(self.source_path, destination)
+
+
+def run_e2e() -> dict:
+    with tempfile.TemporaryDirectory(prefix="mirofish-local-zep-") as temp_dir:
+        temp_root = Path(temp_dir)
+        upload_root = temp_root / "uploads"
+        sample_path = temp_root / "seed.txt"
+        db_path = temp_root / "local_zep.sqlite3"
+        sample_path.write_text(SAMPLE_TEXT, encoding="utf-8")
+
+        os.environ["LLM_API_KEY"] = "fake-key"
+        os.environ["LLM_BASE_URL"] = "http://127.0.0.1:18080/v1"
+        os.environ["LLM_MODEL_NAME"] = "fake-chat-model"
+        os.environ["EMBEDDING_API_KEY"] = "fake-key"
+        os.environ["EMBEDDING_BASE_URL"] = "http://127.0.0.1:18080/v1"
+        os.environ["EMBEDDING_MODEL_NAME"] = "fake-embedding-model"
+        os.environ["RERANKER_API_KEY"] = "fake-key"
+        os.environ["RERANKER_BASE_URL"] = "http://127.0.0.1:18080/v1"
+        os.environ["RERANKER_MODEL_NAME"] = "fake-reranker-model"
+        os.environ["LOCAL_ZEP_DB_PATH"] = str(db_path)
+
+        install_runtime_shims()
+        modules = load_backend_modules(upload_root)
+
+        ProjectManager = modules["project"].ProjectManager
+        OntologyGenerator = modules["ontology"].OntologyGenerator
+        GraphBuilderService = modules["graph_builder"].GraphBuilderService
+        ZepEntityReader = modules["entity_reader"].ZepEntityReader
+        ZepToolsService = modules["zep_tools"].ZepToolsService
+        TaskManager = modules["task"].TaskManager
+        FileParser = modules["file_parser"].FileParser
+        TextProcessor = modules["text_processor"].TextProcessor
+
+        project = ProjectManager.create_project(name="Local Zep E2E")
+        project.simulation_requirement = "Simulate public sentiment around the GreenFuture environmental campaign."
+
+        file_info = ProjectManager.save_file_to_project(
+            project.project_id,
+            FakeFileStorage(sample_path),
+            "seed.txt",
+        )
+        project.files.append({"filename": file_info["original_filename"], "size": file_info["size"]})
+
+        extracted_text = TextProcessor.preprocess_text(FileParser.extract_text(file_info["path"]))
+        ProjectManager.save_extracted_text(project.project_id, extracted_text)
+        project.total_text_length = len(extracted_text)
+        ProjectManager.save_project(project)
+
+        ontology_generator = OntologyGenerator()
+        ontology = ontology_generator.generate(
+            document_texts=[extracted_text],
+            simulation_requirement=project.simulation_requirement,
+            additional_context=None,
+        )
+        assert len(ontology["entity_types"]) == 10, ontology["entity_types"]
+        assert any(item["name"] == "Person" for item in ontology["entity_types"])
+        assert any(item["name"] == "Organization" for item in ontology["entity_types"])
+
+        project.ontology = {
+            "entity_types": ontology["entity_types"],
+            "edge_types": ontology["edge_types"],
+        }
+        ProjectManager.save_project(project)
+
+        builder = GraphBuilderService(api_key="ignored-for-local")
+        task_id = builder.build_graph_async(
+            text=extracted_text,
+            ontology=project.ontology,
+            graph_name="Local Zep E2E Graph",
+            chunk_size=400,
+            chunk_overlap=40,
+            batch_size=2,
+        )
+        task = wait_for_task(TaskManager(), task_id)
+        assert task.status.value == "completed", task.error
+
+        graph_id = task.result["graph_id"]
+        graph_data = builder.get_graph_data(graph_id)
+        assert graph_data["node_count"] >= 5, graph_data
+        assert graph_data["edge_count"] >= 5, graph_data
+
+        reader = ZepEntityReader()
+        filtered = reader.filter_defined_entities(graph_id=graph_id, enrich_with_edges=True)
+        assert filtered.filtered_count >= 5, filtered.to_dict()
+
+        tools = ZepToolsService(api_key="ignored-for-local")
+        search = tools.search_graph(graph_id=graph_id, query="Alice supports GreenFuture", limit=5, scope="edges")
+        assert any("Alice supports GreenFuture" in fact for fact in search.facts), search.to_dict()
+        cross_encoder_results = tools.client.graph.search(
+            graph_id=graph_id,
+            query="Alice supports GreenFuture",
+            limit=3,
+            scope="edges",
+            reranker="cross_encoder",
+        )
+        assert cross_encoder_results.edges, cross_encoder_results
+        assert cross_encoder_results.edges[0].score is not None
+        assert cross_encoder_results.edges[0].relevance is not None
+
+        rrf_results = tools.client.graph.search(
+            graph_id=graph_id,
+            query="GreenFuture",
+            limit=3,
+            scope="both",
+            reranker="rrf",
+        )
+        assert rrf_results.edges or rrf_results.nodes, rrf_results
+
+        mmr_results = tools.client.graph.search(
+            graph_id=graph_id,
+            query="GreenFuture",
+            limit=3,
+            scope="edges",
+            reranker="mmr",
+            mmr_lambda=0.5,
+        )
+        assert mmr_results.edges, mmr_results
+
+        episode_results = tools.client.graph.search(
+            graph_id=graph_id,
+            query="Mayor Lee",
+            limit=2,
+            scope="episodes",
+            reranker="rrf",
+        )
+        assert episode_results.episodes, episode_results
+
+        temporal_episode = tools.client.graph.add(
+            graph_id=graph_id,
+            data="Alice opposes GreenFuture.",
+            type="text",
+            created_at="2025-01-01T00:00:00Z",
+            metadata={"source": "temporal_test"},
+            source_description="Temporal update test",
+        )
+        assert temporal_episode.metadata["source"] == "temporal_test"
+
+        temporal_edges = tools.client.graph.edge.get_by_graph_id(graph_id=graph_id, limit=20)
+        old_supports = [
+            edge
+            for edge in temporal_edges
+            if edge.name == "SUPPORTS" and edge.source_node_uuid and "Alice supports GreenFuture" in edge.fact
+        ]
+        new_opposes = [
+            edge
+            for edge in temporal_edges
+            if edge.name == "OPPOSES" and "Alice opposes GreenFuture" in edge.fact
+        ]
+        assert old_supports and old_supports[0].invalid_at is not None, [edge.fact for edge in temporal_edges]
+        assert new_opposes and new_opposes[0].valid_at == "2025-01-01T00:00:00Z", new_opposes
+
+        active_results = tools.client.graph.search(
+            graph_id=graph_id,
+            query="Alice GreenFuture",
+            limit=10,
+            scope="edges",
+            reranker="rrf",
+            search_filters={"invalid_at": [[{"comparison_operator": "IS NULL"}]]},
+        )
+        assert any("Alice opposes GreenFuture" in edge.fact for edge in active_results.edges)
+        assert not any("Alice supports GreenFuture" in edge.fact for edge in active_results.edges)
+
+        metadata_results = tools.client.graph.search(
+            graph_id=graph_id,
+            query="Alice opposes GreenFuture",
+            limit=5,
+            scope="episodes",
+            search_filters={
+                "episode_metadata_filters": {
+                    "type": "and",
+                    "filters": [
+                        {
+                            "property_name": "source",
+                            "comparison_operator": "=",
+                            "property_value": "temporal_test",
+                        }
+                    ],
+                }
+            },
+        )
+        assert metadata_results.episodes and metadata_results.episodes[0].uuid_ == temporal_episode.uuid_
+
+        stats = tools.get_graph_statistics(graph_id)
+        assert stats["total_nodes"] >= 5
+        assert stats["total_edges"] >= 5
+
+        summary = tools.get_entity_summary(graph_id=graph_id, entity_name="Alice")
+        assert summary["entity_info"] is not None, summary
+        assert summary["total_relations"] >= 2, summary
+
+        return {
+            "project_id": project.project_id,
+            "graph_id": graph_id,
+            "node_count": graph_data["node_count"],
+            "edge_count": graph_data["edge_count"],
+            "entity_count": filtered.filtered_count,
+            "search_facts": search.facts[:3],
+            "cross_encoder_relevance": cross_encoder_results.edges[0].relevance,
+            "episode_hits": len(episode_results.episodes),
+            "alice_relations": summary["total_relations"],
+        }
+
+
+def main() -> int:
+    server, thread = start_server()
+    try:
+        result = run_e2e()
+        print(json.dumps({"status": "ok", "result": result}, ensure_ascii=False, indent=2))
+        return 0
+    finally:
+        stop_server(server, thread)
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
--- a/backend/zep_cloud/init.py
+++ b/backend/zep_cloud/init.py
@ -0,0 +1,7 @@
+from local_zep import EntityEdgeSourceTarget, EpisodeData, InternalServerError
+
+__all__ = [
+    "EntityEdgeSourceTarget",
+    "EpisodeData",
+    "InternalServerError",
+]
--- a/backend/zep_cloud/client.py
+++ b/backend/zep_cloud/client.py
@ -0,0 +1,3 @@
+from local_zep.client import Zep
+
+__all__ = ["Zep"]
--- a/backend/zep_cloud/external_clients/init.py
+++ b/backend/zep_cloud/external_clients/init.py
@ -0,0 +1,3 @@
+from .ontology import EdgeModel, EntityModel, EntityText
+
+__all__ = ["EdgeModel", "EntityModel", "EntityText"]
--- a/backend/zep_cloud/external_clients/ontology.py
+++ b/backend/zep_cloud/external_clients/ontology.py
@ -0,0 +1,11 @@
+from pydantic import BaseModel, ConfigDict
+
+EntityText = str
+
+
+class EntityModel(BaseModel):
+    model_config = ConfigDict(extra="allow")
+
+
+class EdgeModel(BaseModel):
+    model_config = ConfigDict(extra="allow")
--- a/frontend/src/api/index.js
+++ b/frontend/src/api/index.js
@ -1,9 +1,20 @@
 import axios from 'axios'
 import i18n from '../i18n'

+function resolveApiBaseURL() {
+  const configured = (import.meta.env.VITE_API_BASE_URL || '').trim()
+  if (configured) {
+    return configured
+  }
+
+  // API modules already call /api/... paths. Keeping baseURL empty makes
+  // remote browsers use the Vite same-origin proxy instead of their localhost.
+  return ''
+}
+
 // 创建axios实例
 const service = axios.create({
-  baseURL: import.meta.env.VITE_API_BASE_URL || 'http://localhost:5001',
+  baseURL: resolveApiBaseURL(),
  timeout: 300000, // 5分钟超时（本体生成可能需要较长时间）
  headers: {
    'Content-Type': 'application/json'
--- a/frontend/src/views/Process.vue
+++ b/frontend/src/views/Process.vue
@ -317,7 +317,7 @@
              <div class="detail-section">
                <div class="detail-label">接口说明</div>
                <div class="detail-content">
-                  基于生成的本体，将文档分块后调用 Zep API 构建知识图谱，提取实体和关系
+                  基于生成的本体，将文档分块后调用本地图谱引擎构建知识图谱，提取实体和关系
                </div>
              </div>
              
@ -2065,4 +2065,4 @@ onUnmounted(() => {
      display: none;
  }
 }
-</style>
+</style>
--- a/frontend/vite.config.js
+++ b/frontend/vite.config.js
@ -1,24 +1,58 @@
-import { defineConfig } from 'vite'
+import { defineConfig, loadEnv } from 'vite'
 import vue from '@vitejs/plugin-vue'
 import path from 'path'

+function parseAllowedHosts(value) {
+  if (!value) {
+    return ['localhost', '127.0.0.1', '.ts.net', '.beta.tailscale.net']
+  }
+
+  return value
+    .split(',')
+    .map(item => item.trim())
+    .filter(Boolean)
+}
+
+function buildHmrConfig(env) {
+  const hmr = {}
+
+  if (env.VITE_HMR_HOST) hmr.host = env.VITE_HMR_HOST
+  if (env.VITE_HMR_PROTOCOL) hmr.protocol = env.VITE_HMR_PROTOCOL
+  if (env.VITE_HMR_PORT) hmr.port = Number(env.VITE_HMR_PORT)
+  if (env.VITE_HMR_CLIENT_PORT) hmr.clientPort = Number(env.VITE_HMR_CLIENT_PORT)
+  if (env.VITE_HMR_PATH) hmr.path = env.VITE_HMR_PATH
+
+  return Object.keys(hmr).length ? hmr : undefined
+}
+
 // https://vite.dev/config/
-export default defineConfig({
-  plugins: [vue()],
-  resolve: {
-    alias: {
-      '@': path.resolve(__dirname, 'src'),
-      '@locales': path.resolve(__dirname, '../locales')
-    }
-  },
-  server: {
-    port: 3000,
-    open: true,
-    proxy: {
-      '/api': {
-        target: 'http://localhost:5001',
-        changeOrigin: true,
-        secure: false
+export default defineConfig(({ mode }) => {
+  const repoRoot = path.resolve(__dirname, '..')
+  const rootEnv = loadEnv(mode, repoRoot, '')
+  const frontendEnv = loadEnv(mode, __dirname, '')
+  const env = { ...rootEnv, ...frontendEnv, ...process.env }
+
+  return {
+    plugins: [vue()],
+    resolve: {
+      alias: {
+        '@': path.resolve(__dirname, 'src'),
+        '@locales': path.resolve(__dirname, '../locales')
+      }
+    },
+    server: {
+      host: env.VITE_DEV_HOST || '0.0.0.0',
+      port: Number(env.VITE_DEV_PORT || 3000),
+      strictPort: true,
+      open: env.VITE_OPEN_BROWSER ? env.VITE_OPEN_BROWSER === 'true' : true,
+      allowedHosts: parseAllowedHosts(env.VITE_ALLOWED_HOSTS),
+      hmr: buildHmrConfig(env),
+      proxy: {
+        '/api': {
+          target: env.VITE_DEV_PROXY_TARGET || 'http://127.0.0.1:5001',
+          changeOrigin: true,
+          secure: false
+        }
      }
    }
  }
--- a/locales/en.json
+++ b/locales/en.json
@ -85,7 +85,7 @@
    "ontologyDesc": "LLM analyzes document content and simulation requirements, extracts reality seeds, and auto-generates a suitable ontology structure",
    "analyzingDocs": "Analyzing documents...",
    "graphRagBuild": "GraphRAG Build",
-    "graphRagDesc": "Based on the generated ontology, documents are auto-chunked and sent to Zep to build a knowledge graph, extracting entities and relations, forming temporal memory and community summaries",
+    "graphRagDesc": "Based on the generated ontology, documents are auto-chunked and sent to the local graph engine to build a knowledge graph, extracting entities and relations, forming temporal memory and community summaries",
    "entityNodes": "Entity Nodes",
    "relationEdges": "Relation Edges",
    "schemaTypes": "Schema Types",
@ -328,7 +328,7 @@
    "noDocProcessed": "No documents were processed successfully. Please check file formats.",
    "requireProjectId": "Please provide project_id",
    "configError": "Configuration error: {details}",
-    "zepApiKeyMissing": "ZEP_API_KEY not configured",
+    "zepApiKeyMissing": "Local graph embeddings are not configured",
    "ontologyNotGenerated": "Ontology not yet generated. Please call /ontology/generate first.",
    "graphBuilding": "Graph build in progress. Do not resubmit. To force rebuild, add force: true.",
    "textNotFound": "Extracted text content not found",
@ -393,10 +393,10 @@
  "progress": {
    "initGraphService": "Initializing graph build service...",
    "textChunking": "Chunking text...",
-    "creatingZepGraph": "Creating Zep graph...",
+    "creatingZepGraph": "Creating local graph...",
    "settingOntology": "Setting ontology definition...",
    "addingChunks": "Adding {count} text chunks...",
-    "waitingZepProcess": "Waiting for Zep to process data...",
+    "waitingZepProcess": "Waiting for the local graph engine to process data...",
    "fetchingGraphData": "Fetching graph data...",
    "graphBuildComplete": "Graph build complete",
    "buildFailed": "Build failed: {error}",
@ -410,12 +410,12 @@
    "noEpisodesWait": "No episodes to wait for",
    "waitingEpisodes": "Waiting for {count} text chunks to process...",
    "episodesTimeout": "Some chunks timed out, {completed}/{total} completed",
-    "zepProcessing": "Zep processing... {completed}/{total} done, {pending} pending ({elapsed}s)",
+    "zepProcessing": "Local graph processing... {completed}/{total} done, {pending} pending ({elapsed}s)",
    "processingComplete": "Processing complete: {completed}/{total}",
    "taskComplete": "Task complete",
    "taskFailed": "Task failed",
    "startPreparingEnv": "Preparing simulation environment...",
-    "connectingZepGraph": "Connecting to Zep graph...",
+    "connectingZepGraph": "Connecting to local graph...",
    "readingNodeData": "Reading node data...",
    "readingComplete": "Done, {count} entities found",
    "startGenerating": "Starting generation...",
@ -492,7 +492,7 @@
    "detectedExistingPrep": "Detected existing preparation, using it directly",
    "prepareTaskStarted": "Preparation task started",
    "prepareTaskId": "  └─ Task ID: {taskId}",
-    "zepEntitiesFound": "Found {count} entities from Zep graph",
+    "zepEntitiesFound": "Found {count} entities from the local graph",
    "entityTypes": "  └─ Entity types: {types}",
    "startPollingProgress": "Polling preparation progress...",
    "prepareFailed": "Preparation failed: {error}",
@ -610,13 +610,13 @@
    "redirectToInsightForge": "get_simulation_context redirected to insight_forge"
  },
  "console": {
-    "zepToolsInitialized": "ZepToolsService initialized",
-    "zepRetryAttempt": "Zep {operation} attempt {attempt} failed: {error}, retrying in {delay}s...",
-    "zepAllRetriesFailed": "Zep {operation} failed after {retries} attempts: {error}",
+    "zepToolsInitialized": "Local graph tools initialized",
+    "zepRetryAttempt": "Local graph {operation} attempt {attempt} failed: {error}, retrying in {delay}s...",
+    "zepAllRetriesFailed": "Local graph {operation} failed after {retries} attempts: {error}",
    "graphSearch": "Graph search: graph_id={graphId}, query={query}...",
    "graphSearchOp": "Graph search (graph={graphId})",
    "searchComplete": "Search complete: found {count} relevant facts",
-    "zepSearchApiFallback": "Zep Search API failed, falling back to local search: {error}",
+    "zepSearchApiFallback": "Semantic search failed, falling back to local keyword search: {error}",
    "usingLocalSearch": "Using local search: query={query}...",
    "localSearchComplete": "Local search complete: found {count} relevant facts",
    "localSearchFailed": "Local search failed: {error}",
--- a/locales/zh.json
+++ b/locales/zh.json
@ -85,7 +85,7 @@
    "ontologyDesc": "LLM分析文档内容与模拟需求，提取出现实种子，自动生成合适的本体结构",
    "analyzingDocs": "正在分析文档...",
    "graphRagBuild": "GraphRAG构建",
-    "graphRagDesc": "基于生成的本体，将文档自动分块后调用 Zep 构建知识图谱，提取实体和关系，并形成时序记忆与社区摘要",
+    "graphRagDesc": "基于生成的本体，将文档自动分块后送入本地图谱引擎构建知识图谱，提取实体和关系，并形成时序记忆与社区摘要",
    "entityNodes": "实体节点",
    "relationEdges": "关系边",
    "schemaTypes": "SCHEMA类型",
@ -328,7 +328,7 @@
    "noDocProcessed": "没有成功处理任何文档，请检查文件格式",
    "requireProjectId": "请提供 project_id",
    "configError": "配置错误: {details}",
-    "zepApiKeyMissing": "ZEP_API_KEY未配置",
+    "zepApiKeyMissing": "本地图谱 Embeddings 配置未完成",
    "ontologyNotGenerated": "项目尚未生成本体，请先调用 /ontology/generate",
    "graphBuilding": "图谱正在构建中，请勿重复提交。如需强制重建，请添加 force: true",
    "textNotFound": "未找到提取的文本内容",
@ -393,10 +393,10 @@
  "progress": {
    "initGraphService": "初始化图谱构建服务...",
    "textChunking": "文本分块中...",
-    "creatingZepGraph": "创建Zep图谱...",
+    "creatingZepGraph": "创建本地图谱...",
    "settingOntology": "设置本体定义...",
    "addingChunks": "开始添加 {count} 个文本块...",
-    "waitingZepProcess": "等待Zep处理数据...",
+    "waitingZepProcess": "等待本地图谱引擎处理数据...",
    "fetchingGraphData": "获取图谱数据...",
    "graphBuildComplete": "图谱构建完成",
    "buildFailed": "构建失败: {error}",
@ -410,12 +410,12 @@
    "noEpisodesWait": "无需等待（没有 episode）",
    "waitingEpisodes": "开始等待 {count} 个文本块处理...",
    "episodesTimeout": "部分文本块超时，已完成 {completed}/{total}",
-    "zepProcessing": "Zep处理中... {completed}/{total} 完成, {pending} 待处理 ({elapsed}秒)",
+    "zepProcessing": "本地图谱处理中... {completed}/{total} 完成, {pending} 待处理 ({elapsed}秒)",
    "processingComplete": "处理完成: {completed}/{total}",
    "taskComplete": "任务完成",
    "taskFailed": "任务失败",
    "startPreparingEnv": "开始准备模拟环境...",
-    "connectingZepGraph": "正在连接Zep图谱...",
+    "connectingZepGraph": "正在连接本地图谱...",
    "readingNodeData": "正在读取节点数据...",
    "readingComplete": "完成，共 {count} 个实体",
    "startGenerating": "开始生成...",
@ -492,7 +492,7 @@
    "detectedExistingPrep": "检测到已有完成的准备工作，直接使用",
    "prepareTaskStarted": "准备任务已启动",
    "prepareTaskId": "  └─ Task ID: {taskId}",
-    "zepEntitiesFound": "从Zep图谱读取到 {count} 个实体",
+    "zepEntitiesFound": "从本地图谱读取到 {count} 个实体",
    "entityTypes": "  └─ 实体类型: {types}",
    "startPollingProgress": "开始轮询准备进度...",
    "prepareFailed": "准备失败: {error}",
@ -610,13 +610,13 @@
    "redirectToInsightForge": "get_simulation_context 已重定向到 insight_forge"
  },
  "console": {
-    "zepToolsInitialized": "ZepToolsService 初始化完成",
-    "zepRetryAttempt": "Zep {operation} 第 {attempt} 次尝试失败: {error}, {delay}秒后重试...",
-    "zepAllRetriesFailed": "Zep {operation} 在 {retries} 次尝试后仍失败: {error}",
+    "zepToolsInitialized": "本地图谱工具初始化完成",
+    "zepRetryAttempt": "本地图谱 {operation} 第 {attempt} 次尝试失败: {error}, {delay}秒后重试...",
+    "zepAllRetriesFailed": "本地图谱 {operation} 在 {retries} 次尝试后仍失败: {error}",
    "graphSearch": "图谱搜索: graph_id={graphId}, query={query}...",
    "graphSearchOp": "图谱搜索(graph={graphId})",
    "searchComplete": "搜索完成: 找到 {count} 条相关事实",
-    "zepSearchApiFallback": "Zep Search API失败，降级为本地搜索: {error}",
+    "zepSearchApiFallback": "语义检索失败，降级为本地关键词搜索: {error}",
    "usingLocalSearch": "使用本地搜索: query={query}...",
    "localSearchComplete": "本地搜索完成: 找到 {count} 条相关事实",
    "localSearchFailed": "本地搜索失败: {error}",
--- a/package.json
+++ b/package.json
@ -4,10 +4,10 @@
  "description": "MiroFish - 简洁通用的群体智能引擎，预测万物",
  "scripts": {
    "setup": "npm install && cd frontend && npm install",
-    "setup:backend": "cd backend && uv sync",
+    "setup:backend": "python3 -m venv venv && ./venv/bin/python -m pip install --upgrade pip && ./venv/bin/python -m pip install -r backend/requirements.txt",
    "setup:all": "npm run setup && npm run setup:backend",
    "dev": "concurrently --kill-others -n \"backend,frontend\" -c \"green,cyan\" \"npm run backend\" \"npm run frontend\"",
-    "backend": "cd backend && uv run python run.py",
+    "backend": "./venv/bin/python backend/run.py",
    "frontend": "cd frontend && npm run dev",
    "build": "cd frontend && npm run build"
  },