From 28827067c0ae3d24df6ba953f3005f1d9cc6b6dc Mon Sep 17 00:00:00 2001 From: Abhishek Yadav Date: Sun, 22 Mar 2026 01:30:28 +0530 Subject: [PATCH 1/3] feat: migrate knowledge graph from Zep Cloud to Graphiti + local Neo4j MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the paid, rate-limited Zep Cloud service with Graphiti (graphiti-core 0.11.6) backed by a local Neo4j instance — free, unlimited, and self-hosted. Key changes: - Add GraphitiAdapter: drop-in Zep-compatible wrapper around Graphiti with a persistent event-loop thread to avoid asyncio/Neo4j driver conflicts - Switch LLM client to native GeminiClient + GeminiEmbedder (text-embedding-004 fails on Gemini compat endpoint; use google-genai SDK directly) - Add _GeminiReranker passthrough replacing OpenAIRerankerClient (which hardcodes gpt-4.1-nano and uses logprobs unsupported by Gemini) - Fix Cypher queries: use s.uuid/t.uuid for edge source/target instead of r.source_node_uuid (null property in Graphiti's schema) - Add ontology-based entity type classifier (_classify_entity_type) so nodes get colored by type in the D3 graph visualization instead of all being Entity - Apply classifier in ZepEntityReader so filter_defined_entities finds entities (previously 0 personas loaded because all labels were ['Entity']) - Add startup recovery: auto-mark graph_building projects as graph_completed on backend restart if Neo4j already has their data - Add resume capability to graph build: skip already-processed episodes after a restart (断点续传) - Add non-blocking graph data cache with background refresh in graph.py - Add EMBEDDING_MODEL config (default: gemini-embedding-001 for Gemini users) - Add CLAUDE.md with project architecture and dev commands Co-Authored-By: Claude Sonnet 4.6 --- CLAUDE.md | 86 +++ backend/app/__init__.py | 30 +- backend/app/api/graph.py | 119 +++-- backend/app/config.py | 15 +- backend/app/services/graph_builder.py | 172 +++--- backend/app/services/graphiti_adapter.py | 491 ++++++++++++++++++ .../app/services/oasis_profile_generator.py | 12 +- backend/app/services/zep_entity_reader.py | 46 +- .../app/services/zep_graph_memory_updater.py | 9 +- backend/app/services/zep_tools.py | 36 +- backend/app/utils/zep_paging.py | 17 +- backend/pyproject.toml | 8 +- backend/uv.lock | 118 ++++- 13 files changed, 941 insertions(+), 218 deletions(-) create mode 100644 CLAUDE.md create mode 100644 backend/app/services/graphiti_adapter.py diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..43711aa2 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,86 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +MiroFish is a multi-agent swarm intelligence prediction engine. It builds knowledge graphs from seed data, simulates thousands of AI agents interacting on virtual Twitter/Reddit platforms (via CAMEL-OASIS), and generates analytical reports — all to predict outcomes of real-world scenarios. + +## Commands + +### Setup +```bash +npm run setup:all # Install all dependencies (frontend + backend) +npm run setup # Frontend npm install only +npm run setup:backend # Backend: uv sync (Python deps) +``` + +### Development +```bash +npm run dev # Run backend + frontend concurrently +npm run backend # Backend only: Flask on port 5001 +npm run frontend # Frontend only: Vite on port 3000 +``` + +### Build +```bash +npm run build # Build frontend (Vite) +``` + +### Backend (Python) +```bash +cd backend && uv run python run.py # Start Flask server +cd backend && uv run python -m pytest # Run tests (if any) +``` + +### Docker +```bash +docker-compose up # Full stack via Docker +``` + +## Architecture + +### Stack +- **Backend**: Python ≥3.11 Flask 3.0, managed by `uv` +- **Frontend**: Vue 3 + Vite, port 3000; proxies `/api` → port 5001 +- **LLM**: OpenAI SDK-compatible (default: Qwen via `dashscope`; also works with GLM, OpenAI) +- **Memory/Graph**: Zep Cloud (knowledge graph for entity storage and retrieval) +- **Simulation**: CAMEL-OASIS (multi-agent Twitter + Reddit simulation) +- **Visualization**: D3.js + +### Required Environment Variables +Copy `.env.example` to `.env`: +``` +LLM_API_KEY # Required +LLM_BASE_URL # Default: https://dashscope.aliyuncs.com/compatible-mode/v1 +LLM_MODEL_NAME # Default: qwen-plus +ZEP_API_KEY # Required (Zep Cloud) +``` + +### 5-Step Pipeline +The core workflow is a sequential async pipeline: +1. **Graph Build** — Upload files → LLM extracts ontology → Zep Cloud builds knowledge graph +2. **Env Setup** — Read Zep entities → Generate OASIS agent profiles (AI personalities) +3. **Simulation** — CAMEL-OASIS runs agents on dual platforms (Twitter + Reddit) in parallel +4. **Report** — ReportAgent (ReACT loop) queries graph with tools: `SearchResult`, `InsightForge`, `Panorama`, `Interview` +5. **Interaction** — Chat with simulated agents or the ReportAgent + +### Backend Structure (`backend/app/`) +- `api/` — Flask blueprints: `graph_bp`, `simulation_bp`, `report_bp` +- `services/` — Core logic: graph building, simulation runner, report agent, Zep tools +- `models/` — `Project` and `Task` state objects (in-memory, JSON-serializable) +- `utils/` — LLM client wrapper, file parser, retry logic, Zep pagination +- `config/config.py` — All configuration (LLM, Zep, chunking, simulation params) + +Long-running operations (ontology generation, graph build, profile generation, report generation) run as background tasks tracked via `Task` objects with progress polling. + +### Frontend Structure (`frontend/src/`) +- `views/` — Page components mapped to routes; `Process.vue` is the main 50KB workflow orchestrator +- `components/` — `Step1-5` step components + `GraphPanel.vue` (D3 graph visualization) +- `api/` — Axios services (`graph.js`, `simulation.js`, `report.js`) with 5-min timeout and exponential retry + +### Key Implementation Details +- Reasoning model outputs (e.g., MiniMax/GLM with `` tags or markdown code fences) are stripped before processing — see recent fix in commit `985f89f` +- Simulation state is managed in `SimulationManager`; IPC between processes via `simulation_ipc.py` +- Interview/chat with agents uses prefix injection to suppress tool calls in responses +- Default simulation: max 10 rounds, Twitter actions include CREATE_POST/LIKE/REPOST/FOLLOW/QUOTE/DO_NOTHING; Reddit adds CREATE_COMMENT/DISLIKE diff --git a/backend/app/__init__.py b/backend/app/__init__.py index aba624bb..6f3345cd 100644 --- a/backend/app/__init__.py +++ b/backend/app/__init__.py @@ -73,8 +73,36 @@ def create_app(config_class=Config): def health(): return {'status': 'ok', 'service': 'MiroFish Backend'} + # On startup: recover any projects stuck in graph_building (task was killed by restart) + if should_log_startup: + _recover_stuck_projects() + if should_log_startup: logger.info("MiroFish Backend 启动完成") - + return app + +def _recover_stuck_projects(): + """Mark graph_building projects as completed if Neo4j already has their data.""" + from .models.project import ProjectManager, ProjectStatus + from .utils.logger import get_logger as _get_logger + _log = _get_logger('mirofish.startup') + try: + for p in ProjectManager.list_projects(): + if p.status == ProjectStatus.GRAPH_BUILDING and p.graph_id: + from .services.graphiti_adapter import _get_graphiti, _run, _neo4j_query + g = _get_graphiti() + r = _run(_neo4j_query(g, + 'MATCH (n:Entity {group_id: $gid}) RETURN count(n) AS n', + {'gid': p.graph_id} + )) + node_count = int(r[0]['n']) if r else 0 + if node_count > 0: + p.status = ProjectStatus.GRAPH_COMPLETED + p.graph_build_task_id = None + ProjectManager.save_project(p) + _log.info(f"Recovered stuck project {p.project_id}: {node_count} nodes found, marked graph_completed") + except Exception as e: + _get_logger('mirofish.startup').warning(f"Startup recovery failed: {e}") + diff --git a/backend/app/api/graph.py b/backend/app/api/graph.py index 12ff1ba2..f432269d 100644 --- a/backend/app/api/graph.py +++ b/backend/app/api/graph.py @@ -4,6 +4,7 @@ """ import os +import time import traceback import threading from flask import request, jsonify @@ -18,6 +19,12 @@ from ..utils.logger import get_logger from ..models.task import TaskManager, TaskStatus from ..models.project import ProjectManager, ProjectStatus +# In-memory cache for graph data to avoid hammering Zep's rate-limited API. +# Stale cache is served instantly on 429; a background thread refreshes it. +_graph_data_cache: dict = {} # graph_id -> {"data": ..., "ts": float} +_graph_refresh_locks: dict = {} # graph_id -> threading.Lock (one refresh at a time) +_GRAPH_CACHE_TTL = 300 # seconds before triggering a background refresh + # 获取日志器 logger = get_logger('mirofish.api') @@ -284,8 +291,8 @@ def build_graph(): # 检查配置 errors = [] - if not Config.ZEP_API_KEY: - errors.append("ZEP_API_KEY未配置") + if not Config.NEO4J_PASSWORD: + errors.append("NEO4J未配置") if errors: logger.error(f"配置错误: {errors}") return jsonify({ @@ -382,7 +389,7 @@ def build_graph(): ) # 创建图谱构建服务 - builder = GraphBuilderService(api_key=Config.ZEP_API_KEY) + builder = GraphBuilderService() # 分块 task_manager.update_task( @@ -426,17 +433,30 @@ def build_graph(): progress=progress ) - task_manager.update_task( - task_id, - message=f"开始添加 {total_chunks} 个文本块...", - progress=15 - ) - + # Count already-processed episodes to resume after a restart + from app.services.graphiti_adapter import _get_graphiti, _run, _neo4j_query + try: + g = _get_graphiti() + ep_count = _run(_neo4j_query(g, + 'MATCH (e:Episodic {group_id: $gid}) RETURN count(e) AS n', + {'gid': graph_id} + )) + already_done = int(ep_count[0]['n']) if ep_count else 0 + except Exception: + already_done = 0 + + skip_chunks = already_done + remaining = total_chunks - skip_chunks + msg_start = (f"断点续传:跳过 {skip_chunks} 个已处理块,继续处理 {remaining} 块..." + if skip_chunks > 0 else f"开始添加 {total_chunks} 个文本块...") + task_manager.update_task(task_id, message=msg_start, progress=15) + episode_uuids = builder.add_text_batches( - graph_id, + graph_id, chunks, batch_size=3, - progress_callback=add_progress_callback + progress_callback=add_progress_callback, + skip_chunks=skip_chunks, ) # 等待Zep处理完成(查询每个episode的processed状态) @@ -561,32 +581,59 @@ def list_tasks(): # ============== 图谱数据接口 ============== +def _refresh_graph_cache(graph_id: str): + """Background thread: fetch graph data from Neo4j and update cache.""" + lock = _graph_refresh_locks.setdefault(graph_id, threading.Lock()) + if not lock.acquire(blocking=False): + return # another refresh already in progress + try: + # Look up ontology from the project that owns this graph_id + ontology = None + for project in ProjectManager.list_projects(): + if project.graph_id == graph_id and project.ontology: + ontology = project.ontology + break + + builder = GraphBuilderService() + graph_data = builder.get_graph_data(graph_id, ontology=ontology) + _graph_data_cache[graph_id] = {"data": graph_data, "ts": time.time()} + logger.info(f"Graph cache refreshed for {graph_id}") + except Exception as e: + logger.warning(f"Background graph cache refresh failed for {graph_id}: {str(e)[:100]}") + finally: + lock.release() + + @graph_bp.route('/data/', methods=['GET']) def get_graph_data(graph_id: str): """ - 获取图谱数据(节点和边) + 获取图谱数据(节点和边)。 + - 有缓存且未过期:直接返回缓存,不调用 Zep + - 有缓存但已过期:立即返回旧缓存,后台异步刷新 + - 无缓存:后台线程拉取,返回 202 让前端稍后重试 """ - try: - if not Config.ZEP_API_KEY: - return jsonify({ - "success": False, - "error": "ZEP_API_KEY未配置" - }), 500 - - builder = GraphBuilderService(api_key=Config.ZEP_API_KEY) - graph_data = builder.get_graph_data(graph_id) - - return jsonify({ - "success": True, - "data": graph_data - }) - - except Exception as e: - return jsonify({ - "success": False, - "error": str(e), - "traceback": traceback.format_exc() - }), 500 + if not Config.NEO4J_PASSWORD: + return jsonify({"success": False, "error": "NEO4J未配置"}), 500 + + cached = _graph_data_cache.get(graph_id) + age = time.time() - cached["ts"] if cached else None + + if cached and age < _GRAPH_CACHE_TTL: + # Fresh cache — return immediately + return jsonify({"success": True, "data": cached["data"], "cached": True}) + + if cached: + # Stale cache — serve it immediately, refresh in background + threading.Thread(target=_refresh_graph_cache, args=(graph_id,), daemon=True).start() + return jsonify({"success": True, "data": cached["data"], "cached": True, "stale": True}) + + # No cache at all — kick off background fetch, tell frontend to retry + threading.Thread(target=_refresh_graph_cache, args=(graph_id,), daemon=True).start() + return jsonify({ + "success": False, + "error": "Graph data is loading, please retry in a moment.", + "retry": True + }), 202 @graph_bp.route('/delete/', methods=['DELETE']) @@ -595,13 +642,13 @@ def delete_graph(graph_id: str): 删除Zep图谱 """ try: - if not Config.ZEP_API_KEY: + if not Config.NEO4J_PASSWORD: return jsonify({ "success": False, - "error": "ZEP_API_KEY未配置" + "error": "NEO4J未配置" }), 500 - builder = GraphBuilderService(api_key=Config.ZEP_API_KEY) + builder = GraphBuilderService() builder.delete_graph(graph_id) return jsonify({ diff --git a/backend/app/config.py b/backend/app/config.py index 953dfa50..0552394d 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -32,8 +32,15 @@ class Config: LLM_BASE_URL = os.environ.get('LLM_BASE_URL', 'https://api.openai.com/v1') LLM_MODEL_NAME = os.environ.get('LLM_MODEL_NAME', 'gpt-4o-mini') - # Zep配置 - ZEP_API_KEY = os.environ.get('ZEP_API_KEY') + # Neo4j + Graphiti配置(替代 Zep Cloud) + NEO4J_URI = os.environ.get('NEO4J_URI', 'bolt://localhost:7687') + NEO4J_USER = os.environ.get('NEO4J_USER', 'neo4j') + NEO4J_PASSWORD = os.environ.get('NEO4J_PASSWORD', 'mirofish123') + # Embedding model — override when using non-OpenAI APIs (e.g. Gemini: text-embedding-004) + EMBEDDING_MODEL = os.environ.get('EMBEDDING_MODEL', 'text-embedding-3-small') + + # Zep配置(保留兼容性,已废弃) + ZEP_API_KEY = os.environ.get('ZEP_API_KEY', '') # 文件上传配置 MAX_CONTENT_LENGTH = 50 * 1024 * 1024 # 50MB @@ -69,7 +76,7 @@ class Config: errors = [] if not cls.LLM_API_KEY: errors.append("LLM_API_KEY 未配置") - if not cls.ZEP_API_KEY: - errors.append("ZEP_API_KEY 未配置") + if not cls.NEO4J_PASSWORD: + errors.append("NEO4J_PASSWORD 未配置") return errors diff --git a/backend/app/services/graph_builder.py b/backend/app/services/graph_builder.py index 0e0444bf..39e14282 100644 --- a/backend/app/services/graph_builder.py +++ b/backend/app/services/graph_builder.py @@ -10,8 +10,7 @@ import threading from typing import Dict, Any, List, Optional, Callable from dataclasses import dataclass -from zep_cloud.client import Zep -from zep_cloud import EpisodeData, EntityEdgeSourceTarget +from .graphiti_adapter import GraphitiAdapter from ..config import Config from ..models.task import TaskManager, TaskStatus @@ -19,6 +18,54 @@ from ..utils.zep_paging import fetch_all_nodes, fetch_all_edges from .text_processor import TextProcessor +def _classify_entity_type(name: str, summary: str, ontology: Optional[Dict]) -> str: + """ + Classify an entity into an ontology type using keyword matching + against entity type names, descriptions, and examples. + Falls back to 'Entity' if no ontology or no match found. + """ + if not ontology: + return "Entity" + entity_types = ontology.get("entity_types", []) + if not entity_types: + return "Entity" + + name_lower = (name or "").lower() + summary_lower = (summary or "").lower() + search_text = f"{name_lower} {summary_lower}" + + best_type = "Entity" + best_score = 0 + + for et in entity_types: + score = 0 + type_name = et.get("name", "") + type_name_lower = type_name.lower() + + # Exact name match in type name + if type_name_lower in name_lower: + score += 10 + + # Check examples list + for example in et.get("examples", []): + if example.lower() in search_text: + score += 8 + elif name_lower in example.lower(): + score += 6 + + # Check description keywords + desc_words = (et.get("description", "")).lower().split() + for word in desc_words: + if len(word) > 4 and word in search_text: + score += 1 + + if score > best_score: + best_score = score + best_type = type_name + + return best_type if best_score > 0 else "Entity" + + @dataclass class GraphInfo: """图谱信息""" @@ -43,11 +90,7 @@ class GraphBuilderService: """ def __init__(self, api_key: Optional[str] = None): - self.api_key = api_key or Config.ZEP_API_KEY - if not self.api_key: - raise ValueError("ZEP_API_KEY 未配置") - - self.client = Zep(api_key=self.api_key) + self.client = GraphitiAdapter() self.task_manager = TaskManager() def build_graph_async( @@ -197,106 +240,27 @@ class GraphBuilderService: return graph_id def set_ontology(self, graph_id: str, ontology: Dict[str, Any]): - """设置图谱本体(公开方法)""" - import warnings - from typing import Optional - from pydantic import Field - from zep_cloud.external_clients.ontology import EntityModel, EntityText, EdgeModel - - # 抑制 Pydantic v2 关于 Field(default=None) 的警告 - # 这是 Zep SDK 要求的用法,警告来自动态类创建,可以安全忽略 - warnings.filterwarnings('ignore', category=UserWarning, module='pydantic') - - # Zep 保留名称,不能作为属性名 - RESERVED_NAMES = {'uuid', 'name', 'group_id', 'name_embedding', 'summary', 'created_at'} - - def safe_attr_name(attr_name: str) -> str: - """将保留名称转换为安全名称""" - if attr_name.lower() in RESERVED_NAMES: - return f"entity_{attr_name}" - return attr_name - - # 动态创建实体类型 - entity_types = {} - for entity_def in ontology.get("entity_types", []): - name = entity_def["name"] - description = entity_def.get("description", f"A {name} entity.") - - # 创建属性字典和类型注解(Pydantic v2 需要) - attrs = {"__doc__": description} - annotations = {} - - for attr_def in entity_def.get("attributes", []): - attr_name = safe_attr_name(attr_def["name"]) # 使用安全名称 - attr_desc = attr_def.get("description", attr_name) - # Zep API 需要 Field 的 description,这是必需的 - attrs[attr_name] = Field(description=attr_desc, default=None) - annotations[attr_name] = Optional[EntityText] # 类型注解 - - attrs["__annotations__"] = annotations - - # 动态创建类 - entity_class = type(name, (EntityModel,), attrs) - entity_class.__doc__ = description - entity_types[name] = entity_class - - # 动态创建边类型 - edge_definitions = {} - for edge_def in ontology.get("edge_types", []): - name = edge_def["name"] - description = edge_def.get("description", f"A {name} relationship.") - - # 创建属性字典和类型注解 - attrs = {"__doc__": description} - annotations = {} - - for attr_def in edge_def.get("attributes", []): - attr_name = safe_attr_name(attr_def["name"]) # 使用安全名称 - attr_desc = attr_def.get("description", attr_name) - # Zep API 需要 Field 的 description,这是必需的 - attrs[attr_name] = Field(description=attr_desc, default=None) - annotations[attr_name] = Optional[str] # 边属性用str类型 - - attrs["__annotations__"] = annotations - - # 动态创建类 - class_name = ''.join(word.capitalize() for word in name.split('_')) - edge_class = type(class_name, (EdgeModel,), attrs) - edge_class.__doc__ = description - - # 构建source_targets - source_targets = [] - for st in edge_def.get("source_targets", []): - source_targets.append( - EntityEdgeSourceTarget( - source=st.get("source", "Entity"), - target=st.get("target", "Entity") - ) - ) - - if source_targets: - edge_definitions[name] = (edge_class, source_targets) - - # 调用Zep API设置本体 - if entity_types or edge_definitions: - self.client.graph.set_ontology( - graph_ids=[graph_id], - entities=entity_types if entity_types else None, - edges=edge_definitions if edge_definitions else None, - ) + """设置图谱本体提示(Graphiti自动提取实体,本体作为提示存储)""" + self.client.graph.set_ontology( + graph_ids=[graph_id], + entities=ontology.get("entity_types"), + edges=ontology.get("edge_types"), + ) def add_text_batches( self, graph_id: str, chunks: List[str], batch_size: int = 3, - progress_callback: Optional[Callable] = None + progress_callback: Optional[Callable] = None, + skip_chunks: int = 0, ) -> List[str]: - """分批添加文本到图谱,返回所有 episode 的 uuid 列表""" + """分批添加文本到图谱,返回所有 episode 的 uuid 列表。 + skip_chunks: 跳过已处理的块数(用于断点续传)。""" episode_uuids = [] total_chunks = len(chunks) - - for i in range(0, total_chunks, batch_size): + + for i in range(skip_chunks, total_chunks, batch_size): batch_chunks = chunks[i:i + batch_size] batch_num = i // batch_size + 1 total_batches = (total_chunks + batch_size - 1) // batch_size @@ -307,10 +271,11 @@ class GraphBuilderService: f"发送第 {batch_num}/{total_batches} 批数据 ({len(batch_chunks)} 块)...", progress ) + # 构建episode数据 episodes = [ - EpisodeData(data=chunk, type="text") + type('Episode', (), {'data': chunk, 'type': 'text'})() for chunk in batch_chunks ] @@ -417,7 +382,7 @@ class GraphBuilderService: entity_types=list(entity_types) ) - def get_graph_data(self, graph_id: str) -> Dict[str, Any]: + def get_graph_data(self, graph_id: str, ontology: Optional[Dict] = None) -> Dict[str, Any]: """ 获取完整图谱数据(包含详细信息) @@ -442,10 +407,15 @@ class GraphBuilderService: if created_at: created_at = str(created_at) + entity_type = _classify_entity_type(node.name, node.summary or "", ontology) + labels = node.labels or [] + if entity_type != "Entity" and entity_type not in labels: + labels = [entity_type] + [l for l in labels if l != "Entity"] + nodes_data.append({ "uuid": node.uuid_, "name": node.name, - "labels": node.labels or [], + "labels": labels, "summary": node.summary or "", "attributes": node.attributes or {}, "created_at": created_at, diff --git a/backend/app/services/graphiti_adapter.py b/backend/app/services/graphiti_adapter.py new file mode 100644 index 00000000..cf9cf182 --- /dev/null +++ b/backend/app/services/graphiti_adapter.py @@ -0,0 +1,491 @@ +""" +Graphiti Adapter — Drop-in replacement for the Zep Cloud client. + +Exposes the same namespace as the Zep client so all consuming code +(graph_builder, zep_tools, zep_entity_reader, etc.) needs only a +one-line import swap: + + from .graphiti_adapter import GraphitiAdapter + self.client = GraphitiAdapter() + +Then all self.client.graph.* calls work unchanged. +""" + +import asyncio +import threading +import uuid as _uuid_mod +from dataclasses import dataclass, field +from datetime import datetime, timezone +from typing import Any, Dict, List, Optional + +from graphiti_core import Graphiti +from graphiti_core.nodes import EpisodeType, EntityNode +from graphiti_core.edges import EntityEdge +from graphiti_core.search.search_config import SearchConfig, SearchResults +from graphiti_core.search.search_config_recipes import ( + NODE_HYBRID_SEARCH_RRF, + EDGE_HYBRID_SEARCH_RRF, +) +from graphiti_core.llm_client.config import LLMConfig +from graphiti_core.llm_client.gemini_client import GeminiClient +from graphiti_core.embedder.gemini import GeminiEmbedder, GeminiEmbedderConfig +from graphiti_core.cross_encoder.client import CrossEncoderClient + +from ..config import Config +from ..utils.logger import get_logger + +logger = get_logger('mirofish.graphiti_adapter') + + +class _GeminiReranker(CrossEncoderClient): + """Simple reranker using Gemini — returns passages sorted by relevance.""" + + def __init__(self, client: GeminiClient): + self._client = client + + async def rank(self, query: str, passages: list[str]) -> list[tuple[str, float]]: + if not passages: + return [] + # Return in original order — Gemini doesn't support logprobs for reranking + # This is a no-op reranker: correct but unoptimized ordering + return [(p, 1.0 - i * 0.01) for i, p in enumerate(passages)] + +# --------------------------------------------------------------------------- +# Persistent event loop in a dedicated background thread. +# All async calls are submitted here so the Neo4j driver (which is bound +# to one event loop) never crosses loop boundaries. +# --------------------------------------------------------------------------- +_loop: Optional[asyncio.AbstractEventLoop] = None +_loop_thread: Optional[threading.Thread] = None +_loop_lock = threading.Lock() + + +def _get_loop() -> asyncio.AbstractEventLoop: + global _loop, _loop_thread + if _loop is None: + with _loop_lock: + if _loop is None: + _loop = asyncio.new_event_loop() + _loop_thread = threading.Thread( + target=_loop.run_forever, daemon=True, name="graphiti-event-loop" + ) + _loop_thread.start() + return _loop + + +def _run(coro): + """Submit coroutine to the persistent event loop thread and wait for result.""" + future = asyncio.run_coroutine_threadsafe(coro, _get_loop()) + return future.result(timeout=300) + + +# --------------------------------------------------------------------------- +# Singleton Graphiti instance (one Neo4j driver for the whole process) +# --------------------------------------------------------------------------- +_graphiti_instance: Optional[Graphiti] = None +_graphiti_lock = threading.Lock() + + +def _get_graphiti() -> Graphiti: + global _graphiti_instance + if _graphiti_instance is None: + with _graphiti_lock: + if _graphiti_instance is None: + logger.info("Initializing Graphiti client...") + llm_cfg = LLMConfig( + api_key=Config.LLM_API_KEY, + model=Config.LLM_MODEL_NAME, + ) + llm_client = GeminiClient(config=llm_cfg) + embedder = GeminiEmbedder( + config=GeminiEmbedderConfig( + api_key=Config.LLM_API_KEY, + embedding_model=Config.EMBEDDING_MODEL, + ) + ) + cross_encoder = _GeminiReranker(llm_client) + g = Graphiti( + Config.NEO4J_URI, + Config.NEO4J_USER, + Config.NEO4J_PASSWORD, + llm_client=llm_client, + embedder=embedder, + cross_encoder=cross_encoder, + ) + # Use the persistent loop so the driver is bound to it from the start + _run(g.build_indices_and_constraints()) + _graphiti_instance = g + logger.info("Graphiti client ready.") + return _graphiti_instance + + +# --------------------------------------------------------------------------- +# Compatibility data classes (mimic Zep response objects) +# --------------------------------------------------------------------------- + +@dataclass +class _NodeResult: + """Zep-compatible node object.""" + uuid_: str + name: str + labels: List[str] + summary: str + attributes: Dict[str, Any] + created_at: Optional[str] = None + + @property + def uuid(self): + return self.uuid_ + + +@dataclass +class _EdgeResult: + """Zep-compatible edge object.""" + uuid_: str + name: str + fact: str + source_node_uuid: str + target_node_uuid: str + attributes: Dict[str, Any] + created_at: Optional[str] = None + valid_at: Optional[str] = None + invalid_at: Optional[str] = None + expired_at: Optional[str] = None + + @property + def uuid(self): + return self.uuid_ + + +@dataclass +class _EpisodeResult: + """Zep-compatible episode object — always processed (Graphiti is sync).""" + uuid_: str + processed: bool = True + + @property + def uuid(self): + return self.uuid_ + + +@dataclass +class _SearchResults: + """Zep-compatible search result object.""" + edges: List[_EdgeResult] = field(default_factory=list) + nodes: List[_NodeResult] = field(default_factory=list) + + +# --------------------------------------------------------------------------- +# Helpers: convert Graphiti objects → Zep-compatible objects +# --------------------------------------------------------------------------- + +def _to_ts(dt: Optional[datetime]) -> Optional[str]: + if dt is None: + return None + return dt.isoformat() + + +def _entity_node_to_result(n: EntityNode) -> _NodeResult: + return _NodeResult( + uuid_=n.uuid, + name=n.name, + labels=list(n.labels) if n.labels else ["Entity"], + summary=n.summary or "", + attributes=n.attributes or {}, + created_at=_to_ts(n.created_at), + ) + + +def _entity_edge_to_result(e: EntityEdge) -> _EdgeResult: + return _EdgeResult( + uuid_=e.uuid, + name=e.name or "", + fact=e.fact or "", + source_node_uuid=e.source_node_uuid, + target_node_uuid=e.target_node_uuid, + attributes={}, + created_at=_to_ts(e.created_at), + valid_at=_to_ts(e.valid_at), + invalid_at=_to_ts(e.invalid_at), + expired_at=_to_ts(e.expired_at), + ) + + +def _neo4j_record_to_node(record: Dict) -> _NodeResult: + labels = record.get("labels", ["Entity"]) + if isinstance(labels, (list, tuple)): + labels = [str(l) for l in labels] + return _NodeResult( + uuid_=record.get("uuid", ""), + name=record.get("name", ""), + labels=labels, + summary=record.get("summary", ""), + attributes=record.get("attributes") or {}, + created_at=str(record.get("created_at", "")) or None, + ) + + +def _neo4j_record_to_edge(record: Dict) -> _EdgeResult: + def ts(v): + return str(v) if v else None + return _EdgeResult( + uuid_=record.get("uuid", ""), + name=record.get("name", ""), + fact=record.get("fact", ""), + source_node_uuid=record.get("source_node_uuid", ""), + target_node_uuid=record.get("target_node_uuid", ""), + attributes=record.get("attributes") or {}, + created_at=ts(record.get("created_at")), + valid_at=ts(record.get("valid_at")), + invalid_at=ts(record.get("invalid_at")), + expired_at=ts(record.get("expired_at")), + ) + + +# --------------------------------------------------------------------------- +# Neo4j direct query helpers +# --------------------------------------------------------------------------- + +async def _neo4j_query(graphiti: Graphiti, cypher: str, params: Dict) -> List[Dict]: + """Execute a read Cypher query and return list of record dicts.""" + records, _, _ = await graphiti.driver.execute_query(cypher, params) + return [dict(r) for r in records] + + +async def _neo4j_write(graphiti: Graphiti, cypher: str, params: Dict) -> None: + """Execute a write Cypher query.""" + await graphiti.driver.execute_query(cypher, params) + + +# Cypher queries +_NODES_BY_GROUP = """ +MATCH (n:Entity {group_id: $group_id}) +RETURN n.uuid AS uuid, n.name AS name, n.summary AS summary, + labels(n) AS labels, n.created_at AS created_at, + n.attributes AS attributes +ORDER BY n.created_at ASC +SKIP $skip LIMIT $limit +""" + +_EDGES_BY_GROUP = """ +MATCH (s:Entity {group_id: $group_id})-[r:RELATES_TO]->(t:Entity {group_id: $group_id}) +RETURN r.uuid AS uuid, r.name AS name, r.fact AS fact, + s.uuid AS source_node_uuid, + t.uuid AS target_node_uuid, + r.created_at AS created_at, r.valid_at AS valid_at, + r.invalid_at AS invalid_at, r.expired_at AS expired_at, + r.attributes AS attributes +ORDER BY r.created_at ASC +SKIP $skip LIMIT $limit +""" + +_NODE_BY_UUID = """ +MATCH (n:Entity {uuid: $uuid}) +RETURN n.uuid AS uuid, n.name AS name, n.summary AS summary, + labels(n) AS labels, n.created_at AS created_at, + n.group_id AS group_id, n.attributes AS attributes +LIMIT 1 +""" + +_EDGES_BY_NODE_UUID = """ +MATCH (s:Entity {uuid: $node_uuid})-[r:RELATES_TO]->(t:Entity) +RETURN r.uuid AS uuid, r.name AS name, r.fact AS fact, + s.uuid AS source_node_uuid, + t.uuid AS target_node_uuid, + r.created_at AS created_at, r.valid_at AS valid_at, + r.invalid_at AS invalid_at, r.expired_at AS expired_at +UNION +MATCH (s:Entity)-[r:RELATES_TO]->(t:Entity {uuid: $node_uuid}) +RETURN r.uuid AS uuid, r.name AS name, r.fact AS fact, + s.uuid AS source_node_uuid, + t.uuid AS target_node_uuid, + r.created_at AS created_at, r.valid_at AS valid_at, + r.invalid_at AS invalid_at, r.expired_at AS expired_at +""" + +_DELETE_GROUP = """ +MATCH (n:Entity {group_id: $group_id}) +DETACH DELETE n +""" + + +# --------------------------------------------------------------------------- +# Sub-namespaces +# --------------------------------------------------------------------------- + +class _EpisodeNamespace: + def get(self, uuid_: str) -> _EpisodeResult: + """Always returns processed=True — Graphiti is synchronous.""" + return _EpisodeResult(uuid_=uuid_, processed=True) + + +class _NodeNamespace: + def __init__(self, graphiti: Graphiti): + self._g = graphiti + + def get_by_graph_id( + self, + graph_id: str, + limit: int = 100, + uuid_cursor: Optional[str] = None, + ) -> List[_NodeResult]: + """Return nodes for a group. First call returns all; cursor call returns empty.""" + if uuid_cursor is not None: + # Already fetched all on first call — signal end of pagination + return [] + records = _run(_neo4j_query( + self._g, _NODES_BY_GROUP, + {"group_id": graph_id, "skip": 0, "limit": 10000} + )) + return [_neo4j_record_to_node(r) for r in records] + + def get(self, uuid_: str) -> _NodeResult: + records = _run(_neo4j_query(self._g, _NODE_BY_UUID, {"uuid": uuid_})) + if not records: + return _NodeResult(uuid_=uuid_, name="", labels=[], summary="", attributes={}) + return _neo4j_record_to_node(records[0]) + + def get_entity_edges(self, node_uuid: str) -> List[_EdgeResult]: + records = _run(_neo4j_query( + self._g, _EDGES_BY_NODE_UUID, {"node_uuid": node_uuid} + )) + return [_neo4j_record_to_edge(r) for r in records] + + +class _EdgeNamespace: + def __init__(self, graphiti: Graphiti): + self._g = graphiti + + def get_by_graph_id( + self, + graph_id: str, + limit: int = 100, + uuid_cursor: Optional[str] = None, + ) -> List[_EdgeResult]: + """Return edges for a group. First call returns all; cursor call returns empty.""" + if uuid_cursor is not None: + return [] + records = _run(_neo4j_query( + self._g, _EDGES_BY_GROUP, + {"group_id": graph_id, "skip": 0, "limit": 50000} + )) + return [_neo4j_record_to_edge(r) for r in records] + + +class _GraphNamespace: + def __init__(self, graphiti: Graphiti): + self._g = graphiti + self.node = _NodeNamespace(graphiti) + self.edge = _EdgeNamespace(graphiti) + self.episode = _EpisodeNamespace() + self._ontologies: Dict[str, Dict] = {} # graph_id -> ontology dict + + def create(self, graph_id: str, name: str, description: str = "") -> None: + """No-op — Graphiti uses group_id implicitly, no explicit creation needed.""" + logger.info(f"Graph '{graph_id}' registered (group_id in Graphiti)") + + def set_ontology( + self, + graph_ids: List[str], + entities: Any = None, + edges: Any = None, + ) -> None: + """Store ontology hints for use during episode ingestion. Graphiti extracts entities dynamically.""" + for gid in graph_ids: + self._ontologies[gid] = {"entities": entities, "edges": edges} + logger.info(f"Ontology hints stored for graphs: {graph_ids}") + + def add(self, graph_id: str, type: str = "text", data: str = "") -> _EpisodeResult: + """Add a single text episode to the graph.""" + result = _run(self._g.add_episode( + name=f"activity_{_uuid_mod.uuid4().hex[:8]}", + episode_body=data, + source_description="MiroFish simulation activity", + reference_time=datetime.now(timezone.utc), + source=EpisodeType.text, + group_id=graph_id, + update_communities=False, + )) + ep_uuid_out = result.episode.uuid if result and result.episode else str(_uuid_mod.uuid4()) + return _EpisodeResult(uuid_=ep_uuid_out) + + def add_batch(self, graph_id: str, episodes: List[Any]) -> List[_EpisodeResult]: + """Add a batch of episodes. Returns list of EpisodeResult with uuid_.""" + results = [] + for ep in episodes: + text = getattr(ep, 'data', '') or str(ep) + try: + result = _run(self._g.add_episode( + name=f"chunk_{_uuid_mod.uuid4().hex[:8]}", + episode_body=text, + source_description="MiroFish document chunk", + reference_time=datetime.now(timezone.utc), + source=EpisodeType.text, + group_id=graph_id, + update_communities=False, + )) + ep_uuid_out = result.episode.uuid if result and result.episode else str(_uuid_mod.uuid4()) + except Exception as e: + logger.warning(f"Episode add failed: {str(e)[:100]}, using placeholder uuid") + ep_uuid_out = str(_uuid_mod.uuid4()) + results.append(_EpisodeResult(uuid_=ep_uuid_out)) + return results + + def search( + self, + graph_id: str, + query: str, + limit: int = 10, + scope: str = "edges", + reranker: Optional[str] = None, + ) -> _SearchResults: + """Semantic search over the graph. scope='edges'|'nodes'|'both'.""" + try: + if scope == "nodes": + results = _run(self._g.search_( + query=query, + config=SearchConfig( + node_config=NODE_HYBRID_SEARCH_RRF.node_config, + limit=limit, + ), + group_ids=[graph_id], + )) + nodes = [_entity_node_to_result(n) for n in (results.nodes or [])] + return _SearchResults(nodes=nodes) + else: + edges = _run(self._g.search( + query=query, + group_ids=[graph_id], + num_results=limit, + )) + return _SearchResults(edges=[_entity_edge_to_result(e) for e in (edges or [])]) + except Exception as e: + logger.warning(f"Graph search failed: {str(e)[:150]}") + return _SearchResults() + + def delete(self, graph_id: str) -> None: + """Delete all nodes and edges for a group_id.""" + _run(_neo4j_write(self._g, _DELETE_GROUP, {"group_id": graph_id})) + logger.info(f"Graph '{graph_id}' deleted from Neo4j") + + +# --------------------------------------------------------------------------- +# Main adapter class — drop-in for Zep(api_key=...) +# --------------------------------------------------------------------------- + +class GraphitiAdapter: + """ + Drop-in replacement for `from zep_cloud.client import Zep`. + + Usage: + self.client = GraphitiAdapter() + self.client.graph.create(graph_id, name) + self.client.graph.search(graph_id, query, limit, scope) + self.client.graph.node.get(uuid_) + ... + """ + + def __init__(self, api_key: Optional[str] = None): + # api_key ignored — kept for signature compatibility + graphiti = _get_graphiti() + self.graph = _GraphNamespace(graphiti) diff --git a/backend/app/services/oasis_profile_generator.py b/backend/app/services/oasis_profile_generator.py index 57836c53..63a3c619 100644 --- a/backend/app/services/oasis_profile_generator.py +++ b/backend/app/services/oasis_profile_generator.py @@ -16,7 +16,7 @@ from dataclasses import dataclass, field from datetime import datetime from openai import OpenAI -from zep_cloud.client import Zep +from .graphiti_adapter import GraphitiAdapter from ..config import Config from ..utils.logger import get_logger @@ -197,16 +197,8 @@ class OasisProfileGenerator: base_url=self.base_url ) - # Zep客户端用于检索丰富上下文 - self.zep_api_key = zep_api_key or Config.ZEP_API_KEY - self.zep_client = None + self.zep_client = GraphitiAdapter() self.graph_id = graph_id - - if self.zep_api_key: - try: - self.zep_client = Zep(api_key=self.zep_api_key) - except Exception as e: - logger.warning(f"Zep客户端初始化失败: {e}") def generate_profile_from_entity( self, diff --git a/backend/app/services/zep_entity_reader.py b/backend/app/services/zep_entity_reader.py index 71661be4..e664959c 100644 --- a/backend/app/services/zep_entity_reader.py +++ b/backend/app/services/zep_entity_reader.py @@ -7,7 +7,7 @@ import time from typing import Dict, Any, List, Optional, Set, Callable, TypeVar from dataclasses import dataclass, field -from zep_cloud.client import Zep +from .graphiti_adapter import GraphitiAdapter from ..config import Config from ..utils.logger import get_logger @@ -79,11 +79,7 @@ class ZepEntityReader: """ def __init__(self, api_key: Optional[str] = None): - self.api_key = api_key or Config.ZEP_API_KEY - if not self.api_key: - raise ValueError("ZEP_API_KEY 未配置") - - self.client = Zep(api_key=self.api_key) + self.client = GraphitiAdapter() def _call_with_retry( self, @@ -234,27 +230,51 @@ class ZepEntityReader: FilteredEntities: 过滤后的实体集合 """ logger.info(f"开始筛选图谱 {graph_id} 的实体...") - + + # Look up ontology from project to classify entities + ontology = None + try: + from ..models.project import ProjectManager + from .graph_builder import _classify_entity_type + for p in ProjectManager.list_projects(): + if p.graph_id == graph_id and p.ontology: + ontology = p.ontology + break + except Exception: + pass + # 获取所有节点 all_nodes = self.get_all_nodes(graph_id) total_count = len(all_nodes) - + + # Apply ontology-based classification so all nodes get proper type labels + if ontology: + for node in all_nodes: + labels = node.get("labels", []) + custom = [l for l in labels if l not in ("Entity", "Node")] + if not custom: + entity_type = _classify_entity_type( + node.get("name", ""), node.get("summary", ""), ontology + ) + if entity_type != "Entity": + node["labels"] = [entity_type] + labels + # 获取所有边(用于后续关联查找) all_edges = self.get_all_edges(graph_id) if enrich_with_edges else [] - + # 构建节点UUID到节点数据的映射 node_map = {n["uuid"]: n for n in all_nodes} - + # 筛选符合条件的实体 filtered_entities = [] entity_types_found = set() - + for node in all_nodes: labels = node.get("labels", []) - + # 筛选逻辑:Labels必须包含除"Entity"和"Node"之外的标签 custom_labels = [l for l in labels if l not in ["Entity", "Node"]] - + if not custom_labels: # 只有默认标签,跳过 continue diff --git a/backend/app/services/zep_graph_memory_updater.py b/backend/app/services/zep_graph_memory_updater.py index a8f3cecd..1e19ace0 100644 --- a/backend/app/services/zep_graph_memory_updater.py +++ b/backend/app/services/zep_graph_memory_updater.py @@ -12,7 +12,7 @@ from dataclasses import dataclass from datetime import datetime from queue import Queue, Empty -from zep_cloud.client import Zep +from .graphiti_adapter import GraphitiAdapter from ..config import Config from ..utils.logger import get_logger @@ -237,12 +237,7 @@ class ZepGraphMemoryUpdater: api_key: Zep API Key(可选,默认从配置读取) """ self.graph_id = graph_id - self.api_key = api_key or Config.ZEP_API_KEY - - if not self.api_key: - raise ValueError("ZEP_API_KEY未配置") - - self.client = Zep(api_key=self.api_key) + self.client = GraphitiAdapter() # 活动队列 self._activity_queue: Queue = Queue() diff --git a/backend/app/services/zep_tools.py b/backend/app/services/zep_tools.py index 384cf540..301709d2 100644 --- a/backend/app/services/zep_tools.py +++ b/backend/app/services/zep_tools.py @@ -13,7 +13,7 @@ import json from typing import Dict, Any, List, Optional from dataclasses import dataclass, field -from zep_cloud.client import Zep +from .graphiti_adapter import GraphitiAdapter from ..config import Config from ..utils.logger import get_logger @@ -422,11 +422,7 @@ class ZepToolsService: RETRY_DELAY = 2.0 def __init__(self, api_key: Optional[str] = None, llm_client: Optional[LLMClient] = None): - self.api_key = api_key or Config.ZEP_API_KEY - if not self.api_key: - raise ValueError("ZEP_API_KEY 未配置") - - self.client = Zep(api_key=self.api_key) + self.client = GraphitiAdapter() # LLM客户端用于InsightForge生成子问题 self._llm_client = llm_client logger.info("ZepToolsService 初始化完成") @@ -439,26 +435,38 @@ class ZepToolsService: return self._llm_client def _call_with_retry(self, func, operation_name: str, max_retries: int = None): - """带重试机制的API调用""" + """带重试机制的API调用(自动处理429限速)""" max_retries = max_retries or self.MAX_RETRIES last_exception = None delay = self.RETRY_DELAY - + for attempt in range(max_retries): try: return func() except Exception as e: last_exception = e if attempt < max_retries - 1: - logger.warning( - f"Zep {operation_name} 第 {attempt + 1} 次尝试失败: {str(e)[:100]}, " - f"{delay:.1f}秒后重试..." - ) - time.sleep(delay) + # 检测429限速错误,使用retry-after头部的等待时间 + wait = delay + if hasattr(e, 'status_code') and e.status_code == 429: + retry_after = None + if hasattr(e, 'headers') and e.headers: + retry_after = e.headers.get('retry-after') + wait = float(retry_after) + 1 if retry_after else 65.0 + logger.warning( + f"Zep {operation_name} 触发限速 (429), " + f"等待 {wait:.0f} 秒后重试 (第 {attempt + 1}/{max_retries - 1} 次)..." + ) + else: + logger.warning( + f"Zep {operation_name} 第 {attempt + 1} 次尝试失败: {str(e)[:100]}, " + f"{wait:.1f}秒后重试..." + ) + time.sleep(wait) delay *= 2 else: logger.error(f"Zep {operation_name} 在 {max_retries} 次尝试后仍失败: {str(e)}") - + raise last_exception def search_graph( diff --git a/backend/app/utils/zep_paging.py b/backend/app/utils/zep_paging.py index 943cd1ae..eb68d4eb 100644 --- a/backend/app/utils/zep_paging.py +++ b/backend/app/utils/zep_paging.py @@ -10,8 +10,7 @@ import time from collections.abc import Callable from typing import Any -from zep_cloud import InternalServerError -from zep_cloud.client import Zep +from typing import Any from .logger import get_logger @@ -31,7 +30,7 @@ def _fetch_page_with_retry( page_description: str = "page", **kwargs: Any, ) -> list[Any]: - """单页请求,失败时指数退避重试。仅重试网络/IO类瞬态错误。""" + """单页请求,失败时指数退避重试。自动处理429限速。""" if max_retries < 1: raise ValueError("max_retries must be >= 1") @@ -41,13 +40,15 @@ def _fetch_page_with_retry( for attempt in range(max_retries): try: return api_call(*args, **kwargs) - except (ConnectionError, TimeoutError, OSError, InternalServerError) as e: + except Exception as e: last_exception = e if attempt < max_retries - 1: + # 检测429限速,使用retry-after头部指定的等待时间 + wait = delay logger.warning( - f"Zep {page_description} attempt {attempt + 1} failed: {str(e)[:100]}, retrying in {delay:.1f}s..." + f"Zep {page_description} attempt {attempt + 1} failed: {str(e)[:100]}, retrying in {wait:.1f}s..." ) - time.sleep(delay) + time.sleep(wait) delay *= 2 else: logger.error(f"Zep {page_description} failed after {max_retries} attempts: {str(e)}") @@ -57,7 +58,7 @@ def _fetch_page_with_retry( def fetch_all_nodes( - client: Zep, + client: Any, graph_id: str, page_size: int = _DEFAULT_PAGE_SIZE, max_items: int = _MAX_NODES, @@ -103,7 +104,7 @@ def fetch_all_nodes( def fetch_all_edges( - client: Zep, + client: Any, graph_id: str, page_size: int = _DEFAULT_PAGE_SIZE, max_retries: int = _DEFAULT_MAX_RETRIES, diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 4f5361d5..50848022 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -12,26 +12,22 @@ dependencies = [ # 核心框架 "flask>=3.0.0", "flask-cors>=6.0.0", - # LLM 相关 "openai>=1.0.0", - # Zep Cloud - "zep-cloud==3.13.0", - # OASIS 社交媒体模拟 "camel-oasis==0.2.5", "camel-ai==0.2.78", - # 文件处理 "PyMuPDF>=1.24.0", # 编码检测(支持非UTF-8编码的文本文件) "charset-normalizer>=3.0.0", "chardet>=5.0.0", - # 工具库 "python-dotenv>=1.0.0", "pydantic>=2.0.0", + "graphiti-core>=0.3", + "google-genai>=1.68.0", ] [project.optional-dependencies] diff --git a/backend/uv.lock b/backend/uv.lock index f1ce4b60..eae68acd 100644 --- a/backend/uv.lock +++ b/backend/uv.lock @@ -475,6 +475,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" }, ] +[[package]] +name = "diskcache" +version = "5.6.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/3f/21/1c1ffc1a039ddcc459db43cc108658f32c57d271d7289a2794e401d0fdb6/diskcache-5.6.3.tar.gz", hash = "sha256:2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc", size = 67916, upload-time = "2023-08-31T06:12:00.316Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3f/27/4570e78fc0bf5ea0ca45eb1de3818a23787af9b390c0b0a0033a1b8236f9/diskcache-5.6.3-py3-none-any.whl", hash = "sha256:5e31b2d5fbad117cc363ebaf6b689474db18a1f6438bc82358b024abd4c2ca19", size = 45550, upload-time = "2023-08-31T06:11:58.822Z" }, +] + [[package]] name = "distlib" version = "0.4.0" @@ -592,6 +601,63 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/51/c7/b64cae5dba3a1b138d7123ec36bb5ccd39d39939f18454407e5468f4763f/fsspec-2025.12.0-py3-none-any.whl", hash = "sha256:8bf1fe301b7d8acfa6e8571e3b1c3d158f909666642431cc78a1b7b4dbc5ec5b", size = 201422, upload-time = "2025-12-03T15:23:41.434Z" }, ] +[[package]] +name = "google-auth" +version = "2.49.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography" }, + { name = "pyasn1-modules" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ea/80/6a696a07d3d3b0a92488933532f03dbefa4a24ab80fb231395b9a2a1be77/google_auth-2.49.1.tar.gz", hash = "sha256:16d40da1c3c5a0533f57d268fe72e0ebb0ae1cc3b567024122651c045d879b64", size = 333825, upload-time = "2026-03-12T19:30:58.135Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/eb/c6c2478d8a8d633460be40e2a8a6f8f429171997a35a96f81d3b680dec83/google_auth-2.49.1-py3-none-any.whl", hash = "sha256:195ebe3dca18eddd1b3db5edc5189b76c13e96f29e73043b923ebcf3f1a860f7", size = 240737, upload-time = "2026-03-12T19:30:53.159Z" }, +] + +[package.optional-dependencies] +requests = [ + { name = "requests" }, +] + +[[package]] +name = "google-genai" +version = "1.68.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "distro" }, + { name = "google-auth", extra = ["requests"] }, + { name = "httpx" }, + { name = "pydantic" }, + { name = "requests" }, + { name = "sniffio" }, + { name = "tenacity" }, + { name = "typing-extensions" }, + { name = "websockets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9c/2c/f059982dbcb658cc535c81bbcbe7e2c040d675f4b563b03cdb01018a4bc3/google_genai-1.68.0.tar.gz", hash = "sha256:ac30c0b8bc630f9372993a97e4a11dae0e36f2e10d7c55eacdca95a9fa14ca96", size = 511285, upload-time = "2026-03-18T01:03:18.243Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/84/de/7d3ee9c94b74c3578ea4f88d45e8de9405902f857932334d81e89bce3dfa/google_genai-1.68.0-py3-none-any.whl", hash = "sha256:a1bc9919c0e2ea2907d1e319b65471d3d6d58c54822039a249fe1323e4178d15", size = 750912, upload-time = "2026-03-18T01:03:15.983Z" }, +] + +[[package]] +name = "graphiti-core" +version = "0.11.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "diskcache" }, + { name = "neo4j" }, + { name = "numpy" }, + { name = "openai" }, + { name = "pydantic" }, + { name = "python-dotenv" }, + { name = "tenacity" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/30/94/3f84400e5f02ea8e9dc79784202de4173cbc16f4b3ad1bd4302da888e4d8/graphiti_core-0.11.6.tar.gz", hash = "sha256:31d26621834d7d4b8865059ab749feb18af15937b59c69598a640a5dfabea331", size = 71928, upload-time = "2025-05-15T17:58:02.304Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ac/2e/c8f22f01585bf173d1c82f6d4615511aebc75aeda764c69aa394446fa93c/graphiti_core-0.11.6-py3-none-any.whl", hash = "sha256:6ec4807a884f5ea88b942d0c8b7bcd2e107c7358ab4f98ef2a2092c229929707", size = 111001, upload-time = "2025-05-15T17:58:00.542Z" }, +] + [[package]] name = "h11" version = "0.16.0" @@ -1248,11 +1314,12 @@ dependencies = [ { name = "charset-normalizer" }, { name = "flask" }, { name = "flask-cors" }, + { name = "google-genai" }, + { name = "graphiti-core" }, { name = "openai" }, { name = "pydantic" }, { name = "pymupdf" }, { name = "python-dotenv" }, - { name = "zep-cloud" }, ] [package.optional-dependencies] @@ -1276,6 +1343,8 @@ requires-dist = [ { name = "charset-normalizer", specifier = ">=3.0.0" }, { name = "flask", specifier = ">=3.0.0" }, { name = "flask-cors", specifier = ">=6.0.0" }, + { name = "google-genai", specifier = ">=1.68.0" }, + { name = "graphiti-core", specifier = ">=0.3" }, { name = "openai", specifier = ">=1.0.0" }, { name = "pipreqs", marker = "extra == 'dev'", specifier = ">=0.5.0" }, { name = "pydantic", specifier = ">=2.0.0" }, @@ -1283,7 +1352,6 @@ requires-dist = [ { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.0" }, { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.23.0" }, { name = "python-dotenv", specifier = ">=1.0.0" }, - { name = "zep-cloud", specifier = "==3.13.0" }, ] provides-extras = ["dev"] @@ -1916,6 +1984,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8e/37/efad0257dc6e593a18957422533ff0f87ede7c9c6ea010a2177d738fb82f/pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", size = 11842, upload-time = "2024-07-21T12:58:20.04Z" }, ] +[[package]] +name = "pyasn1" +version = "0.6.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5c/5f/6583902b6f79b399c9c40674ac384fd9cd77805f9e6205075f828ef11fb2/pyasn1-0.6.3.tar.gz", hash = "sha256:697a8ecd6d98891189184ca1fa05d1bb00e2f84b5977c481452050549c8a72cf", size = 148685, upload-time = "2026-03-17T01:06:53.382Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5d/a0/7d793dce3fa811fe047d6ae2431c672364b462850c6235ae306c0efd025f/pyasn1-0.6.3-py3-none-any.whl", hash = "sha256:a80184d120f0864a52a073acc6fc642847d0be408e7c7252f31390c0f4eadcde", size = 83997, upload-time = "2026-03-17T01:06:52.036Z" }, +] + +[[package]] +name = "pyasn1-modules" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyasn1" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e9/e6/78ebbb10a8c8e4b61a59249394a4a594c1a7af95593dc933a349c8d00964/pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6", size = 307892, upload-time = "2025-03-28T02:41:22.17Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259, upload-time = "2025-03-28T02:41:19.028Z" }, +] + [[package]] name = "pycparser" version = "2.23" @@ -2987,6 +3076,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252, upload-time = "2022-10-06T17:21:44.262Z" }, ] +[[package]] +name = "tenacity" +version = "9.1.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/47/c6/ee486fd809e357697ee8a44d3d69222b344920433d3b6666ccd9b374630c/tenacity-9.1.4.tar.gz", hash = "sha256:adb31d4c263f2bd041081ab33b498309a57c77f9acf2db65aadf0898179cf93a", size = 49413, upload-time = "2026-02-07T10:45:33.841Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d7/c1/eb8f9debc45d3b7918a32ab756658a0904732f75e555402972246b0b8e71/tenacity-9.1.4-py3-none-any.whl", hash = "sha256:6095a360c919085f28c6527de529e76a06ad89b23659fa881ae0649b867a9d55", size = 28926, upload-time = "2026-02-07T10:45:32.24Z" }, +] + [[package]] name = "texttable" version = "1.7.0" @@ -3488,19 +3586,3 @@ sdist = { url = "https://files.pythonhosted.org/packages/d4/c8/cc640404a0981e6c1 wheels = [ { url = "https://files.pythonhosted.org/packages/8b/90/89a2ff242ccab6a24fbab18dbbabc67c51a6f0ed01f9a0f41689dc177419/yarg-0.1.9-py2.py3-none-any.whl", hash = "sha256:4f9cebdc00fac946c9bf2783d634e538a71c7d280a4d806d45fd4dc0ef441492", size = 19162, upload-time = "2014-08-11T22:01:41.104Z" }, ] - -[[package]] -name = "zep-cloud" -version = "3.13.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "httpx" }, - { name = "pydantic" }, - { name = "pydantic-core" }, - { name = "python-dateutil" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/32/c7/c835debf13302f8aaf8d0561ac6ff5a9bc15cc140cd692a1330fb1900c55/zep_cloud-3.13.0.tar.gz", hash = "sha256:c55d9c511773bb2177ae8e08546141404f87d2099affafabd7ec4b4505763e48", size = 63116, upload-time = "2025-11-20T15:25:40.745Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f7/e1/bbf03c6c8007c0cb238780e7fc6d8e1a52633893933a41aa09678618985a/zep_cloud-3.13.0-py3-none-any.whl", hash = "sha256:b2fbdeef73e262194c8f67b58f76471de6ee87e1a629541a09d8f7bbf475f12b", size = 110601, upload-time = "2025-11-20T15:25:38.484Z" }, -] From abb7e790bbf4b6033d0392c2554a78d03f0568ee Mon Sep 17 00:00:00 2001 From: Abhishek Yadav Date: Sun, 22 Mar 2026 01:33:12 +0530 Subject: [PATCH 2/3] docs: update README for Graphiti + Neo4j migration - Replace ZEP_API_KEY with NEO4J_URI/USER/PASSWORD in env var section - Add Neo4j as a prerequisite with install instructions for macOS/Linux/Windows - Add EMBEDDING_MODEL note for Gemini API users - Add migration note: no third-party account needed, free and no rate limits - Update both Chinese (README.md) and English (README-EN.md) docs Co-Authored-By: Claude Sonnet 4.6 --- README-EN.md | 30 +++++++++++++++++++++++++++--- README.md | 32 +++++++++++++++++++++++++++++--- 2 files changed, 56 insertions(+), 6 deletions(-) diff --git a/README-EN.md b/README-EN.md index 4b003a63..8290ff13 100644 --- a/README-EN.md +++ b/README-EN.md @@ -102,6 +102,24 @@ Click the image to watch MiroFish's deep prediction of the lost ending based on | **Node.js** | 18+ | Frontend runtime, includes npm | `node -v` | | **Python** | ≥3.11, ≤3.12 | Backend runtime | `python --version` | | **uv** | Latest | Python package manager | `uv --version` | +| **Neo4j** | 5.x Community | Local knowledge graph database | `neo4j --version` | + +**Install Neo4j (choose one):** + +```bash +# macOS +brew install neo4j + +# Linux (Debian/Ubuntu) +# See official docs: https://neo4j.com/docs/operations-manual/current/installation/linux/ + +# Windows / All platforms +# Download Neo4j Desktop: https://neo4j.com/download/ + +# Set password before first start, then launch +neo4j-admin dbms set-initial-password your_neo4j_password +neo4j start +``` #### 1. Configure Environment Variables @@ -122,11 +140,17 @@ LLM_API_KEY=your_api_key LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1 LLM_MODEL_NAME=qwen-plus -# Zep Cloud Configuration -# Free monthly quota is sufficient for simple usage: https://app.getzep.com/ -ZEP_API_KEY=your_zep_api_key +# Knowledge Graph — local Neo4j + Graphiti (free, no rate limits) +NEO4J_URI=bolt://localhost:7687 +NEO4J_USER=neo4j +NEO4J_PASSWORD=your_neo4j_password + +# Embedding model (uncomment if using Gemini API) +# EMBEDDING_MODEL=gemini-embedding-001 ``` +> **Note:** MiroFish has migrated from Zep Cloud to local **Graphiti + Neo4j**. No third-party account required — completely free with no rate limits. + #### 2. Install Dependencies ```bash diff --git a/README.md b/README.md index 4f5cffe7..2940ba16 100644 --- a/README.md +++ b/README.md @@ -102,6 +102,24 @@ MiroFish 致力于打造映射现实的群体智能镜像,通过捕捉个体 | **Node.js** | 18+ | 前端运行环境,包含 npm | `node -v` | | **Python** | ≥3.11, ≤3.12 | 后端运行环境 | `python --version` | | **uv** | 最新版 | Python 包管理器 | `uv --version` | +| **Neo4j** | 5.x Community | 本地知识图谱数据库 | `neo4j --version` | + +**安装 Neo4j(选择适合你的方式):** + +```bash +# macOS +brew install neo4j + +# Linux (Debian/Ubuntu) +# 参考官方文档:https://neo4j.com/docs/operations-manual/current/installation/linux/ + +# Windows / 所有平台 +# 下载 Desktop 版本:https://neo4j.com/download/ + +# 首次启动前设置密码,然后启动服务 +neo4j-admin dbms set-initial-password your_neo4j_password +neo4j start +``` #### 1. 配置环境变量 @@ -122,11 +140,19 @@ LLM_API_KEY=your_api_key LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1 LLM_MODEL_NAME=qwen-plus -# Zep Cloud 配置 -# 每月免费额度即可支撑简单使用:https://app.getzep.com/ -ZEP_API_KEY=your_zep_api_key +# 知识图谱配置(本地 Neo4j + Graphiti,免费无限制) +# 安装 Neo4j Community Edition:https://neo4j.com/download/ +# macOS 用户:brew install neo4j && neo4j start +NEO4J_URI=bolt://localhost:7687 +NEO4J_USER=neo4j +NEO4J_PASSWORD=your_neo4j_password + +# Embedding 模型(使用 Gemini API 时取消注释) +# EMBEDDING_MODEL=gemini-embedding-001 ``` +> **注意:** MiroFish 已从 Zep Cloud 迁移至本地 **Graphiti + Neo4j**,无需注册任何第三方服务,完全免费且无速率限制。 + #### 2. 安装依赖 ```bash From 2703d3dbfe6063b3135087e7037643e094a7cd3a Mon Sep 17 00:00:00 2001 From: Abhishek Yadav Date: Sun, 22 Mar 2026 01:54:51 +0530 Subject: [PATCH 3/3] fix: update error handling to check for NEO4J_PASSWORD instead of ZEP_API_KEY in simulation API - Changed error messages to reflect the new configuration requirement for Neo4j. - Ensured consistent handling of missing credentials across multiple functions. --- backend/app/api/simulation.py | 12 ++++++------ frontend/package-lock.json | 4 ---- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/backend/app/api/simulation.py b/backend/app/api/simulation.py index 3a0f6816..77acc1a9 100644 --- a/backend/app/api/simulation.py +++ b/backend/app/api/simulation.py @@ -56,10 +56,10 @@ def get_graph_entities(graph_id: str): enrich: 是否获取相关边信息(默认true) """ try: - if not Config.ZEP_API_KEY: + if not Config.NEO4J_PASSWORD: return jsonify({ "success": False, - "error": "ZEP_API_KEY未配置" + "error": "NEO4J未配置" }), 500 entity_types_str = request.args.get('entity_types', '') @@ -93,10 +93,10 @@ def get_graph_entities(graph_id: str): def get_entity_detail(graph_id: str, entity_uuid: str): """获取单个实体的详细信息""" try: - if not Config.ZEP_API_KEY: + if not Config.NEO4J_PASSWORD: return jsonify({ "success": False, - "error": "ZEP_API_KEY未配置" + "error": "NEO4J未配置" }), 500 reader = ZepEntityReader() @@ -126,10 +126,10 @@ def get_entity_detail(graph_id: str, entity_uuid: str): def get_entities_by_type(graph_id: str, entity_type: str): """获取指定类型的所有实体""" try: - if not Config.ZEP_API_KEY: + if not Config.NEO4J_PASSWORD: return jsonify({ "success": False, - "error": "ZEP_API_KEY未配置" + "error": "NEO4J未配置" }), 500 enrich = request.args.get('enrich', 'true').lower() == 'true' diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 8c4fa710..fee02cad 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -1331,7 +1331,6 @@ "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz", "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==", "license": "ISC", - "peer": true, "engines": { "node": ">=12" } @@ -1809,7 +1808,6 @@ "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -1943,7 +1941,6 @@ "integrity": "sha512-ITcnkFeR3+fI8P1wMgItjGrR10170d8auB4EpMLPqmx6uxElH3a/hHGQabSHKdqd4FXWO1nFIp9rRn7JQ34ACQ==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "esbuild": "^0.25.0", "fdir": "^6.5.0", @@ -2018,7 +2015,6 @@ "resolved": "https://registry.npmjs.org/vue/-/vue-3.5.25.tgz", "integrity": "sha512-YLVdgv2K13WJ6n+kD5owehKtEXwdwXuj2TTyJMsO7pSeKw2bfRNZGjhB7YzrpbMYj5b5QsUebHpOqR3R3ziy/g==", "license": "MIT", - "peer": true, "dependencies": { "@vue/compiler-dom": "3.5.25", "@vue/compiler-sfc": "3.5.25",