chore: resolve .env.example merge conflict; keep feat/pluggable-graph-backend version

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Ubuntu 2026-04-25 21:11:06 +00:00
commit 6e4e5c40dd
35 changed files with 1478 additions and 454 deletions

View File

@ -1,29 +1,55 @@
# LLM API configuration (supports any OpenAI-compatible API) # ===== LLM Configuration =====
# Recommended: Alibaba Qwen via Bailian platform: https://bailian.console.aliyun.com/ # Any OpenAI-compatible API is supported
# Note: high token consumption — start with simulations under 40 rounds
LLM_API_KEY=your_api_key_here LLM_API_KEY=your_api_key_here
LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1 LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
LLM_MODEL_NAME=qwen-plus LLM_MODEL_NAME=qwen-plus
# ===== Graph backend selection ===== # --- Gemini (Google AI Studio) ---
# Options: "zep" (default, cloud-managed) or "graphiti" (self-hosted Neo4j) # Set LLM_PROVIDER=gemini to auto-configure the Google AI Studio endpoint.
# Get a free API key at: https://aistudio.google.com/
# LLM_PROVIDER=gemini
# LLM_API_KEY=AIza...
# LLM_MODEL_NAME=gemini-3.1-flash-lite-preview
# ===== Graph Backend =====
# GRAPH_BACKEND=zep (default) — Zep Cloud managed memory graph
# GRAPH_BACKEND=graphiti — Self-hosted Neo4j + Graphiti (requires graphiti extras)
GRAPH_BACKEND=zep GRAPH_BACKEND=zep
# ===== Zep Cloud configuration (required when GRAPH_BACKEND=zep) ===== # --- Zep Cloud (default backend) ---
# Free monthly quota is sufficient for basic use: https://app.getzep.com/ # Free tier available: https://app.getzep.com/
ZEP_API_KEY=your_zep_api_key_here ZEP_API_KEY=your_zep_api_key_here
# ===== Graphiti + Neo4j configuration (required when GRAPH_BACKEND=graphiti) ===== # --- Graphiti + Neo4j (alternative backend) ---
# Install extras: pip install "mirofish-backend[graphiti]"
# NEO4J_URI=bolt://localhost:7687 # NEO4J_URI=bolt://localhost:7687
# NEO4J_USER=neo4j # NEO4J_USER=neo4j
# NEO4J_PASSWORD=your_neo4j_password_here # NEO4J_PASSWORD=your_neo4j_password_here
# NEO4J_DATABASE=neo4j # GRAPHITI_BATCH_SIZE=10 # chunks per bulk call; higher = faster but more LLM parallelism
# ===== Boost LLM configuration (optional) ===== # --- Embedding LLM (used by Graphiti for vector indexing) ---
# If not using a boost LLM, remove or leave these variables empty # Falls back to LLM_API_KEY / LLM_BASE_URL if not set.
LLM_BOOST_API_KEY= # Use a dedicated embedding deployment when your LLM_BASE_URL points to a generative model.
LLM_BOOST_BASE_URL= # LLM_EMBED_API_KEY=your_embed_api_key_here
LLM_BOOST_MODEL_NAME= # LLM_EMBED_BASE_URL=https://<resource>.cognitiveservices.azure.com/openai/deployments/<embed-deployment>/embeddings?api-version=2024-05-01-preview
# LLM_EMBED_MODEL_NAME=text-embedding-3-small
# --- Small/fast LLM (used by Graphiti for reranking and lightweight tasks) ---
# Falls back to LLM_API_KEY / LLM_BASE_URL / LLM_MODEL_NAME if not set.
# Use a cheaper model (e.g. gpt-4o-mini, gpt-5-mini) to reduce costs.
# LLM_SMALL_API_KEY=your_small_api_key_here
# LLM_SMALL_BASE_URL=https://<resource>.cognitiveservices.azure.com/openai/deployments/<small-model>/chat/completions?api-version=2024-05-01-preview
# LLM_SMALL_MODEL_NAME=gpt-4o-mini
# ===== Ontology limits =====
# ONTOLOGY_MAX_ENTITY_TYPES=12 # total entity types (specific + 2 fallback); Zep Cloud max is 10 specific + 2 = 12
# ONTOLOGY_MAX_EDGE_TYPES=10 # max relationship types; Zep Cloud max is 10
# ===== Boost LLM (optional) =====
# Secondary faster LLM — omit these lines entirely if not used
LLM_BOOST_API_KEY=your_api_key_here
LLM_BOOST_BASE_URL=your_base_url_here
LLM_BOOST_MODEL_NAME=your_model_name_here
# ===== Authentication ===== # ===== Authentication =====
# Password for the "demo" user — REQUIRED in production # Password for the "demo" user — REQUIRED in production

80
CLAUDE.md Normal file
View File

@ -0,0 +1,80 @@
# CLAUDE.md
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
## Project Overview
MiroFish is an AI-powered multi-agent simulation platform. It extracts entities from user-provided documents (PDF/MD/TXT), builds a knowledge graph (via Zep Cloud), generates agent personas, runs social interaction simulations (via OASIS/CAMEL-AI), and produces analytical reports.
## Commands
### Setup
```bash
cp .env.example .env # Configure API keys before first run
npm run setup:all # Install Node + Python dependencies (root, frontend, backend)
npm run setup # Node deps only
npm run setup:backend # Python deps only (uv venv)
```
### Development
```bash
npm run dev # Start both frontend (port 3000) & backend (port 5001) concurrently
npm run frontend # Frontend only
npm run backend # Backend only (uv run python run.py)
```
### Build
```bash
npm run build # Vite production build of frontend
```
### Testing
```bash
pytest # Run Python tests (pytest + pytest-asyncio available in venv)
```
Python 3.113.12 required (strict constraint). Node 18+ required.
## Architecture
### Overview
Full-stack monorepo: **Vue 3 SPA** (frontend, port 3000) + **Flask API** (backend, port 5001). Vite proxies all `/api/*` requests to the backend.
### 5-Step Workflow Pipeline
1. **Graph Build** — Upload seed documents → entity/relationship extraction via LLM → Zep Cloud knowledge graph
2. **Environment Setup** — Agent persona generation (OASIS profiles) from the graph
3. **Simulation** — OASIS multi-agent simulation (Twitter + Reddit platforms) run as a subprocess
4. **Report** — ReportAgent (LLM with tool calling) analyzes simulation output
5. **Interaction** — Live chat with simulated agents
### Key Backend Patterns
- **`models/project.py`** — `ProjectManager` singleton. Projects persist as `uploads/projects/{uuid}/project.json` + files. The server is the source of truth; frontend only holds a `projectId`.
- **`models/task.py`** — `TaskManager` singleton. In-memory async task tracking (PENDING → PROCESSING → COMPLETED|FAILED). Frontend polls `GET /api/.../task/{taskId}`.
- **`services/simulation_runner.py`** — Spawns OASIS as a subprocess. Communicates via IPC files at `/tmp/mirofish_sim_{id}_*.json`. Atexit cleanup registered.
- **`services/report_agent.py`** — Multi-turn LLM agent with tool use (Zep queries). Max 5 tool calls, 2 reflection rounds.
- **`utils/locale.py`** — Thread-local locale storage. Reads `Accept-Language` header from requests; falls back to thread-local for background workers (default: `zh`).
### Key Frontend Patterns
- **`api/index.js`** — Axios instance with retry (`requestWithRetry`, 3 attempts, exponential backoff) and response interceptor. Auto-injects `Accept-Language` header from current locale.
- **`store/pendingUpload.js`** — Lightweight reactive state (no Vuex/Pinia) for deferred file uploads between views.
- Views are self-contained; no shared state beyond `projectId` in the URL route.
### i18n
Translation files at `/locales/{en,zh}.json` are shared by both frontend and backend. The frontend uses `vue-i18n` v11 with `localStorage` persistence. The backend reads the `Accept-Language` header. `/locales/languages.json` also contains per-language LLM prompt instructions (to force LLM output language).
### Configuration (`backend/app/config.py`)
Required environment variables (from `.env`):
- `LLM_API_KEY`, `LLM_BASE_URL`, `LLM_MODEL_NAME` — any OpenAI-compatible API (default: Qwen-plus via Alibaba Bailian)
- `ZEP_API_KEY` — Zep Cloud memory graph
- Optional: `LLM_BOOST_API_KEY`, `LLM_BOOST_BASE_URL`, `LLM_BOOST_MODEL_NAME` — faster secondary LLM
## Git Remotes
- `origin` — this fork: `https://github.com/jaumemir/MiroFish`
- `upstream` — original project: `https://github.com/666ghj/MiroFish`
To cherry-pick from upstream branches or PRs:
```bash
git fetch upstream
git cherry-pick <commit-sha>
# or: git merge upstream/<branch-name>
```

View File

@ -28,7 +28,8 @@ WORKDIR /app
# Copiar i instal·lar dependències Python (aprofita caché si pyproject.toml no canvia) # Copiar i instal·lar dependències Python (aprofita caché si pyproject.toml no canvia)
COPY backend/pyproject.toml backend/uv.lock ./backend/ COPY backend/pyproject.toml backend/uv.lock ./backend/
RUN cd backend && uv sync --frozen --no-dev # Install all optional extras so the image supports any GRAPH_BACKEND at runtime
RUN cd backend && uv sync --frozen --no-dev --extra graphiti
# Copiar el codi font del backend i els fitxers compartits # Copiar el codi font del backend i els fitxers compartits
COPY backend/ ./backend/ COPY backend/ ./backend/

View File

@ -29,8 +29,18 @@ REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
# ── Validar variables obligatòries ─────────────────────────────────────────── # ── Validar variables obligatòries ───────────────────────────────────────────
REQUIRED_VARS=( REQUIRED_VARS=(
AZURE_SUBSCRIPTION_ID RESOURCE_GROUP PROJECT_NAME AZURE_SUBSCRIPTION_ID RESOURCE_GROUP PROJECT_NAME
DEMO_PASSWORD SECRET_KEY LLM_API_KEY LLM_BASE_URL LLM_MODEL_NAME ZEP_API_KEY DEMO_PASSWORD SECRET_KEY LLM_API_KEY LLM_BASE_URL LLM_MODEL_NAME
) )
# Validate graph backend config
GRAPH_BACKEND="${GRAPH_BACKEND:-zep}"
if [[ "$GRAPH_BACKEND" == "zep" && -z "${ZEP_API_KEY:-}" ]]; then
echo "ERROR: ZEP_API_KEY is required when GRAPH_BACKEND=zep"
exit 1
fi
if [[ "$GRAPH_BACKEND" == "graphiti" && -z "${NEO4J_PASSWORD:-}" ]]; then
echo "ERROR: NEO4J_PASSWORD is required when GRAPH_BACKEND=graphiti"
exit 1
fi
for var in "${REQUIRED_VARS[@]}"; do for var in "${REQUIRED_VARS[@]}"; do
if [[ -z "${!var:-}" ]]; then if [[ -z "${!var:-}" ]]; then
echo "ERROR: La variable $var no està configurada a config.sh" echo "ERROR: La variable $var no està configurada a config.sh"
@ -124,7 +134,12 @@ DEPLOY_OUTPUT=$(az deployment group create \
demoPassword="$DEMO_PASSWORD" \ demoPassword="$DEMO_PASSWORD" \
llmApiKey="$LLM_API_KEY" \ llmApiKey="$LLM_API_KEY" \
llmBoostApiKey="${LLM_BOOST_API_KEY:-}" \ llmBoostApiKey="${LLM_BOOST_API_KEY:-}" \
zepApiKey="$ZEP_API_KEY" \ llmProvider="${LLM_PROVIDER:-}" \
zepApiKey="${ZEP_API_KEY:-}" \
neo4jPassword="${NEO4J_PASSWORD:-}" \
neo4jUri="${NEO4J_URI:-bolt://localhost:7687}" \
neo4jUser="${NEO4J_USER:-neo4j}" \
graphBackend="${GRAPH_BACKEND:-zep}" \
secretKey="$SECRET_KEY" \ secretKey="$SECRET_KEY" \
llmBaseUrl="$LLM_BASE_URL" \ llmBaseUrl="$LLM_BASE_URL" \
llmModelName="$LLM_MODEL_NAME" \ llmModelName="$LLM_MODEL_NAME" \

View File

@ -34,14 +34,42 @@ LLM_API_KEY="<la-teva-llm-api-key>"
LLM_BASE_URL="https://dashscope.aliyuncs.com/compatible-mode/v1" LLM_BASE_URL="https://dashscope.aliyuncs.com/compatible-mode/v1"
LLM_MODEL_NAME="qwen-plus" LLM_MODEL_NAME="qwen-plus"
# Proveïdor LLM especial (opcional):
# gemini → configura automàticament Google AI Studio (no cal LLM_BASE_URL)
# (buit) → qualsevol API compatible OpenAI
LLM_PROVIDER=""
# ── LLM accelerador (opcional — deixar buit per desactivar) ─────────────────── # ── LLM accelerador (opcional — deixar buit per desactivar) ───────────────────
LLM_BOOST_API_KEY="" LLM_BOOST_API_KEY=""
LLM_BOOST_BASE_URL="" LLM_BOOST_BASE_URL=""
LLM_BOOST_MODEL_NAME="" LLM_BOOST_MODEL_NAME=""
# ── Zep Cloud (graf de memòria) ─────────────────────────────────────────────── # ── Backend de graf ───────────────────────────────────────────────────────────
# Opcions: zep (Zep Cloud, per defecte) | graphiti (Neo4j local/Azure)
GRAPH_BACKEND="zep"
# --- Zep Cloud (si GRAPH_BACKEND=zep) ---
ZEP_API_KEY="<la-teva-zep-api-key>" ZEP_API_KEY="<la-teva-zep-api-key>"
# --- Graphiti + Neo4j (si GRAPH_BACKEND=graphiti) ---
# URI bolt del servidor Neo4j (pot ser una VM Azure, ACI, etc.)
# NEO4J_URI="bolt://<ip-o-hostname>:7687"
# NEO4J_USER="neo4j"
# NEO4J_PASSWORD="<contrasenya-neo4j>"
# GRAPHITI_BATCH_SIZE="10" # chunks per crida bulk; més alt = més ràpid però més paral·lelisme LLM
# --- Embedding LLM (usat per Graphiti per a indexació vectorial) ---
# Si no s'estableix, fa fallback a LLM_API_KEY / LLM_BASE_URL.
# LLM_EMBED_API_KEY="<la-teva-embed-api-key>"
# LLM_EMBED_BASE_URL="https://<recurs>.cognitiveservices.azure.com/openai/deployments/<embed-deployment>/embeddings?api-version=2024-05-01-preview"
# LLM_EMBED_MODEL_NAME="text-embedding-3-small"
# --- Small/fast LLM (usat per Graphiti per a reranking i tasques lleugeres) ---
# Si no s'estableix, fa fallback a LLM_API_KEY / LLM_BASE_URL / LLM_MODEL_NAME.
# LLM_SMALL_API_KEY="<la-teva-small-api-key>"
# LLM_SMALL_BASE_URL="https://<recurs>.cognitiveservices.azure.com/openai/deployments/<small-model>/chat/completions?api-version=2024-05-01-preview"
# LLM_SMALL_MODEL_NAME="gpt-4o-mini"
# ── Simulació OASIS (valors per defecte recomanats) ─────────────────────────── # ── Simulació OASIS (valors per defecte recomanats) ───────────────────────────
OASIS_DEFAULT_MAX_ROUNDS="10" OASIS_DEFAULT_MAX_ROUNDS="10"

View File

@ -49,9 +49,13 @@ param llmApiKey string
@secure() @secure()
param llmBoostApiKey string = '' param llmBoostApiKey string = ''
@description('Clau de l\'API Zep Cloud') @description('Clau de l\'API Zep Cloud (obligatori si GRAPH_BACKEND=zep)')
@secure() @secure()
param zepApiKey string param zepApiKey string = ''
@description('Contrasenya de Neo4j (obligatori si GRAPH_BACKEND=graphiti)')
@secure()
param neo4jPassword string = ''
@description('SECRET_KEY de Flask per a JWT (python -c "import secrets; print(secrets.token_hex(32))")') @description('SECRET_KEY de Flask per a JWT (python -c "import secrets; print(secrets.token_hex(32))")')
@secure() @secure()
@ -65,6 +69,20 @@ param llmBaseUrl string = 'https://dashscope.aliyuncs.com/compatible-mode/v1'
@description('Nom del model LLM principal') @description('Nom del model LLM principal')
param llmModelName string = 'qwen-plus' param llmModelName string = 'qwen-plus'
@description('Proveïdor LLM (gemini per a Google AI Studio; buit per a qualsevol compatible OpenAI)')
param llmProvider string = ''
// ─── Paràmetres del backend de graf ──────────────────────────────────────────
@description('Backend de graf: zep (Zep Cloud) o graphiti (Neo4j local/Azure)')
param graphBackend string = 'zep'
@description('URI de connexió bolt de Neo4j (necessari si GRAPH_BACKEND=graphiti)')
param neo4jUri string = 'bolt://localhost:7687'
@description('Usuari de Neo4j')
param neo4jUser string = 'neo4j'
// ─── Paràmetres LLM accelerador (opcionals) ────────────────────────────────── // ─── Paràmetres LLM accelerador (opcionals) ──────────────────────────────────
@description('URL base de l\'API LLM acceleradora (opcional)') @description('URL base de l\'API LLM acceleradora (opcional)')
@ -104,6 +122,7 @@ resource containerApp 'Microsoft.App/containerApps@2023-05-01' = {
{ name: 'llm-api-key', value: llmApiKey } { name: 'llm-api-key', value: llmApiKey }
{ name: 'llm-boost-api-key', value: llmBoostApiKey } { name: 'llm-boost-api-key', value: llmBoostApiKey }
{ name: 'zep-api-key', value: zepApiKey } { name: 'zep-api-key', value: zepApiKey }
{ name: 'neo4j-password', value: neo4jPassword }
{ name: 'secret-key', value: secretKey } { name: 'secret-key', value: secretKey }
] ]
@ -143,14 +162,21 @@ resource containerApp 'Microsoft.App/containerApps@2023-05-01' = {
{ name: 'LLM_API_KEY', secretRef: 'llm-api-key' } { name: 'LLM_API_KEY', secretRef: 'llm-api-key' }
{ name: 'LLM_BOOST_API_KEY', secretRef: 'llm-boost-api-key' } { name: 'LLM_BOOST_API_KEY', secretRef: 'llm-boost-api-key' }
{ name: 'ZEP_API_KEY', secretRef: 'zep-api-key' } { name: 'ZEP_API_KEY', secretRef: 'zep-api-key' }
{ name: 'NEO4J_PASSWORD', secretRef: 'neo4j-password' }
{ name: 'SECRET_KEY', secretRef: 'secret-key' } { name: 'SECRET_KEY', secretRef: 'secret-key' }
// ── Variables no sensibles ── // ── Variables no sensibles ──
{ name: 'LLM_BASE_URL', value: llmBaseUrl } { name: 'LLM_BASE_URL', value: llmBaseUrl }
{ name: 'LLM_MODEL_NAME', value: llmModelName } { name: 'LLM_MODEL_NAME', value: llmModelName }
{ name: 'LLM_PROVIDER', value: llmProvider }
{ name: 'LLM_BOOST_BASE_URL', value: llmBoostBaseUrl } { name: 'LLM_BOOST_BASE_URL', value: llmBoostBaseUrl }
{ name: 'LLM_BOOST_MODEL_NAME', value: llmBoostModelName } { name: 'LLM_BOOST_MODEL_NAME', value: llmBoostModelName }
// ── Backend de graf ──
{ name: 'GRAPH_BACKEND', value: graphBackend }
{ name: 'NEO4J_URI', value: neo4jUri }
{ name: 'NEO4J_USER', value: neo4jUser }
// ── Simulació OASIS ── // ── Simulació OASIS ──
{ name: 'OASIS_DEFAULT_MAX_ROUNDS', value: oasisDefaultMaxRounds } { name: 'OASIS_DEFAULT_MAX_ROUNDS', value: oasisDefaultMaxRounds }

0
backend/__init__.py Normal file
View File

View File

@ -284,9 +284,7 @@ def build_graph():
logger.info("=== Starting graph build ===") logger.info("=== Starting graph build ===")
# Check configuration # Check configuration
errors = [] errors = Config.get_graph_config_errors()
if not Config.ZEP_API_KEY:
errors.append(t('api.zepApiKeyMissing'))
if errors: if errors:
logger.error(f"Configuration error: {errors}") logger.error(f"Configuration error: {errors}")
return jsonify({ return jsonify({
@ -387,7 +385,7 @@ def build_graph():
) )
# Create graph builder service # Create graph builder service
builder = GraphBuilderService(api_key=Config.ZEP_API_KEY) builder = GraphBuilderService()
# Split into chunks # Split into chunks
task_manager.update_task( task_manager.update_task(
@ -437,10 +435,11 @@ def build_graph():
progress=15 progress=15
) )
batch_size = Config.GRAPHITI_BATCH_SIZE if Config.GRAPH_BACKEND == 'graphiti' else 3
episode_uuids = builder.add_text_batches( episode_uuids = builder.add_text_batches(
graph_id, graph_id,
chunks, chunks,
batch_size=3, batch_size=batch_size,
progress_callback=add_progress_callback progress_callback=add_progress_callback
) )
@ -572,13 +571,7 @@ def get_graph_data(graph_id: str):
Get graph data (nodes and edges) Get graph data (nodes and edges)
""" """
try: try:
if not Config.ZEP_API_KEY: builder = GraphBuilderService()
return jsonify({
"success": False,
"error": t('api.zepApiKeyMissing')
}), 500
builder = GraphBuilderService(api_key=Config.ZEP_API_KEY)
graph_data = builder.get_graph_data(graph_id) graph_data = builder.get_graph_data(graph_id)
return jsonify({ return jsonify({
@ -600,13 +593,7 @@ def delete_graph(graph_id: str):
Delete a Zep graph Delete a Zep graph
""" """
try: try:
if not Config.ZEP_API_KEY: builder = GraphBuilderService()
return jsonify({
"success": False,
"error": t('api.zepApiKeyMissing')
}), 500
builder = GraphBuilderService(api_key=Config.ZEP_API_KEY)
builder.delete_graph(graph_id) builder.delete_graph(graph_id)
return jsonify({ return jsonify({

View File

@ -57,12 +57,6 @@ def get_graph_entities(graph_id: str):
enrich: whether to fetch related edge info (default true) enrich: whether to fetch related edge info (default true)
""" """
try: try:
if not Config.ZEP_API_KEY:
return jsonify({
"success": False,
"error": t('api.zepApiKeyMissing')
}), 500
entity_types_str = request.args.get('entity_types', '') entity_types_str = request.args.get('entity_types', '')
entity_types = [t.strip() for t in entity_types_str.split(',') if t.strip()] if entity_types_str else None entity_types = [t.strip() for t in entity_types_str.split(',') if t.strip()] if entity_types_str else None
enrich = request.args.get('enrich', 'true').lower() == 'true' enrich = request.args.get('enrich', 'true').lower() == 'true'
@ -94,12 +88,6 @@ def get_graph_entities(graph_id: str):
def get_entity_detail(graph_id: str, entity_uuid: str): def get_entity_detail(graph_id: str, entity_uuid: str):
"""Get detailed information about a single entity""" """Get detailed information about a single entity"""
try: try:
if not Config.ZEP_API_KEY:
return jsonify({
"success": False,
"error": t('api.zepApiKeyMissing')
}), 500
reader = ZepEntityReader() reader = ZepEntityReader()
entity = reader.get_entity_with_context(graph_id, entity_uuid) entity = reader.get_entity_with_context(graph_id, entity_uuid)
@ -127,12 +115,6 @@ def get_entity_detail(graph_id: str, entity_uuid: str):
def get_entities_by_type(graph_id: str, entity_type: str): def get_entities_by_type(graph_id: str, entity_type: str):
"""Get all entities of a specified type""" """Get all entities of a specified type"""
try: try:
if not Config.ZEP_API_KEY:
return jsonify({
"success": False,
"error": t('api.zepApiKeyMissing')
}), 500
enrich = request.args.get('enrich', 'true').lower() == 'true' enrich = request.args.get('enrich', 'true').lower() == 'true'
reader = ZepEntityReader() reader = ZepEntityReader()

View File

@ -32,10 +32,34 @@ class Config:
LLM_API_KEY = os.environ.get('LLM_API_KEY') LLM_API_KEY = os.environ.get('LLM_API_KEY')
LLM_BASE_URL = os.environ.get('LLM_BASE_URL', 'https://api.openai.com/v1') LLM_BASE_URL = os.environ.get('LLM_BASE_URL', 'https://api.openai.com/v1')
LLM_MODEL_NAME = os.environ.get('LLM_MODEL_NAME', 'gpt-4o-mini') LLM_MODEL_NAME = os.environ.get('LLM_MODEL_NAME', 'gpt-4o-mini')
# Embedding LLM (used by Graphiti for vector indexing)
# Falls back to LLM_* values if not set
LLM_EMBED_API_KEY = os.environ.get('LLM_EMBED_API_KEY') or os.environ.get('LLM_API_KEY')
LLM_EMBED_BASE_URL = os.environ.get('LLM_EMBED_BASE_URL') or os.environ.get('LLM_BASE_URL', 'https://api.openai.com/v1')
LLM_EMBED_MODEL_NAME = os.environ.get('LLM_EMBED_MODEL_NAME', 'text-embedding-3-small')
# Small/fast LLM (used by Graphiti for lightweight tasks like reranking)
# Falls back to LLM_* values if not set
LLM_SMALL_API_KEY = os.environ.get('LLM_SMALL_API_KEY') or os.environ.get('LLM_API_KEY')
LLM_SMALL_BASE_URL = os.environ.get('LLM_SMALL_BASE_URL') or os.environ.get('LLM_BASE_URL', 'https://api.openai.com/v1')
LLM_SMALL_MODEL_NAME = os.environ.get('LLM_SMALL_MODEL_NAME') or os.environ.get('LLM_MODEL_NAME', 'gpt-4o-mini')
# Zep settings # Graph backend: "zep" (default, cloud) o "graphiti" (Neo4j local)
GRAPH_BACKEND = os.environ.get('GRAPH_BACKEND', 'zep')
# Zep Cloud
ZEP_API_KEY = os.environ.get('ZEP_API_KEY') ZEP_API_KEY = os.environ.get('ZEP_API_KEY')
# Graphiti + Neo4j
NEO4J_URI = os.environ.get('NEO4J_URI', 'bolt://localhost:7687')
NEO4J_USER = os.environ.get('NEO4J_USER', 'neo4j')
NEO4J_PASSWORD = os.environ.get('NEO4J_PASSWORD')
GRAPHITI_BATCH_SIZE = int(os.environ.get('GRAPHITI_BATCH_SIZE', '10'))
# LLM provider ("" = OpenAI-compatible per defecte, "gemini" = Google AI Studio)
LLM_PROVIDER = os.environ.get('LLM_PROVIDER', '')
# File upload settings # File upload settings
MAX_CONTENT_LENGTH = 50 * 1024 * 1024 # 50MB MAX_CONTENT_LENGTH = 50 * 1024 * 1024 # 50MB
UPLOAD_FOLDER = os.path.join(os.path.dirname(__file__), '../uploads') UPLOAD_FOLDER = os.path.join(os.path.dirname(__file__), '../uploads')
@ -45,6 +69,10 @@ class Config:
DEFAULT_CHUNK_SIZE = 500 # default chunk size DEFAULT_CHUNK_SIZE = 500 # default chunk size
DEFAULT_CHUNK_OVERLAP = 50 # default overlap size DEFAULT_CHUNK_OVERLAP = 50 # default overlap size
# Ontology generation limits
ONTOLOGY_MAX_ENTITY_TYPES = int(os.environ.get('ONTOLOGY_MAX_ENTITY_TYPES', '12'))
ONTOLOGY_MAX_EDGE_TYPES = int(os.environ.get('ONTOLOGY_MAX_EDGE_TYPES', '10'))
# OASIS simulation settings # OASIS simulation settings
OASIS_DEFAULT_MAX_ROUNDS = int(os.environ.get('OASIS_DEFAULT_MAX_ROUNDS', '10')) OASIS_DEFAULT_MAX_ROUNDS = int(os.environ.get('OASIS_DEFAULT_MAX_ROUNDS', '10'))
OASIS_SIMULATION_DATA_DIR = os.path.join(os.path.dirname(__file__), '../uploads/simulations') OASIS_SIMULATION_DATA_DIR = os.path.join(os.path.dirname(__file__), '../uploads/simulations')
@ -64,13 +92,25 @@ class Config:
REPORT_AGENT_MAX_REFLECTION_ROUNDS = int(os.environ.get('REPORT_AGENT_MAX_REFLECTION_ROUNDS', '2')) REPORT_AGENT_MAX_REFLECTION_ROUNDS = int(os.environ.get('REPORT_AGENT_MAX_REFLECTION_ROUNDS', '2'))
REPORT_AGENT_TEMPERATURE = float(os.environ.get('REPORT_AGENT_TEMPERATURE', '0.5')) REPORT_AGENT_TEMPERATURE = float(os.environ.get('REPORT_AGENT_TEMPERATURE', '0.5'))
@classmethod
def get_graph_config_errors(cls) -> list:
errors = []
if cls.GRAPH_BACKEND == 'zep':
if not cls.ZEP_API_KEY:
errors.append("ZEP_API_KEY is not configured (required when GRAPH_BACKEND=zep)")
elif cls.GRAPH_BACKEND == 'graphiti':
if not cls.NEO4J_PASSWORD:
errors.append("NEO4J_PASSWORD is not configured (required when GRAPH_BACKEND=graphiti)")
else:
errors.append(f"Unknown GRAPH_BACKEND value: '{cls.GRAPH_BACKEND}'. Use 'zep' or 'graphiti'.")
return errors
@classmethod @classmethod
def validate(cls): def validate(cls):
"""Validate required configuration""" """Validate required configuration"""
errors = [] errors = []
if not cls.LLM_API_KEY: if not cls.LLM_API_KEY:
errors.append("LLM_API_KEY is not configured") errors.append("LLM_API_KEY is not configured")
if not cls.ZEP_API_KEY: errors.extend(cls.get_graph_config_errors())
errors.append("ZEP_API_KEY is not configured")
return errors return errors

View File

@ -0,0 +1,3 @@
from .factory import get_graph_backend
__all__ = ["get_graph_backend"]

38
backend/app/graph/base.py Normal file
View File

@ -0,0 +1,38 @@
"""Abstract graph backend interface."""
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional
class GraphBackend(ABC):
@abstractmethod
def create_graph(self, graph_id: str, name: str, description: str = "") -> None: ...
@abstractmethod
def set_ontology(self, graph_ids: List[str], entities: Dict[str, Any], edges: Dict[str, Any]) -> None: ...
@abstractmethod
def add_batch(self, graph_id: str, episodes: List[Any]) -> List[str]: ...
@abstractmethod
def get_episode(self, uuid_: str) -> Any: ...
@abstractmethod
def get_all_nodes(self, graph_id: str) -> List[Dict[str, Any]]: ...
@abstractmethod
def get_all_edges(self, graph_id: str) -> List[Dict[str, Any]]: ...
@abstractmethod
def get_node(self, uuid_: str) -> Dict[str, Any]: ...
@abstractmethod
def get_node_edges(self, node_uuid: str) -> List[Dict[str, Any]]: ...
@abstractmethod
def search(self, graph_id: str, query: str, limit: int = 10, scope: str = "edges") -> Dict[str, Any]: ...
@abstractmethod
def add_text(self, graph_id: str, data: str) -> None: ...
@abstractmethod
def delete_graph(self, graph_id: str) -> None: ...

View File

@ -0,0 +1,39 @@
"""Graph backend factory — returns singleton based on GRAPH_BACKEND env var."""
from typing import Optional
from .base import GraphBackend
from ..utils.logger import get_logger
logger = get_logger('mirofish.graph.factory')
_backend_instance: Optional[GraphBackend] = None
def get_graph_backend() -> GraphBackend:
"""Return the configured graph backend singleton."""
global _backend_instance
if _backend_instance is not None:
return _backend_instance
from ..config import Config
backend_type = Config.GRAPH_BACKEND
logger.info(f"Initializing graph backend: {backend_type}")
if backend_type == "zep":
from .zep_backend import ZepBackend
_backend_instance = ZepBackend()
elif backend_type == "graphiti":
from .graphiti_backend import GraphitiBackend
_backend_instance = GraphitiBackend()
else:
raise ValueError(
f"Unknown GRAPH_BACKEND='{backend_type}'. Valid values: 'zep', 'graphiti'."
)
return _backend_instance
def reset_graph_backend() -> None:
"""Reset singleton (useful for testing)."""
global _backend_instance
_backend_instance = None

View File

@ -0,0 +1,470 @@
"""Graphiti + Neo4j implementation of GraphBackend."""
import asyncio
import json
import threading
import typing
import uuid as uuid_mod
from typing import Any, Dict, List, Optional
from .base import GraphBackend
from ..config import Config
from ..utils.logger import get_logger
from ..utils.llm_client import parse_azure_url
def _neo4j_val(v: Any) -> Any:
"""Convert Neo4j native types to JSON-serializable Python types."""
if v is None:
return None
t = type(v).__name__
if t in ('DateTime', 'Date', 'Time', 'LocalDateTime', 'LocalTime', 'Duration'):
return str(v)
if isinstance(v, (list, tuple)):
return [_neo4j_val(i) for i in v]
if isinstance(v, dict):
return {k: _neo4j_val(vv) for k, vv in v.items()}
return v
def _flatten_attributes(attrs: dict) -> dict:
"""Flatten entity attribute dicts so every value is a Neo4j-safe primitive.
Graphiti extracts entity attributes via a Pydantic model, but the raw LLM
response sometimes wraps each value in a nested dict (e.g. {"value": "CTTI"}).
Neo4j only accepts primitive types or arrays thereof, so we coerce any
dict value to its string representation. Lists of primitives are kept as-is
because Neo4j supports array properties.
"""
result = {}
for k, v in attrs.items():
if v is None:
continue
if isinstance(v, dict):
# Unwrap {"value": "..."} pattern emitted by some LLMs; fall back to str()
result[k] = v.get("value") or v.get("text") or str(v)
else:
result[k] = v
return result
def _neo4j_props(node_or_rel: Any) -> Dict[str, Any]:
"""Return a JSON-safe dict of a Neo4j node or relationship's properties."""
return {k: _neo4j_val(v) for k, v in dict(node_or_rel).items()}
logger = get_logger('mirofish.graph.graphiti')
def _make_azure_generic_client(config, client):
"""Return an OpenAIGenericClient subclass that uses max_completion_tokens
instead of max_tokens required by gpt-5 / o-series models on Azure."""
from graphiti_core.llm_client.openai_generic_client import OpenAIGenericClient
import openai as _openai
from graphiti_core.llm_client.errors import RateLimitError as _RateLimitError
from pydantic import BaseModel as _BaseModel
class _AzureGenericClient(OpenAIGenericClient):
async def _generate_response(self, messages, response_model=None, max_tokens=None, model_size=None):
from openai.types.chat import ChatCompletionMessageParam
if max_tokens is None:
max_tokens = self.max_tokens
openai_messages: list[ChatCompletionMessageParam] = []
for m in messages:
if m.role == 'user':
openai_messages.append({'role': 'user', 'content': m.content})
elif m.role == 'system':
openai_messages.append({'role': 'system', 'content': m.content})
response_format: dict[str, Any] = {'type': 'json_object'}
if response_model is not None:
schema_name = getattr(response_model, '__name__', 'structured_response')
response_format = {
'type': 'json_schema',
'json_schema': {
'name': schema_name,
'schema': response_model.model_json_schema(),
},
}
try:
response = await self.client.chat.completions.create(
model=self.model,
messages=openai_messages,
temperature=self.temperature,
max_completion_tokens=max_tokens,
response_format=response_format,
)
return json.loads(response.choices[0].message.content or '{}')
except _openai.RateLimitError as e:
raise _RateLimitError from e
return _AzureGenericClient(config=config, client=client)
def _run_async(coro, timeout=300):
"""Run an async coroutine from a sync context using a dedicated thread loop."""
loop = _get_event_loop()
future = asyncio.run_coroutine_threadsafe(coro, loop)
return future.result(timeout=timeout)
_loop: Optional[asyncio.AbstractEventLoop] = None
_loop_thread: Optional[threading.Thread] = None
_loop_lock = threading.Lock()
def _get_event_loop() -> asyncio.AbstractEventLoop:
global _loop, _loop_thread
with _loop_lock:
if _loop is None or not _loop.is_running():
_loop = asyncio.new_event_loop()
_loop_thread = threading.Thread(target=_loop.run_forever, daemon=True)
_loop_thread.start()
return _loop
class GraphitiBackend(GraphBackend):
def __init__(
self,
uri: Optional[str] = None,
user: Optional[str] = None,
password: Optional[str] = None,
):
self._uri = uri or Config.NEO4J_URI
self._user = user or Config.NEO4J_USER
self._password = password or Config.NEO4J_PASSWORD
if not self._password:
raise ValueError("NEO4J_PASSWORD is not configured")
self._entity_types: Dict[str, Any] = {}
self._edge_types: Dict[str, Any] = {}
self._entity_defs: Dict[str, Any] = {}
self._edge_defs: Dict[str, Any] = {}
self._client = self._build_client()
def _build_client(self):
from graphiti_core import Graphiti
from graphiti_core.llm_client.openai_generic_client import OpenAIGenericClient
from graphiti_core.llm_client.config import LLMConfig
from graphiti_core.embedder.openai import OpenAIEmbedder, OpenAIEmbedderConfig
from graphiti_core.cross_encoder.openai_reranker_client import OpenAIRerankerClient
from openai import AsyncOpenAI
llm_base_url, llm_query = parse_azure_url(Config.LLM_BASE_URL)
small_base_url, small_query = parse_azure_url(Config.LLM_SMALL_BASE_URL)
embed_base_url, embed_query = parse_azure_url(Config.LLM_EMBED_BASE_URL)
# Pre-built async clients so api-version is passed as default_query (Azure requirement)
async_llm_client = AsyncOpenAI(
api_key=Config.LLM_API_KEY,
base_url=llm_base_url,
default_query=llm_query or None,
)
async_small_client = AsyncOpenAI(
api_key=Config.LLM_SMALL_API_KEY,
base_url=small_base_url,
default_query=small_query or None,
)
async_embed_client = AsyncOpenAI(
api_key=Config.LLM_EMBED_API_KEY,
base_url=embed_base_url,
default_query=embed_query or None,
)
llm_config = LLMConfig(
api_key=Config.LLM_API_KEY,
model=Config.LLM_MODEL_NAME,
small_model=Config.LLM_SMALL_MODEL_NAME,
base_url=llm_base_url,
)
llm_client = _make_azure_generic_client(config=llm_config, client=async_llm_client)
embedder = OpenAIEmbedder(
config=OpenAIEmbedderConfig(
api_key=Config.LLM_EMBED_API_KEY,
base_url=embed_base_url,
embedding_model=Config.LLM_EMBED_MODEL_NAME,
),
client=async_embed_client,
)
cross_encoder = OpenAIRerankerClient(config=llm_config, client=async_small_client)
client = Graphiti(
uri=self._uri,
user=self._user,
password=self._password,
llm_client=llm_client,
embedder=embedder,
cross_encoder=cross_encoder,
)
self._patch_extract_entity_attributes()
return client
@staticmethod
def _patch_extract_entity_attributes() -> None:
"""Monkey-patch graphiti's _extract_entity_attributes to sanitize LLM output.
Some LLMs return attribute values as nested dicts ({"value": "CTTI"}) instead
of plain strings. Neo4j rejects these with TypeError. We intercept the raw
llm_response dict before it is stored in node.attributes and flatten it.
"""
import graphiti_core.utils.maintenance.node_operations as _node_ops
original = _node_ops._extract_entity_attributes
async def _patched(llm_client, node, episode, previous_episodes, entity_type):
result = await original(llm_client, node, episode, previous_episodes, entity_type)
# result is a dict — flatten any dict-valued attributes
return _flatten_attributes(result) if result else result
_node_ops._extract_entity_attributes = _patched
def create_graph(self, graph_id: str, name: str, description: str = "") -> None:
logger.info(f"Graphiti graph namespace ready: {graph_id}")
def set_ontology(self, graph_ids: List[str], entities: Dict[str, Any], edges: Dict[str, Any]) -> None:
from pydantic import BaseModel as _BaseModel, Field as _Field
def _make_model(name: str, type_def: Any) -> Any:
if isinstance(type_def, dict):
doc = type_def.get("description", "")
attrs_defs = type_def.get("attributes", [])
else:
doc = getattr(type_def, "__doc__", "") or ""
attrs_defs = []
annotations: Dict[str, Any] = {}
fields: Dict[str, Any] = {"__doc__": doc, "__annotations__": annotations}
for attr in attrs_defs:
attr_name = attr.get("name", "")
attr_desc = attr.get("description", attr_name)
if not attr_name:
continue
annotations[attr_name] = Optional[str]
fields[attr_name] = _Field(default=None, description=attr_desc)
return type(name, (_BaseModel,), fields)
self._entity_types: Dict[str, Any] = {
name: _make_model(name, td) for name, td in (entities or {}).items()
}
self._edge_types: Dict[str, Any] = {
name: _make_model(name, td) for name, td in (edges or {}).items()
}
# Keep a separate plain dict for use in extraction instructions
self._entity_defs: Dict[str, Any] = dict(entities or {})
self._edge_defs: Dict[str, Any] = dict(edges or {})
if self._entity_types:
logger.info(f"Graphiti entity types: {list(self._entity_types.keys())}")
if self._edge_types:
logger.info(f"Graphiti edge types: {list(self._edge_types.keys())}")
def _build_extraction_instructions(self) -> Optional[str]:
"""Return custom instructions that constrain extraction to ontology types and attributes."""
entity_defs = self._entity_defs or {}
edge_defs = self._edge_defs or {}
if not entity_defs and not edge_defs:
return None
parts = []
if entity_defs:
entity_lines = []
for name, td in entity_defs.items():
desc = td.get("description", "") if isinstance(td, dict) else ""
attrs = td.get("attributes", []) if isinstance(td, dict) else []
if attrs:
attr_str = ", ".join(
f"{a['name']} ({a.get('description', a['name'])})"
for a in attrs if a.get("name")
)
entity_lines.append(f" - {name}: {desc} [attributes: {attr_str}]")
else:
entity_lines.append(f" - {name}: {desc}")
parts.append(
"Only classify entities using these types (use 'Entity' only if none fits):\n"
+ "\n".join(entity_lines)
+ "\nFor each entity, extract values for the listed attributes when present in the text."
)
if edge_defs:
edge_names = list(edge_defs.keys())
parts.append(
f"Only use these relationship types: {', '.join(edge_names)}. "
"Do not invent new relationship type names."
)
return "\n\n".join(parts)
def add_batch(self, graph_id: str, episodes: List[Any]) -> List[str]:
from graphiti_core.nodes import EpisodeType
from datetime import datetime, timezone
import time as _time
entity_types = self._entity_types or None
edge_types = self._edge_types or None
instructions = self._build_extraction_instructions()
ids = []
for ep in episodes:
data = ep["data"] if isinstance(ep, dict) else ep.data
ep_id = str(uuid_mod.uuid4())
ids.append(ep_id)
last_exc = None
for attempt in range(3):
try:
_run_async(
self._client.add_episode(
name=ep_id,
episode_body=data,
source_description="MiroFish document chunk",
reference_time=datetime.now(timezone.utc),
source=EpisodeType.text,
group_id=graph_id,
entity_types=entity_types,
edge_types=edge_types,
custom_extraction_instructions=instructions,
),
timeout=300,
)
last_exc = None
break
except Exception as exc:
last_exc = exc
# "node not found" race condition — wait and retry
if "not found" in str(exc).lower() and attempt < 2:
logger.warning(f"Episode {ep_id} attempt {attempt + 1} failed ({exc}), retrying...")
_time.sleep(2 * (attempt + 1))
else:
raise
if last_exc:
raise last_exc
return ids
def get_episode(self, uuid_: str) -> Any:
class _FakeEpisode:
processed = True
return _FakeEpisode()
def get_all_nodes(self, graph_id: str) -> List[Dict[str, Any]]:
results = _run_async(
self._client.driver.execute_query(
"MATCH (n:Entity {group_id: $gid}) RETURN n",
params={"gid": graph_id},
)
)
nodes = []
for record in results.records:
n = record["n"]
nodes.append({
"uuid": n.get("uuid", n.element_id),
"name": n.get("name", ""),
"labels": list(n.labels),
"summary": n.get("summary", ""),
"attributes": _neo4j_props(n),
"created_at": str(n.get("created_at", "")),
})
return nodes
def get_all_edges(self, graph_id: str) -> List[Dict[str, Any]]:
results = _run_async(
self._client.driver.execute_query(
"MATCH (s)-[r]->(t) WHERE r.group_id = $gid RETURN s, r, t",
params={"gid": graph_id},
)
)
edges = []
for record in results.records:
r = record["r"]
edges.append({
"uuid": r.get("uuid", r.element_id),
"name": r.get("name", type(r).__name__),
"fact": r.get("fact", ""),
"source_node_uuid": record["s"].get("uuid", ""),
"target_node_uuid": record["t"].get("uuid", ""),
"fact_type": r.get("fact_type", ""),
"attributes": _neo4j_props(r),
"created_at": str(r.get("created_at", "")),
"valid_at": str(r.get("valid_at", "")),
"invalid_at": str(r.get("invalid_at", "")),
"expired_at": str(r.get("expired_at", "")),
"episodes": [],
})
return edges
def get_node(self, uuid_: str) -> Dict[str, Any]:
results = _run_async(
self._client.driver.execute_query(
"MATCH (n {uuid: $uuid}) RETURN n LIMIT 1",
params={"uuid": uuid_},
)
)
if not results.records:
return {}
n = results.records[0]["n"]
return {
"uuid": n.get("uuid", ""),
"name": n.get("name", ""),
"labels": list(n.labels),
"summary": n.get("summary", ""),
"attributes": _neo4j_props(n),
}
def get_node_edges(self, node_uuid: str) -> List[Dict[str, Any]]:
results = _run_async(
self._client.driver.execute_query(
"MATCH (n {uuid: $uuid})-[r]->(t) RETURN r, t "
"UNION MATCH (s)-[r]->(n {uuid: $uuid}) RETURN r, s as t",
params={"uuid": node_uuid},
)
)
edges = []
for record in results.records:
r = record["r"]
edges.append({
"uuid": r.get("uuid", r.element_id),
"name": r.get("name", ""),
"fact": r.get("fact", ""),
"source_node_uuid": r.get("source_node_uuid", node_uuid),
"target_node_uuid": r.get("target_node_uuid", ""),
})
return edges
def search(self, graph_id: str, query: str, limit: int = 10, scope: str = "edges") -> Dict[str, Any]:
results = _run_async(
self._client.search(query=query, group_ids=[graph_id], num_results=limit)
)
edges = [
{
"uuid": getattr(r, "uuid", ""),
"name": getattr(r, "name", ""),
"fact": getattr(r, "fact", ""),
"source_node_uuid": getattr(r, "source_node_uuid", ""),
"target_node_uuid": getattr(r, "target_node_uuid", ""),
}
for r in (results or [])
]
return {"edges": edges, "nodes": []}
def add_text(self, graph_id: str, data: str) -> None:
from graphiti_core.nodes import EpisodeType
from datetime import datetime, timezone
ep_id = str(uuid_mod.uuid4())
_run_async(
self._client.add_episode(
name=ep_id,
episode_body=data,
source_description="MiroFish document chunk",
reference_time=datetime.now(timezone.utc),
source=EpisodeType.text,
group_id=graph_id,
entity_types=self._entity_types or None,
edge_types=self._edge_types or None,
custom_extraction_instructions=self._build_extraction_instructions(),
)
)
def delete_graph(self, graph_id: str) -> None:
_run_async(
self._client.driver.execute_query(
"MATCH (n {group_id: $gid}) DETACH DELETE n",
params={"gid": graph_id},
)
)

View File

@ -0,0 +1,151 @@
"""Zep Cloud implementation of GraphBackend."""
import time
from typing import Any, Dict, List, Optional
from zep_cloud.client import Zep
from zep_cloud import InternalServerError
from .base import GraphBackend
from ..config import Config
from ..utils.logger import get_logger
logger = get_logger('mirofish.graph.zep')
_PAGE_SIZE = 100
_MAX_ITEMS = 2000
_MAX_RETRIES = 3
_RETRY_DELAY = 2.0
def _fetch_page_with_retry(api_call, *args, max_retries=_MAX_RETRIES, retry_delay=_RETRY_DELAY, **kwargs):
for attempt in range(max_retries):
try:
return api_call(*args, **kwargs) or []
except (ConnectionError, TimeoutError, OSError, InternalServerError):
if attempt == max_retries - 1:
raise
time.sleep(retry_delay * (2 ** attempt))
return []
def _fetch_all(list_fn, graph_id: str, cursor_key: str = "uuid_cursor") -> List[Any]:
results, cursor = [], None
while True:
kwargs = {"limit": _PAGE_SIZE}
if cursor:
kwargs[cursor_key] = cursor
batch = _fetch_page_with_retry(list_fn, graph_id, **kwargs)
results.extend(batch)
if not batch or len(batch) < _PAGE_SIZE or len(results) >= _MAX_ITEMS:
break
cursor = batch[-1].uuid_
return results
class ZepBackend(GraphBackend):
def __init__(self, api_key: Optional[str] = None):
self.api_key = api_key or Config.ZEP_API_KEY
if not self.api_key:
raise ValueError("ZEP_API_KEY is not configured")
self._client = Zep(api_key=self.api_key)
def create_graph(self, graph_id: str, name: str, description: str = "") -> None:
self._client.graph.create(graph_id=graph_id, name=name, description=description)
def set_ontology(self, graph_ids: List[str], entities: Dict[str, Any], edges: Dict[str, Any]) -> None:
self._client.graph.set_ontology(graph_ids=graph_ids, entities=entities, edges=edges)
def add_batch(self, graph_id: str, episodes: List[Any]) -> List[str]:
from zep_cloud import EpisodeData
ep_objects = [
EpisodeData(data=ep["data"], type=ep.get("type", "text"))
if isinstance(ep, dict) else ep
for ep in episodes
]
result = self._client.graph.add_batch(graph_id=graph_id, episodes=ep_objects)
return [ep.uuid_ for ep in (result or [])]
def get_episode(self, uuid_: str) -> Any:
return self._client.graph.episode.get(uuid_=uuid_)
def get_all_nodes(self, graph_id: str) -> List[Dict[str, Any]]:
nodes = _fetch_all(self._client.graph.node.get_by_graph_id, graph_id)
return [
{
"uuid": getattr(n, "uuid_", None) or getattr(n, "uuid", None),
"name": getattr(n, "name", ""),
"labels": list(getattr(n, "labels", []) or []),
"summary": getattr(n, "summary", ""),
"attributes": dict(getattr(n, "attributes", {}) or {}),
"created_at": str(getattr(n, "created_at", "")),
}
for n in nodes
]
def get_all_edges(self, graph_id: str) -> List[Dict[str, Any]]:
edges = _fetch_all(self._client.graph.edge.get_by_graph_id, graph_id)
return [
{
"uuid": getattr(e, "uuid_", None) or getattr(e, "uuid", None),
"name": getattr(e, "name", ""),
"fact": getattr(e, "fact", ""),
"source_node_uuid": getattr(e, "source_node_uuid", None),
"target_node_uuid": getattr(e, "target_node_uuid", None),
"fact_type": getattr(e, "fact_type", None),
"attributes": dict(getattr(e, "attributes", {}) or {}),
"created_at": str(getattr(e, "created_at", "")),
"valid_at": str(getattr(e, "valid_at", "")),
"invalid_at": str(getattr(e, "invalid_at", "")),
"expired_at": str(getattr(e, "expired_at", "")),
"episodes": list(getattr(e, "episodes", []) or []),
}
for e in edges
]
def get_node(self, uuid_: str) -> Dict[str, Any]:
n = self._client.graph.node.get(uuid_=uuid_)
return {
"uuid": getattr(n, "uuid_", None) or getattr(n, "uuid", None),
"name": getattr(n, "name", ""),
"labels": list(getattr(n, "labels", []) or []),
"summary": getattr(n, "summary", ""),
"attributes": dict(getattr(n, "attributes", {}) or {}),
}
def get_node_edges(self, node_uuid: str) -> List[Dict[str, Any]]:
for attempt in range(_MAX_RETRIES):
try:
edges = self._client.graph.node.get_entity_edges(node_uuid=node_uuid) or []
return [
{
"uuid": getattr(e, "uuid_", None) or getattr(e, "uuid", None),
"name": getattr(e, "name", ""),
"fact": getattr(e, "fact", ""),
"source_node_uuid": getattr(e, "source_node_uuid", None),
"target_node_uuid": getattr(e, "target_node_uuid", None),
}
for e in edges
]
except (ConnectionError, TimeoutError, OSError, InternalServerError):
if attempt == _MAX_RETRIES - 1:
raise
time.sleep(_RETRY_DELAY * (2 ** attempt))
return []
def search(self, graph_id: str, query: str, limit: int = 10, scope: str = "edges") -> Dict[str, Any]:
result = self._client.graph.search(
graph_id=graph_id,
query=query,
limit=limit,
scope=scope,
reranker="cross_encoder",
)
edges = getattr(result, "edges", []) or []
nodes = getattr(result, "nodes", []) or []
return {"edges": edges, "nodes": nodes}
def add_text(self, graph_id: str, data: str) -> None:
self._client.graph.add(graph_id=graph_id, type="text", data=data)
def delete_graph(self, graph_id: str) -> None:
self._client.graph.delete(graph_id=graph_id)

View File

@ -10,12 +10,9 @@ import threading
from typing import Dict, Any, List, Optional, Callable from typing import Dict, Any, List, Optional, Callable
from dataclasses import dataclass from dataclasses import dataclass
from zep_cloud.client import Zep
from zep_cloud import EpisodeData, EntityEdgeSourceTarget
from ..config import Config from ..config import Config
from ..graph import get_graph_backend
from ..models.task import TaskManager, TaskStatus from ..models.task import TaskManager, TaskStatus
from ..utils.zep_paging import fetch_all_nodes, fetch_all_edges
from .text_processor import TextProcessor from .text_processor import TextProcessor
from ..utils.locale import t, get_locale, set_locale from ..utils.locale import t, get_locale, set_locale
@ -43,12 +40,8 @@ class GraphBuilderService:
Responsible for calling the Zep API to build the knowledge graph. Responsible for calling the Zep API to build the knowledge graph.
""" """
def __init__(self, api_key: Optional[str] = None): def __init__(self):
self.api_key = api_key or Config.ZEP_API_KEY self._graph = get_graph_backend()
if not self.api_key:
raise ValueError("ZEP_API_KEY is not configured")
self.client = Zep(api_key=self.api_key)
self.task_manager = TaskManager() self.task_manager = TaskManager()
def build_graph_async( def build_graph_async(
@ -191,19 +184,36 @@ class GraphBuilderService:
self.task_manager.fail_task(task_id, error_msg) self.task_manager.fail_task(task_id, error_msg)
def create_graph(self, name: str) -> str: def create_graph(self, name: str) -> str:
"""Create a Zep graph (public method)""" """Create a graph (public method)"""
graph_id = f"mirofish_{uuid.uuid4().hex[:16]}" graph_id = f"mirofish_{uuid.uuid4().hex[:16]}"
self._graph.create_graph(
self.client.graph.create(
graph_id=graph_id, graph_id=graph_id,
name=name, name=name,
description="MiroFish Social Simulation Graph" description="MiroFish Social Simulation Graph"
) )
return graph_id return graph_id
def set_ontology(self, graph_id: str, ontology: Dict[str, Any]): def set_ontology(self, graph_id: str, ontology: Dict[str, Any]):
"""Set graph ontology (public method)""" """Set graph ontology (public method)"""
from ..config import Config
if Config.GRAPH_BACKEND != "zep":
entities = {
e["name"]: {
"description": e.get("description", ""),
"attributes": e.get("attributes", []),
}
for e in ontology.get("entity_types", [])
}
edges = {
e["name"]: {
"description": e.get("description", ""),
"attributes": e.get("attributes", []),
}
for e in ontology.get("edge_types", [])
}
self._graph.set_ontology(graph_ids=[graph_id], entities=entities, edges=edges)
return
import warnings import warnings
from typing import Optional from typing import Optional
from pydantic import Field from pydantic import Field
@ -217,60 +227,51 @@ class GraphBuilderService:
RESERVED_NAMES = {'uuid', 'name', 'group_id', 'name_embedding', 'summary', 'created_at'} RESERVED_NAMES = {'uuid', 'name', 'group_id', 'name_embedding', 'summary', 'created_at'}
def safe_attr_name(attr_name: str) -> str: def safe_attr_name(attr_name: str) -> str:
"""Convert reserved names to safe attribute names"""
if attr_name.lower() in RESERVED_NAMES: if attr_name.lower() in RESERVED_NAMES:
return f"entity_{attr_name}" return f"entity_{attr_name}"
return attr_name return attr_name
# Dynamically create entity types # Dynamically create entity types
entity_types = {} entity_types = {}
for entity_def in ontology.get("entity_types", []): for entity_def in ontology.get("entity_types", []):
name = entity_def["name"] name = entity_def["name"]
description = entity_def.get("description", f"A {name} entity.") description = entity_def.get("description", f"A {name} entity.")
# Build attribute dict and type annotations (required by Pydantic v2)
attrs = {"__doc__": description} attrs = {"__doc__": description}
annotations = {} annotations = {}
for attr_def in entity_def.get("attributes", []): for attr_def in entity_def.get("attributes", []):
attr_name = safe_attr_name(attr_def["name"]) # Use safe name attr_name = safe_attr_name(attr_def["name"])
attr_desc = attr_def.get("description", attr_name) attr_desc = attr_def.get("description", attr_name)
# Zep API requires Field description — this is mandatory
attrs[attr_name] = Field(description=attr_desc, default=None) attrs[attr_name] = Field(description=attr_desc, default=None)
annotations[attr_name] = Optional[EntityText] # Type annotation annotations[attr_name] = Optional[EntityText]
attrs["__annotations__"] = annotations attrs["__annotations__"] = annotations
# Dynamically create class
entity_class = type(name, (EntityModel,), attrs) entity_class = type(name, (EntityModel,), attrs)
entity_class.__doc__ = description entity_class.__doc__ = description
entity_types[name] = entity_class entity_types[name] = entity_class
# Dynamically create edge types # Dynamically create edge types
edge_definitions = {} edge_definitions = {}
for edge_def in ontology.get("edge_types", []): for edge_def in ontology.get("edge_types", []):
name = edge_def["name"] name = edge_def["name"]
description = edge_def.get("description", f"A {name} relationship.") description = edge_def.get("description", f"A {name} relationship.")
# Build attribute dict and type annotations
attrs = {"__doc__": description} attrs = {"__doc__": description}
annotations = {} annotations = {}
for attr_def in edge_def.get("attributes", []): for attr_def in edge_def.get("attributes", []):
attr_name = safe_attr_name(attr_def["name"]) # Use safe name attr_name = safe_attr_name(attr_def["name"])
attr_desc = attr_def.get("description", attr_name) attr_desc = attr_def.get("description", attr_name)
# Zep API requires Field description — this is mandatory
attrs[attr_name] = Field(description=attr_desc, default=None) attrs[attr_name] = Field(description=attr_desc, default=None)
annotations[attr_name] = Optional[str] # Edge attributes use str type annotations[attr_name] = Optional[str]
attrs["__annotations__"] = annotations attrs["__annotations__"] = annotations
# Dynamically create class
class_name = ''.join(word.capitalize() for word in name.split('_')) class_name = ''.join(word.capitalize() for word in name.split('_'))
edge_class = type(class_name, (EdgeModel,), attrs) edge_class = type(class_name, (EdgeModel,), attrs)
edge_class.__doc__ = description edge_class.__doc__ = description
# Build source_targets from zep_cloud import EntityEdgeSourceTarget
source_targets = [] source_targets = []
for st in edge_def.get("source_targets", []): for st in edge_def.get("source_targets", []):
source_targets.append( source_targets.append(
@ -279,13 +280,12 @@ class GraphBuilderService:
target=st.get("target", "Entity") target=st.get("target", "Entity")
) )
) )
if source_targets: if source_targets:
edge_definitions[name] = (edge_class, source_targets) edge_definitions[name] = (edge_class, source_targets)
# Call Zep API to set ontology
if entity_types or edge_definitions: if entity_types or edge_definitions:
self.client.graph.set_ontology( self._graph.set_ontology(
graph_ids=[graph_id], graph_ids=[graph_id],
entities=entity_types if entity_types else None, entities=entity_types if entity_types else None,
edges=edge_definitions if edge_definitions else None, edges=edge_definitions if edge_definitions else None,
@ -314,25 +314,14 @@ class GraphBuilderService:
progress progress
) )
# Build episode data
episodes = [ episodes = [
EpisodeData(data=chunk, type="text") {"data": chunk, "type": "text"}
for chunk in batch_chunks for chunk in batch_chunks
] ]
# Send to Zep
try: try:
batch_result = self.client.graph.add_batch( returned_uuids = self._graph.add_batch(graph_id=graph_id, episodes=episodes)
graph_id=graph_id, episode_uuids.extend(returned_uuids)
episodes=episodes
)
# Collect returned episode UUIDs
if batch_result and isinstance(batch_result, list):
for ep in batch_result:
ep_uuid = getattr(ep, 'uuid_', None) or getattr(ep, 'uuid', None)
if ep_uuid:
episode_uuids.append(ep_uuid)
# Avoid sending requests too quickly # Avoid sending requests too quickly
time.sleep(1) time.sleep(1)
@ -376,7 +365,7 @@ class GraphBuilderService:
# Check processing status of each episode # Check processing status of each episode
for ep_uuid in list(pending_episodes): for ep_uuid in list(pending_episodes):
try: try:
episode = self.client.graph.episode.get(uuid_=ep_uuid) episode = self._graph.get_episode(ep_uuid)
is_processed = getattr(episode, 'processed', False) is_processed = getattr(episode, 'processed', False)
if is_processed: if is_processed:
@ -402,19 +391,14 @@ class GraphBuilderService:
def _get_graph_info(self, graph_id: str) -> GraphInfo: def _get_graph_info(self, graph_id: str) -> GraphInfo:
"""Retrieve graph info""" """Retrieve graph info"""
# Fetch nodes (paginated) nodes = self._graph.get_all_nodes(graph_id)
nodes = fetch_all_nodes(self.client, graph_id) edges = self._graph.get_all_edges(graph_id)
# Fetch edges (paginated)
edges = fetch_all_edges(self.client, graph_id)
# Count entity types
entity_types = set() entity_types = set()
for node in nodes: for node in nodes:
if node.labels: for label in node.get("labels", []):
for label in node.labels: if label not in ["Entity", "Node"]:
if label not in ["Entity", "Node"]: entity_types.add(label)
entity_types.add(label)
return GraphInfo( return GraphInfo(
graph_id=graph_id, graph_id=graph_id,
@ -424,83 +408,25 @@ class GraphBuilderService:
) )
def get_graph_data(self, graph_id: str) -> Dict[str, Any]: def get_graph_data(self, graph_id: str) -> Dict[str, Any]:
""" """Retrieve full graph data (nodes + edges with timestamps and attributes)."""
Retrieve full graph data (with detailed information). nodes = self._graph.get_all_nodes(graph_id)
edges = self._graph.get_all_edges(graph_id)
Args: node_map = {n["uuid"]: n.get("name", "") for n in nodes}
graph_id: graph ID
Returns:
Dictionary containing nodes and edges with timestamps, attributes, and other details
"""
nodes = fetch_all_nodes(self.client, graph_id)
edges = fetch_all_edges(self.client, graph_id)
# Build node map for looking up node names
node_map = {}
for node in nodes:
node_map[node.uuid_] = node.name or ""
nodes_data = []
for node in nodes:
# Get creation timestamp
created_at = getattr(node, 'created_at', None)
if created_at:
created_at = str(created_at)
nodes_data.append({
"uuid": node.uuid_,
"name": node.name,
"labels": node.labels or [],
"summary": node.summary or "",
"attributes": node.attributes or {},
"created_at": created_at,
})
edges_data = []
for edge in edges:
# Get timestamps
created_at = getattr(edge, 'created_at', None)
valid_at = getattr(edge, 'valid_at', None)
invalid_at = getattr(edge, 'invalid_at', None)
expired_at = getattr(edge, 'expired_at', None)
# Get episodes
episodes = getattr(edge, 'episodes', None) or getattr(edge, 'episode_ids', None)
if episodes and not isinstance(episodes, list):
episodes = [str(episodes)]
elif episodes:
episodes = [str(e) for e in episodes]
# Get fact_type
fact_type = getattr(edge, 'fact_type', None) or edge.name or ""
edges_data.append({
"uuid": edge.uuid_,
"name": edge.name or "",
"fact": edge.fact or "",
"fact_type": fact_type,
"source_node_uuid": edge.source_node_uuid,
"target_node_uuid": edge.target_node_uuid,
"source_node_name": node_map.get(edge.source_node_uuid, ""),
"target_node_name": node_map.get(edge.target_node_uuid, ""),
"attributes": edge.attributes or {},
"created_at": str(created_at) if created_at else None,
"valid_at": str(valid_at) if valid_at else None,
"invalid_at": str(invalid_at) if invalid_at else None,
"expired_at": str(expired_at) if expired_at else None,
"episodes": episodes or [],
})
return { return {
"graph_id": graph_id, "graph_id": graph_id,
"nodes": nodes_data, "nodes": nodes,
"edges": edges_data, "edges": [
"node_count": len(nodes_data), {**e, "source_node_name": node_map.get(e.get("source_node_uuid", ""), ""),
"edge_count": len(edges_data), "target_node_name": node_map.get(e.get("target_node_uuid", ""), "")}
for e in edges
],
"node_count": len(nodes),
"edge_count": len(edges),
} }
def delete_graph(self, graph_id: str): def delete_graph(self, graph_id: str):
"""Delete graph""" """Delete graph"""
self.client.graph.delete(graph_id=graph_id) self._graph.delete_graph(graph_id)

View File

@ -22,6 +22,7 @@ from zep_cloud.client import Zep
from ..config import Config from ..config import Config
from ..utils.logger import get_logger from ..utils.logger import get_logger
from ..utils.locale import get_language_instruction, get_locale, set_locale, t from ..utils.locale import get_language_instruction, get_locale, set_locale, t
from ..utils.llm_client import parse_azure_url
from .zep_entity_reader import EntityNode, ZepEntityReader from .zep_entity_reader import EntityNode, ZepEntityReader
logger = get_logger('mirofish.oasis_profile') logger = get_logger('mirofish.oasis_profile')
@ -190,15 +191,17 @@ class OasisProfileGenerator:
graph_id: Optional[str] = None graph_id: Optional[str] = None
): ):
self.api_key = api_key or Config.LLM_API_KEY self.api_key = api_key or Config.LLM_API_KEY
self.base_url = base_url or Config.LLM_BASE_URL raw_url = base_url or Config.LLM_BASE_URL
self.model_name = model_name or Config.LLM_MODEL_NAME self.model_name = model_name or Config.LLM_MODEL_NAME
if not self.api_key: if not self.api_key:
raise ValueError("LLM_API_KEY is not configured") raise ValueError("LLM_API_KEY is not configured")
self.base_url, _default_query = parse_azure_url(raw_url)
self.client = OpenAI( self.client = OpenAI(
api_key=self.api_key, api_key=self.api_key,
base_url=self.base_url base_url=self.base_url,
default_query=_default_query if _default_query else None
) )
# Zep client for enriching context via retrieval # Zep client for enriching context via retrieval

View File

@ -8,7 +8,8 @@ import logging
import re import re
from typing import Dict, Any, List, Optional from typing import Dict, Any, List, Optional
from ..utils.llm_client import LLMClient from ..utils.llm_client import LLMClient
from ..utils.locale import get_language_instruction from ..utils.locale import get_language_instruction, t
from ..config import Config
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -62,29 +63,29 @@ Please output JSON format with the following structure:
{ {
"entity_types": [ "entity_types": [
{ {
"name": "Entity type name (English, PascalCase)", "name": "Entity type name (PascalCase, in the language specified by the language instruction)",
"description": "Brief description (English, max 100 characters)", "description": "Brief description (in the language specified by the language instruction, max 100 characters)",
"attributes": [ "attributes": [
{ {
"name": "Attribute name (English, snake_case)", "name": "Attribute name (snake_case, in the language specified by the language instruction)",
"type": "text", "type": "text",
"description": "Attribute description" "description": "Attribute description (in the language specified by the language instruction)"
} }
], ],
"examples": ["Example entity 1", "Example entity 2"] "examples": ["Example entity 1 (in the language specified by the language instruction)", "Example entity 2"]
} }
], ],
"edge_types": [ "edge_types": [
{ {
"name": "Relationship type name (English, UPPER_SNAKE_CASE)", "name": "Relationship type name (UPPER_SNAKE_CASE, in the language specified by the language instruction)",
"description": "Brief description (English, max 100 characters)", "description": "Brief description (in the language specified by the language instruction, max 100 characters)",
"source_targets": [ "source_targets": [
{"source": "Source entity type", "target": "Target entity type"} {"source": "Source entity type", "target": "Target entity type"}
], ],
"attributes": [] "attributes": []
} }
], ],
"analysis_summary": "Brief analysis summary of the text content" "analysis_summary": "Brief analysis summary of the text content (in the language specified by the language instruction)"
} }
``` ```
@ -92,20 +93,21 @@ Please output JSON format with the following structure:
### 1. Entity Type Design — Must Be Strictly Followed ### 1. Entity Type Design — Must Be Strictly Followed
**Quantity requirement: exactly 10 entity types** **Quantity requirement: see the mandatory rules in the user message**
**Hierarchy requirement (must include both specific types and fallback types)**: **Hierarchy requirement (must include both specific types and fallback types)**:
Your 10 entity types must include the following levels: Your entity types must include the following levels:
A. **Fallback types (required, placed as the last 2 in the list)**: A. **Fallback types (required, placed as the last 2 in the list)**:
- `Person`: Fallback type for any individual person. Use this when a person does not fit any other more specific person type. - `Person`: Fallback type for any individual person. Use this when a person does not fit any other more specific person type.
- `Organization`: Fallback type for any organization. Use this when an organization does not fit any other more specific organization type. - `Organization`: Fallback type for any organization. Use this when an organization does not fit any other more specific organization type.
B. **Specific types (8 types, designed based on text content)**: B. **Specific types (designed based on text content)**:
- Design more specific types for the main roles that appear in the text - Design more specific types for the main roles that appear in the text
- Example: if the text involves an academic event, you might have `Student`, `Professor`, `University` - Example: if the text involves an academic event, you might have `Student`, `Professor`, `University`, `ResearchGroup`, `Alumni`, etc.
- Example: if the text involves a business event, you might have `Company`, `CEO`, `Employee` - Example: if the text involves a business event, you might have `Company`, `CEO`, `Employee`, `Investor`, `Regulator`, etc.
- Ensure broad coverage of all actor categories present in the text
**Why fallback types are needed**: **Why fallback types are needed**:
- Various people appear in text, such as "primary and secondary school teachers", "passersby", "some netizen" - Various people appear in text, such as "primary and secondary school teachers", "passersby", "some netizen"
@ -119,9 +121,10 @@ B. **Specific types (8 types, designed based on text content)**:
### 2. Relationship Type Design ### 2. Relationship Type Design
- Quantity: 6-10 - Quantity: see the mandatory rules in the user message
- Relationships should reflect real connections in social media interactions - Relationships should reflect real connections in social media interactions
- Ensure the source_targets in relationships cover the entity types you have defined - Ensure the source_targets in relationships cover the entity types you have defined
- Aim for rich coverage: include hierarchical, collaborative, adversarial, and informational relationships
### 3. Attribute Design ### 3. Attribute Design
@ -129,47 +132,23 @@ B. **Specific types (8 types, designed based on text content)**:
- **Note**: Attribute names must not use `name`, `uuid`, `group_id`, `created_at`, `summary` (these are system reserved words) - **Note**: Attribute names must not use `name`, `uuid`, `group_id`, `created_at`, `summary` (these are system reserved words)
- Recommended: `full_name`, `title`, `role`, `position`, `location`, `description`, etc. - Recommended: `full_name`, `title`, `role`, `position`, `location`, `description`, etc.
## Entity Type Reference ## Entity and Relationship Type Reference
**Individual types (specific)**: Use the language specified in the language instruction for ALL names. Keep PascalCase for entity names and UPPER_SNAKE_CASE for relationship names, but use words from the target language.
- Student: student
- Professor: professor/scholar
- Journalist: journalist
- Celebrity: celebrity/influencer
- Executive: corporate executive
- Official: government official
- Lawyer: lawyer
- Doctor: doctor
**Individual types (fallback)**: **Individual type examples** (translate to target language):
- Person: any individual (use when not fitting the specific types above) - A person who is a student StudentName in target language, PascalCase
- A person who is a journalist JournalistName in target language, PascalCase
- Fallback for any individual PersonName in target language, PascalCase
**Organization types (specific)**: **Organization type examples** (translate to target language):
- University: university/college - A university UniversityName in target language, PascalCase
- Company: company/enterprise - A government agency AgencyName in target language, PascalCase
- GovernmentAgency: government agency - Fallback for any organization OrganizationName in target language, PascalCase
- MediaOutlet: media organization
- Hospital: hospital
- School: primary/secondary school
- NGO: non-governmental organization
**Organization types (fallback)**: **Relationship type examples** (translate to target language):
- Organization: any organization (use when not fitting the specific types above) - works for WORKS_FOR translated to target language, UPPER_SNAKE_CASE
- reports on REPORTS_ON translated to target language, UPPER_SNAKE_CASE
## Relationship Type Reference
- WORKS_FOR: works for
- STUDIES_AT: studies at
- AFFILIATED_WITH: affiliated with
- REPRESENTS: represents
- REGULATES: regulates
- REPORTS_ON: reports on
- COMMENTS_ON: comments on
- RESPONDS_TO: responds to
- SUPPORTS: supports
- OPPOSES: opposes
- COLLABORATES_WITH: collaborates with
- COMPETES_WITH: competes with
""" """
@ -209,17 +188,17 @@ class OntologyGenerator:
lang_instruction lang_instruction
) )
system_prompt = f"LANGUAGE INSTRUCTION (HIGHEST PRIORITY — MUST BE FOLLOWED): {lang_instruction} All description fields, analysis_summary, and examples MUST be written in this language.\n\n{ONTOLOGY_SYSTEM_PROMPT}\n\n{lang_instruction}\nIMPORTANT: Entity type names MUST be in English PascalCase (e.g., 'PersonEntity', 'MediaOrganization'). Relationship type names MUST be in English UPPER_SNAKE_CASE (e.g., 'WORKS_FOR'). Attribute names MUST be in English snake_case. Only description fields and analysis_summary should use the specified language above." system_prompt = f"LANGUAGE INSTRUCTION (HIGHEST PRIORITY — MUST BE FOLLOWED): {lang_instruction} ALL fields including names, descriptions, analysis_summary, and examples MUST be written in this language.\n\n{ONTOLOGY_SYSTEM_PROMPT}\n\n{lang_instruction}\nIMPORTANT: Entity type names MUST be in PascalCase (e.g., 'AgenciaGovern', 'FuncionariPublic'). Relationship type names MUST be in UPPER_SNAKE_CASE (e.g., 'TREBALLA_PER', 'RESPON_A'). Attribute names MUST be in snake_case. All names, descriptions, and examples must use the language specified above."
messages = [ messages = [
{"role": "system", "content": system_prompt}, {"role": "system", "content": system_prompt},
{"role": "user", "content": user_message} {"role": "user", "content": user_message}
] ]
# Call LLM # Call LLM — token budget scales with ONTOLOGY_MAX_ENTITY_TYPES / ONTOLOGY_MAX_EDGE_TYPES
result = self.llm_client.chat_json( result = self.llm_client.chat_json(
messages=messages, messages=messages,
temperature=0.3, temperature=0.3,
max_tokens=4096 max_tokens=8192
) )
# Validate and post-process # Validate and post-process
@ -264,15 +243,21 @@ class OntologyGenerator:
{additional_context} {additional_context}
""" """
max_entities = Config.ONTOLOGY_MAX_ENTITY_TYPES
max_edges = Config.ONTOLOGY_MAX_EDGE_TYPES
specific_entities = max_entities - 2
edge_min = max(1, max_edges - 2)
message += f""" message += f"""
Based on the content above, design entity types and relationship types suitable for social opinion simulation. Based on the content above, design entity types and relationship types suitable for social opinion simulation.
**Mandatory rules**: **Mandatory rules**:
1. Output exactly 10 entity types 1. Output exactly {max_entities} entity types
2. The last 2 must be fallback types: Person (individual fallback) and Organization (organization fallback) 2. The last 2 must be fallback types: Person (individual fallback) and Organization (organization fallback)
3. The first 8 are specific types designed from the document content 3. The first {specific_entities} are specific types designed from the document content
4. All entity types must be real-world subjects capable of speaking out, not abstract concepts 4. All entity types must be real-world subjects capable of speaking out, not abstract concepts
5. Attribute names must not use reserved words: name, uuid, group_id use full_name, org_name, etc. instead 5. Attribute names must not use reserved words: name, uuid, group_id use full_name, org_name, etc. instead
6. Output {edge_min}-{max_edges} relationship types covering hierarchical, collaborative, adversarial, and informational relationships
{lang_instruction} {lang_instruction}
""" """
@ -330,9 +315,8 @@ Based on the content above, design entity types and relationship types suitable
if len(edge.get("description", "")) > 100: if len(edge.get("description", "")) > 100:
edge["description"] = edge["description"][:97] + "..." edge["description"] = edge["description"][:97] + "..."
# Zep API limit: maximum 10 custom entity types and 10 custom edge types MAX_ENTITY_TYPES = Config.ONTOLOGY_MAX_ENTITY_TYPES
MAX_ENTITY_TYPES = 10 MAX_EDGE_TYPES = Config.ONTOLOGY_MAX_EDGE_TYPES
MAX_EDGE_TYPES = 10
# Deduplicate: keep first occurrence by name # Deduplicate: keep first occurrence by name
seen_names = set() seen_names = set()
@ -346,31 +330,35 @@ Based on the content above, design entity types and relationship types suitable
logger.warning(f"Duplicate entity type '{name}' removed during validation") logger.warning(f"Duplicate entity type '{name}' removed during validation")
result["entity_types"] = deduped result["entity_types"] = deduped
# Fallback type definitions # Fallback type definitions — names and descriptions come from i18n so they match
# the locale used for the rest of the ontology (e.g. "Persona"/"Organització" in Catalan).
person_fallback_name = _to_pascal_case(t("step1.ontologyFallbackPersonName") or "Person")
org_fallback_name = _to_pascal_case(t("step1.ontologyFallbackOrgName") or "Organization")
person_fallback = { person_fallback = {
"name": "Person", "name": person_fallback_name,
"description": "Any individual person not fitting other specific person types.", "description": t("step1.ontologyFallbackPersonDesc") or "Any individual person not fitting other specific person types.",
"attributes": [ "attributes": [
{"name": "full_name", "type": "text", "description": "Full name of the person"}, {"name": "full_name", "type": "text", "description": "Full name of the person"},
{"name": "role", "type": "text", "description": "Role or occupation"} {"name": "role", "type": "text", "description": "Role or occupation"}
], ],
"examples": ["ordinary citizen", "anonymous netizen"] "examples": t("step1.ontologyFallbackPersonExamples") or ["ordinary citizen", "anonymous netizen"]
} }
organization_fallback = { organization_fallback = {
"name": "Organization", "name": org_fallback_name,
"description": "Any organization not fitting other specific organization types.", "description": t("step1.ontologyFallbackOrgDesc") or "Any organization not fitting other specific organization types.",
"attributes": [ "attributes": [
{"name": "org_name", "type": "text", "description": "Name of the organization"}, {"name": "org_name", "type": "text", "description": "Name of the organization"},
{"name": "org_type", "type": "text", "description": "Type of organization"} {"name": "org_type", "type": "text", "description": "Type of organization"}
], ],
"examples": ["small business", "community group"] "examples": t("step1.ontologyFallbackOrgExamples") or ["small business", "community group"]
} }
# Check whether fallback types already exist # Check whether fallback types already exist (match by i18n name)
entity_names = {e["name"] for e in result["entity_types"]} entity_names = {e["name"] for e in result["entity_types"]}
has_person = "Person" in entity_names has_person = person_fallback_name in entity_names
has_organization = "Organization" in entity_names has_organization = org_fallback_name in entity_names
# Collect fallback types to add # Collect fallback types to add
fallbacks_to_add = [] fallbacks_to_add = []
@ -383,11 +371,9 @@ Based on the content above, design entity types and relationship types suitable
current_count = len(result["entity_types"]) current_count = len(result["entity_types"])
needed_slots = len(fallbacks_to_add) needed_slots = len(fallbacks_to_add)
# If adding them would exceed 10, remove some existing types # If adding them would exceed the limit, remove some existing types from the end
if current_count + needed_slots > MAX_ENTITY_TYPES: if current_count + needed_slots > MAX_ENTITY_TYPES:
# Calculate how many to remove
to_remove = current_count + needed_slots - MAX_ENTITY_TYPES to_remove = current_count + needed_slots - MAX_ENTITY_TYPES
# Remove from the end (preserve the more important specific types at the front)
result["entity_types"] = result["entity_types"][:-to_remove] result["entity_types"] = result["entity_types"][:-to_remove]
# Add fallback types # Add fallback types

View File

@ -23,6 +23,7 @@ from openai import OpenAI
from ..config import Config from ..config import Config
from ..utils.logger import get_logger from ..utils.logger import get_logger
from ..utils.locale import get_language_instruction, t from ..utils.locale import get_language_instruction, t
from ..utils.llm_client import parse_azure_url
from .zep_entity_reader import EntityNode, ZepEntityReader from .zep_entity_reader import EntityNode, ZepEntityReader
logger = get_logger('mirofish.simulation_config') logger = get_logger('mirofish.simulation_config')
@ -231,15 +232,17 @@ class SimulationConfigGenerator:
model_name: Optional[str] = None model_name: Optional[str] = None
): ):
self.api_key = api_key or Config.LLM_API_KEY self.api_key = api_key or Config.LLM_API_KEY
self.base_url = base_url or Config.LLM_BASE_URL raw_url = base_url or Config.LLM_BASE_URL
self.model_name = model_name or Config.LLM_MODEL_NAME self.model_name = model_name or Config.LLM_MODEL_NAME
if not self.api_key: if not self.api_key:
raise ValueError("LLM_API_KEY is not configured") raise ValueError("LLM_API_KEY is not configured")
self.base_url, _default_query = parse_azure_url(raw_url)
self.client = OpenAI( self.client = OpenAI(
api_key=self.api_key, api_key=self.api_key,
base_url=self.base_url base_url=self.base_url,
default_query=_default_query if _default_query else None
) )
def generate_config( def generate_config(

View File

@ -7,11 +7,9 @@ import time
from typing import Dict, Any, List, Optional, Set, Callable, TypeVar from typing import Dict, Any, List, Optional, Set, Callable, TypeVar
from dataclasses import dataclass, field from dataclasses import dataclass, field
from zep_cloud.client import Zep
from ..config import Config from ..config import Config
from ..graph import get_graph_backend
from ..utils.logger import get_logger from ..utils.logger import get_logger
from ..utils.zep_paging import fetch_all_nodes, fetch_all_edges
logger = get_logger('mirofish.zep_entity_reader') logger = get_logger('mirofish.zep_entity_reader')
@ -79,11 +77,7 @@ class ZepEntityReader:
""" """
def __init__(self, api_key: Optional[str] = None): def __init__(self, api_key: Optional[str] = None):
self.api_key = api_key or Config.ZEP_API_KEY self._graph = get_graph_backend()
if not self.api_key:
raise ValueError("ZEP_API_KEY is not configured")
self.client = Zep(api_key=self.api_key)
def _call_with_retry( def _call_with_retry(
self, self,
@ -136,18 +130,7 @@ class ZepEntityReader:
""" """
logger.info(f"Fetching all nodes for graph {graph_id}...") logger.info(f"Fetching all nodes for graph {graph_id}...")
nodes = fetch_all_nodes(self.client, graph_id) nodes_data = self._graph.get_all_nodes(graph_id)
nodes_data = []
for node in nodes:
nodes_data.append({
"uuid": getattr(node, 'uuid_', None) or getattr(node, 'uuid', ''),
"name": node.name or "",
"labels": node.labels or [],
"summary": node.summary or "",
"attributes": node.attributes or {},
})
logger.info(f"Fetched {len(nodes_data)} nodes") logger.info(f"Fetched {len(nodes_data)} nodes")
return nodes_data return nodes_data
@ -163,19 +146,7 @@ class ZepEntityReader:
""" """
logger.info(f"Fetching all edges for graph {graph_id}...") logger.info(f"Fetching all edges for graph {graph_id}...")
edges = fetch_all_edges(self.client, graph_id) edges_data = self._graph.get_all_edges(graph_id)
edges_data = []
for edge in edges:
edges_data.append({
"uuid": getattr(edge, 'uuid_', None) or getattr(edge, 'uuid', ''),
"name": edge.name or "",
"fact": edge.fact or "",
"source_node_uuid": edge.source_node_uuid,
"target_node_uuid": edge.target_node_uuid,
"attributes": edge.attributes or {},
})
logger.info(f"Fetched {len(edges_data)} edges") logger.info(f"Fetched {len(edges_data)} edges")
return edges_data return edges_data
@ -190,24 +161,7 @@ class ZepEntityReader:
Edge list Edge list
""" """
try: try:
# Call Zep API with retry return self._graph.get_node_edges(node_uuid)
edges = self._call_with_retry(
func=lambda: self.client.graph.node.get_entity_edges(node_uuid=node_uuid),
operation_name=f"get node edges (node={node_uuid[:8]}...)"
)
edges_data = []
for edge in edges:
edges_data.append({
"uuid": getattr(edge, 'uuid_', None) or getattr(edge, 'uuid', ''),
"name": edge.name or "",
"fact": edge.fact or "",
"source_node_uuid": edge.source_node_uuid,
"target_node_uuid": edge.target_node_uuid,
"attributes": edge.attributes or {},
})
return edges_data
except Exception as e: except Exception as e:
logger.warning(f"Failed to get edges for node {node_uuid}: {str(e)}") logger.warning(f"Failed to get edges for node {node_uuid}: {str(e)}")
return [] return []
@ -346,11 +300,7 @@ class ZepEntityReader:
EntityNode or None EntityNode or None
""" """
try: try:
# Get the node with retry node = self._graph.get_node(entity_uuid)
node = self._call_with_retry(
func=lambda: self.client.graph.node.get(uuid_=entity_uuid),
operation_name=f"get node detail (uuid={entity_uuid[:8]}...)"
)
if not node: if not node:
return None return None
@ -397,11 +347,11 @@ class ZepEntityReader:
}) })
return EntityNode( return EntityNode(
uuid=getattr(node, 'uuid_', None) or getattr(node, 'uuid', ''), uuid=node.get("uuid", ""),
name=node.name or "", name=node.get("name", ""),
labels=node.labels or [], labels=node.get("labels", []),
summary=node.summary or "", summary=node.get("summary", ""),
attributes=node.attributes or {}, attributes=node.get("attributes", {}),
related_edges=related_edges, related_edges=related_edges,
related_nodes=related_nodes, related_nodes=related_nodes,
) )

View File

@ -12,9 +12,8 @@ from dataclasses import dataclass
from datetime import datetime from datetime import datetime
from queue import Queue, Empty from queue import Queue, Empty
from zep_cloud.client import Zep
from ..config import Config from ..config import Config
from ..graph import get_graph_backend
from ..utils.logger import get_logger from ..utils.logger import get_logger
from ..utils.locale import get_locale, set_locale from ..utils.locale import get_locale, set_locale
@ -240,12 +239,7 @@ class ZepGraphMemoryUpdater:
api_key: Zep API key (optional; defaults to config value) api_key: Zep API key (optional; defaults to config value)
""" """
self.graph_id = graph_id self.graph_id = graph_id
self.api_key = api_key or Config.ZEP_API_KEY self._graph = get_graph_backend()
if not self.api_key:
raise ValueError("ZEP_API_KEY is not configured")
self.client = Zep(api_key=self.api_key)
# Activity queue # Activity queue
self._activity_queue: Queue = Queue() self._activity_queue: Queue = Queue()
@ -413,11 +407,7 @@ class ZepGraphMemoryUpdater:
# Send with retry # Send with retry
for attempt in range(self.MAX_RETRIES): for attempt in range(self.MAX_RETRIES):
try: try:
self.client.graph.add( self._graph.add_text(self.graph_id, combined_text)
graph_id=self.graph_id,
type="text",
data=combined_text
)
self._total_sent += 1 self._total_sent += 1
self._total_items_sent += len(activities) self._total_items_sent += len(activities)

View File

@ -13,13 +13,11 @@ import json
from typing import Dict, Any, List, Optional from typing import Dict, Any, List, Optional
from dataclasses import dataclass, field from dataclasses import dataclass, field
from zep_cloud.client import Zep
from ..config import Config from ..config import Config
from ..graph import get_graph_backend
from ..utils.logger import get_logger from ..utils.logger import get_logger
from ..utils.llm_client import LLMClient from ..utils.llm_client import LLMClient
from ..utils.locale import get_locale, t from ..utils.locale import get_locale, t
from ..utils.zep_paging import fetch_all_nodes, fetch_all_edges
logger = get_logger('mirofish.zep_tools') logger = get_logger('mirofish.zep_tools')
@ -423,12 +421,7 @@ class ZepToolsService:
RETRY_DELAY = 2.0 RETRY_DELAY = 2.0
def __init__(self, api_key: Optional[str] = None, llm_client: Optional[LLMClient] = None): def __init__(self, api_key: Optional[str] = None, llm_client: Optional[LLMClient] = None):
self.api_key = api_key or Config.ZEP_API_KEY self._graph = get_graph_backend()
if not self.api_key:
raise ValueError("ZEP_API_KEY is not configured")
self.client = Zep(api_key=self.api_key)
# LLM client used by InsightForge to generate sub-queries
self._llm_client = llm_client self._llm_client = llm_client
logger.info(t("console.zepToolsInitialized")) logger.info(t("console.zepToolsInitialized"))
@ -485,51 +478,38 @@ class ZepToolsService:
""" """
logger.info(t("console.graphSearch", graphId=graph_id, query=query[:50])) logger.info(t("console.graphSearch", graphId=graph_id, query=query[:50]))
# Try using the Zep Cloud Search API
try: try:
search_results = self._call_with_retry( raw = self._graph.search(graph_id=graph_id, query=query, limit=limit, scope=scope)
func=lambda: self.client.graph.search(
graph_id=graph_id,
query=query,
limit=limit,
scope=scope,
reranker="cross_encoder"
),
operation_name=t("console.graphSearchOp", graphId=graph_id)
)
facts = [] facts = []
edges = [] edges = []
nodes = [] nodes = []
# Parse edge search results for edge in raw.get("edges", []) or []:
if hasattr(search_results, 'edges') and search_results.edges: fact = edge.get("fact", "") if isinstance(edge, dict) else getattr(edge, "fact", "")
for edge in search_results.edges: if fact:
if hasattr(edge, 'fact') and edge.fact: facts.append(fact)
facts.append(edge.fact) edges.append(edge if isinstance(edge, dict) else {
edges.append({ "uuid": getattr(edge, "uuid_", None) or getattr(edge, "uuid", ""),
"uuid": getattr(edge, 'uuid_', None) or getattr(edge, 'uuid', ''), "name": getattr(edge, "name", ""),
"name": getattr(edge, 'name', ''), "fact": getattr(edge, "fact", ""),
"fact": getattr(edge, 'fact', ''), "source_node_uuid": getattr(edge, "source_node_uuid", ""),
"source_node_uuid": getattr(edge, 'source_node_uuid', ''), "target_node_uuid": getattr(edge, "target_node_uuid", ""),
"target_node_uuid": getattr(edge, 'target_node_uuid', ''), })
})
for node in raw.get("nodes", []) or []:
# Parse node search results node_dict = node if isinstance(node, dict) else {
if hasattr(search_results, 'nodes') and search_results.nodes: "uuid": getattr(node, "uuid_", None) or getattr(node, "uuid", ""),
for node in search_results.nodes: "name": getattr(node, "name", ""),
nodes.append({ "labels": getattr(node, "labels", []),
"uuid": getattr(node, 'uuid_', None) or getattr(node, 'uuid', ''), "summary": getattr(node, "summary", ""),
"name": getattr(node, 'name', ''), }
"labels": getattr(node, 'labels', []), nodes.append(node_dict)
"summary": getattr(node, 'summary', ''), if node_dict.get("summary"):
}) facts.append(f"[{node_dict['name']}]: {node_dict['summary']}")
# Node summaries count as facts too
if hasattr(node, 'summary') and node.summary:
facts.append(f"[{node.name}]: {node.summary}")
logger.info(t("console.searchComplete", count=len(facts))) logger.info(t("console.searchComplete", count=len(facts)))
return SearchResult( return SearchResult(
facts=facts, facts=facts,
edges=edges, edges=edges,
@ -659,18 +639,18 @@ class ZepToolsService:
""" """
logger.info(t("console.fetchingAllNodes", graphId=graph_id)) logger.info(t("console.fetchingAllNodes", graphId=graph_id))
nodes = fetch_all_nodes(self.client, graph_id) nodes = self._graph.get_all_nodes(graph_id)
result = [] result = [
for node in nodes: NodeInfo(
node_uuid = getattr(node, 'uuid_', None) or getattr(node, 'uuid', None) or "" uuid=n.get("uuid", ""),
result.append(NodeInfo( name=n.get("name", ""),
uuid=str(node_uuid) if node_uuid else "", labels=n.get("labels", []),
name=node.name or "", summary=n.get("summary", ""),
labels=node.labels or [], attributes=n.get("attributes", {})
summary=node.summary or "", )
attributes=node.attributes or {} for n in nodes
)) ]
logger.info(t("console.fetchedNodes", count=len(result))) logger.info(t("console.fetchedNodes", count=len(result)))
return result return result
@ -688,26 +668,22 @@ class ZepToolsService:
""" """
logger.info(t("console.fetchingAllEdges", graphId=graph_id)) logger.info(t("console.fetchingAllEdges", graphId=graph_id))
edges = fetch_all_edges(self.client, graph_id) edges = self._graph.get_all_edges(graph_id)
result = [] result = []
for edge in edges: for e in edges:
edge_uuid = getattr(edge, 'uuid_', None) or getattr(edge, 'uuid', None) or ""
edge_info = EdgeInfo( edge_info = EdgeInfo(
uuid=str(edge_uuid) if edge_uuid else "", uuid=e.get("uuid", ""),
name=edge.name or "", name=e.get("name", ""),
fact=edge.fact or "", fact=e.get("fact", ""),
source_node_uuid=edge.source_node_uuid or "", source_node_uuid=e.get("source_node_uuid", ""),
target_node_uuid=edge.target_node_uuid or "" target_node_uuid=e.get("target_node_uuid", ""),
) )
# Add temporal info
if include_temporal: if include_temporal:
edge_info.created_at = getattr(edge, 'created_at', None) edge_info.created_at = e.get("created_at")
edge_info.valid_at = getattr(edge, 'valid_at', None) edge_info.valid_at = e.get("valid_at")
edge_info.invalid_at = getattr(edge, 'invalid_at', None) edge_info.invalid_at = e.get("invalid_at")
edge_info.expired_at = getattr(edge, 'expired_at', None) edge_info.expired_at = e.get("expired_at")
result.append(edge_info) result.append(edge_info)
logger.info(t("console.fetchedEdges", count=len(result))) logger.info(t("console.fetchedEdges", count=len(result)))
@ -726,20 +702,17 @@ class ZepToolsService:
logger.info(t("console.fetchingNodeDetail", uuid=node_uuid[:8])) logger.info(t("console.fetchingNodeDetail", uuid=node_uuid[:8]))
try: try:
node = self._call_with_retry( node = self._graph.get_node(node_uuid)
func=lambda: self.client.graph.node.get(uuid_=node_uuid),
operation_name=t("console.fetchNodeDetailOp", uuid=node_uuid[:8])
)
if not node: if not node:
return None return None
return NodeInfo( return NodeInfo(
uuid=getattr(node, 'uuid_', None) or getattr(node, 'uuid', ''), uuid=node.get("uuid", ""),
name=node.name or "", name=node.get("name", ""),
labels=node.labels or [], labels=node.get("labels", []),
summary=node.summary or "", summary=node.get("summary", ""),
attributes=node.attributes or {} attributes=node.get("attributes", {})
) )
except Exception as e: except Exception as e:
logger.error(t("console.fetchNodeDetailFailed", error=str(e))) logger.error(t("console.fetchNodeDetailFailed", error=str(e)))

View File

@ -12,6 +12,29 @@ from openai import OpenAI
from ..config import Config from ..config import Config
def parse_azure_url(raw_url: str):
"""Strip /chat/completions or /embeddings suffix from Azure endpoint URLs.
Azure Portal gives full URLs like:
https://<resource>.cognitiveservices.azure.com/openai/deployments/<model>/chat/completions?api-version=...
The OpenAI SDK expects a base_url and appends /chat/completions itself.
Returns (clean_base_url, default_query_dict).
"""
default_query: Dict[str, str] = {}
if raw_url and ('/chat/completions' in raw_url or '/embeddings' in raw_url):
parsed = urlparse(raw_url)
qs = parse_qs(parsed.query)
if 'api-version' in qs:
default_query['api-version'] = qs['api-version'][0]
clean_path = (parsed.path
.replace('/chat/completions', '')
.replace('/embeddings', '')
.rstrip('/'))
raw_url = urlunparse(parsed._replace(path=clean_path, query=''))
return raw_url, default_query
class LLMClient: class LLMClient:
"""LLM client""" """LLM client"""
@ -28,18 +51,11 @@ class LLMClient:
if not self.api_key: if not self.api_key:
raise ValueError("LLM_API_KEY is not configured") raise ValueError("LLM_API_KEY is not configured")
# Azure Portal provides full endpoint URLs like: # Google AI Studio OpenAI-compatible endpoint
# https://<resource>.cognitiveservices.azure.com/openai/deployments/<model>/chat/completions?api-version=... if (Config.LLM_PROVIDER or "").lower() == "gemini" and not base_url:
# The OpenAI SDK expects a base_url and appends /chat/completions itself, raw_url = "https://generativelanguage.googleapis.com/v1beta/openai/"
# so we strip that suffix and extract api-version as a default query param.
default_query: Dict[str, str] = {} raw_url, default_query = parse_azure_url(raw_url)
if raw_url and '/chat/completions' in raw_url:
parsed = urlparse(raw_url)
qs = parse_qs(parsed.query)
if 'api-version' in qs:
default_query['api-version'] = qs['api-version'][0]
clean_path = parsed.path.replace('/chat/completions', '').rstrip('/')
raw_url = urlunparse(parsed._replace(path=clean_path, query=''))
self.base_url = raw_url self.base_url = raw_url
self.client = OpenAI( self.client = OpenAI(

View File

@ -37,6 +37,10 @@ dependencies = [
] ]
[project.optional-dependencies] [project.optional-dependencies]
graphiti = [
"graphiti-core>=0.3.0",
"neo4j>=5.23.0",
]
dev = [ dev = [
"pytest>=8.0.0", "pytest>=8.0.0",
"pytest-asyncio>=0.23.0", "pytest-asyncio>=0.23.0",

View File

12
backend/tests/conftest.py Normal file
View File

@ -0,0 +1,12 @@
import pytest
@pytest.fixture(autouse=True)
def reset_graph_factory_singleton():
"""Reset the graph backend singleton before each test to avoid cross-test contamination."""
yield
try:
import backend.app.graph.factory as fmod
fmod._backend_instance = None
except ImportError:
pass

View File

@ -0,0 +1,124 @@
import pytest
from unittest.mock import MagicMock, patch
def test_graph_backend_has_required_methods():
from backend.app.graph.base import GraphBackend
required = [
"create_graph", "set_ontology", "add_batch", "get_episode",
"get_all_nodes", "get_all_edges", "get_node", "get_node_edges",
"search", "add_text", "delete_graph",
]
for method in required:
assert hasattr(GraphBackend, method), f"GraphBackend missing: {method}"
def test_config_graph_backend_default():
import os
os.environ.pop("GRAPH_BACKEND", None)
import importlib
import backend.app.config as cfg_mod
importlib.reload(cfg_mod)
assert cfg_mod.Config.GRAPH_BACKEND == "zep"
def test_config_zep_errors_when_key_missing():
import backend.app.config as cfg_mod
orig_backend = cfg_mod.Config.GRAPH_BACKEND
orig_key = cfg_mod.Config.ZEP_API_KEY
try:
cfg_mod.Config.GRAPH_BACKEND = "zep"
cfg_mod.Config.ZEP_API_KEY = None
errors = cfg_mod.Config.get_graph_config_errors()
assert any("ZEP_API_KEY" in e for e in errors)
finally:
cfg_mod.Config.GRAPH_BACKEND = orig_backend
cfg_mod.Config.ZEP_API_KEY = orig_key
def test_zep_backend_implements_interface():
from backend.app.graph.base import GraphBackend
from backend.app.graph.zep_backend import ZepBackend
assert issubclass(ZepBackend, GraphBackend)
def test_zep_backend_raises_without_key():
import backend.app.config as cfg_mod
orig = cfg_mod.Config.ZEP_API_KEY
try:
cfg_mod.Config.ZEP_API_KEY = None
from backend.app.graph.zep_backend import ZepBackend
with pytest.raises(ValueError, match="ZEP_API_KEY"):
ZepBackend()
finally:
cfg_mod.Config.ZEP_API_KEY = orig
def test_factory_returns_zep_by_default():
import backend.app.graph.factory as fmod
import backend.app.config as cfg
orig_backend = cfg.Config.GRAPH_BACKEND
orig_key = cfg.Config.ZEP_API_KEY
try:
cfg.Config.GRAPH_BACKEND = "zep"
cfg.Config.ZEP_API_KEY = "test-key"
fmod._backend_instance = None
backend_instance = fmod.get_graph_backend()
from backend.app.graph.zep_backend import ZepBackend
assert isinstance(backend_instance, ZepBackend)
finally:
cfg.Config.GRAPH_BACKEND = orig_backend
cfg.Config.ZEP_API_KEY = orig_key
fmod._backend_instance = None
def test_factory_raises_on_unknown_backend():
import backend.app.graph.factory as fmod
import backend.app.config as cfg
orig = cfg.Config.GRAPH_BACKEND
try:
cfg.Config.GRAPH_BACKEND = "unknown"
fmod._backend_instance = None
with pytest.raises(ValueError, match="Unknown GRAPH_BACKEND"):
fmod.get_graph_backend()
finally:
cfg.Config.GRAPH_BACKEND = orig
fmod._backend_instance = None
def test_graphiti_backend_importable():
try:
from backend.app.graph.graphiti_backend import GraphitiBackend
from backend.app.graph.base import GraphBackend
assert issubclass(GraphitiBackend, GraphBackend)
except ImportError as e:
pytest.skip(f"graphiti-core not installed: {e}")
def test_graphiti_backend_raises_without_password():
try:
from backend.app.graph.graphiti_backend import GraphitiBackend
except ImportError:
pytest.skip("graphiti-core not installed")
import backend.app.config as cfg_mod
orig = cfg_mod.Config.NEO4J_PASSWORD
try:
cfg_mod.Config.NEO4J_PASSWORD = None
with pytest.raises(ValueError, match="NEO4J_PASSWORD"):
GraphitiBackend()
finally:
cfg_mod.Config.NEO4J_PASSWORD = orig
def test_config_graphiti_errors_when_missing():
import backend.app.config as cfg_mod
orig_backend = cfg_mod.Config.GRAPH_BACKEND
orig_pw = cfg_mod.Config.NEO4J_PASSWORD
try:
cfg_mod.Config.GRAPH_BACKEND = "graphiti"
cfg_mod.Config.NEO4J_PASSWORD = None
errors = cfg_mod.Config.get_graph_config_errors()
assert len(errors) >= 1
finally:
cfg_mod.Config.GRAPH_BACKEND = orig_backend
cfg_mod.Config.NEO4J_PASSWORD = orig_pw

View File

@ -0,0 +1,72 @@
import pytest
from unittest.mock import patch, MagicMock
GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
def test_gemini_provider_sets_base_url_automatically():
import backend.app.config as cfg
orig_provider = cfg.Config.LLM_PROVIDER
orig_key = cfg.Config.LLM_API_KEY
orig_url = cfg.Config.LLM_BASE_URL
try:
cfg.Config.LLM_PROVIDER = "gemini"
cfg.Config.LLM_API_KEY = "AIzatest"
cfg.Config.LLM_BASE_URL = "https://api.openai.com/v1"
with patch("backend.app.utils.llm_client.OpenAI") as mock_openai:
mock_openai.return_value = MagicMock()
import importlib
import backend.app.utils.llm_client as lm
importlib.reload(lm)
client = lm.LLMClient()
assert GEMINI_URL in client.base_url
finally:
cfg.Config.LLM_PROVIDER = orig_provider
cfg.Config.LLM_API_KEY = orig_key
cfg.Config.LLM_BASE_URL = orig_url
def test_non_gemini_provider_uses_configured_url():
import backend.app.config as cfg
orig_provider = cfg.Config.LLM_PROVIDER
orig_key = cfg.Config.LLM_API_KEY
orig_url = cfg.Config.LLM_BASE_URL
try:
cfg.Config.LLM_PROVIDER = ""
cfg.Config.LLM_API_KEY = "sk-test"
cfg.Config.LLM_BASE_URL = "https://api.openai.com/v1"
with patch("backend.app.utils.llm_client.OpenAI") as mock_openai:
mock_openai.return_value = MagicMock()
import importlib
import backend.app.utils.llm_client as lm
importlib.reload(lm)
client = lm.LLMClient()
assert "openai.com" in client.base_url
finally:
cfg.Config.LLM_PROVIDER = orig_provider
cfg.Config.LLM_API_KEY = orig_key
cfg.Config.LLM_BASE_URL = orig_url
def test_explicit_base_url_overrides_gemini_auto():
"""If base_url is passed explicitly, it should NOT be replaced even if LLM_PROVIDER=gemini."""
import backend.app.config as cfg
orig_provider = cfg.Config.LLM_PROVIDER
orig_key = cfg.Config.LLM_API_KEY
try:
cfg.Config.LLM_PROVIDER = "gemini"
cfg.Config.LLM_API_KEY = "AIzatest"
with patch("backend.app.utils.llm_client.OpenAI") as mock_openai:
mock_openai.return_value = MagicMock()
import importlib
import backend.app.utils.llm_client as lm
importlib.reload(lm)
client = lm.LLMClient(base_url="https://custom.endpoint/v1")
assert "custom.endpoint" in client.base_url
assert GEMINI_URL not in client.base_url
finally:
cfg.Config.LLM_PROVIDER = orig_provider
cfg.Config.LLM_API_KEY = orig_key

View File

@ -475,6 +475,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" }, { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" },
] ]
[[package]]
name = "diskcache"
version = "5.6.3"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/3f/21/1c1ffc1a039ddcc459db43cc108658f32c57d271d7289a2794e401d0fdb6/diskcache-5.6.3.tar.gz", hash = "sha256:2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc", size = 67916, upload-time = "2023-08-31T06:12:00.316Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/3f/27/4570e78fc0bf5ea0ca45eb1de3818a23787af9b390c0b0a0033a1b8236f9/diskcache-5.6.3-py3-none-any.whl", hash = "sha256:5e31b2d5fbad117cc363ebaf6b689474db18a1f6438bc82358b024abd4c2ca19", size = 45550, upload-time = "2023-08-31T06:11:58.822Z" },
]
[[package]] [[package]]
name = "distlib" name = "distlib"
version = "0.4.0" version = "0.4.0"
@ -592,6 +601,24 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/51/c7/b64cae5dba3a1b138d7123ec36bb5ccd39d39939f18454407e5468f4763f/fsspec-2025.12.0-py3-none-any.whl", hash = "sha256:8bf1fe301b7d8acfa6e8571e3b1c3d158f909666642431cc78a1b7b4dbc5ec5b", size = 201422, upload-time = "2025-12-03T15:23:41.434Z" }, { url = "https://files.pythonhosted.org/packages/51/c7/b64cae5dba3a1b138d7123ec36bb5ccd39d39939f18454407e5468f4763f/fsspec-2025.12.0-py3-none-any.whl", hash = "sha256:8bf1fe301b7d8acfa6e8571e3b1c3d158f909666642431cc78a1b7b4dbc5ec5b", size = 201422, upload-time = "2025-12-03T15:23:41.434Z" },
] ]
[[package]]
name = "graphiti-core"
version = "0.11.6"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "diskcache" },
{ name = "neo4j" },
{ name = "numpy" },
{ name = "openai" },
{ name = "pydantic" },
{ name = "python-dotenv" },
{ name = "tenacity" },
]
sdist = { url = "https://files.pythonhosted.org/packages/30/94/3f84400e5f02ea8e9dc79784202de4173cbc16f4b3ad1bd4302da888e4d8/graphiti_core-0.11.6.tar.gz", hash = "sha256:31d26621834d7d4b8865059ab749feb18af15937b59c69598a640a5dfabea331", size = 71928, upload-time = "2025-05-15T17:58:02.304Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ac/2e/c8f22f01585bf173d1c82f6d4615511aebc75aeda764c69aa394446fa93c/graphiti_core-0.11.6-py3-none-any.whl", hash = "sha256:6ec4807a884f5ea88b942d0c8b7bcd2e107c7358ab4f98ef2a2092c229929707", size = 111001, upload-time = "2025-05-15T17:58:00.542Z" },
]
[[package]] [[package]]
name = "gunicorn" name = "gunicorn"
version = "25.3.0" version = "25.3.0"
@ -1275,6 +1302,10 @@ dev = [
{ name = "pytest" }, { name = "pytest" },
{ name = "pytest-asyncio" }, { name = "pytest-asyncio" },
] ]
graphiti = [
{ name = "graphiti-core" },
{ name = "neo4j" },
]
[package.dev-dependencies] [package.dev-dependencies]
dev = [ dev = [
@ -1290,7 +1321,9 @@ requires-dist = [
{ name = "charset-normalizer", specifier = ">=3.0.0" }, { name = "charset-normalizer", specifier = ">=3.0.0" },
{ name = "flask", specifier = ">=3.0.0" }, { name = "flask", specifier = ">=3.0.0" },
{ name = "flask-cors", specifier = ">=6.0.0" }, { name = "flask-cors", specifier = ">=6.0.0" },
{ name = "graphiti-core", marker = "extra == 'graphiti'", specifier = ">=0.3.0" },
{ name = "gunicorn", specifier = ">=22.0.0" }, { name = "gunicorn", specifier = ">=22.0.0" },
{ name = "neo4j", marker = "extra == 'graphiti'", specifier = ">=5.23.0" },
{ name = "openai", specifier = ">=1.0.0" }, { name = "openai", specifier = ">=1.0.0" },
{ name = "pipreqs", marker = "extra == 'dev'", specifier = ">=0.5.0" }, { name = "pipreqs", marker = "extra == 'dev'", specifier = ">=0.5.0" },
{ name = "pydantic", specifier = ">=2.0.0" }, { name = "pydantic", specifier = ">=2.0.0" },
@ -1301,7 +1334,7 @@ requires-dist = [
{ name = "python-dotenv", specifier = ">=1.0.0" }, { name = "python-dotenv", specifier = ">=1.0.0" },
{ name = "zep-cloud", specifier = "==3.13.0" }, { name = "zep-cloud", specifier = "==3.13.0" },
] ]
provides-extras = ["dev"] provides-extras = ["graphiti", "dev"]
[package.metadata.requires-dev] [package.metadata.requires-dev]
dev = [ dev = [
@ -3003,6 +3036,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252, upload-time = "2022-10-06T17:21:44.262Z" }, { url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252, upload-time = "2022-10-06T17:21:44.262Z" },
] ]
[[package]]
name = "tenacity"
version = "9.1.4"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/47/c6/ee486fd809e357697ee8a44d3d69222b344920433d3b6666ccd9b374630c/tenacity-9.1.4.tar.gz", hash = "sha256:adb31d4c263f2bd041081ab33b498309a57c77f9acf2db65aadf0898179cf93a", size = 49413, upload-time = "2026-02-07T10:45:33.841Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d7/c1/eb8f9debc45d3b7918a32ab756658a0904732f75e555402972246b0b8e71/tenacity-9.1.4-py3-none-any.whl", hash = "sha256:6095a360c919085f28c6527de529e76a06ad89b23659fa881ae0649b867a9d55", size = 28926, upload-time = "2026-02-07T10:45:32.24Z" },
]
[[package]] [[package]]
name = "texttable" name = "texttable"
version = "1.7.0" version = "1.7.0"

5
conftest.py Normal file
View File

@ -0,0 +1,5 @@
import sys
import os
# Allow `import backend.app.*` from the project root
sys.path.insert(0, os.path.dirname(__file__))

View File

@ -77,10 +77,12 @@
<div class="detail-section" v-if="selectedItem.data.attributes && Object.keys(selectedItem.data.attributes).length > 0"> <div class="detail-section" v-if="selectedItem.data.attributes && Object.keys(selectedItem.data.attributes).length > 0">
<div class="section-title">Properties:</div> <div class="section-title">Properties:</div>
<div class="properties-list"> <div class="properties-list">
<div v-for="(value, key) in selectedItem.data.attributes" :key="key" class="property-item"> <template v-for="(value, key) in selectedItem.data.attributes" :key="key">
<span class="property-key">{{ key }}:</span> <div class="property-item" v-if="!String(key).endsWith('_embedding') && !Array.isArray(value) && key !== 'summary'">
<span class="property-value">{{ value || 'None' }}</span> <span class="property-key">{{ key }}:</span>
</div> <span class="property-value">{{ value || 'None' }}</span>
</div>
</template>
</div> </div>
</div> </div>
@ -288,8 +290,10 @@ const entityTypes = computed(() => {
// //
const colors = ['#FF6B35', '#004E89', '#7B2D8E', '#1A936F', '#C5283D', '#E9724C', '#3498db', '#9b59b6', '#27ae60', '#f39c12'] const colors = ['#FF6B35', '#004E89', '#7B2D8E', '#1A936F', '#C5283D', '#E9724C', '#3498db', '#9b59b6', '#27ae60', '#f39c12']
props.graphData.nodes.forEach(node => { const HIDDEN = new Set(['Episodic', 'Community', 'EpisodicEdge'])
const type = node.labels?.find(l => l !== 'Entity') || 'Entity' const SKIP = new Set(['Entity', 'Episodic', 'Community'])
props.graphData.nodes.filter(n => !n.labels?.some(l => HIDDEN.has(l))).forEach(node => {
const type = node.labels?.find(l => !SKIP.has(l)) || 'Entity'
if (!typeMap[type]) { if (!typeMap[type]) {
typeMap[type] = { name: type, count: 0, color: colors[Object.keys(typeMap).length % colors.length] } typeMap[type] = { name: type, count: 0, color: colors[Object.keys(typeMap).length % colors.length] }
} }
@ -344,19 +348,23 @@ const renderGraph = () => {
svg.selectAll('*').remove() svg.selectAll('*').remove()
const nodesData = props.graphData.nodes || [] const HIDDEN_LABELS = new Set(['Episodic', 'Community', 'EpisodicEdge'])
const nodesData = (props.graphData.nodes || []).filter(
n => !n.labels?.some(l => HIDDEN_LABELS.has(l))
)
const edgesData = props.graphData.edges || [] const edgesData = props.graphData.edges || []
if (nodesData.length === 0) return if (nodesData.length === 0) return
// Prep data // Prep data
const nodeMap = {} const nodeMap = {}
nodesData.forEach(n => nodeMap[n.uuid] = n) nodesData.forEach(n => nodeMap[n.uuid] = n)
const SKIP_TYPE_LABELS = new Set(['Entity', 'Episodic', 'Community'])
const nodes = nodesData.map(n => ({ const nodes = nodesData.map(n => ({
id: n.uuid, id: n.uuid,
name: n.name || 'Unnamed', name: n.name || 'Unnamed',
type: n.labels?.find(l => l !== 'Entity') || 'Entity', type: n.labels?.find(l => !SKIP_TYPE_LABELS.has(l)) || 'Entity',
rawData: n rawData: n
})) }))

View File

@ -89,6 +89,12 @@
"ontologyGenerating": "Generant", "ontologyGenerating": "Generant",
"ontologyPending": "Pendent", "ontologyPending": "Pendent",
"ontologyDesc": "El LLM analitza el contingut del document i els requisits de simulació, extreu llavors de realitat i auto-genera una estructura d'ontologia adequada", "ontologyDesc": "El LLM analitza el contingut del document i els requisits de simulació, extreu llavors de realitat i auto-genera una estructura d'ontologia adequada",
"ontologyFallbackPersonName": "Persona",
"ontologyFallbackPersonDesc": "Qualsevol persona individual que no encaixa en altres tipus de persona més específics.",
"ontologyFallbackPersonExamples": ["ciutadà ordinari", "internauta anònim"],
"ontologyFallbackOrgName": "Organització",
"ontologyFallbackOrgDesc": "Qualsevol organització que no encaixa en altres tipus d'organització més específics.",
"ontologyFallbackOrgExamples": ["petita empresa", "grup comunitari"],
"analyzingDocs": "Analitzant documents...", "analyzingDocs": "Analitzant documents...",
"graphRagBuild": "Construcció de GraphRAG", "graphRagBuild": "Construcció de GraphRAG",
"graphRagDesc": "Basant-se en l'ontologia generada, els documents es divideixen automàticament en fragments i s'envien a Zep per construir un graf de coneixement, extraient entitats i relacions, formant memòria temporal i resums de comunitat", "graphRagDesc": "Basant-se en l'ontologia generada, els documents es divideixen automàticament en fragments i s'envien a Zep per construir un graf de coneixement, extraient entitats i relacions, formant memòria temporal i resums de comunitat",

View File

@ -89,6 +89,12 @@
"ontologyGenerating": "Generating", "ontologyGenerating": "Generating",
"ontologyPending": "Pending", "ontologyPending": "Pending",
"ontologyDesc": "LLM analyzes document content and simulation requirements, extracts reality seeds, and auto-generates a suitable ontology structure", "ontologyDesc": "LLM analyzes document content and simulation requirements, extracts reality seeds, and auto-generates a suitable ontology structure",
"ontologyFallbackPersonName": "Person",
"ontologyFallbackPersonDesc": "Any individual person not fitting other specific person types.",
"ontologyFallbackPersonExamples": ["ordinary citizen", "anonymous netizen"],
"ontologyFallbackOrgName": "Organization",
"ontologyFallbackOrgDesc": "Any organization not fitting other specific organization types.",
"ontologyFallbackOrgExamples": ["small business", "community group"],
"analyzingDocs": "Analyzing documents...", "analyzingDocs": "Analyzing documents...",
"graphRagBuild": "GraphRAG Build", "graphRagBuild": "GraphRAG Build",
"graphRagDesc": "Based on the generated ontology, documents are auto-chunked and sent to Zep to build a knowledge graph, extracting entities and relations, forming temporal memory and community summaries", "graphRagDesc": "Based on the generated ontology, documents are auto-chunked and sent to Zep to build a knowledge graph, extracting entities and relations, forming temporal memory and community summaries",

View File

@ -89,6 +89,12 @@
"ontologyGenerating": "Generando", "ontologyGenerating": "Generando",
"ontologyPending": "Pendiente", "ontologyPending": "Pendiente",
"ontologyDesc": "El LLM analiza el contenido del documento y los requisitos de simulación, extrae semillas de la realidad y genera automáticamente la estructura ontológica adecuada", "ontologyDesc": "El LLM analiza el contenido del documento y los requisitos de simulación, extrae semillas de la realidad y genera automáticamente la estructura ontológica adecuada",
"ontologyFallbackPersonName": "Person",
"ontologyFallbackPersonDesc": "Cualquier persona individual que no encaja en otros tipos de persona más específicos.",
"ontologyFallbackPersonExamples": ["ciudadano ordinario", "internauta anónimo"],
"ontologyFallbackOrgName": "Organization",
"ontologyFallbackOrgDesc": "Cualquier organización que no encaja en otros tipos de organización más específicos.",
"ontologyFallbackOrgExamples": ["pequeña empresa", "grupo comunitario"],
"analyzingDocs": "Analizando documentos...", "analyzingDocs": "Analizando documentos...",
"graphRagBuild": "Construcción de GraphRAG", "graphRagBuild": "Construcción de GraphRAG",
"graphRagDesc": "A partir de la ontología generada, los documentos se fragmentan automáticamente y se envían a Zep para construir un grafo de conocimiento, extrayendo entidades y relaciones, formando memoria temporal y resúmenes de comunidad", "graphRagDesc": "A partir de la ontología generada, los documentos se fragmentan automáticamente y se envían a Zep para construir un grafo de conocimiento, extrayendo entidades y relaciones, formando memoria temporal y resúmenes de comunidad",

View File

@ -89,6 +89,12 @@
"ontologyGenerating": "生成中", "ontologyGenerating": "生成中",
"ontologyPending": "等待", "ontologyPending": "等待",
"ontologyDesc": "LLM分析文档内容与模拟需求提取出现实种子自动生成合适的本体结构", "ontologyDesc": "LLM分析文档内容与模拟需求提取出现实种子自动生成合适的本体结构",
"ontologyFallbackPersonName": "Person",
"ontologyFallbackPersonDesc": "任何不适合其他具体人物类型的个人。",
"ontologyFallbackPersonExamples": ["普通市民", "匿名网友"],
"ontologyFallbackOrgName": "Organization",
"ontologyFallbackOrgDesc": "任何不适合其他具体组织类型的组织。",
"ontologyFallbackOrgExamples": ["小型企业", "社区团体"],
"analyzingDocs": "正在分析文档...", "analyzingDocs": "正在分析文档...",
"graphRagBuild": "GraphRAG构建", "graphRagBuild": "GraphRAG构建",
"graphRagDesc": "基于生成的本体,将文档自动分块后调用 Zep 构建知识图谱,提取实体和关系,并形成时序记忆与社区摘要", "graphRagDesc": "基于生成的本体,将文档自动分块后调用 Zep 构建知识图谱,提取实体和关系,并形成时序记忆与社区摘要",