From 8af800a1224a17ced2ddaf09925f7a7a25b90483 Mon Sep 17 00:00:00 2001 From: Dominik Seemann Date: Mon, 11 May 2026 13:12:21 +0200 Subject: [PATCH] fix(graph): pass small_model to graphiti so non-openai backends don't 404 Graphiti's OpenAIClient routes "simpler" extraction prompts to ModelSize.small, falling back to a hard-coded `gpt-4.1-nano` when no small_model is configured. Against Qwen/Dashscope (and other OpenAI-SDK endpoints that don't host that model) this surfaces as a 404 mid-graph build, even when LLM_MODEL_NAME points at a working model. Add LLM_SMALL_MODEL_NAME (defaults to LLM_MODEL_NAME) and pass it through the Graphiti LLMConfig so the small slot is always populated with a model the configured endpoint actually serves. Closes #37 --- CLAUDE.md | 6 ++++++ backend/app/config.py | 5 +++++ backend/app/services/graphiti_adapter.py | 1 + 3 files changed, 12 insertions(+) diff --git a/CLAUDE.md b/CLAUDE.md index ccb99029..bf8dce8b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -64,6 +64,12 @@ Copy `.env.example` to `.env`: LLM_API_KEY # Required LLM_BASE_URL # Default: https://dashscope.aliyuncs.com/compatible-mode/v1 LLM_MODEL_NAME # Default: qwen-plus +LLM_SMALL_MODEL_NAME # Default: value of LLM_MODEL_NAME + # Used by Graphiti for simpler extraction prompts + # (ModelSize.small). Must be served by the endpoint + # at LLM_BASE_URL. Without a small model, Graphiti + # uses its hard-coded `gpt-4.1-nano`, which 404s on + # non-OpenAI backends (Qwen, GLM, Ollama, ...). 
# Neo4j + Graphiti (knowledge graph) NEO4J_URI # Default: bolt://localhost:7687 diff --git a/backend/app/config.py b/backend/app/config.py index 8477b23a..1f2b9835 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -33,6 +33,11 @@ class Config: LLM_API_KEY = os.environ.get('LLM_API_KEY') LLM_BASE_URL = os.environ.get('LLM_BASE_URL', 'https://api.openai.com/v1') LLM_MODEL_NAME = os.environ.get('LLM_MODEL_NAME', 'gpt-4o-mini') + # Optional smaller/cheaper model used by Graphiti for simpler extraction + # prompts (ModelSize.small). Defaults to LLM_MODEL_NAME so non-OpenAI + # backends (Qwen/Dashscope, GLM, Ollama via /v1, ...) don't fall through + # to Graphiti's hard-coded `gpt-4.1-nano`, which would 404. + LLM_SMALL_MODEL_NAME = os.environ.get('LLM_SMALL_MODEL_NAME') or os.environ.get('LLM_MODEL_NAME', 'gpt-4o-mini') # Neo4j + Graphiti settings (replacement for Zep Cloud). NEO4J_URI = os.environ.get('NEO4J_URI', 'bolt://localhost:7687') diff --git a/backend/app/services/graphiti_adapter.py b/backend/app/services/graphiti_adapter.py index 13f4899e..72f4d9e3 100644 --- a/backend/app/services/graphiti_adapter.py +++ b/backend/app/services/graphiti_adapter.py @@ -132,6 +132,7 @@ def _build_llm_and_embedder(provider: str): api_key=Config.LLM_API_KEY, base_url=Config.LLM_BASE_URL, model=Config.LLM_MODEL_NAME, + small_model=Config.LLM_SMALL_MODEL_NAME, ) ) embedder = OpenAIEmbedder(