fix(graph): pass small_model to graphiti so non-openai backends don't 404

Graphiti's OpenAIClient routes "simpler" extraction prompts to ModelSize.small, falling back to a hard-coded `gpt-4.1-nano` when no small_model is configured. Against Qwen/Dashscope (and other OpenAI-SDK-compatible endpoints that don't host that model) this surfaces as a 404 partway through a graph build, even when LLM_MODEL_NAME points at a working model. Add LLM_SMALL_MODEL_NAME (defaults to LLM_MODEL_NAME) and pass it through the Graphiti LLMConfig so the small slot is always populated with a model the configured endpoint actually serves. Closes #37
2026-05-11 13:12:21 +02:00 · 2026-05-11 13:12:21 +02:00 · 8af800a122
parent 04a00ac437
commit 8af800a122
3 changed files with 12 additions and 0 deletions
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -64,6 +64,12 @@ Copy `.env.example` to `.env`:
 LLM_API_KEY              # Required
 LLM_BASE_URL             # Default: https://dashscope.aliyuncs.com/compatible-mode/v1
 LLM_MODEL_NAME           # Default: qwen-plus
+LLM_SMALL_MODEL_NAME     # Default: value of LLM_MODEL_NAME
+                         # Used by Graphiti for simpler extraction prompts
+                         # (ModelSize.small). Must be served by the same endpoint
+                         # as LLM_MODEL_NAME. Defaulting to LLM_MODEL_NAME avoids
+                         # Graphiti's hard-coded `gpt-4.1-nano` fallback, which
+                         # 404s on non-OpenAI backends (Qwen, GLM, Ollama, ...).

 # Neo4j + Graphiti (knowledge graph)
 NEO4J_URI                # Default: bolt://localhost:7687
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -33,6 +33,11 @@ class Config:
    LLM_API_KEY = os.environ.get('LLM_API_KEY')
    LLM_BASE_URL = os.environ.get('LLM_BASE_URL', 'https://api.openai.com/v1')
    LLM_MODEL_NAME = os.environ.get('LLM_MODEL_NAME', 'gpt-4o-mini')
+    # Optional smaller/cheaper model used by Graphiti for simpler extraction
+    # prompts (ModelSize.small). Defaults to LLM_MODEL_NAME so non-OpenAI
+    # backends (Qwen/Dashscope, GLM, Ollama via /v1, ...) don't fall through
+    # to Graphiti's hard-coded `gpt-4.1-nano`, which would 404.
+    LLM_SMALL_MODEL_NAME = os.environ.get('LLM_SMALL_MODEL_NAME') or os.environ.get('LLM_MODEL_NAME', 'gpt-4o-mini')

    # Neo4j + Graphiti settings (replacement for Zep Cloud).
    NEO4J_URI = os.environ.get('NEO4J_URI', 'bolt://localhost:7687')
--- a/backend/app/services/graphiti_adapter.py
+++ b/backend/app/services/graphiti_adapter.py
@@ -132,6 +132,7 @@ def _build_llm_and_embedder(provider: str):
                api_key=Config.LLM_API_KEY,
                base_url=Config.LLM_BASE_URL,
                model=Config.LLM_MODEL_NAME,
+                small_model=Config.LLM_SMALL_MODEL_NAME,
            )
        )
        embedder = OpenAIEmbedder(