fix(graphiti): patch _extract_entity_attributes to flatten nested attr dicts

The previous fix applied _flatten_attributes too broadly, breaking graphiti's internal ExtractedEntities validation (lists were converted to strings). Instead, monkey-patch only _extract_entity_attributes — the function whose returned attribute dict is stored in node.attributes before the Neo4j write. Lists of primitives are preserved; only dict-valued attributes are flattened.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-25 20:24:08 +00:00 · 2026-04-25 20:24:08 +00:00 · e7ebcdff75
parent 6bce05dca2
commit e7ebcdff75
1 changed files with 24 additions and 17 deletions
--- a/backend/app/graph/graphiti_backend.py
+++ b/backend/app/graph/graphiti_backend.py
@ -26,12 +26,13 @@ def _neo4j_val(v: Any) -> Any:


 def _flatten_attributes(attrs: dict) -> dict:
-    """Flatten node.attributes so every value is a Neo4j-safe primitive.
+    """Flatten entity attribute dicts so every value is a Neo4j-safe primitive.

    Graphiti extracts entity attributes via a Pydantic model, but the raw LLM
    response sometimes wraps each value in a nested dict (e.g. {"value": "CTTI"}).
    Neo4j only accepts primitive types or arrays thereof, so we coerce any
-    dict/list value to its string representation.
+    dict value to its string representation. Lists of primitives are kept as-is
+    because Neo4j supports array properties.
    """
    result = {}
    for k, v in attrs.items():
@ -40,8 +41,6 @@ def _flatten_attributes(attrs: dict) -> dict:
        if isinstance(v, dict):
            # Unwrap {"value": "..."} pattern emitted by some LLMs; fall back to str()
            result[k] = v.get("value") or v.get("text") or str(v)
-        elif isinstance(v, list):
-            result[k] = ", ".join(str(i) for i in v)
        else:
            result[k] = v
    return result
@ -91,19 +90,7 @@ def _make_azure_generic_client(config, client):
                    max_completion_tokens=max_tokens,
                    response_format=response_format,
                )
-                raw = json.loads(response.choices[0].message.content or '{}')
-                if response_model is not None:
-                    # Validate through the Pydantic model and dump back to a flat dict
-                    # so that Neo4j never receives nested Maps as property values.
-                    try:
-                        raw = response_model.model_validate(raw).model_dump(
-                            mode='python', exclude_none=True
-                        )
-                        # Coerce any remaining non-primitive values to str
-                        raw = _flatten_attributes(raw)
-                    except Exception:
-                        pass
-                return raw
+                return json.loads(response.choices[0].message.content or '{}')
            except _openai.RateLimitError as e:
                raise _RateLimitError from e

@ -218,8 +205,28 @@ class GraphitiBackend(GraphBackend):
            embedder=embedder,
            cross_encoder=cross_encoder,
        )
+        self._patch_extract_entity_attributes()
        return client

+    @staticmethod
+    def _patch_extract_entity_attributes() -> None:
+        """Monkey-patch graphiti's _extract_entity_attributes to sanitize LLM output.
+
+        Some LLMs return attribute values as nested dicts ({"value": "CTTI"}) instead
+        of plain strings. Neo4j rejects these with TypeError. We intercept the raw
+        llm_response dict before it is stored in node.attributes and flatten it.
+        """
+        import graphiti_core.utils.maintenance.node_operations as _node_ops
+
+        original = _node_ops._extract_entity_attributes
+
+        async def _patched(llm_client, node, episode, previous_episodes, entity_type):
+            result = await original(llm_client, node, episode, previous_episodes, entity_type)
+            # result is a dict — flatten any dict-valued attributes
+            return _flatten_attributes(result) if result else result
+
+        _node_ops._extract_entity_attributes = _patched
+
    def create_graph(self, graph_id: str, name: str, description: str = "") -> None:
        logger.info(f"Graphiti graph namespace ready: {graph_id}")