From e7ebcdff75475332b377d985f53285cad987310a Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-14-53.eu-north-1.compute.internal>
Date: Sat, 25 Apr 2026 20:24:08 +0000
Subject: [PATCH] fix(graphiti): patch _extract_entity_attributes to flatten
 nested attr dicts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous fix applied _flatten_attributes too broadly, breaking graphiti's
internal ExtractedEntities validation (lists were converted to strings).

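Instead, monkey-patch only _extract_entity_attributes — the exact function
whose returned attribute dict ends up in node.attributes before the Neo4j
write. Lists are left untouched; only dict-valued attributes are flattened.
The Pydantic validate/dump round-trip in the Azure generic client is removed,
so it now returns the parsed JSON response unchanged.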

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 backend/app/graph/graphiti_backend.py | 41 ++++++++++++++++-----------
 1 file changed, 24 insertions(+), 17 deletions(-)

diff --git a/backend/app/graph/graphiti_backend.py b/backend/app/graph/graphiti_backend.py
index ad460c3a..40b31eac 100644
--- a/backend/app/graph/graphiti_backend.py
+++ b/backend/app/graph/graphiti_backend.py
@@ -26,12 +26,13 @@ def _neo4j_val(v: Any) -> Any:
 
 
 def _flatten_attributes(attrs: dict) -> dict:
-    """Flatten node.attributes so every value is a Neo4j-safe primitive.
+    """Flatten entity attribute dicts so every value is a Neo4j-safe primitive.
 
     Graphiti extracts entity attributes via a Pydantic model, but the raw LLM
     response sometimes wraps each value in a nested dict (e.g. {"value": "CTTI"}).
     Neo4j only accepts primitive types or arrays thereof, so we coerce any
-    dict/list value to its string representation.
+    dict value to its "value" or "text" entry when truthy, else to str(). Lists
+    are passed through unchanged because Neo4j supports array properties.
     """
     result = {}
     for k, v in attrs.items():
@@ -40,8 +41,6 @@ def _flatten_attributes(attrs: dict) -> dict:
         if isinstance(v, dict):
             # Unwrap {"value": "..."} pattern emitted by some LLMs; fall back to str()
             result[k] = v.get("value") or v.get("text") or str(v)
-        elif isinstance(v, list):
-            result[k] = ", ".join(str(i) for i in v)
         else:
             result[k] = v
     return result
@@ -91,19 +90,7 @@ def _make_azure_generic_client(config, client):
                     max_completion_tokens=max_tokens,
                     response_format=response_format,
                 )
-                raw = json.loads(response.choices[0].message.content or '{}')
-                if response_model is not None:
-                    # Validate through the Pydantic model and dump back to a flat dict
-                    # so that Neo4j never receives nested Maps as property values.
-                    try:
-                        raw = response_model.model_validate(raw).model_dump(
-                            mode='python', exclude_none=True
-                        )
-                        # Coerce any remaining non-primitive values to str
-                        raw = _flatten_attributes(raw)
-                    except Exception:
-                        pass
-                return raw
+                return json.loads(response.choices[0].message.content or '{}')
             except _openai.RateLimitError as e:
                 raise _RateLimitError from e
 
@@ -218,8 +205,28 @@ class GraphitiBackend(GraphBackend):
             embedder=embedder,
             cross_encoder=cross_encoder,
         )
+        self._patch_extract_entity_attributes()
         return client
 
+    @staticmethod
+    def _patch_extract_entity_attributes() -> None:
+        """Monkey-patch graphiti's _extract_entity_attributes to sanitize LLM output.
+
+        Nested-dict attribute values ({"value": "CTTI"}), which Neo4j rejects, are
+        flattened via _flatten_attributes. Idempotent: wrappers never stack.
+        """
+        import graphiti_core.utils.maintenance.node_operations as _node_ops
+
+        original = _node_ops._extract_entity_attributes
+        if getattr(original, "_flattens_attributes", False):
+            return  # an earlier client construction already installed the wrapper
+
+        async def _patched(*args, **kwargs):  # pass-through tracks upstream signature
+            result = await original(*args, **kwargs)
+            return _flatten_attributes(result) if isinstance(result, dict) else result
+        _patched._flattens_attributes = True  # marker checked by the guard above
+        _node_ops._extract_entity_attributes = _patched
+
     def create_graph(self, graph_id: str, name: str, description: str = "") -> None:
         logger.info(f"Graphiti graph namespace ready: {graph_id}")