fix(graphiti): patch _extract_entity_attributes to flatten nested attr dicts

The previous fix applied _flatten_attributes too broadly, breaking graphiti's
internal ExtractedEntities validation (lists were converted to strings).

Instead, monkey-patch only _extract_entity_attributes — the function whose
returned attribute dict is stored in node.attributes before the Neo4j write.
Lists of primitives are preserved; only dict-valued attributes are flattened.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Ubuntu 2026-04-25 20:24:08 +00:00
parent 6bce05dca2
commit e7ebcdff75
1 changed file with 24 additions and 17 deletions

View File

@ -26,12 +26,13 @@ def _neo4j_val(v: Any) -> Any:
def _flatten_attributes(attrs: dict) -> dict:
"""Flatten node.attributes so every value is a Neo4j-safe primitive.
"""Flatten entity attribute dicts so every value is a Neo4j-safe primitive.
Graphiti extracts entity attributes via a Pydantic model, but the raw LLM
response sometimes wraps each value in a nested dict (e.g. {"value": "CTTI"}).
Neo4j only accepts primitive types or arrays thereof, so we coerce any
dict/list value to its string representation.
dict value to its string representation. Lists of primitives are kept as-is
because Neo4j supports array properties.
"""
result = {}
for k, v in attrs.items():
@ -40,8 +41,6 @@ def _flatten_attributes(attrs: dict) -> dict:
if isinstance(v, dict):
# Unwrap {"value": "..."} pattern emitted by some LLMs; fall back to str()
result[k] = v.get("value") or v.get("text") or str(v)
elif isinstance(v, list):
result[k] = ", ".join(str(i) for i in v)
else:
result[k] = v
return result
@ -91,19 +90,7 @@ def _make_azure_generic_client(config, client):
max_completion_tokens=max_tokens,
response_format=response_format,
)
raw = json.loads(response.choices[0].message.content or '{}')
if response_model is not None:
# Validate through the Pydantic model and dump back to a flat dict
# so that Neo4j never receives nested Maps as property values.
try:
raw = response_model.model_validate(raw).model_dump(
mode='python', exclude_none=True
)
# Coerce any remaining non-primitive values to str
raw = _flatten_attributes(raw)
except Exception:
pass
return raw
return json.loads(response.choices[0].message.content or '{}')
except _openai.RateLimitError as e:
raise _RateLimitError from e
@ -218,8 +205,28 @@ class GraphitiBackend(GraphBackend):
embedder=embedder,
cross_encoder=cross_encoder,
)
self._patch_extract_entity_attributes()
return client
@staticmethod
def _patch_extract_entity_attributes() -> None:
    """Monkey-patch graphiti's _extract_entity_attributes to sanitize LLM output.

    Some LLMs return attribute values as nested dicts ({"value": "CTTI"}) instead
    of plain strings, which Neo4j rejects as property values. The wrapper
    installed here passes the dict returned by the original function through
    ``_flatten_attributes`` before graphiti stores it in node.attributes.

    The patch is idempotent: calling this method again after the wrapper is
    installed is a no-op, so repeated client construction does not stack
    wrappers on top of each other.
    """
    import functools

    import graphiti_core.utils.maintenance.node_operations as _node_ops

    current = _node_ops._extract_entity_attributes
    # Already patched by an earlier call: wrapping again would only lengthen
    # the call chain without changing the result.
    if getattr(current, "_flattens_attributes", False):
        return

    @functools.wraps(current)
    async def _patched(*args, **kwargs):
        # Forward arguments untouched so the wrapper does not depend on the
        # exact signature of graphiti's private helper.
        result = await current(*args, **kwargs)
        # Only a non-empty dict needs flattening; anything else (None, empty
        # dict, unexpected type) is returned exactly as graphiti produced it.
        if isinstance(result, dict) and result:
            return _flatten_attributes(result)
        return result

    # Marker read by the guard above on subsequent calls.
    _patched._flattens_attributes = True
    _node_ops._extract_entity_attributes = _patched
def create_graph(self, graph_id: str, name: str, description: str = "") -> None:
logger.info(f"Graphiti graph namespace ready: {graph_id}")