fix(ontology): handle string attributes from LLM response to prevent TypeError crash

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Ubuntu 2026-04-26 14:59:10 +00:00
parent 182a9525e7
commit 866ed421e2
3 changed files with 75 additions and 2 deletions

View File

@ -240,7 +240,7 @@ class GraphBuilderService:
attrs = {"__doc__": description}
annotations = {}
for attr_def in entity_def.get("attributes", []):
for attr_def in GraphBuilderService._normalize_entity_attributes(entity_def.get("attributes", [])):
attr_name = safe_attr_name(attr_def["name"])
attr_desc = attr_def.get("description", attr_name)
attrs[attr_name] = Field(description=attr_desc, default=None)
@ -260,7 +260,7 @@ class GraphBuilderService:
attrs = {"__doc__": description}
annotations = {}
for attr_def in edge_def.get("attributes", []):
for attr_def in GraphBuilderService._normalize_entity_attributes(edge_def.get("attributes", [])):
attr_name = safe_attr_name(attr_def["name"])
attr_desc = attr_def.get("description", attr_name)
attrs[attr_name] = Field(description=attr_desc, default=None)
@ -430,3 +430,14 @@ class GraphBuilderService:
"""Delete graph"""
self._graph.delete_graph(graph_id)
@staticmethod
def _normalize_entity_attributes(attributes: list) -> list:
    """Return attribute definitions as a list of dicts.

    LLM output does not always follow the expected schema: an attribute may
    arrive as a bare string instead of a ``{"name": ..., ...}`` dict, and the
    whole ``attributes`` value may be ``None`` or a single string.

    Args:
        attributes: Raw ``attributes`` value of an entity or edge definition.
            Normally a list whose items are dicts or strings.

    Returns:
        A new list containing only dicts. Strings are expanded to
        ``{"name": s, "type": "text", "description": s}``; dict items are
        passed through unchanged (same objects); items of any other type
        are dropped.
    """
    # ``dict.get("attributes", [])`` yields None when the key exists with a
    # null value, so the caller's default does not protect against it.
    if not attributes:
        return []
    # A lone string would otherwise be iterated character by character.
    if isinstance(attributes, str):
        attributes = [attributes]

    normalized = []
    for attr in attributes:
        if isinstance(attr, str):
            normalized.append({"name": attr, "type": "text", "description": attr})
        elif isinstance(attr, dict):
            normalized.append(attr)
        # Other item types (None, numbers, nested lists) carry no usable
        # attribute name and are intentionally skipped.
    return normalized

View File

@ -201,6 +201,9 @@ class OntologyGenerator:
max_tokens=8192
)
# Normalise string attributes before validation
result = OntologyGenerator._normalize_ontology_attributes(result)
# Validate and post-process
result = self._validate_and_process(result)
@ -264,6 +267,29 @@ Based on the content above, design entity types and relationship types suitable
return message
@staticmethod
def _normalize_ontology_attributes(ontology: dict) -> dict:
    """Normalize string attributes in an LLM-generated ontology (in-place).

    Handles both ``entities``/``edges`` keys (used in tests) and
    ``entity_types``/``edge_types`` keys (used in production LLM output).

    For every dict found under those keys, its ``"attributes"`` value is
    replaced by a list of dicts: string items become
    ``{"name": s, "type": "text", "description": s}``, dict items are kept
    as they are, and items of any other type are dropped. A missing or
    ``None`` ``"attributes"`` value becomes ``[]``; a lone string is treated
    as a single attribute.

    Args:
        ontology: Parsed LLM response. Mutated in place.

    Returns:
        The same ``ontology`` object, for call chaining. A non-dict input
        is returned untouched.
    """
    if not isinstance(ontology, dict):
        return ontology

    for key in ("entities", "entity_types", "edges", "edge_types"):
        # ``or []`` covers keys that are present but null in the response.
        for definition in ontology.get(key) or []:
            if not isinstance(definition, dict):
                continue
            raw_attrs = definition.get("attributes") or []
            # A lone string would otherwise be iterated character by character.
            if isinstance(raw_attrs, str):
                raw_attrs = [raw_attrs]
            definition["attributes"] = [
                attr if isinstance(attr, dict)
                else {"name": attr, "type": "text", "description": attr}
                for attr in raw_attrs
                if isinstance(attr, (dict, str))
            ]
    return ontology
def _validate_and_process(self, result: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and post-process the result"""

View File

@ -0,0 +1,36 @@
def test_graph_builder_normalizes_string_attributes():
    """Bare-string attributes are expanded to dicts; dict attributes survive."""
    from app.services.graph_builder import GraphBuilderService

    email_attr = {"name": "email", "type": "text", "description": "Email"}
    normalized = GraphBuilderService._normalize_entity_attributes(
        ["name", "age", email_attr]
    )

    # Every item must be a dict after normalization.
    for item in normalized:
        assert isinstance(item, dict)

    # Each string expands to a text attribute described by its own name.
    for position, label in enumerate(("name", "age")):
        assert normalized[position] == {
            "name": label,
            "type": "text",
            "description": label,
        }

    assert normalized[2]["name"] == "email"
def test_graph_builder_normalize_empty():
    """Normalizing an empty attribute list yields an empty list."""
    from app.services.graph_builder import GraphBuilderService

    normalized = GraphBuilderService._normalize_entity_attributes([])
    assert normalized == []
def test_ontology_generator_normalizes_string_attributes():
    """_normalize_ontology_attributes converts string attrs in entities and edges.

    Covers both key spellings the helper accepts: ``entities``/``edges`` and
    the ``entity_types``/``edge_types`` pair it documents as the production
    LLM output format. Also checks that attributes already given as dicts
    are left unchanged.
    """
    from app.services.ontology_generator import OntologyGenerator

    email_attr = {"name": "email", "type": "text", "description": "Email"}

    for entity_key, edge_key in (("entities", "edges"), ("entity_types", "edge_types")):
        raw = {
            entity_key: [
                {
                    "name": "Person",
                    "description": "A person",
                    "attributes": ["name", "age", dict(email_attr)],
                }
            ],
            edge_key: [{"name": "KNOWS", "description": "Knows", "attributes": ["since"]}],
        }
        result = OntologyGenerator._normalize_ontology_attributes(raw)

        entity_attrs = result[entity_key][0]["attributes"]
        assert all(isinstance(a, dict) for a in entity_attrs)
        assert entity_attrs[0] == {"name": "name", "type": "text", "description": "name"}
        assert entity_attrs[1] == {"name": "age", "type": "text", "description": "age"}
        # Dict attributes must pass through untouched.
        assert entity_attrs[2] == email_attr

        edge_attrs = result[edge_key][0]["attributes"]
        assert all(isinstance(a, dict) for a in edge_attrs)
        assert edge_attrs[0] == {"name": "since", "type": "text", "description": "since"}