From 866ed421e21cd6b959cab16497276716887f25c4 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Sun, 26 Apr 2026 14:59:10 +0000
Subject: [PATCH] fix(ontology): handle string attributes from LLM response to
 prevent TypeError crash

Also treat a null ``attributes`` value as empty and skip attribute items
that are neither strings nor dicts, so both normalizers behave the same.

Co-Authored-By: Claude Sonnet 4.6
---
Review note: hunk sizes and the diffstat reflect the revised helpers and
tests; the `index` blob IDs are carried over from the original patch.

 backend/app/services/graph_builder.py      | 20 ++++++--
 backend/app/services/ontology_generator.py | 25 +++++++++
 backend/tests/test_ontology_attributes.py  | 62 ++++++++++++++++++++++
 3 files changed, 105 insertions(+), 2 deletions(-)
 create mode 100644 backend/tests/test_ontology_attributes.py

diff --git a/backend/app/services/graph_builder.py b/backend/app/services/graph_builder.py
index 01882ef7..3f631b47 100644
--- a/backend/app/services/graph_builder.py
+++ b/backend/app/services/graph_builder.py
@@ -240,7 +240,7 @@ class GraphBuilderService:
             attrs = {"__doc__": description}
             annotations = {}
 
-            for attr_def in entity_def.get("attributes", []):
+            for attr_def in GraphBuilderService._normalize_entity_attributes(entity_def.get("attributes", [])):
                 attr_name = safe_attr_name(attr_def["name"])
                 attr_desc = attr_def.get("description", attr_name)
                 attrs[attr_name] = Field(description=attr_desc, default=None)
@@ -260,7 +260,7 @@ class GraphBuilderService:
             attrs = {"__doc__": description}
             annotations = {}
 
-            for attr_def in edge_def.get("attributes", []):
+            for attr_def in GraphBuilderService._normalize_entity_attributes(edge_def.get("attributes", [])):
                 attr_name = safe_attr_name(attr_def["name"])
                 attr_desc = attr_def.get("description", attr_name)
                 attrs[attr_name] = Field(description=attr_desc, default=None)
@@ -430,3 +430,19 @@ class GraphBuilderService:
         """Delete graph"""
         self._graph.delete_graph(graph_id)
 
+    @staticmethod
+    def _normalize_entity_attributes(attributes: list) -> list:
+        """Ensure each attribute item is a dict; convert strings to minimal dicts.
+
+        ``attributes`` comes from LLM output, so it may be ``None`` (JSON null)
+        or hold items that are neither ``str`` nor ``dict``; ``None`` is treated
+        as an empty list and such items are skipped.
+        """
+        result = []
+        for attr in attributes or []:
+            if isinstance(attr, str):
+                result.append({"name": attr, "type": "text", "description": attr})
+            elif isinstance(attr, dict):
+                result.append(attr)
+        return result
+
diff --git a/backend/app/services/ontology_generator.py b/backend/app/services/ontology_generator.py
index bf241a9f..afe95c18 100644
--- a/backend/app/services/ontology_generator.py
+++ b/backend/app/services/ontology_generator.py
@@ -201,6 +201,9 @@ class OntologyGenerator:
             max_tokens=8192
         )
 
+        # Normalise string attributes before validation
+        result = OntologyGenerator._normalize_ontology_attributes(result)
+
         # Validate and post-process
         result = self._validate_and_process(result)
 
@@ -264,6 +267,28 @@ Based on the content above, design entity types and relationship types suitable
 
         return message
 
+    @staticmethod
+    def _normalize_ontology_attributes(ontology: dict) -> dict:
+        """Normalize string attributes in LLM-generated ontology to dicts (in-place).
+
+        Handles both ``entities``/``edges`` keys (used in tests) and
+        ``entity_types``/``edge_types`` keys (used in production LLM output).
+        A ``None`` section or ``attributes`` value is treated as empty, and
+        attribute items that are neither ``str`` nor ``dict`` are dropped,
+        matching ``GraphBuilderService._normalize_entity_attributes``.
+        """
+        for key in ("entities", "entity_types", "edges", "edge_types"):
+            for type_def in ontology.get(key) or []:
+                if not isinstance(type_def, dict):
+                    continue  # not a type-definition dict; nothing to normalize
+                type_def["attributes"] = [
+                    attr if isinstance(attr, dict)
+                    else {"name": attr, "type": "text", "description": attr}
+                    for attr in type_def.get("attributes") or []
+                    if isinstance(attr, (str, dict))
+                ]
+        return ontology
+
     def _validate_and_process(self, result: Dict[str, Any]) -> Dict[str, Any]:
         """Validate and post-process the result"""
 
diff --git a/backend/tests/test_ontology_attributes.py b/backend/tests/test_ontology_attributes.py
new file mode 100644
index 00000000..dae487aa
--- /dev/null
+++ b/backend/tests/test_ontology_attributes.py
@@ -0,0 +1,62 @@
+def test_graph_builder_normalizes_string_attributes():
+    """_normalize_entity_attributes converts strings to dicts without crashing."""
+    from app.services.graph_builder import GraphBuilderService
+
+    mixed = ["name", "age", {"name": "email", "type": "text", "description": "Email"}]
+    result = GraphBuilderService._normalize_entity_attributes(mixed)
+
+    assert all(isinstance(a, dict) for a in result)
+    assert result[0] == {"name": "name", "type": "text", "description": "name"}
+    assert result[1] == {"name": "age", "type": "text", "description": "age"}
+    assert result[2]["name"] == "email"
+
+
+def test_graph_builder_normalize_empty():
+    """Empty attribute list returns empty list."""
+    from app.services.graph_builder import GraphBuilderService
+    assert GraphBuilderService._normalize_entity_attributes([]) == []
+
+
+def test_graph_builder_normalize_none_and_invalid_items():
+    """None is treated as empty; non-str/non-dict items are skipped."""
+    from app.services.graph_builder import GraphBuilderService
+
+    assert GraphBuilderService._normalize_entity_attributes(None) == []
+    assert GraphBuilderService._normalize_entity_attributes(["name", None, 3]) == [
+        {"name": "name", "type": "text", "description": "name"}
+    ]
+
+
+def test_ontology_generator_normalizes_string_attributes():
+    """_normalize_ontology_attributes converts string attrs in entities and edges."""
+    from app.services.ontology_generator import OntologyGenerator
+
+    raw = {
+        "entities": [{"name": "Person", "description": "A person", "attributes": ["name", "age"]}],
+        "edges": [{"name": "KNOWS", "description": "Knows", "attributes": ["since"]}],
+    }
+    result = OntologyGenerator._normalize_ontology_attributes(raw)
+
+    entity_attrs = result["entities"][0]["attributes"]
+    assert all(isinstance(a, dict) for a in entity_attrs)
+    assert entity_attrs[0] == {"name": "name", "type": "text", "description": "name"}
+
+    edge_attrs = result["edges"][0]["attributes"]
+    assert all(isinstance(a, dict) for a in edge_attrs)
+    assert edge_attrs[0] == {"name": "since", "type": "text", "description": "since"}
+
+
+def test_ontology_generator_normalizes_production_keys():
+    """entity_types/edge_types are normalized; null attributes become []."""
+    from app.services.ontology_generator import OntologyGenerator
+
+    raw = {
+        "entity_types": [{"name": "Person", "attributes": ["name", None]}],
+        "edge_types": [{"name": "KNOWS", "attributes": None}],
+    }
+    result = OntologyGenerator._normalize_ontology_attributes(raw)
+
+    assert result["entity_types"][0]["attributes"] == [
+        {"name": "name", "type": "text", "description": "name"}
+    ]
+    assert result["edge_types"][0]["attributes"] == []