fix(graph): harden ontology normalization for Zep limits

2026-03-21 17:24:55 +09:00 · 2026-03-21 17:24:55 +09:00 · a026178d67
parent 25d43f8a4b
commit a026178d67
2 changed files with 139 additions and 56 deletions
--- a/backend/app/services/graph_builder.py
+++ b/backend/app/services/graph_builder.py
@ -217,17 +217,57 @@ class GraphBuilderService:
                return f"entity_{attr_name}"
            return attr_name
        def normalize_attributes(raw_attributes: Any) -> List[Dict[str, str]]:
            """Coerce a raw attribute list into ``{"name", "description"}`` dicts.

            Each element may be a bare string (used as both name and
            description) or a dict with a ``name`` and an optional
            ``description``. Elements of any other type, and elements whose
            name is missing, ``None`` or blank, are dropped.

            Args:
                raw_attributes: The ``attributes`` value of an ontology type
                    definition. Anything that is not a list or tuple is
                    treated as "no attributes".

            Returns:
                A list of dicts with a stripped, non-empty ``name`` and a
                ``description`` that falls back to the name when absent or
                empty.
            """
            # A bare string would otherwise be iterated character by character
            # and a scalar would raise TypeError; neither is a usable list.
            if not isinstance(raw_attributes, (list, tuple)):
                return []

            normalized: List[Dict[str, str]] = []
            for attr_def in raw_attributes:
                if isinstance(attr_def, str):
                    attr_def = {"name": attr_def, "description": attr_def}
                if not isinstance(attr_def, dict):
                    continue

                # An explicit None (JSON null) must count as "no name";
                # str(None) would otherwise create an attribute called "None".
                raw_name = attr_def.get("name")
                attr_name = "" if raw_name is None else str(raw_name).strip()
                if not attr_name:
                    continue

                normalized.append({
                    "name": attr_name,
                    "description": str(attr_def.get("description") or attr_name),
                })
            return normalized
        def normalize_source_targets(raw_source_targets: Any) -> List[EntityEdgeSourceTarget]:
            """Build the ``EntityEdgeSourceTarget`` list for one edge type.

            Non-dict elements are skipped. A ``source`` or ``target`` that is
            missing, ``None`` or blank falls back to the generic ``"Entity"``
            label; other values are converted to ``str`` and stripped.

            Args:
                raw_source_targets: The ``source_targets`` value of an edge
                    definition. Anything that is not a list or tuple is
                    treated as empty.

            Returns:
                At most ten ``EntityEdgeSourceTarget`` objects, in input order.
            """
            # Zep API allows max 10 source_targets per edge type.
            max_source_targets = 10

            def endpoint_label(value: Any) -> str:
                # str(None) is the truthy string "None", so a trailing
                # `or "Entity"` alone never fires for null values; handle
                # None and blank strings explicitly.
                label = "" if value is None else str(value).strip()
                return label or "Entity"

            if not isinstance(raw_source_targets, (list, tuple)):
                return []

            normalized: List[EntityEdgeSourceTarget] = [
                EntityEdgeSourceTarget(
                    source=endpoint_label(source_target.get("source")),
                    target=endpoint_label(source_target.get("target")),
                )
                for source_target in raw_source_targets
                if isinstance(source_target, dict)
            ]
            return normalized[:max_source_targets]
        # 动态创建实体类型
        entity_types = {}
        for entity_def in ontology.get("entity_types", []):
-            name = entity_def["name"]
+            if not isinstance(entity_def, dict):
                continue
            name = str(entity_def.get("name", "")).strip()
            if not name:
                continue
            description = entity_def.get("description", f"A {name} entity.")
            # 创建属性字典和类型注解（Pydantic v2 需要）
            attrs = {"__doc__": description}
            annotations = {}
-            for attr_def in entity_def.get("attributes", []):
+            for attr_def in normalize_attributes(entity_def.get("attributes", [])):
                attr_name = safe_attr_name(attr_def["name"])  # 使用安全名称
                attr_desc = attr_def.get("description", attr_name)
                # Zep API 需要 Field 的 description，这是必需的
@ -244,14 +284,20 @@ class GraphBuilderService:
        # 动态创建边类型
        edge_definitions = {}
        for edge_def in ontology.get("edge_types", []):
-            name = edge_def["name"]
+            if not isinstance(edge_def, dict):
                continue
            name = str(edge_def.get("name", "")).strip()
            if not name:
                continue
            description = edge_def.get("description", f"A {name} relationship.")
            # 创建属性字典和类型注解
            attrs = {"__doc__": description}
            annotations = {}
-            for attr_def in edge_def.get("attributes", []):
+            for attr_def in normalize_attributes(edge_def.get("attributes", [])):
                attr_name = safe_attr_name(attr_def["name"])  # 使用安全名称
                attr_desc = attr_def.get("description", attr_name)
                # Zep API 需要 Field 的 description，这是必需的
@ -265,16 +311,7 @@ class GraphBuilderService:
            edge_class = type(class_name, (EdgeModel,), attrs)
            edge_class.__doc__ = description
-            # 构建source_targets
+            source_targets = normalize_source_targets(edge_def.get("source_targets", []))
            source_targets = []
            for st in edge_def.get("source_targets", []):
                source_targets.append(
                    EntityEdgeSourceTarget(
                        source=st.get("source", "Entity"),
                        target=st.get("target", "Entity")
                    )
                )
            if source_targets:
                edge_definitions[name] = (edge_class, source_targets)
--- a/backend/app/services/ontology_generator.py
+++ b/backend/app/services/ontology_generator.py
@ -257,32 +257,78 @@ class OntologyGenerator:
    def _validate_and_process(self, result: Dict[str, Any]) -> Dict[str, Any]:
        """验证和后处理结果"""
        if not isinstance(result, dict):
            result = {}
        # 确保必要字段存在
-        if "entity_types" not in result:
+        if not isinstance(result.get("entity_types"), list):
            result["entity_types"] = []
-        if "edge_types" not in result:
+        if not isinstance(result.get("edge_types"), list):
            result["edge_types"] = []
        if "analysis_summary" not in result:
            result["analysis_summary"] = ""
        # 验证实体类型
        validated_entities = []
        for entity in result["entity_types"]:
-            if "attributes" not in entity:
+            if isinstance(entity, str):
-                entity["attributes"] = []
+                entity = {"name": entity, "description": f"Entity type: {entity}"}
-            if "examples" not in entity:
+            if not isinstance(entity, dict):
-                entity["examples"] = []
+                continue
-            # 确保description不超过100字符
+
-            if len(entity.get("description", "")) > 100:
+            name = str(entity.get("name", "")).strip()
-                entity["description"] = entity["description"][:97] + "..."
+            if not name:
                continue
            attributes = entity.get("attributes")
            if not isinstance(attributes, list):
                attributes = []
            examples = entity.get("examples")
            if not isinstance(examples, list):
                examples = []
            normalized = dict(entity)
            normalized["name"] = name
            normalized["attributes"] = attributes
            normalized["examples"] = examples
            if len(normalized.get("description", "")) > 100:
                normalized["description"] = normalized["description"][:97] + "..."
            validated_entities.append(normalized)
        result["entity_types"] = validated_entities
        # 验证关系类型
        validated_edges = []
        for edge in result["edge_types"]:
-            if "source_targets" not in edge:
+            if isinstance(edge, str):
-                edge["source_targets"] = []
+                edge = {"name": edge, "description": f"Relationship type: {edge}"}
-            if "attributes" not in edge:
+            if not isinstance(edge, dict):
-                edge["attributes"] = []
+                continue
-            if len(edge.get("description", "")) > 100:
+
-                edge["description"] = edge["description"][:97] + "..."
+            name = str(edge.get("name", "")).strip()
            if not name:
                continue
            source_targets = edge.get("source_targets")
            if not isinstance(source_targets, list):
                source_targets = []
            attributes = edge.get("attributes")
            if not isinstance(attributes, list):
                attributes = []
            normalized = dict(edge)
            normalized["name"] = name
            normalized["source_targets"] = source_targets
            normalized["attributes"] = attributes
            if len(normalized.get("description", "")) > 100:
                normalized["description"] = normalized["description"][:97] + "..."
            validated_edges.append(normalized)
        result["edge_types"] = validated_edges
        # Zep API 限制：最多 10 个自定义实体类型，最多 10 个自定义边类型
        MAX_ENTITY_TYPES = 10