fix(graph): harden ontology normalization for Zep limits

This commit is contained in:
MiroFish Bot 2026-03-21 17:24:55 +09:00
parent 25d43f8a4b
commit a026178d67
2 changed files with 139 additions and 56 deletions

View File

@ -217,17 +217,57 @@ class GraphBuilderService:
return f"entity_{attr_name}" return f"entity_{attr_name}"
return attr_name return attr_name
def normalize_attributes(raw_attributes: Any) -> List[Dict[str, str]]:
    """Coerce loosely-typed attribute definitions into name/description dicts.

    Each item of ``raw_attributes`` may be either a bare string (used as both
    the name and the description) or a dict carrying ``"name"`` and,
    optionally, ``"description"``. Anything else is dropped.

    Args:
        raw_attributes: The ``attributes`` value of an ontology entry. Only a
            list or tuple is iterated; any other value (``None``, a string,
            a dict, a number, ...) is treated as "no attributes".

    Returns:
        A list of ``{"name": ..., "description": ...}`` dicts. ``name`` is
        stripped and never empty; ``description`` falls back to the name when
        it is missing or falsy.
    """
    # Iterating a str would yield one attribute per character, and iterating
    # a dict would turn its keys into attributes, so only sequences qualify.
    if not isinstance(raw_attributes, (list, tuple)):
        return []

    normalized: List[Dict[str, str]] = []
    for attr_def in raw_attributes:
        if isinstance(attr_def, str):
            attr_def = {"name": attr_def, "description": attr_def}
        if not isinstance(attr_def, dict):
            continue

        # An explicit None must be rejected before str(): str(None) is the
        # non-empty string "None" and would become a bogus attribute name.
        raw_name = attr_def.get("name")
        if raw_name is None:
            continue
        attr_name = str(raw_name).strip()
        if not attr_name:
            continue

        normalized.append({
            "name": attr_name,
            "description": str(attr_def.get("description") or attr_name),
        })
    return normalized
def normalize_source_targets(raw_source_targets: Any) -> List[EntityEdgeSourceTarget]:
    """Build the ``EntityEdgeSourceTarget`` list for one edge definition.

    Args:
        raw_source_targets: The ``source_targets`` value of an edge
            definition. Only a list or tuple is iterated; any other value is
            treated as empty. Items that are not dicts are skipped.

    Returns:
        At most 10 ``EntityEdgeSourceTarget`` objects, in input order. A
        missing, ``None``, empty or whitespace-only ``source``/``target``
        falls back to ``"Entity"``.
    """
    # Zep API allows max 10 source_targets per edge type.
    max_source_targets = 10
    default_type = "Entity"

    # A str or dict here would otherwise be iterated element-by-element and
    # silently produce nothing useful; a non-iterable would raise.
    if not isinstance(raw_source_targets, (list, tuple)):
        return []

    def endpoint(value: Any) -> str:
        # None has to be handled before str(): str(None) is the truthy
        # string "None", so an `or` fallback after the conversion never fires.
        if value is None:
            return default_type
        return str(value).strip() or default_type

    normalized: List[EntityEdgeSourceTarget] = []
    for source_target in raw_source_targets:
        if not isinstance(source_target, dict):
            continue
        normalized.append(
            EntityEdgeSourceTarget(
                source=endpoint(source_target.get("source")),
                target=endpoint(source_target.get("target")),
            )
        )
        # Stop as soon as the cap is reached instead of building and slicing.
        if len(normalized) == max_source_targets:
            break
    return normalized
# 动态创建实体类型 # 动态创建实体类型
entity_types = {} entity_types = {}
for entity_def in ontology.get("entity_types", []): for entity_def in ontology.get("entity_types", []):
name = entity_def["name"] if not isinstance(entity_def, dict):
continue
name = str(entity_def.get("name", "")).strip()
if not name:
continue
description = entity_def.get("description", f"A {name} entity.") description = entity_def.get("description", f"A {name} entity.")
# 创建属性字典和类型注解Pydantic v2 需要) # 创建属性字典和类型注解Pydantic v2 需要)
attrs = {"__doc__": description} attrs = {"__doc__": description}
annotations = {} annotations = {}
for attr_def in entity_def.get("attributes", []): for attr_def in normalize_attributes(entity_def.get("attributes", [])):
attr_name = safe_attr_name(attr_def["name"]) # 使用安全名称 attr_name = safe_attr_name(attr_def["name"]) # 使用安全名称
attr_desc = attr_def.get("description", attr_name) attr_desc = attr_def.get("description", attr_name)
# Zep API 需要 Field 的 description这是必需的 # Zep API 需要 Field 的 description这是必需的
@ -244,14 +284,20 @@ class GraphBuilderService:
# 动态创建边类型 # 动态创建边类型
edge_definitions = {} edge_definitions = {}
for edge_def in ontology.get("edge_types", []): for edge_def in ontology.get("edge_types", []):
name = edge_def["name"] if not isinstance(edge_def, dict):
continue
name = str(edge_def.get("name", "")).strip()
if not name:
continue
description = edge_def.get("description", f"A {name} relationship.") description = edge_def.get("description", f"A {name} relationship.")
# 创建属性字典和类型注解 # 创建属性字典和类型注解
attrs = {"__doc__": description} attrs = {"__doc__": description}
annotations = {} annotations = {}
for attr_def in edge_def.get("attributes", []): for attr_def in normalize_attributes(edge_def.get("attributes", [])):
attr_name = safe_attr_name(attr_def["name"]) # 使用安全名称 attr_name = safe_attr_name(attr_def["name"]) # 使用安全名称
attr_desc = attr_def.get("description", attr_name) attr_desc = attr_def.get("description", attr_name)
# Zep API 需要 Field 的 description这是必需的 # Zep API 需要 Field 的 description这是必需的
@ -265,16 +311,7 @@ class GraphBuilderService:
edge_class = type(class_name, (EdgeModel,), attrs) edge_class = type(class_name, (EdgeModel,), attrs)
edge_class.__doc__ = description edge_class.__doc__ = description
# 构建source_targets source_targets = normalize_source_targets(edge_def.get("source_targets", []))
source_targets = []
for st in edge_def.get("source_targets", []):
source_targets.append(
EntityEdgeSourceTarget(
source=st.get("source", "Entity"),
target=st.get("target", "Entity")
)
)
if source_targets: if source_targets:
edge_definitions[name] = (edge_class, source_targets) edge_definitions[name] = (edge_class, source_targets)

View File

@ -257,32 +257,78 @@ class OntologyGenerator:
def _validate_and_process(self, result: Dict[str, Any]) -> Dict[str, Any]: def _validate_and_process(self, result: Dict[str, Any]) -> Dict[str, Any]:
"""验证和后处理结果""" """验证和后处理结果"""
if not isinstance(result, dict):
result = {}
# 确保必要字段存在 # 确保必要字段存在
if "entity_types" not in result: if not isinstance(result.get("entity_types"), list):
result["entity_types"] = [] result["entity_types"] = []
if "edge_types" not in result: if not isinstance(result.get("edge_types"), list):
result["edge_types"] = [] result["edge_types"] = []
if "analysis_summary" not in result: if "analysis_summary" not in result:
result["analysis_summary"] = "" result["analysis_summary"] = ""
# 验证实体类型 # 验证实体类型
validated_entities = []
for entity in result["entity_types"]: for entity in result["entity_types"]:
if "attributes" not in entity: if isinstance(entity, str):
entity["attributes"] = [] entity = {"name": entity, "description": f"Entity type: {entity}"}
if "examples" not in entity: if not isinstance(entity, dict):
entity["examples"] = [] continue
# 确保description不超过100字符
if len(entity.get("description", "")) > 100: name = str(entity.get("name", "")).strip()
entity["description"] = entity["description"][:97] + "..." if not name:
continue
attributes = entity.get("attributes")
if not isinstance(attributes, list):
attributes = []
examples = entity.get("examples")
if not isinstance(examples, list):
examples = []
normalized = dict(entity)
normalized["name"] = name
normalized["attributes"] = attributes
normalized["examples"] = examples
if len(normalized.get("description", "")) > 100:
normalized["description"] = normalized["description"][:97] + "..."
validated_entities.append(normalized)
result["entity_types"] = validated_entities
# 验证关系类型 # 验证关系类型
validated_edges = []
for edge in result["edge_types"]: for edge in result["edge_types"]:
if "source_targets" not in edge: if isinstance(edge, str):
edge["source_targets"] = [] edge = {"name": edge, "description": f"Relationship type: {edge}"}
if "attributes" not in edge: if not isinstance(edge, dict):
edge["attributes"] = [] continue
if len(edge.get("description", "")) > 100:
edge["description"] = edge["description"][:97] + "..." name = str(edge.get("name", "")).strip()
if not name:
continue
source_targets = edge.get("source_targets")
if not isinstance(source_targets, list):
source_targets = []
attributes = edge.get("attributes")
if not isinstance(attributes, list):
attributes = []
normalized = dict(edge)
normalized["name"] = name
normalized["source_targets"] = source_targets
normalized["attributes"] = attributes
if len(normalized.get("description", "")) > 100:
normalized["description"] = normalized["description"][:97] + "..."
validated_edges.append(normalized)
result["edge_types"] = validated_edges
# Zep API 限制:最多 10 个自定义实体类型,最多 10 个自定义边类型 # Zep API 限制:最多 10 个自定义实体类型,最多 10 个自定义边类型
MAX_ENTITY_TYPES = 10 MAX_ENTITY_TYPES = 10