fix(graph): harden ontology normalization for Zep limits
This commit is contained in:
parent
25d43f8a4b
commit
a026178d67
|
|
@ -217,17 +217,57 @@ class GraphBuilderService:
|
||||||
return f"entity_{attr_name}"
|
return f"entity_{attr_name}"
|
||||||
return attr_name
|
return attr_name
|
||||||
|
|
||||||
|
def normalize_attributes(raw_attributes: Any) -> List[Dict[str, str]]:
    """Coerce loosely-typed attribute definitions into name/description dicts.

    Args:
        raw_attributes: Attribute definitions taken from an ontology entry.
            Expected to be a list (or tuple) whose items are either dicts
            with ``"name"`` / ``"description"`` keys or bare strings naming
            the attribute. Any other container type is treated as empty.

    Returns:
        A new list of ``{"name": ..., "description": ...}`` dicts with string
        values. Items that are neither ``str`` nor ``dict``, or whose name is
        missing, ``None`` or blank, are dropped. A falsy description falls
        back to the attribute name.
    """
    # Only real sequences are accepted: a bare string would otherwise be
    # iterated character by character and a scalar would raise TypeError.
    if not isinstance(raw_attributes, (list, tuple)):
        return []

    normalized: List[Dict[str, str]] = []
    for attr_def in raw_attributes:
        # Shorthand form: a plain string is both the name and the description.
        if isinstance(attr_def, str):
            attr_def = {"name": attr_def, "description": attr_def}
        if not isinstance(attr_def, dict):
            continue

        raw_name = attr_def.get("name")
        # An explicit None must not be stringified into the name "None".
        if raw_name is None:
            continue
        attr_name = str(raw_name).strip()
        if not attr_name:
            continue

        normalized.append({
            "name": attr_name,
            "description": str(attr_def.get("description") or attr_name),
        })
    return normalized
|
||||||
|
|
||||||
|
def normalize_source_targets(raw_source_targets: Any) -> List[EntityEdgeSourceTarget]:
    """Build ``EntityEdgeSourceTarget`` objects from loosely-typed definitions.

    Args:
        raw_source_targets: Source/target pairs taken from an edge definition.
            Expected to be a list (or tuple) of dicts with ``"source"`` and
            ``"target"`` keys. Any other container type is treated as empty.

    Returns:
        At most 10 ``EntityEdgeSourceTarget`` instances, in input order.
        Non-dict items are skipped. A missing, ``None`` or blank endpoint
        falls back to ``"Entity"``.
    """
    def endpoint(value: Any) -> str:
        # str(None) is the truthy string "None", so None needs its own check
        # before the blank-string fallback can apply.
        if value is None:
            return "Entity"
        return str(value).strip() or "Entity"

    # Only real sequences are accepted: iterating a dict or a string would
    # yield keys / characters, and a scalar would raise TypeError.
    if not isinstance(raw_source_targets, (list, tuple)):
        return []

    normalized: List[EntityEdgeSourceTarget] = [
        EntityEdgeSourceTarget(
            source=endpoint(source_target.get("source")),
            target=endpoint(source_target.get("target")),
        )
        for source_target in raw_source_targets
        if isinstance(source_target, dict)
    ]

    # Zep API allows max 10 source_targets per edge type.
    return normalized[:10]
|
||||||
|
|
||||||
# 动态创建实体类型
|
# 动态创建实体类型
|
||||||
entity_types = {}
|
entity_types = {}
|
||||||
for entity_def in ontology.get("entity_types", []):
|
for entity_def in ontology.get("entity_types", []):
|
||||||
name = entity_def["name"]
|
if not isinstance(entity_def, dict):
|
||||||
|
continue
|
||||||
|
|
||||||
|
name = str(entity_def.get("name", "")).strip()
|
||||||
|
if not name:
|
||||||
|
continue
|
||||||
|
|
||||||
description = entity_def.get("description", f"A {name} entity.")
|
description = entity_def.get("description", f"A {name} entity.")
|
||||||
|
|
||||||
# 创建属性字典和类型注解(Pydantic v2 需要)
|
# 创建属性字典和类型注解(Pydantic v2 需要)
|
||||||
attrs = {"__doc__": description}
|
attrs = {"__doc__": description}
|
||||||
annotations = {}
|
annotations = {}
|
||||||
|
|
||||||
for attr_def in entity_def.get("attributes", []):
|
for attr_def in normalize_attributes(entity_def.get("attributes", [])):
|
||||||
attr_name = safe_attr_name(attr_def["name"]) # 使用安全名称
|
attr_name = safe_attr_name(attr_def["name"]) # 使用安全名称
|
||||||
attr_desc = attr_def.get("description", attr_name)
|
attr_desc = attr_def.get("description", attr_name)
|
||||||
# Zep API 需要 Field 的 description,这是必需的
|
# Zep API 需要 Field 的 description,这是必需的
|
||||||
|
|
@ -244,14 +284,20 @@ class GraphBuilderService:
|
||||||
# 动态创建边类型
|
# 动态创建边类型
|
||||||
edge_definitions = {}
|
edge_definitions = {}
|
||||||
for edge_def in ontology.get("edge_types", []):
|
for edge_def in ontology.get("edge_types", []):
|
||||||
name = edge_def["name"]
|
if not isinstance(edge_def, dict):
|
||||||
|
continue
|
||||||
|
|
||||||
|
name = str(edge_def.get("name", "")).strip()
|
||||||
|
if not name:
|
||||||
|
continue
|
||||||
|
|
||||||
description = edge_def.get("description", f"A {name} relationship.")
|
description = edge_def.get("description", f"A {name} relationship.")
|
||||||
|
|
||||||
# 创建属性字典和类型注解
|
# 创建属性字典和类型注解
|
||||||
attrs = {"__doc__": description}
|
attrs = {"__doc__": description}
|
||||||
annotations = {}
|
annotations = {}
|
||||||
|
|
||||||
for attr_def in edge_def.get("attributes", []):
|
for attr_def in normalize_attributes(edge_def.get("attributes", [])):
|
||||||
attr_name = safe_attr_name(attr_def["name"]) # 使用安全名称
|
attr_name = safe_attr_name(attr_def["name"]) # 使用安全名称
|
||||||
attr_desc = attr_def.get("description", attr_name)
|
attr_desc = attr_def.get("description", attr_name)
|
||||||
# Zep API 需要 Field 的 description,这是必需的
|
# Zep API 需要 Field 的 description,这是必需的
|
||||||
|
|
@ -265,16 +311,7 @@ class GraphBuilderService:
|
||||||
edge_class = type(class_name, (EdgeModel,), attrs)
|
edge_class = type(class_name, (EdgeModel,), attrs)
|
||||||
edge_class.__doc__ = description
|
edge_class.__doc__ = description
|
||||||
|
|
||||||
# 构建source_targets
|
source_targets = normalize_source_targets(edge_def.get("source_targets", []))
|
||||||
source_targets = []
|
|
||||||
for st in edge_def.get("source_targets", []):
|
|
||||||
source_targets.append(
|
|
||||||
EntityEdgeSourceTarget(
|
|
||||||
source=st.get("source", "Entity"),
|
|
||||||
target=st.get("target", "Entity")
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
if source_targets:
|
if source_targets:
|
||||||
edge_definitions[name] = (edge_class, source_targets)
|
edge_definitions[name] = (edge_class, source_targets)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -257,32 +257,78 @@ class OntologyGenerator:
|
||||||
def _validate_and_process(self, result: Dict[str, Any]) -> Dict[str, Any]:
|
def _validate_and_process(self, result: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
"""验证和后处理结果"""
|
"""验证和后处理结果"""
|
||||||
|
|
||||||
|
if not isinstance(result, dict):
|
||||||
|
result = {}
|
||||||
|
|
||||||
# 确保必要字段存在
|
# 确保必要字段存在
|
||||||
if "entity_types" not in result:
|
if not isinstance(result.get("entity_types"), list):
|
||||||
result["entity_types"] = []
|
result["entity_types"] = []
|
||||||
if "edge_types" not in result:
|
if not isinstance(result.get("edge_types"), list):
|
||||||
result["edge_types"] = []
|
result["edge_types"] = []
|
||||||
if "analysis_summary" not in result:
|
if "analysis_summary" not in result:
|
||||||
result["analysis_summary"] = ""
|
result["analysis_summary"] = ""
|
||||||
|
|
||||||
# 验证实体类型
|
# 验证实体类型
|
||||||
|
validated_entities = []
|
||||||
for entity in result["entity_types"]:
|
for entity in result["entity_types"]:
|
||||||
if "attributes" not in entity:
|
if isinstance(entity, str):
|
||||||
entity["attributes"] = []
|
entity = {"name": entity, "description": f"Entity type: {entity}"}
|
||||||
if "examples" not in entity:
|
if not isinstance(entity, dict):
|
||||||
entity["examples"] = []
|
continue
|
||||||
# 确保description不超过100字符
|
|
||||||
if len(entity.get("description", "")) > 100:
|
name = str(entity.get("name", "")).strip()
|
||||||
entity["description"] = entity["description"][:97] + "..."
|
if not name:
|
||||||
|
continue
|
||||||
|
|
||||||
|
attributes = entity.get("attributes")
|
||||||
|
if not isinstance(attributes, list):
|
||||||
|
attributes = []
|
||||||
|
|
||||||
|
examples = entity.get("examples")
|
||||||
|
if not isinstance(examples, list):
|
||||||
|
examples = []
|
||||||
|
|
||||||
|
normalized = dict(entity)
|
||||||
|
normalized["name"] = name
|
||||||
|
normalized["attributes"] = attributes
|
||||||
|
normalized["examples"] = examples
|
||||||
|
if len(normalized.get("description", "")) > 100:
|
||||||
|
normalized["description"] = normalized["description"][:97] + "..."
|
||||||
|
|
||||||
|
validated_entities.append(normalized)
|
||||||
|
|
||||||
|
result["entity_types"] = validated_entities
|
||||||
|
|
||||||
# 验证关系类型
|
# 验证关系类型
|
||||||
|
validated_edges = []
|
||||||
for edge in result["edge_types"]:
|
for edge in result["edge_types"]:
|
||||||
if "source_targets" not in edge:
|
if isinstance(edge, str):
|
||||||
edge["source_targets"] = []
|
edge = {"name": edge, "description": f"Relationship type: {edge}"}
|
||||||
if "attributes" not in edge:
|
if not isinstance(edge, dict):
|
||||||
edge["attributes"] = []
|
continue
|
||||||
if len(edge.get("description", "")) > 100:
|
|
||||||
edge["description"] = edge["description"][:97] + "..."
|
name = str(edge.get("name", "")).strip()
|
||||||
|
if not name:
|
||||||
|
continue
|
||||||
|
|
||||||
|
source_targets = edge.get("source_targets")
|
||||||
|
if not isinstance(source_targets, list):
|
||||||
|
source_targets = []
|
||||||
|
|
||||||
|
attributes = edge.get("attributes")
|
||||||
|
if not isinstance(attributes, list):
|
||||||
|
attributes = []
|
||||||
|
|
||||||
|
normalized = dict(edge)
|
||||||
|
normalized["name"] = name
|
||||||
|
normalized["source_targets"] = source_targets
|
||||||
|
normalized["attributes"] = attributes
|
||||||
|
if len(normalized.get("description", "")) > 100:
|
||||||
|
normalized["description"] = normalized["description"][:97] + "..."
|
||||||
|
|
||||||
|
validated_edges.append(normalized)
|
||||||
|
|
||||||
|
result["edge_types"] = validated_edges
|
||||||
|
|
||||||
# Zep API 限制:最多 10 个自定义实体类型,最多 10 个自定义边类型
|
# Zep API 限制:最多 10 个自定义实体类型,最多 10 个自定义边类型
|
||||||
MAX_ENTITY_TYPES = 10
|
MAX_ENTITY_TYPES = 10
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue