fix: add max_items limit to fetch_all_edges to prevent unbounded memory growth
The fetch_all_nodes function has a max_items parameter (default 2000) that caps how many items are fetched, but fetch_all_edges was missing this safeguard. This could cause memory issues on graphs that have a large number of edges. Added _MAX_EDGES = 5000 as the default limit for edges, matching the pattern already established for nodes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
f2805d90a9
commit
64e6520f52
|
|
@@ -19,6 +19,7 @@ logger = get_logger('mirofish.zep_paging')
|
|||
|
||||
_DEFAULT_PAGE_SIZE = 100
|
||||
_MAX_NODES = 2000
|
||||
_MAX_EDGES = 5000
|
||||
_DEFAULT_MAX_RETRIES = 3
|
||||
_DEFAULT_RETRY_DELAY = 2.0 # seconds, doubles each retry
|
||||
|
||||
|
|
@@ -106,6 +107,7 @@ def fetch_all_edges(
|
|||
client: Zep,
|
||||
graph_id: str,
|
||||
page_size: int = _DEFAULT_PAGE_SIZE,
|
||||
max_items: int = _MAX_EDGES,
|
||||
max_retries: int = _DEFAULT_MAX_RETRIES,
|
||||
retry_delay: float = _DEFAULT_RETRY_DELAY,
|
||||
) -> list[Any]:
|
||||
|
|
@@ -132,6 +134,10 @@ def fetch_all_edges(
|
|||
break
|
||||
|
||||
all_edges.extend(batch)
|
||||
if len(all_edges) >= max_items:
|
||||
all_edges = all_edges[:max_items]
|
||||
logger.warning(f"Edge count reached limit ({max_items}), stopping pagination for graph {graph_id}")
|
||||
break
|
||||
if len(batch) < page_size:
|
||||
break
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue