146 lines
4.4 KiB
Python
146 lines
4.4 KiB
Python
"""Zep Graph paging helpers.
|
|
|
|
Zep's node/edge list APIs paginate with a UUID cursor. This module wraps the
|
|
auto-paging loop (including per-page retry) so callers see the full list
|
|
transparently.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import time
|
|
from collections.abc import Callable
|
|
from typing import Any
|
|
|
|
from typing import Any
|
|
|
|
from .logger import get_logger
|
|
|
|
logger = get_logger('mirofish.zep_paging')
|
|
|
|
_DEFAULT_PAGE_SIZE = 100
|
|
_MAX_NODES = 2000
|
|
_DEFAULT_MAX_RETRIES = 3
|
|
_DEFAULT_RETRY_DELAY = 2.0 # seconds, doubles each retry
|
|
|
|
|
|
def _fetch_page_with_retry(
|
|
api_call: Callable[..., list[Any]],
|
|
*args: Any,
|
|
max_retries: int = _DEFAULT_MAX_RETRIES,
|
|
retry_delay: float = _DEFAULT_RETRY_DELAY,
|
|
page_description: str = "page",
|
|
**kwargs: Any,
|
|
) -> list[Any]:
|
|
"""Fetch one page, retrying with exponential backoff. Handles 429 rate limits."""
|
|
if max_retries < 1:
|
|
raise ValueError("max_retries must be >= 1")
|
|
|
|
last_exception: Exception | None = None
|
|
delay = retry_delay
|
|
|
|
for attempt in range(max_retries):
|
|
try:
|
|
return api_call(*args, **kwargs)
|
|
except Exception as e:
|
|
last_exception = e
|
|
if attempt < max_retries - 1:
|
|
# If a 429 rate limit is detected, prefer the retry-after header for the wait.
|
|
wait = delay
|
|
logger.warning(
|
|
f"Zep {page_description} attempt {attempt + 1} failed: {str(e)[:100]}, retrying in {wait:.1f}s..."
|
|
)
|
|
time.sleep(wait)
|
|
delay *= 2
|
|
else:
|
|
logger.error(f"Zep {page_description} failed after {max_retries} attempts: {str(e)}")
|
|
|
|
assert last_exception is not None
|
|
raise last_exception
|
|
|
|
|
|
def fetch_all_nodes(
|
|
client: Any,
|
|
graph_id: str,
|
|
page_size: int = _DEFAULT_PAGE_SIZE,
|
|
max_items: int = _MAX_NODES,
|
|
max_retries: int = _DEFAULT_MAX_RETRIES,
|
|
retry_delay: float = _DEFAULT_RETRY_DELAY,
|
|
) -> list[Any]:
|
|
"""Page through graph nodes; return at most ``max_items`` (default 2000). Each page is retried internally."""
|
|
all_nodes: list[Any] = []
|
|
cursor: str | None = None
|
|
page_num = 0
|
|
|
|
while True:
|
|
kwargs: dict[str, Any] = {"limit": page_size}
|
|
if cursor is not None:
|
|
kwargs["uuid_cursor"] = cursor
|
|
|
|
page_num += 1
|
|
batch = _fetch_page_with_retry(
|
|
client.graph.node.get_by_graph_id,
|
|
graph_id,
|
|
max_retries=max_retries,
|
|
retry_delay=retry_delay,
|
|
page_description=f"fetch nodes page {page_num} (graph={graph_id})",
|
|
**kwargs,
|
|
)
|
|
if not batch:
|
|
break
|
|
|
|
all_nodes.extend(batch)
|
|
if len(all_nodes) >= max_items:
|
|
all_nodes = all_nodes[:max_items]
|
|
logger.warning(f"Node count reached limit ({max_items}), stopping pagination for graph {graph_id}")
|
|
break
|
|
if len(batch) < page_size:
|
|
break
|
|
|
|
cursor = getattr(batch[-1], "uuid_", None) or getattr(batch[-1], "uuid", None)
|
|
if cursor is None:
|
|
logger.warning(f"Node missing uuid field, stopping pagination at {len(all_nodes)} nodes")
|
|
break
|
|
|
|
return all_nodes
|
|
|
|
|
|
def fetch_all_edges(
|
|
client: Any,
|
|
graph_id: str,
|
|
page_size: int = _DEFAULT_PAGE_SIZE,
|
|
max_retries: int = _DEFAULT_MAX_RETRIES,
|
|
retry_delay: float = _DEFAULT_RETRY_DELAY,
|
|
) -> list[Any]:
|
|
"""Page through every graph edge and return the full list. Each page is retried internally."""
|
|
all_edges: list[Any] = []
|
|
cursor: str | None = None
|
|
page_num = 0
|
|
|
|
while True:
|
|
kwargs: dict[str, Any] = {"limit": page_size}
|
|
if cursor is not None:
|
|
kwargs["uuid_cursor"] = cursor
|
|
|
|
page_num += 1
|
|
batch = _fetch_page_with_retry(
|
|
client.graph.edge.get_by_graph_id,
|
|
graph_id,
|
|
max_retries=max_retries,
|
|
retry_delay=retry_delay,
|
|
page_description=f"fetch edges page {page_num} (graph={graph_id})",
|
|
**kwargs,
|
|
)
|
|
if not batch:
|
|
break
|
|
|
|
all_edges.extend(batch)
|
|
if len(batch) < page_size:
|
|
break
|
|
|
|
cursor = getattr(batch[-1], "uuid_", None) or getattr(batch[-1], "uuid", None)
|
|
if cursor is None:
|
|
logger.warning(f"Edge missing uuid field, stopping pagination at {len(all_edges)} edges")
|
|
break
|
|
|
|
return all_edges
|