MicroFish/backend/app/utils/retry.py

238 lines
7.0 KiB
Python

"""
API call retry mechanism
Handles retry logic for external API calls such as LLM services
"""
import time
import random
import functools
from typing import Callable, Any, Optional, Type, Tuple
from ..utils.logger import get_logger
# Module-level logger shared by the retry helpers defined below
logger = get_logger('mirofish.retry')
def retry_with_backoff(
    max_retries: int = 3,
    initial_delay: float = 1.0,
    max_delay: float = 30.0,
    backoff_factor: float = 2.0,
    jitter: bool = True,
    exceptions: Tuple[Type[Exception], ...] = (Exception,),
    on_retry: Optional[Callable[[Exception, int], None]] = None
):
    """
    Retry decorator with exponential backoff

    The wrapped function is invoked up to ``max_retries + 1`` times (one
    initial attempt plus ``max_retries`` retries). Exceptions that are not
    instances of ``exceptions`` propagate immediately without any retry.

    Args:
        max_retries: Maximum number of retries (must be >= 0)
        initial_delay: Initial delay in seconds
        max_delay: Cap applied to the exponentially growing delay, in seconds
        backoff_factor: Backoff multiplier applied to the delay after each retry
        jitter: Whether to add random jitter. The capped delay is multiplied
            by a random factor in [0.5, 1.5), so the actual sleep may exceed
            max_delay by up to 50%
        exceptions: Exception types that should trigger a retry
        on_retry: Callback invoked before each sleep as
            on_retry(exception, retry_count), where retry_count starts at 1.
            An exception raised by the callback aborts the retry loop

    Returns:
        A decorator that wraps a function with the retry logic

    Raises:
        ValueError: If max_retries is negative

    Usage:
        @retry_with_backoff(max_retries=3)
        def call_llm_api():
            ...
    """
    # Fail fast: with a negative value the retry loop would never call the
    # wrapped function at all.
    if max_retries < 0:
        raise ValueError(f"max_retries must be >= 0, got {max_retries}")

    def decorator(func: Callable) -> Callable:
        @functools.wraps(func)
        def wrapper(*args, **kwargs) -> Any:
            delay = initial_delay

            for attempt in range(max_retries + 1):
                try:
                    return func(*args, **kwargs)
                except exceptions as e:
                    # Last allowed attempt: log and re-raise the original
                    # exception with its traceback intact.
                    if attempt == max_retries:
                        logger.error(f"Function {func.__name__} failed after {max_retries} retries: {str(e)}")
                        raise

                    # Calculate delay
                    current_delay = min(delay, max_delay)
                    if jitter:
                        current_delay = current_delay * (0.5 + random.random())

                    logger.warning(
                        f"Function {func.__name__} attempt {attempt + 1} failed: {str(e)}, "
                        f"retrying in {current_delay:.1f}s..."
                    )

                    if on_retry is not None:
                        on_retry(e, attempt + 1)

                    time.sleep(current_delay)
                    delay *= backoff_factor

        return wrapper
    return decorator
def retry_with_backoff_async(
    max_retries: int = 3,
    initial_delay: float = 1.0,
    max_delay: float = 30.0,
    backoff_factor: float = 2.0,
    jitter: bool = True,
    exceptions: Tuple[Type[Exception], ...] = (Exception,),
    on_retry: Optional[Callable[[Exception, int], None]] = None
):
    """
    Async version of the retry decorator

    The wrapped callable is awaited up to ``max_retries + 1`` times (one
    initial attempt plus ``max_retries`` retries), waiting between attempts
    with ``asyncio.sleep``. Exceptions that are not instances of
    ``exceptions`` propagate immediately without any retry.

    Args:
        max_retries: Maximum number of retries (must be >= 0)
        initial_delay: Initial delay in seconds
        max_delay: Cap applied to the exponentially growing delay, in seconds
        backoff_factor: Backoff multiplier applied to the delay after each retry
        jitter: Whether to add random jitter. The capped delay is multiplied
            by a random factor in [0.5, 1.5), so the actual sleep may exceed
            max_delay by up to 50%
        exceptions: Exception types that should trigger a retry
        on_retry: Callback invoked before each sleep as
            on_retry(exception, retry_count), where retry_count starts at 1.
            If the callback returns an awaitable (e.g. it is an async
            function), the awaitable is awaited before sleeping

    Returns:
        A decorator that wraps an async function with the retry logic

    Raises:
        ValueError: If max_retries is negative
    """
    import asyncio
    import inspect

    # Fail fast: with a negative value the retry loop would never call the
    # wrapped function at all.
    if max_retries < 0:
        raise ValueError(f"max_retries must be >= 0, got {max_retries}")

    def decorator(func: Callable) -> Callable:
        @functools.wraps(func)
        async def wrapper(*args, **kwargs) -> Any:
            delay = initial_delay

            for attempt in range(max_retries + 1):
                try:
                    return await func(*args, **kwargs)
                except exceptions as e:
                    # Last allowed attempt: log and re-raise the original
                    # exception with its traceback intact.
                    if attempt == max_retries:
                        logger.error(f"Async function {func.__name__} failed after {max_retries} retries: {str(e)}")
                        raise

                    current_delay = min(delay, max_delay)
                    if jitter:
                        current_delay = current_delay * (0.5 + random.random())

                    logger.warning(
                        f"Async function {func.__name__} attempt {attempt + 1} failed: {str(e)}, "
                        f"retrying in {current_delay:.1f}s..."
                    )

                    if on_retry is not None:
                        callback_result = on_retry(e, attempt + 1)
                        # Support async callbacks: without this, a coroutine
                        # returned by on_retry would never run.
                        if inspect.isawaitable(callback_result):
                            await callback_result

                    await asyncio.sleep(current_delay)
                    delay *= backoff_factor

        return wrapper
    return decorator
class RetryableAPIClient:
    """
    Retryable API client wrapper

    Holds a retry/backoff configuration and applies it to arbitrary callables,
    either one at a time (call_with_retry) or item by item over a batch
    (call_batch_with_retry).
    """

    def __init__(
        self,
        max_retries: int = 3,
        initial_delay: float = 1.0,
        max_delay: float = 30.0,
        backoff_factor: float = 2.0,
        jitter: bool = True
    ):
        """
        Store the retry configuration

        Args:
            max_retries: Maximum number of retries (must be >= 0)
            initial_delay: Initial delay in seconds
            max_delay: Cap applied to the exponentially growing delay, in seconds
            backoff_factor: Backoff multiplier applied to the delay after each retry
            jitter: Whether to multiply each capped delay by a random factor
                in [0.5, 1.5); defaults to True

        Raises:
            ValueError: If max_retries is negative
        """
        if max_retries < 0:
            raise ValueError(f"max_retries must be >= 0, got {max_retries}")

        self.max_retries = max_retries
        self.initial_delay = initial_delay
        self.max_delay = max_delay
        self.backoff_factor = backoff_factor
        self.jitter = jitter

    def call_with_retry(
        self,
        func: Callable,
        *args,
        exceptions: Tuple[Type[Exception], ...] = (Exception,),
        **kwargs
    ) -> Any:
        """
        Execute a function call and retry on failure

        The function is called up to ``self.max_retries + 1`` times.
        Exceptions that are not instances of ``exceptions`` propagate
        immediately without any retry.

        Args:
            func: Function to call
            *args: Positional arguments for the function
            exceptions: Exception types that should trigger a retry
            **kwargs: Keyword arguments for the function

        Returns:
            Return value of the function

        Raises:
            ValueError: If self.max_retries is negative
            Exception: The exception from the last attempt once all retries
                are exhausted
        """
        # Re-checked here because the attribute can be reassigned after
        # construction; a negative value would skip the call entirely.
        if self.max_retries < 0:
            raise ValueError(f"max_retries must be >= 0, got {self.max_retries}")

        delay = self.initial_delay

        for attempt in range(self.max_retries + 1):
            try:
                return func(*args, **kwargs)
            except exceptions as e:
                # Last allowed attempt: log and re-raise the original
                # exception with its traceback intact.
                if attempt == self.max_retries:
                    logger.error(f"API call failed after {self.max_retries} retries: {str(e)}")
                    raise

                current_delay = min(delay, self.max_delay)
                if self.jitter:
                    current_delay = current_delay * (0.5 + random.random())

                logger.warning(
                    f"API call attempt {attempt + 1} failed: {str(e)}, "
                    f"retrying in {current_delay:.1f}s..."
                )

                time.sleep(current_delay)
                delay *= self.backoff_factor

    def call_batch_with_retry(
        self,
        items: list,
        process_func: Callable,
        exceptions: Tuple[Type[Exception], ...] = (Exception,),
        continue_on_failure: bool = True
    ) -> Tuple[list, list]:
        """
        Process a batch of items, retrying individually on failure

        Args:
            items: List of items to process
            process_func: Processing function that accepts a single item
            exceptions: Exception types that should trigger a retry
            continue_on_failure: Whether to continue processing remaining items after a failure

        Returns:
            (list of successful results, list of failed items). The results
            list contains only successes, so its positions do not line up
            with ``items`` once any item has failed. Each failure entry is a
            dict with keys "index" (0-based position in ``items``), "item"
            and "error" (the exception message).

        Raises:
            Exception: The failing item's exception, when continue_on_failure
                is False. Results collected so far are not returned in that case
        """
        results = []
        failures = []

        for idx, item in enumerate(items):
            try:
                result = self.call_with_retry(
                    process_func,
                    item,
                    exceptions=exceptions
                )
                results.append(result)
            except Exception as e:
                logger.error(f"Failed to process item {idx + 1}: {str(e)}")
                failures.append({
                    "index": idx,
                    "item": item,
                    "error": str(e)
                })

                if not continue_on_failure:
                    raise

        return results, failures