MicroFish/backend/app/utils/llm_gate.py

20 lines
416 B
Python

from __future__ import annotations
from contextlib import contextmanager
from threading import BoundedSemaphore
from ..config import Config
# Module-level gate bounding how many main-LLM calls may hold a slot at once.
# The configured limit is clamped to a minimum of one slot, so a zero or
# negative Config.LLM_MAX_CONCURRENCY still yields a usable semaphore.
_MAIN_LLM_GATE = BoundedSemaphore(
    value=max(1, Config.LLM_MAX_CONCURRENCY),
)
@contextmanager
def main_llm_slot():
    """Hold one slot of the main-LLM concurrency gate for a ``with`` body.

    Entering blocks until ``_MAIN_LLM_GATE`` has a free slot; the slot is
    handed back when the body exits, whether it returns normally or raises.
    Nothing is yielded to the caller.
    """
    # The semaphore's own context-manager protocol acquires on entry and
    # releases on exit, including when the body raises.
    with _MAIN_LLM_GATE:
        yield