ProjectTech4DevAI · vprashrex · Jun 4, 2026 · Jun 4, 2026 · Jun 4, 2026 · Jun 5, 2026
diff --git a/backend/app/api/routes/collections.py b/backend/app/api/routes/collections.py
@@ -8,6 +8,7 @@
 from app.api.deps import SessionDep, AuthContextDep
 from app.api.permissions import Permission, require_permission
 from app.core.telemetry import log_context
+from app.core.rate_monitor import monitor_rate
 from app.crud import (
     CollectionCrud,
     CollectionJobCrud,
@@ -85,7 +86,10 @@ def list_collections(
     description=load_description("collections/create.md"),
     response_model=APIResponse[CollectionJobImmediatePublic],
     callbacks=collection_callback_router.routes,
-    dependencies=[Depends(require_permission(Permission.REQUIRE_PROJECT))],
+    dependencies=[
+        Depends(require_permission(Permission.REQUIRE_PROJECT)),
+        Depends(monitor_rate("collections")),
+    ],
 )
 def create_collection(
     session: SessionDep,

diff --git a/backend/app/api/routes/evaluations/evaluation.py b/backend/app/api/routes/evaluations/evaluation.py
@@ -12,6 +12,7 @@
 )
 
 from app.api.deps import AuthContextDep, SessionDep
+from app.core.rate_monitor import monitor_rate
 from app.crud.evaluations import list_evaluation_runs as list_evaluation_runs_crud
 from app.crud.evaluations.core import group_traces_by_question_id
 from app.models.evaluation import EvaluationRunPublic
@@ -34,7 +35,10 @@
     "",
     description=load_description("evaluation/create_evaluation.md"),
     response_model=APIResponse[EvaluationRunPublic],
-    dependencies=[Depends(require_permission(Permission.REQUIRE_PROJECT))],
+    dependencies=[
+        Depends(require_permission(Permission.REQUIRE_PROJECT)),
+        Depends(monitor_rate("evaluations")),
+    ],
 )
 def evaluate(
     session: SessionDep,

diff --git a/backend/app/api/routes/llm.py b/backend/app/api/routes/llm.py
@@ -9,6 +9,7 @@
 from app.api.permissions import Permission, require_permission
 from app.core.cloud.storage import get_cloud_storage
 from app.core.telemetry import log_context
+from app.core.rate_monitor import monitor_rate
 from app.crud.jobs import JobCrud
 from app.crud.llm import get_llm_calls_by_job_id
 from app.models import (
@@ -22,7 +23,6 @@
 from app.services.llm.jobs import start_job
 from app.utils import APIResponse, validate_callback_url, load_description
 
-
 logger = logging.getLogger(__name__)
 
 router = APIRouter(tags=["LLM"])
@@ -50,7 +50,10 @@ def llm_callback_notification(body: APIResponse[LLMCallResponse]):
     description=load_description("llm/llm_call.md"),
     response_model=APIResponse[LLMJobImmediatePublic],
     callbacks=llm_callback_router.routes,
-    dependencies=[Depends(require_permission(Permission.REQUIRE_PROJECT))],
+    dependencies=[
+        Depends(require_permission(Permission.REQUIRE_PROJECT)),
+        Depends(monitor_rate("llm_call")),
+    ],
 )
 def llm_call(
     _current_user: AuthContextDep, session: SessionDep, request: LLMCallRequest

diff --git a/backend/app/core/config.py b/backend/app/core/config.py
@@ -151,6 +151,11 @@ def AWS_S3_BUCKET(self) -> str:
     BACKEND_SERVICE_NAME: str = "kaapi-backend"
     CRON_SERVICE_NAME: str = "kaapi-cron"
 
+    # Request-rate alert thresholds (requests per minute, counted per project)
+    THRESHOLD_LLM_CALL_RATE: int = 15
+    THRESHOLD_COLLECTIONS_RATE: int = 3
+    THRESHOLD_EVALUATIONS_RATE: int = 3
+
     # Celery Configuration
     CELERY_WORKER_CONCURRENCY: int | None = None
     CELERY_WORKER_MAX_TASKS_PER_CHILD: int = 150

diff --git a/backend/app/core/rate_monitor.py b/backend/app/core/rate_monitor.py
@@ -0,0 +1,98 @@
+import logging
+import time
+
+from collections.abc import Callable
+from typing import Literal
+
+import redis
+
+from app.api.deps import AuthContextDep
+from app.core.config import settings
+
+from app.core.telemetry import record_rate_threshold
+
+logger = logging.getLogger(__name__)
+
+# Route categories whose per-project request rate is monitored.
+RateCategory = Literal["llm_call", "collections", "evaluations"]
+
+# Alert threshold (requests per minute) for each monitored category,
+# read once from settings when this module is imported.
+THRESHOLDS: dict[RateCategory, int] = {
+    "llm_call": settings.THRESHOLD_LLM_CALL_RATE,
+    "collections": settings.THRESHOLD_COLLECTIONS_RATE,
+    "evaluations": settings.THRESHOLD_EVALUATIONS_RATE,
+}
+
+# TTL applied to each per-minute counter key so Redis drops it once its
+# window has passed (two minutes leaves slack beyond the one-minute bucket).
+_EXPIRATION_SECONDS = 120
+
+# Upper bound (seconds) on a single Redis socket operation. Rate monitoring
+# runs as a request dependency, so a stalled Redis must fail fast instead of
+# blocking the monitored endpoints; without this the client waits forever.
+# A socket_timeout given in REDIS_URL's query string takes precedence.
+_REDIS_TIMEOUT_SECONDS = 1.0
+
+_redis_client: redis.Redis = redis.from_url(
+    settings.REDIS_URL,
+    decode_responses=True,
+    socket_timeout=_REDIS_TIMEOUT_SECONDS,
+)
+
+
+# Per-key request counter backed by Redis.
+def increment_and_get_count(key: str) -> int | None:
+    """Bump the Redis counter stored at ``key`` and report its new value.
+
+    The increment and the TTL refresh (``_EXPIRATION_SECONDS``) are queued on
+    one pipeline and run with a single ``execute()`` call. Every call
+    re-applies the TTL to the key.
+
+    Returns:
+        The counter value after the increment, or ``None`` when anything goes
+        wrong; the error is logged and never propagated to the caller.
+    """
+    try:
+        txn = _redis_client.pipeline()
+        txn.incr(key)
+        txn.expire(key, _EXPIRATION_SECONDS)
+        # execute() yields one result per queued command: (new count, expire ok).
+        new_count, _ = txn.execute()
+    except Exception as e:
+        logger.error(
+            f"[increment_and_get_count] Error incrementing count for {key}: {e}"
+        )
+        return None
+    else:
+        return new_count
+
+
+def monitor_rate(category: RateCategory) -> Callable[[AuthContextDep], None]:
+    """Build a dependency callable that watches the request rate of ``category``.
+
+    The returned callable counts requests per project in one-minute buckets
+    and reports to telemetry on the request that brings the count to exactly
+    one above the configured threshold. It only observes: it returns ``None``
+    on every path and never rejects a request.
+
+    Usage:
+    dependencies=[
+        Depends(require_permission(Permission.REQUIRE_PROJECT)),
+        Depends(monitor_rate("{category}")),
+    ]
+    """
+
+    def _checker(auth_context: AuthContextDep) -> None:
+        project = auth_context.project
+        if project is None:
+            # No project on the auth context: nothing to attribute the count to.
+            return
+
+        limit = THRESHOLDS.get(category)
+        if limit is None:
+            logger.warning(
+                f"[monitor_rate] No threshold defined for category {category}"
+            )
+            return
+
+        # Fixed one-minute window: requests in the same minute share one key.
+        minute_bucket = int(time.time() // 60)
+        counter_key = f"rate_monitor:{category}:{project.id}:{minute_bucket}"
+
+        try:
+            count = increment_and_get_count(counter_key)
+            # Report only on the first request past the limit. A None count
+            # (counter unavailable) is skipped silently here.
+            if count is None or count != limit + 1:
+                return
+
+            logger.warning(
+                f"[monitor_rate] Rate threshold exceeded for {category} in project {project.id}: count={count}"
+            )
+            record_rate_threshold(
+                project_id=project.id,
+                project_name=project.name,
+                category=category,
+                request_count=count,
+                threshold=limit,
+            )
+        except redis.RedisError as e:
+            # Safety net: a Redis failure must not break the request.
+            logger.error(
+                "[monitor_rate] Redis unavailable, skipping rate check "
+                "(project_id=%s category=%s)",
+                project.id,
+                category,
+                exc_info=e,
+            )
+
+    return _checker
diff --git a/backend/app/core/telemetry.py b/backend/app/core/telemetry.py
@@ -453,6 +453,34 @@ def record_stale_pending_jobs(
         )
 
 
+def record_rate_threshold(
+    *,
+    project_id: int,
+    project_name: str | None,
+    category: str,
+    request_count: int,
+    threshold: int,
+) -> None:
+    """Emit a rate-threshold-exceeded warning event to Sentry.
+
+    Does nothing when the Sentry client is not active. Best-effort: any
+    exception raised while building or sending the event is logged and
+    swallowed, so this function never raises.
+
+    Args:
+        project_id: ID of the project; sent as the ``tenant.project_id`` tag.
+        project_name: Project name; appears in the message text only.
+        category: Route category; sent as the ``route_category`` tag.
+        request_count: Request count at alert time; sent as an extra.
+        threshold: Configured per-minute limit; sent as an extra.
+    """
+    try:
+        if not sentry_sdk.get_client().is_active():
+            return
+        # new_scope() is the SDK 2.x replacement for the deprecated
+        # push_scope(); get_client() above already requires SDK 2.x.
+        with sentry_sdk.new_scope() as scope:
+            scope.set_tag("alert.type", "threshold_rate_monitor")
+            scope.set_tag("tenant.project_id", project_id)
+            scope.set_tag("route_category", category)
+            scope.set_extra("request_count", request_count)
+            scope.set_extra("threshold", threshold)
+            sentry_sdk.capture_message(
+                f"[Threshold-Monitor] {category} rate limit exceeded for project {project_id} | {project_name}: {request_count} req/min "
+                f"(limit {threshold}/min)",
+                level="warning",
+            )
+    except Exception:
+        # logger.exception attaches the active traceback on its own.
+        logger.exception("[record_rate_threshold] Failed to emit alert")
+
+
 def flush_telemetry(timeout_millis: int = 10000) -> None:
     """Force-flush OTel spans into Sentry, then flush Sentry's transport.