Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions config/config.exs
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ config :loopctl, Oban,
{"0 3 * * 0", Loopctl.Workers.TokenDataArchivalWorker},
{"0 4 * * *", Loopctl.Workers.KnowledgeLintWorker, args: %{"mode" => "all_tenants"}},
{"0 5 * * 0", Loopctl.Workers.KnowledgeMocWorker, args: %{"mode" => "all_tenants"}},
{"30 4 * * *", Loopctl.Workers.RetrievalMetricsWorker, args: %{"mode" => "all_tenants"}},
{"*/5 * * * *", Loopctl.Workers.PendingEnrollmentCleanupWorker},
{"* * * * *", Loopctl.Workers.ComputeSthWorker, args: %{"mode" => "all_tenants"}},
{"* * * * *", Loopctl.Workers.RevokeExpiredDispatchesWorker}
Expand Down
46 changes: 46 additions & 0 deletions lib/loopctl/knowledge/retrieval_metric_snapshot.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
defmodule Loopctl.Knowledge.RetrievalMetricSnapshot do
  @moduledoc """
  A daily retrieval-precision snapshot (agents' KB #3). `precision` is the share of a
  day's search results the agent then opened (search → get/context within
  `window_seconds`) — a mechanical proxy for retrieval quality, tracked over time.

  Rows are keyed by `(tenant_id, day, window_seconds)`; a violation of the
  `retrieval_metric_snapshots_tenant_day_window_index` constraint is surfaced as a
  changeset error rather than a raised exception.

  `tenant_id` is set programmatically, never cast.
  """

  use Loopctl.Schema

  @type t :: %__MODULE__{}

  schema "retrieval_metric_snapshots" do
    tenant_field()

    field :day, :date
    field :window_seconds, :integer
    field :searched, :integer, default: 0
    field :followed_through, :integer, default: 0
    field :precision, :float, default: 0.0
    field :computed_at, :utc_datetime_usec

    timestamps(type: :utc_datetime_usec)
  end

  # Every castable field is also required, so the same list drives both steps.
  @cast_fields [:day, :window_seconds, :searched, :followed_through, :precision, :computed_at]

  @doc """
  Changeset for a snapshot. `tenant_id` is set on the struct, not cast.

  All of `#{inspect(@cast_fields)}` are required. In addition the numeric fields are
  range-checked so a corrupt metric can never be persisted:

    * `window_seconds` must be greater than 0
    * `searched` and `followed_through` must be non-negative
    * `precision` must lie in `0.0..1.0` (it is a share)
  """
  @spec changeset(t(), map()) :: Ecto.Changeset.t()
  def changeset(snapshot \\ %__MODULE__{}, attrs) do
    snapshot
    |> cast(attrs, @cast_fields)
    |> validate_required(@cast_fields)
    |> validate_number(:window_seconds, greater_than: 0)
    |> validate_number(:searched, greater_than_or_equal_to: 0)
    |> validate_number(:followed_through, greater_than_or_equal_to: 0)
    |> validate_number(:precision,
      greater_than_or_equal_to: 0.0,
      less_than_or_equal_to: 1.0
    )
    |> unique_constraint([:tenant_id, :day, :window_seconds],
      name: :retrieval_metric_snapshots_tenant_day_window_index
    )
  end
end
135 changes: 135 additions & 0 deletions lib/loopctl/knowledge/retrieval_metrics.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
defmodule Loopctl.Knowledge.RetrievalMetrics do
  @moduledoc """
  Retrieval-precision metric (agents' KB #3) — closes the loop on whether retrieval is
  actually improving.

  The signal: of the articles a search SURFACED on a given day, how many did the agent
  then OPEN (a `get`/`context` on the same article, by the same api_key, within a
  follow-through window)? That share is `precision`. It's a mechanical proxy, computed
  purely from `article_access_events` — no LLM, no labels — and it should trend UP as
  dedup (#1), navigation (#5) and conflict resolution (#4) make the corpus cleaner and
  the top results more on-target.

  Honest caveat: it measures search → *open*, not search → *useful*. An agent that uses
  a snippet without opening the article counts as a miss, so the absolute number
  undercounts precision. The bias is consistent, so the TREND is the meaningful thing.
  """

  import Ecto.Query

  alias Loopctl.AdminRepo
  alias Loopctl.Knowledge.ArticleAccessEvent
  alias Loopctl.Knowledge.RetrievalMetricSnapshot

  @default_window_seconds 1800
  @default_limit 30
  @max_limit 365

  @doc """
  Compute precision for a single `day` (a `Date`) and follow-through `window_seconds`.

  `day` is interpreted as the half-open UTC range `[day 00:00, day + 1 00:00)` and only
  bounds the *search* events. The matching open may happen after the day ends, as long
  as it is strictly later than the search and no more than `window_seconds` after it.

  Returns `%{searched, followed_through, precision, day, window_seconds}`. `precision`
  is `0.0` when the day had no searches.
  """
  @spec compute(Ecto.UUID.t(), Date.t(), pos_integer()) :: map()
  def compute(tenant_id, %Date{} = day, window_seconds \\ @default_window_seconds) do
    day_start = DateTime.new!(day, ~T[00:00:00.000000], "Etc/UTC")
    day_end = DateTime.add(day_start, 1, :day)

    searched_q =
      from(s in ArticleAccessEvent,
        as: :s,
        where: s.tenant_id == ^tenant_id,
        where: s.access_type == "search",
        where: s.accessed_at >= ^day_start and s.accessed_at < ^day_end
      )

    searched = AdminRepo.aggregate(searched_q, :count, :id)

    # No searches means nothing can have been followed through; skip the EXISTS query.
    followed = if searched > 0, do: count_followed_through(searched_q, window_seconds), else: 0

    precision = if searched > 0, do: followed / searched, else: 0.0

    %{
      day: day,
      window_seconds: window_seconds,
      searched: searched,
      followed_through: followed,
      precision: precision
    }
  end

  @doc """
  Compute a day's precision and upsert the snapshot (idempotent per tenant/day/window).

  On conflict the metric columns, `computed_at` and `updated_at` are replaced.
  `returning: true` makes the returned struct reflect the stored row (including the
  existing row's primary key and `inserted_at` on a conflict) instead of the values
  generated for the attempted insert.

  Returns `{:ok, %RetrievalMetricSnapshot{}}` or `{:error, changeset}`.
  """
  @spec snapshot(Ecto.UUID.t(), Date.t(), pos_integer()) ::
          {:ok, RetrievalMetricSnapshot.t()} | {:error, Ecto.Changeset.t()}
  def snapshot(tenant_id, %Date{} = day, window_seconds \\ @default_window_seconds) do
    attrs =
      tenant_id
      |> compute(day, window_seconds)
      |> Map.take([:day, :window_seconds, :searched, :followed_through, :precision])
      |> Map.put(:computed_at, DateTime.utc_now())

    %RetrievalMetricSnapshot{tenant_id: tenant_id}
    |> RetrievalMetricSnapshot.changeset(attrs)
    |> AdminRepo.insert(
      on_conflict:
        {:replace, [:searched, :followed_through, :precision, :computed_at, :updated_at]},
      conflict_target: [:tenant_id, :day, :window_seconds],
      returning: true
    )
  end

  @doc """
  The precision time series, most recent day first (ties broken by larger window first).

  Opts: `:limit` (default #{@default_limit}, clamped to `1..#{@max_limit}`) and `:offset`
  (default 0, clamped to `>= 0`). Non-integer values fall back to the defaults.

  Returns `%{data: [snapshot maps], meta: %{limit, offset, total_count}}`.
  """
  @spec list_snapshots(Ecto.UUID.t(), keyword()) :: %{data: [map()], meta: map()}
  def list_snapshots(tenant_id, opts \\ []) do
    limit = opts |> int_opt(:limit, @default_limit) |> max(1) |> min(@max_limit)
    offset = opts |> int_opt(:offset, 0) |> max(0)

    base = from(s in RetrievalMetricSnapshot, where: s.tenant_id == ^tenant_id)
    total_count = AdminRepo.aggregate(base, :count, :id)

    data =
      from(s in base,
        order_by: [desc: s.day, desc: s.window_seconds],
        limit: ^limit,
        offset: ^offset,
        select: %{
          day: s.day,
          window_seconds: s.window_seconds,
          searched: s.searched,
          followed_through: s.followed_through,
          precision: s.precision
        }
      )
      |> AdminRepo.all()

    %{data: data, meta: %{limit: limit, offset: offset, total_count: total_count}}
  end

  # Counts the search events in `searched_q` that have a later `get`/`context` event for
  # the same tenant, api key and article within `window_seconds` of the search.
  defp count_followed_through(searched_q, window_seconds) do
    searched_q
    |> where(
      [s],
      exists(
        from(o in ArticleAccessEvent,
          where:
            o.tenant_id == parent_as(:s).tenant_id and
              o.api_key_id == parent_as(:s).api_key_id and
              o.article_id == parent_as(:s).article_id and
              o.access_type in ["get", "context"] and
              o.accessed_at > parent_as(:s).accessed_at and
              fragment(
                "? <= ? + (? * interval '1 second')",
                o.accessed_at,
                parent_as(:s).accessed_at,
                ^window_seconds
              )
        )
      )
    )
    |> AdminRepo.aggregate(:count, :id)
  end

  # Reads an integer option; anything else (nil, strings, floats) yields `default` so the
  # subsequent max/min clamps never compare against a non-number.
  defp int_opt(opts, key, default) do
    case Keyword.get(opts, key, default) do
      value when is_integer(value) -> value
      _other -> default
    end
  end
end
65 changes: 65 additions & 0 deletions lib/loopctl/workers/retrieval_metrics_worker.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
defmodule Loopctl.Workers.RetrievalMetricsWorker do
  @moduledoc """
  Daily snapshot of retrieval precision (agents' KB #3). Fans out over active tenants and
  records yesterday's `RetrievalMetrics.snapshot/3` — the share of surfaced search results
  the agent then opened. Additive/idempotent (upsert per tenant/day/window); computes the
  previous FULL day so the window is complete.

  The fan-out job resolves "yesterday" once and pins it in each per-tenant job's `"day"`
  arg, so a per-tenant job that runs (or is retried) after the next UTC midnight still
  snapshots the day the fan-out intended.

  Scheduled daily via the Oban Cron plugin.
  """

  use Oban.Worker,
    queue: :knowledge,
    max_attempts: 3,
    unique: [fields: [:worker, :args], period: 300]

  require Logger

  import Ecto.Query

  alias Loopctl.AdminRepo
  alias Loopctl.Knowledge.RetrievalMetrics
  alias Loopctl.Tenants.Tenant

  @doc """
  Runs one of two job shapes, selected by `args`:

    * `%{"mode" => "all_tenants"}` — enqueue one per-tenant job for every active tenant.
      Enqueue failures are logged and do not fail the fan-out; always returns `:ok`.
    * `%{"tenant_id" => id}` with optional `"day"` (ISO8601 date) — upsert that tenant's
      snapshot for the day. Returns `:ok`, or `{:error, reason}` so Oban retries.
  """
  @impl Oban.Worker
  def perform(%Oban.Job{args: %{"mode" => "all_tenants"}}) do
    day = Date.to_iso8601(yesterday())

    from(t in Tenant, where: t.status == :active, select: t.id)
    |> AdminRepo.all()
    |> Enum.each(&enqueue_tenant(&1, day))

    :ok
  end

  def perform(%Oban.Job{args: %{"tenant_id" => tenant_id} = args}) do
    day = day_arg(args)

    case RetrievalMetrics.snapshot(tenant_id, day) do
      {:ok, snap} ->
        Logger.info(
          "RetrievalMetricsWorker: tenant=#{tenant_id} day=#{day} " <>
            "searched=#{snap.searched} followed=#{snap.followed_through} " <>
            "precision=#{Float.round(snap.precision, 3)}"
        )

        :ok

      {:error, reason} ->
        {:error, reason}
    end
  end

  # Enqueues the per-tenant job for `day` (ISO8601 string). A failed insert is logged
  # rather than silently dropped; the remaining tenants are still enqueued.
  defp enqueue_tenant(tenant_id, day) do
    case %{"tenant_id" => tenant_id, "day" => day} |> __MODULE__.new() |> Oban.insert() do
      {:ok, _job} ->
        :ok

      {:error, reason} ->
        Logger.error(
          "RetrievalMetricsWorker: failed to enqueue tenant=#{tenant_id} day=#{day}: " <>
            inspect(reason)
        )

        :error
    end
  end

  # Default: yesterday (the last complete UTC day). An explicit "day" arg (ISO8601)
  # allows backfilling a specific day. An unparseable value still falls back to
  # yesterday, but is logged so a mistyped backfill does not go unnoticed.
  defp day_arg(%{"day" => iso}) when is_binary(iso) do
    case Date.from_iso8601(iso) do
      {:ok, d} ->
        d

      _ ->
        Logger.warning(
          "RetrievalMetricsWorker: invalid \"day\" arg #{inspect(iso)}, using yesterday"
        )

        yesterday()
    end
  end

  defp day_arg(_), do: yesterday()

  defp yesterday, do: Date.add(DateTime.utc_now() |> DateTime.to_date(), -1)
end
42 changes: 42 additions & 0 deletions lib/loopctl_web/controllers/knowledge_analytics_controller.ex
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ defmodule LoopctlWeb.KnowledgeAnalyticsController do

alias Loopctl.ApiSpec.Schemas
alias Loopctl.Knowledge
alias Loopctl.Knowledge.RetrievalMetrics

action_fallback LoopctlWeb.FallbackController

Expand Down Expand Up @@ -280,6 +281,47 @@ defmodule LoopctlWeb.KnowledgeAnalyticsController do
json(conn, LoopctlWeb.KnowledgeAnalyticsJSON.unused_articles(rows, opts))
end

# OpenAPI operation spec for the `retrieval_metrics/2` action: two optional integer
# query parameters (`limit`, `offset`) and a 200 response declared as a free-form JSON
# object (`additionalProperties: true`), plus the shared 429 rate-limit error schema.
operation(:retrieval_metrics,
summary: "Retrieval precision time series",
description:
"Daily retrieval PRECISION (agents' KB #3): the share of a day's search results the " <>
"agent then opened (search → get/context within a window). A proxy for retrieval " <>
"quality that trends up as the corpus is de-duplicated and better navigated. Most " <>
"recent day first. Role: orchestrator+.",
parameters: [
limit: [
in: :query,
type: :integer,
description: "Days per page (default 30, max 365). Clamped, never rejected.",
required: false
],
offset: [
in: :query,
type: :integer,
description: "Days to skip (default 0)",
required: false
]
],
responses: %{
200 =>
{"Retrieval metrics", "application/json",
%OpenApiSpex.Schema{type: :object, additionalProperties: true}},
429 => {"Rate limit exceeded", "application/json", Schemas.RateLimitError}
}
)

@doc "GET /api/v1/knowledge/analytics/retrieval-metrics"
def retrieval_metrics(conn, params) do
  tenant_id = conn.assigns.current_api_key.tenant_id

  # Paging opts come from the shared helpers; 30 and 365 are presumably the default and
  # the cap for `limit` (see the operation description) — confirm against put_limit.
  with_limit = put_limit([], params["limit"], 30, 365)
  page_opts = put_offset(with_limit, params["offset"])

  snapshots = RetrievalMetrics.list_snapshots(tenant_id, page_opts)
  json(conn, snapshots)
end

# ---------------------------------------------------------------------------
# Internals
# ---------------------------------------------------------------------------
Expand Down
4 changes: 4 additions & 0 deletions lib/loopctl_web/router.ex
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,10 @@ defmodule LoopctlWeb.Router do
KnowledgeAnalyticsController,
:unused_articles

get "/knowledge/analytics/retrieval-metrics",
KnowledgeAnalyticsController,
:retrieval_metrics

get "/knowledge/analytics/agents/:agent_id",
KnowledgeAnalyticsController,
:agent_usage
Expand Down
10 changes: 10 additions & 0 deletions mcp-server/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,16 @@ All notable changes to `loopctl-mcp-server` are documented here.
Format: [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
Versioning: [Semantic Versioning](https://semver.org/spec/v2.0.0.html)

## 2.29.0 — 2026-07-01 (retrieval precision metric)

### Added

- **`knowledge_retrieval_metrics`** — the daily retrieval-precision time series (agents'
KB #3): per day, the share of search results the agent then opened (search →
get/context within a window). A mechanical proxy for whether retrieval is improving;
trends up as the corpus is de-duplicated, better navigated, and conflict-resolved.
Orchestrator role.

## 2.28.0 — 2026-07-01 (route-the-findings: conflict merge)

### Changed
Expand Down
Loading
Loading