Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 95 additions & 17 deletions lib/loopctl/knowledge.ex
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ defmodule Loopctl.Knowledge do
alias Loopctl.Knowledge.Article
alias Loopctl.Knowledge.ArticleLink
alias Loopctl.Knowledge.ConflictResolution
alias Loopctl.Knowledge.KbCuration
alias Loopctl.Knowledge.VectorSearch
alias Loopctl.Projects.Project
alias Loopctl.Webhooks.EventGenerator
Expand Down Expand Up @@ -377,6 +378,7 @@ defmodule Loopctl.Knowledge do
defp gate_proposal(tenant_id, attrs, %{verdict: :duplicate} = assessment, opts) do
case canonical_neighbor(tenant_id, assessment, opts) do
{:ok, existing} ->
log_gate(tenant_id, "gate_duplicate", "rejected duplicate", existing, assessment, opts)
{:ok, %{verdict: :duplicate, article: existing, created: false, assessment: assessment}}

# The canonical neighbor vanished (deleted/unpublished) between assess and now —
Expand All @@ -392,6 +394,8 @@ defmodule Loopctl.Knowledge do
|> Map.put("status", "draft")
|> stamp_proposal_metadata(assessment)

neighbor = List.first(assessment.neighbors)
log_gate(tenant_id, "gate_draft", "drafted (high overlap)", neighbor, assessment, opts)
create_proposal(tenant_id, gated_attrs, assessment, opts, :gated_to_draft)
end

Expand All @@ -416,6 +420,31 @@ defmodule Loopctl.Knowledge do
end
end

# Writes one concise curation-log line describing a novelty-gate decision.
#
# Arguments:
#   * `kind`       - event kind handed straight to `KbCuration.record/4`.
#   * `prefix`     - leading text of the summary (e.g. "rejected duplicate").
#   * `neighbor`   - the overlapping article/neighbor, or nil when none is known.
#   * `assessment` - must expose `:score`; a truthy score is appended to the summary as
#                    `(sim=…)` and is always stored under the "similarity" metadata key.
#   * `opts`       - `:actor_label` takes precedence over `:actor_id` for the event actor.
#
# Nothing is written unless the tenant has kb_curation_log on — `KbCuration.record/4`
# no-ops otherwise. Returns whatever `KbCuration.record/4` returns.
defp log_gate(tenant_id, kind, prefix, neighbor, assessment, opts) do
  # Any map or struct carrying an :id counts as a neighbor (this covers %Article{} too).
  # The title is optional, so a title-less neighbor still ends up in `refs`.
  {nid, ntitle} =
    case neighbor do
      %{id: id} -> {id, Map.get(neighbor, :title)}
      _ -> {nil, nil}
    end

  score = assessment.score
  title_part = if ntitle, do: " of \"#{ntitle}\"", else: ""
  sim_part = if score, do: " (sim=#{fmt_sim(score)})", else: ""

  KbCuration.record(tenant_id, kind, prefix <> title_part <> sim_part,
    # List.wrap/1 turns a nil id into [] and a present id into [id].
    refs: List.wrap(nid),
    actor: Keyword.get(opts, :actor_label) || Keyword.get(opts, :actor_id),
    metadata: %{"similarity" => score}
  )
end

# Renders a similarity score for a log summary: floats are printed with exactly three
# decimal places, anything else falls back to its `to_string/1` form.
defp fmt_sim(s) do
  if is_float(s) do
    :erlang.float_to_binary(s, [{:decimals, 3}])
  else
    to_string(s)
  end
end

defp canonical_neighbor(tenant_id, %{neighbors: [%{id: id} | _]}, opts) do
case get_article(tenant_id, id, Keyword.take(opts, [:visibility_agent_id])) do
{:ok, article} -> {:ok, article}
Expand Down Expand Up @@ -3324,23 +3353,36 @@ defmodule Loopctl.Knowledge do
%ConflictResolution{tenant_id: tenant_id}
|> ConflictResolution.changeset(row_attrs)

AdminRepo.insert(changeset,
on_conflict:
{:replace,
[
:authoritative_article_id,
:classification,
:disposition,
:confidence,
:evidence,
:annotated_by,
:annotated_at,
:executed_at,
:execution_result,
:updated_at
]},
conflict_target: [:tenant_id, :source_article_id, :target_article_id]
)
result =
AdminRepo.insert(changeset,
on_conflict:
{:replace,
[
:authoritative_article_id,
:classification,
:disposition,
:confidence,
:evidence,
:annotated_by,
:annotated_at,
:executed_at,
:execution_result,
:updated_at
]},
conflict_target: [:tenant_id, :source_article_id, :target_article_id]
)

with {:ok, %ConflictResolution{disposition: :dismiss} = res} <- result do
log_resolution(
tenant_id,
res,
"dismiss",
"dismissed as #{res.classification || "not-a-conflict"}",
[res.source_article_id, res.target_article_id]
)
end

result
end

@doc """
Expand Down Expand Up @@ -3410,6 +3452,17 @@ defmodule Loopctl.Knowledge do
"loser" => loser
})

log_resolution(
tenant_id,
r,
"supersede",
"\"#{title_of(loser)}\" retired for \"#{title_of(winner)}\"",
[
winner,
loser
]
)

true

# Already superseded / link exists → the disposition is effectively done; record
Expand Down Expand Up @@ -3478,6 +3531,13 @@ defmodule Loopctl.Knowledge do
end)

mark_resolution_executed(r, %{"action" => "merged_draft", "draft_id" => draft.id})

log_resolution(tenant_id, r, "merge", "drafted \"#{draft.title}\" from 2 sources", [
draft.id,
r.source_article_id,
r.target_article_id
])

true

# A draft with this title already exists (likely a prior run) — stop retrying.
Expand Down Expand Up @@ -3509,6 +3569,24 @@ defmodule Loopctl.Knowledge do
|> AdminRepo.update()
end

# Emits a short curation-log entry for a conflict resolution. `refs` lists the article
# ids involved; the actor and confidence are read off the recorded verdict `r`.
# `KbCuration.record/4` skips the write unless the tenant has kb_curation_log on.
defp log_resolution(tenant_id, %ConflictResolution{} = r, kind, summary, refs) do
  %ConflictResolution{annotated_by: annotator, confidence: raw_confidence} = r

  # A present confidence is stringified; a falsy one is passed through untouched.
  confidence_label = if raw_confidence, do: to_string(raw_confidence), else: raw_confidence

  log_opts = [refs: refs, actor: annotator, confidence: confidence_label]
  KbCuration.record(tenant_id, kind, summary, log_opts)
end

# Best-effort display title for an article id, used to build curation-log summaries.
#
# Returns the article's title when it is a non-empty string. Falls back to the id itself
# when the article cannot be found or has no usable title, so the summary never shows an
# empty quoted name. A nil id short-circuits to nil rather than reaching
# `AdminRepo.get/2` (presumably an Ecto repo, whose `get` raises on a nil id).
defp title_of(nil), do: nil

defp title_of(article_id) do
  case AdminRepo.get(Article, article_id) do
    %Article{title: title} when is_binary(title) and title != "" -> title
    _ -> article_id
  end
end

@doc """
Lists all links for an article (both outgoing and incoming),
with linked articles preloaded.
Expand Down
118 changes: 118 additions & 0 deletions lib/loopctl/knowledge/kb_curation.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
defmodule Loopctl.Knowledge.KbCuration do
  @moduledoc """
  Toggleable, concise, human-readable log of KB CURATION adjustments — the "what did the KB
  change" feed for analyzing the agents'-KB rollout, distinct from the verbose immutable
  `audit_log`.

  **Toggle:** per-tenant, via `tenant.settings["kb_curation_log"]` (default off). Flip it
  with the admin tenant API (`PATCH /api/v1/admin/tenants/:id` with
  `settings: {"kb_curation_log": true}`); read the current value from
  `GET /api/v1/admin/tenants/:id`. When off, `record/4` writes no rows — its only cost is
  the tenant lookup that reads the setting — so you turn it on for the rollout months and
  off after.

  Call sites invoke `record/4` at each mutation (gate decisions, conflict
  supersede/merge/dismiss); the log is read back via `list/2`
  (`GET /api/v1/knowledge/curation-log`).
  """

  import Ecto.Query

  alias Loopctl.AdminRepo
  alias Loopctl.Knowledge.KbCurationEvent
  alias Loopctl.Tenants.Tenant

  require Logger

  @max_summary 500
  @default_limit 50
  @max_limit 500

  @doc """
  Record one curation adjustment — but ONLY when the tenant has `kb_curation_log` on.

  Fire-and-forget: for a `nil` or binary `tenant_id` this always returns `:ok`. A failure
  while reading the tenant setting or inserting the event is logged as a warning and
  swallowed, so a curation-log problem never fails the KB mutation being logged.

  Opts: `:refs` (article ids), `:actor`, `:confidence`, `:metadata`, `:at`.
  """
  @spec record(Ecto.UUID.t() | nil, String.t(), String.t(), keyword()) :: :ok
  def record(tenant_id, kind, summary, opts \\ [])

  def record(nil, _kind, _summary, _opts), do: :ok

  def record(tenant_id, kind, summary, opts) when is_binary(tenant_id) do
    if enabled?(tenant_id), do: insert_event(tenant_id, kind, summary, opts)

    :ok
  rescue
    # This is an observability side channel: honor the "never raises" contract instead
    # of letting a lookup/insert error propagate into the caller's mutation path.
    error ->
      Logger.warning(
        "kb_curation: failed to record #{inspect(kind)} event: " <> Exception.message(error)
      )

      :ok
  end

  @doc """
  Whether curation logging is on for a tenant (its `settings["kb_curation_log"]`).

  Only a literal `true` enables it; a missing tenant, non-map settings, or any other
  value counts as off.
  """
  @spec enabled?(Ecto.UUID.t()) :: boolean()
  def enabled?(tenant_id) do
    case AdminRepo.get(Tenant, tenant_id) do
      %Tenant{settings: settings} when is_map(settings) ->
        Map.get(settings, "kb_curation_log", false) == true

      _ ->
        false
    end
  end

  @doc """
  The curation feed, most recent first. Opts: `:kind` (filter), `:since` (a `Date` or
  `DateTime` lower bound), `:limit` (default 50, max 500), `:offset`. Returns
  `%{data: [event maps], meta: %{limit, offset, total_count}}`.

  A `:limit` or `:offset` that is not an integer (e.g. `nil` from an absent request
  param) falls back to its default rather than being compared by term order.
  """
  @spec list(Ecto.UUID.t(), keyword()) :: %{data: [map()], meta: map()}
  def list(tenant_id, opts \\ []) do
    limit = opts |> Keyword.get(:limit) |> normalize_limit()
    offset = opts |> Keyword.get(:offset) |> normalize_offset()

    base =
      from(e in KbCurationEvent, where: e.tenant_id == ^tenant_id)
      |> filter_kind(Keyword.get(opts, :kind))
      |> filter_since(Keyword.get(opts, :since))

    # Counted before limit/offset are applied, so it reflects the whole filtered feed.
    total_count = AdminRepo.aggregate(base, :count, :id)

    data =
      from(e in base,
        order_by: [desc: e.at, desc: e.id],
        limit: ^limit,
        offset: ^offset,
        select: %{
          at: e.at,
          kind: e.kind,
          summary: e.summary,
          refs: e.refs,
          actor: e.actor,
          confidence: e.confidence
        }
      )
      |> AdminRepo.all()

    %{data: data, meta: %{limit: limit, offset: offset, total_count: total_count}}
  end

  # Clamps an integer limit into 1..@max_limit; anything else gets the default.
  defp normalize_limit(limit) when is_integer(limit), do: limit |> max(1) |> min(@max_limit)
  defp normalize_limit(_other), do: @default_limit

  # Negative offsets are floored at 0; non-integers fall back to 0.
  defp normalize_offset(offset) when is_integer(offset), do: max(offset, 0)
  defp normalize_offset(_other), do: 0

  # Builds and inserts one event row. The summary is truncated to @max_summary
  # characters and `:at` defaults to the current UTC time. An invalid changeset is
  # logged and dropped rather than silently discarded.
  defp insert_event(tenant_id, kind, summary, opts) do
    attrs = %{
      kind: kind,
      summary: String.slice(summary, 0, @max_summary),
      refs: Keyword.get(opts, :refs, []),
      actor: Keyword.get(opts, :actor),
      confidence: Keyword.get(opts, :confidence),
      metadata: Keyword.get(opts, :metadata, %{}),
      at: Keyword.get(opts, :at) || DateTime.utc_now()
    }

    changeset = KbCurationEvent.changeset(%KbCurationEvent{tenant_id: tenant_id}, attrs)

    case AdminRepo.insert(changeset) do
      {:ok, _event} ->
        :ok

      {:error, failed} ->
        Logger.warning(
          "kb_curation: dropped invalid #{inspect(kind)} event: #{inspect(failed.errors)}"
        )

        :ok
    end
  end

  # No kind given → leave the query unfiltered.
  defp filter_kind(query, nil), do: query
  defp filter_kind(query, kind), do: where(query, [e], e.kind == ^kind)

  # No lower bound given → leave the query unfiltered.
  defp filter_since(query, nil), do: query

  # A bare date means "from the start of that day, UTC".
  defp filter_since(query, %Date{} = d) do
    filter_since(query, DateTime.new!(d, ~T[00:00:00.000000], "Etc/UTC"))
  end

  defp filter_since(query, %DateTime{} = dt), do: where(query, [e], e.at >= ^dt)
end
37 changes: 37 additions & 0 deletions lib/loopctl/knowledge/kb_curation_event.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
defmodule Loopctl.Knowledge.KbCurationEvent do
  @moduledoc """
  A single row of the KB curation feed: one short, human-readable adjustment such as a
  novelty-gate decision or a conflict supersede/merge/dismiss. This is the skimmable
  "what did the KB change" feed, kept separate from the verbose immutable `audit_log`.

  Rows are written only when the tenant has `settings["kb_curation_log"]` on (rollout
  observability). `tenant_id` is assigned on the struct by the caller and is never cast
  from attributes.
  """

  use Loopctl.Schema

  @type t :: %__MODULE__{}

  # Attributes accepted from callers; `tenant_id` is deliberately absent.
  @cast_fields [:kind, :summary, :refs, :actor, :confidence, :metadata, :at]

  # Every event must say what happened, in words, and when.
  @required_fields [:kind, :summary, :at]

  schema "kb_curation_events" do
    tenant_field()

    field :kind, :string
    field :summary, :string
    field :refs, {:array, :binary_id}, default: []
    field :actor, :string
    field :confidence, :string
    field :metadata, :map, default: %{}
    field :at, :utc_datetime_usec

    timestamps(type: :utc_datetime_usec, updated_at: false)
  end

  @doc """
  Builds the changeset for a curation event.

  Casts the caller-supplied attributes and requires `:kind`, `:summary` and `:at`.
  `tenant_id` is taken from the given struct, not from `attrs`.
  """
  @spec changeset(t(), map()) :: Ecto.Changeset.t()
  def changeset(event \\ %__MODULE__{}, attrs) do
    casted = cast(event, attrs, @cast_fields)
    validate_required(casted, @required_fields)
  end
end
10 changes: 7 additions & 3 deletions lib/loopctl/tenants/tenant.ex
Original file line number Diff line number Diff line change
Expand Up @@ -163,17 +163,21 @@ defmodule Loopctl.Tenants.Tenant do
not is_map(value) or is_struct(value) ->
[settings: "must be a map"]

not valid_knowledge_auto_extract?(value) ->
not valid_boolean_setting?(value, "knowledge_auto_extract") ->
[settings: "knowledge_auto_extract must be a boolean"]

not valid_boolean_setting?(value, "kb_curation_log") ->
[settings: "kb_curation_log must be a boolean"]

true ->
[]
end
end)
end

defp valid_knowledge_auto_extract?(settings) do
case Map.get(settings, "knowledge_auto_extract") do
# A recognized boolean setting is either absent or an actual boolean.
defp valid_boolean_setting?(settings, key) do
case Map.get(settings, key) do
nil -> true
val when is_boolean(val) -> true
_ -> false
Expand Down
Loading
Loading