Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions config/config.exs
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,13 @@ config :loopctl,
# These are source COLLECTIONS, not topics, so a per-tag MOC for them is noise.
config :loopctl, :knowledge_moc_excluded_tags, ~w(synology-docs synology-netbackup)

# Novelty-gated write-back (ProposalGate): cosine-similarity bands comparing an
# agent's proposed article against the published corpus, judged on the score of
# the nearest neighbor:
#   * score >= duplicate threshold -> nothing is created; the canonical article
#     is returned to the caller instead.
#   * score >= overlap threshold   -> the proposal is created as a draft for the
#     consumer to resolve.
#   * below the overlap threshold  -> novel; created on the requested path.
# These values equal the fallback defaults hard-coded in ProposalGate.
config :loopctl, :knowledge_proposal_duplicate_threshold, 0.97
config :loopctl, :knowledge_proposal_overlap_threshold, 0.88

# DI: WebAuthn adapter — defaults to Wax (overridden in test env)
config :loopctl, :webauthn_adapter, Loopctl.WebAuthn.Wax

Expand Down
5 changes: 5 additions & 0 deletions config/test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,11 @@ config :loopctl, :secrets_adapter, Loopctl.MockSecrets
# overrides it with Mox.expect/3 to inject a deterministic Postgrex.Error.
config :loopctl, :knowledge_suggest_links, Loopctl.MockSuggestLinks

# Novelty-gated write-back: replace the proposal assessor with a mock so that
# propose_article tests can pick the verdict deterministically. DataCase stubs it
# to `:novel` by default (making the gate a no-op); ProposalGate's own tests call
# the gate directly with MockEmbeddingClient.
config :loopctl, :proposal_assessor, Loopctl.MockProposalAssessor

# DI (US-27.3): the router wrapped by LoopctlWeb.Plugs.DBErrorBackstop. A thin
# REAL plug (Loopctl.Test.BackstopRouter) that delegates to LoopctlWeb.Router for
# every request — so the production router stays on the hot path with no global
Expand Down
115 changes: 115 additions & 0 deletions lib/loopctl/knowledge.ex
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,121 @@ defmodule Loopctl.Knowledge do
end
end

@doc """
Creates an article through the novelty gate.

This is `create_article/3` with a semantic dedup check in front of it, so an agent
writing knowledge back cannot quietly fill the corpus with near-duplicates.

The top-level keys of `attrs` are stringified, the configured assessor (see
`Loopctl.Knowledge.ProposalAssessorBehaviour`) scores the proposal against the
published corpus, and the assessment's verdict picks the path:

  * `:duplicate` — a near-identical article already exists. Nothing is created
    and the canonical article (the nearest neighbor) is returned so the caller
    can read or update it instead. If that neighbor can no longer be fetched, the
    assessment is relabelled `:novel` and the proposal is created normally.
  * `:low_novelty` — heavy overlap with existing knowledge. The article is
    created as a **draft** (`"status"` is forced to `"draft"`) with the
    near-neighbors recorded under `metadata.proposal_novelty`, leaving the
    merge-vs-keep decision to whoever works the drafts queue.
  * anything else (`:novel`, or `:unknown` when the gate fell open) — created on
    the requested path with the attrs as given.

The gate itself only reads the neighbor article and delegates the write to
`create_article/3`.

## Return value

`{:ok, result}` where `result` is a map:

    %{
      verdict: :created | :gated_to_draft | :duplicate | :deduplicated,
      article: %Article{},   # the created article, or the existing canonical one
      created: boolean(),    # false for :duplicate / :deduplicated
      assessment: %{verdict:, score:, neighbors:}
    }

or `{:error, :duplicate_title, %Article{}}` / `{:error, %Ecto.Changeset{}}`, both
forwarded unchanged from `create_article/3`.
"""
@spec propose_article(Ecto.UUID.t() | nil, map(), keyword()) ::
        {:ok, map()}
        | {:error, :duplicate_title, Article.t()}
        | {:error, Ecto.Changeset.t()}
def propose_article(tenant_id, attrs, opts \\ []) do
  normalized = stringify_top_keys(attrs)
  assessor = proposal_assessor()

  gate_proposal(tenant_id, normalized, assessor.assess(tenant_id, normalized, opts), opts)
end

# Resolves the assessor module at call time so config (e.g. the test env) can
# swap the implementation; falls back to the real ProposalGate.
defp proposal_assessor,
  do: Application.get_env(:loopctl, :proposal_assessor, Loopctl.Knowledge.ProposalGate)

# Routes an assessed proposal according to the assessment's verdict.
defp gate_proposal(tenant_id, attrs, assessment, opts) do
  case assessment do
    %{verdict: :duplicate} ->
      case canonical_neighbor(tenant_id, assessment, opts) do
        {:ok, canonical} ->
          {:ok,
           %{verdict: :duplicate, article: canonical, created: false, assessment: assessment}}

        # The canonical neighbor vanished (deleted/unpublished) between assess and
        # now, so there is nothing left to dedup against: create on the normal path.
        :error ->
          relabelled = %{assessment | verdict: :novel}
          create_proposal(tenant_id, attrs, relabelled, opts, :created)
      end

    %{verdict: :low_novelty} ->
      # Force a draft and record the near-neighbors for the reviewer.
      draft_attrs = stamp_proposal_metadata(Map.put(attrs, "status", "draft"), assessment)
      create_proposal(tenant_id, draft_attrs, assessment, opts, :gated_to_draft)

    # :novel or :unknown (gate fell open): proceed on the requested path.
    _novel_or_unknown ->
      create_proposal(tenant_id, attrs, assessment, opts, :created)
  end
end

# Calls create_article/3 and wraps its success shapes in the propose_article
# result map; both error shapes are handed back to the caller untouched.
defp create_proposal(tenant_id, attrs, assessment, opts, verdict) do
  outcome = create_article(tenant_id, attrs, opts)

  case outcome do
    {:ok, created} ->
      {:ok, %{verdict: verdict, article: created, created: true, assessment: assessment}}

    {:ok, :deduplicated, existing} ->
      {:ok,
       %{verdict: :deduplicated, article: existing, created: false, assessment: assessment}}

    {:error, :duplicate_title, _existing} = passthrough ->
      passthrough

    {:error, %Ecto.Changeset{}} = passthrough ->
      passthrough
  end
end

# Fetches the article behind the first (nearest) neighbor of the assessment.
# Returns {:ok, article}, or :error when the assessment has no usable neighbor or
# the article cannot be fetched with the caller's visibility options.
defp canonical_neighbor(tenant_id, assessment, opts) do
  with %{neighbors: [%{id: nearest_id} | _]} <- assessment,
       lookup_opts = Keyword.take(opts, [:visibility_agent_id]),
       {:ok, _article} = found <- get_article(tenant_id, nearest_id, lookup_opts) do
    found
  else
    _ -> :error
  end
end

# Records the low-novelty assessment under attrs["metadata"]["proposal_novelty"]
# so a reviewer can see which existing articles the draft overlaps with.
#
# Existing metadata is kept (its top-level keys stringified); a missing metadata
# value is treated as an empty map. If the caller supplied something that is not
# a map, the attrs are returned unchanged instead of crashing in
# stringify_top_keys/1, so create_article/3 receives the caller's original value.
# NOTE(review): this presumes the article changeset rejects non-map metadata — confirm.
defp stamp_proposal_metadata(attrs, %{score: score, neighbors: neighbors}) do
  case attrs["metadata"] || %{} do
    metadata when is_map(metadata) ->
      novelty = %{
        "verdict" => "low_novelty",
        "score" => score,
        "nearest" =>
          Enum.map(neighbors, fn n ->
            %{"id" => n.id, "title" => n.title, "score" => n.similarity_score}
          end)
      }

      stamped =
        metadata
        |> stringify_top_keys()
        |> Map.put("proposal_novelty", novelty)

      Map.put(attrs, "metadata", stamped)

    _not_a_map ->
      attrs
  end
end

# Returns a copy of `map` whose top-level keys are converted with to_string/1;
# values (including nested maps) are left untouched.
defp stringify_top_keys(map) when is_map(map) do
  for {key, value} <- map, into: %{}, do: {to_string(key), value}
end

# Make concurrent/retried creates safe on the (tenant_id, title) active unique
# index. By the time the insert fails the constraint, the winning transaction
# has committed, so the existing row is visible (the recovery SELECT below
Expand Down
42 changes: 42 additions & 0 deletions lib/loopctl/knowledge/proposal_assessor_behaviour.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
defmodule Loopctl.Knowledge.ProposalAssessorBehaviour do
  @moduledoc """
  Contract for judging how NOVEL a *proposed* (not-yet-persisted) knowledge article
  is relative to the published corpus.

  The verdict lets agent write-back be gated three ways:

    * a near-identical proposal is turned away in favour of the canonical article;
    * a high-overlap proposal becomes a draft for the (smarter) consuming agent
      to resolve;
    * a genuinely novel proposal goes through untouched.

  Not to be confused with the creativity "novelty" endpoint
  (`KnowledgeCreativityController`), which measures idea-distance for generation.
  In this module novelty means "does this add anything the corpus doesn't already
  hold?".

  ## Config-based DI

  `Loopctl.Knowledge.propose_article/3` looks the implementation up at runtime with
  `Application.get_env(:loopctl, :proposal_assessor, Loopctl.Knowledge.ProposalGate)`.
  `config/test.exs` points that key at `Loopctl.MockProposalAssessor`.
  """

  @type neighbor :: %{
          id: Ecto.UUID.t(),
          title: String.t() | nil,
          similarity_score: float()
        }

  @type assessment :: %{
          verdict: :duplicate | :low_novelty | :novel | :unknown,
          score: float() | nil,
          neighbors: [neighbor()]
        }

  @doc """
  Assesses one proposal.

  `attrs` holds at least `"title"`/`"body"` (string or atom keys). The result
  carries the verdict, the similarity `score` of the top nearest neighbor (`nil`
  when nothing crosses the overlap floor), and the `neighbors` list itself.

  Implementations MUST fall open: on any embedding/search failure they return
  `%{verdict: :unknown, score: nil, neighbors: []}` so the gate never blocks a write.
  """
  @callback assess(tenant_id :: Ecto.UUID.t() | nil, attrs :: map(), opts :: keyword()) ::
              assessment()
end
92 changes: 92 additions & 0 deletions lib/loopctl/knowledge/proposal_gate.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
defmodule Loopctl.Knowledge.ProposalGate do
  @moduledoc """
  Default `ProposalAssessorBehaviour` — scores a proposed article's novelty against
  the tenant's published corpus by embedding the proposal text and finding its
  nearest neighbors via pgvector cosine similarity.

  Mechanical only: it embeds, searches, and classifies by threshold. It does NOT
  decide merges or edit anything — that judgment belongs to the consuming agent,
  which is a step smarter than the KB. The gate just answers "is this novel?" and
  surfaces the near-neighbors.

  ## Bands (config-tunable)

    * `score >= :knowledge_proposal_duplicate_threshold` (default `0.97`) → `:duplicate`
    * `score >= :knowledge_proposal_overlap_threshold` (default `0.88`) → `:low_novelty`
    * otherwise (incl. nothing above the overlap floor) → `:novel`

  ## Resilience

  Embedding requires a network call and the neighbor search requires the database.
  On ANY failure — an error tuple or empty vector from the embedding client, a
  non-list search result, an exception or exit raised by either collaborator, or a
  system-scoped proposal with no tenant — `assess/3` falls **open**:
  `%{verdict: :unknown, score: nil, neighbors: []}`, so write-back is never blocked
  by the gate.
  """

  @behaviour Loopctl.Knowledge.ProposalAssessorBehaviour

  require Logger

  alias Loopctl.Knowledge.VectorSearch

  @embedding_client Application.compile_env(
                      :loopctl,
                      :embedding_client,
                      Loopctl.Knowledge.EmbeddingClient
                    )

  @default_duplicate_threshold 0.97
  @default_overlap_threshold 0.88
  @neighbors_k 5
  @max_text_length 32_000

  @doc """
  Assesses a proposal's novelty for `tenant_id`.

  `attrs` supplies `"title"`/`"body"` (string or atom keys); `opts` may carry
  `:visibility_agent_id`, which is forwarded to the neighbor search.

  Returns `%{verdict:, score:, neighbors:}`. Never raises for a binary or `nil`
  tenant id: every failure is logged and converted to the open `:unknown` verdict.
  """
  @impl true
  def assess(tenant_id, attrs, opts \\ [])

  # System-scoped (no tenant) proposals are superadmin-only and rare — skip the gate.
  def assess(nil, _attrs, _opts), do: open_verdict()

  def assess(tenant_id, attrs, opts) when is_binary(tenant_id) do
    embed_and_classify(tenant_id, attrs, opts)
  rescue
    # Boundary rescue: the behaviour contract requires falling open on any
    # embedding/search failure, including ones surfaced as exceptions.
    exception ->
      Logger.warning(
        "ProposalGate: assessment raised, falling open: #{Exception.message(exception)}"
      )

      open_verdict()
  catch
    # Timeouts in pooled clients typically surface as exits rather than exceptions.
    :exit, reason ->
      Logger.warning("ProposalGate: assessment exited, falling open: #{inspect(reason)}")
      open_verdict()
  end

  @doc """
  Pure threshold classification — the heart of the gate, unit-tested in isolation.
  `score` is the top neighbor's `similarity_score` (or `nil` when none cleared the
  overlap floor).
  """
  @spec classify(float() | nil, float(), float()) :: :duplicate | :low_novelty | :novel
  def classify(nil, _dup, _overlap), do: :novel
  def classify(score, dup, _overlap) when score >= dup, do: :duplicate
  def classify(score, _dup, overlap) when score >= overlap, do: :low_novelty
  def classify(_score, _dup, _overlap), do: :novel

  # Embeds the proposal text, then searches and classifies. Anything other than a
  # non-empty embedding vector falls open.
  defp embed_and_classify(tenant_id, attrs, opts) do
    dup = config(:knowledge_proposal_duplicate_threshold, @default_duplicate_threshold)
    overlap = config(:knowledge_proposal_overlap_threshold, @default_overlap_threshold)

    case @embedding_client.generate_embedding(build_text(attrs)) do
      {:ok, vector} when is_list(vector) and vector != [] ->
        search_and_classify(tenant_id, vector, dup, overlap, opts)

      other ->
        Logger.warning("ProposalGate: embedding failed, falling open: #{inspect(other)}")
        open_verdict()
    end
  end

  # Runs the nearest-neighbor search and classifies by the first neighbor's score.
  # The overlap floor is passed to the search as its threshold.
  defp search_and_classify(tenant_id, vector, dup, overlap, opts) do
    result =
      VectorSearch.nearest(tenant_id, vector, @neighbors_k,
        threshold: overlap,
        visibility_agent_id: Keyword.get(opts, :visibility_agent_id)
      )

    case result do
      neighbors when is_list(neighbors) ->
        score = neighbors |> List.first() |> neighbor_score()
        %{verdict: classify(score, dup, overlap), score: score, neighbors: neighbors}

      # Anything that is not a neighbor list (e.g. an error tuple) is a search failure.
      other ->
        Logger.warning("ProposalGate: neighbor search failed, falling open: #{inspect(other)}")
        open_verdict()
    end
  end

  defp neighbor_score(nil), do: nil
  defp neighbor_score(%{similarity_score: s}), do: s

  # Title and body joined by a blank line, capped at @max_text_length graphemes.
  defp build_text(attrs) do
    title = attrs["title"] || attrs[:title] || ""
    body = attrs["body"] || attrs[:body] || ""
    String.slice("#{title}\n\n#{body}", 0, @max_text_length)
  end

  defp open_verdict, do: %{verdict: :unknown, score: nil, neighbors: []}

  defp config(key, default), do: Application.get_env(:loopctl, key, default)
end
Loading
Loading