Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions documentcloud/addons/migrations/0030_addonevent_site_indexes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Generated by Django 4.2.2 on 2026-06-08

from django.contrib.postgres.operations import (
AddIndexConcurrently,
RemoveIndexConcurrently,
)
from django.db import migrations, models
import django.db.models.fields.json
import django.db.models.functions.text
import documentcloud.addons.models


class Migration(migrations.Migration):

atomic = False

dependencies = [
("addons", "0029_addonrun_data"),
]

operations = [
# index the `site` origin (scheme + host) for the `domain` filter
AddIndexConcurrently(
model_name="addonevent",
index=models.Index(
documentcloud.addons.models.SiteOrigin("parameters"),
condition=models.Q(("parameters__has_key", "site")),
name="addonevent_site_origin_idx",
),
),
# rebuild the `site` index as UPPER(...) so the case-insensitive
# (iexact) `site` filter can use it
RemoveIndexConcurrently(
model_name="addonevent",
name="addonevent_param_site_idx",
),
AddIndexConcurrently(
model_name="addonevent",
index=models.Index(
django.db.models.functions.text.Upper(
django.db.models.fields.json.KeyTextTransform("site", "parameters")
),
condition=models.Q(("parameters__has_key", "site")),
name="addonevent_param_site_idx",
),
),
]
28 changes: 26 additions & 2 deletions documentcloud/addons/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
from django.conf import settings
from django.core.cache import cache
from django.db import models, transaction
from django.db.models import F, Q
from django.db.models import Func, Q
from django.db.models.fields.json import KeyTextTransform
from django.db.models.functions import Upper
from django.utils.translation import gettext_lazy as _

# Standard Library
Expand Down Expand Up @@ -34,6 +36,21 @@
logger = logging.getLogger(__name__)


class SiteOrigin(Func):
# pylint: disable=abstract-method
"""Extract the lowercased origin (scheme + host) from a JSON `site` URL.

Captures the scheme and authority of the stored URL, stopping at the path,
query or fragment, so `https://www.nifc.gov/foo` yields `https://www.nifc.gov`.
Used both to build the expression index on `AddOnEvent` and to filter by it,
so the index and the query share one definition and cannot drift apart. All
component functions are IMMUTABLE, so this is safe to index.
"""

template = "LOWER(SUBSTRING(%(expressions)s ->> 'site' FROM '^(https?://[^/?#]+)'))"
output_field = models.TextField()


class AddOn(models.Model):
objects = AddOnQuerySet.as_manager()

Expand Down Expand Up @@ -579,10 +596,17 @@ class AddOnEvent(models.Model):
class Meta:
indexes = [
models.Index(
F("parameters__site"),
# matches the `UPPER(parameters ->> 'site')` the `site` filter's
# iexact lookup emits, so a case-insensitive match can use it
Upper(KeyTextTransform("site", "parameters")),
name="addonevent_param_site_idx",
condition=Q(parameters__has_key="site"),
),
models.Index(
SiteOrigin("parameters"),
name="addonevent_site_origin_idx",
condition=Q(parameters__has_key="site"),
),
]

def __str__(self):
Expand Down
65 changes: 65 additions & 0 deletions documentcloud/addons/tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,48 @@ def test_filter_site_absent_is_noop(self, client):
assert response.status_code == status.HTTP_200_OK
assert len(response.json()["results"]) == 3

def test_filter_domain(self, client):
"""Filter runs by the origin of the event's parameters.site"""
user = UserFactory()
matching_event = AddOnEventFactory(
user=user,
parameters={
"site": "https://www.nifc.gov/fire-information/statistics/wildfires",
"selector": "*",
},
)
other_event = AddOnEventFactory(
user=user, parameters={"site": "https://www.other.com/path"}
)
no_site_event = AddOnEventFactory(user=user, parameters={"selector": "*"})
matching_run = AddOnRunFactory(user=user, event=matching_event)
AddOnRunFactory(user=user, event=other_event)
AddOnRunFactory(user=user, event=no_site_event)
AddOnRunFactory(user=user, event=None)
client.force_authenticate(user=user)
# the origin matches regardless of path
response = client.get("/api/addon_runs/", {"domain": "https://www.nifc.gov"})
assert response.status_code == status.HTTP_200_OK
uuids = [r["uuid"] for r in response.json()["results"]]
assert uuids == [str(matching_run.uuid)]
# a bare host and a mismatched scheme do not match
for domain in ("www.nifc.gov", "http://www.nifc.gov"):
response = client.get("/api/addon_runs/", {"domain": domain})
assert response.status_code == status.HTTP_200_OK
assert response.json()["results"] == [], domain

def test_filter_domain_no_partial_host_match(self, client):
"""The domain filter matches whole origins, not substrings"""
user = UserFactory()
event = AddOnEventFactory(
user=user, parameters={"site": "https://www.nifc.gov.evil.com/path"}
)
AddOnRunFactory(user=user, event=event)
client.force_authenticate(user=user)
response = client.get("/api/addon_runs/", {"domain": "https://www.nifc.gov"})
assert response.status_code == status.HTTP_200_OK
assert response.json()["results"] == []


@pytest.mark.django_db()
class TestAddOnEventAPI:
Expand Down Expand Up @@ -351,6 +393,29 @@ def test_filter_site_no_match(self, client):
assert response.status_code == status.HTTP_200_OK
assert response.json()["results"] == []

def test_filter_domain(self, client):
"""Filter events by the origin of their parameters.site"""
user = UserFactory()
matching = AddOnEventFactory(
user=user,
parameters={
"site": "https://www.nifc.gov/fire-information/statistics/wildfires",
"selector": "*",
},
)
AddOnEventFactory(user=user, parameters={"site": "https://www.other.com/path"})
AddOnEventFactory(user=user, parameters={"selector": "*"})
client.force_authenticate(user=user)
response = client.get("/api/addon_events/", {"domain": "https://www.nifc.gov"})
assert response.status_code == status.HTTP_200_OK
ids = [r["id"] for r in response.json()["results"]]
assert ids == [matching.pk]
# a bare host and a mismatched scheme do not match
for domain in ("www.nifc.gov", "http://www.nifc.gov"):
response = client.get("/api/addon_events/", {"domain": domain})
assert response.status_code == status.HTTP_200_OK
assert response.json()["results"] == [], domain

def test_filter_message(self, client):
"""Filter runs by message"""
user = UserFactory()
Expand Down
81 changes: 77 additions & 4 deletions documentcloud/addons/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
AddOnRun,
GitHubAccount,
GitHubInstallation,
SiteOrigin,
VisualAddOn,
)
from documentcloud.addons.serializers import (
Expand All @@ -65,6 +66,20 @@
logger = logging.getLogger(__name__)


def domain_to_origin(value):
"""Normalize a domain filter value to a lowercased origin (scheme + host).

Expects an origin such as `https://www.nifc.gov`; a value without a scheme
will not match. A full URL is accepted and its path is discarded. Returns
None if no scheme + host can be parsed, lowercased to match the `SiteOrigin`
expression index.
"""
parsed = furl(value.strip())
if not parsed.scheme or not parsed.host:
return None
return f"{parsed.scheme}://{parsed.netloc}".lower()
Comment thread
eyeseast marked this conversation as resolved.


class AddOnViewSet(viewsets.ModelViewSet):
serializer_class = AddOnSerializer
queryset = AddOn.objects.none()
Expand Down Expand Up @@ -741,18 +756,47 @@ class Filter(django_filters.FilterSet):
)
dismissed = django_filters.BooleanFilter(help_text="Was this run dismissed?")
site = django_filters.CharFilter(
field_name="event__parameters__site",
lookup_expr="iexact",
method="site_filter",
label="Site",
help_text="Filter runs by the `site` value in the event's parameters.",
)
domain = django_filters.CharFilter(
method="domain_filter",
label="Domain",
help_text=(
"Filter runs by the origin of the event's `site` parameter, e.g. "
"`https://www.nifc.gov`."
),
)
message = django_filters.CharFilter(
field_name="message",
lookup_expr="exact",
label="Message",
help_text="Filter runs by their progress message.",
)

def site_filter(self, queryset, name, value):
# pylint: disable=unused-argument
# the has_key clause lets the partial `addonevent_param_site_idx`
# index on AddOnEvent serve the case-insensitive match
return queryset.filter(
event__parameters__has_key="site",
event__parameters__site__iexact=value,
)

def domain_filter(self, queryset, name, value):
# pylint: disable=unused-argument
origin = domain_to_origin(value)
if origin is None:
return queryset.none()
# filter on has_key + the SiteOrigin expression so the partial
# `addonevent_site_origin_idx` index on AddOnEvent is usable
return (
queryset.filter(event__parameters__has_key="site")
.annotate(_site_origin=SiteOrigin("event__parameters"))
.filter(_site_origin=origin)
)

class Meta:
model = AddOnRun
fields = {
Expand Down Expand Up @@ -983,11 +1027,40 @@ class Filter(django_filters.FilterSet):
help_text="Filter events by a specific add-on ID.",
)
site = django_filters.CharFilter(
field_name="parameters__site",
lookup_expr="iexact",
method="site_filter",
label="Site",
help_text="Filter events by the `site` value in their parameters.",
)
domain = django_filters.CharFilter(
method="domain_filter",
label="Domain",
help_text=(
"Filter events by the origin of their `site` parameter, e.g. "
"`https://www.nifc.gov`."
),
)

def site_filter(self, queryset, name, value):
# pylint: disable=unused-argument
# the has_key clause lets the partial `addonevent_param_site_idx`
# index serve the case-insensitive match
return queryset.filter(
parameters__has_key="site",
parameters__site__iexact=value,
)

def domain_filter(self, queryset, name, value):
# pylint: disable=unused-argument
origin = domain_to_origin(value)
if origin is None:
return queryset.none()
# filter on has_key + the SiteOrigin expression so the partial
# `addonevent_site_origin_idx` index is usable
return (
queryset.filter(parameters__has_key="site")
.annotate(_site_origin=SiteOrigin("parameters"))
.filter(_site_origin=origin)
)

class Meta:
model = AddOnEvent
Expand Down
Loading