From 46cda5df5d2d735f7d7fced7526aa1c9c79f0435 Mon Sep 17 00:00:00 2001 From: Chris Amico Date: Thu, 4 Jun 2026 14:50:23 -0400 Subject: [PATCH 1/6] Basic domain filter using regex --- documentcloud/addons/tests/test_views.py | 57 ++++++++++++++++++++++++ documentcloud/addons/views.py | 46 +++++++++++++++++++ 2 files changed, 103 insertions(+) diff --git a/documentcloud/addons/tests/test_views.py b/documentcloud/addons/tests/test_views.py index 21a285f4..da6cf4a9 100644 --- a/documentcloud/addons/tests/test_views.py +++ b/documentcloud/addons/tests/test_views.py @@ -309,6 +309,44 @@ def test_filter_site_absent_is_noop(self, client): assert response.status_code == status.HTTP_200_OK assert len(response.json()["results"]) == 3 + def test_filter_domain(self, client): + """Filter runs by the host of the event's parameters.site""" + user = UserFactory() + matching_event = AddOnEventFactory( + user=user, + parameters={ + "site": "https://www.nifc.gov/fire-information/statistics/wildfires", + "selector": "*", + }, + ) + other_event = AddOnEventFactory( + user=user, parameters={"site": "https://www.other.com/path"} + ) + no_site_event = AddOnEventFactory(user=user, parameters={"selector": "*"}) + matching_run = AddOnRunFactory(user=user, event=matching_event) + AddOnRunFactory(user=user, event=other_event) + AddOnRunFactory(user=user, event=no_site_event) + AddOnRunFactory(user=user, event=None) + client.force_authenticate(user=user) + # bare host and full origin both match, regardless of path + for domain in ("www.nifc.gov", "https://www.nifc.gov"): + response = client.get("/api/addon_runs/", {"domain": domain}) + assert response.status_code == status.HTTP_200_OK + uuids = [r["uuid"] for r in response.json()["results"]] + assert uuids == [str(matching_run.uuid)], domain + + def test_filter_domain_no_partial_host_match(self, client): + """The domain filter matches whole hosts, not substrings""" + user = UserFactory() + event = AddOnEventFactory( + user=user, parameters={"site": "https://www.nifc.gov.evil.com/path"} + ) + AddOnRunFactory(user=user, event=event) + client.force_authenticate(user=user) + response = client.get("/api/addon_runs/", {"domain": "www.nifc.gov"}) + assert response.status_code == status.HTTP_200_OK + assert response.json()["results"] == [] + @pytest.mark.django_db() class TestAddOnEventAPI: @@ -351,6 +389,25 @@ def test_filter_site_no_match(self, client): assert response.status_code == status.HTTP_200_OK assert response.json()["results"] == [] + def test_filter_domain(self, client): + """Filter events by the host of their parameters.site""" + user = UserFactory() + matching = AddOnEventFactory( + user=user, + parameters={ + "site": "https://www.nifc.gov/fire-information/statistics/wildfires", + "selector": "*", + }, + ) + AddOnEventFactory(user=user, parameters={"site": "https://www.other.com/path"}) + AddOnEventFactory(user=user, parameters={"selector": "*"}) + client.force_authenticate(user=user) + for domain in ("www.nifc.gov", "https://www.nifc.gov"): + response = client.get("/api/addon_events/", {"domain": domain}) + assert response.status_code == status.HTTP_200_OK + ids = [r["id"] for r in response.json()["results"]] + assert ids == [matching.pk], domain + def test_filter_message(self, client): """Filter runs by message""" user = UserFactory() diff --git a/documentcloud/addons/views.py b/documentcloud/addons/views.py index acd03fd0..3e643b4f 100644 --- a/documentcloud/addons/views.py +++ b/documentcloud/addons/views.py @@ -30,6 +30,7 @@ import hmac import json import logging +import re from collections import defaultdict from datetime import timedelta from functools import lru_cache @@ -65,6 +66,21 @@ logger = logging.getLogger(__name__) +def domain_site_regex(value): + """Build a regex matching `site` URLs whose host equals the given domain. + + Accepts either a bare host (`www.nifc.gov`) or a full URL + (`https://www.nifc.gov/path`) and matches stored site values regardless of + scheme, port, path or query string. Returns None if no host can be parsed. + """ + value = value.strip() + # furl only parses the host when an authority is present, so ensure one + host = furl(value if "//" in value else "//" + value).host + if not host: + return None + return r"^(https?://)?" + re.escape(host) + r"(:\d+)?($|[/?#])" + + class AddOnViewSet(viewsets.ModelViewSet): serializer_class = AddOnSerializer queryset = AddOn.objects.none() @@ -746,6 +762,14 @@ class Filter(django_filters.FilterSet): label="Site", help_text="Filter runs by the `site` value in the event's parameters.", ) + domain = django_filters.CharFilter( + method="domain_filter", + label="Domain", + help_text=( + "Filter runs by the host of the event's `site` parameter, e.g. " + "`www.nifc.gov` or `https://www.nifc.gov`." + ), + ) message = django_filters.CharFilter( field_name="message", lookup_expr="exact", @@ -753,6 +777,13 @@ class Filter(django_filters.FilterSet): help_text="Filter runs by their progress message.", ) + def domain_filter(self, queryset, name, value): + # pylint: disable=unused-argument + pattern = domain_site_regex(value) + if pattern is None: + return queryset.none() + return queryset.filter(event__parameters__site__iregex=pattern) + class Meta: model = AddOnRun fields = { @@ -988,6 +1019,21 @@ class Filter(django_filters.FilterSet): label="Site", help_text="Filter events by the `site` value in their parameters.", ) + domain = django_filters.CharFilter( + method="domain_filter", + label="Domain", + help_text=( + "Filter events by the host of their `site` parameter, e.g. " + "`www.nifc.gov` or `https://www.nifc.gov`." + ), + ) + + def domain_filter(self, queryset, name, value): + # pylint: disable=unused-argument + pattern = domain_site_regex(value) + if pattern is None: + return queryset.none() + return queryset.filter(parameters__site__iregex=pattern) class Meta: model = AddOnEvent From 3bc91baf4fd9883b43dbe37a1f46fbad66cc0946 Mon Sep 17 00:00:00 2001 From: Chris Amico Date: Mon, 8 Jun 2026 12:38:26 -0400 Subject: [PATCH 2/6] Filter on domain using an index --- ...030_addonevent_addonevent_site_host_idx.py | 22 +++++++++++ documentcloud/addons/models.py | 25 +++++++++++- documentcloud/addons/views.py | 38 ++++++++++++------- 3 files changed, 70 insertions(+), 15 deletions(-) create mode 100644 documentcloud/addons/migrations/0030_addonevent_addonevent_site_host_idx.py diff --git a/documentcloud/addons/migrations/0030_addonevent_addonevent_site_host_idx.py b/documentcloud/addons/migrations/0030_addonevent_addonevent_site_host_idx.py new file mode 100644 index 00000000..b18c2381 --- /dev/null +++ b/documentcloud/addons/migrations/0030_addonevent_addonevent_site_host_idx.py @@ -0,0 +1,22 @@ +# Generated by Django 4.2.2 on 2026-06-08 16:21 + +from django.db import migrations, models +import documentcloud.addons.models + + +class Migration(migrations.Migration): + + dependencies = [ + ("addons", "0029_addonrun_data"), + ] + + operations = [ + migrations.AddIndex( + model_name="addonevent", + index=models.Index( + documentcloud.addons.models.SiteHost("parameters"), + condition=models.Q(("parameters__has_key", "site")), + name="addonevent_site_host_idx", + ), + ), + ] diff --git a/documentcloud/addons/models.py b/documentcloud/addons/models.py index 71884a8e..9e4e8cc2 100644 --- a/documentcloud/addons/models.py +++ b/documentcloud/addons/models.py @@ -2,7 +2,7 @@ from django.conf import settings from django.core.cache import cache from django.db import models, transaction -from django.db.models import F, Q +from django.db.models import F, Func, Q from django.utils.translation import gettext_lazy as _ # Standard Library @@ -34,6 +34,24 @@ logger = logging.getLogger(__name__) +class SiteHost(Func): + # pylint: disable=abstract-method + """Extract the lowercased host from the `site` URL of a JSON parameters field. + + Captures the authority (host) of the stored URL, stopping at a port, path, + query or fragment, with or without a leading scheme. Used both to build the + expression index on `AddOnEvent` and to filter by it, so the index and the + query share one definition and cannot drift apart. All component functions + are IMMUTABLE, so this is safe to index. + """ + + template = ( + "LOWER(SUBSTRING(%(expressions)s ->> 'site' " + "FROM '^(?:https?://)?([^/:?#]+)'))" + ) + output_field = models.TextField() + + class AddOn(models.Model): objects = AddOnQuerySet.as_manager() @@ -583,6 +601,11 @@ class Meta: name="addonevent_param_site_idx", condition=Q(parameters__has_key="site"), ), + models.Index( + SiteHost("parameters"), + name="addonevent_site_host_idx", + condition=Q(parameters__has_key="site"), + ), ] def __str__(self): diff --git a/documentcloud/addons/views.py b/documentcloud/addons/views.py index 3e643b4f..25955816 100644 --- a/documentcloud/addons/views.py +++ b/documentcloud/addons/views.py @@ -30,7 +30,6 @@ import hmac import json import logging -import re from collections import defaultdict from datetime import timedelta from functools import lru_cache @@ -52,6 +51,7 @@ AddOnRun, GitHubAccount, GitHubInstallation, + SiteHost, VisualAddOn, ) from documentcloud.addons.serializers import ( @@ -66,19 +66,17 @@ logger = logging.getLogger(__name__) -def domain_site_regex(value): - """Build a regex matching `site` URLs whose host equals the given domain. +def domain_to_host(value): + """Normalize a domain filter value to a bare, lowercased host. Accepts either a bare host (`www.nifc.gov`) or a full URL - (`https://www.nifc.gov/path`) and matches stored site values regardless of - scheme, port, path or query string. Returns None if no host can be parsed. + (`https://www.nifc.gov/path`) and returns just the host, lowercased to match + the `SiteHost` expression index. Returns None if no host can be parsed. """ value = value.strip() # furl only parses the host when an authority is present, so ensure one host = furl(value if "//" in value else "//" + value).host - if not host: - return None - return r"^(https?://)?" + re.escape(host) + r"(:\d+)?($|[/?#])" + return host.lower() if host else None class AddOnViewSet(viewsets.ModelViewSet): @@ -779,10 +777,16 @@ class Filter(django_filters.FilterSet): def domain_filter(self, queryset, name, value): # pylint: disable=unused-argument - pattern = domain_site_regex(value) - if pattern is None: + host = domain_to_host(value) + if host is None: return queryset.none() - return queryset.filter(event__parameters__site__iregex=pattern) + # filter on has_key + the SiteHost expression so the partial + # `addonevent_site_host_idx` index on AddOnEvent is usable + return ( + queryset.filter(event__parameters__has_key="site") + .annotate(_site_host=SiteHost("event__parameters")) + .filter(_site_host=host) + ) class Meta: model = AddOnRun @@ -1030,10 +1034,16 @@ class Filter(django_filters.FilterSet): def domain_filter(self, queryset, name, value): # pylint: disable=unused-argument - pattern = domain_site_regex(value) - if pattern is None: + host = domain_to_host(value) + if host is None: return queryset.none() - return queryset.filter(parameters__site__iregex=pattern) + # filter on has_key + the SiteHost expression so the partial + # `addonevent_site_host_idx` index is usable + return ( + queryset.filter(parameters__has_key="site") + .annotate(_site_host=SiteHost("parameters")) + .filter(_site_host=host) + ) class Meta: model = AddOnEvent From 66bec189e6697ab974724ac62278b9947a5864cf Mon Sep 17 00:00:00 2001 From: Chris Amico Date: Mon, 8 Jun 2026 15:21:29 -0400 Subject: [PATCH 3/6] Match on domain, not host --- ..._addonevent_addonevent_site_origin_idx.py} | 6 +-- documentcloud/addons/models.py | 23 ++++---- documentcloud/addons/tests/test_views.py | 30 +++++++---- documentcloud/addons/views.py | 53 ++++++++++--------- 4 files changed, 59 insertions(+), 53 deletions(-) rename documentcloud/addons/migrations/{0030_addonevent_addonevent_site_host_idx.py => 0030_addonevent_addonevent_site_origin_idx.py} (70%) diff --git a/documentcloud/addons/migrations/0030_addonevent_addonevent_site_host_idx.py b/documentcloud/addons/migrations/0030_addonevent_addonevent_site_origin_idx.py similarity index 70% rename from documentcloud/addons/migrations/0030_addonevent_addonevent_site_host_idx.py rename to documentcloud/addons/migrations/0030_addonevent_addonevent_site_origin_idx.py index b18c2381..08af6c0f 100644 --- a/documentcloud/addons/migrations/0030_addonevent_addonevent_site_host_idx.py +++ b/documentcloud/addons/migrations/0030_addonevent_addonevent_site_origin_idx.py @@ -1,4 +1,4 @@ -# Generated by Django 4.2.2 on 2026-06-08 16:21 +# Generated by Django 4.2.2 on 2026-06-08 19:10 from django.db import migrations, models import documentcloud.addons.models @@ -14,9 +14,9 @@ class Migration(migrations.Migration): migrations.AddIndex( model_name="addonevent", index=models.Index( - documentcloud.addons.models.SiteHost("parameters"), + documentcloud.addons.models.SiteOrigin("parameters"), condition=models.Q(("parameters__has_key", "site")), - name="addonevent_site_host_idx", + name="addonevent_site_origin_idx", ), ), ] diff --git a/documentcloud/addons/models.py b/documentcloud/addons/models.py index 9e4e8cc2..1a2b5187 100644 --- a/documentcloud/addons/models.py +++ b/documentcloud/addons/models.py @@ -34,21 +34,18 @@ logger = logging.getLogger(__name__) -class SiteHost(Func): +class SiteOrigin(Func): # pylint: disable=abstract-method - """Extract the lowercased host from the `site` URL of a JSON parameters field. + """Extract the lowercased origin (scheme + host) from a JSON `site` URL. - Captures the authority (host) of the stored URL, stopping at a port, path, - query or fragment, with or without a leading scheme. Used both to build the - expression index on `AddOnEvent` and to filter by it, so the index and the - query share one definition and cannot drift apart. All component functions - are IMMUTABLE, so this is safe to index. + Captures the scheme and authority of the stored URL, stopping at the path, + query or fragment, so `https://www.nifc.gov/foo` yields `https://www.nifc.gov`. + Used both to build the expression index on `AddOnEvent` and to filter by it, + so the index and the query share one definition and cannot drift apart. All + component functions are IMMUTABLE, so this is safe to index. """ - template = ( - "LOWER(SUBSTRING(%(expressions)s ->> 'site' " - "FROM '^(?:https?://)?([^/:?#]+)'))" - ) + template = "LOWER(SUBSTRING(%(expressions)s ->> 'site' FROM '^(https?://[^/?#]+)'))" output_field = models.TextField() @@ -602,8 +599,8 @@ class Meta: condition=Q(parameters__has_key="site"), ), models.Index( - SiteHost("parameters"), - name="addonevent_site_host_idx", + SiteOrigin("parameters"), + name="addonevent_site_origin_idx", condition=Q(parameters__has_key="site"), ), ] diff --git a/documentcloud/addons/tests/test_views.py b/documentcloud/addons/tests/test_views.py index da6cf4a9..54d66894 100644 --- a/documentcloud/addons/tests/test_views.py +++ b/documentcloud/addons/tests/test_views.py @@ -310,7 +310,7 @@ def test_filter_site_absent_is_noop(self, client): assert len(response.json()["results"]) == 3 def test_filter_domain(self, client): - """Filter runs by the host of the event's parameters.site""" + """Filter runs by the origin of the event's parameters.site""" user = UserFactory() matching_event = AddOnEventFactory( user=user, @@ -328,22 +328,26 @@ def test_filter_domain(self, client): AddOnRunFactory(user=user, event=no_site_event) AddOnRunFactory(user=user, event=None) client.force_authenticate(user=user) - # bare host and full origin both match, regardless of path - for domain in ("www.nifc.gov", "https://www.nifc.gov"): + # the origin matches regardless of path + response = client.get("/api/addon_runs/", {"domain": "https://www.nifc.gov"}) + assert response.status_code == status.HTTP_200_OK + uuids = [r["uuid"] for r in response.json()["results"]] + assert uuids == [str(matching_run.uuid)] + # a bare host and a mismatched scheme do not match + for domain in ("www.nifc.gov", "http://www.nifc.gov"): response = client.get("/api/addon_runs/", {"domain": domain}) assert response.status_code == status.HTTP_200_OK - uuids = [r["uuid"] for r in response.json()["results"]] - assert uuids == [str(matching_run.uuid)], domain + assert response.json()["results"] == [], domain def test_filter_domain_no_partial_host_match(self, client): - """The domain filter matches whole hosts, not substrings""" + """The domain filter matches whole origins, not substrings""" user = UserFactory() event = AddOnEventFactory( user=user, parameters={"site": "https://www.nifc.gov.evil.com/path"} ) AddOnRunFactory(user=user, event=event) client.force_authenticate(user=user) - response = client.get("/api/addon_runs/", {"domain": "www.nifc.gov"}) + response = client.get("/api/addon_runs/", {"domain": "https://www.nifc.gov"}) assert response.status_code == status.HTTP_200_OK assert response.json()["results"] == [] @@ -390,7 +394,7 @@ def test_filter_site_no_match(self, client): assert response.json()["results"] == [] def test_filter_domain(self, client): - """Filter events by the host of their parameters.site""" + """Filter events by the origin of their parameters.site""" user = UserFactory() matching = AddOnEventFactory( user=user, @@ -402,11 +406,15 @@ def test_filter_domain(self, client): AddOnEventFactory(user=user, parameters={"site": "https://www.other.com/path"}) AddOnEventFactory(user=user, parameters={"selector": "*"}) client.force_authenticate(user=user) - for domain in ("www.nifc.gov", "https://www.nifc.gov"): + response = client.get("/api/addon_events/", {"domain": "https://www.nifc.gov"}) + assert response.status_code == status.HTTP_200_OK + ids = [r["id"] for r in response.json()["results"]] + assert ids == [matching.pk] + # a bare host and a mismatched scheme do not match + for domain in ("www.nifc.gov", "http://www.nifc.gov"): response = client.get("/api/addon_events/", {"domain": domain}) assert response.status_code == status.HTTP_200_OK - ids = [r["id"] for r in response.json()["results"]] - assert ids == [matching.pk], domain + assert response.json()["results"] == [], domain def test_filter_message(self, client): """Filter runs by message""" diff --git a/documentcloud/addons/views.py b/documentcloud/addons/views.py index 25955816..cb5134f2 100644 --- a/documentcloud/addons/views.py +++ b/documentcloud/addons/views.py @@ -51,7 +51,7 @@ AddOnRun, GitHubAccount, GitHubInstallation, - SiteHost, + SiteOrigin, VisualAddOn, ) from documentcloud.addons.serializers import ( @@ -66,17 +66,18 @@ logger = logging.getLogger(__name__) -def domain_to_host(value): - """Normalize a domain filter value to a bare, lowercased host. +def domain_to_origin(value): + """Normalize a domain filter value to a lowercased origin (scheme + host). - Accepts either a bare host (`www.nifc.gov`) or a full URL - (`https://www.nifc.gov/path`) and returns just the host, lowercased to match - the `SiteHost` expression index. Returns None if no host can be parsed. + Expects an origin such as `https://www.nifc.gov`; a value without a scheme + will not match. A full URL is accepted and its path is discarded. Returns + None if no scheme + host can be parsed, lowercased to match the `SiteOrigin` + expression index. """ - value = value.strip() - # furl only parses the host when an authority is present, so ensure one - host = furl(value if "//" in value else "//" + value).host - return host.lower() if host else None + parsed = furl(value.strip()) + if not parsed.scheme or not parsed.host: + return None + return f"{parsed.scheme}://{parsed.netloc}".lower() class AddOnViewSet(viewsets.ModelViewSet): @@ -764,8 +765,8 @@ class Filter(django_filters.FilterSet): method="domain_filter", label="Domain", help_text=( - "Filter runs by the host of the event's `site` parameter, e.g. " - "`www.nifc.gov` or `https://www.nifc.gov`." + "Filter runs by the origin of the event's `site` parameter, e.g. " + "`https://www.nifc.gov`." ), ) message = django_filters.CharFilter( @@ -777,15 +778,15 @@ class Filter(django_filters.FilterSet): def domain_filter(self, queryset, name, value): # pylint: disable=unused-argument - host = domain_to_host(value) - if host is None: + origin = domain_to_origin(value) + if origin is None: return queryset.none() - # filter on has_key + the SiteHost expression so the partial - # `addonevent_site_host_idx` index on AddOnEvent is usable + # filter on has_key + the SiteOrigin expression so the partial + # `addonevent_site_origin_idx` index on AddOnEvent is usable return ( queryset.filter(event__parameters__has_key="site") - .annotate(_site_host=SiteHost("event__parameters")) - .filter(_site_host=host) + .annotate(_site_origin=SiteOrigin("event__parameters")) + .filter(_site_origin=origin) ) class Meta: @@ -1027,22 +1028,22 @@ class Filter(django_filters.FilterSet): method="domain_filter", label="Domain", help_text=( - "Filter events by the host of their `site` parameter, e.g. " - "`www.nifc.gov` or `https://www.nifc.gov`." + "Filter events by the origin of their `site` parameter, e.g. " + "`https://www.nifc.gov`." ), ) def domain_filter(self, queryset, name, value): # pylint: disable=unused-argument - host = domain_to_host(value) - if host is None: + origin = domain_to_origin(value) + if origin is None: return queryset.none() - # filter on has_key + the SiteHost expression so the partial - # `addonevent_site_host_idx` index is usable + # filter on has_key + the SiteOrigin expression so the partial + # `addonevent_site_origin_idx` index is usable return ( queryset.filter(parameters__has_key="site") - .annotate(_site_host=SiteHost("parameters")) - .filter(_site_host=host) + .annotate(_site_origin=SiteOrigin("parameters")) + .filter(_site_origin=origin) ) class Meta: From 7153fa0b83a24a9128f69fde392f429934fe1a9b Mon Sep 17 00:00:00 2001 From: Chris Amico Date: Mon, 8 Jun 2026 15:42:38 -0400 Subject: [PATCH 4/6] Fix indexing on site lookup --- ...vent_addonevent_param_site_idx_and_more.py | 29 +++++++++++++++++++ documentcloud/addons/models.py | 8 +++-- documentcloud/addons/views.py | 24 ++++++++++++--- 3 files changed, 55 insertions(+), 6 deletions(-) create mode 100644 documentcloud/addons/migrations/0031_remove_addonevent_addonevent_param_site_idx_and_more.py diff --git a/documentcloud/addons/migrations/0031_remove_addonevent_addonevent_param_site_idx_and_more.py b/documentcloud/addons/migrations/0031_remove_addonevent_addonevent_param_site_idx_and_more.py new file mode 100644 index 00000000..aab19125 --- /dev/null +++ b/documentcloud/addons/migrations/0031_remove_addonevent_addonevent_param_site_idx_and_more.py @@ -0,0 +1,29 @@ +# Generated by Django 4.2.2 on 2026-06-08 19:30 + +from django.db import migrations, models +import django.db.models.fields.json +import django.db.models.functions.text + + +class Migration(migrations.Migration): + + dependencies = [ + ("addons", "0030_addonevent_addonevent_site_origin_idx"), + ] + + operations = [ + migrations.RemoveIndex( + model_name="addonevent", + name="addonevent_param_site_idx", + ), + migrations.AddIndex( + model_name="addonevent", + index=models.Index( + django.db.models.functions.text.Upper( + django.db.models.fields.json.KeyTextTransform("site", "parameters") + ), + condition=models.Q(("parameters__has_key", "site")), + name="addonevent_param_site_idx", + ), + ), + ] diff --git a/documentcloud/addons/models.py b/documentcloud/addons/models.py index 1a2b5187..0af4ca61 100644 --- a/documentcloud/addons/models.py +++ b/documentcloud/addons/models.py @@ -2,7 +2,9 @@ from django.conf import settings from django.core.cache import cache from django.db import models, transaction -from django.db.models import F, Func, Q +from django.db.models import Func, Q +from django.db.models.fields.json import KeyTextTransform +from django.db.models.functions import Upper from django.utils.translation import gettext_lazy as _ # Standard Library @@ -594,7 +596,9 @@ class AddOnEvent(models.Model): class Meta: indexes = [ models.Index( - F("parameters__site"), + # matches the `UPPER(parameters ->> 'site')` the `site` filter's + # iexact lookup emits, so a case-insensitive match can use it + Upper(KeyTextTransform("site", "parameters")), name="addonevent_param_site_idx", condition=Q(parameters__has_key="site"), ), diff --git a/documentcloud/addons/views.py b/documentcloud/addons/views.py index cb5134f2..375dd03b 100644 --- a/documentcloud/addons/views.py +++ b/documentcloud/addons/views.py @@ -756,8 +756,7 @@ class Filter(django_filters.FilterSet): ) dismissed = django_filters.BooleanFilter(help_text="Was this run dismissed?") site = django_filters.CharFilter( - field_name="event__parameters__site", - lookup_expr="iexact", + method="site_filter", label="Site", help_text="Filter runs by the `site` value in the event's parameters.", ) @@ -776,6 +775,15 @@ class Filter(django_filters.FilterSet): help_text="Filter runs by their progress message.", ) + def site_filter(self, queryset, name, value): + # pylint: disable=unused-argument + # the has_key clause lets the partial `addonevent_param_site_idx` + # index on AddOnEvent serve the case-insensitive match + return queryset.filter( + event__parameters__has_key="site", + event__parameters__site__iexact=value, + ) + def domain_filter(self, queryset, name, value): # pylint: disable=unused-argument origin = domain_to_origin(value) @@ -1019,8 +1027,7 @@ class Filter(django_filters.FilterSet): help_text="Filter events by a specific add-on ID.", ) site = django_filters.CharFilter( - field_name="parameters__site", - lookup_expr="iexact", + method="site_filter", label="Site", help_text="Filter events by the `site` value in their parameters.", ) @@ -1033,6 +1040,15 @@ class Filter(django_filters.FilterSet): ), ) + def site_filter(self, queryset, name, value): + # pylint: disable=unused-argument + # the has_key clause lets the partial `addonevent_param_site_idx` + # index serve the case-insensitive match + return queryset.filter( + parameters__has_key="site", + parameters__site__iexact=value, + ) + def domain_filter(self, queryset, name, value): # pylint: disable=unused-argument origin = domain_to_origin(value) From 88a1c0c9d82ba067cba17c63d3c1db96caa7ea84 Mon Sep 17 00:00:00 2001 From: Chris Amico Date: Mon, 8 Jun 2026 15:48:58 -0400 Subject: [PATCH 5/6] Add indexes concurrently --- .../0030_addonevent_addonevent_site_origin_idx.py | 5 ++++- ...ve_addonevent_addonevent_param_site_idx_and_more.py | 10 ++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/documentcloud/addons/migrations/0030_addonevent_addonevent_site_origin_idx.py b/documentcloud/addons/migrations/0030_addonevent_addonevent_site_origin_idx.py index 08af6c0f..77af7ed4 100644 --- a/documentcloud/addons/migrations/0030_addonevent_addonevent_site_origin_idx.py +++ b/documentcloud/addons/migrations/0030_addonevent_addonevent_site_origin_idx.py @@ -1,17 +1,20 @@ # Generated by Django 4.2.2 on 2026-06-08 19:10 +from django.contrib.postgres.operations import AddIndexConcurrently from django.db import migrations, models import documentcloud.addons.models class Migration(migrations.Migration): + atomic = False + dependencies = [ ("addons", "0029_addonrun_data"), ] operations = [ - migrations.AddIndex( + AddIndexConcurrently( model_name="addonevent", index=models.Index( documentcloud.addons.models.SiteOrigin("parameters"), diff --git a/documentcloud/addons/migrations/0031_remove_addonevent_addonevent_param_site_idx_and_more.py b/documentcloud/addons/migrations/0031_remove_addonevent_addonevent_param_site_idx_and_more.py index aab19125..82139ac9 100644 --- a/documentcloud/addons/migrations/0031_remove_addonevent_addonevent_param_site_idx_and_more.py +++ b/documentcloud/addons/migrations/0031_remove_addonevent_addonevent_param_site_idx_and_more.py @@ -1,5 +1,9 @@ # Generated by Django 4.2.2 on 2026-06-08 19:30 +from django.contrib.postgres.operations import ( + AddIndexConcurrently, + RemoveIndexConcurrently, +) from django.db import migrations, models import django.db.models.fields.json import django.db.models.functions.text @@ -7,16 +11,18 @@ class Migration(migrations.Migration): + atomic = False + dependencies = [ ("addons", "0030_addonevent_addonevent_site_origin_idx"), ] operations = [ - migrations.RemoveIndex( + RemoveIndexConcurrently( model_name="addonevent", name="addonevent_param_site_idx", ), - migrations.AddIndex( + AddIndexConcurrently( model_name="addonevent", index=models.Index( django.db.models.functions.text.Upper( From 81c76b3cd13ab9c76ea08c021de50acf9c5b1ef8 Mon Sep 17 00:00:00 2001 From: Chris Amico Date: Mon, 8 Jun 2026 16:02:27 -0400 Subject: [PATCH 6/6] one migration not two --- ...0_addonevent_addonevent_site_origin_idx.py | 25 ------------------- ...ore.py => 0030_addonevent_site_indexes.py} | 16 ++++++++++-- 2 files changed, 14 insertions(+), 27 deletions(-) delete mode 100644 documentcloud/addons/migrations/0030_addonevent_addonevent_site_origin_idx.py rename documentcloud/addons/migrations/{0031_remove_addonevent_addonevent_param_site_idx_and_more.py => 0030_addonevent_site_indexes.py} (58%) diff --git a/documentcloud/addons/migrations/0030_addonevent_addonevent_site_origin_idx.py b/documentcloud/addons/migrations/0030_addonevent_addonevent_site_origin_idx.py deleted file mode 100644 index 77af7ed4..00000000 --- a/documentcloud/addons/migrations/0030_addonevent_addonevent_site_origin_idx.py +++ /dev/null @@ -1,25 +0,0 @@ -# Generated by Django 4.2.2 on 2026-06-08 19:10 - -from django.contrib.postgres.operations import AddIndexConcurrently -from django.db import migrations, models -import documentcloud.addons.models - - -class Migration(migrations.Migration): - - atomic = False - - dependencies = [ - ("addons", "0029_addonrun_data"), - ] - - operations = [ - AddIndexConcurrently( - model_name="addonevent", - index=models.Index( - documentcloud.addons.models.SiteOrigin("parameters"), - condition=models.Q(("parameters__has_key", "site")), - name="addonevent_site_origin_idx", - ), - ), - ] diff --git a/documentcloud/addons/migrations/0031_remove_addonevent_addonevent_param_site_idx_and_more.py b/documentcloud/addons/migrations/0030_addonevent_site_indexes.py similarity index 58% rename from documentcloud/addons/migrations/0031_remove_addonevent_addonevent_param_site_idx_and_more.py rename to documentcloud/addons/migrations/0030_addonevent_site_indexes.py index 82139ac9..3cd8725a 100644 --- a/documentcloud/addons/migrations/0031_remove_addonevent_addonevent_param_site_idx_and_more.py +++ b/documentcloud/addons/migrations/0030_addonevent_site_indexes.py @@ -1,4 +1,4 @@ -# Generated by Django 4.2.2 on 2026-06-08 19:30 +# Generated by Django 4.2.2 on 2026-06-08 from django.contrib.postgres.operations import ( AddIndexConcurrently, @@ -7,6 +7,7 @@ from django.db import migrations, models import django.db.models.fields.json import django.db.models.functions.text +import documentcloud.addons.models class Migration(migrations.Migration): @@ -14,10 +15,21 @@ class Migration(migrations.Migration): atomic = False dependencies = [ - ("addons", "0030_addonevent_addonevent_site_origin_idx"), + ("addons", "0029_addonrun_data"), ] operations = [ + # index the `site` origin (scheme + host) for the `domain` filter + AddIndexConcurrently( + model_name="addonevent", + index=models.Index( + documentcloud.addons.models.SiteOrigin("parameters"), + condition=models.Q(("parameters__has_key", "site")), + name="addonevent_site_origin_idx", + ), + ), + # rebuild the `site` index as UPPER(...) so the case-insensitive + # (iexact) `site` filter can use it RemoveIndexConcurrently( model_name="addonevent", name="addonevent_param_site_idx",