From 85bca9f04f958d4fd1e69e0c7cca23610a9977f0 Mon Sep 17 00:00:00 2001 From: crprashant <5108573+crprashant@users.noreply.github.com> Date: Mon, 4 May 2026 19:23:30 +0000 Subject: [PATCH] Restore contsel/contjoinsel for containment & key-existence operators (#2356) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The containment (`@>`, `<@`, `@>>`, `<<@`) and key-existence (`?`, `?|`, `?&`) operators on `agtype` were bound to `matchingsel`/`matchingjoinsel` on the PG14+ source tree. `matchingsel` is built for pattern operators (LIKE/regex) and during planning invokes the operator's underlying function (`agtype_contains`) once per `pg_statistic` MCV. With realistic statistics targets that produces a planner-time regression that dominates simple OLTP-style point queries. Rebind those operators to the lighter `contsel`/`contjoinsel` estimators, which return fixed selectivity constants without invoking the operator function during planning. This is a deliberate planning-speed vs. estimate-accuracy trade-off. Note it DIVERGES from PostgreSQL core, which keeps jsonb's `@>`, `<@`, `?`, `?|`, `?&` on `matchingsel`/`matchingjoinsel` (verified on REL_16/17/18_STABLE in `pg_operator.dat`); it is an AGE-specific choice favoring workloads where these operators appear in selective point lookups. A future improvement could add a custom `agtype` selectivity function that is both cheap and statistics-aware. Changes: * `sql/agtype_operators.sql`, `sql/agtype_exists.sql`: 10 operators flipped from `matchingsel`/`matchingjoinsel` to `contsel`/`contjoinsel`. * `age--1.7.0--y.y.y.sql`: appended `ALTER OPERATOR ... SET (RESTRICT, JOIN)` for all 10 operators so existing installs flip on `ALTER EXTENSION age UPDATE`. * `regress/sql/containment_selectivity.sql` (+ `expected/.out`): pin the bindings via `pg_operator`, plus a scoped "no leaked matchingsel" guard and functional smoke for all 10 operators. Adds an upgrade-path assertion that simulates a stale (pre-fix) install, replays the shipped `ALTER OPERATOR` block, and confirms every overload flips to `contsel`/`contjoinsel` (run in a rolled-back transaction). * `regress/expected/cypher_match.out`, `regress/expected/cypher_vle.out`: refresh expected to reflect new (and better) plan shapes that the lower-selectivity helper produces — `test_enable_containment` now picks Nested Loop + Index Only Scans over a Seq Scan/Hash Join, and two `MATCH p=...` and `show_list_use_vle` queries flip row order (queries had no `ORDER BY`; result set is unchanged, only ordering). * `Makefile`: register `containment_selectivity` in `REGRESS`. Validation: * Build: clean, `-Werror`. * Regression: 36/37 tests pass under `EXTRA_TESTS="pgvector fuzzystrmatch pg_trgm"`. Only `age_upgrade` fails — pre-existing on master at 774e781b (verified by `git stash && installcheck`). * Reporter's exact methodology (LDBC-SNB-style snb_graph + pgbench on `bench_message_content`) reproduces the regression and the fix: | Metric | matchingsel | contsel | Delta | |----------------------------|-------------|---------|-------| | EXPLAIN planning time (ms) | 1.42 | 0.97 | -32% | | EXPLAIN execution time (ms)| 0.34 | 0.31 | ~equal| | pgbench TPS (8c x 30s) | 5247 | 7378 | +40.6%| Run with `default_statistics_target = 1000` to populate MCV lists, matching the reporter's analyzed-graph conditions. * Upgrade path: validated end-to-end during the benchmark — operator bindings were flipped from `matchingsel` -> `contsel` via the same `ALTER OPERATOR` statements the upgrade SQL ships, while operators remained functional throughout. Driver workflows (python/go/node/jdbc) intentionally not run: this PR only adjusts pg_operator selectivity metadata. There is no C, type, or protocol change that drivers could observe. Closes #2356. --- Makefile | 3 +- age--1.7.0--y.y.y.sql | 38 +++ regress/expected/containment_selectivity.out | 245 +++++++++++++++++++ regress/expected/cypher_match.out | 30 ++- regress/expected/cypher_vle.out | 8 +- regress/sql/containment_selectivity.sql | 156 ++++++++++++ regress/sql/cypher_match.sql | 6 +- regress/sql/cypher_vle.sql | 2 +- sql/agtype_exists.sql | 24 +- sql/agtype_operators.sql | 16 +- 10 files changed, 483 insertions(+), 45 deletions(-) create mode 100644 regress/expected/containment_selectivity.out create mode 100755 regress/sql/containment_selectivity.sql diff --git a/Makefile b/Makefile index b79dbd3bb..f2a0b9a62 100644 --- a/Makefile +++ b/Makefile @@ -183,7 +183,8 @@ REGRESS = scan \ direct_field_access \ security \ reserved_keyword_alias \ - agtype_jsonb_cast + agtype_jsonb_cast \ + containment_selectivity ifneq ($(EXTRA_TESTS),) REGRESS += $(EXTRA_TESTS) diff --git a/age--1.7.0--y.y.y.sql b/age--1.7.0--y.y.y.sql index 74b84d604..a4cac0c5c 100644 --- a/age--1.7.0--y.y.y.sql +++ b/age--1.7.0--y.y.y.sql @@ -762,3 +762,41 @@ CREATE FUNCTION ag_catalog._agehash_self_test() VOLATILE PARALLEL UNSAFE AS 'MODULE_PATHNAME'; + +-- +-- Issue #2356: rebind containment and key-existence operators to the +-- lightweight contsel / contjoinsel selectivity estimators. +-- +-- @>, <@, @>>, <<@, ?, ?|, ?& on agtype were bound to matchingsel / +-- matchingjoinsel. During planning matchingsel invokes the operator's +-- underlying function (agtype_contains) once per pg_statistic MCV entry and +-- histogram bin; with a realistic default_statistics_target that planning +-- cost dominates simple point queries (the regression reported in #2356). +-- +-- contsel / contjoinsel return fixed selectivity constants without calling the +-- operator function, so planning is constant-time. This is a deliberate +-- planning-speed vs. estimate-accuracy trade-off. Note it DIVERGES from +-- PostgreSQL core, which keeps jsonb's @>, <@, ?, ?|, ?& on matchingsel / +-- matchingjoinsel; it is an AGE-specific choice favoring workloads where these +-- operators appear in selective point lookups. +-- +ALTER OPERATOR ag_catalog.@>(agtype, agtype) + SET (RESTRICT = contsel, JOIN = contjoinsel); +ALTER OPERATOR ag_catalog.<@(agtype, agtype) + SET (RESTRICT = contsel, JOIN = contjoinsel); +ALTER OPERATOR ag_catalog.@>>(agtype, agtype) + SET (RESTRICT = contsel, JOIN = contjoinsel); +ALTER OPERATOR ag_catalog.<<@(agtype, agtype) + SET (RESTRICT = contsel, JOIN = contjoinsel); +ALTER OPERATOR ag_catalog.?(agtype, text) + SET (RESTRICT = contsel, JOIN = contjoinsel); +ALTER OPERATOR ag_catalog.?(agtype, agtype) + SET (RESTRICT = contsel, JOIN = contjoinsel); +ALTER OPERATOR ag_catalog.?|(agtype, text[]) + SET (RESTRICT = contsel, JOIN = contjoinsel); +ALTER OPERATOR ag_catalog.?|(agtype, agtype) + SET (RESTRICT = contsel, JOIN = contjoinsel); +ALTER OPERATOR ag_catalog.?&(agtype, text[]) + SET (RESTRICT = contsel, JOIN = contjoinsel); +ALTER OPERATOR ag_catalog.?&(agtype, agtype) + SET (RESTRICT = contsel, JOIN = contjoinsel); diff --git a/regress/expected/containment_selectivity.out b/regress/expected/containment_selectivity.out new file mode 100644 index 000000000..a0626d554 --- /dev/null +++ b/regress/expected/containment_selectivity.out @@ -0,0 +1,245 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Regression coverage for issue #2356: + * The containment (@>, <@, @>>, <<@) and key-existence (?, ?|, ?&) + * operators on agtype must be bound to the lightweight selectivity + * helpers contsel / contjoinsel during planning. Earlier PG14+ + * branches used matchingsel / matchingjoinsel, which caused planning + * to invoke agtype_contains() against pg_statistic MCVs and produced + * a 30%+ planning-time regression on point queries (severe TPS drop + * reported on the PG18 branch). + * + * This test pins the bindings by querying pg_operator directly. If + * someone re-introduces matchingsel here, the test diff is loud and + * precise. + */ +LOAD 'age'; +SET search_path TO ag_catalog; +-- Selectivity helpers for the four containment operators. +SELECT o.oprname, + pg_catalog.format_type(o.oprleft, NULL) AS lhs, + pg_catalog.format_type(o.oprright, NULL) AS rhs, + o.oprrest::text AS restrict_fn, + o.oprjoin::text AS join_fn +FROM pg_catalog.pg_operator o +JOIN pg_catalog.pg_namespace n ON n.oid = o.oprnamespace +WHERE n.nspname = 'ag_catalog' + AND o.oprname IN ('@>', '<@', '@>>', '<<@') +ORDER BY o.oprname, lhs, rhs; + oprname | lhs | rhs | restrict_fn | join_fn +---------+--------+--------+-------------+------------- + <<@ | agtype | agtype | contsel | contjoinsel + <@ | agtype | agtype | contsel | contjoinsel + @> | agtype | agtype | contsel | contjoinsel + @>> | agtype | agtype | contsel | contjoinsel +(4 rows) + +-- Selectivity helpers for all key-existence operator overloads +-- (right-hand side may be text, text[], or agtype). +SELECT o.oprname, + pg_catalog.format_type(o.oprleft, NULL) AS lhs, + pg_catalog.format_type(o.oprright, NULL) AS rhs, + o.oprrest::text AS restrict_fn, + o.oprjoin::text AS join_fn +FROM pg_catalog.pg_operator o +JOIN pg_catalog.pg_namespace n ON n.oid = o.oprnamespace +WHERE n.nspname = 'ag_catalog' + AND o.oprname IN ('?', '?|', '?&') +ORDER BY o.oprname, lhs, rhs; + oprname | lhs | rhs | restrict_fn | join_fn +---------+--------+--------+-------------+------------- + ? | agtype | agtype | contsel | contjoinsel + ? | agtype | text | contsel | contjoinsel + ?& | agtype | agtype | contsel | contjoinsel + ?& | agtype | text[] | contsel | contjoinsel + ?| | agtype | agtype | contsel | contjoinsel + ?| | agtype | text[] | contsel | contjoinsel +(6 rows) + +-- Scoped guard for issue #2356: assert that none of the specific containment +-- and key-existence operators on agtype are bound to matchingsel / +-- matchingjoinsel. We deliberately limit the check to these operator names +-- (rather than every operator in ag_catalog) so unrelated operators that +-- legitimately use matchingsel for their own semantics are not affected by +-- this regression test. +SELECT COUNT(*) AS leaked_matchingsel_bindings +FROM pg_catalog.pg_operator o +JOIN pg_catalog.pg_namespace n ON n.oid = o.oprnamespace +WHERE n.nspname = 'ag_catalog' + AND o.oprname IN ('@>', '<@', '@>>', '<<@', '?', '?|', '?&') + AND (o.oprrest::text = 'matchingsel' + OR o.oprjoin::text = 'matchingjoinsel'); + leaked_matchingsel_bindings +----------------------------- + 0 +(1 row) + +-- Smoke test: each operator still works functionally. Selectivity binding +-- only affects the planner; this guards against an inadvertent operator +-- removal as part of any future cleanup. +SELECT '{"a":1,"b":2}'::agtype @> '{"a":1}'::agtype AS contains_yes; + contains_yes +-------------- + t +(1 row) + +SELECT '{"a":1}'::agtype <@ '{"a":1,"b":2}'::agtype AS contained_yes; + contained_yes +--------------- + t +(1 row) + +SELECT '{"a":{"b":1}}'::agtype @>> '{"a":{"b":1}}'::agtype AS top_contains_yes; + top_contains_yes +------------------ + t +(1 row) + +SELECT '{"a":{"b":1}}'::agtype <<@ '{"a":{"b":1}}'::agtype AS top_contained_yes; + top_contained_yes +------------------- + t +(1 row) + +SELECT '{"a":1}'::agtype ? 'a'::text AS exists_text_yes; + exists_text_yes +----------------- + t +(1 row) + +SELECT '{"a":1}'::agtype ? '"a"'::agtype AS exists_agtype_yes; + exists_agtype_yes +------------------- + t +(1 row) + +SELECT '{"a":1,"b":2}'::agtype ?| ARRAY['a','c'] AS exists_any_text_yes; + exists_any_text_yes +--------------------- + t +(1 row) + +SELECT '{"a":1,"b":2}'::agtype ?| '["a","c"]'::agtype AS exists_any_agtype_yes; + exists_any_agtype_yes +----------------------- + t +(1 row) + +SELECT '{"a":1,"b":2}'::agtype ?& ARRAY['a','b'] AS exists_all_text_yes; + exists_all_text_yes +--------------------- + t +(1 row) + +SELECT '{"a":1,"b":2}'::agtype ?& '["a","b"]'::agtype AS exists_all_agtype_yes; + exists_all_agtype_yes +----------------------- + t +(1 row) + +-- Upgrade-path assertion for issue #2356. +-- +-- The checks above cover a FRESH install: contsel / contjoinsel come straight +-- from agtype_operators.sql and agtype_exists.sql. Existing installs instead +-- pick up the fix from the ALTER OPERATOR ... SET (RESTRICT, JOIN) block that +-- age--1.7.0--y.y.y.sql ships and "ALTER EXTENSION age UPDATE" replays. Nothing +-- above exercises that block, so a silent regression in it would go unnoticed. +-- +-- We replay the shipped ALTER OPERATOR statements directly rather than running +-- ALTER EXTENSION age UPDATE: the dev upgrade script targets the placeholder +-- version "y.y.y" and is not a stable version-chain target inside the +-- regression harness. The whole section runs in a transaction that is rolled +-- back, so it observes the flip without permanently mutating the operator +-- catalog (PostgreSQL DDL is transactional). +BEGIN; +-- Simulate a stale (pre-fix) install: force all ten overloads back onto +-- matchingsel / matchingjoinsel. +ALTER OPERATOR ag_catalog.@>(agtype, agtype) SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); +ALTER OPERATOR ag_catalog.<@(agtype, agtype) SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); +ALTER OPERATOR ag_catalog.@>>(agtype, agtype) SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); +ALTER OPERATOR ag_catalog.<<@(agtype, agtype) SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); +ALTER OPERATOR ag_catalog.?(agtype, text) SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); +ALTER OPERATOR ag_catalog.?(agtype, agtype) SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); +ALTER OPERATOR ag_catalog.?|(agtype, text[]) SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); +ALTER OPERATOR ag_catalog.?|(agtype, agtype) SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); +ALTER OPERATOR ag_catalog.?&(agtype, text[]) SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); +ALTER OPERATOR ag_catalog.?&(agtype, agtype) SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); +-- Stale state: every overload now reports matchingsel / matchingjoinsel. +SELECT o.oprname, + pg_catalog.format_type(o.oprleft, NULL) AS lhs, + pg_catalog.format_type(o.oprright, NULL) AS rhs, + o.oprrest::text AS restrict_fn, + o.oprjoin::text AS join_fn +FROM pg_catalog.pg_operator o +JOIN pg_catalog.pg_namespace n ON n.oid = o.oprnamespace +WHERE n.nspname = 'ag_catalog' + AND o.oprname IN ('@>', '<@', '@>>', '<<@', '?', '?|', '?&') +ORDER BY o.oprname, lhs, rhs; + oprname | lhs | rhs | restrict_fn | join_fn +---------+--------+--------+-------------+----------------- + <<@ | agtype | agtype | matchingsel | matchingjoinsel + <@ | agtype | agtype | matchingsel | matchingjoinsel + ? | agtype | agtype | matchingsel | matchingjoinsel + ? | agtype | text | matchingsel | matchingjoinsel + ?& | agtype | agtype | matchingsel | matchingjoinsel + ?& | agtype | text[] | matchingsel | matchingjoinsel + ?| | agtype | agtype | matchingsel | matchingjoinsel + ?| | agtype | text[] | matchingsel | matchingjoinsel + @> | agtype | agtype | matchingsel | matchingjoinsel + @>> | agtype | agtype | matchingsel | matchingjoinsel +(10 rows) + +-- Replay the exact ALTER OPERATOR block shipped in age--1.7.0--y.y.y.sql. +ALTER OPERATOR ag_catalog.@>(agtype, agtype) SET (RESTRICT = contsel, JOIN = contjoinsel); +ALTER OPERATOR ag_catalog.<@(agtype, agtype) SET (RESTRICT = contsel, JOIN = contjoinsel); +ALTER OPERATOR ag_catalog.@>>(agtype, agtype) SET (RESTRICT = contsel, JOIN = contjoinsel); +ALTER OPERATOR ag_catalog.<<@(agtype, agtype) SET (RESTRICT = contsel, JOIN = contjoinsel); +ALTER OPERATOR ag_catalog.?(agtype, text) SET (RESTRICT = contsel, JOIN = contjoinsel); +ALTER OPERATOR ag_catalog.?(agtype, agtype) SET (RESTRICT = contsel, JOIN = contjoinsel); +ALTER OPERATOR ag_catalog.?|(agtype, text[]) SET (RESTRICT = contsel, JOIN = contjoinsel); +ALTER OPERATOR ag_catalog.?|(agtype, agtype) SET (RESTRICT = contsel, JOIN = contjoinsel); +ALTER OPERATOR ag_catalog.?&(agtype, text[]) SET (RESTRICT = contsel, JOIN = contjoinsel); +ALTER OPERATOR ag_catalog.?&(agtype, agtype) SET (RESTRICT = contsel, JOIN = contjoinsel); +-- After the upgrade replay every overload is back on contsel / contjoinsel. +SELECT o.oprname, + pg_catalog.format_type(o.oprleft, NULL) AS lhs, + pg_catalog.format_type(o.oprright, NULL) AS rhs, + o.oprrest::text AS restrict_fn, + o.oprjoin::text AS join_fn +FROM pg_catalog.pg_operator o +JOIN pg_catalog.pg_namespace n ON n.oid = o.oprnamespace +WHERE n.nspname = 'ag_catalog' + AND o.oprname IN ('@>', '<@', '@>>', '<<@', '?', '?|', '?&') +ORDER BY o.oprname, lhs, rhs; + oprname | lhs | rhs | restrict_fn | join_fn +---------+--------+--------+-------------+------------- + <<@ | agtype | agtype | contsel | contjoinsel + <@ | agtype | agtype | contsel | contjoinsel + ? | agtype | agtype | contsel | contjoinsel + ? | agtype | text | contsel | contjoinsel + ?& | agtype | agtype | contsel | contjoinsel + ?& | agtype | text[] | contsel | contjoinsel + ?| | agtype | agtype | contsel | contjoinsel + ?| | agtype | text[] | contsel | contjoinsel + @> | agtype | agtype | contsel | contjoinsel + @>> | agtype | agtype | contsel | contjoinsel +(10 rows) + +ROLLBACK; diff --git a/regress/expected/cypher_match.out b/regress/expected/cypher_match.out index 33c728f2a..ab51486b3 100644 --- a/regress/expected/cypher_match.out +++ b/regress/expected/cypher_match.out @@ -2404,21 +2404,21 @@ SELECT * FROM cypher('cypher_match', $$ MATCH (a {name:a.name}) MATCH (a {age:a. {"id": 281474976710659, "label": "", "properties": {"age": 3, "name": "orphan"}}::vertex (3 rows) -SELECT * FROM cypher('cypher_match', $$ MATCH p=(a)-[u {relationship: u.relationship}]->(b) RETURN p $$) as (a agtype); +SELECT * FROM cypher('cypher_match', $$ MATCH p=(a)-[u {relationship: u.relationship}]->(b) RETURN p ORDER BY id(u) $$) as (a agtype); a ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- [{"id": 281474976710661, "label": "", "properties": {"age": 4, "name": "T"}}::vertex, {"id": 4785074604081153, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710661, "properties": {"years": 3, "relationship": "friends"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path [{"id": 281474976710659, "label": "", "properties": {"age": 3, "name": "orphan"}}::vertex, {"id": 4785074604081154, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710659, "properties": {"years": 4, "relationship": "enemies"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path (2 rows) -SELECT * FROM cypher('cypher_match', $$ MATCH p=(a)-[u {relationship: u.relationship, years: u.years}]->(b) RETURN p $$) as (a agtype); +SELECT * FROM cypher('cypher_match', $$ MATCH p=(a)-[u {relationship: u.relationship, years: u.years}]->(b) RETURN p ORDER BY id(u) $$) as (a agtype); a ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- [{"id": 281474976710661, "label": "", "properties": {"age": 4, "name": "T"}}::vertex, {"id": 4785074604081153, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710661, "properties": {"years": 3, "relationship": "friends"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path [{"id": 281474976710659, "label": "", "properties": {"age": 3, "name": "orphan"}}::vertex, {"id": 4785074604081154, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710659, "properties": {"years": 4, "relationship": "enemies"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path (2 rows) -SELECT * FROM cypher('cypher_match', $$ MATCH p=(a {name:a.name})-[u {relationship: u.relationship}]->(b {age:b.age}) RETURN p $$) as (a agtype); +SELECT * FROM cypher('cypher_match', $$ MATCH p=(a {name:a.name})-[u {relationship: u.relationship}]->(b {age:b.age}) RETURN p ORDER BY id(u) $$) as (a agtype); a ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- [{"id": 281474976710661, "label": "", "properties": {"age": 4, "name": "T"}}::vertex, {"id": 4785074604081153, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710661, "properties": {"years": 3, "relationship": "friends"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path @@ -3398,19 +3398,17 @@ SELECT count(*) FROM cypher('test_enable_containment', $$ MATCH p=(x:Customer)-[ (1 row) SELECT * FROM cypher('test_enable_containment', $$ EXPLAIN (costs off) MATCH (x:Customer)-[:bought ={store: 'Amazon', addr:{city: 'Vancouver', street: 30}}]->(y:Product) RETURN 0 $$) as (a agtype); - QUERY PLAN -------------------------------------------------------------------------------------------------------------------------------- - Hash Join - Hash Cond: (y.id = _age_default_alias_0.end_id) - -> Seq Scan on "Product" y - -> Hash - -> Hash Join - Hash Cond: (x.id = _age_default_alias_0.start_id) - -> Seq Scan on "Customer" x - -> Hash - -> Seq Scan on bought _age_default_alias_0 - Filter: (properties @>> '{"addr": {"city": "Vancouver", "street": 30}, "store": "Amazon"}'::agtype) -(10 rows) + QUERY PLAN +------------------------------------------------------------------------------------------------------------------- + Nested Loop + -> Nested Loop + -> Seq Scan on bought _age_default_alias_0 + Filter: (properties @>> '{"addr": {"city": "Vancouver", "street": 30}, "store": "Amazon"}'::agtype) + -> Index Only Scan using "Customer_pkey" on "Customer" x + Index Cond: (id = _age_default_alias_0.start_id) + -> Index Only Scan using "Product_pkey" on "Product" y + Index Cond: (id = _age_default_alias_0.end_id) +(8 rows) SELECT * FROM cypher('test_enable_containment', $$ EXPLAIN (costs off) MATCH (x:Customer ={school: { name: 'XYZ College',program: { major: 'Psyc', degree: 'BSc'} },phone: [ 123456789, 987654321, 456987123 ]}) RETURN 0 $$) as (a agtype); QUERY PLAN diff --git a/regress/expected/cypher_vle.out b/regress/expected/cypher_vle.out index 27e3bb822..a09cd4aa9 100644 --- a/regress/expected/cypher_vle.out +++ b/regress/expected/cypher_vle.out @@ -691,7 +691,7 @@ BEGIN RETURN QUERY SELECT * FROM cypher('mygraph', $CYPHER$ MATCH (h:head {name: $list_name})-[e:next*]->(v:node) - RETURN v + RETURN v ORDER BY id(v) $CYPHER$, ag_param) AS (node agtype); END $$; -- create a list @@ -726,8 +726,8 @@ SELECT prepend_node('list01', 'b'); SELECT * FROM show_list_use_vle('list01'); node ----------------------------------------------------------------------------------- - {"id": 1407374883553282, "label": "node", "properties": {"content": "b"}}::vertex {"id": 1407374883553281, "label": "node", "properties": {"content": "a"}}::vertex + {"id": 1407374883553282, "label": "node", "properties": {"content": "b"}}::vertex (2 rows) -- prepend a node 'c' @@ -741,9 +741,9 @@ SELECT prepend_node('list01', 'c'); SELECT * FROM show_list_use_vle('list01'); node ----------------------------------------------------------------------------------- - {"id": 1407374883553283, "label": "node", "properties": {"content": "c"}}::vertex - {"id": 1407374883553282, "label": "node", "properties": {"content": "b"}}::vertex {"id": 1407374883553281, "label": "node", "properties": {"content": "a"}}::vertex + {"id": 1407374883553282, "label": "node", "properties": {"content": "b"}}::vertex + {"id": 1407374883553283, "label": "node", "properties": {"content": "c"}}::vertex (3 rows) DROP FUNCTION show_list_use_vle; diff --git a/regress/sql/containment_selectivity.sql b/regress/sql/containment_selectivity.sql new file mode 100755 index 000000000..c35ad3fc8 --- /dev/null +++ b/regress/sql/containment_selectivity.sql @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Regression coverage for issue #2356: + * The containment (@>, <@, @>>, <<@) and key-existence (?, ?|, ?&) + * operators on agtype must be bound to the lightweight selectivity + * helpers contsel / contjoinsel during planning. Earlier PG14+ + * branches used matchingsel / matchingjoinsel, which caused planning + * to invoke agtype_contains() against pg_statistic MCVs and produced + * a 30%+ planning-time regression on point queries (severe TPS drop + * reported on the PG18 branch). + * + * This test pins the bindings by querying pg_operator directly. If + * someone re-introduces matchingsel here, the test diff is loud and + * precise. + */ + +LOAD 'age'; +SET search_path TO ag_catalog; + +-- Selectivity helpers for the four containment operators. +SELECT o.oprname, + pg_catalog.format_type(o.oprleft, NULL) AS lhs, + pg_catalog.format_type(o.oprright, NULL) AS rhs, + o.oprrest::text AS restrict_fn, + o.oprjoin::text AS join_fn +FROM pg_catalog.pg_operator o +JOIN pg_catalog.pg_namespace n ON n.oid = o.oprnamespace +WHERE n.nspname = 'ag_catalog' + AND o.oprname IN ('@>', '<@', '@>>', '<<@') +ORDER BY o.oprname, lhs, rhs; + +-- Selectivity helpers for all key-existence operator overloads +-- (right-hand side may be text, text[], or agtype). +SELECT o.oprname, + pg_catalog.format_type(o.oprleft, NULL) AS lhs, + pg_catalog.format_type(o.oprright, NULL) AS rhs, + o.oprrest::text AS restrict_fn, + o.oprjoin::text AS join_fn +FROM pg_catalog.pg_operator o +JOIN pg_catalog.pg_namespace n ON n.oid = o.oprnamespace +WHERE n.nspname = 'ag_catalog' + AND o.oprname IN ('?', '?|', '?&') +ORDER BY o.oprname, lhs, rhs; + +-- Scoped guard for issue #2356: assert that none of the specific containment +-- and key-existence operators on agtype are bound to matchingsel / +-- matchingjoinsel. We deliberately limit the check to these operator names +-- (rather than every operator in ag_catalog) so unrelated operators that +-- legitimately use matchingsel for their own semantics are not affected by +-- this regression test. +SELECT COUNT(*) AS leaked_matchingsel_bindings +FROM pg_catalog.pg_operator o +JOIN pg_catalog.pg_namespace n ON n.oid = o.oprnamespace +WHERE n.nspname = 'ag_catalog' + AND o.oprname IN ('@>', '<@', '@>>', '<<@', '?', '?|', '?&') + AND (o.oprrest::text = 'matchingsel' + OR o.oprjoin::text = 'matchingjoinsel'); + +-- Smoke test: each operator still works functionally. Selectivity binding +-- only affects the planner; this guards against an inadvertent operator +-- removal as part of any future cleanup. +SELECT '{"a":1,"b":2}'::agtype @> '{"a":1}'::agtype AS contains_yes; +SELECT '{"a":1}'::agtype <@ '{"a":1,"b":2}'::agtype AS contained_yes; +SELECT '{"a":{"b":1}}'::agtype @>> '{"a":{"b":1}}'::agtype AS top_contains_yes; +SELECT '{"a":{"b":1}}'::agtype <<@ '{"a":{"b":1}}'::agtype AS top_contained_yes; +SELECT '{"a":1}'::agtype ? 'a'::text AS exists_text_yes; +SELECT '{"a":1}'::agtype ? '"a"'::agtype AS exists_agtype_yes; +SELECT '{"a":1,"b":2}'::agtype ?| ARRAY['a','c'] AS exists_any_text_yes; +SELECT '{"a":1,"b":2}'::agtype ?| '["a","c"]'::agtype AS exists_any_agtype_yes; +SELECT '{"a":1,"b":2}'::agtype ?& ARRAY['a','b'] AS exists_all_text_yes; +SELECT '{"a":1,"b":2}'::agtype ?& '["a","b"]'::agtype AS exists_all_agtype_yes; + +-- Upgrade-path assertion for issue #2356. +-- +-- The checks above cover a FRESH install: contsel / contjoinsel come straight +-- from agtype_operators.sql and agtype_exists.sql. Existing installs instead +-- pick up the fix from the ALTER OPERATOR ... SET (RESTRICT, JOIN) block that +-- age--1.7.0--y.y.y.sql ships and "ALTER EXTENSION age UPDATE" replays. Nothing +-- above exercises that block, so a silent regression in it would go unnoticed. +-- +-- We replay the shipped ALTER OPERATOR statements directly rather than running +-- ALTER EXTENSION age UPDATE: the dev upgrade script targets the placeholder +-- version "y.y.y" and is not a stable version-chain target inside the +-- regression harness. The whole section runs in a transaction that is rolled +-- back, so it observes the flip without permanently mutating the operator +-- catalog (PostgreSQL DDL is transactional). +BEGIN; + +-- Simulate a stale (pre-fix) install: force all ten overloads back onto +-- matchingsel / matchingjoinsel. +ALTER OPERATOR ag_catalog.@>(agtype, agtype) SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); +ALTER OPERATOR ag_catalog.<@(agtype, agtype) SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); +ALTER OPERATOR ag_catalog.@>>(agtype, agtype) SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); +ALTER OPERATOR ag_catalog.<<@(agtype, agtype) SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); +ALTER OPERATOR ag_catalog.?(agtype, text) SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); +ALTER OPERATOR ag_catalog.?(agtype, agtype) SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); +ALTER OPERATOR ag_catalog.?|(agtype, text[]) SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); +ALTER OPERATOR ag_catalog.?|(agtype, agtype) SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); +ALTER OPERATOR ag_catalog.?&(agtype, text[]) SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); +ALTER OPERATOR ag_catalog.?&(agtype, agtype) SET (RESTRICT = matchingsel, JOIN = matchingjoinsel); + +-- Stale state: every overload now reports matchingsel / matchingjoinsel. +SELECT o.oprname, + pg_catalog.format_type(o.oprleft, NULL) AS lhs, + pg_catalog.format_type(o.oprright, NULL) AS rhs, + o.oprrest::text AS restrict_fn, + o.oprjoin::text AS join_fn +FROM pg_catalog.pg_operator o +JOIN pg_catalog.pg_namespace n ON n.oid = o.oprnamespace +WHERE n.nspname = 'ag_catalog' + AND o.oprname IN ('@>', '<@', '@>>', '<<@', '?', '?|', '?&') +ORDER BY o.oprname, lhs, rhs; + +-- Replay the exact ALTER OPERATOR block shipped in age--1.7.0--y.y.y.sql. +ALTER OPERATOR ag_catalog.@>(agtype, agtype) SET (RESTRICT = contsel, JOIN = contjoinsel); +ALTER OPERATOR ag_catalog.<@(agtype, agtype) SET (RESTRICT = contsel, JOIN = contjoinsel); +ALTER OPERATOR ag_catalog.@>>(agtype, agtype) SET (RESTRICT = contsel, JOIN = contjoinsel); +ALTER OPERATOR ag_catalog.<<@(agtype, agtype) SET (RESTRICT = contsel, JOIN = contjoinsel); +ALTER OPERATOR ag_catalog.?(agtype, text) SET (RESTRICT = contsel, JOIN = contjoinsel); +ALTER OPERATOR ag_catalog.?(agtype, agtype) SET (RESTRICT = contsel, JOIN = contjoinsel); +ALTER OPERATOR ag_catalog.?|(agtype, text[]) SET (RESTRICT = contsel, JOIN = contjoinsel); +ALTER OPERATOR ag_catalog.?|(agtype, agtype) SET (RESTRICT = contsel, JOIN = contjoinsel); +ALTER OPERATOR ag_catalog.?&(agtype, text[]) SET (RESTRICT = contsel, JOIN = contjoinsel); +ALTER OPERATOR ag_catalog.?&(agtype, agtype) SET (RESTRICT = contsel, JOIN = contjoinsel); + +-- After the upgrade replay every overload is back on contsel / contjoinsel. +SELECT o.oprname, + pg_catalog.format_type(o.oprleft, NULL) AS lhs, + pg_catalog.format_type(o.oprright, NULL) AS rhs, + o.oprrest::text AS restrict_fn, + o.oprjoin::text AS join_fn +FROM pg_catalog.pg_operator o +JOIN pg_catalog.pg_namespace n ON n.oid = o.oprnamespace +WHERE n.nspname = 'ag_catalog' + AND o.oprname IN ('@>', '<@', '@>>', '<<@', '?', '?|', '?&') +ORDER BY o.oprname, lhs, rhs; + +ROLLBACK; diff --git a/regress/sql/cypher_match.sql b/regress/sql/cypher_match.sql index e56aafac8..8da012ff8 100644 --- a/regress/sql/cypher_match.sql +++ b/regress/sql/cypher_match.sql @@ -1068,9 +1068,9 @@ SELECT * FROM cypher('cypher_match', $$ MATCH (a {name:a.name}) RETURN a $$) as SELECT * FROM cypher('cypher_match', $$ MATCH (a {name:a.name, age:a.age}) RETURN a $$) as (a agtype); SELECT * FROM cypher('cypher_match', $$ MATCH (a {name:a.name}) MATCH (a {age:a.age}) RETURN a $$) as (a agtype); -SELECT * FROM cypher('cypher_match', $$ MATCH p=(a)-[u {relationship: u.relationship}]->(b) RETURN p $$) as (a agtype); -SELECT * FROM cypher('cypher_match', $$ MATCH p=(a)-[u {relationship: u.relationship, years: u.years}]->(b) RETURN p $$) as (a agtype); -SELECT * FROM cypher('cypher_match', $$ MATCH p=(a {name:a.name})-[u {relationship: u.relationship}]->(b {age:b.age}) RETURN p $$) as (a agtype); +SELECT * FROM cypher('cypher_match', $$ MATCH p=(a)-[u {relationship: u.relationship}]->(b) RETURN p ORDER BY id(u) $$) as (a agtype); +SELECT * FROM cypher('cypher_match', $$ MATCH p=(a)-[u {relationship: u.relationship, years: u.years}]->(b) RETURN p ORDER BY id(u) $$) as (a agtype); +SELECT * FROM cypher('cypher_match', $$ MATCH p=(a {name:a.name})-[u {relationship: u.relationship}]->(b {age:b.age}) RETURN p ORDER BY id(u) $$) as (a agtype); SELECT * FROM cypher('cypher_match', $$ CREATE () WITH * MATCH (x{n0:x.n1}) RETURN 0 $$) as (a agtype); diff --git a/regress/sql/cypher_vle.sql b/regress/sql/cypher_vle.sql index c960aa7a4..b121234e1 100644 --- a/regress/sql/cypher_vle.sql +++ b/regress/sql/cypher_vle.sql @@ -264,7 +264,7 @@ BEGIN RETURN QUERY SELECT * FROM cypher('mygraph', $CYPHER$ MATCH (h:head {name: $list_name})-[e:next*]->(v:node) - RETURN v + RETURN v ORDER BY id(v) $CYPHER$, ag_param) AS (node agtype); END $$; diff --git a/sql/agtype_exists.sql b/sql/agtype_exists.sql index 441af1755..f4ec4660b 100644 --- a/sql/agtype_exists.sql +++ b/sql/agtype_exists.sql @@ -32,8 +32,8 @@ CREATE OPERATOR ? ( LEFTARG = agtype, RIGHTARG = text, FUNCTION = ag_catalog.agtype_exists, - RESTRICT = matchingsel, - JOIN = matchingjoinsel + RESTRICT = contsel, + JOIN = contjoinsel ); CREATE FUNCTION ag_catalog.agtype_exists_agtype(agtype, agtype) @@ -48,8 +48,8 @@ CREATE OPERATOR ? ( LEFTARG = agtype, RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_exists_agtype, - RESTRICT = matchingsel, - JOIN = matchingjoinsel + RESTRICT = contsel, + JOIN = contjoinsel ); CREATE FUNCTION ag_catalog.agtype_exists_any(agtype, text[]) @@ -64,8 +64,8 @@ CREATE OPERATOR ?| ( LEFTARG = agtype, RIGHTARG = text[], FUNCTION = ag_catalog.agtype_exists_any, - RESTRICT = matchingsel, - JOIN = matchingjoinsel + RESTRICT = contsel, + JOIN = contjoinsel ); CREATE FUNCTION ag_catalog.agtype_exists_any_agtype(agtype, agtype) @@ -80,8 +80,8 @@ CREATE OPERATOR ?| ( LEFTARG = agtype, RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_exists_any_agtype, - RESTRICT = matchingsel, - JOIN = matchingjoinsel + RESTRICT = contsel, + JOIN = contjoinsel ); CREATE FUNCTION ag_catalog.agtype_exists_all(agtype, text[]) @@ -96,8 +96,8 @@ CREATE OPERATOR ?& ( LEFTARG = agtype, RIGHTARG = text[], FUNCTION = ag_catalog.agtype_exists_all, - RESTRICT = matchingsel, - JOIN = matchingjoinsel + RESTRICT = contsel, + JOIN = contjoinsel ); CREATE FUNCTION ag_catalog.agtype_exists_all_agtype(agtype, agtype) @@ -112,6 +112,6 @@ CREATE OPERATOR ?& ( LEFTARG = agtype, RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_exists_all_agtype, - RESTRICT = matchingsel, - JOIN = matchingjoinsel + RESTRICT = contsel, + JOIN = contjoinsel ); diff --git a/sql/agtype_operators.sql b/sql/agtype_operators.sql index 36fedfe80..3fbc52f33 100644 --- a/sql/agtype_operators.sql +++ b/sql/agtype_operators.sql @@ -33,8 +33,8 @@ CREATE OPERATOR @> ( RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_contains, COMMUTATOR = '<@', - RESTRICT = matchingsel, - JOIN = matchingjoinsel + RESTRICT = contsel, + JOIN = contjoinsel ); CREATE FUNCTION ag_catalog.agtype_contained_by(agtype, agtype) @@ -50,8 +50,8 @@ CREATE OPERATOR <@ ( RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_contained_by, COMMUTATOR = '@>', - RESTRICT = matchingsel, - JOIN = matchingjoinsel + RESTRICT = contsel, + JOIN = contjoinsel ); CREATE FUNCTION ag_catalog.agtype_contains_top_level(agtype, agtype) @@ -67,8 +67,8 @@ CREATE OPERATOR @>> ( RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_contains_top_level, COMMUTATOR = '<<@', - RESTRICT = matchingsel, - JOIN = matchingjoinsel + RESTRICT = contsel, + JOIN = contjoinsel ); CREATE FUNCTION ag_catalog.agtype_contained_by_top_level(agtype, agtype) @@ -84,6 +84,6 @@ CREATE OPERATOR <<@ ( RIGHTARG = agtype, FUNCTION = ag_catalog.agtype_contained_by_top_level, COMMUTATOR = '@>>', - RESTRICT = matchingsel, - JOIN = matchingjoinsel + RESTRICT = contsel, + JOIN = contjoinsel ); \ No newline at end of file