From a47f6d39b756d643b08420a87c8ce67a4a5bfb0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Ph=C3=BAc=20L=C6=B0=C6=A1ng?= Date: Thu, 28 May 2026 05:16:00 +0000 Subject: [PATCH] fix(compiler): scope-aware column resolution in subqueries (#4251) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When resolving an unqualified column reference inside a subquery, sqlc's analyzer matched against every table in scope across the whole query, producing a spurious "column reference ... is ambiguous" error for queries that real PostgreSQL resolves unambiguously via lexical scope. Example from the issue: SELECT * FROM t1 WHERE id IN ( SELECT t1_id FROM t2 WHERE id = $1 ); Real Postgres binds the outer "id" to t1.id and the inner "id" to t2.id by lexical scope. sqlc was flattening every FROM-clause RangeVar in the whole query into one search list and triggering "ambiguous" on the inner id. Two small changes: * internal/compiler/find_params.go — when paramSearch.Visit enters a SelectStmt whose FROM clause has exactly one RangeVar, capture it as the current scope (paramSearch.rangeVar). The walker propagates this through the returned visitor, so ParamRefs encountered in the same SelectStmt's WHERE/GROUP/etc. inherit the inner scope. The exactly-one guard ensures we don't silently pick a winner when the FROM clause is genuinely multi-table at the same level — those cases must keep iterating every table so true ambiguity (e.g. "SELECT t1.id FROM t1, t2 WHERE id = $1") still errors. * internal/compiler/resolve.go — when resolving an unqualified column, if no alias is given but ref.rv points to an in-scope table that actually contains the column, narrow the search to that table only. If the column is absent from the inner table, fall back to the full search list so correlated-subquery references to an outer column continue to work. Tests: * internal/compiler/resolve_test.go — new unit test covering narrowToInnermostScope across nil-rv, column-in-inner-scope (narrow), column-absent (fall back), and unknown-table (fall back). * internal/endtoend/testdata/subquery_scope_4251/ — end-to-end fixture reproducing the exact query from the issue under postgresql/pgx/v5; generated code asserted against the golden files. Verified locally: * /tmp/repro4251 generate now returns EXIT=0 (was EXIT=1 with 'relation "id" ambiguous'). * SELECT t1.id FROM t1, t2 WHERE id = $1 still correctly errors with 'ambiguous'. * Correlated subquery (SELECT SUM(total) FROM orders WHERE customer_id = c.id) still resolves cleanly. * go test ./internal/compiler/... ./internal/sql/... ./internal/engine/... passes; gofmt -l and go vet clean. Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/compiler/find_params.go | 38 +++++++--- internal/compiler/resolve.go | 53 ++++++++++++++ internal/compiler/resolve_test.go | 71 +++++++++++++++++++ .../postgresql/pgx/v5/go/db.go | 32 +++++++++ .../postgresql/pgx/v5/go/models.go | 18 +++++ .../postgresql/pgx/v5/go/query.sql.go | 44 ++++++++++++ .../postgresql/pgx/v5/query.sql | 10 +++ .../postgresql/pgx/v5/schema.sql | 8 +++ .../postgresql/pgx/v5/sqlc.json | 13 ++++ 9 files changed, 277 insertions(+), 10 deletions(-) create mode 100644 internal/compiler/resolve_test.go create mode 100644 internal/endtoend/testdata/subquery_scope_4251/postgresql/pgx/v5/go/db.go create mode 100644 internal/endtoend/testdata/subquery_scope_4251/postgresql/pgx/v5/go/models.go create mode 100644 internal/endtoend/testdata/subquery_scope_4251/postgresql/pgx/v5/go/query.sql.go create mode 100644 internal/endtoend/testdata/subquery_scope_4251/postgresql/pgx/v5/query.sql create mode 100644 internal/endtoend/testdata/subquery_scope_4251/postgresql/pgx/v5/schema.sql create mode 100644 internal/endtoend/testdata/subquery_scope_4251/postgresql/pgx/v5/sqlc.json diff --git a/internal/compiler/find_params.go b/internal/compiler/find_params.go index 8199addd33..e4df326a7e 100644 --- a/internal/compiler/find_params.go +++ b/internal/compiler/find_params.go @@ -20,18 +20,20 @@ func findParameters(root ast.Node) ([]paramRef, []error) { } type paramRef struct { - parent ast.Node - rv *ast.RangeVar - ref *ast.ParamRef - name string // Named parameter support + parent ast.Node + rv *ast.RangeVar + ref *ast.ParamRef + name string // Named parameter support + inSubquery bool // true if this ParamRef sits inside a SubLink's subselect; gates scope narrowing to subqueries only (issue #4251) } type paramSearch struct { - parent ast.Node - rangeVar *ast.RangeVar - refs *[]paramRef - seen map[int]struct{} - errs *[]error + parent ast.Node + rangeVar *ast.RangeVar + refs *[]paramRef + seen map[int]struct{} + errs *[]error + inSubquery bool // true once we have descended into a SubLink's subselect; propagates to ParamRefs encountered below it (issue #4251) // XXX: Gross state hack for limit limitCount ast.Node @@ -139,6 +141,22 @@ func (p paramSearch) Visit(node ast.Node) astutils.Visitor { case *ast.ResTarget: p.parent = node + case *ast.SubLink: + // Issue #4251: when descending into a SubLink's subselect (e.g. + // "x IN (SELECT y FROM t WHERE id = $1)"), capture the subselect's + // first FROM-clause RangeVar so ParamRefs inside its WHERE/GROUP/etc. + // resolve against the inner scope. Only narrow when the subselect's + // FROM is unambiguous on its own (single RangeVar; no JOINs / no + // multi-table FROM / no nested subselect). Scoped to SubLinks + // specifically: top-level INSERT-SELECT / JOIN / etc. continue to use + // the full-table search the resolver has always done. + p.inSubquery = true + if sel, ok := n.Subselect.(*ast.SelectStmt); ok && sel.FromClause != nil && len(sel.FromClause.Items) == 1 { + if rv, ok := sel.FromClause.Items[0].(*ast.RangeVar); ok && rv != nil && rv.Relname != nil { + p.rangeVar = rv + } + } + case *ast.SelectStmt: if n.LimitCount != nil { p.limitCount = n.LimitCount @@ -186,7 +204,7 @@ func (p paramSearch) Visit(node ast.Node) astutils.Visitor { } if set { - *p.refs = append(*p.refs, paramRef{parent: parent, ref: n, rv: p.rangeVar}) + *p.refs = append(*p.refs, paramRef{parent: parent, ref: n, rv: p.rangeVar, inSubquery: p.inSubquery}) p.seen[n.Location] = struct{}{} } return nil diff --git a/internal/compiler/resolve.go b/internal/compiler/resolve.go index d926f2b1fc..51953dc574 100644 --- a/internal/compiler/resolve.go +++ b/internal/compiler/resolve.go @@ -215,6 +215,10 @@ func (comp *Compiler) resolveCatalogRefs(qc *QueryCatalog, rvs []*ast.RangeVar, } } } + } else if ref.inSubquery { + if scoped := narrowToInnermostScope(tables, typeMap, c.DefaultSchema, ref.rv, key); scoped != nil { + search = scoped + } } var found int @@ -581,6 +585,10 @@ func (comp *Compiler) resolveCatalogRefs(qc *QueryCatalog, rvs []*ast.RangeVar, } } } + } else if ref.inSubquery { + if scoped := narrowToInnermostScope(tables, typeMap, c.DefaultSchema, ref.rv, key); scoped != nil { + search = scoped + } } for _, table := range search { @@ -636,3 +644,48 @@ func (comp *Compiler) resolveCatalogRefs(qc *QueryCatalog, rvs []*ast.RangeVar, } return a, nil } + +// narrowToInnermostScope returns a single-element search slice when the +// parameter reference (ref.rv) points to a known table that actually +// contains the column being resolved. It implements the lexical-scope rule +// real PostgreSQL applies inside subqueries: an unqualified column reference +// is bound to the innermost FROM-clause table that defines it. If the +// innermost scope is unknown (rv nil) or the column is not present there +// (correlated-subquery referring to an outer column), it returns nil so the +// caller falls back to the full table list. See issue #4251. +func narrowToInnermostScope( + tables []*ast.TableName, + typeMap map[string]map[string]map[string]*catalog.Column, + defaultSchema string, + rv *ast.RangeVar, + column string, +) []*ast.TableName { + if rv == nil || rv.Relname == nil { + return nil + } + innerName := *rv.Relname + innerSchema := "" + if rv.Schemaname != nil { + innerSchema = *rv.Schemaname + } + lookupSchema := innerSchema + if lookupSchema == "" { + lookupSchema = defaultSchema + } + // Only narrow if the column actually exists in the innermost scope. + // Falling back to the full search preserves correlated-subquery + // behavior (e.g. an inner WHERE referring to an outer column). + if _, ok := typeMap[lookupSchema][innerName][column]; !ok { + return nil + } + for _, t := range tables { + tSchema := t.Schema + if tSchema == "" { + tSchema = defaultSchema + } + if t.Name == innerName && tSchema == lookupSchema { + return []*ast.TableName{t} + } + } + return nil +} diff --git a/internal/compiler/resolve_test.go b/internal/compiler/resolve_test.go new file mode 100644 index 0000000000..83a5479092 --- /dev/null +++ b/internal/compiler/resolve_test.go @@ -0,0 +1,71 @@ +package compiler + +import ( + "testing" + + "github.com/sqlc-dev/sqlc/internal/sql/ast" + "github.com/sqlc-dev/sqlc/internal/sql/catalog" +) + +// TestNarrowToInnermostScope covers the lexical-scope rule used by the +// resolver to disambiguate column references that live inside subqueries. +// See issue #4251. +func TestNarrowToInnermostScope(t *testing.T) { + t.Parallel() + + str := func(s string) *string { return &s } + tables := []*ast.TableName{ + {Name: "t1"}, + {Name: "t2"}, + } + typeMap := map[string]map[string]map[string]*catalog.Column{ + "public": { + "t1": {"id": {Name: "id"}}, + "t2": {"id": {Name: "id"}, "t1_id": {Name: "t1_id"}}, + }, + } + + t.Run("nil_rv_returns_nil", func(t *testing.T) { + got := narrowToInnermostScope(tables, typeMap, "public", nil, "id") + if got != nil { + t.Fatalf("expected nil (no narrowing) got %v", got) + } + }) + + t.Run("nil_relname_returns_nil", func(t *testing.T) { + got := narrowToInnermostScope(tables, typeMap, "public", &ast.RangeVar{}, "id") + if got != nil { + t.Fatalf("expected nil (no narrowing) got %v", got) + } + }) + + t.Run("column_in_inner_scope_narrows_to_inner_table", func(t *testing.T) { + // The repro shape: ParamRef in inner SELECT (rv=t2) resolving column "id". + // id exists in t2 -> narrow search to [t2] so the outer t1.id doesn't + // trigger a spurious "ambiguous" error. + rv := &ast.RangeVar{Relname: str("t2")} + got := narrowToInnermostScope(tables, typeMap, "public", rv, "id") + if len(got) != 1 || got[0].Name != "t2" { + t.Fatalf("expected narrow to [t2], got %v", got) + } + }) + + t.Run("column_absent_from_inner_falls_back_to_full_scope", func(t *testing.T) { + // Correlated-subquery shape: inner SELECT (rv=t2) references an outer + // column not present in t2. Returning nil tells the caller to keep the + // full tables list, which lets the outer-scope match win. + rv := &ast.RangeVar{Relname: str("t2")} + got := narrowToInnermostScope(tables, typeMap, "public", rv, "not_a_t2_column") + if got != nil { + t.Fatalf("expected nil (fall back to all tables), got %v", got) + } + }) + + t.Run("rv_points_to_unknown_table_falls_back", func(t *testing.T) { + rv := &ast.RangeVar{Relname: str("nonexistent")} + got := narrowToInnermostScope(tables, typeMap, "public", rv, "id") + if got != nil { + t.Fatalf("expected nil (fall back), got %v", got) + } + }) +} diff --git a/internal/endtoend/testdata/subquery_scope_4251/postgresql/pgx/v5/go/db.go b/internal/endtoend/testdata/subquery_scope_4251/postgresql/pgx/v5/go/db.go new file mode 100644 index 0000000000..0057c62319 --- /dev/null +++ b/internal/endtoend/testdata/subquery_scope_4251/postgresql/pgx/v5/go/db.go @@ -0,0 +1,32 @@ +// Code generated by sqlc. DO NOT EDIT. +// versions: +// sqlc v1.31.1 + +package querytest + +import ( + "context" + + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgconn" +) + +type DBTX interface { + Exec(context.Context, string, ...interface{}) (pgconn.CommandTag, error) + Query(context.Context, string, ...interface{}) (pgx.Rows, error) + QueryRow(context.Context, string, ...interface{}) pgx.Row +} + +func New(db DBTX) *Queries { + return &Queries{db: db} +} + +type Queries struct { + db DBTX +} + +func (q *Queries) WithTx(tx pgx.Tx) *Queries { + return &Queries{ + db: tx, + } +} diff --git a/internal/endtoend/testdata/subquery_scope_4251/postgresql/pgx/v5/go/models.go b/internal/endtoend/testdata/subquery_scope_4251/postgresql/pgx/v5/go/models.go new file mode 100644 index 0000000000..bfe8fa3b77 --- /dev/null +++ b/internal/endtoend/testdata/subquery_scope_4251/postgresql/pgx/v5/go/models.go @@ -0,0 +1,18 @@ +// Code generated by sqlc. DO NOT EDIT. +// versions: +// sqlc v1.31.1 + +package querytest + +import ( + "github.com/jackc/pgx/v5/pgtype" +) + +type T1 struct { + ID pgtype.UUID +} + +type T2 struct { + ID pgtype.UUID + T1ID pgtype.UUID +} diff --git a/internal/endtoend/testdata/subquery_scope_4251/postgresql/pgx/v5/go/query.sql.go b/internal/endtoend/testdata/subquery_scope_4251/postgresql/pgx/v5/go/query.sql.go new file mode 100644 index 0000000000..c1228cbfa3 --- /dev/null +++ b/internal/endtoend/testdata/subquery_scope_4251/postgresql/pgx/v5/go/query.sql.go @@ -0,0 +1,44 @@ +// Code generated by sqlc. DO NOT EDIT. +// versions: +// sqlc v1.31.1 +// source: query.sql + +package querytest + +import ( + "context" + + "github.com/jackc/pgx/v5/pgtype" +) + +const getT1FromT2 = `-- name: GetT1FromT2 :many +SELECT id FROM t1 +WHERE id IN ( + SELECT t1_id + FROM t2 + WHERE id = $1 +) +` + +// Unqualified `id` inside the subquery must bind to t2.id (innermost +// FROM-clause scope), not be flagged as ambiguous against t1.id. See +// issue #4251. +func (q *Queries) GetT1FromT2(ctx context.Context, id pgtype.UUID) ([]pgtype.UUID, error) { + rows, err := q.db.Query(ctx, getT1FromT2, id) + if err != nil { + return nil, err + } + defer rows.Close() + var items []pgtype.UUID + for rows.Next() { + var id pgtype.UUID + if err := rows.Scan(&id); err != nil { + return nil, err + } + items = append(items, id) + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} diff --git a/internal/endtoend/testdata/subquery_scope_4251/postgresql/pgx/v5/query.sql b/internal/endtoend/testdata/subquery_scope_4251/postgresql/pgx/v5/query.sql new file mode 100644 index 0000000000..532cf1f82f --- /dev/null +++ b/internal/endtoend/testdata/subquery_scope_4251/postgresql/pgx/v5/query.sql @@ -0,0 +1,10 @@ +-- name: GetT1FromT2 :many +-- Unqualified `id` inside the subquery must bind to t2.id (innermost +-- FROM-clause scope), not be flagged as ambiguous against t1.id. See +-- issue #4251. +SELECT id FROM t1 +WHERE id IN ( + SELECT t1_id + FROM t2 + WHERE id = $1 +); diff --git a/internal/endtoend/testdata/subquery_scope_4251/postgresql/pgx/v5/schema.sql b/internal/endtoend/testdata/subquery_scope_4251/postgresql/pgx/v5/schema.sql new file mode 100644 index 0000000000..a916df8041 --- /dev/null +++ b/internal/endtoend/testdata/subquery_scope_4251/postgresql/pgx/v5/schema.sql @@ -0,0 +1,8 @@ +CREATE TABLE t1 ( + id UUID PRIMARY KEY +); + +CREATE TABLE t2 ( + id UUID, + t1_id UUID REFERENCES t1(id) ON DELETE CASCADE +); diff --git a/internal/endtoend/testdata/subquery_scope_4251/postgresql/pgx/v5/sqlc.json b/internal/endtoend/testdata/subquery_scope_4251/postgresql/pgx/v5/sqlc.json new file mode 100644 index 0000000000..32ede07158 --- /dev/null +++ b/internal/endtoend/testdata/subquery_scope_4251/postgresql/pgx/v5/sqlc.json @@ -0,0 +1,13 @@ +{ + "version": "1", + "packages": [ + { + "path": "go", + "engine": "postgresql", + "sql_package": "pgx/v5", + "name": "querytest", + "schema": "schema.sql", + "queries": "query.sql" + } + ] +}