From 39e6bfc894d4352779f23cceec84af6fa42ecf88 Mon Sep 17 00:00:00 2001 From: yoff Date: Tue, 2 Jun 2026 14:09:28 +0000 Subject: [PATCH 1/3] Python: add shared-CFG AstSig adapter (AstNodeImpl) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Preparatory refactor for the shared-CFG dataflow migration. Adds the adapter that mediates between the Python AST and the shared codeql.controlflow.ControlFlowGraph signature, plus the test suites that validate the new CFG directly against this adapter. The public facade is added in the following commit. Library additions: - semmle.python.controlflow.internal.AstNodeImpl — wraps Python's Stmt/Expr/Scope/Pattern and adds two synthetic kinds of node (BlockStmt for body slots, intermediate nodes for multi-operand boolean expressions) to satisfy the shared CFG signature. - lib/printCfgNew.ql — debug/visualisation query for the new CFG. - consistency-queries/CfgConsistency.ql — consistency query running the shared CFG's standard checks against Python. Test additions (all driven directly off AstNodeImpl): - ControlFlow/bindings/* — annotation-driven SSA-binding tests (annassign, compound, comprehension, decorated, except_handler, imports, match_pattern, parameters, simple, type_params, walrus_starred, with_stmt, dead_under_no_raise). - ControlFlow/evaluation-order/NewCfg*.ql — mirrors of the existing OldCfg evaluation-order self-validation suite, run against the new CFG via NewCfgImpl.qll. - Minor extensions to existing test_if.py / test_boolean.py + cosmetic .expected churn on a handful of OldCfg tests. No dataflow, SSA, or production query is migrated yet. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../ql/consistency-queries/CfgConsistency.ql | 2 + python/ql/lib/printCfgNew.ql | 45 + .../controlflow/internal/AstNodeImpl.qll | 1610 +++++++++++++++++ .../CONSISTENCY/CfgConsistency.expected | 4 + .../bindings/BindingsTest.expected | 0 .../ControlFlow/bindings/BindingsTest.ql | 32 + .../ControlFlow/bindings/annassign.py | 13 + .../ControlFlow/bindings/compound.py | 14 + .../ControlFlow/bindings/comprehension.py | 21 + .../bindings/dead_under_no_raise.py | 52 + .../ControlFlow/bindings/decorated.py | 30 + .../ControlFlow/bindings/except_handler.py | 19 + .../ControlFlow/bindings/imports.py | 14 + .../ControlFlow/bindings/match_pattern.py | 24 + .../ControlFlow/bindings/parameters.py | 42 + .../ControlFlow/bindings/simple.py | 14 + .../ControlFlow/bindings/type_params.py | 21 + .../ControlFlow/bindings/walrus_starred.py | 14 + .../ControlFlow/bindings/with_stmt.py | 21 + .../evaluation-order/AllLiveReachable.ql | 2 + .../evaluation-order/AnnotationHasCfgNode.ql | 2 + .../BasicBlockAnnotationGap.ql | 2 + .../BasicBlockOrdering.expected | 2 +- .../evaluation-order/BasicBlockOrdering.ql | 2 + .../evaluation-order/ConsecutiveTimestamps.ql | 2 + .../evaluation-order/ContiguousTimestamps.ql | 1 + .../evaluation-order/NeverReachable.ql | 2 + .../NewCfgAllLiveReachable.expected | 0 .../NewCfgAllLiveReachable.ql | 14 + .../NewCfgAnnotationHasCfgNode.expected | 1 + .../NewCfgAnnotationHasCfgNode.ql | 18 + .../NewCfgBasicBlockAnnotationGap.expected | 0 .../NewCfgBasicBlockAnnotationGap.ql | 26 + .../NewCfgBasicBlockOrdering.expected | 0 .../NewCfgBasicBlockOrdering.ql | 21 + .../NewCfgBranchTimestamps.expected | 0 .../NewCfgBranchTimestamps.ql | 80 + ...gConsecutivePredecessorTimestamps.expected | 1 + .../NewCfgConsecutivePredecessorTimestamps.ql | 22 + .../NewCfgConsecutiveTimestamps.expected | 0 .../NewCfgConsecutiveTimestamps.ql | 29 + .../evaluation-order/NewCfgImpl.qll | 101 ++ 
.../NewCfgNeverReachable.expected | 0 .../evaluation-order/NewCfgNeverReachable.ql | 21 + .../NewCfgNoBackwardFlow.expected | 0 .../evaluation-order/NewCfgNoBackwardFlow.ql | 22 + .../NewCfgNoBasicBlock.expected | 1 + .../evaluation-order/NewCfgNoBasicBlock.ql | 18 + .../NewCfgNoSharedReachable.expected | 0 .../NewCfgNoSharedReachable.ql | 21 + .../NewCfgStrictForward.expected | 0 .../evaluation-order/NewCfgStrictForward.ql | 22 + .../evaluation-order/NoBackwardFlow.expected | 2 +- .../evaluation-order/NoBackwardFlow.ql | 2 + .../evaluation-order/NoBasicBlock.ql | 2 + .../evaluation-order/NoSharedReachable.ql | 2 + .../evaluation-order/OldCfgImpl.qll | 8 +- .../evaluation-order/StrictForward.expected | 2 +- .../evaluation-order/StrictForward.ql | 2 + .../evaluation-order/test_boolean.py | 2 +- .../ControlFlow/evaluation-order/test_if.py | 2 +- 61 files changed, 2440 insertions(+), 9 deletions(-) create mode 100644 python/ql/consistency-queries/CfgConsistency.ql create mode 100644 python/ql/lib/printCfgNew.ql create mode 100644 python/ql/lib/semmle/python/controlflow/internal/AstNodeImpl.qll create mode 100644 python/ql/test/extractor-tests/syntax_error/CONSISTENCY/CfgConsistency.expected create mode 100644 python/ql/test/library-tests/ControlFlow/bindings/BindingsTest.expected create mode 100644 python/ql/test/library-tests/ControlFlow/bindings/BindingsTest.ql create mode 100644 python/ql/test/library-tests/ControlFlow/bindings/annassign.py create mode 100644 python/ql/test/library-tests/ControlFlow/bindings/compound.py create mode 100644 python/ql/test/library-tests/ControlFlow/bindings/comprehension.py create mode 100644 python/ql/test/library-tests/ControlFlow/bindings/dead_under_no_raise.py create mode 100644 python/ql/test/library-tests/ControlFlow/bindings/decorated.py create mode 100644 python/ql/test/library-tests/ControlFlow/bindings/except_handler.py create mode 100644 python/ql/test/library-tests/ControlFlow/bindings/imports.py create mode 100644 
python/ql/test/library-tests/ControlFlow/bindings/match_pattern.py create mode 100644 python/ql/test/library-tests/ControlFlow/bindings/parameters.py create mode 100644 python/ql/test/library-tests/ControlFlow/bindings/simple.py create mode 100644 python/ql/test/library-tests/ControlFlow/bindings/type_params.py create mode 100644 python/ql/test/library-tests/ControlFlow/bindings/walrus_starred.py create mode 100644 python/ql/test/library-tests/ControlFlow/bindings/with_stmt.py create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgAllLiveReachable.expected create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgAllLiveReachable.ql create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgAnnotationHasCfgNode.expected create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgAnnotationHasCfgNode.ql create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBasicBlockAnnotationGap.expected create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBasicBlockAnnotationGap.ql create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBasicBlockOrdering.expected create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBasicBlockOrdering.ql create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBranchTimestamps.expected create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBranchTimestamps.ql create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgConsecutivePredecessorTimestamps.expected create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgConsecutivePredecessorTimestamps.ql create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgConsecutiveTimestamps.expected create mode 100644 
python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgConsecutiveTimestamps.ql create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgImpl.qll create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNeverReachable.expected create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNeverReachable.ql create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoBackwardFlow.expected create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoBackwardFlow.ql create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoBasicBlock.expected create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoBasicBlock.ql create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoSharedReachable.expected create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoSharedReachable.ql create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgStrictForward.expected create mode 100644 python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgStrictForward.ql diff --git a/python/ql/consistency-queries/CfgConsistency.ql b/python/ql/consistency-queries/CfgConsistency.ql new file mode 100644 index 000000000000..ab13eddf190c --- /dev/null +++ b/python/ql/consistency-queries/CfgConsistency.ql @@ -0,0 +1,2 @@ +import semmle.python.controlflow.internal.AstNodeImpl +import ControlFlow::Consistency diff --git a/python/ql/lib/printCfgNew.ql b/python/ql/lib/printCfgNew.ql new file mode 100644 index 000000000000..ba336de562a7 --- /dev/null +++ b/python/ql/lib/printCfgNew.ql @@ -0,0 +1,45 @@ +/** + * @name Print CFG (New) + * @description Produces a representation of a file's Control Flow Graph + * using the new shared control flow library. + * This query is used by the VS Code extension. 
+ * @id python/print-cfg + * @kind graph + * @tags ide-contextual-queries/print-cfg + */ + +private import python as Py +import semmle.python.controlflow.internal.AstNodeImpl + +external string selectedSourceFile(); + +private predicate selectedSourceFileAlias = selectedSourceFile/0; + +external int selectedSourceLine(); + +private predicate selectedSourceLineAlias = selectedSourceLine/0; + +external int selectedSourceColumn(); + +private predicate selectedSourceColumnAlias = selectedSourceColumn/0; + +module ViewCfgQueryInput implements ControlFlow::ViewCfgQueryInputSig { + predicate selectedSourceFile = selectedSourceFileAlias/0; + + predicate selectedSourceLine = selectedSourceLineAlias/0; + + predicate selectedSourceColumn = selectedSourceColumnAlias/0; + + predicate cfgScopeSpan( + Ast::Callable callable, Py::File file, int startLine, int startColumn, int endLine, + int endColumn + ) { + exists(Py::Scope scope | + scope = callable.asScope() and + file = scope.getLocation().getFile() and + scope.getLocation().hasLocationInfo(_, startLine, startColumn, endLine, endColumn) + ) + } +} + +import ControlFlow::ViewCfgQuery diff --git a/python/ql/lib/semmle/python/controlflow/internal/AstNodeImpl.qll b/python/ql/lib/semmle/python/controlflow/internal/AstNodeImpl.qll new file mode 100644 index 000000000000..5dba3d96ea8e --- /dev/null +++ b/python/ql/lib/semmle/python/controlflow/internal/AstNodeImpl.qll @@ -0,0 +1,1610 @@ +/** + * Provides classes for the shared control-flow library, mediating between + * the Python AST and `AstSig`. + * + * The `Ast` module wraps Python's `Stmt`, `Expr`, `Scope`, and `Pattern`, + * and adds two synthetic kinds of node: + * - `BlockStmt`, identifying a body slot of a parent AST node (e.g. an + * `if`'s then or else branch). `Py::StmtList` itself is not directly + * wrapped. + * - Intermediate nodes for multi-operand boolean expressions. + */ +overlay[local?] 
+module; + +private import python as Py +private import codeql.controlflow.ControlFlowGraph +private import codeql.controlflow.SuccessorType +private import codeql.util.Void + +/** + * Gets the bound `Name` of a PEP 695 type parameter (`TypeVar`, + * `ParamSpec`, or `TypeVarTuple`). The base `TypeParameter` class does + * not expose `getName()`; this helper dispatches over the subtypes. + */ +private Py::Name typeParameterName(Py::TypeParameter tp) { + result = tp.(Py::TypeVar).getName() + or + result = tp.(Py::ParamSpec).getName() + or + result = tp.(Py::TypeVarTuple).getName() +} + +/** Provides the Python implementation of the shared CFG `AstSig`. */ +module Ast implements AstSig { + private newtype TAstNode = + TPyStmt(Py::Stmt s) or + TPyExpr(Py::Expr e) { not e instanceof Py::BoolExpr } or + TScope(Py::Scope sc) or + TPattern(Py::Pattern p) or + /** + * A synthetic node representing an operand pair of an `and`/`or` + * expression. For `a and b and c` (operands 0, 1, 2) we model the + * operation as a right-nested tree: pair 0 represents the whole + * expression with left=a and right=pair 1; pair 1 represents + * `b and c` with left=b and right=c. Each Python `Py::BoolExpr` + * with `n` operands has `n - 1` such pairs (indices `0 .. n - 2`). + */ + TBoolExprPair(Py::BoolExpr be, int index) { index = [0 .. count(be.getAValue()) - 2] } or + /** + * A synthetic block statement, wrapping a `Py::StmtList`. Each list of + * statements that represents an imperative block (a function/class/module + * body, an `if`/`while`/`for` branch, a `try`/`except`/`finally` body, + * etc.) becomes one `BlockStmt` node in the CFG. `Py::StmtList`s used + * in other roles - `Try.getHandlers()` (iterated via `getCatch`) and + * `MatchStmt.getCases()` (iterated via `getCase`) - are excluded, as + * the shared library's `Try`/`Switch` logic walks their items + * individually. 
+ */ + TBlockStmt(Py::StmtList sl) { + not sl = any(Py::Try t).getHandlers() and + not sl = any(Py::MatchStmt m).getCases() + } + + /** + * The union of `TPyStmt` (wrapping `Py::Stmt`) and `TBlockStmt` (wrapping + * `Py::StmtList`). Both represent the kinds of node that can appear in + * a `Stmt` position in the CFG. + */ + private class TStmt = TPyStmt or TBlockStmt; + + /** + * The union of `TPyExpr` (wrapping non-boolean `Py::Expr`) and + * `TBoolExprPair` (synthetic operand pairs of `and`/`or` expressions). + * Both represent the kinds of node that can appear in an `Expr` + * position in the CFG. + */ + private class TExpr = TPyExpr or TBoolExprPair; + + /** + * An AST node visible to the shared CFG. + * + * This is the abstract implementation class. It enforces that each + * concrete subclass provides `toString`, `getLocation`, and + * `getEnclosingCallable` (one subclass per `TAstNode` newtype branch). + * The public alias `AstNode` is what users (and the `AstSig` signature) + * see; subclasses inside this module extend `AstNodeImpl` directly. + */ + abstract private class AstNodeImpl extends TAstNode { + /** Gets a textual representation of this AST node. */ + abstract string toString(); + + /** Gets the location of this AST node. */ + abstract Py::Location getLocation(); + + /** Gets the enclosing callable that contains this node, if any. */ + abstract Callable getEnclosingCallable(); + + /** Gets the underlying Python `Stmt`, if this node wraps one. */ + Py::Stmt asStmt() { this = TPyStmt(result) } + + /** + * Gets the underlying Python `Expr`, if this node wraps one. Boolean + * expressions are represented by `TBoolExprPair(_, 0)`; this + * predicate also recovers the underlying `Py::BoolExpr` from such a + * representation. + */ + Py::Expr asExpr() { + this = TPyExpr(result) + or + this = TBoolExprPair(result, 0) + } + + /** Gets the underlying Python `Scope`, if this node wraps one. 
*/ + Py::Scope asScope() { this = TScope(result) } + + /** Gets the underlying Python `Pattern`, if this node wraps one. */ + Py::Pattern asPattern() { this = TPattern(result) } + + /** Gets the underlying Python `StmtList`, if this node is a `BlockStmt`. */ + Py::StmtList asStmtList() { this = TBlockStmt(result) } + + /** + * Gets the child of this AST node at the specified (zero-based) + * index, in evaluation order. Subclasses with children override + * this method. + */ + AstNode getChild(int index) { none() } + } + + /** An AST node visible to the shared CFG. */ + final class AstNode = AstNodeImpl; + + /** Gets the immediately enclosing callable that contains `node`. */ + Callable getEnclosingCallable(AstNode node) { result = node.getEnclosingCallable() } + + /** + * A callable: a function, class, or module scope. + * + * In Python, all three are executable scopes with statement bodies. + */ + class Callable extends AstNodeImpl, TScope { + private Py::Scope sc; + + Callable() { this = TScope(sc) } + + override string toString() { result = sc.toString() } + + override Py::Location getLocation() { result = sc.getLocation() } + + override Callable getEnclosingCallable() { result.asScope() = sc.getEnclosingScope() } + } + + /** Gets the body of callable `c`. */ + AstNode callableGetBody(Callable c) { result.asStmtList() = c.asScope().getBody() } + + /** + * A parameter of a callable. + * + * Modelled per the C# template (`csharp/.../ControlFlowGraph.qll:147-156`): + * each Python parameter (the `Py::Parameter` AST node, which is a `Name` + * or — Python 2 only — a `Tuple` in store context) becomes a CFG node + * at a stable position in the enclosing callable's entry sequence. + * + * Default-value expressions for positional and keyword-only parameters + * are wired separately on the `FunctionDefExpr` / `LambdaExpr` wrappers + * (they evaluate at function-definition time, not at call time). 
+ * `Parameter::getDefaultValue()` returns `none()` here, signalling to + * the shared library that the parameter never falls back to a default + * during call binding. This mirrors C# for non-optional parameters. + */ + class Parameter extends Expr { + private Py::Parameter param; + + Parameter() { this = TPyExpr(param) } + + /** Gets the underlying Python parameter. */ + Py::Parameter asParameter() { result = param } + + /** + * Gets the default-value expression of this parameter, if any. + * + * Returns `none()`: defaults evaluate at function-definition time and + * are wired into the CFG via `FunctionDefExpr.getDefault` / + * `LambdaExpr.getDefault`. The shared library calls this predicate + * to model the "missing argument → evaluate default" fallback during + * call binding, which Python does not model at the CFG level. + */ + Expr getDefaultValue() { none() } + + /** + * Gets the pattern for this parameter. Python 3 has no destructuring + * pattern syntax for parameters (a Python 2 tuple parameter is wrapped as a single `Parameter`), so the pattern is the parameter itself. + */ + AstNode getPattern() { result = this } + } + + /** + * Gets the `index`th (zero-based) parameter of callable `c`, ordered as Python binds + * them at call time: positional, then vararg (`*args`), then + * keyword-only, then kwarg (`**kwargs`). Has no result unless `c` wraps a `Py::Function`. + */ + Parameter callableGetParameter(Callable c, int index) { + exists(Py::Function f | f = c.asScope() | + result.asParameter() = + rank[index + 1](Py::Parameter p, int subOrder, int subIndex | + // positional parameters first + p = f.getArg(subIndex) and subOrder = 0 + or + // then *args + p = f.getVararg() and subOrder = 1 and subIndex = 0 + or + // then keyword-only parameters + p = f.getKeywordOnlyArg(subIndex) and subOrder = 2 + or + // finally **kwargs + p = f.getKwarg() and subOrder = 3 and subIndex = 0 + | + p order by subOrder, subIndex + ) + ) + } + + /** A statement. */ + class Stmt extends AstNodeImpl, TStmt { + // For `TPyStmt` instances, delegate to the wrapped Python statement. 
+ // `BlockStmt` (the only `TBlockStmt` subclass) provides its own overrides. + override string toString() { result = this.asStmt().toString() } + + override Py::Location getLocation() { result = this.asStmt().getLocation() } + + override Callable getEnclosingCallable() { result.asScope() = this.asStmt().getScope() } + } + + /** An expression. */ + class Expr extends AstNodeImpl, TExpr { + // For `TPyExpr` instances, delegate to the wrapped Python expression. + // `BinaryExpr` (the only `TBoolExprPair` subclass) provides its own overrides. + override string toString() { result = this.asExpr().toString() } + + override Py::Location getLocation() { result = this.asExpr().getLocation() } + + override Callable getEnclosingCallable() { result.asScope() = this.asExpr().getScope() } + } + + /** A pattern in a `match` statement. */ + additional class Pattern extends AstNodeImpl, TPattern { + private Py::Pattern p; + + Pattern() { this = TPattern(p) } + + override string toString() { result = p.toString() } + + override Py::Location getLocation() { result = p.getLocation() } + + override Callable getEnclosingCallable() { result.asScope() = p.getScope() } + } + + /** + * A `case x` pattern that binds `x` to the matched value. + */ + additional class MatchCapturePattern extends Pattern { + private Py::MatchCapturePattern cap; + + MatchCapturePattern() { this = TPattern(cap) } + + /** Gets the bound Name expression. */ + Expr getVariable() { result.asExpr() = cap.getVariable() } + + override AstNode getChild(int index) { index = 0 and result = this.getVariable() } + } + + /** + * A `case pattern as name` pattern. + */ + additional class MatchAsPattern extends Pattern { + private Py::MatchAsPattern asp; + + MatchAsPattern() { this = TPattern(asp) } + + /** Gets the inner pattern. */ + AstNode getPattern() { result.asPattern() = asp.getPattern() } + + /** Gets the bound Name expression. 
*/ + Expr getAlias() { result.asExpr() = asp.getAlias() } + + override AstNode getChild(int index) { + index = 0 and result = this.getPattern() + or + index = 1 and result = this.getAlias() + } + } + + /** + * A `*rest` star sub-pattern, as in `case [a, b, *rest]`. Binds `rest` to the remaining + * elements of the sequence. + */ + additional class MatchStarPattern extends Pattern { + private Py::MatchStarPattern starp; + + MatchStarPattern() { this = TPattern(starp) } + + /** Gets the target pattern of this star pattern (a `MatchCapturePattern` in the case of `*rest`). */ + AstNode getTarget() { result.asPattern() = starp.getTarget() } + + override AstNode getChild(int index) { index = 0 and result = this.getTarget() } + } + + /** + * A `case [a, b, ...]` sequence pattern. Recurses into the sub-patterns. + */ + additional class MatchSequencePattern extends Pattern { + private Py::MatchSequencePattern seqp; + + MatchSequencePattern() { this = TPattern(seqp) } + + /** Gets the `n`th sub-pattern. */ + AstNode getPattern(int n) { result.asPattern() = seqp.getPattern(n) } + + override AstNode getChild(int index) { result = this.getPattern(index) } + } + + /** + * A `case Cls(a, b, x=y)` class pattern. + */ + additional class MatchClassPattern extends Pattern { + private Py::MatchClassPattern clsp; + + MatchClassPattern() { this = TPattern(clsp) } + + /** Gets the class expression of this class pattern. */ + Expr getClass() { result.asExpr() = clsp.getClass() } + + /** Gets the `n`th positional sub-pattern. */ + AstNode getPositional(int n) { result.asPattern() = clsp.getPositional(n) } + + /** Gets the `n`th keyword sub-pattern. 
*/ + AstNode getKeyword(int n) { result.asPattern() = clsp.getKeyword(n) } + + private int numPositional() { result = count(int i | exists(clsp.getPositional(i))) } + + override AstNode getChild(int index) { + index = 0 and result = this.getClass() + or + result = this.getPositional(index - 1) and index >= 1 + or + result = this.getKeyword(index - 1 - this.numPositional()) and + index >= 1 + this.numPositional() + } + } + + /** + * A `case {k: v}` mapping pattern. + */ + additional class MatchMappingPattern extends Pattern { + private Py::MatchMappingPattern mapp; + + MatchMappingPattern() { this = TPattern(mapp) } + + AstNode getMapping(int n) { result.asPattern() = mapp.getMapping(n) } + + override AstNode getChild(int index) { result = this.getMapping(index) } + } + + /** + * A key-value pair inside a `case {k: v}` mapping pattern. + */ + additional class MatchKeyValuePattern extends Pattern { + private Py::MatchKeyValuePattern kvp; + + MatchKeyValuePattern() { this = TPattern(kvp) } + + AstNode getKey() { result.asPattern() = kvp.getKey() } + + AstNode getValue() { result.asPattern() = kvp.getValue() } + + override AstNode getChild(int index) { + index = 0 and result = this.getKey() + or + index = 1 and result = this.getValue() + } + } + + /** + * A `case Cls(name=value)` keyword sub-pattern. + */ + additional class MatchKeywordPattern extends Pattern { + private Py::MatchKeywordPattern kwp; + + MatchKeywordPattern() { this = TPattern(kwp) } + + Expr getAttribute() { result.asExpr() = kwp.getAttribute() } + + AstNode getValue() { result.asPattern() = kwp.getValue() } + + override AstNode getChild(int index) { + index = 0 and result = this.getAttribute() + or + index = 1 and result = this.getValue() + } + } + + /** A `case **rest` double-star mapping sub-pattern. 
*/ + additional class MatchDoubleStarPattern extends Pattern { + private Py::MatchDoubleStarPattern dsp; + + MatchDoubleStarPattern() { this = TPattern(dsp) } + + AstNode getTarget() { result.asPattern() = dsp.getTarget() } + + override AstNode getChild(int index) { index = 0 and result = this.getTarget() } + } + + /** A `case p1 | p2 | …` or-pattern. */ + additional class MatchOrPattern extends Pattern { + private Py::MatchOrPattern orp; + + MatchOrPattern() { this = TPattern(orp) } + + AstNode getPattern(int n) { result.asPattern() = orp.getPattern(n) } + + override AstNode getChild(int index) { result = this.getPattern(index) } + } + + /** A `case 1` literal pattern. */ + additional class MatchLiteralPattern extends Pattern { + private Py::MatchLiteralPattern litp; + + MatchLiteralPattern() { this = TPattern(litp) } + + Expr getLiteral() { result.asExpr() = litp.getLiteral() } + + override AstNode getChild(int index) { index = 0 and result = this.getLiteral() } + } + + /** A `case Cls.NAME` value pattern. */ + additional class MatchValuePattern extends Pattern { + private Py::MatchValuePattern vp; + + MatchValuePattern() { this = TPattern(vp) } + + Expr getValue() { result.asExpr() = vp.getValue() } + + override AstNode getChild(int index) { index = 0 and result = this.getValue() } + } + + /** + * A block statement, modeling the body of a parent AST node as a + * sequence of statements. + */ + class BlockStmt extends Stmt, TBlockStmt { + private Py::StmtList sl; + + BlockStmt() { this = TBlockStmt(sl) } + + /** Gets the `n`th (zero-based) statement in this block. */ + Stmt getStmt(int n) { result.asStmt() = sl.getItem(n) } + + /** Gets the last statement in this block. */ + Stmt getLastStmt() { result.asStmt() = sl.getLastItem() } + + override string toString() { result = sl.toString() } + + // `Py::StmtList` has no native location; approximate with the first + // item's location. 
+ override Py::Location getLocation() { result = sl.getItem(0).getLocation() } + + override Callable getEnclosingCallable() { + result.asScope() = sl.getParent().(Py::Scope) + or + result.asScope() = sl.getParent().(Py::Stmt).getScope() + } + + override AstNode getChild(int index) { result = this.getStmt(index) } + } + + /** An expression statement. */ + class ExprStmt extends Stmt { + private Py::ExprStmt exprStmt; + + ExprStmt() { this = TPyStmt(exprStmt) } + + /** Gets the expression in this expression statement. */ + Expr getExpr() { result.asExpr() = exprStmt.getValue() } + + override AstNode getChild(int index) { index = 0 and result = this.getExpr() } + } + + /** An assignment statement (`x = y = expr`). */ + additional class AssignStmt extends Stmt { + private Py::Assign assign; + + AssignStmt() { this = TPyStmt(assign) } + + Expr getValue() { result.asExpr() = assign.getValue() } + + Expr getTarget(int n) { result.asExpr() = assign.getTarget(n) } + + int getNumberOfTargets() { result = count(assign.getATarget()) } + + override AstNode getChild(int index) { + index = 0 and result = this.getValue() + or + result = this.getTarget(index - 1) and index >= 1 + } + } + + /** An augmented assignment statement (`x += expr`). */ + additional class AugAssignStmt extends Stmt { + private Py::AugAssign augAssign; + + AugAssignStmt() { this = TPyStmt(augAssign) } + + Expr getOperation() { result.asExpr() = augAssign.getOperation() } + + override AstNode getChild(int index) { index = 0 and result = this.getOperation() } + } + + /** + * An annotated assignment statement (`x: T = expr`, or `x: T` without + * value). The evaluation order follows CPython: annotation first, then + * the optional value, then the target binding. 
+ */ + additional class AnnAssignStmt extends Stmt { + private Py::AnnAssign annAssign; + + AnnAssignStmt() { this = TPyStmt(annAssign) } + + Expr getAnnotation() { result.asExpr() = annAssign.getAnnotation() } + + Expr getValue() { result.asExpr() = annAssign.getValue() } + + Expr getTarget() { result.asExpr() = annAssign.getTarget() } + + override AstNode getChild(int index) { + index = 0 and result = this.getAnnotation() + or + index = 1 and result = this.getValue() + or + index = 2 and result = this.getTarget() + } + } + + /** An assignment expression / walrus operator (`x := expr`). */ + additional class NamedExpr extends Expr { + private Py::AssignExpr assignExpr; + + NamedExpr() { this = TPyExpr(assignExpr) } + + Expr getValue() { result.asExpr() = assignExpr.getValue() } + + Expr getTarget() { result.asExpr() = assignExpr.getTarget() } + + override AstNode getChild(int index) { + index = 0 and result = this.getValue() + or + index = 1 and result = this.getTarget() + } + } + + /** + * An `if` statement. + * + * Python's `elif` chains are represented as nested `If` nodes in the + * else branch's `StmtList`. The shared CFG library handles this + * naturally: `getElse()` returns the `BlockStmt` wrapping the else + * branch, and if that block contains a single `If`, the result is + * a chained conditional. + */ + class IfStmt extends Stmt { + private Py::If ifStmt; + + IfStmt() { this = TPyStmt(ifStmt) } + + /** Gets the underlying Python `If` statement. */ + Py::If asIf() { result = ifStmt } + + /** Gets the condition of this `if` statement. */ + Expr getCondition() { result.asExpr() = ifStmt.getTest() } + + /** Gets the `then` (true) branch of this `if` statement. */ + Stmt getThen() { result.asStmtList() = ifStmt.getBody() } + + /** Gets the `else` (false) branch, if any. 
*/ + Stmt getElse() { result.asStmtList() = ifStmt.getOrelse() } + + override AstNode getChild(int index) { + index = 0 and result = this.getCondition() + or + index = 1 and result = this.getThen() + or + index = 2 and result = this.getElse() + } + } + + /** A loop statement. */ + class LoopStmt extends Stmt { + LoopStmt() { + this = TPyStmt(any(Py::While w)) + or + this = TPyStmt(any(Py::For f)) + } + + /** Gets the body of this loop statement. */ + Stmt getBody() { none() } + } + + /** A `while` loop statement. */ + class WhileStmt extends LoopStmt { + private Py::While whileStmt; + + WhileStmt() { this = TPyStmt(whileStmt) } + + /** Gets the boolean condition of this `while` loop. */ + Expr getCondition() { result.asExpr() = whileStmt.getTest() } + + override Stmt getBody() { result.asStmtList() = whileStmt.getBody() } + + /** Gets the `else` branch of this `while` loop, if any. */ + Stmt getElse() { result.asStmtList() = whileStmt.getOrelse() } + + override AstNode getChild(int index) { + index = 0 and result = this.getCondition() + or + index = 1 and result = this.getBody() + or + index = 2 and result = this.getElse() + } + } + + /** + * A `do-while` loop statement. Python has no do-while construct. + */ + class DoStmt extends LoopStmt { + DoStmt() { none() } + + Expr getCondition() { none() } + } + + /** An `until` loop. Python has no `until` loop. */ + class UntilStmt extends LoopStmt { + UntilStmt() { none() } + + Expr getCondition() { none() } + } + + /** A C-style `for` loop. Python has no C-style for loop. */ + class ForStmt extends LoopStmt { + ForStmt() { none() } + + AstNode getInit(int index) { none() } + + Expr getCondition() { none() } + + AstNode getUpdate(int index) { none() } + } + + /** A for-each loop (`for x in iterable:`). */ + class ForeachStmt extends LoopStmt { + private Py::For forStmt; + + ForeachStmt() { this = TPyStmt(forStmt) } + + /** Gets the loop variable. 
*/ + Expr getVariable() { result.asExpr() = forStmt.getTarget() } + + /** Gets the collection being iterated. */ + Expr getCollection() { result.asExpr() = forStmt.getIter() } + + override Stmt getBody() { result.asStmtList() = forStmt.getBody() } + + /** Gets the `else` branch of this `for` loop, if any. */ + Stmt getElse() { result.asStmtList() = forStmt.getOrelse() } + + override AstNode getChild(int index) { + index = 0 and result = this.getCollection() + or + index = 1 and result = this.getVariable() + or + index = 2 and result = this.getBody() + or + index = 3 and result = this.getElse() + } + } + + /** A `break` statement. */ + class BreakStmt extends Stmt { + BreakStmt() { this = TPyStmt(any(Py::Break b)) } + } + + /** A `continue` statement. */ + class ContinueStmt extends Stmt { + ContinueStmt() { this = TPyStmt(any(Py::Continue c)) } + } + + /** A `goto` statement. Python has no goto. */ + class GotoStmt extends Stmt { + GotoStmt() { none() } + } + + /** A `return` statement. */ + class ReturnStmt extends Stmt { + private Py::Return ret; + + ReturnStmt() { this = TPyStmt(ret) } + + /** Gets the expression being returned, if any. */ + Expr getExpr() { result.asExpr() = ret.getValue() } + + override AstNode getChild(int index) { index = 0 and result = this.getExpr() } + } + + /** A `raise` statement (mapped to `Throw`). */ + class Throw extends Stmt { + private Py::Raise raise; + + Throw() { this = TPyStmt(raise) } + + /** Gets the expression being raised, if any (absent for a bare `raise`). */ + Expr getExpr() { result.asExpr() = raise.getException() } + + /** Gets the cause of this `raise`, if any. */ + Expr getCause() { result.asExpr() = raise.getCause() } + + override AstNode getChild(int index) { + index = 0 and result = this.getExpr() + or + index = 1 and result = this.getCause() + } + } + + /** + * An `import` statement (`import a, b` or `from m import a, b`). 
+ * + * Each alias contributes two children in evaluation order: first the + * value expression (which performs the import side-effect), then the + * bound `asname` Name (the in-scope binding). This makes both reachable + * from the CFG and allows `Name.defines(v)` for `asname` Names to have + * corresponding CFG nodes — which is essential for SSA to see import + * bindings. + */ + additional class ImportStmt extends Stmt { + private Py::Import imp; + + ImportStmt() { this = TPyStmt(imp) } + + /** Gets the value (module/member expression) of the `n`th alias. */ + Expr getValue(int n) { result.asExpr() = imp.getName(n).getValue() } + + /** Gets the bound `asname` of the `n`th alias. */ + Expr getAsname(int n) { result.asExpr() = imp.getName(n).getAsname() } + + /** Gets the number of aliases in this import statement. */ + int getNumberOfAliases() { result = count(int i | exists(imp.getName(i))) } + + /** + * Gets the child at `index`: alias `i` has its value at `2 * i` and its + * `asname` at `2 * i + 1`. + */ + override AstNode getChild(int index) { + exists(int i | + index = 2 * i and result = this.getValue(i) + or + index = 2 * i + 1 and result = this.getAsname(i) + ) + } + } + + /** + * A `from m import *` statement. Evaluates the module expression but + * binds no name (the bindings happen by side-effect at runtime, which + * is not modelled at the CFG level). + */ + additional class ImportStarStmt extends Stmt { + private Py::ImportStar imp; + + ImportStarStmt() { this = TPyStmt(imp) } + + /** Gets the module expression `m` in `from m import *`. */ + Expr getModule() { result.asExpr() = imp.getModule() } + + /** Gets the child at `index`: the module expression (0). */ + override AstNode getChild(int index) { index = 0 and result = this.getModule() } + } + + /** A `with` statement. 
*/ + additional class WithStmt extends Stmt { + private Py::With withStmt; + + WithStmt() { this = TPyStmt(withStmt) } + + /** Gets the context expression (`e` in `with e as v:`). */ + Expr getContextExpr() { result.asExpr() = withStmt.getContextExpr() } + + /** Gets the `as` target (`v` in `with e as v:`), if any. */ + Expr getOptionalVars() { result.asExpr() = withStmt.getOptionalVars() } + + /** Gets the body of this `with` statement. */ + Stmt getBody() { result.asStmtList() = withStmt.getBody() } + + /** Gets the child at `index`: context expression (0), `as` target (1), body (2). */ + override AstNode getChild(int index) { + index = 0 and result = this.getContextExpr() + or + index = 1 and result = this.getOptionalVars() + or + index = 2 and result = this.getBody() + } + } + + /** An `assert` statement. */ + additional class AssertStmt extends Stmt { + private Py::Assert assertStmt; + + AssertStmt() { this = TPyStmt(assertStmt) } + + /** Gets the condition being asserted. */ + Expr getTest() { result.asExpr() = assertStmt.getTest() } + + /** Gets the message expression of this `assert`, if any. */ + Expr getMsg() { result.asExpr() = assertStmt.getMsg() } + + /** Gets the child at `index`: the test (0), then the message (1). */ + override AstNode getChild(int index) { + index = 0 and result = this.getTest() + or + index = 1 and result = this.getMsg() + } + } + + /** A `delete` statement. */ + additional class DeleteStmt extends Stmt { + private Py::Delete del; + + DeleteStmt() { this = TPyStmt(del) } + + /** Gets the `n`th target of this `delete` statement. */ + Expr getTarget(int n) { result.asExpr() = del.getTarget(n) } + + /** Gets the child at `index`, which is the target with the same index. */ + override AstNode getChild(int index) { result = this.getTarget(index) } + } + + /** + * A PEP 695 `type` statement (`type Alias[T1, T2] = value`). + * + * The type parameters bind at statement-evaluation time. The value + * expression is captured for lazy evaluation but the alias `Name` + * itself binds the resulting `TypeAliasType` object — so the CFG must + * visit at minimum the type-parameter names and the alias name. + */ + additional class TypeAliasStmt extends Stmt { + private Py::TypeAlias ta; + + TypeAliasStmt() { this = TPyStmt(ta) } + + /** Gets the alias `Name` bound by this statement. */ + Expr getName() { result.asExpr() = ta.getName() } + + /** + * Gets the `n`th PEP 695 type-parameter name (a `Name` in store + * context), in declaration order. 
+ */ + Expr getTypeParamName(int n) { result.asExpr() = typeParameterName(ta.getTypeParameter(n)) } + + /** Gets the number of type parameters of this statement. */ + int getNumberOfTypeParams() { result = count(ta.getATypeParameter()) } + + /** + * Gets the child at `index`: the type-parameter names first, then the + * alias name at index `getNumberOfTypeParams()`. + */ + override AstNode getChild(int index) { + result = this.getTypeParamName(index) + or + index = this.getNumberOfTypeParams() and result = this.getName() + } + } + + /** A `try` statement. */ + class TryStmt extends Stmt { + private Py::Try tryStmt; + + TryStmt() { this = TPyStmt(tryStmt) } + + /** Gets the body of this `try` statement; it is only available at `index` 0. */ + AstNode getBody(int index) { index = 0 and result.asStmtList() = tryStmt.getBody() } + + /** Gets the `else` branch of this `try` statement, if any. */ + Stmt getElse() { result.asStmtList() = tryStmt.getOrelse() } + + /** Gets the `finally` block of this `try` statement, if any. */ + Stmt getFinally() { result.asStmtList() = tryStmt.getFinalbody() } + + /** Gets the `index`th exception handler of this `try` statement. */ + CatchClause getCatch(int index) { result.asStmt() = tryStmt.getHandler(index) } + + /** + * Gets the child at `index`: the body at 0, the handlers from 1 onwards, + * the `finally` block at -1 and the `else` branch at -2. + */ + override AstNode getChild(int index) { + index = 0 and result = this.getBody(0) + or + result = this.getCatch(index - 1) and index >= 1 + or + index = -1 and result = this.getFinally() + or + index = -2 and result = this.getElse() + } + } + + /** + * Gets the `else` branch of `try` statement `try`, if any. + */ + AstNode getTryElse(TryStmt try) { result = try.getElse() } + + /** + * Gets the `else` branch of loop `loop`, if any. + * + * Python's `while`/`for` loops may have an `else` block that runs when the + * loop completes without `break`. + */ + AstNode getLoopElse(LoopStmt loop) { + result = loop.(WhileStmt).getElse() + or + result = loop.(ForeachStmt).getElse() + } + + /** An exception handler (`except` or `except*`). */ + class CatchClause extends Stmt { + private Py::ExceptionHandler handler; + + CatchClause() { this = TPyStmt(handler) } + + /** Gets the type expression of this exception handler. */ + Expr getType() { result.asExpr() = handler.getType() } + + /** Gets the variable name of this exception handler, if any. 
*/ + AstNode getVariable() { result.asExpr() = handler.getName() } + + /** + * Gets the condition of this catch clause. Never has a result: catch + * clauses do not have a `Condition` in Python's model. + */ + Expr getCondition() { none() } + + /** Gets the body of this exception handler. */ + Stmt getBody() { + result.asStmtList() = handler.(Py::ExceptStmt).getBody() + or + result.asStmtList() = handler.(Py::ExceptGroupStmt).getBody() + } + + /** Gets the child at `index`: type expression (0), bound name (1), body (2). */ + override AstNode getChild(int index) { + index = 0 and result = this.getType() + or + index = 1 and result = this.getVariable() + or + index = 2 and result = this.getBody() + } + } + + /** A `match` statement, mapped to the shared CFG's `Switch`. */ + class Switch extends Stmt { + private Py::MatchStmt matchStmt; + + Switch() { this = TPyStmt(matchStmt) } + + /** Gets the subject expression being matched. */ + Expr getExpr() { result.asExpr() = matchStmt.getSubject() } + + /** Gets the `index`th `case` clause. */ + Case getCase(int index) { result.asStmt() = matchStmt.getCase(index) } + + /** Gets the `index`th statement of this switch. Never has a result in this adapter. */ + Stmt getStmt(int index) { none() } + + /** Gets the child at `index`: the subject at 0, the cases from 1 onwards. */ + override AstNode getChild(int index) { + index = 0 and result = this.getExpr() + or + result = this.getCase(index - 1) and index >= 1 + } + } + + /** A `case` clause in a match statement. */ + class Case extends Stmt { + private Py::Case caseStmt; + + Case() { this = TPyStmt(caseStmt) } + + /** Gets the pattern of this case; it is only available at `index` 0. */ + AstNode getPattern(int index) { index = 0 and result.asPattern() = caseStmt.getPattern() } + + /** Gets the test expression of the guard of this case, if any. */ + Expr getGuard() { result.asExpr() = caseStmt.getGuard().(Py::Guard).getTest() } + + /** Gets the body of this case. */ + AstNode getBody() { result.asStmtList() = caseStmt.getBody() } + + /** Holds if this case is a wildcard pattern (`case _:`). */ + predicate isWildcard() { caseStmt.getPattern() instanceof Py::MatchWildcardPattern } + + /** Gets the child at `index`: pattern (0), guard test (1), body (2). */ + override AstNode getChild(int index) { + index = 0 and result = this.getPattern(0) + or + index = 1 and result = this.getGuard() + or + index = 2 and result = this.getBody() + } + } + + /** + * A wildcard case (`case _:`). + * + * NOTE(review): `isWildcard` only inspects the pattern, so a guarded + * wildcard (`case _ if cond:`) is also a `DefaultCase` — confirm that the + * shared CFG still evaluates the guard for default cases. + */ + class DefaultCase extends Case { + DefaultCase() { this.isWildcard() } + } + + /** A conditional expression (`x if cond else y`). 
*/ + class ConditionalExpr extends Expr { + private Py::IfExp ifExp; + + ConditionalExpr() { this = TPyExpr(ifExp) } + + /** Gets the condition of this expression. */ + Expr getCondition() { result.asExpr() = ifExp.getTest() } + + /** Gets the true branch of this expression. */ + Expr getThen() { result.asExpr() = ifExp.getBody() } + + /** Gets the false branch of this expression. */ + Expr getElse() { result.asExpr() = ifExp.getOrelse() } + + /** Gets the child at `index`: condition (0), true branch (1), false branch (2). */ + override AstNode getChild(int index) { + index = 0 and result = this.getCondition() + or + index = 1 and result = this.getThen() + or + index = 2 and result = this.getElse() + } + } + + /** + * A binary expression for the shared CFG. In Python, this covers all + * `and`/`or` expression operand pairs. + * + * A `BoolExpr` with more than two operands is split into nested pairs: + * pair `i` has operand `i` on the left and, on the right, either pair + * `i + 1` or (for the last pair) the final operand. + */ + class BinaryExpr extends Expr, TBoolExprPair { + private Py::BoolExpr be; + private int index; + + BinaryExpr() { this = TBoolExprPair(be, index) } + + /** Gets the underlying Python `BoolExpr`. */ + Py::BoolExpr getBoolExpr() { result = be } + + /** Gets the (zero-based) index of this pair within its `BoolExpr`. */ + int getIndex() { result = index } + + /** Gets the operator of the underlying `BoolExpr` as the textual representation. */ + override string toString() { result = be.getOperator() } + + /** Gets the location of this pair, which is the location of its left operand. */ + override Py::Location getLocation() { result = be.getValue(index).getLocation() } + + /** Gets the callable that wraps the scope of the underlying `BoolExpr`. */ + override Callable getEnclosingCallable() { result.asScope() = be.getScope() } + + /** Gets the left operand of this binary expression. */ + Expr getLeftOperand() { result.asExpr() = be.getValue(index) } + + /** Gets the right operand of this binary expression. */ + Expr getRightOperand() { + // Last pair: right operand is the final value. + index = count(be.getAValue()) - 2 and result.asExpr() = be.getValue(index + 1) + or + // Non-last pair: right operand is the next synthetic pair. 
+ index < count(be.getAValue()) - 2 and + exists(BinaryExpr next | + next.getBoolExpr() = be and next.getIndex() = index + 1 and result = next + ) + } + + /** Gets the child at `childIndex`: left operand (0), right operand (1). */ + override AstNode getChild(int childIndex) { + childIndex = 0 and result = this.getLeftOperand() + or + childIndex = 1 and result = this.getRightOperand() + } + } + + /** A short-circuiting logical `and` expression. */ + class LogicalAndExpr extends BinaryExpr { + LogicalAndExpr() { this.getBoolExpr().getOp() instanceof Py::And } + } + + /** A short-circuiting logical `or` expression. */ + class LogicalOrExpr extends BinaryExpr { + LogicalOrExpr() { this.getBoolExpr().getOp() instanceof Py::Or } + } + + /** A null-coalescing expression. Python has no null-coalescing operator. */ + class NullCoalescingExpr extends BinaryExpr { + NullCoalescingExpr() { none() } + } + + /** + * A unary expression. Currently only used for the `not` subclass: only + * `Py::UnaryExpr`s whose operator is `not` are members; other unary + * operators are wrapped by `ArithUnaryExpr`. + */ + class UnaryExpr extends Expr { + UnaryExpr() { exists(Py::UnaryExpr u | this = TPyExpr(u) and u.getOp() instanceof Py::Not) } + + /** Gets the operand of this unary expression. */ + Expr getOperand() { result.asExpr() = this.asExpr().(Py::UnaryExpr).getOperand() } + + /** Gets the child at `index`: the operand (0). */ + override AstNode getChild(int index) { index = 0 and result = this.getOperand() } + } + + /** + * A logical `not` expression. Has the same members as `UnaryExpr`, which + * is already restricted to `not`. + */ + class LogicalNotExpr extends UnaryExpr { } + + /** + * An assignment expression. + * + * Empty in Python: `x = y` and `x += y` are statements (`AssignStmt` and + * `AugAssignStmt`), not expressions, and the walrus `x := y` is modeled + * separately as `NamedExpr`. The shared library's `Assignment` extends + * `BinaryExpr`, so it cannot share instances with our `Stmt`-based + * assignment forms. + */ + class Assignment extends BinaryExpr { + Assignment() { none() } + } + + /** A simple assignment expression. Empty in Python (see `Assignment`). */ + class AssignExpr extends Assignment { } + + /** A compound assignment expression. Empty in Python (see `Assignment`). 
*/ + class CompoundAssignment extends Assignment { } + + /** + * A short-circuiting logical AND compound assignment expression (`&&=`). + * Python has no such operator. + */ + class AssignLogicalAndExpr extends CompoundAssignment { } + + /** + * A short-circuiting logical OR compound assignment expression (`||=`). + * Python has no such operator. + */ + class AssignLogicalOrExpr extends CompoundAssignment { } + + /** + * A short-circuiting null-coalescing compound assignment expression + * (`??=`). Python has no such operator. + */ + class AssignNullCoalescingExpr extends CompoundAssignment { } + + /** A boolean literal expression (`True` or `False`). */ + class BooleanLiteral extends Expr { + BooleanLiteral() { this = TPyExpr(any(Py::True t)) or this = TPyExpr(any(Py::False f)) } + + /** Gets the boolean value of this literal. */ + boolean getValue() { + this.asExpr() instanceof Py::True and result = true + or + this.asExpr() instanceof Py::False and result = false + } + } + + /** A pattern match expression. Python has no `instanceof`-style pattern match expression. */ + class PatternMatchExpr extends Expr { + PatternMatchExpr() { none() } + + /** Gets the matched expression. Never has a result, since this class is empty. */ + Expr getExpr() { none() } + + /** Gets the pattern. Never has a result, since this class is empty. */ + AstNode getPattern() { none() } + } + + // ===== Python-specific expression classes (used by `getChild`) ===== + /** A Python binary expression (arithmetic, bitwise, matmul, etc.). */ + additional class ArithBinaryExpr extends Expr { + private Py::BinaryExpr binExpr; + + ArithBinaryExpr() { this = TPyExpr(binExpr) } + + /** Gets the left operand. */ + Expr getLeft() { result.asExpr() = binExpr.getLeft() } + + /** Gets the right operand. */ + Expr getRight() { result.asExpr() = binExpr.getRight() } + + /** Gets the child at `index`: left operand (0), right operand (1). */ + override AstNode getChild(int index) { + index = 0 and result = this.getLeft() + or + index = 1 and result = this.getRight() + } + } + + /** A call expression (`func(args...)`). 
*/ + additional class CallExpr extends Expr { + private Py::Call call; + + CallExpr() { this = TPyExpr(call) } + + /** Gets the expression being called. */ + Expr getFunc() { result.asExpr() = call.getFunc() } + + /** Gets the `n`th positional argument. */ + Expr getPositionalArg(int n) { result.asExpr() = call.getPositionalArg(n) } + + /** Gets the number of positional arguments. */ + int getNumberOfPositionalArgs() { result = count(call.getAPositionalArg()) } + + /** + * Gets the value of the `n`th named argument: the value of a keyword + * argument, or the operand of a dictionary unpacking. + */ + Expr getKeywordValue(int n) { + result.asExpr() = call.getNamedArg(n).(Py::Keyword).getValue() + or + result.asExpr() = call.getNamedArg(n).(Py::DictUnpacking).getValue() + } + + /** Gets the number of named arguments. */ + int getNumberOfNamedArgs() { result = count(call.getANamedArg()) } + + /** + * Gets the child at `index`: the callee at 0, the positional arguments + * from 1 onwards, and the named-argument values after the positional + * arguments. + */ + override AstNode getChild(int index) { + index = 0 and result = this.getFunc() + or + result = this.getPositionalArg(index - 1) and index >= 1 + or + result = this.getKeywordValue(index - 1 - this.getNumberOfPositionalArgs()) and + index >= 1 + this.getNumberOfPositionalArgs() + } + } + + /** A subscript expression (`obj[index]`). */ + additional class SubscriptExpr extends Expr { + private Py::Subscript sub; + + SubscriptExpr() { this = TPyExpr(sub) } + + /** Gets the object being subscripted (`obj` in `obj[index]`). */ + Expr getObject() { result.asExpr() = sub.getObject() } + + /** Gets the index expression (`index` in `obj[index]`). */ + Expr getIndex() { result.asExpr() = sub.getIndex() } + + /** Gets the child at `index`: the object (0), then the index expression (1). */ + override AstNode getChild(int index) { + index = 0 and result = this.getObject() + or + index = 1 and result = this.getIndex() + } + } + + /** An attribute access (`obj.name`). */ + additional class AttributeExpr extends Expr { + private Py::Attribute attr; + + AttributeExpr() { this = TPyExpr(attr) } + + /** Gets the object whose attribute is accessed (`obj` in `obj.name`). */ + Expr getObject() { result.asExpr() = attr.getObject() } + + /** Gets the child at `index`: the object (0). */ + override AstNode getChild(int index) { index = 0 and result = this.getObject() } + } + + /** + * An `import x.y` module expression. Modelled as a leaf — the dotted + * name is just a string. + */ + additional class ImportExpression extends Expr { + ImportExpression() { this.asExpr() instanceof Py::ImportExpr } + } + + /** + * A `from m import x` member access. 
The module sub-expression is a + * child so that the CFG visits both the module load and this + * attribute selection. + */ + additional class ImportMemberExpr extends Expr { + private Py::ImportMember im; + + ImportMemberExpr() { this = TPyExpr(im) } + + /** Gets the module expression `m` in `from m import x`. */ + Expr getModule() { result.asExpr() = im.getModule() } + + /** Gets the child at `index`: the module expression (0). */ + override AstNode getChild(int index) { index = 0 and result = this.getModule() } + } + + /** A tuple literal. */ + additional class TupleExpr extends Expr { + private Py::Tuple tuple; + + TupleExpr() { this = TPyExpr(tuple) } + + /** Gets the `n`th element of this tuple. */ + Expr getElt(int n) { result.asExpr() = tuple.getElt(n) } + + /** Gets the child at `index`, which is the element with the same index. */ + override AstNode getChild(int index) { result = this.getElt(index) } + } + + /** A list literal. */ + additional class ListExpr extends Expr { + private Py::List list; + + ListExpr() { this = TPyExpr(list) } + + /** Gets the `n`th element of this list. */ + Expr getElt(int n) { result.asExpr() = list.getElt(n) } + + /** Gets the child at `index`, which is the element with the same index. */ + override AstNode getChild(int index) { result = this.getElt(index) } + } + + /** A set literal. */ + additional class SetExpr extends Expr { + private Py::Set set; + + SetExpr() { this = TPyExpr(set) } + + /** Gets the `n`th element of this set. */ + Expr getElt(int n) { result.asExpr() = set.getElt(n) } + + /** Gets the child at `index`, which is the element with the same index. */ + override AstNode getChild(int index) { result = this.getElt(index) } + } + + /** A dict literal. */ + additional class DictExpr extends Expr { + private Py::Dict dict; + + DictExpr() { this = TPyExpr(dict) } + + /** + * Gets the key of the `n`th item (at child index `2*n`); the value is + * at child index `2*n + 1`. 
+ */ + Expr getKey(int n) { result.asExpr() = dict.getItem(n).(Py::KeyValuePair).getKey() } + + /** + * Gets the value of the `n`th item. For a `**mapping` unpacking item this + * is the unpacked expression; such items have no key. + */ + Expr getValue(int n) { + result.asExpr() = dict.getItem(n).(Py::KeyValuePair).getValue() + or + result.asExpr() = dict.getItem(n).(Py::DictUnpacking).getValue() + } + + /** Gets the number of items in this dict literal. */ + int getNumberOfItems() { result = count(dict.getAnItem()) } + + /** + * Gets the child at `index`: item `n` has its key at `2 * n` and its + * value at `2 * n + 1`. An unpacking item only fills the value slot. + */ + override AstNode getChild(int index) { + exists(int item | + index = 2 * item and result = this.getKey(item) + or + index = 2 * item + 1 and result = this.getValue(item) + ) + } + } + + /** A unary expression other than `not` (e.g., `-x`, `+x`, `~x`). */ + additional class ArithUnaryExpr extends Expr { + private Py::UnaryExpr unaryExpr; + + ArithUnaryExpr() { this = TPyExpr(unaryExpr) and not unaryExpr.getOp() instanceof Py::Not } + + /** Gets the operand of this unary expression. */ + Expr getOperand() { result.asExpr() = unaryExpr.getOperand() } + + /** Gets the child at `index`: the operand (0). */ + override AstNode getChild(int index) { index = 0 and result = this.getOperand() } + } + + /** + * A comprehension or generator expression. The iterable is evaluated in + * the enclosing scope; the body runs in a nested synthetic function + * scope handled by its own CFG. + */ + additional class Comprehension extends Expr { + private Py::Expr iterable; + + Comprehension() { + exists(Py::Expr c | this = TPyExpr(c) | + iterable = c.(Py::ListComp).getIterable() + or + iterable = c.(Py::SetComp).getIterable() + or + iterable = c.(Py::DictComp).getIterable() + or + iterable = c.(Py::GeneratorExp).getIterable() + ) + } + + /** Gets the iterable of this comprehension, the only part that is a child here. */ + Expr getIterable() { result.asExpr() = iterable } + + /** Gets the child at `index`: the iterable (0). */ + override AstNode getChild(int index) { index = 0 and result = this.getIterable() } + } + + /** A comparison expression (`a < b`, `a < b < c`, etc.). 
*/ + additional class CompareExpr extends Expr { + private Py::Compare cmp; + + CompareExpr() { this = TPyExpr(cmp) } + + /** Gets the leftmost operand of this comparison. */ + Expr getLeft() { result.asExpr() = cmp.getLeft() } + + /** Gets the `n`th operand after the leftmost one. */ + Expr getComparator(int n) { result.asExpr() = cmp.getComparator(n) } + + /** Gets the child at `index`: the leftmost operand at 0, the comparators from 1 onwards. */ + override AstNode getChild(int index) { + index = 0 and result = this.getLeft() + or + result = this.getComparator(index - 1) and index >= 1 + } + } + + /** A slice expression (`start:stop:step`). */ + additional class SliceExpr extends Expr { + private Py::Slice slice; + + SliceExpr() { this = TPyExpr(slice) } + + /** Gets the `start` expression of this slice, if any. */ + Expr getStart() { result.asExpr() = slice.getStart() } + + /** Gets the `stop` expression of this slice, if any. */ + Expr getStop() { result.asExpr() = slice.getStop() } + + /** Gets the `step` expression of this slice, if any. */ + Expr getStep() { result.asExpr() = slice.getStep() } + + /** Gets the child at `index`: start (0), stop (1), step (2). */ + override AstNode getChild(int index) { + index = 0 and result = this.getStart() + or + index = 1 and result = this.getStop() + or + index = 2 and result = this.getStep() + } + } + + /** A starred expression (`*x`). */ + additional class StarredExpr extends Expr { + private Py::Starred starred; + + StarredExpr() { this = TPyExpr(starred) } + + /** Gets the starred operand (`x` in `*x`). */ + Expr getValue() { result.asExpr() = starred.getValue() } + + /** Gets the child at `index`: the operand (0). */ + override AstNode getChild(int index) { index = 0 and result = this.getValue() } + } + + /** A formatted string literal (`f"...{expr}..."`). */ + additional class FstringExpr extends Expr { + private Py::Fstring fstring; + + FstringExpr() { this = TPyExpr(fstring) } + + /** Gets the `n`th part of this f-string. */ + Expr getValue(int n) { result.asExpr() = fstring.getValue(n) } + + /** Gets the child at `index`, which is the part with the same index. */ + override AstNode getChild(int index) { result = this.getValue(index) } + } + + /** A formatted value inside an f-string (`{expr}` or `{expr:spec}`). 
*/ + additional class FormattedValueExpr extends Expr { + private Py::FormattedValue fv; + + FormattedValueExpr() { this = TPyExpr(fv) } + + /** Gets the expression being formatted (`expr` in `{expr:spec}`). */ + Expr getValue() { result.asExpr() = fv.getValue() } + + /** Gets the format specification (`spec` in `{expr:spec}`), if any. */ + Expr getFormatSpec() { result.asExpr() = fv.getFormatSpec() } + + /** Gets the child at `index`: the value (0), then the format specification (1). */ + override AstNode getChild(int index) { + index = 0 and result = this.getValue() + or + index = 1 and result = this.getFormatSpec() + } + } + + /** A `yield` expression. */ + additional class YieldExpr extends Expr { + private Py::Yield yield; + + YieldExpr() { this = TPyExpr(yield) } + + /** Gets the yielded expression, if any. */ + Expr getValue() { result.asExpr() = yield.getValue() } + + /** Gets the child at `index`: the yielded expression (0). */ + override AstNode getChild(int index) { index = 0 and result = this.getValue() } + } + + /** A `yield from` expression. */ + additional class YieldFromExpr extends Expr { + private Py::YieldFrom yieldFrom; + + YieldFromExpr() { this = TPyExpr(yieldFrom) } + + /** Gets the operand of this `yield from` expression. */ + Expr getValue() { result.asExpr() = yieldFrom.getValue() } + + /** Gets the child at `index`: the operand (0). */ + override AstNode getChild(int index) { index = 0 and result = this.getValue() } + } + + /** An `await` expression. */ + additional class AwaitExpr extends Expr { + private Py::Await await; + + AwaitExpr() { this = TPyExpr(await) } + + /** Gets the awaited expression. */ + Expr getValue() { result.asExpr() = await.getValue() } + + /** Gets the child at `index`: the awaited expression (0). */ + override AstNode getChild(int index) { index = 0 and result = this.getValue() } + } + + /** + * A class definition expression (visits bases, but NOT PEP 695 type + * parameters — those bind in an annotation scope that nests the class + * body, so they belong to the inner scope's CFG, not the enclosing + * scope's; the legacy CFG also omitted them). 
+ */ + additional class ClassDefExpr extends Expr { + private Py::ClassExpr classExpr; + + ClassDefExpr() { this = TPyExpr(classExpr) } + + /** Gets the `n`th base of this class definition. */ + Expr getBase(int n) { result.asExpr() = classExpr.getBase(n) } + + /** + * Gets the child at `index`, which is the base with the same index. + * + * NOTE(review): only bases are children; keyword arguments of the class + * statement (for example `metaclass=...`) are not — confirm whether they + * need CFG nodes. + */ + override AstNode getChild(int index) { result = this.getBase(index) } + } + + /** + * A function definition expression (visits positional and keyword + * defaults, but NOT PEP 695 type parameters — those bind in an + * annotation scope that nests the function body, so they belong to + * the inner scope's CFG, not the enclosing scope's; the legacy CFG + * also omitted them). + */ + additional class FunctionDefExpr extends Expr { + private Py::FunctionExpr funcExpr; + + FunctionDefExpr() { this = TPyExpr(funcExpr) } + + /** + * Gets the `n`th default for a positional argument, in evaluation + * order. Note that `Args.getDefault(int)` is indexed by argument + * position (with gaps for arguments without defaults), so we must + * renumber here to obtain contiguous indices. + */ + Expr getDefault(int n) { + result.asExpr() = + rank[n + 1](Py::Expr d, int i | d = funcExpr.getArgs().getDefault(i) | d order by i) + } + + /** Gets the `n`th default for a keyword-only argument, in evaluation order. */ + Expr getKwDefault(int n) { + result.asExpr() = + rank[n + 1](Py::Expr d, int i | d = funcExpr.getArgs().getKwDefault(i) | d order by i) + } + + /** Gets the number of defaults counted by `getADefault()`; it is the offset of the keyword-only defaults in `getChild`. */ + int getNumberOfDefaults() { result = count(funcExpr.getArgs().getADefault()) } + + /** + * Gets the child at `index`: the positional defaults first, followed by + * the keyword-only defaults. + */ + override AstNode getChild(int index) { + result = this.getDefault(index) + or + result = this.getKwDefault(index - this.getNumberOfDefaults()) + } + } + + /** A lambda expression (has default args evaluated at definition time). */ + additional class LambdaExpr extends Expr { + private Py::Lambda lambda; + + LambdaExpr() { this = TPyExpr(lambda) } + + /** Gets the `n`th default for a positional argument, in evaluation order. 
*/ + Expr getDefault(int n) { + result.asExpr() = + rank[n + 1](Py::Expr d, int i | d = lambda.getArgs().getDefault(i) | d order by i) + } + + /** Gets the `n`th default for a keyword-only argument, in evaluation order. */ + Expr getKwDefault(int n) { + result.asExpr() = + rank[n + 1](Py::Expr d, int i | d = lambda.getArgs().getKwDefault(i) | d order by i) + } + + /** Gets the number of defaults counted by `getADefault()`; it is the offset of the keyword-only defaults in `getChild`. */ + int getNumberOfDefaults() { result = count(lambda.getArgs().getADefault()) } + + /** + * Gets the child at `index`: the positional defaults first, followed by + * the keyword-only defaults. + */ + override AstNode getChild(int index) { + result = this.getDefault(index) + or + result = this.getKwDefault(index - this.getNumberOfDefaults()) + } + } + + /** + * Gets the child of `n` at the specified index. Indices are zero-based for + * most nodes; `TryStmt` additionally uses -1 and -2. + */ + AstNode getChild(AstNode n, int index) { result = n.getChild(index) } +} + +/* + * NOTE(review): `Make0`, `Make1` and `Make2` are used without module + * arguments below; presumably they are parameterized modules whose + * arguments were lost — confirm against the shared `ControlFlowGraph` library. + */ +private module Cfg0 = Make0; + +private import Cfg0 + +private module Cfg1 = Make1; + +private import Cfg1 + +private module Cfg2 = Make2; + +private import Cfg2 + +/** + * The Python-specific input to the CFG construction. All customisation in + * this module concerns `assert` statements. + */ +private module Input implements InputSig1, InputSig2 { + predicate cfgCachedStageRef() { CfgCachedStage::ref() } + + private newtype TLabel = TNone() + + /** A jump label. There is a single value, rendered as `"label"`. */ + class Label extends TLabel { + string toString() { result = "label" } + } + + class CallableContext = Void; + + /** Holds if `n` is the test of an `assert` statement and `kind` is the boolean condition kind. */ + predicate inConditionalContext(Ast::AstNode n, ConditionKind kind) { + kind.isBoolean() and + n = any(Ast::AssertStmt a).getTest() + } + + /** Gets the tag of the additional node attached to each `assert` statement. */ + private string assertThrowTag() { result = "[assert-throw]" } + + /** Holds if `n` is an `assert` statement, which gets one additional node with tag `tag`. */ + predicate additionalNode(Ast::AstNode n, string tag, NormalSuccessor t) { + n instanceof Ast::AssertStmt and tag = assertThrowTag() and t instanceof DirectSuccessor + } + + /** + * Holds if `n` is the additional node of `assert` statement `ast`, which + * always begins an exception completion `c`. + */ + predicate beginAbruptCompletion( + Ast::AstNode ast, PreControlFlowNode n, AbruptCompletion c, boolean always + ) { + ast instanceof Ast::AssertStmt and + n.isAdditional(ast, assertThrowTag()) and + c.asSimpleAbruptCompletion() instanceof ExceptionSuccessor and + always = true + } + + /** Never holds. */ + predicate endAbruptCompletion(Ast::AstNode ast, PreControlFlowNode n, AbruptCompletion c) { + none() + } + + /** + * Holds if there is a step from `n1` to `n2` inside an `assert` + * statement: into the test, from a true test to after the statement, and + * from a false test via the message (if any) to the additional node. + */ + predicate step(PreControlFlowNode n1, 
PreControlFlowNode n2) { + exists(Ast::AssertStmt assertStmt | + /* Entering the statement evaluates the test first. */ + n1.isBefore(assertStmt) and + n2.isBefore(assertStmt.getTest()) + or + /* A true test completes the statement. */ + n1.isAfterTrue(assertStmt.getTest()) and + n2.isAfter(assertStmt) + or + /* A false test evaluates the message if there is one, otherwise it goes straight to the additional node. */ + n1.isAfterFalse(assertStmt.getTest()) and + ( + n2.isBefore(assertStmt.getMsg()) + or + not exists(assertStmt.getMsg()) and + n2.isAdditional(assertStmt, assertThrowTag()) + ) + or + /* After the message comes the additional node. */ + n1.isAfter(assertStmt.getMsg()) and + n2.isAdditional(assertStmt, assertThrowTag()) + ) + } +} + +import CfgCachedStage +import Public + +/** + * Maps a CFG AST wrapper node to the corresponding Python AST node, if any. + * Entry, exit, and synthetic nodes have no corresponding Python AST node. + */ +Py::AstNode astNodeToPyNode(Ast::AstNode n) { + result = n.asExpr() + or + result = n.asStmt() + or + result = n.asScope() + or + result = n.asPattern() +} diff --git a/python/ql/test/extractor-tests/syntax_error/CONSISTENCY/CfgConsistency.expected b/python/ql/test/extractor-tests/syntax_error/CONSISTENCY/CfgConsistency.expected new file mode 100644 index 000000000000..91a01a3a3d93 --- /dev/null +++ b/python/ql/test/extractor-tests/syntax_error/CONSISTENCY/CfgConsistency.expected @@ -0,0 +1,4 @@ +consistencyOverview +| deadEnd | 1 | +deadEnd +| without_loop.py:7:5:7:9 | Break | diff --git a/python/ql/test/library-tests/ControlFlow/bindings/BindingsTest.expected b/python/ql/test/library-tests/ControlFlow/bindings/BindingsTest.expected new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/ql/test/library-tests/ControlFlow/bindings/BindingsTest.ql b/python/ql/test/library-tests/ControlFlow/bindings/BindingsTest.ql new file mode 100644 index 000000000000..a507878911b1 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/bindings/BindingsTest.ql @@ -0,0 +1,32 @@ +/** + * Phase -1 of the dataflow CFG migration: verifies that every variable + * binding visible to the AST (`Name.defines(v)`) corresponds to a CFG node + * in the new CFG 
(`semmle.python.controlflow.internal.AstNodeImpl`). + * + * The expected tag is `cfgdefines=NAME`, where `NAME` is the identifier of + * the bound variable. Each binding annotation in the + * test sources looks like `# $ cfgdefines=x` for a binding currently + * covered by the new CFG, or `# $ MISSING: cfgdefines=x` for a binding + * that is known to be uncovered (a "red" test case that should be + * green-flipped once the corresponding `cfg-ext-*` extension lands). + */ + +import python +import semmle.python.controlflow.internal.AstNodeImpl as CfgImpl +import utils.test.InlineExpectationsTest + +/** Inline-expectations test reporting every binding `Name` that has a node in the new CFG. */ +module CfgBindingsTest implements TestSig { + /** Gets the only tag this test reports, `cfgdefines`. */ + string getARelevantTag() { result = "cfgdefines" } + + /** + * Holds if a `Name` at `location` defines a variable with identifier + * `value` and some node of the new CFG has that `Name` as its AST node. + */ + predicate hasActualResult(Location location, string element, string tag, string value) { + exists(Name n, Variable v, CfgImpl::ControlFlowNode cfg | + n.defines(v) and + cfg.getAstNode().asExpr() = n and + location = n.getLocation() and + element = n.toString() and + tag = "cfgdefines" and + value = v.getId() + ) + } +} + +import MakeTest<CfgBindingsTest> diff --git a/python/ql/test/library-tests/ControlFlow/bindings/annassign.py b/python/ql/test/library-tests/ControlFlow/bindings/annassign.py new file mode 100644 index 000000000000..7a9ae3ab6c79 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/bindings/annassign.py @@ -0,0 +1,13 @@ +# Annotated assignment (PEP 526). Both with and without an initializer. + +a: int = 1 # $ cfgdefines=a +b: str = "hi" # $ cfgdefines=b + +# Annotation without value: the AST records `c` as defined, +# and the new CFG now visits it via the AnnAssignStmt wrapper. 
+c: int # $ cfgdefines=c + +class K: # $ cfgdefines=K + field: int = 0 # $ cfgdefines=field + + diff --git a/python/ql/test/library-tests/ControlFlow/bindings/compound.py b/python/ql/test/library-tests/ControlFlow/bindings/compound.py new file mode 100644 index 000000000000..cb2f36f12ffe --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/bindings/compound.py @@ -0,0 +1,14 @@ +# Compound (tuple/list) assignment targets — actually wired in the new CFG. + +a, b = (1, 2) # $ cfgdefines=a cfgdefines=b +[c, d] = [3, 4] # $ cfgdefines=c cfgdefines=d + +# Nested unpacking. +(e, (f, g)) = (1, (2, 3)) # $ cfgdefines=e cfgdefines=f cfgdefines=g + +# Star unpacking. +h, *i = [1, 2, 3] # $ cfgdefines=h cfgdefines=i + +# Chained assignment with compound target. +j = k, l = (5, 6) # $ cfgdefines=j cfgdefines=k cfgdefines=l + diff --git a/python/ql/test/library-tests/ControlFlow/bindings/comprehension.py b/python/ql/test/library-tests/ControlFlow/bindings/comprehension.py new file mode 100644 index 000000000000..6b5f722c1f7e --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/bindings/comprehension.py @@ -0,0 +1,21 @@ +# Comprehension and `for` loop targets — wired in the new CFG. +# Comprehensions are nested function scopes with a synthetic `.0` parameter +# bound to the iterable. + +# Bare-name `for` target. +for i in range(3): # $ cfgdefines=i + pass + +# Compound `for` target. +for k, v in [(1, 2)]: # $ cfgdefines=k cfgdefines=v + pass + +# Comprehension targets. +_ = [x for x in range(3)] # $ cfgdefines=_ cfgdefines=x cfgdefines=.0 +_ = {y: z for y, z in []} # $ cfgdefines=_ cfgdefines=y cfgdefines=z cfgdefines=.0 +_ = (a for a in []) # $ cfgdefines=_ cfgdefines=a cfgdefines=.0 + +# Nested comprehensions. 
+_ = [b for c in [] for b in c] # $ cfgdefines=_ cfgdefines=c cfgdefines=b cfgdefines=.0 + + diff --git a/python/ql/test/library-tests/ControlFlow/bindings/dead_under_no_raise.py b/python/ql/test/library-tests/ControlFlow/bindings/dead_under_no_raise.py new file mode 100644 index 000000000000..dbfb857b5360 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/bindings/dead_under_no_raise.py @@ -0,0 +1,52 @@ +# Dead bindings under the "no expressions raise" CFG abstraction. +# +# The new CFG does not currently model raise edges from arbitrary +# expressions. As a consequence, code that is only reachable through +# exception flow is (correctly) classified as dead and has no CFG node. +# Variable bindings in dead code do not need CFG nodes - SSA / dataflow +# over dead code is moot. +# +# These tests act as a regression guard: the bindings below intentionally +# have no `cfgdefines=` annotations. If raise modelling is later added, +# the BindingsTest infrastructure will surface the new CFG nodes as +# unexpected results, and this file will need to be revisited. + + +def f(obj): # $ cfgdefines=f cfgdefines=obj + try: + return len(obj) + except TypeError: + pass + + # The first try's body always returns; its except handler does not + # raise or otherwise transfer control, so under "no expressions + # raise" the only paths out of the try-statement are dead. Everything + # below is unreachable. + try: + hint = type(obj).__length_hint__ + except AttributeError: + return None + return hint + + +def g(): # $ cfgdefines=g + try: + raise Exception("inner") + except: + raise Exception("outer") + else: + # Unreachable: the inner try body always raises, so the `else:` + # clause never runs. + hit_inner_else = True + + +def h(cache, key): # $ cfgdefines=h cfgdefines=cache cfgdefines=key + try: + return cache[key] + except KeyError: + pass + + # Same pattern as `f`: dead under "no expressions raise". 
+ value = compute(key) + cache[key] = value + return value diff --git a/python/ql/test/library-tests/ControlFlow/bindings/decorated.py b/python/ql/test/library-tests/ControlFlow/bindings/decorated.py new file mode 100644 index 000000000000..9b93c166acec --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/bindings/decorated.py @@ -0,0 +1,30 @@ +# Decorated `def`/`class` — wired in the new CFG. + + +def deco(f): # $ cfgdefines=deco cfgdefines=f + return f + + +@deco +def decorated_func(): # $ cfgdefines=decorated_func + pass + + +@deco +class DecoratedClass: # $ cfgdefines=DecoratedClass + pass + + +# Stacked decorators. +@deco +@deco +def doubly(): # $ cfgdefines=doubly + pass + + +# Inside a class body. +class Outer: # $ cfgdefines=Outer + @staticmethod + def inner(): # $ cfgdefines=inner + pass + diff --git a/python/ql/test/library-tests/ControlFlow/bindings/except_handler.py b/python/ql/test/library-tests/ControlFlow/bindings/except_handler.py new file mode 100644 index 000000000000..57b6c99fe9b6 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/bindings/except_handler.py @@ -0,0 +1,19 @@ +# Exception-handler name bindings. These are already wired in the new +# CFG provided the try body can raise; `raise` statements are reliably +# treated as exception sources. + +try: + raise ValueError("oops") +except ValueError as e: # $ cfgdefines=e + pass + +try: + raise TypeError("oops") +except (TypeError, KeyError) as err: # $ cfgdefines=err + pass + +# Exception groups (Python 3.11+). 
+try: + raise ValueError("oops") +except* ValueError as eg: # $ cfgdefines=eg + pass diff --git a/python/ql/test/library-tests/ControlFlow/bindings/imports.py b/python/ql/test/library-tests/ControlFlow/bindings/imports.py new file mode 100644 index 000000000000..c8834b5332a0 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/bindings/imports.py @@ -0,0 +1,14 @@ +# Import aliases — all bound names below are now reachable via the new +# CFG's `ImportStmt` wrapper. + +import os # $ cfgdefines=os +import os.path # $ cfgdefines=os +import os as o # $ cfgdefines=o +from os import path # $ cfgdefines=path +from os import path as p # $ cfgdefines=p +from os import sep, linesep # $ cfgdefines=sep cfgdefines=linesep +from os import ( + getcwd, # $ cfgdefines=getcwd + getcwdb, # $ cfgdefines=getcwdb +) + diff --git a/python/ql/test/library-tests/ControlFlow/bindings/match_pattern.py b/python/ql/test/library-tests/ControlFlow/bindings/match_pattern.py new file mode 100644 index 000000000000..0868a2680d0a --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/bindings/match_pattern.py @@ -0,0 +1,24 @@ +# Match-statement pattern bindings — wired in the new CFG. 
+ +def f(subject): # $ cfgdefines=f cfgdefines=subject + match subject: + case [a, b]: # $ cfgdefines=a cfgdefines=b + pass + case {"k": v}: # $ cfgdefines=v + pass + case Point(p, q): # $ cfgdefines=p cfgdefines=q + pass + case [_, *rest]: # $ cfgdefines=rest + pass + case (1 | 2) as n: # $ cfgdefines=n + pass + case x: # $ cfgdefines=x + pass + + +class Point: # $ cfgdefines=Point + __match_args__ = ("x", "y") # $ cfgdefines=__match_args__ + x: int # $ cfgdefines=x + y: int # $ cfgdefines=y + + diff --git a/python/ql/test/library-tests/ControlFlow/bindings/parameters.py b/python/ql/test/library-tests/ControlFlow/bindings/parameters.py new file mode 100644 index 000000000000..7fe5e01e4c4b --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/bindings/parameters.py @@ -0,0 +1,42 @@ +# Function parameters. + +def positional(a, b): # $ cfgdefines=positional cfgdefines=a cfgdefines=b + pass + + +def with_default(x=1, y=2): # $ cfgdefines=with_default cfgdefines=x cfgdefines=y + pass + + +def with_vararg(*args): # $ cfgdefines=with_vararg cfgdefines=args + pass + + +def with_kwarg(**kwargs): # $ cfgdefines=with_kwarg cfgdefines=kwargs + pass + + +def with_kwonly(*, k1, k2=5): # $ cfgdefines=with_kwonly cfgdefines=k1 cfgdefines=k2 + pass + + +def kitchen_sink(a, b=2, *args, k1, k2=5, **kw): # $ cfgdefines=kitchen_sink cfgdefines=a cfgdefines=b cfgdefines=args cfgdefines=k1 cfgdefines=k2 cfgdefines=kw + pass + + +# Methods get `self` / `cls`. +class C: # $ cfgdefines=C + def method(self, x): # $ cfgdefines=method cfgdefines=self cfgdefines=x + pass + + @classmethod + def cmethod(cls, x): # $ cfgdefines=cmethod cfgdefines=cls cfgdefines=x + pass + + +# Lambda parameter. +_ = lambda p: p + 1 # $ cfgdefines=_ cfgdefines=p + +# PEP 570 positional-only. 
+def pos_only(a, b, /, c): # $ cfgdefines=pos_only cfgdefines=a cfgdefines=b cfgdefines=c + pass diff --git a/python/ql/test/library-tests/ControlFlow/bindings/simple.py b/python/ql/test/library-tests/ControlFlow/bindings/simple.py new file mode 100644 index 000000000000..51cb7d828c91 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/bindings/simple.py @@ -0,0 +1,14 @@ +# Simple bindings that should already work in the new CFG. +# No MISSING annotations expected. + +x = 1 # $ cfgdefines=x +y = x + 1 # $ cfgdefines=y + +def f(): # $ cfgdefines=f + pass + +class C: # $ cfgdefines=C + pass + +# Re-assignment. +x = 2 # $ cfgdefines=x diff --git a/python/ql/test/library-tests/ControlFlow/bindings/type_params.py b/python/ql/test/library-tests/ControlFlow/bindings/type_params.py new file mode 100644 index 000000000000..2bd34dc3f0ee --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/bindings/type_params.py @@ -0,0 +1,21 @@ +# PEP 695 type parameters (Python 3.12+). + +# PEP 695 type-param names on `def`/`class` bind in an annotation scope +# that nests the function/class body — they have no CFG node in the +# enclosing scope (matching the legacy CFG). +def func[T](x: T) -> T: # $ cfgdefines=func cfgdefines=x + return x + + +class Box[T]: # $ cfgdefines=Box + item: T # $ cfgdefines=item + + +# Multi-parameter, with bound and variadics. +def multi[T: int, *Ts, **P](x: T, *args: *Ts, **kwargs: P.kwargs) -> T: # $ cfgdefines=multi cfgdefines=x cfgdefines=args cfgdefines=kwargs + return x + + +# `type` statement (PEP 695). +type Alias[T] = list[T] # $ cfgdefines=Alias cfgdefines=T + diff --git a/python/ql/test/library-tests/ControlFlow/bindings/walrus_starred.py b/python/ql/test/library-tests/ControlFlow/bindings/walrus_starred.py new file mode 100644 index 000000000000..5c0c1bd83191 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/bindings/walrus_starred.py @@ -0,0 +1,14 @@ +# Walrus and starred-target edge cases — wired in the new CFG. 
+ +# Walrus in expression context. +if (y := 5) > 0: # $ cfgdefines=y + pass + +# Walrus in a comprehension. The comprehension introduces a synthetic +# `.0` parameter bound to the iterable. +_ = [w for _ in range(3) if (w := 1)] # $ cfgdefines=_ cfgdefines=w cfgdefines=.0 + +# Starred target in a Tuple LHS. +*head, tail = [1, 2, 3] # $ cfgdefines=head cfgdefines=tail + + diff --git a/python/ql/test/library-tests/ControlFlow/bindings/with_stmt.py b/python/ql/test/library-tests/ControlFlow/bindings/with_stmt.py new file mode 100644 index 000000000000..5fffe46c5d40 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/bindings/with_stmt.py @@ -0,0 +1,21 @@ +# `with cm() as x:` bindings — wired in the new CFG. + +class CM: # $ cfgdefines=CM + def __enter__(self): return self # $ cfgdefines=__enter__ cfgdefines=self + def __exit__(self, *a): pass # $ cfgdefines=__exit__ cfgdefines=self cfgdefines=a + +with CM() as x: # $ cfgdefines=x + pass + +# Multiple items. +with CM() as a, CM() as b: # $ cfgdefines=a cfgdefines=b + pass + +# Parenthesised form (Python 3.10+). +with (CM() as p, CM() as q): # $ cfgdefines=p cfgdefines=q + pass + +# Compound target in `with`. +with CM() as (m, n): # $ cfgdefines=m cfgdefines=n + pass + diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/AllLiveReachable.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/AllLiveReachable.ql index 886ccb4c3489..de44daa3e2c2 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/AllLiveReachable.ql +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/AllLiveReachable.ql @@ -5,6 +5,8 @@ * have separate CFGs and are excluded from this check. 
*/ +import python +import TimerUtils import OldCfgImpl private module Utils = EvalOrderCfgUtils; diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/AnnotationHasCfgNode.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/AnnotationHasCfgNode.ql index 04c01abf8a67..5311d118576b 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/AnnotationHasCfgNode.ql +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/AnnotationHasCfgNode.ql @@ -2,6 +2,8 @@ * Checks that every timer annotation has a corresponding CFG node. */ +import python +import TimerUtils import OldCfgImpl private module Utils = EvalOrderCfgUtils; diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/BasicBlockAnnotationGap.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/BasicBlockAnnotationGap.ql index 691144e06e4f..0a2b08ff3fdd 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/BasicBlockAnnotationGap.ql +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/BasicBlockAnnotationGap.ql @@ -8,6 +8,8 @@ * edge leaves the basic block and the normal successor may be dead. 
*/ +import python +import TimerUtils import OldCfgImpl private module Utils = EvalOrderCfgUtils; diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/BasicBlockOrdering.expected b/python/ql/test/library-tests/ControlFlow/evaluation-order/BasicBlockOrdering.expected index 910fd3c8a80d..c5ef1ba93945 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/BasicBlockOrdering.expected +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/BasicBlockOrdering.expected @@ -1,7 +1,7 @@ | test_boolean.py:9:10:9:43 | ControlFlowNode for BoolExpr | Basic block ordering: $@ appears before $@ | test_boolean.py:9:59:9:59 | IntegerLiteral | timestamp 2 | test_boolean.py:9:19:9:19 | IntegerLiteral | timestamp 0 | | test_boolean.py:15:10:15:43 | ControlFlowNode for BoolExpr | Basic block ordering: $@ appears before $@ | test_boolean.py:15:50:15:50 | IntegerLiteral | timestamp 1 | test_boolean.py:15:20:15:20 | IntegerLiteral | timestamp 0 | | test_boolean.py:21:10:21:42 | ControlFlowNode for BoolExpr | Basic block ordering: $@ appears before $@ | test_boolean.py:21:49:21:49 | IntegerLiteral | timestamp 1 | test_boolean.py:21:19:21:19 | IntegerLiteral | timestamp 0 | -| test_boolean.py:27:10:27:34 | ControlFlowNode for BoolExpr | Basic block ordering: $@ appears before $@ | test_boolean.py:27:50:27:50 | IntegerLiteral | timestamp 2 | test_boolean.py:27:20:27:20 | IntegerLiteral | timestamp 0 | +| test_boolean.py:27:10:27:43 | ControlFlowNode for BoolExpr | Basic block ordering: $@ appears before $@ | test_boolean.py:27:59:27:59 | IntegerLiteral | timestamp 2 | test_boolean.py:27:20:27:20 | IntegerLiteral | timestamp 0 | | test_boolean.py:40:10:40:61 | ControlFlowNode for BoolExpr | Basic block ordering: $@ appears before $@ | test_boolean.py:40:86:40:86 | IntegerLiteral | timestamp 3 | test_boolean.py:40:16:40:16 | IntegerLiteral | timestamp 0 | | test_boolean.py:46:10:46:61 | ControlFlowNode for BoolExpr | Basic block ordering: $@ appears 
before $@ | test_boolean.py:46:86:46:86 | IntegerLiteral | timestamp 3 | test_boolean.py:46:16:46:16 | IntegerLiteral | timestamp 0 | | test_boolean.py:52:10:52:95 | ControlFlowNode for BoolExpr | Basic block ordering: $@ appears before $@ | test_boolean.py:52:120:52:120 | IntegerLiteral | timestamp 4 | test_boolean.py:52:20:52:20 | IntegerLiteral | timestamp 0 | diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/BasicBlockOrdering.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/BasicBlockOrdering.ql index 6c08d44a5a59..30697f1403e2 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/BasicBlockOrdering.ql +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/BasicBlockOrdering.ql @@ -3,6 +3,8 @@ * increasing minimum-timestamp order. */ +import python +import TimerUtils import OldCfgImpl private module Utils = EvalOrderCfgUtils; diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/ConsecutiveTimestamps.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/ConsecutiveTimestamps.ql index 01ff59b49bf6..709fd5665ea4 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/ConsecutiveTimestamps.ql +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/ConsecutiveTimestamps.ql @@ -11,6 +11,8 @@ * lambdas that have annotations in nested scopes). */ +import python +import TimerUtils import OldCfgImpl private module Utils = EvalOrderCfgUtils; diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/ContiguousTimestamps.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/ContiguousTimestamps.ql index f18c52750b52..456ebf447dad 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/ContiguousTimestamps.ql +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/ContiguousTimestamps.ql @@ -4,6 +4,7 @@ * in at least one annotation (live or dead). 
*/ +import python import TimerUtils from TestFunction f, int missing, int maxTs, TimerAnnotation maxAnn diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NeverReachable.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NeverReachable.ql index 9fbb9115814a..b09a936a0a40 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/NeverReachable.ql +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NeverReachable.ql @@ -4,6 +4,8 @@ * entry (including within the same basic block). */ +import python +import TimerUtils import OldCfgImpl private module Utils = EvalOrderCfgUtils; diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgAllLiveReachable.expected b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgAllLiveReachable.expected new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgAllLiveReachable.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgAllLiveReachable.ql new file mode 100644 index 000000000000..75f02d14a9cb --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgAllLiveReachable.ql @@ -0,0 +1,14 @@ +/** New-CFG version of AllLiveReachable. 
*/ + +import python +import TimerUtils +import NewCfgImpl + +private module Utils = EvalOrderCfgUtils; + +private import Utils +private import Utils::CfgTests + +from TimerCfgNode a, TestFunction f +where allLiveReachable(a, f) +select a, "Unreachable live annotation; entry of $@ does not reach this node", f, f.getName() diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgAnnotationHasCfgNode.expected b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgAnnotationHasCfgNode.expected new file mode 100644 index 000000000000..8b137891791f --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgAnnotationHasCfgNode.expected @@ -0,0 +1 @@ + diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgAnnotationHasCfgNode.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgAnnotationHasCfgNode.ql new file mode 100644 index 000000000000..4b1d82e27e67 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgAnnotationHasCfgNode.ql @@ -0,0 +1,18 @@ +/** + * New-CFG version of AnnotationHasCfgNode. + * + * Checks that every timer annotation has a corresponding CFG node. 
+ */ + +import python +import TimerUtils +import NewCfgImpl + +private module Utils = EvalOrderCfgUtils; + +private import Utils::CfgTests + +from TimerAnnotation ann +where annotationWithoutCfgNode(ann) +select ann, "Annotation in $@ has no CFG node", ann.getTestFunction(), + ann.getTestFunction().getName() diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBasicBlockAnnotationGap.expected b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBasicBlockAnnotationGap.expected new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBasicBlockAnnotationGap.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBasicBlockAnnotationGap.ql new file mode 100644 index 000000000000..80dd759a3651 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBasicBlockAnnotationGap.ql @@ -0,0 +1,26 @@ +/** + * New-CFG version of BasicBlockAnnotationGap. + * + * Original: + * Checks that within a basic block, if a node is annotated then its + * successor is also annotated (or excluded). A gap in annotations + * within a basic block indicates a missing annotation, since there + * are no branches to justify the gap. + * + * Nodes with exceptional successors are excluded, as the exception + * edge leaves the basic block and the normal successor may be dead. 
+ */ + +import python +import TimerUtils +import NewCfgImpl + +private module Utils = EvalOrderCfgUtils; + +private import Utils +private import Utils::CfgTests + +from TimerCfgNode a, CfgNode succ +where basicBlockAnnotationGap(a, succ) +select a, "Annotated node followed by unannotated $@ in the same basic block", succ, + succ.getNode().toString() diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBasicBlockOrdering.expected b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBasicBlockOrdering.expected new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBasicBlockOrdering.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBasicBlockOrdering.ql new file mode 100644 index 000000000000..f06d08d937e3 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBasicBlockOrdering.ql @@ -0,0 +1,21 @@ +/** + * New-CFG version of BasicBlockOrdering. + * + * Original: + * Checks that within a single basic block, annotations appear in + * increasing minimum-timestamp order. 
+ */ + +import python +import TimerUtils +import NewCfgImpl + +private module Utils = EvalOrderCfgUtils; + +private import Utils +private import Utils::CfgTests + +from TimerCfgNode a, TimerCfgNode b, int minA, int minB +where basicBlockOrdering(a, b, minA, minB) +select a, "Basic block ordering: $@ appears before $@", a.getTimestampExpr(minA), + "timestamp " + minA, b.getTimestampExpr(minB), "timestamp " + minB diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBranchTimestamps.expected b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBranchTimestamps.expected new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBranchTimestamps.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBranchTimestamps.ql new file mode 100644 index 000000000000..cfd8ffb4e4bd --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgBranchTimestamps.ql @@ -0,0 +1,80 @@ +/** + * New-CFG version of BranchTimestamps. + * + * Checks that when a node has both a true and false successor, the + * live timestamps on one branch are marked as dead on the other. + * This ensures that boolean branches are fully annotated with dead() + * markers for the paths not taken. + * + * Limitation: the `@ t[ts, ...]` / `dead(ts)` annotation scheme can only + * model branch-dead-ness for plain boolean control flow that reconverges + * linearly after the split — i.e. `if`-with-else and `if`-expression. 
+ * It cannot model: + * + * * loops (`while` / `for`): body timestamps repeat across iterations, + * so the loop-exit annotation can't list them as dead; + * * `match` statements: each `case` body is a syntactically distinct + * sub-tree, and the branches don't reconverge through a common + * annotation point in the timeline; + * * `try` / `with` and `raise` / `assert`: exception edges are modelled + * as true/false but flow to syntactically distinct handlers, with no + * reconvergence in the linear annotation order; + * * short-circuit `and` / `or` (`BoolExpr`): the branches reconverge at + * the BoolExpr's after-node, so timestamps on one branch are live + * downstream of the other rather than dead; + * * `if` without an `else` clause, and `if`/`elif` chains: the false + * branch reconverges with the true branch at the post-if statement + * (no-else) or fans out across multiple elif-test annotations, + * neither of which fit the binary annotation scheme. + * + * Branch nodes inside those constructs are therefore whitelisted out + * below. The check still fires (and is useful) for plain `if`/`else` + * and conditional-expression branching. + */ + +import python +import TimerUtils +import NewCfgImpl + +private module Utils = EvalOrderCfgUtils; + +private import Utils +private import Utils::CfgTests + +/** + * Holds if `f` contains a construct whose branches the linear-timestamp + * annotation scheme cannot describe (see file-level comment). 
+ */ +private predicate hasUnmodellableBranching(Function f) { + exists(AstNode bad | + bad.getScope() = f and + ( + bad instanceof While + or + bad instanceof For + or + bad instanceof MatchStmt + or + bad instanceof Try + or + bad instanceof With + or + bad instanceof Raise + or + bad instanceof Assert + or + bad instanceof BoolExpr + or + bad instanceof If and + (not exists(bad.(If).getAnOrelse()) or bad.(If).isElif()) + ) + ) +} + +from TimerCfgNode node, int ts, string branch +where + missingBranchTimestamp(node, ts, branch) and + not hasUnmodellableBranching(node.getTestFunction()) +select node, + "Timestamp " + ts + " on true/false branch is missing a dead() annotation on the " + branch + + " successor in $@", node.getTestFunction(), node.getTestFunction().getName() diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgConsecutivePredecessorTimestamps.expected b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgConsecutivePredecessorTimestamps.expected new file mode 100644 index 000000000000..8b137891791f --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgConsecutivePredecessorTimestamps.expected @@ -0,0 +1 @@ + diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgConsecutivePredecessorTimestamps.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgConsecutivePredecessorTimestamps.ql new file mode 100644 index 000000000000..3feacae264e5 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgConsecutivePredecessorTimestamps.ql @@ -0,0 +1,22 @@ +/** + * New-CFG version of ConsecutivePredecessorTimestamps. + * + * Checks that each annotated node (except the minimum timestamp) has + * a predecessor annotation with timestamp `a - 1`. This is the reverse + * of ConsecutiveTimestamps: it catches nodes that are reachable but + * arrived at from the wrong place (skipping an intermediate node). 
+ */ + +import python +import TimerUtils +import NewCfgImpl + +private module Utils = EvalOrderCfgUtils; + +private import Utils +private import Utils::CfgTests + +from TimerAnnotation ann, int a +where consecutivePredecessorTimestamps(ann, a) +select ann, "$@ in $@ has no consecutive predecessor (expected " + (a - 1) + ")", + ann.getTimestampExpr(a), "Timestamp " + a, ann.getTestFunction(), ann.getTestFunction().getName() diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgConsecutiveTimestamps.expected b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgConsecutiveTimestamps.expected new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgConsecutiveTimestamps.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgConsecutiveTimestamps.ql new file mode 100644 index 000000000000..8e52663d6eaf --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgConsecutiveTimestamps.ql @@ -0,0 +1,29 @@ +/** + * New-CFG version of ConsecutiveTimestamps. + * + * Original: + * Checks that consecutive annotated nodes have consecutive timestamps: + * for each annotation with timestamp `a`, some CFG node for that annotation + * must have a next annotation containing `a + 1`. + * + * Handles CFG splitting (e.g., finally blocks duplicated for normal/exceptional + * flow) by checking that at least one split has the required successor. + * + * Only applies to functions where all annotations are in the function's + * own scope (excludes tests with generators, async, comprehensions, or + * lambdas that have annotations in nested scopes). 
+ */ + +import python +import TimerUtils +import NewCfgImpl + +private module Utils = EvalOrderCfgUtils; + +private import Utils +private import Utils::CfgTests + +from TimerAnnotation ann, int a +where consecutiveTimestamps(ann, a) +select ann, "$@ in $@ has no consecutive successor (expected " + (a + 1) + ")", + ann.getTimestampExpr(a), "Timestamp " + a, ann.getTestFunction(), ann.getTestFunction().getName() diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgImpl.qll b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgImpl.qll new file mode 100644 index 000000000000..1da80d2ee0dd --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgImpl.qll @@ -0,0 +1,101 @@ +/** + * Implementation of the evaluation-order CFG signature using the new + * shared control flow graph from AstNodeImpl. + */ + +private import python as Py +import TimerUtils +private import semmle.python.controlflow.internal.AstNodeImpl as CfgImpl +private import codeql.controlflow.SuccessorType + +private class NewControlFlowNode = CfgImpl::ControlFlowNode; + +private class NewBasicBlock = CfgImpl::BasicBlock; + +/** New (shared) CFG implementation of the evaluation-order signature. */ +module NewCfg implements EvalOrderCfgSig { + class CfgNode instanceof NewControlFlowNode { + // Use the post-order representative for each AST node: the "after" node. + // For simple leaf nodes this is the merged before/after node. For + // post-order expressions this is the TAstNode. For pre-order expressions + // (and/or/not/ternary) this uses an AfterValueNode, which places the + // expression after its operands — matching the timer test expectations. 
+ CfgNode() { NewControlFlowNode.super.isAfter(_) } + + string toString() { result = NewControlFlowNode.super.toString() } + + Py::Location getLocation() { result = NewControlFlowNode.super.getLocation() } + + Py::AstNode getNode() { + result = CfgImpl::astNodeToPyNode(NewControlFlowNode.super.getAstNode()) + } + + CfgNode getASuccessor() { nextCfgNode(this, result) } + + CfgNode getATrueSuccessor() { + NewControlFlowNode.super.isAfterTrue(_) and + // Only where there's also a false branch (true boolean split) + exists(NewControlFlowNode other | other.isAfterFalse(NewControlFlowNode.super.getAstNode())) and + nextCfgNodeFrom(this, result) + } + + CfgNode getAFalseSuccessor() { + NewControlFlowNode.super.isAfterFalse(_) and + // Only where there's also a true branch (true boolean split) + exists(NewControlFlowNode other | other.isAfterTrue(NewControlFlowNode.super.getAstNode())) and + nextCfgNodeFrom(this, result) + } + + CfgNode getAnExceptionalSuccessor() { + exists(NewControlFlowNode mid | + mid = NewControlFlowNode.super.getAnExceptionSuccessor() and + nextCfgNodeFrom(mid, result) + ) + } + + Py::Scope getScope() { result = NewControlFlowNode.super.getEnclosingCallable().asScope() } + + BasicBlock getBasicBlock() { + exists(NewBasicBlock bb, int i | bb.getNode(i) = this and result = bb) + } + } + + /** + * Holds if `next` is the nearest CfgNode reachable from `n` via + * one or more raw CFG successor edges, skipping non-CfgNode intermediaries. + */ + private predicate nextCfgNodeFrom(NewControlFlowNode n, CfgNode next) { + next = n.getASuccessor() + or + exists(NewControlFlowNode mid | + mid = n.getASuccessor() and + not mid instanceof CfgNode and + nextCfgNodeFrom(mid, next) + ) + } + + /** + * Holds if `next` is the nearest CfgNode successor of `n`, + * skipping synthetic intermediate nodes. 
+ */ + private predicate nextCfgNode(CfgNode n, CfgNode next) { nextCfgNodeFrom(n, next) } + + class BasicBlock instanceof NewBasicBlock { + string toString() { result = NewBasicBlock.super.toString() } + + CfgNode getNode(int n) { result = NewBasicBlock.super.getNode(n) } + + predicate reaches(BasicBlock bb) { this = bb or this.strictlyReaches(bb) } + + predicate strictlyReaches(BasicBlock bb) { NewBasicBlock.super.getASuccessor+() = bb } + + predicate strictlyDominates(BasicBlock bb) { NewBasicBlock.super.strictlyDominates(bb) } + } + + CfgNode scopeGetEntryNode(Py::Scope s) { + exists(CfgImpl::ControlFlow::EntryNode entry | + entry.getEnclosingCallable().asScope() = s and + nextCfgNodeFrom(entry, result) + ) + } +} diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNeverReachable.expected b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNeverReachable.expected new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNeverReachable.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNeverReachable.ql new file mode 100644 index 000000000000..6949b2cc6e9b --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNeverReachable.ql @@ -0,0 +1,21 @@ +/** + * New-CFG version of NeverReachable. + * + * Original: + * Checks that expressions annotated with `t.never` either have no CFG + * node, or if they do, that the node is not reachable from its scope's + * entry (including within the same basic block). 
+ */ + +import python +import TimerUtils +import NewCfgImpl + +private module Utils = EvalOrderCfgUtils; + +private import Utils::CfgTests + +from TimerAnnotation ann +where neverReachable(ann) +select ann, "Node annotated with t.never is reachable in $@", ann.getTestFunction(), + ann.getTestFunction().getName() diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoBackwardFlow.expected b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoBackwardFlow.expected new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoBackwardFlow.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoBackwardFlow.ql new file mode 100644 index 000000000000..442ca5f5456c --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoBackwardFlow.ql @@ -0,0 +1,22 @@ +/** + * New-CFG version of NoBackwardFlow. + * + * Original: + * Checks that time never flows backward between consecutive timer annotations + * in the CFG. For each pair of consecutive annotated nodes (A -> B), there must + * exist timestamps a in A and b in B with a < b. 
+ */ + +import python +import TimerUtils +import NewCfgImpl + +private module Utils = EvalOrderCfgUtils; + +private import Utils +private import Utils::CfgTests + +from TimerCfgNode a, TimerCfgNode b, int minA, int maxB +where noBackwardFlow(a, b, minA, maxB) +select a, "Backward flow: $@ flows to $@ (max timestamp $@)", a.getTimestampExpr(minA), + minA.toString(), b, b.getNode().toString(), b.getTimestampExpr(maxB), maxB.toString() diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoBasicBlock.expected b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoBasicBlock.expected new file mode 100644 index 000000000000..8b137891791f --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoBasicBlock.expected @@ -0,0 +1 @@ + diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoBasicBlock.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoBasicBlock.ql new file mode 100644 index 000000000000..e07890f72502 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoBasicBlock.ql @@ -0,0 +1,18 @@ +/** + * New-CFG version of NoBasicBlock. + * + * Checks that every annotated CFG node belongs to a basic block. 
+ */ + +import python +import TimerUtils +import NewCfgImpl + +private module Utils = EvalOrderCfgUtils; + +private import Utils +private import Utils::CfgTests + +from CfgNode n, TestFunction f +where noBasicBlock(n, f) +select n, "CFG node in $@ does not belong to any basic block", f, f.getName() diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoSharedReachable.expected b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoSharedReachable.expected new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoSharedReachable.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoSharedReachable.ql new file mode 100644 index 000000000000..5a1a1aba2a7a --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgNoSharedReachable.ql @@ -0,0 +1,21 @@ +/** + * New-CFG version of NoSharedReachable. + * + * Original: + * Checks that two annotations sharing a timestamp value are on + * mutually exclusive CFG paths (neither can reach the other). 
+ */ + +import python +import TimerUtils +import NewCfgImpl + +private module Utils = EvalOrderCfgUtils; + +private import Utils +private import Utils::CfgTests + +from TimerCfgNode a, TimerCfgNode b, int ts +where noSharedReachable(a, b, ts) +select a, "Shared timestamp $@ but this node reaches $@", a.getTimestampExpr(ts), ts.toString(), b, + b.getNode().toString() diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgStrictForward.expected b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgStrictForward.expected new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgStrictForward.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgStrictForward.ql new file mode 100644 index 000000000000..ebbc60346db0 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NewCfgStrictForward.ql @@ -0,0 +1,22 @@ +/** + * New-CFG version of StrictForward. + * + * Original: + * Stronger version of NoBackwardFlow: for consecutive annotated nodes + * A -> B that both have a single timestamp (non-loop code) and B does + * NOT dominate A (forward edge), requires max(A) < min(B). 
+ */ + +import python +import TimerUtils +import NewCfgImpl + +private module Utils = EvalOrderCfgUtils; + +private import Utils +private import Utils::CfgTests + +from TimerCfgNode a, TimerCfgNode b, int maxA, int minB +where strictForward(a, b, maxA, minB) +select a, "Strict forward violation: $@ flows to $@", a.getTimestampExpr(maxA), "timestamp " + maxA, + b.getTimestampExpr(minB), "timestamp " + minB diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NoBackwardFlow.expected b/python/ql/test/library-tests/ControlFlow/evaluation-order/NoBackwardFlow.expected index 6e8ea12c9dd4..775cc7bdbbfd 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/NoBackwardFlow.expected +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NoBackwardFlow.expected @@ -1,7 +1,7 @@ | test_boolean.py:9:10:9:43 | ControlFlowNode for BoolExpr | Backward flow: $@ flows to $@ (max timestamp $@) | test_boolean.py:9:59:9:59 | IntegerLiteral | 2 | test_boolean.py:9:10:9:13 | ControlFlowNode for True | True | test_boolean.py:9:19:9:19 | IntegerLiteral | 0 | | test_boolean.py:15:10:15:43 | ControlFlowNode for BoolExpr | Backward flow: $@ flows to $@ (max timestamp $@) | test_boolean.py:15:50:15:50 | IntegerLiteral | 1 | test_boolean.py:15:10:15:14 | ControlFlowNode for False | False | test_boolean.py:15:20:15:20 | IntegerLiteral | 0 | | test_boolean.py:21:10:21:42 | ControlFlowNode for BoolExpr | Backward flow: $@ flows to $@ (max timestamp $@) | test_boolean.py:21:49:21:49 | IntegerLiteral | 1 | test_boolean.py:21:10:21:13 | ControlFlowNode for True | True | test_boolean.py:21:19:21:19 | IntegerLiteral | 0 | -| test_boolean.py:27:10:27:34 | ControlFlowNode for BoolExpr | Backward flow: $@ flows to $@ (max timestamp $@) | test_boolean.py:27:50:27:50 | IntegerLiteral | 2 | test_boolean.py:27:10:27:14 | ControlFlowNode for False | False | test_boolean.py:27:20:27:20 | IntegerLiteral | 0 | +| test_boolean.py:27:10:27:43 | ControlFlowNode for BoolExpr 
| Backward flow: $@ flows to $@ (max timestamp $@) | test_boolean.py:27:59:27:59 | IntegerLiteral | 2 | test_boolean.py:27:10:27:14 | ControlFlowNode for False | False | test_boolean.py:27:20:27:20 | IntegerLiteral | 0 | | test_boolean.py:40:10:40:61 | ControlFlowNode for BoolExpr | Backward flow: $@ flows to $@ (max timestamp $@) | test_boolean.py:40:86:40:86 | IntegerLiteral | 3 | test_boolean.py:40:10:40:10 | ControlFlowNode for IntegerLiteral | IntegerLiteral | test_boolean.py:40:16:40:16 | IntegerLiteral | 0 | | test_boolean.py:46:10:46:61 | ControlFlowNode for BoolExpr | Backward flow: $@ flows to $@ (max timestamp $@) | test_boolean.py:46:86:46:86 | IntegerLiteral | 3 | test_boolean.py:46:10:46:10 | ControlFlowNode for IntegerLiteral | IntegerLiteral | test_boolean.py:46:16:46:16 | IntegerLiteral | 0 | | test_boolean.py:52:10:52:95 | ControlFlowNode for BoolExpr | Backward flow: $@ flows to $@ (max timestamp $@) | test_boolean.py:52:120:52:120 | IntegerLiteral | 4 | test_boolean.py:52:11:52:47 | ControlFlowNode for BoolExpr | BoolExpr | test_boolean.py:52:63:52:63 | IntegerLiteral | 2 | diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NoBackwardFlow.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NoBackwardFlow.ql index e9926284295f..4acf45db3cda 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/NoBackwardFlow.ql +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NoBackwardFlow.ql @@ -4,6 +4,8 @@ * exist timestamps a in A and b in B with a < b. 
*/ +import python +import TimerUtils import OldCfgImpl private module Utils = EvalOrderCfgUtils; diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NoBasicBlock.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NoBasicBlock.ql index 82d9589a9750..5568bd2a9a4a 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/NoBasicBlock.ql +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NoBasicBlock.ql @@ -2,6 +2,8 @@ * Checks that every annotated CFG node belongs to a basic block. */ +import python +import TimerUtils import OldCfgImpl private module Utils = EvalOrderCfgUtils; diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/NoSharedReachable.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/NoSharedReachable.ql index e9f685e8ffae..1fcceb2aca98 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/NoSharedReachable.ql +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/NoSharedReachable.ql @@ -3,6 +3,8 @@ * mutually exclusive CFG paths (neither can reach the other). */ +import python +import TimerUtils import OldCfgImpl private module Utils = EvalOrderCfgUtils; diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/OldCfgImpl.qll b/python/ql/test/library-tests/ControlFlow/evaluation-order/OldCfgImpl.qll index cb7bbb495b87..fc52c8dd3ed1 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/OldCfgImpl.qll +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/OldCfgImpl.qll @@ -3,14 +3,14 @@ * Python control flow graph. */ -private import python as PY +private import python as Py import TimerUtils /** Existing Python CFG implementation of the evaluation-order signature. 
*/ module OldCfg implements EvalOrderCfgSig { - class CfgNode = PY::ControlFlowNode; + class CfgNode = Py::ControlFlowNode; - class BasicBlock = PY::BasicBlock; + class BasicBlock = Py::BasicBlock; - CfgNode scopeGetEntryNode(PY::Scope s) { result = s.getEntryNode() } + CfgNode scopeGetEntryNode(Py::Scope s) { result = s.getEntryNode() } } diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/StrictForward.expected b/python/ql/test/library-tests/ControlFlow/evaluation-order/StrictForward.expected index 6562ff9f7b2f..34e050b0f8aa 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/StrictForward.expected +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/StrictForward.expected @@ -1,7 +1,7 @@ | test_boolean.py:9:10:9:43 | ControlFlowNode for BoolExpr | Strict forward violation: $@ flows to $@ | test_boolean.py:9:59:9:59 | IntegerLiteral | timestamp 2 | test_boolean.py:9:19:9:19 | IntegerLiteral | timestamp 0 | | test_boolean.py:15:10:15:43 | ControlFlowNode for BoolExpr | Strict forward violation: $@ flows to $@ | test_boolean.py:15:50:15:50 | IntegerLiteral | timestamp 1 | test_boolean.py:15:20:15:20 | IntegerLiteral | timestamp 0 | | test_boolean.py:21:10:21:42 | ControlFlowNode for BoolExpr | Strict forward violation: $@ flows to $@ | test_boolean.py:21:49:21:49 | IntegerLiteral | timestamp 1 | test_boolean.py:21:19:21:19 | IntegerLiteral | timestamp 0 | -| test_boolean.py:27:10:27:34 | ControlFlowNode for BoolExpr | Strict forward violation: $@ flows to $@ | test_boolean.py:27:50:27:50 | IntegerLiteral | timestamp 2 | test_boolean.py:27:20:27:20 | IntegerLiteral | timestamp 0 | +| test_boolean.py:27:10:27:43 | ControlFlowNode for BoolExpr | Strict forward violation: $@ flows to $@ | test_boolean.py:27:59:27:59 | IntegerLiteral | timestamp 2 | test_boolean.py:27:20:27:20 | IntegerLiteral | timestamp 0 | | test_boolean.py:40:10:40:61 | ControlFlowNode for BoolExpr | Strict forward violation: $@ flows to $@ | 
test_boolean.py:40:86:40:86 | IntegerLiteral | timestamp 3 | test_boolean.py:40:16:40:16 | IntegerLiteral | timestamp 0 | | test_boolean.py:46:10:46:61 | ControlFlowNode for BoolExpr | Strict forward violation: $@ flows to $@ | test_boolean.py:46:86:46:86 | IntegerLiteral | timestamp 3 | test_boolean.py:46:16:46:16 | IntegerLiteral | timestamp 0 | | test_boolean.py:52:10:52:95 | ControlFlowNode for BoolExpr | Strict forward violation: $@ flows to $@ | test_boolean.py:52:120:52:120 | IntegerLiteral | timestamp 4 | test_boolean.py:52:63:52:63 | IntegerLiteral | timestamp 2 | diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/StrictForward.ql b/python/ql/test/library-tests/ControlFlow/evaluation-order/StrictForward.ql index 79b383a4acfa..9e64770bab4d 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/StrictForward.ql +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/StrictForward.ql @@ -4,6 +4,8 @@ * NOT dominate A (forward edge), requires max(A) < min(B). 
*/ +import python +import TimerUtils import OldCfgImpl private module Utils = EvalOrderCfgUtils; diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/test_boolean.py b/python/ql/test/library-tests/ControlFlow/evaluation-order/test_boolean.py index a12975634f49..a3b2268a8315 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/test_boolean.py +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/test_boolean.py @@ -24,7 +24,7 @@ def test_or_short_circuit(t): @test def test_or_both_sides(t): # False or X — both operands evaluated, result is X - x = (False @ t[0] or 42 @ t[1]) @ t[dead(1), 2] + x = (False @ t[0] or 42 @ t[1, dead(2)]) @ t[dead(1), 2] @test diff --git a/python/ql/test/library-tests/ControlFlow/evaluation-order/test_if.py b/python/ql/test/library-tests/ControlFlow/evaluation-order/test_if.py index 8880aaaef348..a6eb6c7d5cac 100644 --- a/python/ql/test/library-tests/ControlFlow/evaluation-order/test_if.py +++ b/python/ql/test/library-tests/ControlFlow/evaluation-order/test_if.py @@ -85,7 +85,7 @@ def test_nested_if_else(t): else: z = 2 @ t[dead(4)] else: - z = 3 @ t[dead(4)] + z = 3 @ t[dead(3), dead(4)] w = 0 @ t[5] From 6c03194092cb12c423b95b26e00b8bf550f3bf44 Mon Sep 17 00:00:00 2001 From: yoff Date: Tue, 2 Jun 2026 14:09:28 +0000 Subject: [PATCH 2/3] Python: add new shared-CFG-backed control flow graph facade (Cfg) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the public facade on top of the AstNodeImpl adapter from the previous commit. Re-exposes the same API surface as semmle/python/Flow.qll (ControlFlowNode, CallNode, BasicBlock, NameNode, DefinitionNode, CompareNode, ...), backed by the shared codeql.controlflow.ControlFlowGraph library. - semmle.python.controlflow.internal.Cfg — public facade. - ControlFlow/store-load/* — basic store/load coverage via the facade. 
The new CFG library is added additively: it has zero callers in lib/ and src/, and the legacy CFG in semmle/python/Flow.qll remains the default. Dataflow, SSA, and production query migration land in follow-up PRs. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../change-notes/2026-05-19-add-shared-cfg.md | 4 + .../python/controlflow/internal/Cfg.qll | 1023 +++++++++++++++++ .../store-load/StoreLoadTest.expected | 0 .../ControlFlow/store-load/StoreLoadTest.ql | 41 + .../ControlFlow/store-load/test.py | 56 + 5 files changed, 1124 insertions(+) create mode 100644 python/ql/lib/change-notes/2026-05-19-add-shared-cfg.md create mode 100644 python/ql/lib/semmle/python/controlflow/internal/Cfg.qll create mode 100644 python/ql/test/library-tests/ControlFlow/store-load/StoreLoadTest.expected create mode 100644 python/ql/test/library-tests/ControlFlow/store-load/StoreLoadTest.ql create mode 100644 python/ql/test/library-tests/ControlFlow/store-load/test.py diff --git a/python/ql/lib/change-notes/2026-05-19-add-shared-cfg.md b/python/ql/lib/change-notes/2026-05-19-add-shared-cfg.md new file mode 100644 index 000000000000..913f95320d87 --- /dev/null +++ b/python/ql/lib/change-notes/2026-05-19-add-shared-cfg.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* A new Python control flow graph implementation has been added under `semmle.python.controlflow.internal.Cfg` (backed by `AstNodeImpl.qll`), built on the shared `codeql.controlflow.ControlFlowGraph` library. It is not yet used by the dataflow library or any production query; the legacy CFG in `semmle/python/Flow.qll` remains the default. The new library is exposed for tests and for upcoming migrations. 
diff --git a/python/ql/lib/semmle/python/controlflow/internal/Cfg.qll b/python/ql/lib/semmle/python/controlflow/internal/Cfg.qll new file mode 100644 index 000000000000..e56207d8af94 --- /dev/null +++ b/python/ql/lib/semmle/python/controlflow/internal/Cfg.qll @@ -0,0 +1,1023 @@ +/** + * Provides a Python control flow graph facade backed by the shared + * `codeql.controlflow.ControlFlowGraph` library (via `AstNodeImpl.qll`). + * + * This module re-exposes the same API surface as `semmle/python/Flow.qll` + * (the legacy CFG), but is implemented on the new shared CFG. It is + * intended as a drop-in replacement for use by the Python dataflow library + * and other downstream code. + * + * Layering follows the Java pattern (`java/ql/lib/semmle/code/java/Expr.qll` + * and `SsaImpl.qll`): variable identity and similar AST-level semantics + * live on the Python AST classes (`Name.defines(v)`, `Name.uses(v)`, ...); + * the CFG layer is purely positional, with `toAst` / `getNode` bridging + * back to the AST. The shared SSA library can then be parameterized on + * (`BasicBlock`, `int`) directly, with no CFG-level variable predicates. + */ +overlay[local?] +module; + +private import python as Py +private import semmle.python.controlflow.internal.AstNodeImpl as CfgImpl +private import codeql.controlflow.SuccessorType +private import codeql.controlflow.BasicBlock as BB + +/** + * A nested sub-module that explicitly implements `BB::CfgSig`, so this + * `Cfg` facade can be passed to parameterised shared modules such as + * `codeql.dataflow.VariableCapture::Flow`. The sub-module + * exposes the *raw* shared-CFG types from `AstNodeImpl.qll` (where the + * signature is satisfied natively), not the facade's wrapped types. 
+ */ +module CfgSigImpl implements BB::CfgSig { + class ControlFlowNode = CfgImpl::ControlFlowNode; + + class BasicBlock = CfgImpl::BasicBlock; + + class EntryBasicBlock = CfgImpl::Cfg::EntryBasicBlock; + + predicate dominatingEdge = CfgImpl::Cfg::dominatingEdge/2; +} + +/** + * Gets the Python AST node corresponding to CFG node `n`, if any. + * + * Multiple CFG nodes may map to the same AST node (e.g. `TBeforeNode(Call)` + * and `TAstNode(Call)` both map to `Py::Call`). This is a pure translation; + * uniqueness constraints are enforced at the dataflow layer where needed. + */ +private Py::AstNode toAst(CfgImpl::ControlFlowNode n) { + result = CfgImpl::astNodeToPyNode(n.getAstNode()) +} + +/** + * A control flow node. + * + * This is the full set of CFG nodes from the shared library — it includes + * before-nodes, in-order/post-order nodes, after-value-split nodes, and + * entry/exit nodes. This enables full control-flow-level reasoning and + * compatibility with the shared control-flow reachability library. + * + * AST-level semantics (`getNode()`, `isLoad()`, typed wrappers, etc.) + * are available only on the `injects` (canonical) node for each AST node. + * Non-injects nodes are purely positional CFG nodes with no AST mapping. + */ +class ControlFlowNode extends CfgImpl::ControlFlowNode { + + /** Gets the syntactic element corresponding to this flow node, if any. */ + Py::AstNode getNode() { result = toAst(this) } + + /** Gets a predecessor of this flow node. */ + ControlFlowNode getAPredecessor() { this = result.getASuccessor() } + + /** Gets a successor of this flow node. */ + ControlFlowNode getASuccessor() { result = super.getASuccessor() } + + /** Gets a successor for this node if the relevant condition is True. */ + ControlFlowNode getATrueSuccessor() { + result = super.getASuccessor(any(BooleanSuccessor t | t.getValue() = true)) + } + + /** Gets a successor for this node if the relevant condition is False. 
*/ + ControlFlowNode getAFalseSuccessor() { + result = super.getASuccessor(any(BooleanSuccessor t | t.getValue() = false)) + } + + /** Gets a successor for this node if an exception is raised. */ + ControlFlowNode getAnExceptionalSuccessor() { result = super.getAnExceptionSuccessor() } + + /** Gets a successor for this node if no exception is raised. */ + ControlFlowNode getANormalSuccessor() { result = super.getANormalSuccessor() } + + /** Gets the basic block containing this flow node. */ + BasicBlock getBasicBlock() { result = super.getBasicBlock() } + + /** Gets the scope containing this flow node. */ + Py::Scope getScope() { result = super.getEnclosingCallable().asScope() } + + /** Gets the enclosing module. */ + Py::Module getEnclosingModule() { result = this.getScope().getEnclosingModule() } + + /** Gets the immediate dominator of this flow node. */ + ControlFlowNode getImmediateDominator() { + // Defined positionally via the basic-block dominance tree. + exists(BasicBlock bb, int i | bb.getNode(i) = this | + // Predecessor within the same basic block. + i > 0 and result = bb.getNode(i - 1) + or + // First node of `bb`: dominator is the last node of the immediate dominator block. + i = 0 and result = bb.getImmediateDominator().getLastNode() + ) + } + + /** Holds if this strictly dominates `other`. */ + pragma[inline] + predicate strictlyDominates(ControlFlowNode other) { super.strictlyDominates(other) } + + /** Holds if this dominates `other` (reflexively). */ + pragma[inline] + predicate dominates(ControlFlowNode other) { super.dominates(other) } + + /** Holds if this is the first node in its enclosing scope. */ + predicate isEntryNode() { this instanceof CfgImpl::ControlFlow::EntryNode } + + /** Holds if this is the first node of a module. */ + predicate isModuleEntry() { + this.isEntryNode() and super.getAstNode().asScope() instanceof Py::Module + } + + /** Holds if this node may exit its scope by raising an exception. 
*/ + predicate isExceptionalExit(Py::Scope s) { + this instanceof CfgImpl::ControlFlow::ExceptionalExitNode and + super.getEnclosingCallable().asScope() = s + } + + /** Holds if this node is a normal (non-exceptional) exit. */ + predicate isNormalExit() { this instanceof CfgImpl::ControlFlow::NormalExitNode } + + // ===== AST-shape predicates (bridges to the wrapped Python AST) ===== + /** + * Holds if this flow node is a load (including those in augmented + * assignments). + * + * Note: an augmented-assignment target (`x[i]` in `x[i] += 1`) is + * both a load and a store — `isLoad` and `isStore` both hold on the + * canonical CFG node. This mirrors Java's `VarAccess.isVarRead`, + * which holds on the destination of compound and unary assignments + * even though the destination is also a write. + */ + predicate isLoad() { exists(Py::Expr e | e = toAst(this) | py_expr_contexts(_, 3, e)) } + + /** Holds if this flow node is a store (including those in augmented assignments). */ + predicate isStore() { + exists(Py::Expr e | e = toAst(this) | py_expr_contexts(_, 5, e) or augstore(_, this)) + } + + /** Holds if this flow node is a delete. */ + predicate isDelete() { exists(Py::Expr e | e = toAst(this) | py_expr_contexts(_, 2, e)) } + + /** Holds if this flow node is a parameter. */ + predicate isParameter() { exists(Py::Expr e | e = toAst(this) | py_expr_contexts(_, 4, e)) } + + /** Holds if this flow node is a store in an augmented assignment. */ + predicate isAugStore() { augstore(_, this) } + + /** Holds if this flow node is a load in an augmented assignment. */ + predicate isAugLoad() { augstore(this, _) } + + /** Holds if this flow node corresponds to a literal. 
*/ + predicate isLiteral() { + toAst(this) instanceof Py::Bytes or + toAst(this) instanceof Py::Dict or + toAst(this) instanceof Py::DictComp or + toAst(this) instanceof Py::Set or + toAst(this) instanceof Py::SetComp or + toAst(this) instanceof Py::Ellipsis or + toAst(this) instanceof Py::GeneratorExp or + toAst(this) instanceof Py::Lambda or + toAst(this) instanceof Py::ListComp or + toAst(this) instanceof Py::List or + toAst(this) instanceof Py::Num or + toAst(this) instanceof Py::Tuple or + toAst(this) instanceof Py::Unicode or + toAst(this) instanceof Py::NameConstant + } + + /** Holds if this flow node corresponds to an attribute expression. */ + predicate isAttribute() { toAst(this) instanceof Py::Attribute } + + /** Holds if this flow node corresponds to a subscript expression. */ + predicate isSubscript() { toAst(this) instanceof Py::Subscript } + + /** Holds if this flow node corresponds to an import member. */ + predicate isImportMember() { toAst(this) instanceof Py::ImportMember } + + /** Holds if this flow node corresponds to a call. */ + predicate isCall() { toAst(this) instanceof Py::Call } + + /** Holds if this flow node corresponds to an import. */ + predicate isImport() { toAst(this) instanceof Py::ImportExpr } + + /** Holds if this flow node corresponds to a conditional expression. */ + predicate isIfExp() { toAst(this) instanceof Py::IfExp } + + /** Holds if this flow node corresponds to a function definition expression. */ + predicate isFunction() { toAst(this) instanceof Py::FunctionExpr } + + /** Holds if this flow node corresponds to a class definition expression. */ + predicate isClass() { toAst(this) instanceof Py::ClassExpr } + + /** + * Holds if this flow node is a branch (i.e. has a true successor or a + * false successor). 
+ */ + predicate isBranch() { exists(this.getATrueSuccessor()) or exists(this.getAFalseSuccessor()) } + + /** + * Gets a CFG child of this node, defined as a CFG node whose AST node + * is a child of this CFG node's AST node, restricted to nodes that + * dominate this one (so the child has been evaluated by the time we + * reach this node). + * + * Mirrors `Flow.qll`'s `getAChild`. UnaryExprNode is excluded because + * its operand is its CFG predecessor (handled separately). + */ + pragma[nomagic] + ControlFlowNode getAChild() { + toAst(this).(Py::Expr).getAChildNode() = toAst(result) and + result.getBasicBlock().dominates(this.getBasicBlock()) and + not this instanceof UnaryExprNode + } + + /** Holds if this flow node strictly reaches `other`. */ + predicate strictlyReaches(ControlFlowNode other) { this.getASuccessor+() = other } +} + +/** + * Holds if `load` is the load half of an augmented-assignment target, + * and `store` is the corresponding store half. + * + * In the legacy CFG (`Flow.qll`) the same Python `Name` had two + * distinct CFG nodes — a load node (context 3) earlier in the BB, and + * a store node (context 5) later. The legacy `augstore` related the + * pair via dominance. + * + * In the new (shared) CFG, the canonical node for an AST expression is + * unique, so `load` and `store` collapse onto the same CFG node. The + * predicate is therefore reflexive on the augmented-assignment + * target's canonical node. + */ +private predicate augstore(ControlFlowNode load, ControlFlowNode store) { + exists(Py::AugAssign aa | aa.getTarget() = toAst(load)) and + load = store +} + +/** + * A basic block — a maximal-length sequence of control flow nodes such + * that no node except the first has a predecessor outside the sequence, + * and no node except the last has a successor outside the sequence. + */ +class BasicBlock extends CfgImpl::BasicBlock { + /** Gets the `n`th node in this basic block. 
*/ + ControlFlowNode getNode(int n) { result = super.getNode(n) } + + /** Gets a node in this basic block. */ + ControlFlowNode getANode() { result = super.getNode(_) } + + /** Gets the first node in this basic block. */ + ControlFlowNode firstNode() { result = this.getNode(0) } + + /** Gets the last node in this basic block. */ + ControlFlowNode getLastNode() { result = super.getLastNode() } + + /** Holds if this basic block contains `node`. */ + predicate contains(ControlFlowNode node) { node = this.getANode() } + + // Inherited from the shared library's `BasicBlock`: + // getASuccessor(), getASuccessor(SuccessorType), getAPredecessor(), + // strictlyDominates(), dominates(), getImmediateDominator(), + // length(), inLoop(). + // We shadow `getNode(int)` etc. to return `ControlFlowNode` (this + // facade's type) and add Python-style helpers below. + /** Gets a true successor to this basic block. */ + BasicBlock getATrueSuccessor() { + result = super.getASuccessor(any(BooleanSuccessor t | t.getValue() = true)) + } + + /** Gets a false successor to this basic block. */ + BasicBlock getAFalseSuccessor() { + result = super.getASuccessor(any(BooleanSuccessor t | t.getValue() = false)) + } + + /** Gets an unconditional successor to this basic block. */ + BasicBlock getAnUnconditionalSuccessor() { + result = super.getASuccessor() and + not result = this.getATrueSuccessor() and + not result = this.getAFalseSuccessor() + } + + /** Gets an exceptional successor to this basic block. */ + BasicBlock getAnExceptionalSuccessor() { result = super.getASuccessor(any(ExceptionSuccessor t)) } + + /** + * Holds if `df` is in the dominance frontier of this basic block. + * + * Note: implemented locally rather than via the shared lib, which + * doesn't currently expose a `dominanceFrontier` predicate at this + * level. 
+ */ + predicate inDominanceFrontier(BasicBlock df) { + this = df.getAPredecessor() and not this = df.getImmediateDominator() + or + exists(BasicBlock prev | prev.inDominanceFrontier(df) | + this = prev.getImmediateDominator() and + not this = df.getImmediateDominator() + ) + } + + /** Holds if this basic block strictly reaches `other`. */ + predicate strictlyReaches(BasicBlock other) { super.getASuccessor+() = other } + + /** Holds if this basic block reaches `other` (reflexively). */ + predicate reaches(BasicBlock other) { this = other or this.strictlyReaches(other) } + + /** Holds if flow from this basic block reaches a normal exit from its scope. */ + predicate reachesExit() { + this.getANode() instanceof CfgImpl::ControlFlow::NormalExitNode + or + exists(BasicBlock succ | succ = super.getASuccessor() and succ.reachesExit()) + } + + /** Gets the scope of this basic block. */ + Py::Scope getScope() { exists(ControlFlowNode n | n = this.getANode() | result = n.getScope()) } + + /** Holds if flow from this BasicBlock always reaches `succ`. */ + predicate alwaysReaches(BasicBlock succ) { + succ = this + or + strictcount(BasicBlock s | s = super.getASuccessor()) = 1 and + succ = super.getASuccessor() + or + forex(BasicBlock immsucc | immsucc = super.getASuccessor() | immsucc.alwaysReaches(succ)) + } + + /** + * Holds if this basic block ends in a node that branches on a boolean + * outcome, and `other` is dominated by the corresponding successor + * for `branch` while not being reachable from the other branch + * without going through this BB. + * + * In other words: any execution that reaches `other` must have just + * evaluated the last node of this BB and taken the `branch` outcome. + * This mirrors the legacy `ConditionBlock.controls(BB, branch)`. 
+ */ + predicate controls(BasicBlock other, boolean branch) { + exists(BasicBlock succ | + branch = true and succ = this.getATrueSuccessor() + or + branch = false and succ = this.getAFalseSuccessor() + | + succ.dominates(other) and + // The other branch must not also reach `other` — otherwise + // `other` is not actually controlled by `branch`. + not exists(BasicBlock otherSucc | + branch = true and otherSucc = this.getAFalseSuccessor() + or + branch = false and otherSucc = this.getATrueSuccessor() + | + otherSucc.reaches(other) + ) + ) + } +} + +// =========================================================================== +// Re-exports for SSA / dominance consumers +// +// The shared `BB::CfgSig` requires `EntryBasicBlock` and `dominatingEdge` in +// addition to the BasicBlock class we already expose. They are provided by +// the shared CFG library on the `BB::Make` instantiation produced by +// `AstNodeImpl.qll`. +// =========================================================================== +/** An entry basic block, that is, a basic block whose first node is an entry node. */ +class EntryBasicBlock = CfgImpl::Cfg::EntryBasicBlock; + +/** + * Holds if `bb1` has `bb2` as a direct successor and the edge between `bb1` + * and `bb2` is a dominating edge. + */ +predicate dominatingEdge = CfgImpl::Cfg::dominatingEdge/2; + +// =========================================================================== +// AST-shape subclasses of ControlFlowNode +// +// Each class is a thin wrapper around the canonical CFG node for a given +// kind of Python AST node. Methods that take/return CFG nodes look up +// related CFG nodes by AST identity (via `getNode()`), and the dominance +// constraint from the old CFG (`result.getBasicBlock().dominates(this.getBasicBlock())`) +// is preserved. +// =========================================================================== +/** Gets the canonical `ControlFlowNode` for AST expression `e`. 
*/ +ControlFlowNode astExprToCfg(Py::Expr e) { result.getNode() = e } + +/** A control flow node corresponding to a `Name` or `PlaceHolder` expression. */ +class NameNode extends ControlFlowNode { + NameNode() { + toAst(this) instanceof Py::Name + or + toAst(this) instanceof Py::PlaceHolder + } + + /** + * Holds if this flow node defines the variable `v`. + * + * This includes augmented-assignment targets — `n += 1` is both a + * read and a write of `n`, so `defines(n)` and `uses(n)` both hold + * on the same canonical CFG node. Mirrors Java's `VariableUpdate` + * semantics where compound assignments register both a write + * (`VarWrite`) and a read (`VarRead`) on the destination. + */ + predicate defines(Py::Variable v) { exists(Py::Name n | n = toAst(this) and n.defines(v)) } + + /** Holds if this flow node deletes the variable `v`. */ + predicate deletes(Py::Variable v) { exists(Py::Name n | n = toAst(this) and n.deletes(v)) } + + /** Holds if this flow node uses the variable `v`. */ + predicate uses(Py::Variable v) { + this.isLoad() and + exists(Py::Name u | u = toAst(this) and u.uses(v)) + or + exists(Py::PlaceHolder u | + u = toAst(this) and u.getVariable() = v and u.getCtx() instanceof Py::Load + ) + } + + /** Gets the identifier of this name node. */ + string getId() { + result = toAst(this).(Py::Name).getId() + or + result = toAst(this).(Py::PlaceHolder).getId() + } + + /** Holds if this is a use of a local variable. */ + predicate isLocal() { exists(Py::Variable v | this.uses(v) and v instanceof Py::LocalVariable) } + + /** Holds if this is a use of a non-local variable. */ + predicate isNonLocal() { + exists(Py::Variable v | this.uses(v) and v.getScope() != this.getScope()) + } + + /** Holds if this is a use of a global (including builtin) variable. */ + predicate isGlobal() { exists(Py::Variable v | this.uses(v) and v instanceof Py::GlobalVariable) } + + /** + * Holds if this is a use of `self` — the first parameter of an + * enclosing method. 
+ * + * AST-level approximation: matches when the Name uses a `Variable` + * that is the first parameter of an enclosing `Function` defined + * inside a `Class`. + */ + predicate isSelf() { + exists(Py::Variable v, Py::Function f, Py::Class c | + this.uses(v) and + f = c.getAMethod() and + v.getScope() = f and + v = f.getArg(0).(Py::Name).getVariable() + ) + } +} + +/** A control flow node corresponding to a named constant (`None`, `True`, `False`). */ +class NameConstantNode extends NameNode { + NameConstantNode() { toAst(this) instanceof Py::NameConstant } +} + +/** A control flow node corresponding to a call. */ +class CallNode extends ControlFlowNode { + CallNode() { toAst(this) instanceof Py::Call } + + override Py::Call getNode() { result = super.getNode() } + + /** Gets the underlying Python `Call`. */ + Py::Call getCall() { result = toAst(this) } + + /** Gets the flow node for the function component of this call. */ + ControlFlowNode getFunction() { + exists(Py::Call c | + c = toAst(this) and + c.getFunc() = toAst(result) and + result.getBasicBlock().dominates(this.getBasicBlock()) + ) + } + + /** Gets the flow node for the `n`th positional argument. */ + ControlFlowNode getArg(int n) { + exists(Py::Call c | + c = toAst(this) and + c.getArg(n) = toAst(result) and + result.getBasicBlock().dominates(this.getBasicBlock()) + ) + } + + /** Gets the flow node for the named argument with name `name`. */ + ControlFlowNode getArgByName(string name) { + exists(Py::Call c, Py::Keyword k | + c = toAst(this) and + k = c.getANamedArg() and + k.getValue() = toAst(result) and + k.getArg() = name and + result.getBasicBlock().dominates(this.getBasicBlock()) + ) + } + + /** Gets a flow node corresponding to any argument. */ + ControlFlowNode getAnArg() { result = this.getArg(_) or result = this.getArgByName(_) } + + /** Gets the first tuple (`*args`) argument, if any. 
*/ + ControlFlowNode getStarArg() { + exists(Py::Call c | + c = toAst(this) and + c.getStarArg() = toAst(result) and + result.getBasicBlock().dominates(this.getBasicBlock()) + ) + } + + /** Gets a dictionary (`**kwargs`) argument, if any. */ + ControlFlowNode getKwargs() { + exists(Py::Call c | + c = toAst(this) and + c.getKwargs() = toAst(result) and + result.getBasicBlock().dominates(this.getBasicBlock()) + ) + } + + predicate isDecoratorCall() { this.isClassDecoratorCall() or this.isFunctionDecoratorCall() } + + predicate isClassDecoratorCall() { + exists(Py::ClassExpr cls | toAst(this) = cls.getADecoratorCall()) + } + + predicate isFunctionDecoratorCall() { + exists(Py::FunctionExpr func | toAst(this) = func.getADecoratorCall()) + } +} + +/** A control flow node corresponding to an attribute expression. */ +class AttrNode extends ControlFlowNode { + AttrNode() { toAst(this) instanceof Py::Attribute } + + override Py::Attribute getNode() { result = super.getNode() } + + /** Gets the flow node for the object of the attribute expression. */ + ControlFlowNode getObject() { + exists(Py::Attribute a | + a = toAst(this) and + a.getObject() = toAst(result) and + result.getBasicBlock().dominates(this.getBasicBlock()) + ) + } + + /** Gets the flow node for the object of this attribute expression, with the matching name. */ + ControlFlowNode getObject(string name) { + exists(Py::Attribute a | + a = toAst(this) and + a.getObject() = toAst(result) and + a.getName() = name and + result.getBasicBlock().dominates(this.getBasicBlock()) + ) + } + + /** Gets the attribute name. */ + string getName() { exists(Py::Attribute a | a = toAst(this) and a.getName() = result) } +} + +/** A control flow node corresponding to an import statement (`import x`). 
*/ +class ImportExprNode extends ControlFlowNode { + ImportExprNode() { toAst(this) instanceof Py::ImportExpr } + + override Py::ImportExpr getNode() { result = super.getNode() } +} + +/** A control flow node corresponding to a `from ... import name` expression. */ +class ImportMemberNode extends ControlFlowNode { + ImportMemberNode() { toAst(this) instanceof Py::ImportMember } + + override Py::ImportMember getNode() { result = super.getNode() } + + /** Gets the flow node for the module being imported from, with the matching name. */ + ControlFlowNode getModule(string name) { + exists(Py::ImportMember i | + i = toAst(this) and + i.getModule() = toAst(result) and + i.getName() = name and + result.getBasicBlock().dominates(this.getBasicBlock()) + ) + } +} + +/** A control flow node corresponding to a `from ... import *` statement. */ +class ImportStarNode extends ControlFlowNode { + ImportStarNode() { toAst(this) instanceof Py::ImportStar } + + override Py::ImportStar getNode() { result = super.getNode() } + + /** Gets the flow node for the module being imported from. */ + ControlFlowNode getModule() { + exists(Py::ImportStar i | + i = toAst(this) and + i.getModuleExpr() = toAst(result) and + result.getBasicBlock().dominates(this.getBasicBlock()) + ) + } +} + +/** A control flow node corresponding to a subscript expression. */ +class SubscriptNode extends ControlFlowNode { + SubscriptNode() { toAst(this) instanceof Py::Subscript } + + override Py::Subscript getNode() { result = super.getNode() } + + /** Gets the flow node for the value being subscripted. */ + ControlFlowNode getObject() { + exists(Py::Subscript s | + s = toAst(this) and + s.getObject() = toAst(result) and + result.getBasicBlock().dominates(this.getBasicBlock()) + ) + } + + /** Gets the flow node for the index expression. 
*/ + ControlFlowNode getIndex() { + exists(Py::Subscript s | + s = toAst(this) and + s.getIndex() = toAst(result) and + result.getBasicBlock().dominates(this.getBasicBlock()) + ) + } +} + +/** A control flow node corresponding to a comparison operation. */ +class CompareNode extends ControlFlowNode { + CompareNode() { toAst(this) instanceof Py::Compare } + + override Py::Compare getNode() { result = super.getNode() } + + /** Holds if `left` and `right` are a pair of operands for this comparison. */ + predicate operands(ControlFlowNode left, Py::Cmpop op, ControlFlowNode right) { + exists(Py::Compare c, Py::Expr eleft, Py::Expr eright | + c = toAst(this) and eleft = toAst(left) and eright = toAst(right) + | + eleft = c.getLeft() and eright = c.getComparator(0) and op = c.getOp(0) + or + exists(int i | + eleft = c.getComparator(i - 1) and eright = c.getComparator(i) and op = c.getOp(i) + ) + ) and + left.getBasicBlock().dominates(this.getBasicBlock()) and + right.getBasicBlock().dominates(this.getBasicBlock()) + } +} + +/** A control flow node corresponding to a conditional expression (`x if c else y`). */ +class IfExprNode extends ControlFlowNode { + IfExprNode() { toAst(this) instanceof Py::IfExp } + + override Py::IfExp getNode() { result = super.getNode() } + + /** Gets the flow node for one of the value operands (true-branch or false-branch). */ + ControlFlowNode getAnOperand() { + exists(Py::IfExp ie | + ie = toAst(this) and + (toAst(result) = ie.getBody() or toAst(result) = ie.getOrelse()) + ) + } +} + +/** A control flow node corresponding to an assignment expression (walrus `:=`). */ +class AssignmentExprNode extends ControlFlowNode { + AssignmentExprNode() { toAst(this) instanceof Py::AssignExpr } + + override Py::AssignExpr getNode() { result = super.getNode() } + + /** Gets the flow node for the left-hand side. 
*/ + ControlFlowNode getTarget() { + exists(Py::AssignExpr a | + a = toAst(this) and + a.getTarget() = toAst(result) and + result.getBasicBlock().dominates(this.getBasicBlock()) + ) + } + + /** Gets the flow node for the right-hand side. */ + ControlFlowNode getValue() { + exists(Py::AssignExpr a | + a = toAst(this) and + a.getValue() = toAst(result) and + result.getBasicBlock().dominates(this.getBasicBlock()) + ) + } +} + +/** A control flow node corresponding to a binary expression (`a + b` etc.). */ +class BinaryExprNode extends ControlFlowNode { + BinaryExprNode() { toAst(this) instanceof Py::BinaryExpr } + + override Py::BinaryExpr getNode() { result = super.getNode() } + + ControlFlowNode getLeft() { + exists(Py::BinaryExpr be | + be = toAst(this) and + be.getLeft() = toAst(result) and + result.getBasicBlock().dominates(this.getBasicBlock()) + ) + } + + ControlFlowNode getRight() { + exists(Py::BinaryExpr be | + be = toAst(this) and + be.getRight() = toAst(result) and + result.getBasicBlock().dominates(this.getBasicBlock()) + ) + } + + Py::Operator getOp() { result = toAst(this).(Py::BinaryExpr).getOp() } + + /** Holds if `left` and `right` are the operands and `op` is the operator. */ + predicate operands(ControlFlowNode left, Py::Operator op, ControlFlowNode right) { + left = this.getLeft() and right = this.getRight() and op = this.getOp() + } + + /** Gets either operand. */ + ControlFlowNode getAnOperand() { result = this.getLeft() or result = this.getRight() } +} + +/** A control flow node corresponding to a boolean expression (`a and b`, `a or b`). */ +class BoolExprNode extends ControlFlowNode { + BoolExprNode() { toAst(this) instanceof Py::BoolExpr } + + override Py::BoolExpr getNode() { result = super.getNode() } + + Py::Boolop getOp() { result = toAst(this).(Py::BoolExpr).getOp() } + + /** Gets any operand of this boolean expression. 
*/ + ControlFlowNode getAnOperand() { + exists(Py::BoolExpr be | + be = toAst(this) and + be.getAValue() = toAst(result) + ) + } +} + +/** A control flow node corresponding to a unary expression (`-x`, `not x`, etc.). */ +class UnaryExprNode extends ControlFlowNode { + UnaryExprNode() { toAst(this) instanceof Py::UnaryExpr } + + override Py::UnaryExpr getNode() { result = super.getNode() } + + ControlFlowNode getOperand() { + exists(Py::UnaryExpr u | + u = toAst(this) and + u.getOperand() = toAst(result) and + result.getBasicBlock().dominates(this.getBasicBlock()) + ) + } + + Py::Unaryop getOp() { result = toAst(this).(Py::UnaryExpr).getOp() } +} + +/** + * A control flow node that is a definition: it appears in a context that + * binds a variable (assignment target, parameter, etc.). + */ +class DefinitionNode extends ControlFlowNode { + DefinitionNode() { this.isStore() or this.isParameter() } + + /** Gets the value assigned, if any. */ + ControlFlowNode getValue() { + // For-target: the value is the for-loop's iter expression (which + // is also where `Cfg::ForNode` lives — its `getNode()` returns the + // enclosing `Py::For` statement). Treated specially because there + // is no AST node holding the result of `next(iter(seq))`; we use + // the iter expression's CFG node as the stand-in. + exists(Py::For f | + f.getTarget() = toAst(this) and + toAst(result) = f.getIter() + ) + or + exists(Py::AstNode value | value = assignedValue(toAst(this)) | + toAst(result) = value and + ( + result.getBasicBlock().dominates(this.getBasicBlock()) + or + result.isImport() + or + // The default value for a parameter is evaluated in the same basic block as + // the function definition, but the parameter belongs to the basic block of the + // function, so there is no dominance relationship between the two. + exists(Py::Parameter param | toAst(this) = param.asName()) + ) + ) + } +} + +/** + * Gets the AST node that holds the value assigned to `lhs` in a binding + * context.
Mirrors `Flow.qll::assigned_value`. + */ +private Py::AstNode assignedValue(Py::Expr lhs) { + // lhs = result + exists(Py::Assign a | a.getATarget() = lhs and result = a.getValue()) + or + // lhs := result + exists(Py::AssignExpr a | a.getTarget() = lhs and result = a.getValue()) + or + // lhs: annotation = result + exists(Py::AnnAssign a | a.getTarget() = lhs and result = a.getValue()) + or + // import result as lhs (also covers plain `import lhs`, where alias.getAsname() = lhs) + exists(Py::Alias a | a.getAsname() = lhs and result = a.getValue()) + or + // lhs += x -> result is the (lhs + x) binary expression + exists(Py::AugAssign a, Py::BinaryExpr b | + b = a.getOperation() and result = b and lhs = b.getLeft() + ) + or + // Nested sequence assign: ..., lhs, ... = ..., result, ... + exists(Py::Assign a | nestedSequenceAssign(a.getATarget(), a.getValue(), lhs, result)) + or + // Parameter default + exists(Py::Parameter param | lhs = param.asName() and result = param.getDefault()) +} + +/** + * Helper for nested sequence assignments such as `(a, b), c = (1, 2), 3`. + */ +private predicate nestedSequenceAssign( + Py::Expr leftParent, Py::Expr rightParent, Py::Expr left, Py::Expr right +) { + exists(int i | + leftParent.(Py::Tuple).getElt(i) = left and rightParent.(Py::Tuple).getElt(i) = right + or + leftParent.(Py::List).getElt(i) = left and rightParent.(Py::List).getElt(i) = right + ) + or + exists(Py::Expr leftMid, Py::Expr rightMid | + nestedSequenceAssign(leftParent, rightParent, leftMid, rightMid) and + nestedSequenceAssign(leftMid, rightMid, left, right) + ) +} + +/** A control flow node corresponding to a deletion (`del x`). */ +class DeletionNode extends ControlFlowNode { + DeletionNode() { this.isDelete() } +} + +/** A control flow node corresponding to the iterable expression of a `for` loop. */ +class ForNode extends ControlFlowNode { + ForNode() { exists(Py::For f | toAst(this) = f.getIter()) } + + /** Gets the iterable expression.
*/ + ControlFlowNode getIter() { + result = this and result = result // canonical "after" of the iterable + } + + /** Gets the sequence expression (alias for `getIter()`, matches legacy Flow naming). */ + ControlFlowNode getSequence() { result = this.getIter() } + + /** Gets the target (loop variable) of the `for` loop. */ + ControlFlowNode getTarget() { + exists(Py::For f | + f.getIter() = toAst(this) and + f.getTarget() = toAst(result) + ) + } + + /** Holds if `target` is the loop variable and `sequence` is the iterable. */ + predicate iterates(ControlFlowNode target, ControlFlowNode sequence) { + target = this.getTarget() and sequence = this.getSequence() + } +} + +/** A control flow node corresponding to a `raise` statement. */ +class RaiseStmtNode extends ControlFlowNode { + RaiseStmtNode() { toAst(this) instanceof Py::Raise } + + override Py::Raise getNode() { result = super.getNode() } + + /** Gets the exception expression, if any. */ + ControlFlowNode getException() { + exists(Py::Raise r | + r = toAst(this) and + r.getException() = toAst(result) and + result.getBasicBlock().dominates(this.getBasicBlock()) + ) + } +} + +/** A control flow node corresponding to a starred expression (`*x`). */ +class StarredNode extends ControlFlowNode { + StarredNode() { toAst(this) instanceof Py::Starred } + + /** Gets the value being starred. */ + ControlFlowNode getValue() { + exists(Py::Starred s | + s = toAst(this) and + s.getValue() = toAst(result) and + result.getBasicBlock().dominates(this.getBasicBlock()) + ) + } +} + +/** A control flow node corresponding to an `except` clause's name binding. */ +class ExceptFlowNode extends ControlFlowNode { + ExceptFlowNode() { exists(Py::ExceptStmt e | toAst(this) = e.getName()) } + + /** Gets the CFG node for the bound `as`-name itself. */ + ControlFlowNode getName() { result = this } + + /** Gets the type expression of this exception handler. 
*/ + ControlFlowNode getType() { + exists(Py::ExceptStmt e | + e.getName() = toAst(this) and + e.getType() = toAst(result) and + result.getBasicBlock().dominates(this.getBasicBlock()) + ) + } +} + +/** A control flow node corresponding to an `except*` clause's name binding. */ +class ExceptGroupFlowNode extends ControlFlowNode { + ExceptGroupFlowNode() { exists(Py::ExceptGroupStmt e | toAst(this) = e.getName()) } + + /** Gets the CFG node for the bound `as`-name itself. */ + ControlFlowNode getName() { result = this } +} + +/** Abstract base class for sequence nodes (tuple, list). */ +abstract class SequenceNode extends ControlFlowNode { + /** Gets the `n`th element of this sequence. */ + abstract ControlFlowNode getElement(int n); + + /** Gets any element of this sequence. */ + ControlFlowNode getAnElement() { result = this.getElement(_) } +} + +/** A control flow node corresponding to a tuple literal. */ +class TupleNode extends SequenceNode { + TupleNode() { toAst(this) instanceof Py::Tuple } + + override ControlFlowNode getElement(int n) { + exists(Py::Tuple t | + t = toAst(this) and + t.getElt(n) = toAst(result) and + result.getBasicBlock().dominates(this.getBasicBlock()) + ) + } +} + +/** A control flow node corresponding to a list literal. */ +class ListNode extends SequenceNode { + ListNode() { toAst(this) instanceof Py::List } + + override ControlFlowNode getElement(int n) { + exists(Py::List l | + l = toAst(this) and + l.getElt(n) = toAst(result) and + result.getBasicBlock().dominates(this.getBasicBlock()) + ) + } +} + +/** A control flow node corresponding to a set literal. */ +class SetNode extends ControlFlowNode { + SetNode() { toAst(this) instanceof Py::Set } + + /** Gets the flow node for an element of the set. 
*/ + ControlFlowNode getAnElement() { + exists(Py::Set s | + s = toAst(this) and + s.getAnElt() = toAst(result) and + result.getBasicBlock().dominates(this.getBasicBlock()) + ) + } +} + +/** A control flow node corresponding to a dict literal. */ +class DictNode extends ControlFlowNode { + DictNode() { toAst(this) instanceof Py::Dict } + + /** Gets the flow node for a key of the dict. */ + ControlFlowNode getAKey() { + exists(Py::Dict d | + d = toAst(this) and + d.getAKey() = toAst(result) and + result.getBasicBlock().dominates(this.getBasicBlock()) + ) + } + + /** Gets the flow node for a value of the dict. */ + ControlFlowNode getAValue() { + exists(Py::Dict d | + d = toAst(this) and + d.getAValue() = toAst(result) and + result.getBasicBlock().dominates(this.getBasicBlock()) + ) + } +} + +/** A control flow node corresponding to an iterable literal (a tuple, list, or set). */ +class IterableNode extends ControlFlowNode { + IterableNode() { + this instanceof SequenceNode + or + this instanceof SetNode + } + + /** Gets the control flow node for an element of this iterable. */ + ControlFlowNode getAnElement() { + result = this.(SequenceNode).getAnElement() + or + result = this.(SetNode).getAnElement() + } +} diff --git a/python/ql/test/library-tests/ControlFlow/store-load/StoreLoadTest.expected b/python/ql/test/library-tests/ControlFlow/store-load/StoreLoadTest.expected new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/ql/test/library-tests/ControlFlow/store-load/StoreLoadTest.ql b/python/ql/test/library-tests/ControlFlow/store-load/StoreLoadTest.ql new file mode 100644 index 000000000000..4ab2ef5be8fb --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/store-load/StoreLoadTest.ql @@ -0,0 +1,41 @@ +/** + * Inline-expectations test for the store/load/delete/parameter + * classification predicates on the new-CFG facade.
+ * + * Each tag fires when the corresponding predicate (`isLoad`, + * `isStore`, `isDelete`, `isParameter`, `isAugLoad`, `isAugStore`) + * holds on the canonical CFG node wrapping a `Py::Name` with the + * given identifier. Subscript and attribute stores are not covered + * by these tags — only the `Name`-typed targets/loads they involve. + */ + +import python +import semmle.python.controlflow.internal.Cfg as Cfg +import utils.test.InlineExpectationsTest + +module StoreLoadTest implements TestSig { + string getARelevantTag() { result = ["load", "store", "delete", "param", "augload", "augstore"] } + + predicate hasActualResult(Location location, string element, string tag, string value) { + exists(Cfg::NameNode n | + location = n.getLocation() and + element = n.toString() and + value = n.getId() and + ( + n.isLoad() and not n.isAugLoad() and tag = "load" + or + n.isStore() and not n.isAugStore() and tag = "store" + or + n.isDelete() and tag = "delete" + or + n.isParameter() and tag = "param" + or + n.isAugLoad() and tag = "augload" + or + n.isAugStore() and tag = "augstore" + ) + ) + } +} + +import MakeTest diff --git a/python/ql/test/library-tests/ControlFlow/store-load/test.py b/python/ql/test/library-tests/ControlFlow/store-load/test.py new file mode 100644 index 000000000000..dfca45a0b47b --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/store-load/test.py @@ -0,0 +1,56 @@ +# Store/load/delete/parameter classification on the new-CFG facade. +# +# Each annotated location carries the (sorted, deduplicated) set of +# kinds the CFG facade reports there. Comparing against the legacy +# 'semmle.python.Flow' classification is done by the comparison query +# 'StoreLoadParity.ql' — annotations here are only the positive +# assertions for the new facade. +# +# Tags: +# load= -- isLoad() fires on the Name +# store= -- isStore() fires +# delete= -- isDelete() fires +# param= -- isParameter() fires +# augload= -- isAugLoad() fires (the LHS of x += ... 
when read) +# augstore= -- isAugStore() fires (the LHS of x += ... when written) + + +# --- plain load / store / delete --- + +x = 1 # $ store=x +y = x + 1 # $ store=y load=x +print(y) # $ load=print load=y +del x # $ delete=x + + +# --- function definitions (parameters) --- + +def f(a, b=2, *args, c, **kwargs): # $ store=f param=a param=b param=args param=c param=kwargs + return a + b + c # $ load=a load=b load=c + + +# --- augmented assignment splits one Name into load + store halves --- + +def aug(): # $ store=aug + n = 0 # $ store=n + n += 1 # $ augload=n augstore=n + return n # $ load=n + + +# --- subscript / attribute stores --- + +class C: # $ store=C + pass + + +def stores(obj, container, idx): # $ store=stores param=obj param=container param=idx + obj.attr = 1 # $ load=obj + container[idx] = 2 # $ load=container load=idx + return obj # $ load=obj + + +# --- tuple unpacking --- + +def unpack(pair): # $ store=unpack param=pair + a, b = pair # $ store=a store=b load=pair + return a + b # $ load=a load=b From c95c3fe638019f809f71d0a0d41183621adbd25d Mon Sep 17 00:00:00 2001 From: yoff Date: Wed, 3 Jun 2026 09:46:03 +0000 Subject: [PATCH 3/3] Python: model exception edges for raise-prone expressions inside try/with MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new CFG previously only emitted exception edges for explicit `raise` and `assert` statements. As a result, code that became reachable only via the exception path of an arbitrary expression (e.g., the body of an `except` handler following a try-body whose `call()` could raise) was classified as dead, breaking analyses like StackTraceExposure, FileNotAlwaysClosed, ExceptionInfo, UseOfExit, and CatchingBaseException. 
This commit adds a `mayThrow` predicate over expressions that are known sources of implicit exceptions in Python (calls, attribute access, subscripts, arithmetic/comparison operators, imports, await/yield/yield from) plus `from m import *` at the statement level, and routes them through the shared CFG's `beginAbruptCompletion(_, _, ExceptionSuccessor, always=false)` hook. The set of exception sources is restricted to nodes that are syntactically inside a `try`/`with` statement in the same scope. This mirrors Java's `ControlFlowGraph::mayThrow`, which only emits exception edges where local handling can observe them — outside such contexts, the edges add CFG complexity (weakening BarrierGuard precision and breaking SSA continuity around augmented assignments and subscript stores) without analysis benefit, since exceptions just propagate to the function exit anyway. Net effect on the test suite: ~100 alerts restored across the exception- related query tests (StackTraceExposure +29, ExceptionInfo +17, FileNotAlwaysClosed +52, UseOfExit +1, CatchingBaseException restored) with no precision regressions. Affected `.expected` files and the regression-guard `dead_under_no_raise.py` are updated accordingly. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../controlflow/internal/AstNodeImpl.qll | 88 +++++++++++++++++++ .../bindings/dead_under_no_raise.py | 37 ++++---- 2 files changed, 107 insertions(+), 18 deletions(-) diff --git a/python/ql/lib/semmle/python/controlflow/internal/AstNodeImpl.qll b/python/ql/lib/semmle/python/controlflow/internal/AstNodeImpl.qll index 5dba3d96ea8e..fc289744b7f0 100644 --- a/python/ql/lib/semmle/python/controlflow/internal/AstNodeImpl.qll +++ b/python/ql/lib/semmle/python/controlflow/internal/AstNodeImpl.qll @@ -1553,6 +1553,89 @@ private module Input implements InputSig1, InputSig2 { private string assertThrowTag() { result = "[assert-throw]" } + /** + * Holds if the expression `e` may raise an exception at runtime as part of + * its normal evaluation (not via an explicit `raise`/`assert`, which are + * modelled separately). + * + * The set mirrors what the legacy CFG used to flag implicitly: function + * calls (anything can raise), attribute access (`AttributeError`), + * subscript access (`IndexError`/`KeyError`/`TypeError`), arithmetic and + * comparison operators (`TypeError`/`ZeroDivisionError`), imports + * (`ImportError`/`ModuleNotFoundError`), and generator/coroutine + * suspension points (`await`/`yield`/`yield from`). + * + * Bare `Name` reads are intentionally excluded — modelling every name + * read as `mayThrow` would explode CFG edge count for negligible + * analysis value. `BoolExpr`/`IfExp` containers are also excluded; the + * operands they evaluate contribute their own exception edges.
+ */ + private predicate exprMayThrow(Py::Expr e) { + e instanceof Py::Call + or + e instanceof Py::Attribute + or + e instanceof Py::Subscript + or + e instanceof Py::BinaryExpr + or + e instanceof Py::UnaryExpr + or + e instanceof Py::Compare + or + e instanceof Py::ImportExpr + or + e instanceof Py::ImportMember + or + e instanceof Py::Await + or + e instanceof Py::Yield + or + e instanceof Py::YieldFrom + } + + /** + * Holds if the statement `s` may raise an exception at runtime as part + * of its normal evaluation. Currently restricted to `from m import *` + * (which performs the import as a statement-level side effect). + */ + private predicate stmtMayThrow(Py::Stmt s) { s instanceof Py::ImportStar } + + /** + * Holds if `py` is syntactically inside the body, handlers, `else`, or + * `finally` of a `try` statement (or the body of a `with` statement, + * which compiles to an implicit try/finally for `__exit__`) in the + * same scope. + * + * This mirrors Java's `ControlFlowGraph::mayThrow`, which only emits + * exception edges when there is local exception handling that would + * observe them. Outside such contexts, exception edges would add CFG + * complexity (weakening BarrierGuard precision and breaking SSA + * continuity around augmented assignments and subscript stores) + * without any analysis benefit, since exceptions just propagate to + * the function exit anyway. + */ + private predicate inExceptionContext(Py::AstNode py) { + exists(Py::Try t | t.containsInScope(py)) + or + exists(Py::With w | w.containsInScope(py)) + } + + /** + * Holds if `n` may raise an exception during normal evaluation. See + * `exprMayThrow` and `stmtMayThrow` for the included AST classes. + * + * Restricted to nodes inside a `try`/`with` statement: matches Java's + * approach of only modelling exception flow where it can be observed + * by local handling.
+ */ + private predicate mayThrow(Ast::AstNode n) { + exists(Py::AstNode py | py = n.asExpr() or py = n.asStmt() | + (exprMayThrow(py) or stmtMayThrow(py)) and + inExceptionContext(py) + ) + } + predicate additionalNode(Ast::AstNode n, string tag, NormalSuccessor t) { n instanceof Ast::AssertStmt and tag = assertThrowTag() and t instanceof DirectSuccessor } @@ -1564,6 +1647,11 @@ private module Input implements InputSig1, InputSig2 { n.isAdditional(ast, assertThrowTag()) and c.asSimpleAbruptCompletion() instanceof ExceptionSuccessor and always = true + or + mayThrow(ast) and + n.isIn(ast) and + c.asSimpleAbruptCompletion() instanceof ExceptionSuccessor and + always = false } predicate endAbruptCompletion(Ast::AstNode ast, PreControlFlowNode n, AbruptCompletion c) { diff --git a/python/ql/test/library-tests/ControlFlow/bindings/dead_under_no_raise.py b/python/ql/test/library-tests/ControlFlow/bindings/dead_under_no_raise.py index dbfb857b5360..9058f2b71165 100644 --- a/python/ql/test/library-tests/ControlFlow/bindings/dead_under_no_raise.py +++ b/python/ql/test/library-tests/ControlFlow/bindings/dead_under_no_raise.py @@ -1,15 +1,15 @@ -# Dead bindings under the "no expressions raise" CFG abstraction. +# Reachability of code following a try whose body always returns. # -# The new CFG does not currently model raise edges from arbitrary -# expressions. As a consequence, code that is only reachable through -# exception flow is (correctly) classified as dead and has no CFG node. -# Variable bindings in dead code do not need CFG nodes - SSA / dataflow -# over dead code is moot. +# The new CFG models exception edges for raise-prone expressions when +# they appear inside a `try` (or `with`) statement, mirroring Java's +# `mayThrow`. This means the body of a `try` has both a normal +# completion edge and an exception edge to its handlers, so code +# following the try-statement is reachable via the except-handler path +# even when the try-body would otherwise always return. 
# -# These tests act as a regression guard: the bindings below intentionally -# have no `cfgdefines=` annotations. If raise modelling is later added, -# the BindingsTest infrastructure will surface the new CFG nodes as -# unexpected results, and this file will need to be revisited. +# Code that is not reachable under either normal or exception flow +# (for example, the `else` clause of a try whose body unconditionally +# raises) remains correctly classified as dead. def f(obj): # $ cfgdefines=f cfgdefines=obj @@ -18,12 +18,12 @@ def f(obj): # $ cfgdefines=f cfgdefines=obj except TypeError: pass - # The first try's body always returns; its except handler does not - # raise or otherwise transfer control, so under "no expressions - # raise" the only paths out of the try-statement are dead. Everything - # below is unreachable. + # The try-body always returns, but `len(obj)` can raise (it is + # inside the try, so we model its exception edge). The + # `except TypeError: pass` handler falls through to here, making + # the code below reachable. try: - hint = type(obj).__length_hint__ + hint = type(obj).__length_hint__ # $ cfgdefines=hint except AttributeError: return None return hint @@ -35,7 +35,8 @@ def g(): # $ cfgdefines=g except: raise Exception("outer") else: - # Unreachable: the inner try body always raises, so the `else:` + # Unreachable: the inner try body always raises (via an explicit + # `raise`, which is modelled unconditionally), so the `else:` # clause never runs. hit_inner_else = True @@ -46,7 +47,7 @@ def h(cache, key): # $ cfgdefines=h cfgdefines=cache cfgdefines=key except KeyError: pass - # Same pattern as `f`: dead under "no expressions raise". - value = compute(key) + # Same pattern as `f`: reachable via the except-handler fall-through. + value = compute(key) # $ cfgdefines=value cache[key] = value return value