From 0961ef6495cd41943b51dd23864e723d1e45b6fa Mon Sep 17 00:00:00 2001 From: LongYinan Date: Wed, 27 May 2026 17:40:50 +0800 Subject: [PATCH 01/21] fix(decorator): hoist consts referenced by emitted Ivy definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #287. When `@Component` metadata such as `providers` references a top-level `const` declared *after* the class, the emitted `static ɵcmp` field evaluates `ɵɵProvidersFeature([…])` eagerly at class-init time. With the binding still in TDZ, this threw `ReferenceError` at module load. Match Angular's official compiler by hoisting referenced VariableDeclaration statements above the earliest class that needs them. Identifier collection walks decorator metadata but stops at function/arrow/class bodies, so lazy references like `useFactory: () => DEP` don't trigger unnecessary moves. Function declarations are JS-hoisted already, and class declarations are intentionally skipped to avoid clobbering the transform pipeline's existing edits. Co-Authored-By: Claude Opus 4.7 --- .../src/component/hoist.rs | 481 ++++++++++++++++++ .../oxc_angular_compiler/src/component/mod.rs | 1 + .../src/component/transform.rs | 8 + .../tests/integration_test.rs | 243 +++++++++ 4 files changed, 733 insertions(+) create mode 100644 crates/oxc_angular_compiler/src/component/hoist.rs diff --git a/crates/oxc_angular_compiler/src/component/hoist.rs b/crates/oxc_angular_compiler/src/component/hoist.rs new file mode 100644 index 000000000..361cdaba7 --- /dev/null +++ b/crates/oxc_angular_compiler/src/component/hoist.rs @@ -0,0 +1,481 @@ +//! TDZ-safe hoisting of file-scope `const`/`let`/`var` bindings referenced by +//! Angular decorator metadata. +//! +//! When `@Component`, `@Directive`, `@NgModule`, or other Angular decorators +//! reference a top-level binding declared *after* the decorated class, the +//! emitted Ivy definition (e.g. `static ɵcmp = … ɵɵProvidersFeature([{ +//! provide: TOKEN, … }])`) evaluates that reference eagerly at +//! class-definition time. Because the declaration is still in the temporal +//! dead zone, this throws `ReferenceError` at module load (issue #287). +//! +//! Angular's official compiler hoists such referenced declarations above the +//! decorated class. This module mirrors that behavior. +//! +//! The implementation is intentionally conservative: +//! * Only top-level `VariableDeclaration` statements are eligible for +//! hoisting. Function declarations are already JS-hoisted with their +//! bodies; class declarations are skipped because hoisting them would +//! clobber other edits the transform pipeline applies to the same span. +//! * Bindings declared *before* the decorated class are never touched. +//! * Identifier collection walks decorator metadata eagerly but stops at +//! function/arrow bodies and class expression bodies — references that +//! only fire when a factory or method runs (e.g. `useFactory: () => DEP`) +//! don't trigger a hoist. + +use std::collections::{HashMap, HashSet}; + +use oxc_ast::ast::{ + Argument, ArrayExpressionElement, BindingPattern, Class, Declaration, Decorator, + ExportDefaultDeclarationKind, Expression, ObjectPropertyKind, Program, Statement, +}; +use oxc_span::GetSpan; + +use crate::optimizer::Edit; + +/// One referenced-by-decorator top-level binding scheduled for hoisting. +#[derive(Clone, Copy)] +struct HoistEntry { + /// Span of the statement to relocate. + stmt_start: u32, + stmt_end: u32, + /// End of the deletion (extends `stmt_end` past trailing newline so the + /// hoist doesn't leave a stray blank line behind). + delete_end: u32, + /// Insertion target — the earliest referencing class's effective start. + insert_at: u32, +} + +/// Build edits that hoist top-level bindings referenced by decorator metadata +/// of any class but declared *after* that class. +/// +/// Returns a list of edits the caller appends to the wider edit set. Each +/// hoisted statement becomes a delete-at-original + insert-before-class pair. +/// Insert edits run at `HOIST_INSERT_PRIORITY` so they sort *after* the +/// existing `decls_before_class` insertion at the same offset; since +/// `apply_edits` applies higher-priority edits later — and each later +/// insertion at the same offset pushes earlier text further right — the +/// hoisted statements end up immediately above the class, with any +/// constant-pool declarations from the compiler in between. +pub fn collect_hoist_edits<'a>(program: &Program<'a>, source: &str) -> Vec { + let bindings = collect_top_level_bindings(program, source); + if bindings.is_empty() { + return Vec::new(); + } + + // For each top-level decorated class, find the identifiers eagerly + // referenced in its decorator metadata. Record the earliest such class + // position per referenced binding so multiple references hoist exactly + // once, ahead of the first user. + let mut plan: HashMap<&'a str, HoistEntry> = HashMap::new(); + + for stmt in &program.body { + let Some((class, stmt_start)) = class_of(stmt) else { continue }; + + // Skip classes that don't carry any Angular decorator we care about. + // Walking every class would be safe but wastes work on unrelated code. + if !has_angular_decorator(class) { + continue; + } + + let mut referenced: HashSet<&'a str> = HashSet::new(); + for decorator in &class.decorators { + collect_decorator_idents(decorator, &mut referenced); + } + + if referenced.is_empty() { + continue; + } + + let class_body_end = class.body.span.end; + let effective_start = effective_class_start(class, stmt_start); + + for name in referenced { + let Some(info) = bindings.get(name) else { continue }; + // Only hoist declarations that start AFTER the class body ends. + // Anything before is already TDZ-safe. + if info.stmt_start <= class_body_end { + continue; + } + + plan.entry(name) + .and_modify(|existing| { + if effective_start < existing.insert_at { + existing.insert_at = effective_start; + } + }) + .or_insert(HoistEntry { + stmt_start: info.stmt_start, + stmt_end: info.stmt_end, + delete_end: info.delete_end, + insert_at: effective_start, + }); + } + } + + if plan.is_empty() { + return Vec::new(); + } + + // Sort entries by source position so multiple hoists preserve their + // original relative order in the output. + let mut entries: Vec = plan.into_values().collect(); + entries.sort_by_key(|e| e.stmt_start); + + // We want hoisted text to appear *above* `decls_before_class` (which + // contains constant-pool decls that may reference the hoisted identifiers). + // Existing `decls_before_class` runs at priority 0. apply_edits applies + // lower priority *first* at the same offset, and each later application + // pushes earlier text further right in the output — so a *higher* + // priority lands the hoisted text earlier in the result. Pick 5. + const HOIST_INSERT_PRIORITY: i32 = 5; + + // Group hoisted statements by their target insertion point so that + // multiple consts headed to the same class are emitted as a single + // insert edit, with their text concatenated in source order. Emitting + // them as separate edits would reverse their order, since each insert + // at the same offset prepends to the prior insert's text. + let mut emitted_stmts: HashSet = HashSet::new(); + let mut per_target: HashMap = HashMap::new(); + let mut edits = Vec::new(); + + for entry in &entries { + if !emitted_stmts.insert(entry.stmt_start) { + continue; + } + + let text = &source[entry.stmt_start as usize..entry.stmt_end as usize]; + let bucket = per_target.entry(entry.insert_at).or_default(); + bucket.push_str(text); + bucket.push('\n'); + + edits.push(Edit::delete(entry.stmt_start, entry.delete_end)); + } + + for (insert_at, text) in per_target { + edits.push(Edit::insert(insert_at, text).with_priority(HOIST_INSERT_PRIORITY)); + } + + edits +} + +/// Compute the effective start of a class statement, ignoring trailing +/// whitespace but spanning any leading decorators that will remain in the +/// source. We don't have access to the in-progress `decorator_spans_to_remove` +/// list here, so we conservatively use the earliest decorator span — the +/// hoisted text will land before *all* decorators, which is correct regardless +/// of which decorators end up being stripped. +fn effective_class_start(class: &Class<'_>, stmt_start: u32) -> u32 { + class.decorators.iter().map(|d| d.span.start).min().map_or(stmt_start, |d| d.min(stmt_start)) +} + +/// Locate the inner class declaration of a top-level statement, returning the +/// effective statement start (including any `export` keyword). +fn class_of<'a, 'src>(stmt: &'src Statement<'a>) -> Option<(&'src Class<'a>, u32)> { + match stmt { + Statement::ClassDeclaration(class) => Some((class.as_ref(), class.span.start)), + Statement::ExportDefaultDeclaration(export) => match &export.declaration { + ExportDefaultDeclarationKind::ClassDeclaration(class) => { + Some((class.as_ref(), export.span.start)) + } + _ => None, + }, + Statement::ExportNamedDeclaration(export) => match &export.declaration { + Some(Declaration::ClassDeclaration(class)) => Some((class.as_ref(), export.span.start)), + _ => None, + }, + _ => None, + } +} + +/// Does this class carry any decorator that Angular's compiler emits eager +/// definitions for? We don't try to be precise here — any of the well-known +/// Angular decorators makes the class a candidate. +fn has_angular_decorator(class: &Class<'_>) -> bool { + class.decorators.iter().any(|d| { + let callee = match &d.expression { + Expression::CallExpression(call) => &call.callee, + expr => expr, + }; + let name = match callee { + Expression::Identifier(id) => id.name.as_str(), + Expression::StaticMemberExpression(member) => member.property.name.as_str(), + _ => return false, + }; + matches!(name, "Component" | "Directive" | "Pipe" | "NgModule" | "Injectable") + }) +} + +/// Information about a top-level binding declaration's location. +#[derive(Clone, Copy)] +struct BindingInfo { + stmt_start: u32, + stmt_end: u32, + delete_end: u32, +} + +/// Walk top-level statements and index every variable binding identifier +/// they declare. Multiple identifiers from a combined declaration +/// (`const A = 1, B = 2;`) share the same statement span — hoisting one +/// hoists the whole statement, which is harmless because the other bindings +/// come along for the ride. +/// +/// Only `VariableDeclaration` (const/let/var) and the `export` form of it are +/// considered: +/// +/// * `function` declarations are fully hoisted by the JavaScript runtime +/// already (their bodies are available before their textual position), so +/// they never trigger TDZ. +/// * Class declarations are intentionally skipped here because hoisting them +/// would race the rest of the transform pipeline, which inserts static +/// fields and surrounding declarations at the class's original position. +/// Deleting the class's source range would clobber those inserts. +/// Forward-referenced classes are rare in real Angular code and out of +/// scope for this fix. +fn collect_top_level_bindings<'a>( + program: &Program<'a>, + source: &str, +) -> HashMap<&'a str, BindingInfo> { + let bytes = source.as_bytes(); + let mut out: HashMap<&'a str, BindingInfo> = HashMap::new(); + + for stmt in &program.body { + let stmt_span = stmt.span(); + let info = BindingInfo { + stmt_start: stmt_span.start, + stmt_end: stmt_span.end, + delete_end: end_with_trailing_newline(stmt_span.end, bytes), + }; + + let decl = match stmt { + Statement::VariableDeclaration(decl) => Some(decl.as_ref()), + Statement::ExportNamedDeclaration(export) => match &export.declaration { + Some(Declaration::VariableDeclaration(decl)) => Some(decl.as_ref()), + _ => None, + }, + _ => None, + }; + + let Some(decl) = decl else { continue }; + for declarator in &decl.declarations { + add_binding_names(&declarator.id, info, &mut out); + } + } + + out +} + +/// Extract identifier names from a binding pattern. We only handle plain +/// identifier patterns — anything destructured (`const { a } = x;`) is left +/// alone because hoisting destructuring would change observable behavior if +/// the right-hand side has side effects. +fn add_binding_names<'a>( + pat: &BindingPattern<'a>, + info: BindingInfo, + out: &mut HashMap<&'a str, BindingInfo>, +) { + if let BindingPattern::BindingIdentifier(id) = pat { + out.insert(id.name.as_str(), info); + } +} + +/// Advance `end` past one trailing line terminator so that deleting the +/// statement also removes its terminating newline, leaving a clean gap. +fn end_with_trailing_newline(end: u32, bytes: &[u8]) -> u32 { + let mut pos = end as usize; + while pos < bytes.len() { + match bytes[pos] { + b' ' | b'\t' | b'\r' => pos += 1, + b'\n' => { + pos += 1; + break; + } + _ => break, + } + } + pos as u32 +} + +/// Collect identifiers referenced inside the decorator argument expressions. +/// Only the decorator's call arguments (i.e. the metadata object) are walked. +fn collect_decorator_idents<'a>(decorator: &Decorator<'a>, out: &mut HashSet<&'a str>) { + let Expression::CallExpression(call) = &decorator.expression else { + return; + }; + for arg in &call.arguments { + match arg { + Argument::SpreadElement(spread) => { + collect_expr_idents(&spread.argument, out); + } + other => { + if let Some(expr) = argument_to_expression(other) { + collect_expr_idents(expr, out); + } + } + } + } +} + +fn argument_to_expression<'a, 'src>(arg: &'src Argument<'a>) -> Option<&'src Expression<'a>> { + if arg.is_expression() { Some(arg.to_expression()) } else { None } +} + +/// Walk an expression collecting every bare identifier reference. Walks +/// through arrays, object literals, spreads, conditionals, calls, etc. Skips: +/// +/// * The body of any function/arrow expression — references inside a factory +/// like `useFactory: () => new Service(DEP)` only fire when the factory is +/// invoked at injection time, never at class-definition time. +/// * The body of class expressions for the same lazy-evaluation reason. +/// * Property names that aren't computed — `{ provide: x }` references `x` +/// (the value) but not `provide` (the property name). +/// * Member expression property names — `Foo.BAR` references `Foo`; `BAR` is +/// a property access, not a bare identifier. +/// * TypeScript type annotations and assertions. +fn collect_expr_idents<'a>(expr: &Expression<'a>, out: &mut HashSet<&'a str>) { + use Expression as E; + match expr { + E::Identifier(id) => { + out.insert(id.name.as_str()); + } + E::ArrayExpression(arr) => { + for el in &arr.elements { + collect_array_element_idents(el, out); + } + } + E::ObjectExpression(obj) => { + for prop in &obj.properties { + match prop { + ObjectPropertyKind::ObjectProperty(p) => { + // Computed keys (e.g. `{ [TOKEN]: 1 }`) reference the + // key identifier; static keys don't. + if p.computed { + if let Some(key_expr) = p.key.as_expression() { + collect_expr_idents(key_expr, out); + } + } + collect_expr_idents(&p.value, out); + } + ObjectPropertyKind::SpreadProperty(spread) => { + collect_expr_idents(&spread.argument, out); + } + } + } + } + E::CallExpression(call) => { + collect_callee_idents(&call.callee, out); + for arg in &call.arguments { + match arg { + Argument::SpreadElement(s) => collect_expr_idents(&s.argument, out), + other => { + if let Some(e) = argument_to_expression(other) { + collect_expr_idents(e, out); + } + } + } + } + // Type arguments may carry identifier references but typed code + // is erased; they're irrelevant at runtime. + } + E::NewExpression(new) => { + collect_expr_idents(&new.callee, out); + for arg in &new.arguments { + match arg { + Argument::SpreadElement(s) => collect_expr_idents(&s.argument, out), + other => { + if let Some(e) = argument_to_expression(other) { + collect_expr_idents(e, out); + } + } + } + } + } + E::ConditionalExpression(cond) => { + collect_expr_idents(&cond.test, out); + collect_expr_idents(&cond.consequent, out); + collect_expr_idents(&cond.alternate, out); + } + E::LogicalExpression(log) => { + collect_expr_idents(&log.left, out); + collect_expr_idents(&log.right, out); + } + E::BinaryExpression(bin) => { + collect_expr_idents(&bin.left, out); + collect_expr_idents(&bin.right, out); + } + E::UnaryExpression(un) => { + collect_expr_idents(&un.argument, out); + } + E::SequenceExpression(seq) => { + for e in &seq.expressions { + collect_expr_idents(e, out); + } + } + E::ParenthesizedExpression(p) => { + collect_expr_idents(&p.expression, out); + } + E::TemplateLiteral(tpl) => { + for e in &tpl.expressions { + collect_expr_idents(e, out); + } + } + E::TaggedTemplateExpression(tagged) => { + collect_expr_idents(&tagged.tag, out); + for e in &tagged.quasi.expressions { + collect_expr_idents(e, out); + } + } + E::StaticMemberExpression(member) => { + collect_expr_idents(&member.object, out); + } + E::ComputedMemberExpression(member) => { + collect_expr_idents(&member.object, out); + collect_expr_idents(&member.expression, out); + } + E::PrivateFieldExpression(member) => { + collect_expr_idents(&member.object, out); + } + E::AwaitExpression(a) => collect_expr_idents(&a.argument, out), + E::YieldExpression(y) => { + if let Some(arg) = &y.argument { + collect_expr_idents(arg, out); + } + } + E::TSAsExpression(ts) => collect_expr_idents(&ts.expression, out), + E::TSSatisfiesExpression(ts) => collect_expr_idents(&ts.expression, out), + E::TSNonNullExpression(ts) => collect_expr_idents(&ts.expression, out), + E::TSTypeAssertion(ts) => collect_expr_idents(&ts.expression, out), + E::TSInstantiationExpression(ts) => collect_expr_idents(&ts.expression, out), + // Class expressions inside metadata are exceedingly rare and their + // bodies aren't eagerly evaluated; treat them as opaque. + E::ClassExpression(_) => {} + // Function and arrow bodies run lazily — references inside don't + // affect class-init evaluation. + E::ArrowFunctionExpression(_) | E::FunctionExpression(_) => {} + // Literals and `this`/`super` carry no identifier references. + _ => {} + } +} + +fn collect_callee_idents<'a>(callee: &Expression<'a>, out: &mut HashSet<&'a str>) { + collect_expr_idents(callee, out); +} + +fn collect_array_element_idents<'a>(el: &ArrayExpressionElement<'a>, out: &mut HashSet<&'a str>) { + match el { + ArrayExpressionElement::SpreadElement(spread) => { + collect_expr_idents(&spread.argument, out); + } + ArrayExpressionElement::Elision(_) => {} + other => { + if let Some(expr) = array_element_to_expression(other) { + collect_expr_idents(expr, out); + } + } + } +} + +fn array_element_to_expression<'a, 'src>( + el: &'src ArrayExpressionElement<'a>, +) -> Option<&'src Expression<'a>> { + if el.is_expression() { Some(el.to_expression()) } else { None } +} diff --git a/crates/oxc_angular_compiler/src/component/mod.rs b/crates/oxc_angular_compiler/src/component/mod.rs index 37b5d2816..1e4a35ce2 100644 --- a/crates/oxc_angular_compiler/src/component/mod.rs +++ b/crates/oxc_angular_compiler/src/component/mod.rs @@ -11,6 +11,7 @@ mod cross_file_elision; mod decorator; mod definition; mod dependency; +mod hoist; mod import_elision; mod metadata; mod namespace_registry; diff --git a/crates/oxc_angular_compiler/src/component/transform.rs b/crates/oxc_angular_compiler/src/component/transform.rs index 181a211cb..57e670808 100644 --- a/crates/oxc_angular_compiler/src/component/transform.rs +++ b/crates/oxc_angular_compiler/src/component/transform.rs @@ -27,6 +27,7 @@ use super::decorator::{ extract_component_metadata, find_component_decorator, find_component_decorator_span, }; use super::definition::{const_value_to_expression, generate_component_definitions}; +use super::hoist::collect_hoist_edits; use super::import_elision::{ImportElisionAnalyzer, import_elision_edits}; use super::metadata::{AngularVersion, ComponentMetadata, HostMetadata}; use super::namespace_registry::NamespaceRegistry; @@ -2567,6 +2568,13 @@ pub fn transform_angular_file( } } + // 5e. TDZ-safe hoisting of top-level bindings referenced by decorator + // metadata but declared after the decorated class. Without this, the + // emitted `ɵcmp` static field's `ɵɵProvidersFeature` would evaluate the + // reference at class-definition time and throw `ReferenceError`. See + // issue #287. + edits.extend(collect_hoist_edits(&parser_ret.program, source)); + // Apply all edits in one pass if options.sourcemap { let (code, map) = apply_edits_with_sourcemap(source, edits, path); diff --git a/crates/oxc_angular_compiler/tests/integration_test.rs b/crates/oxc_angular_compiler/tests/integration_test.rs index 4972c845b..9a6afbcb6 100644 --- a/crates/oxc_angular_compiler/tests/integration_test.rs +++ b/crates/oxc_angular_compiler/tests/integration_test.rs @@ -10295,3 +10295,246 @@ export class UnresolvedComponent {} result.code ); } + +// ============================================================================= +// Issue #287: TDZ-safe hoisting of consts referenced by emitted Ivy definitions +// ============================================================================= +// When `@Component` metadata references a `const` (or other binding) declared +// *after* the class, the emitted Ivy definition (`ɵcmp` static field) evaluates +// the providers array eagerly in the class body. Because the const is still in +// the temporal dead zone at that point, this throws `ReferenceError: Cannot +// access 'TOKEN' before initialization` at module load. +// +// Angular's official compiler hoists such consts above the class declaration. +// These tests pin that behavior. + +/// A `const` referenced by `providers` and declared after the class must be +/// hoisted above the class so the eagerly-evaluated `ɵɵProvidersFeature` does +/// not hit the TDZ at class-init time. +#[test] +fn component_providers_const_after_class_is_hoisted() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'x', template: '', providers: [{ provide: TOKEN, useValue: 1 }] }) +export class TestComponent {} +const TOKEN = 'tok'; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + // The const TOKEN must appear before `class TestComponent` in the output + // so it is initialized before the static `ɵcmp` field evaluates providers. + let token_pos = result + .code + .find("const TOKEN") + .unwrap_or_else(|| panic!("Expected `const TOKEN` to be present.\nCode:\n{}", result.code)); + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + assert!( + token_pos < class_pos, + "`const TOKEN` must be hoisted above `class TestComponent`. \ + token@{token_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + + // Must only appear once: the original must have been deleted from its + // original location. + let count = result.code.matches("const TOKEN").count(); + assert_eq!( + count, 1, + "`const TOKEN` should appear exactly once (original deleted). Got {count}.\nCode:\n{}", + result.code + ); +} + +/// `viewProviders` is also evaluated eagerly via `ɵɵProvidersFeature` — consts +/// it references must be hoisted too. +#[test] +fn component_view_providers_const_after_class_is_hoisted() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ + selector: 'x', + template: '', + viewProviders: [{ provide: VIEW_TOKEN, useValue: 2 }], +}) +export class TestComponent {} +const VIEW_TOKEN = 'view-tok'; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let token_pos = result.code.find("const VIEW_TOKEN").unwrap_or_else(|| { + panic!("Expected `const VIEW_TOKEN` to be present.\nCode:\n{}", result.code) + }); + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + assert!( + token_pos < class_pos, + "`const VIEW_TOKEN` must be hoisted above `class TestComponent`. \ + token@{token_pos} class@{class_pos}\nCode:\n{}", + result.code + ); +} + +/// Multiple distinct providers consts after the class — all referenced by +/// metadata — must be hoisted, preserving their original relative order. +#[test] +fn component_multiple_provider_consts_after_class_are_hoisted_in_order() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ + selector: 'x', + template: '', + providers: [ + { provide: TOKEN_A, useValue: 1 }, + { provide: TOKEN_B, useValue: 2 }, + ], +}) +export class TestComponent {} +const TOKEN_A = 'a'; +const TOKEN_B = 'b'; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let a_pos = result.code.find("const TOKEN_A").expect("TOKEN_A missing"); + let b_pos = result.code.find("const TOKEN_B").expect("TOKEN_B missing"); + let class_pos = result.code.find("class TestComponent").expect("class missing"); + assert!( + a_pos < class_pos && b_pos < class_pos, + "Both consts must be hoisted above the class. \ + a@{a_pos} b@{b_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + assert!( + a_pos < b_pos, + "Relative order of consts must be preserved (A before B).\nCode:\n{}", + result.code + ); +} + +/// `useFactory` referencing a const declared later still hoists the const, +/// because the const is captured in the providers array argument which +/// `ɵɵProvidersFeature` evaluates at class-init time. Note: identifiers +/// referenced *inside* the factory's arrow-function body fire lazily when the +/// factory is invoked, so they don't need hoisting — only top-level metadata +/// references do. +#[test] +fn component_use_factory_dependency_const_is_hoisted_when_referenced_at_top_level() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ + selector: 'x', + template: '', + providers: [{ provide: TOKEN, useFactory: () => 'val', deps: [DEP_TOKEN] }], +}) +export class TestComponent {} +const TOKEN = 'tok'; +const DEP_TOKEN = 'dep'; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let class_pos = result.code.find("class TestComponent").expect("class missing"); + let token_pos = result.code.find("const TOKEN").expect("TOKEN missing"); + let dep_pos = result.code.find("const DEP_TOKEN").expect("DEP_TOKEN missing"); + assert!(token_pos < class_pos, "TOKEN (provider key) must be hoisted.\nCode:\n{}", result.code); + assert!( + dep_pos < class_pos, + "DEP_TOKEN (deps array entry) must be hoisted.\nCode:\n{}", + result.code + ); +} + +/// Two `@Component` classes in the same file that both reference the same +/// later-declared const must hoist it exactly once, ahead of the earliest +/// referencing class. +#[test] +fn component_shared_provider_const_is_hoisted_once_for_multiple_classes() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'a', template: '', providers: [{ provide: SHARED, useValue: 1 }] }) +export class A {} +@Component({ selector: 'b', template: '', providers: [{ provide: SHARED, useValue: 2 }] }) +export class B {} +const SHARED = 'shared'; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let count = result.code.matches("const SHARED").count(); + assert_eq!(count, 1, "`const SHARED` should appear exactly once.\nCode:\n{}", result.code); + + let shared_pos = result.code.find("const SHARED").unwrap(); + let a_pos = result.code.find("class A").unwrap(); + let b_pos = result.code.find("class B").unwrap(); + assert!( + shared_pos < a_pos && shared_pos < b_pos, + "const must be hoisted above both classes.\nshared@{shared_pos} a@{a_pos} b@{b_pos}\nCode:\n{}", + result.code + ); +} + +/// Identifiers referenced *only* inside a factory function body fire when +/// the factory is invoked, never at class-definition time. They do NOT need +/// to be hoisted. This guards against over-hoisting that could break code +/// that relies on the original declaration order (e.g. a const initialized +/// using values not yet computed at module load). +#[test] +fn component_const_referenced_only_inside_factory_body_is_not_hoisted() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ + selector: 'x', + template: '', + providers: [{ provide: 'k', useFactory: () => LAZY_VALUE }], +}) +export class TestComponent {} +const LAZY_VALUE = 'lazy'; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let lazy_pos = result.code.find("const LAZY_VALUE").expect("LAZY_VALUE missing"); + let class_pos = result.code.find("class TestComponent").expect("class missing"); + assert!( + lazy_pos > class_pos, + "Const referenced only inside the factory body should NOT be hoisted.\n\ + lazy@{lazy_pos} class@{class_pos}\nCode:\n{}", + result.code + ); +} + +/// A const declared *before* the class must NOT be moved — only post-class +/// declarations need hoisting. The compiler must not pointlessly rewrite +/// already-valid code. +#[test] +fn component_provider_const_before_class_is_not_hoisted() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +const TOKEN = 'tok'; +@Component({ selector: 'x', template: '', providers: [{ provide: TOKEN, useValue: 1 }] }) +export class TestComponent {} +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + // The const must still appear once (we did not duplicate it). + let count = result.code.matches("const TOKEN").count(); + assert_eq!(count, 1, "`const TOKEN` should still appear once.\nCode:\n{}", result.code); + + // And it must come before the class (its original position). + let token_pos = result.code.find("const TOKEN").unwrap(); + let class_pos = result.code.find("class TestComponent").unwrap(); + assert!(token_pos < class_pos, "Order should be preserved.\nCode:\n{}", result.code); +} From 8ceae120b04a044337466224fd32195bbc57cea9 Mon Sep 17 00:00:00 2001 From: LongYinan Date: Wed, 27 May 2026 17:51:28 +0800 Subject: [PATCH 02/21] =?UTF-8?q?test:=20scope=20unresolved-interpolation?= =?UTF-8?q?=20assertion=20to=20=C9=B5cmp=20selectors=20only?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After #299 turned `emit_class_metadata` on by default, the raw `${UNRESOLVED}-tag` template literal is intentionally preserved verbatim inside `ɵsetClassMetadata(...)` to mirror ngc — that's runtime metadata, not the compiled selector. The original test asserted on the full output and now panics on every CI run since d83445f landed on main. Narrow the assertion to the `ɵcmp` definition itself and additionally verify the compiled selector falls back to `ng-component`, matching the test's stated intent ("unresolved interpolation must not produce a partial/garbage selector"). Co-Authored-By: Claude Opus 4.7 --- .../tests/integration_test.rs | 26 ++++++++++++++++--- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/crates/oxc_angular_compiler/tests/integration_test.rs b/crates/oxc_angular_compiler/tests/integration_test.rs index 9a6afbcb6..af1041bf3 100644 --- a/crates/oxc_angular_compiler/tests/integration_test.rs +++ b/crates/oxc_angular_compiler/tests/integration_test.rs @@ -10271,6 +10271,13 @@ export class AppComponent {} /// An interpolated `${...}` whose identifier is NOT a known const must NOT /// crash and must NOT produce a partial/garbage selector — the field is /// dropped (same fallback as today for any unresolvable identifier). +/// +/// Scope: this test asserts ONLY on the `ɵcmp` selectors field. Since #299 +/// turned `emit_class_metadata` on by default, the raw `${UNRESOLVED}-tag` +/// template literal is intentionally preserved verbatim inside +/// `ɵsetClassMetadata(..., [{ type: Component, args: [...] }], ...)` to +/// mirror ngc's behavior — that's metadata for runtime tooling and is not +/// the compiled selector itself. #[test] fn component_template_literal_unresolved_identifier_drops_field() { let allocator = Allocator::default(); @@ -10288,11 +10295,22 @@ export class UnresolvedComponent {} // Must not crash. assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); - // Must not emit a selector containing an unresolved literal. + // The unresolved interpolation must not appear inside the `ɵcmp`'s + // `selectors:` slot — that's the compiled selector that actually drives + // template matching. + let cmp_start = result.code.find("ɵɵdefineComponent({").expect("ɵcmp missing"); + let cmp_section = &result.code[cmp_start..]; + let cmp_end = cmp_section.find("})").expect("ɵcmp not terminated"); + let cmp_def = &cmp_section[..cmp_end]; assert!( - !result.code.contains("${UNRESOLVED}-tag"), - "Unresolved interpolation must not leak verbatim into selectors.\nCode:\n{}", - result.code + !cmp_def.contains("${UNRESOLVED}-tag"), + "Unresolved interpolation must not leak verbatim into ɵcmp.\nɵcmp:\n{cmp_def}" + ); + // And the compiled selector must fall back to the default tag, matching + // ngc's behavior when a metadata interpolation can't be resolved. + assert!( + cmp_def.contains(r#"selectors:[["ng-component"]]"#), + "Selector should fall back to `ng-component`.\nɵcmp:\n{cmp_def}" ); } From 96f8c9afbe7ef5a12e9bb35976e592452bdfe534 Mon Sep 17 00:00:00 2001 From: LongYinan Date: Wed, 27 May 2026 18:16:57 +0800 Subject: [PATCH 03/21] fix(hoist): transitive deps + deterministic dedup for shared statements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two PR-review bugs in the hoist pass: 1. **Transitive deps were dropped.** When `@Component({ providers: PROVIDERS })` was followed by `const PROVIDERS = [{ provide: TOKEN, ... }]; const TOKEN = ...;`, only `PROVIDERS` got hoisted. The hoisted `const PROVIDERS = [...]` then evaluated above the class and TDZ-threw on `TOKEN` — just moving the same `ReferenceError` one frame deeper. 2. **Multi-declarator dedup was nondeterministic.** The plan was keyed by binding *name*, so `const A = 1, B = 2;` referenced by two different classes produced two `HoistEntry` values sharing `stmt_start` but carrying different `insert_at` targets. The `emitted_stmts` dedup kept whichever HashMap iteration visited first — often the *later* class — leaving the earlier class in the TDZ. Fix: * Key the plan by `stmt_start` and merge collisions by taking MIN `insert_at` (no more nondeterministic dedup; multi-declarators collapse to one entry by construction). * Per-statement, collect the union of identifier references across every declarator initializer and feed them back into a BFS worklist — so hoisting `PROVIDERS` also schedules `TOKEN` (transitive closure). * Topologically sort the planned statements before emission so dependencies land *before* their dependents in the hoisted prelude (e.g. `const TOKEN` precedes `const PROVIDERS = [{ provide: TOKEN, ... }]`). DFS is iterative to avoid stack overflow on deep chains; cycles are broken silently because they can't yield a valid evaluation order anyway. Adds two integration tests that lock in both fixes: * `component_provider_aggregate_const_pulls_in_transitive_tdz_dep` * `component_shared_multideclarator_const_hoists_above_earliest_referencer` (the latter was flaky against the old code — 7/20 failure rate locally before this change, 30/30 passes after). Co-Authored-By: Claude Opus 4.7 --- .../src/component/hoist.rs | 310 +++++++++++------- .../tests/integration_test.rs | 149 +++++++++ 2 files changed, 348 insertions(+), 111 deletions(-) diff --git a/crates/oxc_angular_compiler/src/component/hoist.rs b/crates/oxc_angular_compiler/src/component/hoist.rs index 361cdaba7..31cf648df 100644 --- a/crates/oxc_angular_compiler/src/component/hoist.rs +++ b/crates/oxc_angular_compiler/src/component/hoist.rs @@ -21,6 +21,11 @@ //! function/arrow bodies and class expression bodies — references that //! only fire when a factory or method runs (e.g. `useFactory: () => DEP`) //! don't trigger a hoist. +//! * Hoisting is *transitive*: if a hoisted binding's initializer references +//! another later-declared top-level binding, that one is hoisted too. The +//! final emission order is a topological sort of the dependency graph, so +//! `const PROVIDERS = [{ provide: TOKEN, ... }]` ends up *after* +//! `const TOKEN = ...` in the hoisted prelude. use std::collections::{HashMap, HashSet}; @@ -32,14 +37,26 @@ use oxc_span::GetSpan; use crate::optimizer::Edit; -/// One referenced-by-decorator top-level binding scheduled for hoisting. +/// Per-statement record collected during the initial scan. Multi-declarator +/// statements (`const A = 1, B = 2;`) get a single entry shared by every name +/// they bind; `init_idents` is the union of identifier references across all +/// declarator initializers. +struct StmtInfo<'a> { + stmt_end: u32, + /// End of the deletion (extends `stmt_end` past one trailing newline so + /// the hoist doesn't leave a stray blank line behind). + delete_end: u32, + /// Identifier references appearing in any declarator's initializer in + /// this statement. Used to drive transitive hoisting. + init_idents: HashSet<&'a str>, +} + +/// One statement scheduled for hoisting, keyed by its `stmt_start`. Multiple +/// classes that need the same statement collapse into a single entry whose +/// `insert_at` is the MIN of all referencers' effective starts. #[derive(Clone, Copy)] -struct HoistEntry { - /// Span of the statement to relocate. - stmt_start: u32, +struct PlanEntry { stmt_end: u32, - /// End of the deletion (extends `stmt_end` past trailing newline so the - /// hoist doesn't leave a stray blank line behind). delete_end: u32, /// Insertion target — the earliest referencing class's effective start. insert_at: u32, @@ -57,58 +74,82 @@ struct HoistEntry { /// hoisted statements end up immediately above the class, with any /// constant-pool declarations from the compiler in between. pub fn collect_hoist_edits<'a>(program: &Program<'a>, source: &str) -> Vec { - let bindings = collect_top_level_bindings(program, source); - if bindings.is_empty() { + // Step 1: index top-level bindings. + // - `binding_to_stmt`: identifier name → containing statement's `start`. + // - `stmt_info`: statement start → end/delete bounds and the union of + // identifier references across the statement's initializers. + let (binding_to_stmt, stmt_info) = collect_top_level_bindings(program, source); + if binding_to_stmt.is_empty() { return Vec::new(); } - // For each top-level decorated class, find the identifiers eagerly - // referenced in its decorator metadata. Record the earliest such class - // position per referenced binding so multiple references hoist exactly - // once, ahead of the first user. - let mut plan: HashMap<&'a str, HoistEntry> = HashMap::new(); + // Step 2: for every Angular-decorated class, BFS through binding + // initializers starting from the identifiers directly referenced in the + // decorator metadata. The plan is keyed by `stmt_start` (not name) so + // multi-declarator statements collapse into a single entry, and the + // `insert_at` is updated to the MIN across all referencers — that guards + // against the nondeterministic dedup bug where, with `const A = 1, B = 2;` + // referenced by two different classes, the surviving entry's `insert_at` + // depended on HashMap iteration order and could land *after* the earlier + // class. See PR #302 review. + let mut plan: HashMap = HashMap::new(); for stmt in &program.body { - let Some((class, stmt_start)) = class_of(stmt) else { continue }; - - // Skip classes that don't carry any Angular decorator we care about. - // Walking every class would be safe but wastes work on unrelated code. + let Some((class, stmt_start_pos)) = class_of(stmt) else { continue }; if !has_angular_decorator(class) { continue; } - let mut referenced: HashSet<&'a str> = HashSet::new(); + let mut direct: HashSet<&'a str> = HashSet::new(); for decorator in &class.decorators { - collect_decorator_idents(decorator, &mut referenced); + collect_decorator_idents(decorator, &mut direct); } - - if referenced.is_empty() { + if direct.is_empty() { continue; } let class_body_end = class.body.span.end; - let effective_start = effective_class_start(class, stmt_start); + let effective_start = effective_class_start(class, stmt_start_pos); - for name in referenced { - let Some(info) = bindings.get(name) else { continue }; - // Only hoist declarations that start AFTER the class body ends. - // Anything before is already TDZ-safe. - if info.stmt_start <= class_body_end { + let mut worklist: Vec<&'a str> = direct.into_iter().collect(); + let mut visited: HashSet<&'a str> = HashSet::new(); + while let Some(name) = worklist.pop() { + if !visited.insert(name) { + continue; + } + let Some(&stmt_start) = binding_to_stmt.get(name) else { continue }; + let Some(info) = stmt_info.get(&stmt_start) else { continue }; + // Skip bindings declared *before* this class — they're already + // initialized when the class evaluates. + if stmt_start <= class_body_end { continue; } - plan.entry(name) - .and_modify(|existing| { - if effective_start < existing.insert_at { - existing.insert_at = effective_start; + plan.entry(stmt_start) + .and_modify(|p| { + if effective_start < p.insert_at { + p.insert_at = effective_start; } }) - .or_insert(HoistEntry { - stmt_start: info.stmt_start, + .or_insert(PlanEntry { stmt_end: info.stmt_end, delete_end: info.delete_end, insert_at: effective_start, }); + + // Transitive hoist: if this binding's initializer references + // another later-declared binding, that one must move above the + // class too — otherwise the *hoisted* statement itself TDZ-throws + // when its initializer runs. Without this, `providers: PROVIDERS` + // followed by `const PROVIDERS = [{ provide: TOKEN, ... }]; const + // TOKEN = ...;` moves `PROVIDERS` but leaves `TOKEN` below, so + // module evaluation now throws inside the hoisted `PROVIDERS` + // initializer. See PR #302 review. + for n in &info.init_idents { + if !visited.contains(n) { + worklist.push(n); + } + } } } @@ -116,39 +157,28 @@ pub fn collect_hoist_edits<'a>(program: &Program<'a>, source: &str) -> Vec return Vec::new(); } - // Sort entries by source position so multiple hoists preserve their - // original relative order in the output. - let mut entries: Vec = plan.into_values().collect(); - entries.sort_by_key(|e| e.stmt_start); - - // We want hoisted text to appear *above* `decls_before_class` (which - // contains constant-pool decls that may reference the hoisted identifiers). - // Existing `decls_before_class` runs at priority 0. apply_edits applies - // lower priority *first* at the same offset, and each later application - // pushes earlier text further right in the output — so a *higher* - // priority lands the hoisted text earlier in the result. Pick 5. + // Step 3: topologically sort the planned statements so dependencies are + // emitted *before* their dependents in the hoisted prelude. Within a + // single bucket (same `insert_at`), this guarantees that e.g. `const + // TOKEN` precedes `const PROVIDERS = [{ provide: TOKEN, ... }]`. + let order = topological_order(&plan, &binding_to_stmt, &stmt_info); + + // Step 4: emit edits. Group by `insert_at` so multiple statements headed + // to the same class become a single insert edit whose text is the + // concatenation in topological order. Emitting them as separate edits at + // the same offset would invert their order (each insert at the same + // position prepends to the prior insert's text). const HOIST_INSERT_PRIORITY: i32 = 5; - - // Group hoisted statements by their target insertion point so that - // multiple consts headed to the same class are emitted as a single - // insert edit, with their text concatenated in source order. Emitting - // them as separate edits would reverse their order, since each insert - // at the same offset prepends to the prior insert's text. - let mut emitted_stmts: HashSet = HashSet::new(); let mut per_target: HashMap = HashMap::new(); - let mut edits = Vec::new(); - - for entry in &entries { - if !emitted_stmts.insert(entry.stmt_start) { - continue; - } + let mut edits: Vec = Vec::new(); - let text = &source[entry.stmt_start as usize..entry.stmt_end as usize]; - let bucket = per_target.entry(entry.insert_at).or_default(); + for stmt_start in &order { + let p = &plan[stmt_start]; + let text = &source[*stmt_start as usize..p.stmt_end as usize]; + let bucket = per_target.entry(p.insert_at).or_default(); bucket.push_str(text); bucket.push('\n'); - - edits.push(Edit::delete(entry.stmt_start, entry.delete_end)); + edits.push(Edit::delete(*stmt_start, p.delete_end)); } for (insert_at, text) in per_target { @@ -158,6 +188,81 @@ pub fn collect_hoist_edits<'a>(program: &Program<'a>, source: &str) -> Vec edits } +/// Iterative post-order DFS yielding a topological ordering of planned +/// statements: dependencies first, then dependents. The seed iteration is in +/// ascending `stmt_start` so the result is deterministic. Cycles (which would +/// require ill-formed source where two consts reference each other) are +/// broken silently — they can't produce a valid evaluation order anyway. +fn topological_order( + plan: &HashMap, + binding_to_stmt: &HashMap<&str, u32>, + stmt_info: &HashMap>, +) -> Vec { + let plan_starts: HashSet = plan.keys().copied().collect(); + + // Adjacency list: stmt_start -> stmt_starts it depends on (must come + // *before* it). Filter to only edges that land inside the plan; deps that + // resolve outside (declared before the class, or not top-level) are + // already TDZ-safe. + let mut deps: HashMap> = HashMap::with_capacity(plan_starts.len()); + for &start in &plan_starts { + let Some(info) = stmt_info.get(&start) else { + deps.insert(start, Vec::new()); + continue; + }; + let mut edges: Vec = info + .init_idents + .iter() + .filter_map(|n| binding_to_stmt.get(n)) + .copied() + .filter(|s| *s != start && plan_starts.contains(s)) + .collect(); + edges.sort_unstable(); + edges.dedup(); + deps.insert(start, edges); + } + + let mut all_starts: Vec = plan_starts.into_iter().collect(); + all_starts.sort_unstable(); + + // States: 0 = unvisited, 1 = on stack (visiting), 2 = done. + let mut state: HashMap = HashMap::new(); + let mut order: Vec = Vec::new(); + + // Iterative DFS via an explicit stack of (node, child_index). When all of + // a node's children are processed we move it from "visiting" to "done" + // and push it onto `order`. Recursion would be simpler but risks stack + // overflow on pathological inputs. + for seed in all_starts { + if matches!(state.get(&seed).copied(), Some(2)) { + continue; + } + let mut stack: Vec<(u32, usize)> = vec![(seed, 0)]; + state.insert(seed, 1); + while let Some(&(node, idx)) = stack.last() { + let children = deps.get(&node).map(Vec::as_slice).unwrap_or(&[]); + if idx < children.len() { + let child = children[idx]; + stack.last_mut().unwrap().1 += 1; + match state.get(&child).copied() { + Some(2) => {} // already emitted + Some(1) => {} // cycle — skip back-edge + _ => { + state.insert(child, 1); + stack.push((child, 0)); + } + } + } else { + state.insert(node, 2); + order.push(node); + stack.pop(); + } + } + } + + order +} + /// Compute the effective start of a class statement, ignoring trailing /// whitespace but spanning any leading decorators that will remain in the /// source. We don't have access to the in-progress `decorator_spans_to_remove` @@ -205,47 +310,32 @@ fn has_angular_decorator(class: &Class<'_>) -> bool { }) } -/// Information about a top-level binding declaration's location. -#[derive(Clone, Copy)] -struct BindingInfo { - stmt_start: u32, - stmt_end: u32, - delete_end: u32, -} - /// Walk top-level statements and index every variable binding identifier -/// they declare. Multiple identifiers from a combined declaration -/// (`const A = 1, B = 2;`) share the same statement span — hoisting one -/// hoists the whole statement, which is harmless because the other bindings -/// come along for the ride. +/// they declare, returning two complementary maps: +/// * `binding_to_stmt`: identifier name → containing statement's `start`. Used +/// to look up hoist info from an identifier reference. +/// * `stmt_info`: statement `start` → end/delete bounds and the union of +/// identifier references across every declarator's initializer. Used to +/// drive transitive hoisting and the topological sort. /// /// Only `VariableDeclaration` (const/let/var) and the `export` form of it are /// considered: -/// /// * `function` declarations are fully hoisted by the JavaScript runtime /// already (their bodies are available before their textual position), so /// they never trigger TDZ. -/// * Class declarations are intentionally skipped here because hoisting them -/// would race the rest of the transform pipeline, which inserts static -/// fields and surrounding declarations at the class's original position. -/// Deleting the class's source range would clobber those inserts. -/// Forward-referenced classes are rare in real Angular code and out of -/// scope for this fix. +/// * Class declarations are intentionally skipped because hoisting them would +/// race the rest of the transform pipeline, which inserts static fields and +/// surrounding declarations at the class's original position. Deleting the +/// class's source range would clobber those inserts. fn collect_top_level_bindings<'a>( program: &Program<'a>, source: &str, -) -> HashMap<&'a str, BindingInfo> { +) -> (HashMap<&'a str, u32>, HashMap>) { let bytes = source.as_bytes(); - let mut out: HashMap<&'a str, BindingInfo> = HashMap::new(); + let mut binding_to_stmt: HashMap<&'a str, u32> = HashMap::new(); + let mut stmt_info: HashMap> = HashMap::new(); for stmt in &program.body { - let stmt_span = stmt.span(); - let info = BindingInfo { - stmt_start: stmt_span.start, - stmt_end: stmt_span.end, - delete_end: end_with_trailing_newline(stmt_span.end, bytes), - }; - let decl = match stmt { Statement::VariableDeclaration(decl) => Some(decl.as_ref()), Statement::ExportNamedDeclaration(export) => match &export.declaration { @@ -254,28 +344,30 @@ fn collect_top_level_bindings<'a>( }, _ => None, }; - let Some(decl) = decl else { continue }; + + let span = stmt.span(); + let stmt_start = span.start; + let mut info = StmtInfo { + stmt_end: span.end, + delete_end: end_with_trailing_newline(span.end, bytes), + init_idents: HashSet::new(), + }; + for declarator in &decl.declarations { - add_binding_names(&declarator.id, info, &mut out); + if let BindingPattern::BindingIdentifier(id) = &declarator.id { + binding_to_stmt.insert(id.name.as_str(), stmt_start); + } + // Destructuring patterns are deliberately ignored — see + // collect_top_level_bindings docstring above. + if let Some(init) = &declarator.init { + collect_expr_idents(init, &mut info.init_idents); + } } + stmt_info.insert(stmt_start, info); } - out -} - -/// Extract identifier names from a binding pattern. We only handle plain -/// identifier patterns — anything destructured (`const { a } = x;`) is left -/// alone because hoisting destructuring would change observable behavior if -/// the right-hand side has side effects. -fn add_binding_names<'a>( - pat: &BindingPattern<'a>, - info: BindingInfo, - out: &mut HashMap<&'a str, BindingInfo>, -) { - if let BindingPattern::BindingIdentifier(id) = pat { - out.insert(id.name.as_str(), info); - } + (binding_to_stmt, stmt_info) } /// Advance `end` past one trailing line terminator so that deleting the @@ -362,7 +454,7 @@ fn collect_expr_idents<'a>(expr: &Expression<'a>, out: &mut HashSet<&'a str>) { } } E::CallExpression(call) => { - collect_callee_idents(&call.callee, out); + collect_expr_idents(&call.callee, out); for arg in &call.arguments { match arg { Argument::SpreadElement(s) => collect_expr_idents(&s.argument, out), @@ -456,10 +548,6 @@ fn collect_expr_idents<'a>(expr: &Expression<'a>, out: &mut HashSet<&'a str>) { } } -fn collect_callee_idents<'a>(callee: &Expression<'a>, out: &mut HashSet<&'a str>) { - collect_expr_idents(callee, out); -} - fn collect_array_element_idents<'a>(el: &ArrayExpressionElement<'a>, out: &mut HashSet<&'a str>) { match el { ArrayExpressionElement::SpreadElement(spread) => { diff --git a/crates/oxc_angular_compiler/tests/integration_test.rs b/crates/oxc_angular_compiler/tests/integration_test.rs index af1041bf3..5c105d4c5 100644 --- a/crates/oxc_angular_compiler/tests/integration_test.rs +++ b/crates/oxc_angular_compiler/tests/integration_test.rs @@ -10556,3 +10556,152 @@ export class TestComponent {} let class_pos = result.code.find("class TestComponent").unwrap(); assert!(token_pos < class_pos, "Order should be preserved.\nCode:\n{}", result.code); } + +/// Reproducer for PR #302 review feedback: when two bindings from the *same* +/// multi-declarator statement (`const A = 1, B = 2;`) are referenced by +/// different decorated classes, the hoist plan keys entries by binding name, +/// producing two `HoistEntry` values that share the same `stmt_start` but +/// carry different `insert_at` targets. The dedup loop in `collect_hoist_edits` +/// keeps whichever entry HashMap iteration visits first and drops the other — +/// so the chosen `insert_at` is nondeterministic, and can land *after* the +/// earliest referencing class. That leaves the earlier class still inside the +/// TDZ of the hoisted statement. +/// +/// Scenario from the Codex review: +/// * `class A` (decorated) references `B`. +/// * `class C` (decorated) references `A`. +/// * Both classes are declared *before* `const A = 1, B = 2;`. +/// +/// The correct behavior is to hoist the shared statement to *above the +/// earliest* referencing class (class A here), so both `A` and `B` are +/// initialized before either decorator runs. +#[test] +fn component_shared_multideclarator_const_hoists_above_earliest_referencer() { + let allocator = Allocator::default(); + // `Acomp` references `Bval` in its decorator metadata. + // `Ccomp` references `Aval` in its decorator metadata. + // The const declaring both `Aval` and `Bval` is declared *after* both + // classes, so both must be hoisted above the earliest class (`Acomp`). + let source = r#" +import { Component } from '@angular/core'; +@Component({ + selector: 'a-comp', + template: '', + providers: [{ provide: 'k', useValue: Bval }], +}) +export class Acomp {} +@Component({ + selector: 'c-comp', + template: '', + providers: [{ provide: 'k', useValue: Aval }], +}) +export class Ccomp {} +const Aval = 1, Bval = 2; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + // The shared declaration must appear exactly once (original deleted, single + // hoisted copy emitted). + let const_count = result.code.matches("const Aval").count(); + assert_eq!( + const_count, 1, + "`const Aval = 1, Bval = 2;` should appear exactly once. Got {const_count}.\nCode:\n{}", + result.code + ); + + let const_pos = result.code.find("const Aval").expect("`const Aval` must appear in the output"); + let acomp_pos = + result.code.find("class Acomp").expect("`class Acomp` must appear in the output"); + let ccomp_pos = + result.code.find("class Ccomp").expect("`class Ccomp` must appear in the output"); + + // The hoisted shared statement must precede BOTH classes — not just the + // later one (`Ccomp`). If the dedup logic picks `Ccomp`'s `insert_at`, + // the const will land between the two classes, leaving `Acomp` in the + // TDZ of `Bval`. + assert!( + const_pos < acomp_pos, + "`const Aval, Bval` must be hoisted above the *earliest* referencer (Acomp). \ + const@{const_pos} Acomp@{acomp_pos} Ccomp@{ccomp_pos}\nCode:\n{}", + result.code + ); + assert!( + const_pos < ccomp_pos, + "`const Aval, Bval` must also be hoisted above Ccomp. \ + const@{const_pos} Acomp@{acomp_pos} Ccomp@{ccomp_pos}\nCode:\n{}", + result.code + ); +} + +/// Regression for transitive TDZ deps: when decorator metadata references an +/// aggregate binding (e.g. `providers: PROVIDERS`) and that aggregate's +/// initializer transitively references *another* later-declared top-level +/// binding (`TOKEN`), the hoister must pull both bindings above the class. +/// +/// Without this, `PROVIDERS` gets moved above the class but `TOKEN` stays +/// below, so `PROVIDERS`'s own initializer throws `ReferenceError: Cannot +/// access 'TOKEN' before initialization` at module evaluation — strictly +/// worse than before the hoist. +#[test] +fn component_provider_aggregate_const_pulls_in_transitive_tdz_dep() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'x', template: '', providers: PROVIDERS }) +export class TestComponent {} +const PROVIDERS = [{ provide: TOKEN, useValue: 1 }]; +const TOKEN = 'tok'; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let providers_pos = result.code.find("const PROVIDERS").unwrap_or_else(|| { + panic!("Expected `const PROVIDERS` to be present.\nCode:\n{}", result.code) + }); + let token_pos = result + .code + .find("const TOKEN") + .unwrap_or_else(|| panic!("Expected `const TOKEN` to be present.\nCode:\n{}", result.code)); + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + + // Both must be hoisted above the class. + assert!( + providers_pos < class_pos, + "`const PROVIDERS` must be hoisted above the class. \ + providers@{providers_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + assert!( + token_pos < class_pos, + "`const TOKEN` (transitively referenced by PROVIDERS' initializer) \ + must also be hoisted above the class to avoid TDZ. \ + token@{token_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + + // And `TOKEN` must come before `PROVIDERS` so PROVIDERS' initializer can + // actually read it at module load. + assert!( + token_pos < providers_pos, + "`const TOKEN` must precede `const PROVIDERS` in the hoisted region. \ + token@{token_pos} providers@{providers_pos}\nCode:\n{}", + result.code + ); + + // Neither should be duplicated. + assert_eq!( + result.code.matches("const PROVIDERS").count(), + 1, + "`const PROVIDERS` should appear exactly once.\nCode:\n{}", + result.code + ); + assert_eq!( + result.code.matches("const TOKEN").count(), + 1, + "`const TOKEN` should appear exactly once.\nCode:\n{}", + result.code + ); +} From 2893ba7b07c93129abe19c53c7c09df9409caf60 Mon Sep 17 00:00:00 2001 From: LongYinan Date: Wed, 27 May 2026 18:41:05 +0800 Subject: [PATCH 04/21] fix(hoist): boundary + function-call transitive TDZ deps Two PR #302 review fixes for the const-hoist: 1. Off-by-one at the class-body boundary (Cursor): the check `stmt_start <= class_body_end` skipped a const declared at exactly `class.body.span.end` (no whitespace between `}` and `const`). `class.body.span.end` is the exclusive end of the body, so a stmt starting there is the very next byte after the class and still needs hoisting. Switched to `<`. To keep the new delete from chewing into the `decls_after_class` insert at the same offset, the hoist delete now runs with a negative priority so it applies before that insert. 2. Transitive deps through function calls (Codex): a hoisted initializer that calls a top-level function (`const PROVIDERS = makeProviders()`) evaluates that function's body at module load, so any later-declared binding the body reads still TDZ-throws. Indexed top-level function declarations via `oxc_ast_visit::Visit` (skipping nested function / arrow / class expression bodies for the same lazy-evaluation reason as the existing expression walker), and chase identifiers through them in the BFS and in the topological-sort edge expansion. Added two regression tests: - `component_provider_const_immediately_after_class_brace_is_hoisted` - `component_provider_const_via_function_call_pulls_in_transitive_tdz_dep` Co-Authored-By: Claude Opus 4.7 --- Cargo.lock | 2 + Cargo.toml | 1 + crates/oxc_angular_compiler/Cargo.toml | 2 + .../src/component/hoist.rs | 256 ++++++++++++++---- .../tests/integration_test.rs | 113 ++++++++ 5 files changed, 314 insertions(+), 60 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f2a5a9214..84ed4edc4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1087,6 +1087,7 @@ dependencies = [ "oxc-miette", "oxc_allocator", "oxc_ast", + "oxc_ast_visit", "oxc_codegen", "oxc_diagnostics", "oxc_parser", @@ -1095,6 +1096,7 @@ dependencies = [ "oxc_sourcemap 7.0.0", "oxc_span", "oxc_str", + "oxc_syntax", "oxc_transformer", "pathdiff", "rustc-hash", diff --git a/Cargo.toml b/Cargo.toml index 7372a2313..fc907122d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -95,6 +95,7 @@ oxc_napi = "0.133" oxc_parser = "0.133" oxc_semantic = "0.133" oxc_span = "0.133" +oxc_syntax = "0.133" oxc_sourcemap = "7.0.0" oxc_str = "0.133" oxc_transformer = "0.133" diff --git a/crates/oxc_angular_compiler/Cargo.toml b/crates/oxc_angular_compiler/Cargo.toml index 384d1c231..9ac749661 100644 --- a/crates/oxc_angular_compiler/Cargo.toml +++ b/crates/oxc_angular_compiler/Cargo.toml @@ -18,10 +18,12 @@ doctest = false [dependencies] oxc_allocator = { workspace = true } oxc_ast = { workspace = true } +oxc_ast_visit = { workspace = true } oxc_diagnostics = { workspace = true } oxc_parser = { workspace = true } oxc_semantic = { workspace = true } oxc_span = { workspace = true } +oxc_syntax = { workspace = true } oxc_sourcemap = { workspace = true } oxc_str = { workspace = true } oxc_transformer = { workspace = true } diff --git a/crates/oxc_angular_compiler/src/component/hoist.rs b/crates/oxc_angular_compiler/src/component/hoist.rs index 31cf648df..e5158516e 100644 --- a/crates/oxc_angular_compiler/src/component/hoist.rs +++ b/crates/oxc_angular_compiler/src/component/hoist.rs @@ -31,8 +31,10 @@ use std::collections::{HashMap, HashSet}; use oxc_ast::ast::{ Argument, ArrayExpressionElement, BindingPattern, Class, Declaration, Decorator, - ExportDefaultDeclarationKind, Expression, ObjectPropertyKind, Program, Statement, + ExportDefaultDeclarationKind, Expression, IdentifierReference, ObjectPropertyKind, Program, + Statement, }; +use oxc_ast_visit::Visit; use oxc_span::GetSpan; use crate::optimizer::Edit; @@ -78,8 +80,15 @@ pub fn collect_hoist_edits<'a>(program: &Program<'a>, source: &str) -> Vec // - `binding_to_stmt`: identifier name → containing statement's `start`. // - `stmt_info`: statement start → end/delete bounds and the union of // identifier references across the statement's initializers. - let (binding_to_stmt, stmt_info) = collect_top_level_bindings(program, source); - if binding_to_stmt.is_empty() { + // - `fn_body_idents`: top-level function name → identifier references in + // its body. Top-level function *declarations* are JS-hoisted so they + // never need physical hoisting, but if a hoisted initializer *calls* + // them (`const PROVIDERS = makeProviders()`), the function body runs + // at module load and any later-declared binding it touches still + // TDZ-throws. The BFS consults this map to chase identifiers through + // function-call boundaries. + let (binding_to_stmt, stmt_info, fn_body_idents) = collect_top_level_bindings(program, source); + if binding_to_stmt.is_empty() && fn_body_idents.is_empty() { return Vec::new(); } @@ -117,37 +126,55 @@ pub fn collect_hoist_edits<'a>(program: &Program<'a>, source: &str) -> Vec if !visited.insert(name) { continue; } - let Some(&stmt_start) = binding_to_stmt.get(name) else { continue }; - let Some(info) = stmt_info.get(&stmt_start) else { continue }; - // Skip bindings declared *before* this class — they're already - // initialized when the class evaluates. - if stmt_start <= class_body_end { - continue; - } + if let Some(&stmt_start) = binding_to_stmt.get(name) { + let Some(info) = stmt_info.get(&stmt_start) else { continue }; + // Skip bindings declared *before* this class — they're + // already initialized when the class evaluates. + // `class_body_end` is the exclusive end of the class body + // (one byte past `}`), so a statement starting at exactly + // `class_body_end` is the very next byte after the class — + // declared *after* and still needs hoisting. + if stmt_start < class_body_end { + continue; + } - plan.entry(stmt_start) - .and_modify(|p| { - if effective_start < p.insert_at { - p.insert_at = effective_start; + plan.entry(stmt_start) + .and_modify(|p| { + if effective_start < p.insert_at { + p.insert_at = effective_start; + } + }) + .or_insert(PlanEntry { + stmt_end: info.stmt_end, + delete_end: info.delete_end, + insert_at: effective_start, + }); + + // Transitive hoist: if this binding's initializer references + // another later-declared binding, that one must move above + // the class too — otherwise the *hoisted* statement itself + // TDZ-throws when its initializer runs. Without this, + // `providers: PROVIDERS` followed by `const PROVIDERS = [{ + // provide: TOKEN, ... }]; const TOKEN = ...;` moves + // `PROVIDERS` but leaves `TOKEN` below, so module evaluation + // now throws inside the hoisted `PROVIDERS` initializer. + // See PR #302 review. + for n in &info.init_idents { + if !visited.contains(n) { + worklist.push(n); + } + } + } else if let Some(body_refs) = fn_body_idents.get(name) { + // The name resolves to a top-level function declaration. + // Don't hoist the function itself (JS already hoists fn + // decls), but if its body references later bindings, those + // references fire whenever the function is called — and a + // hoisted initializer *will* call it at module load. Chase + // them through the worklist. See PR #302 review (Codex). + for n in body_refs { + if !visited.contains(n) { + worklist.push(n); } - }) - .or_insert(PlanEntry { - stmt_end: info.stmt_end, - delete_end: info.delete_end, - insert_at: effective_start, - }); - - // Transitive hoist: if this binding's initializer references - // another later-declared binding, that one must move above the - // class too — otherwise the *hoisted* statement itself TDZ-throws - // when its initializer runs. Without this, `providers: PROVIDERS` - // followed by `const PROVIDERS = [{ provide: TOKEN, ... }]; const - // TOKEN = ...;` moves `PROVIDERS` but leaves `TOKEN` below, so - // module evaluation now throws inside the hoisted `PROVIDERS` - // initializer. See PR #302 review. - for n in &info.init_idents { - if !visited.contains(n) { - worklist.push(n); } } } @@ -161,14 +188,26 @@ pub fn collect_hoist_edits<'a>(program: &Program<'a>, source: &str) -> Vec // emitted *before* their dependents in the hoisted prelude. Within a // single bucket (same `insert_at`), this guarantees that e.g. `const // TOKEN` precedes `const PROVIDERS = [{ provide: TOKEN, ... }]`. - let order = topological_order(&plan, &binding_to_stmt, &stmt_info); + let order = topological_order(&plan, &binding_to_stmt, &stmt_info, &fn_body_idents); // Step 4: emit edits. Group by `insert_at` so multiple statements headed // to the same class become a single insert edit whose text is the // concatenation in topological order. Emitting them as separate edits at // the same offset would invert their order (each insert at the same // position prepends to the prior insert's text). + // + // `HOIST_INSERT_PRIORITY` (positive) keeps hoisted text *above* the + // `decls_before_class` insertion at the same offset (which uses default + // priority 0). + // + // `HOIST_DELETE_PRIORITY` (negative) lets a hoist delete that starts at + // exactly `class.body.span.end` — the byte right after `}`, where a + // const declared with no whitespace lives — apply *before* the + // `decls_after_class` insert at the same offset. Without the priority + // skew, the insert ran first and the delete would then chew into the + // newly inserted IIFE/metadata text instead of the original const. const HOIST_INSERT_PRIORITY: i32 = 5; + const HOIST_DELETE_PRIORITY: i32 = -1; let mut per_target: HashMap = HashMap::new(); let mut edits: Vec = Vec::new(); @@ -178,7 +217,7 @@ pub fn collect_hoist_edits<'a>(program: &Program<'a>, source: &str) -> Vec let bucket = per_target.entry(p.insert_at).or_default(); bucket.push_str(text); bucket.push('\n'); - edits.push(Edit::delete(*stmt_start, p.delete_end)); + edits.push(Edit::delete(*stmt_start, p.delete_end).with_priority(HOIST_DELETE_PRIORITY)); } for (insert_at, text) in per_target { @@ -193,10 +232,11 @@ pub fn collect_hoist_edits<'a>(program: &Program<'a>, source: &str) -> Vec /// ascending `stmt_start` so the result is deterministic. Cycles (which would /// require ill-formed source where two consts reference each other) are /// broken silently — they can't produce a valid evaluation order anyway. -fn topological_order( +fn topological_order<'a>( plan: &HashMap, - binding_to_stmt: &HashMap<&str, u32>, - stmt_info: &HashMap>, + binding_to_stmt: &HashMap<&'a str, u32>, + stmt_info: &HashMap>, + fn_body_idents: &HashMap<&'a str, HashSet<&'a str>>, ) -> Vec { let plan_starts: HashSet = plan.keys().copied().collect(); @@ -204,14 +244,22 @@ fn topological_order( // *before* it). Filter to only edges that land inside the plan; deps that // resolve outside (declared before the class, or not top-level) are // already TDZ-safe. + // + // The "effective init idents" of a planned statement are the transitive + // closure of its direct `init_idents` through `fn_body_idents`: if the + // initializer calls a function, the function body's identifier reads also + // count as references that fire when the hoisted statement evaluates. So + // `const PROVIDERS = makeProviders()` with `function makeProviders() { + // return [{ provide: TOKEN }]; }` must end up after `const TOKEN` in the + // hoisted prelude. See PR #302 review (Codex). let mut deps: HashMap> = HashMap::with_capacity(plan_starts.len()); for &start in &plan_starts { let Some(info) = stmt_info.get(&start) else { deps.insert(start, Vec::new()); continue; }; - let mut edges: Vec = info - .init_idents + let effective = expand_through_functions(&info.init_idents, fn_body_idents); + let mut edges: Vec = effective .iter() .filter_map(|n| binding_to_stmt.get(n)) .copied() @@ -263,6 +311,36 @@ fn topological_order( order } +/// Take a set of identifier references and expand it transitively through +/// `fn_body_idents`: every time we encounter a name that resolves to a +/// top-level function, we add the function body's own identifier references +/// (and recurse). The result is the union of every identifier that the +/// initial set "reaches" via function calls — what would actually fire if +/// you ran the initializer at module load. A `seen` set guards against +/// mutual recursion between top-level functions. +fn expand_through_functions<'a>( + seed: &HashSet<&'a str>, + fn_body_idents: &HashMap<&'a str, HashSet<&'a str>>, +) -> HashSet<&'a str> { + let mut out: HashSet<&'a str> = HashSet::new(); + let mut worklist: Vec<&'a str> = seed.iter().copied().collect(); + let mut seen: HashSet<&'a str> = HashSet::new(); + while let Some(name) = worklist.pop() { + if !seen.insert(name) { + continue; + } + out.insert(name); + if let Some(body_refs) = fn_body_idents.get(name) { + for n in body_refs { + if !seen.contains(n) { + worklist.push(n); + } + } + } + } + out +} + /// Compute the effective start of a class statement, ignoring trailing /// whitespace but spanning any leading decorators that will remain in the /// source. We don't have access to the in-progress `decorator_spans_to_remove` @@ -330,13 +408,14 @@ fn has_angular_decorator(class: &Class<'_>) -> bool { fn collect_top_level_bindings<'a>( program: &Program<'a>, source: &str, -) -> (HashMap<&'a str, u32>, HashMap>) { +) -> (HashMap<&'a str, u32>, HashMap>, HashMap<&'a str, HashSet<&'a str>>) { let bytes = source.as_bytes(); let mut binding_to_stmt: HashMap<&'a str, u32> = HashMap::new(); let mut stmt_info: HashMap> = HashMap::new(); + let mut fn_body_idents: HashMap<&'a str, HashSet<&'a str>> = HashMap::new(); for stmt in &program.body { - let decl = match stmt { + let var_decl = match stmt { Statement::VariableDeclaration(decl) => Some(decl.as_ref()), Statement::ExportNamedDeclaration(export) => match &export.declaration { Some(Declaration::VariableDeclaration(decl)) => Some(decl.as_ref()), @@ -344,30 +423,87 @@ fn collect_top_level_bindings<'a>( }, _ => None, }; - let Some(decl) = decl else { continue }; - - let span = stmt.span(); - let stmt_start = span.start; - let mut info = StmtInfo { - stmt_end: span.end, - delete_end: end_with_trailing_newline(span.end, bytes), - init_idents: HashSet::new(), - }; - - for declarator in &decl.declarations { - if let BindingPattern::BindingIdentifier(id) = &declarator.id { - binding_to_stmt.insert(id.name.as_str(), stmt_start); + if let Some(decl) = var_decl { + let span = stmt.span(); + let stmt_start = span.start; + let mut info = StmtInfo { + stmt_end: span.end, + delete_end: end_with_trailing_newline(span.end, bytes), + init_idents: HashSet::new(), + }; + + for declarator in &decl.declarations { + if let BindingPattern::BindingIdentifier(id) = &declarator.id { + binding_to_stmt.insert(id.name.as_str(), stmt_start); + } + // Destructuring patterns are deliberately ignored — see + // collect_top_level_bindings docstring above. + if let Some(init) = &declarator.init { + collect_expr_idents(init, &mut info.init_idents); + } } - // Destructuring patterns are deliberately ignored — see - // collect_top_level_bindings docstring above. - if let Some(init) = &declarator.init { - collect_expr_idents(init, &mut info.init_idents); + stmt_info.insert(stmt_start, info); + continue; + } + + // Top-level `function foo() { ... }` (also `export function` / + // `export default function foo`). Function declarations are + // JS-hoisted whole-body, so we never *move* them; we only index + // their body references so the BFS can chase TDZ-relevant + // identifiers across function-call boundaries. + let func = match stmt { + Statement::FunctionDeclaration(f) => Some(f.as_ref()), + Statement::ExportNamedDeclaration(export) => match &export.declaration { + Some(Declaration::FunctionDeclaration(f)) => Some(f.as_ref()), + _ => None, + }, + Statement::ExportDefaultDeclaration(export) => match &export.declaration { + ExportDefaultDeclarationKind::FunctionDeclaration(f) => Some(f.as_ref()), + _ => None, + }, + _ => None, + }; + if let Some(func) = func { + if let (Some(id), Some(body)) = (&func.id, &func.body) { + let mut refs: HashSet<&'a str> = HashSet::new(); + let mut visitor = FunctionBodyIdentVisitor { out: &mut refs }; + visitor.visit_function_body(body); + fn_body_idents.insert(id.name.as_str(), refs); } } - stmt_info.insert(stmt_start, info); } - (binding_to_stmt, stmt_info) + (binding_to_stmt, stmt_info, fn_body_idents) +} + +/// AST visitor that collects every `IdentifierReference` reachable from a +/// function body, with the same "lazy bodies are opaque" rule the existing +/// expression walker uses: nested function/arrow expressions inside the body +/// don't run when the outer function is called, so their bodies are skipped. +struct FunctionBodyIdentVisitor<'a, 'b> { + out: &'b mut HashSet<&'a str>, +} + +impl<'a, 'b> Visit<'a> for FunctionBodyIdentVisitor<'a, 'b> { + fn visit_identifier_reference(&mut self, it: &IdentifierReference<'a>) { + self.out.insert(it.name.as_str()); + } + + // Nested function/arrow expressions only execute when *they* are called, + // not when the enclosing function is. Don't descend. + fn visit_function( + &mut self, + _it: &oxc_ast::ast::Function<'a>, + _flags: oxc_syntax::scope::ScopeFlags, + ) { + } + + fn visit_arrow_function_expression(&mut self, _it: &oxc_ast::ast::ArrowFunctionExpression<'a>) { + } + + // Class expressions inside the body define methods that don't run at + // call time of the outer function. Skip. + fn visit_class(&mut self, _it: &Class<'a>) {} } /// Advance `end` past one trailing line terminator so that deleting the diff --git a/crates/oxc_angular_compiler/tests/integration_test.rs b/crates/oxc_angular_compiler/tests/integration_test.rs index 5c105d4c5..9372b924f 100644 --- a/crates/oxc_angular_compiler/tests/integration_test.rs +++ b/crates/oxc_angular_compiler/tests/integration_test.rs @@ -10705,3 +10705,116 @@ const TOKEN = 'tok'; result.code ); } + +/// When `providers: PROVIDERS` references a `const PROVIDERS = makeProviders()` +/// whose initializer *calls* a later-declared `function makeProviders()`, and +/// that function reads another later-declared `const TOKEN`, the hoister must +/// also pull `TOKEN` above the class — otherwise the hoisted `PROVIDERS` +/// initializer invokes `makeProviders()` before `TOKEN` is initialized and +/// throws `ReferenceError: Cannot access 'TOKEN' before initialization`. +/// +/// Regression test for Codex bot review on PR #302 (line 340 of hoist.rs). +#[test] +fn component_provider_const_via_function_call_pulls_in_transitive_tdz_dep() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'x', template: '', providers: PROVIDERS }) +class TestComponent {} +const TOKEN = 'tok'; +const PROVIDERS = makeProviders(); +function makeProviders() { return [{ provide: TOKEN }]; } +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let providers_pos = result.code.find("const PROVIDERS").unwrap_or_else(|| { + panic!("Expected `const PROVIDERS` to be present.\nCode:\n{}", result.code) + }); + let token_pos = result + .code + .find("const TOKEN") + .unwrap_or_else(|| panic!("Expected `const TOKEN` to be present.\nCode:\n{}", result.code)); + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + + // Both must be hoisted above the class. + assert!( + providers_pos < class_pos, + "`const PROVIDERS` must be hoisted above the class. \ + providers@{providers_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + assert!( + token_pos < class_pos, + "`const TOKEN` (transitively read by makeProviders() at module init) \ + must also be hoisted above the class to avoid TDZ. \ + token@{token_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + + // And `TOKEN` must come before `PROVIDERS` so `makeProviders()` can read it + // when the hoisted `PROVIDERS` initializer evaluates at module load. + assert!( + token_pos < providers_pos, + "`const TOKEN` must precede `const PROVIDERS` in the hoisted region. \ + token@{token_pos} providers@{providers_pos}\nCode:\n{}", + result.code + ); + + // Neither should be duplicated. + assert_eq!( + result.code.matches("const PROVIDERS").count(), + 1, + "`const PROVIDERS` should appear exactly once.\nCode:\n{}", + result.code + ); + assert_eq!( + result.code.matches("const TOKEN").count(), + 1, + "`const TOKEN` should appear exactly once.\nCode:\n{}", + result.code + ); +} + +/// `class.body.span.end` is the exclusive byte offset one past the closing +/// `}`. A `VariableDeclaration` whose statement starts at *exactly* that +/// offset (no whitespace between `}` and `const`) is positioned immediately +/// after the class body and is still in the TDZ when the class's static +/// fields evaluate. The hoist must move it; using `<=` for the +/// "before-class" check accidentally skips this boundary case. +/// +/// Regression test for Cursor bot review on PR #302 (line 124 of hoist.rs). +#[test] +fn component_provider_const_immediately_after_class_brace_is_hoisted() { + let allocator = Allocator::default(); + // No whitespace at all between `}` and `const` — `const` starts at + // exactly `class.body.span.end`. + let source = "import { Component } from '@angular/core';\n\ +@Component({ selector: 'x', template: '', providers: [{ provide: TOKEN, useValue: 1 }] })\n\ +export class TestComponent {}const TOKEN = 'tok';\n"; + + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let token_pos = result + .code + .find("const TOKEN") + .unwrap_or_else(|| panic!("Expected `const TOKEN` to be present.\nCode:\n{}", result.code)); + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + assert!( + token_pos < class_pos, + "Boundary-case `const TOKEN` (decl at exactly class.body.span.end) must \ + still be hoisted above the class. token@{token_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + assert_eq!( + result.code.matches("const TOKEN").count(), + 1, + "`const TOKEN` should appear exactly once (original deleted).\nCode:\n{}", + result.code + ); +} From 616588614a1022c143faaa984d33e28b2952f7f6 Mon Sep 17 00:00:00 2001 From: LongYinan Date: Wed, 27 May 2026 19:20:34 +0800 Subject: [PATCH 05/21] refactor(hoist): resolve bindings via oxc_semantic SymbolIds Replace the hand-rolled name-string indexing in `collect_hoist_edits` with SymbolId-based resolution backed by `oxc_semantic`. The AOT path in `transform_angular_file` now builds a `Semantic` and threads it into the hoister so every `IdentifierReference` resolves through the symbol table to the actual declaring binding. Previously, `binding_to_stmt: HashMap<&str, u32>` and `fn_body_idents: HashMap<&str, HashSet<&str>>` keyed everything by the identifier's spelling. If a nested scope shadowed a top-level binding with the same name, the walker couldn't tell them apart and might count a non-top-level reference as a TDZ-relevant hit on the top-level binding. After this refactor, every reference is resolved to a `SymbolId` and matched against the top-level binding set, so shadows are impossible. No observable behavior change is intended: all 344 integration tests (including the two regression tests from PR #302 `component_provider_const_immediately_after_class_brace_is_hoisted` and `component_provider_const_via_function_call_pulls_in_transitive_tdz_dep`) continue to pass. The hoist priorities, off-by-one boundary check, topological sort, and lazy-body skipping are all preserved. Co-Authored-By: Claude Opus 4.7 --- .../src/component/hoist.rs | 332 +++++++++++------- .../src/component/transform.rs | 24 +- 2 files changed, 219 insertions(+), 137 deletions(-) diff --git a/crates/oxc_angular_compiler/src/component/hoist.rs b/crates/oxc_angular_compiler/src/component/hoist.rs index e5158516e..0d08ae4e9 100644 --- a/crates/oxc_angular_compiler/src/component/hoist.rs +++ b/crates/oxc_angular_compiler/src/component/hoist.rs @@ -26,6 +26,11 @@ //! final emission order is a topological sort of the dependency graph, so //! `const PROVIDERS = [{ provide: TOKEN, ... }]` ends up *after* //! `const TOKEN = ...` in the hoisted prelude. +//! +//! Binding resolution is performed via `oxc_semantic`'s symbol table: +//! every identifier reference resolves through its `ReferenceId` to a +//! `SymbolId`, so a nested-scope shadow of a top-level name can't be +//! mistaken for the top-level binding. use std::collections::{HashMap, HashSet}; @@ -35,22 +40,24 @@ use oxc_ast::ast::{ Statement, }; use oxc_ast_visit::Visit; +use oxc_semantic::Semantic; use oxc_span::GetSpan; +use oxc_syntax::symbol::SymbolId; use crate::optimizer::Edit; /// Per-statement record collected during the initial scan. Multi-declarator -/// statements (`const A = 1, B = 2;`) get a single entry shared by every name -/// they bind; `init_idents` is the union of identifier references across all -/// declarator initializers. -struct StmtInfo<'a> { +/// statements (`const A = 1, B = 2;`) get a single entry shared by every +/// symbol they bind; `init_symbols` is the union of identifier references +/// (resolved to `SymbolId`) across all declarator initializers. +struct StmtInfo { stmt_end: u32, /// End of the deletion (extends `stmt_end` past one trailing newline so /// the hoist doesn't leave a stray blank line behind). delete_end: u32, - /// Identifier references appearing in any declarator's initializer in - /// this statement. Used to drive transitive hoisting. - init_idents: HashSet<&'a str>, + /// Symbols referenced inside any declarator's initializer in this + /// statement. Used to drive transitive hoisting. + init_symbols: HashSet, } /// One statement scheduled for hoisting, keyed by its `stmt_start`. Multiple @@ -75,26 +82,31 @@ struct PlanEntry { /// insertion at the same offset pushes earlier text further right — the /// hoisted statements end up immediately above the class, with any /// constant-pool declarations from the compiler in between. -pub fn collect_hoist_edits<'a>(program: &Program<'a>, source: &str) -> Vec { - // Step 1: index top-level bindings. - // - `binding_to_stmt`: identifier name → containing statement's `start`. +pub fn collect_hoist_edits<'a>( + program: &Program<'a>, + source: &str, + semantic: &Semantic<'a>, +) -> Vec { + // Step 1: index top-level bindings (keyed by SymbolId). + // - `symbol_to_stmt`: binding SymbolId → containing statement's `start`. // - `stmt_info`: statement start → end/delete bounds and the union of - // identifier references across the statement's initializers. - // - `fn_body_idents`: top-level function name → identifier references in - // its body. Top-level function *declarations* are JS-hoisted so they - // never need physical hoisting, but if a hoisted initializer *calls* - // them (`const PROVIDERS = makeProviders()`), the function body runs - // at module load and any later-declared binding it touches still - // TDZ-throws. The BFS consults this map to chase identifiers through - // function-call boundaries. - let (binding_to_stmt, stmt_info, fn_body_idents) = collect_top_level_bindings(program, source); - if binding_to_stmt.is_empty() && fn_body_idents.is_empty() { + // symbol references across the statement's initializers. + // - `fn_body_symbol_refs`: top-level function SymbolId → set of symbol + // references in its body. Top-level function *declarations* are + // JS-hoisted so they never need physical hoisting, but if a hoisted + // initializer *calls* them (`const PROVIDERS = makeProviders()`), the + // function body runs at module load and any later-declared binding it + // touches still TDZ-throws. The BFS consults this map to chase + // identifiers through function-call boundaries. + let (symbol_to_stmt, stmt_info, fn_body_symbol_refs) = + collect_top_level_bindings(program, source, semantic); + if symbol_to_stmt.is_empty() && fn_body_symbol_refs.is_empty() { return Vec::new(); } // Step 2: for every Angular-decorated class, BFS through binding - // initializers starting from the identifiers directly referenced in the - // decorator metadata. The plan is keyed by `stmt_start` (not name) so + // initializers starting from the symbols directly referenced in the + // decorator metadata. The plan is keyed by `stmt_start` (not symbol) so // multi-declarator statements collapse into a single entry, and the // `insert_at` is updated to the MIN across all referencers — that guards // against the nondeterministic dedup bug where, with `const A = 1, B = 2;` @@ -109,9 +121,9 @@ pub fn collect_hoist_edits<'a>(program: &Program<'a>, source: &str) -> Vec continue; } - let mut direct: HashSet<&'a str> = HashSet::new(); + let mut direct: HashSet = HashSet::new(); for decorator in &class.decorators { - collect_decorator_idents(decorator, &mut direct); + collect_decorator_symbols(decorator, semantic, &mut direct); } if direct.is_empty() { continue; @@ -120,13 +132,13 @@ pub fn collect_hoist_edits<'a>(program: &Program<'a>, source: &str) -> Vec let class_body_end = class.body.span.end; let effective_start = effective_class_start(class, stmt_start_pos); - let mut worklist: Vec<&'a str> = direct.into_iter().collect(); - let mut visited: HashSet<&'a str> = HashSet::new(); - while let Some(name) = worklist.pop() { - if !visited.insert(name) { + let mut worklist: Vec = direct.into_iter().collect(); + let mut visited: HashSet = HashSet::new(); + while let Some(symbol) = worklist.pop() { + if !visited.insert(symbol) { continue; } - if let Some(&stmt_start) = binding_to_stmt.get(name) { + if let Some(&stmt_start) = symbol_to_stmt.get(&symbol) { let Some(info) = stmt_info.get(&stmt_start) else { continue }; // Skip bindings declared *before* this class — they're // already initialized when the class evaluates. @@ -159,21 +171,21 @@ pub fn collect_hoist_edits<'a>(program: &Program<'a>, source: &str) -> Vec // `PROVIDERS` but leaves `TOKEN` below, so module evaluation // now throws inside the hoisted `PROVIDERS` initializer. // See PR #302 review. - for n in &info.init_idents { - if !visited.contains(n) { - worklist.push(n); + for &s in &info.init_symbols { + if !visited.contains(&s) { + worklist.push(s); } } - } else if let Some(body_refs) = fn_body_idents.get(name) { - // The name resolves to a top-level function declaration. + } else if let Some(body_refs) = fn_body_symbol_refs.get(&symbol) { + // The symbol resolves to a top-level function declaration. // Don't hoist the function itself (JS already hoists fn // decls), but if its body references later bindings, those // references fire whenever the function is called — and a // hoisted initializer *will* call it at module load. Chase // them through the worklist. See PR #302 review (Codex). - for n in body_refs { - if !visited.contains(n) { - worklist.push(n); + for &s in body_refs { + if !visited.contains(&s) { + worklist.push(s); } } } @@ -188,7 +200,7 @@ pub fn collect_hoist_edits<'a>(program: &Program<'a>, source: &str) -> Vec // emitted *before* their dependents in the hoisted prelude. Within a // single bucket (same `insert_at`), this guarantees that e.g. `const // TOKEN` precedes `const PROVIDERS = [{ provide: TOKEN, ... }]`. - let order = topological_order(&plan, &binding_to_stmt, &stmt_info, &fn_body_idents); + let order = topological_order(&plan, &symbol_to_stmt, &stmt_info, &fn_body_symbol_refs); // Step 4: emit edits. Group by `insert_at` so multiple statements headed // to the same class become a single insert edit whose text is the @@ -232,11 +244,11 @@ pub fn collect_hoist_edits<'a>(program: &Program<'a>, source: &str) -> Vec /// ascending `stmt_start` so the result is deterministic. Cycles (which would /// require ill-formed source where two consts reference each other) are /// broken silently — they can't produce a valid evaluation order anyway. -fn topological_order<'a>( +fn topological_order( plan: &HashMap, - binding_to_stmt: &HashMap<&'a str, u32>, - stmt_info: &HashMap>, - fn_body_idents: &HashMap<&'a str, HashSet<&'a str>>, + symbol_to_stmt: &HashMap, + stmt_info: &HashMap, + fn_body_symbol_refs: &HashMap>, ) -> Vec { let plan_starts: HashSet = plan.keys().copied().collect(); @@ -245,23 +257,23 @@ fn topological_order<'a>( // resolve outside (declared before the class, or not top-level) are // already TDZ-safe. // - // The "effective init idents" of a planned statement are the transitive - // closure of its direct `init_idents` through `fn_body_idents`: if the - // initializer calls a function, the function body's identifier reads also - // count as references that fire when the hoisted statement evaluates. So - // `const PROVIDERS = makeProviders()` with `function makeProviders() { - // return [{ provide: TOKEN }]; }` must end up after `const TOKEN` in the - // hoisted prelude. See PR #302 review (Codex). + // The "effective init symbols" of a planned statement are the transitive + // closure of its direct `init_symbols` through `fn_body_symbol_refs`: if + // the initializer calls a function, the function body's identifier reads + // also count as references that fire when the hoisted statement + // evaluates. So `const PROVIDERS = makeProviders()` with `function + // makeProviders() { return [{ provide: TOKEN }]; }` must end up after + // `const TOKEN` in the hoisted prelude. See PR #302 review (Codex). let mut deps: HashMap> = HashMap::with_capacity(plan_starts.len()); for &start in &plan_starts { let Some(info) = stmt_info.get(&start) else { deps.insert(start, Vec::new()); continue; }; - let effective = expand_through_functions(&info.init_idents, fn_body_idents); + let effective = expand_through_functions(&info.init_symbols, fn_body_symbol_refs); let mut edges: Vec = effective .iter() - .filter_map(|n| binding_to_stmt.get(n)) + .filter_map(|s| symbol_to_stmt.get(s)) .copied() .filter(|s| *s != start && plan_starts.contains(s)) .collect(); @@ -311,29 +323,29 @@ fn topological_order<'a>( order } -/// Take a set of identifier references and expand it transitively through -/// `fn_body_idents`: every time we encounter a name that resolves to a -/// top-level function, we add the function body's own identifier references -/// (and recurse). The result is the union of every identifier that the +/// Take a set of symbol references and expand it transitively through +/// `fn_body_symbol_refs`: every time we encounter a symbol that resolves to a +/// top-level function, we add the function body's own symbol references +/// (and recurse). The result is the union of every symbol that the /// initial set "reaches" via function calls — what would actually fire if /// you ran the initializer at module load. A `seen` set guards against /// mutual recursion between top-level functions. -fn expand_through_functions<'a>( - seed: &HashSet<&'a str>, - fn_body_idents: &HashMap<&'a str, HashSet<&'a str>>, -) -> HashSet<&'a str> { - let mut out: HashSet<&'a str> = HashSet::new(); - let mut worklist: Vec<&'a str> = seed.iter().copied().collect(); - let mut seen: HashSet<&'a str> = HashSet::new(); - while let Some(name) = worklist.pop() { - if !seen.insert(name) { +fn expand_through_functions( + seed: &HashSet, + fn_body_symbol_refs: &HashMap>, +) -> HashSet { + let mut out: HashSet = HashSet::new(); + let mut worklist: Vec = seed.iter().copied().collect(); + let mut seen: HashSet = HashSet::new(); + while let Some(symbol) = worklist.pop() { + if !seen.insert(symbol) { continue; } - out.insert(name); - if let Some(body_refs) = fn_body_idents.get(name) { - for n in body_refs { - if !seen.contains(n) { - worklist.push(n); + out.insert(symbol); + if let Some(body_refs) = fn_body_symbol_refs.get(&symbol) { + for &s in body_refs { + if !seen.contains(&s) { + worklist.push(s); } } } @@ -388,13 +400,40 @@ fn has_angular_decorator(class: &Class<'_>) -> bool { }) } +/// Cheap pre-check: does `program` contain any top-level class statement +/// carrying one of the Angular decorators recognized by [`has_angular_decorator`]? +/// +/// Used by the AOT transform pipeline to skip the `Semantic` build and the +/// full hoist scan for files with no decorated classes (plain TS helpers, +/// type-only modules, services without `@Injectable`, …). This walks +/// `program.body` only and never descends into class bodies or expressions, +/// so it's O(top-level statements) with a tiny per-statement cost. +pub(crate) fn program_has_angular_decorated_class(program: &Program<'_>) -> bool { + program.body.iter().any(|stmt| match class_of(stmt) { + Some((class, _)) => has_angular_decorator(class), + None => false, + }) +} + +/// Resolve an `IdentifierReference` to a `SymbolId` via the semantic model. +/// Returns `None` when the reference is unresolved (e.g. globals, imports +/// without a local binding, or undeclared identifiers). The caller silently +/// skips unresolved references — they can't refer to a top-level `const` +/// binding in this module anyway. +fn resolve_symbol(id: &IdentifierReference<'_>, semantic: &Semantic<'_>) -> Option { + let reference_id = id.reference_id.get()?; + semantic.scoping().get_reference(reference_id).symbol_id() +} + /// Walk top-level statements and index every variable binding identifier -/// they declare, returning two complementary maps: -/// * `binding_to_stmt`: identifier name → containing statement's `start`. Used -/// to look up hoist info from an identifier reference. +/// they declare, returning three complementary maps: +/// * `symbol_to_stmt`: binding `SymbolId` → containing statement's `start`. /// * `stmt_info`: statement `start` → end/delete bounds and the union of -/// identifier references across every declarator's initializer. Used to -/// drive transitive hoisting and the topological sort. +/// symbol references across every declarator's initializer. Used to drive +/// transitive hoisting and the topological sort. +/// * `fn_body_symbol_refs`: top-level function `SymbolId` → symbols +/// referenced in its body. Used to chase TDZ-relevant identifiers across +/// function-call boundaries. /// /// Only `VariableDeclaration` (const/let/var) and the `export` form of it are /// considered: @@ -408,11 +447,12 @@ fn has_angular_decorator(class: &Class<'_>) -> bool { fn collect_top_level_bindings<'a>( program: &Program<'a>, source: &str, -) -> (HashMap<&'a str, u32>, HashMap>, HashMap<&'a str, HashSet<&'a str>>) { + semantic: &Semantic<'a>, +) -> (HashMap, HashMap, HashMap>) { let bytes = source.as_bytes(); - let mut binding_to_stmt: HashMap<&'a str, u32> = HashMap::new(); - let mut stmt_info: HashMap> = HashMap::new(); - let mut fn_body_idents: HashMap<&'a str, HashSet<&'a str>> = HashMap::new(); + let mut symbol_to_stmt: HashMap = HashMap::new(); + let mut stmt_info: HashMap = HashMap::new(); + let mut fn_body_symbol_refs: HashMap> = HashMap::new(); for stmt in &program.body { let var_decl = match stmt { @@ -429,17 +469,19 @@ fn collect_top_level_bindings<'a>( let mut info = StmtInfo { stmt_end: span.end, delete_end: end_with_trailing_newline(span.end, bytes), - init_idents: HashSet::new(), + init_symbols: HashSet::new(), }; for declarator in &decl.declarations { if let BindingPattern::BindingIdentifier(id) = &declarator.id { - binding_to_stmt.insert(id.name.as_str(), stmt_start); + if let Some(symbol_id) = id.symbol_id.get() { + symbol_to_stmt.insert(symbol_id, stmt_start); + } } // Destructuring patterns are deliberately ignored — see // collect_top_level_bindings docstring above. if let Some(init) = &declarator.init { - collect_expr_idents(init, &mut info.init_idents); + collect_expr_symbols(init, semantic, &mut info.init_symbols); } } stmt_info.insert(stmt_start, info); @@ -465,28 +507,33 @@ fn collect_top_level_bindings<'a>( }; if let Some(func) = func { if let (Some(id), Some(body)) = (&func.id, &func.body) { - let mut refs: HashSet<&'a str> = HashSet::new(); - let mut visitor = FunctionBodyIdentVisitor { out: &mut refs }; + let Some(fn_symbol) = id.symbol_id.get() else { continue }; + let mut refs: HashSet = HashSet::new(); + let mut visitor = FunctionBodyIdentVisitor { semantic, out: &mut refs }; visitor.visit_function_body(body); - fn_body_idents.insert(id.name.as_str(), refs); + fn_body_symbol_refs.insert(fn_symbol, refs); } } } - (binding_to_stmt, stmt_info, fn_body_idents) + (symbol_to_stmt, stmt_info, fn_body_symbol_refs) } /// AST visitor that collects every `IdentifierReference` reachable from a -/// function body, with the same "lazy bodies are opaque" rule the existing -/// expression walker uses: nested function/arrow expressions inside the body -/// don't run when the outer function is called, so their bodies are skipped. +/// function body, resolving each to a `SymbolId` via the semantic model, with +/// the same "lazy bodies are opaque" rule the existing expression walker +/// uses: nested function/arrow expressions inside the body don't run when +/// the outer function is called, so their bodies are skipped. struct FunctionBodyIdentVisitor<'a, 'b> { - out: &'b mut HashSet<&'a str>, + semantic: &'b Semantic<'a>, + out: &'b mut HashSet, } impl<'a, 'b> Visit<'a> for FunctionBodyIdentVisitor<'a, 'b> { fn visit_identifier_reference(&mut self, it: &IdentifierReference<'a>) { - self.out.insert(it.name.as_str()); + if let Some(symbol) = resolve_symbol(it, self.semantic) { + self.out.insert(symbol); + } } // Nested function/arrow expressions only execute when *they* are called, @@ -523,20 +570,24 @@ fn end_with_trailing_newline(end: u32, bytes: &[u8]) -> u32 { pos as u32 } -/// Collect identifiers referenced inside the decorator argument expressions. +/// Collect symbols referenced inside the decorator argument expressions. /// Only the decorator's call arguments (i.e. the metadata object) are walked. -fn collect_decorator_idents<'a>(decorator: &Decorator<'a>, out: &mut HashSet<&'a str>) { +fn collect_decorator_symbols<'a>( + decorator: &Decorator<'a>, + semantic: &Semantic<'a>, + out: &mut HashSet, +) { let Expression::CallExpression(call) = &decorator.expression else { return; }; for arg in &call.arguments { match arg { Argument::SpreadElement(spread) => { - collect_expr_idents(&spread.argument, out); + collect_expr_symbols(&spread.argument, semantic, out); } other => { if let Some(expr) = argument_to_expression(other) { - collect_expr_idents(expr, out); + collect_expr_symbols(expr, semantic, out); } } } @@ -547,8 +598,9 @@ fn argument_to_expression<'a, 'src>(arg: &'src Argument<'a>) -> Option<&'src Exp if arg.is_expression() { Some(arg.to_expression()) } else { None } } -/// Walk an expression collecting every bare identifier reference. Walks -/// through arrays, object literals, spreads, conditionals, calls, etc. Skips: +/// Walk an expression collecting every bare identifier reference (resolved +/// to a `SymbolId` via the semantic model). Walks through arrays, object +/// literals, spreads, conditionals, calls, etc. Skips: /// /// * The body of any function/arrow expression — references inside a factory /// like `useFactory: () => new Service(DEP)` only fire when the factory is @@ -559,15 +611,21 @@ fn argument_to_expression<'a, 'src>(arg: &'src Argument<'a>) -> Option<&'src Exp /// * Member expression property names — `Foo.BAR` references `Foo`; `BAR` is /// a property access, not a bare identifier. /// * TypeScript type annotations and assertions. -fn collect_expr_idents<'a>(expr: &Expression<'a>, out: &mut HashSet<&'a str>) { +fn collect_expr_symbols<'a>( + expr: &Expression<'a>, + semantic: &Semantic<'a>, + out: &mut HashSet, +) { use Expression as E; match expr { E::Identifier(id) => { - out.insert(id.name.as_str()); + if let Some(symbol) = resolve_symbol(id, semantic) { + out.insert(symbol); + } } E::ArrayExpression(arr) => { for el in &arr.elements { - collect_array_element_idents(el, out); + collect_array_element_symbols(el, semantic, out); } } E::ObjectExpression(obj) => { @@ -578,25 +636,25 @@ fn collect_expr_idents<'a>(expr: &Expression<'a>, out: &mut HashSet<&'a str>) { // key identifier; static keys don't. if p.computed { if let Some(key_expr) = p.key.as_expression() { - collect_expr_idents(key_expr, out); + collect_expr_symbols(key_expr, semantic, out); } } - collect_expr_idents(&p.value, out); + collect_expr_symbols(&p.value, semantic, out); } ObjectPropertyKind::SpreadProperty(spread) => { - collect_expr_idents(&spread.argument, out); + collect_expr_symbols(&spread.argument, semantic, out); } } } } E::CallExpression(call) => { - collect_expr_idents(&call.callee, out); + collect_expr_symbols(&call.callee, semantic, out); for arg in &call.arguments { match arg { - Argument::SpreadElement(s) => collect_expr_idents(&s.argument, out), + Argument::SpreadElement(s) => collect_expr_symbols(&s.argument, semantic, out), other => { if let Some(e) = argument_to_expression(other) { - collect_expr_idents(e, out); + collect_expr_symbols(e, semantic, out); } } } @@ -605,74 +663,74 @@ fn collect_expr_idents<'a>(expr: &Expression<'a>, out: &mut HashSet<&'a str>) { // is erased; they're irrelevant at runtime. } E::NewExpression(new) => { - collect_expr_idents(&new.callee, out); + collect_expr_symbols(&new.callee, semantic, out); for arg in &new.arguments { match arg { - Argument::SpreadElement(s) => collect_expr_idents(&s.argument, out), + Argument::SpreadElement(s) => collect_expr_symbols(&s.argument, semantic, out), other => { if let Some(e) = argument_to_expression(other) { - collect_expr_idents(e, out); + collect_expr_symbols(e, semantic, out); } } } } } E::ConditionalExpression(cond) => { - collect_expr_idents(&cond.test, out); - collect_expr_idents(&cond.consequent, out); - collect_expr_idents(&cond.alternate, out); + collect_expr_symbols(&cond.test, semantic, out); + collect_expr_symbols(&cond.consequent, semantic, out); + collect_expr_symbols(&cond.alternate, semantic, out); } E::LogicalExpression(log) => { - collect_expr_idents(&log.left, out); - collect_expr_idents(&log.right, out); + collect_expr_symbols(&log.left, semantic, out); + collect_expr_symbols(&log.right, semantic, out); } E::BinaryExpression(bin) => { - collect_expr_idents(&bin.left, out); - collect_expr_idents(&bin.right, out); + collect_expr_symbols(&bin.left, semantic, out); + collect_expr_symbols(&bin.right, semantic, out); } E::UnaryExpression(un) => { - collect_expr_idents(&un.argument, out); + collect_expr_symbols(&un.argument, semantic, out); } E::SequenceExpression(seq) => { for e in &seq.expressions { - collect_expr_idents(e, out); + collect_expr_symbols(e, semantic, out); } } E::ParenthesizedExpression(p) => { - collect_expr_idents(&p.expression, out); + collect_expr_symbols(&p.expression, semantic, out); } E::TemplateLiteral(tpl) => { for e in &tpl.expressions { - collect_expr_idents(e, out); + collect_expr_symbols(e, semantic, out); } } E::TaggedTemplateExpression(tagged) => { - collect_expr_idents(&tagged.tag, out); + collect_expr_symbols(&tagged.tag, semantic, out); for e in &tagged.quasi.expressions { - collect_expr_idents(e, out); + collect_expr_symbols(e, semantic, out); } } E::StaticMemberExpression(member) => { - collect_expr_idents(&member.object, out); + collect_expr_symbols(&member.object, semantic, out); } E::ComputedMemberExpression(member) => { - collect_expr_idents(&member.object, out); - collect_expr_idents(&member.expression, out); + collect_expr_symbols(&member.object, semantic, out); + collect_expr_symbols(&member.expression, semantic, out); } E::PrivateFieldExpression(member) => { - collect_expr_idents(&member.object, out); + collect_expr_symbols(&member.object, semantic, out); } - E::AwaitExpression(a) => collect_expr_idents(&a.argument, out), + E::AwaitExpression(a) => collect_expr_symbols(&a.argument, semantic, out), E::YieldExpression(y) => { if let Some(arg) = &y.argument { - collect_expr_idents(arg, out); + collect_expr_symbols(arg, semantic, out); } } - E::TSAsExpression(ts) => collect_expr_idents(&ts.expression, out), - E::TSSatisfiesExpression(ts) => collect_expr_idents(&ts.expression, out), - E::TSNonNullExpression(ts) => collect_expr_idents(&ts.expression, out), - E::TSTypeAssertion(ts) => collect_expr_idents(&ts.expression, out), - E::TSInstantiationExpression(ts) => collect_expr_idents(&ts.expression, out), + E::TSAsExpression(ts) => collect_expr_symbols(&ts.expression, semantic, out), + E::TSSatisfiesExpression(ts) => collect_expr_symbols(&ts.expression, semantic, out), + E::TSNonNullExpression(ts) => collect_expr_symbols(&ts.expression, semantic, out), + E::TSTypeAssertion(ts) => collect_expr_symbols(&ts.expression, semantic, out), + E::TSInstantiationExpression(ts) => collect_expr_symbols(&ts.expression, semantic, out), // Class expressions inside metadata are exceedingly rare and their // bodies aren't eagerly evaluated; treat them as opaque. E::ClassExpression(_) => {} @@ -684,15 +742,19 @@ fn collect_expr_idents<'a>(expr: &Expression<'a>, out: &mut HashSet<&'a str>) { } } -fn collect_array_element_idents<'a>(el: &ArrayExpressionElement<'a>, out: &mut HashSet<&'a str>) { +fn collect_array_element_symbols<'a>( + el: &ArrayExpressionElement<'a>, + semantic: &Semantic<'a>, + out: &mut HashSet, +) { match el { ArrayExpressionElement::SpreadElement(spread) => { - collect_expr_idents(&spread.argument, out); + collect_expr_symbols(&spread.argument, semantic, out); } ArrayExpressionElement::Elision(_) => {} other => { if let Some(expr) = array_element_to_expression(other) { - collect_expr_idents(expr, out); + collect_expr_symbols(expr, semantic, out); } } } diff --git a/crates/oxc_angular_compiler/src/component/transform.rs b/crates/oxc_angular_compiler/src/component/transform.rs index 57e670808..767653af4 100644 --- a/crates/oxc_angular_compiler/src/component/transform.rs +++ b/crates/oxc_angular_compiler/src/component/transform.rs @@ -27,7 +27,7 @@ use super::decorator::{ extract_component_metadata, find_component_decorator, find_component_decorator_span, }; use super::definition::{const_value_to_expression, generate_component_definitions}; -use super::hoist::collect_hoist_edits; +use super::hoist::{collect_hoist_edits, program_has_angular_decorated_class}; use super::import_elision::{ImportElisionAnalyzer, import_elision_edits}; use super::metadata::{AngularVersion, ComponentMetadata, HostMetadata}; use super::namespace_registry::NamespaceRegistry; @@ -2573,7 +2573,27 @@ pub fn transform_angular_file( // emitted `ɵcmp` static field's `ɵɵProvidersFeature` would evaluate the // reference at class-definition time and throw `ReferenceError`. See // issue #287. - edits.extend(collect_hoist_edits(&parser_ret.program, source)); + // + // The hoister resolves identifier references through `oxc_semantic` so + // a nested-scope shadow of a top-level name can't be mistaken for the + // top-level binding itself. + // + // Gate the Semantic build behind a cheap top-level scan: a real Angular + // codebase contains plenty of plain `.ts` helpers, type-only modules, and + // services without `@Injectable` that we route through this function. For + // those, building a full symbol table just to discover there's nothing to + // hoist is pure overhead. + if program_has_angular_decorated_class(&parser_ret.program) { + // Semantic builder errors (redeclarations, etc.) are intentionally + // dropped: the parser already captured syntax errors into + // `result.diagnostics` upstream, and Semantic-level diagnostics here + // aren't actionable for the hoist pass — we treat the input as + // best-effort and rely on the host build to surface genuine errors. + // The JIT path (see ~line 1380) follows the same convention. + let hoist_semantic = + oxc_semantic::SemanticBuilder::new().build(&parser_ret.program).semantic; + edits.extend(collect_hoist_edits(&parser_ret.program, source, &hoist_semantic)); + } // Apply all edits in one pass if options.sourcemap { From 4d4a3fc9f0991e90b792eb564b589311e276be9a Mon Sep 17 00:00:00 2001 From: LongYinan Date: Wed, 27 May 2026 20:06:53 +0800 Subject: [PATCH 06/21] fix(hoist): lazy fn-value, optional chain, destructured bindings Three Codex review findings on PR #302: 1. Lazy fn-value over-hoisting: when a top-level function is referenced from decorator metadata as a *value* (e.g. `useFactory: makeFactory`) the BFS still chased its body refs and hoisted them above the class. Since Angular invokes such factories lazily, this could introduce a fresh TDZ that didn't exist in the source. Introduce an `eagerly_called` closure (direct callees in any top-level initializer or decorator metadata, transitively expanded through `fn_body_called_symbols`) and gate the BFS body-chase branch plus `expand_through_functions` on it. Regression test: `component_provider_useFactory_function_value_does_not_hoist_body_deps`. 2. `Expression::ChainExpression` swallowed by catch-all: optional chaining like `{ provide: TOKEN?.id, useValue: 1 }` recorded no reference to `TOKEN`. Add an explicit arm that dispatches each `ChainElement` variant (`CallExpression`, member variants, `TSNonNullExpression`) to the matching collection logic, also recording the inner call's direct callee for `f?.()`. Regression test: `component_provider_optional_chain_token_is_hoisted`. 3. Destructured top-level bindings not indexed: `collect_top_level_bindings` only handled `BindingPattern::BindingIdentifier`, so `const { TOKEN } = TOKENS;` never made it into `symbol_to_stmt`. Add `for_each_binding_identifier` recursive walker covering `ObjectPattern`/`ArrayPattern`/`AssignmentPattern` (plus rest elements and nested patterns). Regression test: `component_provider_destructured_top_level_token_is_hoisted`. Co-Authored-By: Claude Opus 4.7 --- .../src/component/hoist.rs | 449 ++++++++++++++---- .../tests/integration_test.rs | 124 +++++ 2 files changed, 486 insertions(+), 87 deletions(-) diff --git a/crates/oxc_angular_compiler/src/component/hoist.rs b/crates/oxc_angular_compiler/src/component/hoist.rs index 0d08ae4e9..6a2ec6574 100644 --- a/crates/oxc_angular_compiler/src/component/hoist.rs +++ b/crates/oxc_angular_compiler/src/component/hoist.rs @@ -35,7 +35,7 @@ use std::collections::{HashMap, HashSet}; use oxc_ast::ast::{ - Argument, ArrayExpressionElement, BindingPattern, Class, Declaration, Decorator, + Argument, ArrayExpressionElement, BindingPattern, ChainElement, Class, Declaration, Decorator, ExportDefaultDeclarationKind, Expression, IdentifierReference, ObjectPropertyKind, Program, Statement, }; @@ -58,6 +58,15 @@ struct StmtInfo { /// Symbols referenced inside any declarator's initializer in this /// statement. Used to drive transitive hoisting. init_symbols: HashSet, + /// Subset of `init_symbols` that appears as a *direct callee* (the + /// callee of `CallExpression` / `NewExpression`, including the inner + /// call of an optional `f?.()`) somewhere in the initializer. Used to + /// seed the "eagerly called" closure: if `f` is in this set and `f` is + /// a top-level function, the function body's references fire at module + /// load when this statement evaluates. Symbols referenced but never + /// called (e.g. `useFactory: f` — Angular's injector invokes `f` lazily) + /// do NOT belong here. + init_called_symbols: HashSet, } /// One statement scheduled for hoisting, keyed by its `stmt_start`. Multiple @@ -90,45 +99,69 @@ pub fn collect_hoist_edits<'a>( // Step 1: index top-level bindings (keyed by SymbolId). // - `symbol_to_stmt`: binding SymbolId → containing statement's `start`. // - `stmt_info`: statement start → end/delete bounds and the union of - // symbol references across the statement's initializers. + // symbol references across the statement's initializers, plus the + // subset that appears as a *direct callee* in the initializer. // - `fn_body_symbol_refs`: top-level function SymbolId → set of symbol // references in its body. Top-level function *declarations* are // JS-hoisted so they never need physical hoisting, but if a hoisted // initializer *calls* them (`const PROVIDERS = makeProviders()`), the // function body runs at module load and any later-declared binding it // touches still TDZ-throws. The BFS consults this map to chase - // identifiers through function-call boundaries. - let (symbol_to_stmt, stmt_info, fn_body_symbol_refs) = + // identifiers through function-call boundaries — but only when the + // function is actually invoked, not merely referenced as a value. + // - `fn_body_called_symbols`: top-level function SymbolId → set of + // symbols directly called in its body. Seeds the transitive + // "eagerly called" closure. + let (symbol_to_stmt, stmt_info, fn_body_symbol_refs, fn_body_called_symbols) = collect_top_level_bindings(program, source, semantic); if symbol_to_stmt.is_empty() && fn_body_symbol_refs.is_empty() { return Vec::new(); } - // Step 2: for every Angular-decorated class, BFS through binding - // initializers starting from the symbols directly referenced in the - // decorator metadata. The plan is keyed by `stmt_start` (not symbol) so - // multi-declarator statements collapse into a single entry, and the - // `insert_at` is updated to the MIN across all referencers — that guards - // against the nondeterministic dedup bug where, with `const A = 1, B = 2;` - // referenced by two different classes, the surviving entry's `insert_at` - // depended on HashMap iteration order and could land *after* the earlier - // class. See PR #302 review. - let mut plan: HashMap = HashMap::new(); - + // Step 2a: gather per-class decorator-metadata symbols (both the full + // reference set and the "direct callee" subset). The direct-callee + // subsets across all classes plus every top-level initializer's + // direct-callee subset seed the `eagerly_called` closure, expanded + // through `fn_body_called_symbols` to fixed point. + // + // The closure represents "every top-level function whose body runs at + // module load". In the BFS the function-body-chasing branch fires + // only for symbols in this set — otherwise a function stored as a + // value (`useFactory: makeFactory`) would pull its body's references + // into the hoist plan and introduce a fresh TDZ that didn't exist + // before. See PR #302 review (Codex). + let mut classes: Vec<(&Class<'a>, u32, HashSet)> = Vec::new(); + let mut decorator_called: HashSet = HashSet::new(); for stmt in &program.body { let Some((class, stmt_start_pos)) = class_of(stmt) else { continue }; if !has_angular_decorator(class) { continue; } - let mut direct: HashSet = HashSet::new(); for decorator in &class.decorators { - collect_decorator_symbols(decorator, semantic, &mut direct); + collect_decorator_symbols(decorator, semantic, &mut direct, &mut decorator_called); } if direct.is_empty() { continue; } + classes.push((class, stmt_start_pos, direct)); + } + let eagerly_called = + compute_eagerly_called(&stmt_info, &decorator_called, &fn_body_called_symbols); + + // Step 2b: for every Angular-decorated class, BFS through binding + // initializers starting from the symbols directly referenced in the + // decorator metadata. The plan is keyed by `stmt_start` (not symbol) so + // multi-declarator statements collapse into a single entry, and the + // `insert_at` is updated to the MIN across all referencers — that guards + // against the nondeterministic dedup bug where, with `const A = 1, B = 2;` + // referenced by two different classes, the surviving entry's `insert_at` + // depended on HashMap iteration order and could land *after* the earlier + // class. See PR #302 review. + let mut plan: HashMap = HashMap::new(); + + for (class, stmt_start_pos, direct) in classes { let class_body_end = class.body.span.end; let effective_start = effective_class_start(class, stmt_start_pos); @@ -176,16 +209,19 @@ pub fn collect_hoist_edits<'a>( worklist.push(s); } } - } else if let Some(body_refs) = fn_body_symbol_refs.get(&symbol) { - // The symbol resolves to a top-level function declaration. + } else if eagerly_called.contains(&symbol) { + // The symbol resolves to a top-level function declaration + // that is *actually called* (transitively) at module load. // Don't hoist the function itself (JS already hoists fn - // decls), but if its body references later bindings, those - // references fire whenever the function is called — and a - // hoisted initializer *will* call it at module load. Chase - // them through the worklist. See PR #302 review (Codex). - for &s in body_refs { - if !visited.contains(&s) { - worklist.push(s); + // decls), but its body's identifier reads fire whenever + // it runs — and "eagerly_called" guarantees it does run + // at module load. Chase those references. See PR #302 + // review (Codex). + if let Some(body_refs) = fn_body_symbol_refs.get(&symbol) { + for &s in body_refs { + if !visited.contains(&s) { + worklist.push(s); + } } } } @@ -200,7 +236,13 @@ pub fn collect_hoist_edits<'a>( // emitted *before* their dependents in the hoisted prelude. Within a // single bucket (same `insert_at`), this guarantees that e.g. `const // TOKEN` precedes `const PROVIDERS = [{ provide: TOKEN, ... }]`. - let order = topological_order(&plan, &symbol_to_stmt, &stmt_info, &fn_body_symbol_refs); + let order = topological_order( + &plan, + &symbol_to_stmt, + &stmt_info, + &fn_body_symbol_refs, + &eagerly_called, + ); // Step 4: emit edits. Group by `insert_at` so multiple statements headed // to the same class become a single insert edit whose text is the @@ -249,6 +291,7 @@ fn topological_order( symbol_to_stmt: &HashMap, stmt_info: &HashMap, fn_body_symbol_refs: &HashMap>, + eagerly_called: &HashSet, ) -> Vec { let plan_starts: HashSet = plan.keys().copied().collect(); @@ -258,19 +301,20 @@ fn topological_order( // already TDZ-safe. // // The "effective init symbols" of a planned statement are the transitive - // closure of its direct `init_symbols` through `fn_body_symbol_refs`: if - // the initializer calls a function, the function body's identifier reads - // also count as references that fire when the hoisted statement - // evaluates. So `const PROVIDERS = makeProviders()` with `function - // makeProviders() { return [{ provide: TOKEN }]; }` must end up after - // `const TOKEN` in the hoisted prelude. See PR #302 review (Codex). + // closure of its direct `init_symbols` through `fn_body_symbol_refs`, + // **restricted to functions in `eagerly_called`**. If the initializer + // calls a function (directly or transitively), the function body's + // identifier reads count as references that fire when the hoisted + // statement evaluates. Functions only stored as values are NOT expanded + // — their bodies don't run at module load. See PR #302 review (Codex). let mut deps: HashMap> = HashMap::with_capacity(plan_starts.len()); for &start in &plan_starts { let Some(info) = stmt_info.get(&start) else { deps.insert(start, Vec::new()); continue; }; - let effective = expand_through_functions(&info.init_symbols, fn_body_symbol_refs); + let effective = + expand_through_functions(&info.init_symbols, fn_body_symbol_refs, eagerly_called); let mut edges: Vec = effective .iter() .filter_map(|s| symbol_to_stmt.get(s)) @@ -324,15 +368,15 @@ fn topological_order( } /// Take a set of symbol references and expand it transitively through -/// `fn_body_symbol_refs`: every time we encounter a symbol that resolves to a -/// top-level function, we add the function body's own symbol references -/// (and recurse). The result is the union of every symbol that the -/// initial set "reaches" via function calls — what would actually fire if -/// you ran the initializer at module load. A `seen` set guards against -/// mutual recursion between top-level functions. +/// `fn_body_symbol_refs`, but only across functions that are in +/// `eagerly_called`. A function only stored as a value (never invoked at +/// module load) doesn't run, so its body's reads must not count toward the +/// hoist plan — chasing them would invent a fresh TDZ. The `seen` set guards +/// against mutual recursion between top-level functions. fn expand_through_functions( seed: &HashSet, fn_body_symbol_refs: &HashMap>, + eagerly_called: &HashSet, ) -> HashSet { let mut out: HashSet = HashSet::new(); let mut worklist: Vec = seed.iter().copied().collect(); @@ -342,6 +386,9 @@ fn expand_through_functions( continue; } out.insert(symbol); + if !eagerly_called.contains(&symbol) { + continue; + } if let Some(body_refs) = fn_body_symbol_refs.get(&symbol) { for &s in body_refs { if !seen.contains(&s) { @@ -353,6 +400,53 @@ fn expand_through_functions( out } +/// Compute the transitive closure of "top-level functions that actually run +/// at module load". Seeded with every direct callee in either: +/// * a top-level `VariableDeclaration` initializer (`stmt_info[*].init_called_symbols`) +/// * Angular decorator metadata on any class (`decorator_called`). +/// +/// Expanded through `fn_body_called_symbols`: if `f` is in the set and `f` +/// directly calls `g`, then `g` is too. Fixed-point — runs until the worklist +/// drains. A function stored as a value (referenced but not called) is NOT +/// added. +/// +/// Used by both the BFS (to gate the function-body-chasing branch) and the +/// topological sort (to gate `expand_through_functions`). Without this, a +/// `useFactory: makeFactory` in providers — where Angular invokes +/// `makeFactory` lazily at injection time, NOT at class-definition time — +/// would still pull in `makeFactory`'s body refs and hoist them above the +/// class, sometimes inventing a new TDZ. +fn compute_eagerly_called( + stmt_info: &HashMap, + decorator_called: &HashSet, + fn_body_called_symbols: &HashMap>, +) -> HashSet { + let mut out: HashSet = HashSet::new(); + let mut worklist: Vec = Vec::new(); + for info in stmt_info.values() { + for &s in &info.init_called_symbols { + if out.insert(s) { + worklist.push(s); + } + } + } + for &s in decorator_called { + if out.insert(s) { + worklist.push(s); + } + } + while let Some(symbol) = worklist.pop() { + if let Some(calls) = fn_body_called_symbols.get(&symbol) { + for &s in calls { + if out.insert(s) { + worklist.push(s); + } + } + } + } + out +} + /// Compute the effective start of a class statement, ignoring trailing /// whitespace but spanning any leading decorators that will remain in the /// source. We don't have access to the in-progress `decorator_spans_to_remove` @@ -426,7 +520,7 @@ fn resolve_symbol(id: &IdentifierReference<'_>, semantic: &Semantic<'_>) -> Opti } /// Walk top-level statements and index every variable binding identifier -/// they declare, returning three complementary maps: +/// they declare, returning four complementary maps: /// * `symbol_to_stmt`: binding `SymbolId` → containing statement's `start`. /// * `stmt_info`: statement `start` → end/delete bounds and the union of /// symbol references across every declarator's initializer. Used to drive @@ -434,6 +528,10 @@ fn resolve_symbol(id: &IdentifierReference<'_>, semantic: &Semantic<'_>) -> Opti /// * `fn_body_symbol_refs`: top-level function `SymbolId` → symbols /// referenced in its body. Used to chase TDZ-relevant identifiers across /// function-call boundaries. +/// * `fn_body_called_symbols`: top-level function `SymbolId` → symbols of +/// functions/`new` targets directly invoked inside its body. Feeds +/// `compute_eagerly_called` so the BFS only chases bodies that are +/// eagerly reachable from the decorator metadata's call graph. /// /// Only `VariableDeclaration` (const/let/var) and the `export` form of it are /// considered: @@ -448,11 +546,17 @@ fn collect_top_level_bindings<'a>( program: &Program<'a>, source: &str, semantic: &Semantic<'a>, -) -> (HashMap, HashMap, HashMap>) { +) -> ( + HashMap, + HashMap, + HashMap>, + HashMap>, +) { let bytes = source.as_bytes(); let mut symbol_to_stmt: HashMap = HashMap::new(); let mut stmt_info: HashMap = HashMap::new(); let mut fn_body_symbol_refs: HashMap> = HashMap::new(); + let mut fn_body_called_symbols: HashMap> = HashMap::new(); for stmt in &program.body { let var_decl = match stmt { @@ -470,18 +574,29 @@ fn collect_top_level_bindings<'a>( stmt_end: span.end, delete_end: end_with_trailing_newline(span.end, bytes), init_symbols: HashSet::new(), + init_called_symbols: HashSet::new(), }; for declarator in &decl.declarations { - if let BindingPattern::BindingIdentifier(id) = &declarator.id { + // Walk the declarator's `BindingPattern` recursively so that + // destructuring forms (`const { TOKEN } = obj;`, `const [a, b] + // = arr;`, `const { a: { b } } = obj;`, …) also index every + // binding identifier they introduce. Without this, decorator + // metadata referencing such a binding never resolves to its + // declaring statement and the hoist is skipped. See PR #302 + // Codex review. + for_each_binding_identifier(&declarator.id, &mut |id| { if let Some(symbol_id) = id.symbol_id.get() { symbol_to_stmt.insert(symbol_id, stmt_start); } - } - // Destructuring patterns are deliberately ignored — see - // collect_top_level_bindings docstring above. + }); if let Some(init) = &declarator.init { - collect_expr_symbols(init, semantic, &mut info.init_symbols); + collect_expr_symbols( + init, + semantic, + &mut info.init_symbols, + &mut info.init_called_symbols, + ); } } stmt_info.insert(stmt_start, info); @@ -509,14 +624,59 @@ fn collect_top_level_bindings<'a>( if let (Some(id), Some(body)) = (&func.id, &func.body) { let Some(fn_symbol) = id.symbol_id.get() else { continue }; let mut refs: HashSet = HashSet::new(); - let mut visitor = FunctionBodyIdentVisitor { semantic, out: &mut refs }; + let mut called: HashSet = HashSet::new(); + let mut visitor = + FunctionBodyIdentVisitor { semantic, out: &mut refs, called: &mut called }; visitor.visit_function_body(body); fn_body_symbol_refs.insert(fn_symbol, refs); + fn_body_called_symbols.insert(fn_symbol, called); } } } - (symbol_to_stmt, stmt_info, fn_body_symbol_refs) + (symbol_to_stmt, stmt_info, fn_body_symbol_refs, fn_body_called_symbols) +} + +/// Walk a `BindingPattern` and invoke `f` for every nested `BindingIdentifier` +/// it introduces. Handles `BindingIdentifier` (the simple `const x` case), +/// `ObjectPattern` (each `BindingProperty`'s `value`, plus `rest`), +/// `ArrayPattern` (each element `Option`, plus `rest`), and +/// `AssignmentPattern` (the `left` pattern of `const { x = 1 } = obj`). +/// Default expressions on `AssignmentPattern` (e.g. `const { x = SOMETHING } +/// = obj`) are nested *inside* the binding pattern but are NOT walked here — +/// a deliberate conservative choice. In the rare case where a default +/// expression references a later-declared top-level binding, that binding +/// will not be transitively hoisted. Decorator metadata almost never uses +/// destructured names with such defaults, so this gap is accepted rather +/// than implemented. +fn for_each_binding_identifier<'a>( + pat: &BindingPattern<'a>, + f: &mut impl FnMut(&oxc_ast::ast::BindingIdentifier<'a>), +) { + match pat { + BindingPattern::BindingIdentifier(id) => f(id), + BindingPattern::ObjectPattern(obj) => { + for prop in &obj.properties { + for_each_binding_identifier(&prop.value, f); + } + if let Some(rest) = &obj.rest { + for_each_binding_identifier(&rest.argument, f); + } + } + BindingPattern::ArrayPattern(arr) => { + for el in &arr.elements { + if let Some(el) = el { + for_each_binding_identifier(el, f); + } + } + if let Some(rest) = &arr.rest { + for_each_binding_identifier(&rest.argument, f); + } + } + BindingPattern::AssignmentPattern(assign) => { + for_each_binding_identifier(&assign.left, f); + } + } } /// AST visitor that collects every `IdentifierReference` reachable from a @@ -524,9 +684,16 @@ fn collect_top_level_bindings<'a>( /// the same "lazy bodies are opaque" rule the existing expression walker /// uses: nested function/arrow expressions inside the body don't run when /// the outer function is called, so their bodies are skipped. +/// +/// `called` receives the subset of `out` that appears as a *direct callee* +/// of a `CallExpression` / `NewExpression` (including the inner call of a +/// `f?.()` chain) inside the body. Used to drive the "eagerly called" +/// closure: if function `f` is called at module load, then the symbols +/// `f`'s body directly calls fire too, transitively. struct FunctionBodyIdentVisitor<'a, 'b> { semantic: &'b Semantic<'a>, out: &'b mut HashSet, + called: &'b mut HashSet, } impl<'a, 'b> Visit<'a> for FunctionBodyIdentVisitor<'a, 'b> { @@ -536,6 +703,18 @@ impl<'a, 'b> Visit<'a> for FunctionBodyIdentVisitor<'a, 'b> { } } + fn visit_call_expression(&mut self, it: &oxc_ast::ast::CallExpression<'a>) { + record_direct_callee(&it.callee, self.semantic, self.called); + // Continue default traversal so identifier references inside callee + // and arguments still feed `self.out`. + oxc_ast_visit::walk::walk_call_expression(self, it); + } + + fn visit_new_expression(&mut self, it: &oxc_ast::ast::NewExpression<'a>) { + record_direct_callee(&it.callee, self.semantic, self.called); + oxc_ast_visit::walk::walk_new_expression(self, it); + } + // Nested function/arrow expressions only execute when *they* are called, // not when the enclosing function is. Don't descend. fn visit_function( @@ -572,10 +751,13 @@ fn end_with_trailing_newline(end: u32, bytes: &[u8]) -> u32 { /// Collect symbols referenced inside the decorator argument expressions. /// Only the decorator's call arguments (i.e. the metadata object) are walked. +/// `called` receives the subset of `out` that appears as a *direct callee* +/// of a call/new expression — used to drive the "eagerly called" closure. fn collect_decorator_symbols<'a>( decorator: &Decorator<'a>, semantic: &Semantic<'a>, out: &mut HashSet, + called: &mut HashSet, ) { let Expression::CallExpression(call) = &decorator.expression else { return; @@ -583,11 +765,11 @@ fn collect_decorator_symbols<'a>( for arg in &call.arguments { match arg { Argument::SpreadElement(spread) => { - collect_expr_symbols(&spread.argument, semantic, out); + collect_expr_symbols(&spread.argument, semantic, out, called); } other => { if let Some(expr) = argument_to_expression(other) { - collect_expr_symbols(expr, semantic, out); + collect_expr_symbols(expr, semantic, out, called); } } } @@ -615,6 +797,7 @@ fn collect_expr_symbols<'a>( expr: &Expression<'a>, semantic: &Semantic<'a>, out: &mut HashSet, + called: &mut HashSet, ) { use Expression as E; match expr { @@ -625,7 +808,7 @@ fn collect_expr_symbols<'a>( } E::ArrayExpression(arr) => { for el in &arr.elements { - collect_array_element_symbols(el, semantic, out); + collect_array_element_symbols(el, semantic, out, called); } } E::ObjectExpression(obj) => { @@ -636,25 +819,28 @@ fn collect_expr_symbols<'a>( // key identifier; static keys don't. if p.computed { if let Some(key_expr) = p.key.as_expression() { - collect_expr_symbols(key_expr, semantic, out); + collect_expr_symbols(key_expr, semantic, out, called); } } - collect_expr_symbols(&p.value, semantic, out); + collect_expr_symbols(&p.value, semantic, out, called); } ObjectPropertyKind::SpreadProperty(spread) => { - collect_expr_symbols(&spread.argument, semantic, out); + collect_expr_symbols(&spread.argument, semantic, out, called); } } } } E::CallExpression(call) => { - collect_expr_symbols(&call.callee, semantic, out); + record_direct_callee(&call.callee, semantic, called); + collect_expr_symbols(&call.callee, semantic, out, called); for arg in &call.arguments { match arg { - Argument::SpreadElement(s) => collect_expr_symbols(&s.argument, semantic, out), + Argument::SpreadElement(s) => { + collect_expr_symbols(&s.argument, semantic, out, called); + } other => { if let Some(e) = argument_to_expression(other) { - collect_expr_symbols(e, semantic, out); + collect_expr_symbols(e, semantic, out, called); } } } @@ -663,74 +849,87 @@ fn collect_expr_symbols<'a>( // is erased; they're irrelevant at runtime. } E::NewExpression(new) => { - collect_expr_symbols(&new.callee, semantic, out); + record_direct_callee(&new.callee, semantic, called); + collect_expr_symbols(&new.callee, semantic, out, called); for arg in &new.arguments { match arg { - Argument::SpreadElement(s) => collect_expr_symbols(&s.argument, semantic, out), + Argument::SpreadElement(s) => { + collect_expr_symbols(&s.argument, semantic, out, called); + } other => { if let Some(e) = argument_to_expression(other) { - collect_expr_symbols(e, semantic, out); + collect_expr_symbols(e, semantic, out, called); } } } } } E::ConditionalExpression(cond) => { - collect_expr_symbols(&cond.test, semantic, out); - collect_expr_symbols(&cond.consequent, semantic, out); - collect_expr_symbols(&cond.alternate, semantic, out); + collect_expr_symbols(&cond.test, semantic, out, called); + collect_expr_symbols(&cond.consequent, semantic, out, called); + collect_expr_symbols(&cond.alternate, semantic, out, called); } E::LogicalExpression(log) => { - collect_expr_symbols(&log.left, semantic, out); - collect_expr_symbols(&log.right, semantic, out); + collect_expr_symbols(&log.left, semantic, out, called); + collect_expr_symbols(&log.right, semantic, out, called); } E::BinaryExpression(bin) => { - collect_expr_symbols(&bin.left, semantic, out); - collect_expr_symbols(&bin.right, semantic, out); + collect_expr_symbols(&bin.left, semantic, out, called); + collect_expr_symbols(&bin.right, semantic, out, called); } E::UnaryExpression(un) => { - collect_expr_symbols(&un.argument, semantic, out); + collect_expr_symbols(&un.argument, semantic, out, called); } E::SequenceExpression(seq) => { for e in &seq.expressions { - collect_expr_symbols(e, semantic, out); + collect_expr_symbols(e, semantic, out, called); } } E::ParenthesizedExpression(p) => { - collect_expr_symbols(&p.expression, semantic, out); + collect_expr_symbols(&p.expression, semantic, out, called); } E::TemplateLiteral(tpl) => { for e in &tpl.expressions { - collect_expr_symbols(e, semantic, out); + collect_expr_symbols(e, semantic, out, called); } } E::TaggedTemplateExpression(tagged) => { - collect_expr_symbols(&tagged.tag, semantic, out); + record_direct_callee(&tagged.tag, semantic, called); + collect_expr_symbols(&tagged.tag, semantic, out, called); for e in &tagged.quasi.expressions { - collect_expr_symbols(e, semantic, out); + collect_expr_symbols(e, semantic, out, called); } } E::StaticMemberExpression(member) => { - collect_expr_symbols(&member.object, semantic, out); + collect_expr_symbols(&member.object, semantic, out, called); } E::ComputedMemberExpression(member) => { - collect_expr_symbols(&member.object, semantic, out); - collect_expr_symbols(&member.expression, semantic, out); + collect_expr_symbols(&member.object, semantic, out, called); + collect_expr_symbols(&member.expression, semantic, out, called); } E::PrivateFieldExpression(member) => { - collect_expr_symbols(&member.object, semantic, out); + collect_expr_symbols(&member.object, semantic, out, called); } - E::AwaitExpression(a) => collect_expr_symbols(&a.argument, semantic, out), + E::AwaitExpression(a) => collect_expr_symbols(&a.argument, semantic, out, called), E::YieldExpression(y) => { if let Some(arg) = &y.argument { - collect_expr_symbols(arg, semantic, out); + collect_expr_symbols(arg, semantic, out, called); } } - E::TSAsExpression(ts) => collect_expr_symbols(&ts.expression, semantic, out), - E::TSSatisfiesExpression(ts) => collect_expr_symbols(&ts.expression, semantic, out), - E::TSNonNullExpression(ts) => collect_expr_symbols(&ts.expression, semantic, out), - E::TSTypeAssertion(ts) => collect_expr_symbols(&ts.expression, semantic, out), - E::TSInstantiationExpression(ts) => collect_expr_symbols(&ts.expression, semantic, out), + E::TSAsExpression(ts) => collect_expr_symbols(&ts.expression, semantic, out, called), + E::TSSatisfiesExpression(ts) => collect_expr_symbols(&ts.expression, semantic, out, called), + E::TSNonNullExpression(ts) => collect_expr_symbols(&ts.expression, semantic, out, called), + E::TSTypeAssertion(ts) => collect_expr_symbols(&ts.expression, semantic, out, called), + E::TSInstantiationExpression(ts) => { + collect_expr_symbols(&ts.expression, semantic, out, called); + } + // Optional-chaining expressions (`TOKEN?.id`, `f?.()`). The inner + // `ChainElement` mirrors a small subset of `Expression`; dispatch + // each variant to the same logic the matching `Expression` arm + // uses so identifier references inside the chain are collected. + E::ChainExpression(chain) => { + collect_chain_element_symbols(&chain.expression, semantic, out, called); + } // Class expressions inside metadata are exceedingly rare and their // bodies aren't eagerly evaluated; treat them as opaque. E::ClassExpression(_) => {} @@ -742,19 +941,95 @@ fn collect_expr_symbols<'a>( } } +/// If `callee` is a *direct* identifier reference (peeling through +/// parentheses and TS type-only wrappers), record its symbol in `called`. +/// Member callees (`foo.bar()`) and other complex expressions are skipped +/// — only direct callees of `CallExpression`/`NewExpression` count as +/// eager invocations of a top-level function. +fn record_direct_callee<'a>( + callee: &Expression<'a>, + semantic: &Semantic<'a>, + called: &mut HashSet, +) { + use Expression as E; + let mut cur = callee; + loop { + match cur { + E::Identifier(id) => { + if let Some(symbol) = resolve_symbol(id, semantic) { + called.insert(symbol); + } + return; + } + E::ParenthesizedExpression(p) => cur = &p.expression, + E::TSAsExpression(ts) => cur = &ts.expression, + E::TSSatisfiesExpression(ts) => cur = &ts.expression, + E::TSNonNullExpression(ts) => cur = &ts.expression, + E::TSTypeAssertion(ts) => cur = &ts.expression, + E::TSInstantiationExpression(ts) => cur = &ts.expression, + _ => return, + } + } +} + +/// Mirror of [`collect_expr_symbols`] for the small set of node kinds that +/// can appear directly inside an `Expression::ChainExpression`. Without this, +/// optional-chaining (`TOKEN?.id`, `f?.()`) would be silently dropped by +/// the catch-all in `collect_expr_symbols` — and decorator metadata +/// referencing the chained binding wouldn't hoist it. +fn collect_chain_element_symbols<'a>( + el: &ChainElement<'a>, + semantic: &Semantic<'a>, + out: &mut HashSet, + called: &mut HashSet, +) { + match el { + ChainElement::CallExpression(call) => { + record_direct_callee(&call.callee, semantic, called); + collect_expr_symbols(&call.callee, semantic, out, called); + for arg in &call.arguments { + match arg { + Argument::SpreadElement(s) => { + collect_expr_symbols(&s.argument, semantic, out, called); + } + other => { + if let Some(e) = argument_to_expression(other) { + collect_expr_symbols(e, semantic, out, called); + } + } + } + } + } + ChainElement::StaticMemberExpression(member) => { + collect_expr_symbols(&member.object, semantic, out, called); + } + ChainElement::ComputedMemberExpression(member) => { + collect_expr_symbols(&member.object, semantic, out, called); + collect_expr_symbols(&member.expression, semantic, out, called); + } + ChainElement::PrivateFieldExpression(member) => { + collect_expr_symbols(&member.object, semantic, out, called); + } + ChainElement::TSNonNullExpression(ts) => { + collect_expr_symbols(&ts.expression, semantic, out, called); + } + } +} + fn collect_array_element_symbols<'a>( el: &ArrayExpressionElement<'a>, semantic: &Semantic<'a>, out: &mut HashSet, + called: &mut HashSet, ) { match el { ArrayExpressionElement::SpreadElement(spread) => { - collect_expr_symbols(&spread.argument, semantic, out); + collect_expr_symbols(&spread.argument, semantic, out, called); } ArrayExpressionElement::Elision(_) => {} other => { if let Some(expr) = array_element_to_expression(other) { - collect_expr_symbols(expr, semantic, out); + collect_expr_symbols(expr, semantic, out, called); } } } diff --git a/crates/oxc_angular_compiler/tests/integration_test.rs b/crates/oxc_angular_compiler/tests/integration_test.rs index 9372b924f..45dfb21ec 100644 --- a/crates/oxc_angular_compiler/tests/integration_test.rs +++ b/crates/oxc_angular_compiler/tests/integration_test.rs @@ -10818,3 +10818,127 @@ export class TestComponent {}const TOKEN = 'tok';\n"; result.code ); } + +/// A top-level function referenced from decorator metadata as a *value* +/// (e.g. `useFactory: makeFactory`) is NOT called at class-definition time — +/// Angular's injector calls it later, when the provider is actually resolved. +/// So later-declared bindings reachable only through that function's body +/// must NOT be hoisted. Hoisting them would create a NEW TDZ that didn't +/// exist in the original source. +/// +/// Regression test for PR #302 Codex review: BFS function-body chasing +/// branch must only fire when the function is eagerly called. +#[test] +fn component_provider_useFactory_function_value_does_not_hoist_body_deps() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'x', template: '', providers: [{ provide: 'x', useFactory: makeFactory }] }) +class TestComponent {} +function makeFactory() { return TOKEN; } +const TOKEN = TestComponent; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + let token_pos = result + .code + .find("const TOKEN") + .unwrap_or_else(|| panic!("Expected `const TOKEN` to be present.\nCode:\n{}", result.code)); + + assert_eq!( + result.code.matches("const TOKEN").count(), + 1, + "`const TOKEN` should appear exactly once.\nCode:\n{}", + result.code + ); + assert!( + token_pos > class_pos, + "`const TOKEN` must NOT be hoisted — `makeFactory` is stored as a value, not \ + called at module load. Hoisting `TOKEN` above the class would TDZ on \ + `TestComponent`. token@{token_pos} class@{class_pos}\nCode:\n{}", + result.code + ); +} + +/// `Expression::ChainExpression` (optional chaining, `TOKEN?.id` or `f?.()`) +/// must contribute identifier references to the decorator-metadata symbol +/// scan, so that the referenced top-level binding gets hoisted. +/// +/// Regression test for PR #302 Codex review: the catch-all `_ => {}` arm +/// in `collect_expr_symbols` was silently dropping `ChainExpression`. +#[test] +fn component_provider_optional_chain_token_is_hoisted() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'x', template: '', providers: [{ provide: TOKEN?.id, useValue: 1 }] }) +class TestComponent {} +const TOKEN = { id: 'tok' }; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let token_pos = result + .code + .find("const TOKEN") + .unwrap_or_else(|| panic!("Expected `const TOKEN` to be present.\nCode:\n{}", result.code)); + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + assert!( + token_pos < class_pos, + "`const TOKEN` (referenced via `TOKEN?.id` in providers) must be \ + hoisted above the class. token@{token_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + assert_eq!( + result.code.matches("const TOKEN").count(), + 1, + "`const TOKEN` should appear exactly once.\nCode:\n{}", + result.code + ); +} + +/// Top-level destructuring patterns must be indexed: `const { TOKEN } = X;` +/// binds `TOKEN`, and decorator metadata referencing `TOKEN` must hoist that +/// declaration above the class. +/// +/// Regression test for PR #302 Codex review: `collect_top_level_bindings` +/// only handled `BindingPattern::BindingIdentifier`, ignoring object/array +/// destructuring patterns entirely. +#[test] +fn component_provider_destructured_top_level_token_is_hoisted() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +const TOKENS = { TOKEN: 'tok' }; +@Component({ selector: 'x', template: '', providers: [{ provide: TOKEN, useValue: 1 }] }) +class TestComponent {} +const { TOKEN } = TOKENS; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let token_pos = result.code.find("const { TOKEN }").unwrap_or_else(|| { + panic!("Expected `const {{ TOKEN }}` to be present.\nCode:\n{}", result.code) + }); + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + assert!( + token_pos < class_pos, + "`const {{ TOKEN }}` (destructured from `TOKENS`) must be hoisted \ + above the class. token@{token_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + assert_eq!( + result.code.matches("const { TOKEN }").count(), + 1, + "`const {{ TOKEN }}` should appear exactly once.\nCode:\n{}", + result.code + ); +} From 62de526973c38f8d161ba6999596d0f31f194553 Mon Sep 17 00:00:00 2001 From: LongYinan Date: Wed, 27 May 2026 21:25:05 +0800 Subject: [PATCH 07/21] fix(hoist): per-class eager-call, IIFE metadata, multi-decl class-ref guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three bot-review findings on the post-refactor hoist: 1. Multi-declarator over-hoist (Codex). When a `const TOKEN = 'tok', BACKREF = TestComponent;` declarator list is hoisted because TOKEN is referenced in decorator metadata, the peer `BACKREF = TestComponent` moves above the class and introduces a new TDZ. Added a safe-skip: refuse to hoist a statement when any of its initializer symbols resolves to a top-level class declared at position >= `effective_start`. Indexed via a new `collect_top_level_class_positions` helper. (Split-hoist is intentionally out of scope; the safe-skip preserves the "no regressions" invariant — the user's existing TDZ on the directly-referenced symbol stays, but we don't introduce a new one.) 2. IIFE bodies in decorator metadata missed (Codex). `providers: (() => [{ provide: TOKEN }])()` runs the arrow body eagerly at class init, but the lazy-bodies rule was treating every arrow/function expression body as opaque. New `walk_iife_callee_body` detects an immediately- invoked function/arrow callee (peeling parens + TS wrappers) and walks the body via `FunctionBodyIdentVisitor`; the wider lazy rule still applies elsewhere. Symmetric handling for `NewExpression` and `ChainElement::CallExpression`. 3. Global `eagerly_called` bleeds across classes (Cursor). Previously seeded from every top-level initializer's call sites + every decorator's call sites, so `const X = foo()` in one part of the module would mark `foo` as eagerly-called for every other class too — even classes that only reference `foo` as a value (`useFactory: foo`). Replaced the global precomputation with a per-class closure inside the BFS: seeded only from THIS class's `decorator_called`, extended incrementally as the BFS plans bindings (adding each planned init's `init_called_symbols` and re-closing through `fn_body_called_symbols`). Functions popped before they became eagerly_called are deferred and belatedly chased when promotion happens. Topological sort uses the union of per-class sets so dependency edges still see every fn whose body fires at module load for some hoisted statement. Regression tests for each finding: - component_provider_multi_declarator_with_class_self_ref_skips_hoist - component_provider_iife_metadata_hoists_inner_token - component_provider_useFactory_value_does_not_chase_global_eager_caller All 350 integration tests pass; fmt clean. Co-Authored-By: Claude Opus 4.7 --- .../src/component/hoist.rs | 314 ++++++++++++++---- .../tests/integration_test.rs | 140 ++++++++ 2 files changed, 387 insertions(+), 67 deletions(-) diff --git a/crates/oxc_angular_compiler/src/component/hoist.rs b/crates/oxc_angular_compiler/src/component/hoist.rs index 6a2ec6574..53976412b 100644 --- a/crates/oxc_angular_compiler/src/component/hoist.rs +++ b/crates/oxc_angular_compiler/src/component/hoist.rs @@ -118,38 +118,45 @@ pub fn collect_hoist_edits<'a>( return Vec::new(); } + // Index every top-level class declaration by its binding `SymbolId` → + // the class's `span.start`. Used by the BFS to refuse hoisting any + // statement whose initializer references a class that lives at-or-after + // the protect site — see the safe-skip guard near `plan.entry(...)`. + // Regression for Codex review #3310709319 on PR #302. + let top_level_class_positions = collect_top_level_class_positionss(program); + // Step 2a: gather per-class decorator-metadata symbols (both the full - // reference set and the "direct callee" subset). The direct-callee - // subsets across all classes plus every top-level initializer's - // direct-callee subset seed the `eagerly_called` closure, expanded - // through `fn_body_called_symbols` to fixed point. + // reference set and the "direct callee" subset). Each class gets its + // OWN `decorator_called` set; it seeds a *per-class* `eagerly_called` + // closure computed inside the BFS loop below. // - // The closure represents "every top-level function whose body runs at - // module load". In the BFS the function-body-chasing branch fires - // only for symbols in this set — otherwise a function stored as a - // value (`useFactory: makeFactory`) would pull its body's references - // into the hoist plan and introduce a fresh TDZ that didn't exist - // before. See PR #302 review (Codex). - let mut classes: Vec<(&Class<'a>, u32, HashSet)> = Vec::new(); - let mut decorator_called: HashSet = HashSet::new(); + // Why per-class (not global): the `eagerly_called` closure represents + // "every top-level function whose body runs at module load *because of + // this class's evaluation*". If `function foo() { return TOKEN; }` is + // called by `const X = foo()` elsewhere in the module but only + // referenced as a *value* in this class's metadata + // (`useFactory: foo`), foo's body does NOT fire when this class + // evaluates — and chasing TOKEN would invent a new TDZ on the class + // (when `TOKEN = TestComponent`). A global `eagerly_called` (seeded + // from every module-init call site) over-reaches across classes. See + // PR #302 review (Cursor #3310734461). + let mut classes: Vec<(&Class<'a>, u32, HashSet, HashSet)> = Vec::new(); for stmt in &program.body { let Some((class, stmt_start_pos)) = class_of(stmt) else { continue }; if !has_angular_decorator(class) { continue; } let mut direct: HashSet = HashSet::new(); + let mut decorator_called: HashSet = HashSet::new(); for decorator in &class.decorators { collect_decorator_symbols(decorator, semantic, &mut direct, &mut decorator_called); } if direct.is_empty() { continue; } - classes.push((class, stmt_start_pos, direct)); + classes.push((class, stmt_start_pos, direct, decorator_called)); } - let eagerly_called = - compute_eagerly_called(&stmt_info, &decorator_called, &fn_body_called_symbols); - // Step 2b: for every Angular-decorated class, BFS through binding // initializers starting from the symbols directly referenced in the // decorator metadata. The plan is keyed by `stmt_start` (not symbol) so @@ -160,13 +167,43 @@ pub fn collect_hoist_edits<'a>( // depended on HashMap iteration order and could land *after* the earlier // class. See PR #302 review. let mut plan: HashMap = HashMap::new(); + // Union of per-class `eagerly_called` sets for all classes that + // contributed to the plan. The topological sort's edge expansion + // (`expand_through_functions`) must see every function whose body + // could fire at module load *for some class in the plan*, so that + // dependency edges between planned statements are computed against + // the same eager-evaluation set used to plan them. + let mut combined_eagerly_called: HashSet = HashSet::new(); - for (class, stmt_start_pos, direct) in classes { + for (class, stmt_start_pos, direct, decorator_called) in classes { let class_body_end = class.body.span.end; let effective_start = effective_class_start(class, stmt_start_pos); + // Per-class `eagerly_called`, seeded only from THIS class's + // decorator metadata direct-callees and closed through + // `fn_body_called_symbols`. As the BFS visits new binding + // statements, we splice each statement's `init_called_symbols` + // into the set and re-close — so a hoisted binding whose + // initializer calls `g()` makes `g` (and everything `g` + // transitively calls) eagerly evaluated for the chase. + let mut eagerly_called: HashSet = HashSet::new(); + let mut call_worklist: Vec = Vec::new(); + for &s in &decorator_called { + if eagerly_called.insert(s) { + call_worklist.push(s); + } + } + close_eagerly_called(&mut eagerly_called, &mut call_worklist, &fn_body_called_symbols); + let mut worklist: Vec = direct.into_iter().collect(); let mut visited: HashSet = HashSet::new(); + // Track function symbols whose bodies we've already chased so we + // can belatedly chase them if they become eagerly_called *after* + // the BFS has already popped them. + let mut chased_fn_bodies: HashSet = HashSet::new(); + // Functions popped before they became eagerly_called — their body + // refs need to be re-pushed when they do. + let mut deferred_fns: HashSet = HashSet::new(); while let Some(symbol) = worklist.pop() { if !visited.insert(symbol) { continue; @@ -183,6 +220,33 @@ pub fn collect_hoist_edits<'a>( continue; } + // Safe-skip guard: if hoisting this statement would put any + // of its initializer's references to a top-level class + // ahead of that class's declaration, don't hoist. The + // user's existing TDZ on the directly-referenced binding + // (e.g. `TOKEN`) is *not* fixed here — but at least we + // don't *introduce* a new TDZ on the class. + // + // Concretely guards against the multi-declarator case + // `const TOKEN = 'tok', BACKREF = TestComponent;` where + // hoisting the whole statement above `class TestComponent` + // would leave `BACKREF = TestComponent` reading a not-yet- + // declared class. The conservative alternative — splitting + // the statement into per-declarator emissions — is out of + // scope; this safe-skip is the minimal "no regressions" + // defense. + // + // The check uses `>=`: a class declared at exactly + // `effective_start` is itself the class we're protecting + // — definitely blocking. Regression for Codex review + // #3310709319 on PR #302. + let stmt_references_later_class = info.init_symbols.iter().any(|s| { + top_level_class_positions.get(s).is_some_and(|&pos| pos >= effective_start) + }); + if stmt_references_later_class { + continue; + } + plan.entry(stmt_start) .and_modify(|p| { if effective_start < p.insert_at { @@ -195,6 +259,41 @@ pub fn collect_hoist_edits<'a>( insert_at: effective_start, }); + // The hoisted statement's initializer also runs at module + // load. Any function it calls (directly or transitively + // through `fn_body_called_symbols`) joins the eagerly- + // called set, so its body refs are chased too. Belatedly + // chase any function we already popped from the worklist + // *before* it became eagerly_called. + let mut newly_called: Vec = Vec::new(); + for &s in &info.init_called_symbols { + if eagerly_called.insert(s) { + newly_called.push(s); + } + } + close_eagerly_called( + &mut eagerly_called, + &mut newly_called, + &fn_body_called_symbols, + ); + // Belated chase: any fn we already saw but skipped because + // it wasn't eagerly_called at the time. Re-push its body + // refs onto the worklist. + let now_eager: Vec = + deferred_fns.iter().copied().filter(|s| eagerly_called.contains(s)).collect(); + for s in now_eager { + deferred_fns.remove(&s); + if chased_fn_bodies.insert(s) { + if let Some(body_refs) = fn_body_symbol_refs.get(&s) { + for &r in body_refs { + if !visited.contains(&r) { + worklist.push(r); + } + } + } + } + } + // Transitive hoist: if this binding's initializer references // another later-declared binding, that one must move above // the class too — otherwise the *hoisted* statement itself @@ -211,21 +310,33 @@ pub fn collect_hoist_edits<'a>( } } else if eagerly_called.contains(&symbol) { // The symbol resolves to a top-level function declaration - // that is *actually called* (transitively) at module load. - // Don't hoist the function itself (JS already hoists fn - // decls), but its body's identifier reads fire whenever - // it runs — and "eagerly_called" guarantees it does run - // at module load. Chase those references. See PR #302 - // review (Codex). - if let Some(body_refs) = fn_body_symbol_refs.get(&symbol) { - for &s in body_refs { - if !visited.contains(&s) { - worklist.push(s); + // that is *actually called* (transitively) at module load + // *for this class*. Don't hoist the function itself (JS + // already hoists fn decls), but its body's identifier + // reads fire whenever it runs. Chase those references. + // See PR #302 review (Codex). + if chased_fn_bodies.insert(symbol) { + if let Some(body_refs) = fn_body_symbol_refs.get(&symbol) { + for &s in body_refs { + if !visited.contains(&s) { + worklist.push(s); + } } } } + } else if fn_body_symbol_refs.contains_key(&symbol) { + // Top-level function not (yet) in eagerly_called for this + // class. Defer — if a later visit promotes it (because some + // planned binding's initializer calls it), we'll belatedly + // chase its body. See PR #302 review (Cursor). + deferred_fns.insert(symbol); } } + // Fold this class's eagerly_called into the combined set used by + // the topological sort below. + for s in eagerly_called { + combined_eagerly_called.insert(s); + } } if plan.is_empty() { @@ -241,7 +352,7 @@ pub fn collect_hoist_edits<'a>( &symbol_to_stmt, &stmt_info, &fn_body_symbol_refs, - &eagerly_called, + &combined_eagerly_called, ); // Step 4: emit edits. Group by `insert_at` so multiple statements headed @@ -400,51 +511,37 @@ fn expand_through_functions( out } -/// Compute the transitive closure of "top-level functions that actually run -/// at module load". Seeded with every direct callee in either: -/// * a top-level `VariableDeclaration` initializer (`stmt_info[*].init_called_symbols`) -/// * Angular decorator metadata on any class (`decorator_called`). +/// Close the `eagerly_called` set under `fn_body_called_symbols`: pop each +/// symbol from `worklist`, for every function it directly calls, insert +/// into `eagerly_called` and (if newly inserted) push onto the worklist. +/// Runs until the worklist drains. /// -/// Expanded through `fn_body_called_symbols`: if `f` is in the set and `f` -/// directly calls `g`, then `g` is too. Fixed-point — runs until the worklist -/// drains. A function stored as a value (referenced but not called) is NOT -/// added. +/// Used by the per-class BFS in [`collect_hoist_edits`]. The caller seeds +/// `eagerly_called` and `worklist` with that class's `decorator_called` +/// (plus, on incremental updates, the `init_called_symbols` of newly +/// planned bindings); we extend the closure to fixed point. A function +/// stored as a value (referenced but not called) is NOT added — that's +/// what prevents `useFactory: makeFactory` from invoking `makeFactory`'s +/// body refs at class-init time. /// -/// Used by both the BFS (to gate the function-body-chasing branch) and the -/// topological sort (to gate `expand_through_functions`). Without this, a -/// `useFactory: makeFactory` in providers — where Angular invokes -/// `makeFactory` lazily at injection time, NOT at class-definition time — -/// would still pull in `makeFactory`'s body refs and hoist them above the -/// class, sometimes inventing a new TDZ. -fn compute_eagerly_called( - stmt_info: &HashMap, - decorator_called: &HashSet, +/// Per-class scoping: the seed is THIS class's call graph only. A function +/// invoked elsewhere in the module but only referenced as a value in this +/// class's metadata does not enter this class's set. See PR #302 review +/// (Cursor #3310734461). +fn close_eagerly_called( + eagerly_called: &mut HashSet, + worklist: &mut Vec, fn_body_called_symbols: &HashMap>, -) -> HashSet { - let mut out: HashSet = HashSet::new(); - let mut worklist: Vec = Vec::new(); - for info in stmt_info.values() { - for &s in &info.init_called_symbols { - if out.insert(s) { - worklist.push(s); - } - } - } - for &s in decorator_called { - if out.insert(s) { - worklist.push(s); - } - } +) { while let Some(symbol) = worklist.pop() { if let Some(calls) = fn_body_called_symbols.get(&symbol) { for &s in calls { - if out.insert(s) { + if eagerly_called.insert(s) { worklist.push(s); } } } } - out } /// Compute the effective start of a class statement, ignoring trailing @@ -457,6 +554,26 @@ fn effective_class_start(class: &Class<'_>, stmt_start: u32) -> u32 { class.decorators.iter().map(|d| d.span.start).min().map_or(stmt_start, |d| d.min(stmt_start)) } +/// Index every top-level class declaration by its binding `SymbolId` → +/// the class's `span.start`. Covers plain `ClassDeclaration`, +/// `export class …`, and `export default class …` (only the named form — +/// anonymous default-exported classes have no `id`). +/// +/// Used by the BFS safe-skip guard in [`collect_hoist_edits`] to refuse +/// hoisting a statement whose initializer references a class declared +/// at-or-after the protect site, which would introduce a new TDZ on the +/// class itself. +fn collect_top_level_class_positionss(program: &Program<'_>) -> HashMap { + let mut out: HashMap = HashMap::new(); + for stmt in &program.body { + let Some((class, _)) = class_of(stmt) else { continue }; + let Some(id) = &class.id else { continue }; + let Some(symbol) = id.symbol_id.get() else { continue }; + out.insert(symbol, class.span.start); + } + out +} + /// Locate the inner class declaration of a top-level statement, returning the /// effective statement start (including any `export` keyword). fn class_of<'a, 'src>(stmt: &'src Statement<'a>) -> Option<(&'src Class<'a>, u32)> { @@ -530,8 +647,9 @@ fn resolve_symbol(id: &IdentifierReference<'_>, semantic: &Semantic<'_>) -> Opti /// function-call boundaries. /// * `fn_body_called_symbols`: top-level function `SymbolId` → symbols of /// functions/`new` targets directly invoked inside its body. Feeds -/// `compute_eagerly_called` so the BFS only chases bodies that are -/// eagerly reachable from the decorator metadata's call graph. +/// `close_eagerly_called` so each class's BFS only chases bodies that +/// are eagerly reachable from that class's decorator-metadata call +/// graph. /// /// Only `VariableDeclaration` (const/let/var) and the `export` form of it are /// considered: @@ -832,7 +950,17 @@ fn collect_expr_symbols<'a>( } E::CallExpression(call) => { record_direct_callee(&call.callee, semantic, called); - collect_expr_symbols(&call.callee, semantic, out, called); + // IIFE detection: `(() => ...)()` or `(function() { ... })()` — + // the function body runs *eagerly* at this call site, so its + // identifier reads contribute to the eager-evaluation set. The + // default `ArrowFunctionExpression` / `FunctionExpression` + // arms below treat bodies as lazy; for IIFEs we walk the body + // explicitly via `FunctionBodyIdentVisitor` instead. + // + // Regression for Codex review #3310709326 on PR #302. + if !walk_iife_callee_body(&call.callee, semantic, out, called) { + collect_expr_symbols(&call.callee, semantic, out, called); + } for arg in &call.arguments { match arg { Argument::SpreadElement(s) => { @@ -850,7 +978,11 @@ fn collect_expr_symbols<'a>( } E::NewExpression(new) => { record_direct_callee(&new.callee, semantic, called); - collect_expr_symbols(&new.callee, semantic, out, called); + // Symmetric IIFE handling for `new (function() { ... })()` — + // exceedingly rare but covered for consistency. + if !walk_iife_callee_body(&new.callee, semantic, out, called) { + collect_expr_symbols(&new.callee, semantic, out, called); + } for arg in &new.arguments { match arg { Argument::SpreadElement(s) => { @@ -972,6 +1104,52 @@ fn record_direct_callee<'a>( } } +/// If `callee` is the function expression of an IIFE +/// (`(() => …)()` or `(function() {…})()`, after peeling parens and TS +/// wrappers), walk its body eagerly via `FunctionBodyIdentVisitor` and +/// return `true`. The IIFE body runs at the call site, so its identifier +/// reads contribute to the eager-evaluation set — unlike a function stored +/// as a value, where the lazy-bodies rule in [`collect_expr_symbols`] is +/// correct. +/// +/// Returns `false` when the callee is not a function/arrow expression; the +/// caller then falls through to the normal `collect_expr_symbols` descent +/// (which is a no-op for these node kinds anyway, but still correct). +/// +/// Regression for Codex review #3310709326 on PR #302. +fn walk_iife_callee_body<'a>( + callee: &Expression<'a>, + semantic: &Semantic<'a>, + out: &mut HashSet, + called: &mut HashSet, +) -> bool { + use Expression as E; + let mut cur = callee; + loop { + match cur { + E::ArrowFunctionExpression(arrow) => { + let mut visitor = FunctionBodyIdentVisitor { semantic, out, called }; + visitor.visit_function_body(&arrow.body); + return true; + } + E::FunctionExpression(func) => { + if let Some(body) = &func.body { + let mut visitor = FunctionBodyIdentVisitor { semantic, out, called }; + visitor.visit_function_body(body); + } + return true; + } + E::ParenthesizedExpression(p) => cur = &p.expression, + E::TSAsExpression(ts) => cur = &ts.expression, + E::TSSatisfiesExpression(ts) => cur = &ts.expression, + E::TSNonNullExpression(ts) => cur = &ts.expression, + E::TSTypeAssertion(ts) => cur = &ts.expression, + E::TSInstantiationExpression(ts) => cur = &ts.expression, + _ => return false, + } + } +} + /// Mirror of [`collect_expr_symbols`] for the small set of node kinds that /// can appear directly inside an `Expression::ChainExpression`. Without this, /// optional-chaining (`TOKEN?.id`, `f?.()`) would be silently dropped by @@ -986,7 +1164,9 @@ fn collect_chain_element_symbols<'a>( match el { ChainElement::CallExpression(call) => { record_direct_callee(&call.callee, semantic, called); - collect_expr_symbols(&call.callee, semantic, out, called); + if !walk_iife_callee_body(&call.callee, semantic, out, called) { + collect_expr_symbols(&call.callee, semantic, out, called); + } for arg in &call.arguments { match arg { Argument::SpreadElement(s) => { diff --git a/crates/oxc_angular_compiler/tests/integration_test.rs b/crates/oxc_angular_compiler/tests/integration_test.rs index 45dfb21ec..d6982eb05 100644 --- a/crates/oxc_angular_compiler/tests/integration_test.rs +++ b/crates/oxc_angular_compiler/tests/integration_test.rs @@ -10942,3 +10942,143 @@ const { TOKEN } = TOKENS; result.code ); } + +/// A multi-declarator `const TOKEN = 'tok', BACKREF = TestComponent;` +/// statement is referenced (via `TOKEN`) in the decorator metadata. The +/// statement's *other* declarator initializer references `TestComponent` +/// itself, which lives below. Hoisting the whole statement above the class +/// would put `BACKREF = TestComponent` ahead of `class TestComponent`, +/// introducing a *new* TDZ on the class. +/// +/// The safe-skip guard refuses to hoist a statement when any of its +/// initializer symbols resolves to a top-level class declared at position +/// `>= effective_start` of the class being protected. +/// +/// Regression test for Codex review #3310709319 on PR #302. +#[test] +fn component_provider_multi_declarator_with_class_self_ref_skips_hoist() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'x', template: '', providers: [{ provide: TOKEN, useValue: 1 }] }) +class TestComponent {} +const TOKEN = 'tok', BACKREF = TestComponent; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + // The original `const TOKEN = 'tok', BACKREF = TestComponent;` statement + // must remain in its original position (below the class). It must NOT be + // duplicated/hoisted above the class. + assert_eq!( + result.code.matches("const TOKEN").count(), + 1, + "`const TOKEN` must not be duplicated (no hoist + keep). \ + Hoisting this multi-declarator statement would put \ + `BACKREF = TestComponent` ahead of the class.\nCode:\n{}", + result.code + ); + if let Some(token_pos) = result.code.find("const TOKEN") { + assert!( + token_pos > class_pos, + "`const TOKEN ... BACKREF = TestComponent` must NOT be hoisted \ + above the class — that would introduce a new TDZ on `TestComponent`. \ + token@{token_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + } +} + +/// `providers: (() => [{ provide: TOKEN, useValue: 1 }])()` — the IIFE +/// is invoked *eagerly* at class-definition time, so the references inside +/// the arrow body must be treated as eager. The general lazy-bodies rule +/// (skip arrow/function bodies) doesn't apply when the function is its own +/// callee. +/// +/// Regression test for Codex review #3310709326 on PR #302. +#[test] +fn component_provider_iife_metadata_hoists_inner_token() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'x', template: '', providers: (() => [{ provide: TOKEN, useValue: 1 }])() }) +class TestComponent {} +const TOKEN = 'tok'; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let token_pos = result + .code + .find("const TOKEN") + .unwrap_or_else(|| panic!("Expected `const TOKEN` to be present.\nCode:\n{}", result.code)); + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + assert!( + token_pos < class_pos, + "`const TOKEN` (referenced inside an IIFE in `providers`) must be \ + hoisted above the class — the IIFE runs eagerly. \ + token@{token_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + assert_eq!( + result.code.matches("const TOKEN").count(), + 1, + "`const TOKEN` should appear exactly once.\nCode:\n{}", + result.code + ); +} + +/// `foo` is referenced as a value (`useFactory: foo`) in TestComponent's +/// decorator metadata — NOT called there. The global `eagerly_called` +/// closure adds `foo` because *another* top-level statement +/// (`const X = foo()`) calls it. The BFS for TestComponent must not chase +/// `foo`'s body just because some unrelated module-level statement happens +/// to invoke `foo`. Otherwise it pulls in `TOKEN` and hoists +/// `const TOKEN = TestComponent;` above the class → new TDZ on the class. +/// +/// Per-class eagerly_called scoping (seeded only from THIS class's +/// `decorator_called`) prevents this leak. +/// +/// Regression test for Cursor review #3310734461 on PR #302. +#[test] +fn component_provider_useFactory_value_does_not_chase_global_eager_caller() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +function foo() { return TOKEN; } +const X = foo(); +@Component({ selector: 'x', template: '', providers: [{ provide: 'x', useFactory: foo }] }) +class TestComponent {} +const TOKEN = TestComponent; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + // `const TOKEN = TestComponent;` must NOT be hoisted above the class — + // that would put `TestComponent` reference ahead of its own declaration. + assert_eq!( + result.code.matches("const TOKEN").count(), + 1, + "`const TOKEN` must not be duplicated.\nCode:\n{}", + result.code + ); + if let Some(token_pos) = result.code.find("const TOKEN") { + assert!( + token_pos > class_pos, + "`const TOKEN = TestComponent` must NOT be hoisted above the class \ + — that would introduce a new TDZ on `TestComponent`. \ + `foo` is referenced as a value in `useFactory: foo`, not called \ + by this class's decorator metadata. \ + token@{token_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + } +} From 311a41a046addbcb90f1992a1b84856e5fe6bf9e Mon Sep 17 00:00:00 2001 From: LongYinan Date: Wed, 27 May 2026 21:46:54 +0800 Subject: [PATCH 08/21] fix(hoist): chase param default refs of eagerly called functions Parameter default expressions (`function f(x = TOKEN)` / `function f({ a = X } = {})`) evaluate at call time, before the body runs. When a hoisted initializer eagerly calls a top-level function, any later-declared binding read by a parameter default is just as TDZ-relevant as a body ref. The previous scan only walked the function body, so such defaults left their referenced bindings below the class and the hoisted call-site threw `ReferenceError: Cannot access ... before initialization`. Add a `for_each_pattern_default` helper that yields every nested `AssignmentPattern::right` inside a `BindingPattern`. Use it (alongside `FormalParameter::initializer` for top-level defaults) in `collect_top_level_bindings` for top-level function declarations and in `walk_iife_callee_body` for IIFE callee arrow/function expressions. The collected refs and direct callees join the same `fn_body_symbol_refs` / `fn_body_called_symbols` sets the body walk populates, so the existing BFS and eagerly-called closure pick them up transparently. Co-Authored-By: Claude Opus 4.7 --- .../src/component/hoist.rs | 89 ++++++++++++++++++- .../tests/integration_test.rs | 73 +++++++++++++++ 2 files changed, 160 insertions(+), 2 deletions(-) diff --git a/crates/oxc_angular_compiler/src/component/hoist.rs b/crates/oxc_angular_compiler/src/component/hoist.rs index 53976412b..099cd855a 100644 --- a/crates/oxc_angular_compiler/src/component/hoist.rs +++ b/crates/oxc_angular_compiler/src/component/hoist.rs @@ -36,8 +36,8 @@ use std::collections::{HashMap, HashSet}; use oxc_ast::ast::{ Argument, ArrayExpressionElement, BindingPattern, ChainElement, Class, Declaration, Decorator, - ExportDefaultDeclarationKind, Expression, IdentifierReference, ObjectPropertyKind, Program, - Statement, + ExportDefaultDeclarationKind, Expression, FormalParameters, IdentifierReference, + ObjectPropertyKind, Program, Statement, }; use oxc_ast_visit::Visit; use oxc_semantic::Semantic; @@ -746,6 +746,14 @@ fn collect_top_level_bindings<'a>( let mut visitor = FunctionBodyIdentVisitor { semantic, out: &mut refs, called: &mut called }; visitor.visit_function_body(body); + // Parameter defaults (`function f(x = TOKEN)`) evaluate at + // call time, before the body runs. If this function is + // eagerly called from a hoisted initializer, any later- + // declared binding read by a default is just as TDZ-relevant + // as a body ref. Walk the `FormalParameter::initializer` + // directly, plus any nested `AssignmentPattern::right` inside + // destructured params (`function f({ a = X } = {})`). + walk_param_defaults(&func.params, semantic, &mut refs, &mut called); fn_body_symbol_refs.insert(fn_symbol, refs); fn_body_called_symbols.insert(fn_symbol, called); } @@ -797,6 +805,78 @@ fn for_each_binding_identifier<'a>( } } +/// Walk a `BindingPattern` and invoke `f` for every default-value +/// `Expression` it carries — i.e. the `right` of every nested +/// `AssignmentPattern`. Used to chase TDZ-relevant identifier reads inside +/// parameter destructuring defaults like `function f({ a = X } = {})`, +/// where the inner `a = X` is an `AssignmentPattern` whose `right` is the +/// `X` default expression. +/// +/// `FormalParameter`'s top-level default (`function f(x = TOKEN)`) lives on +/// `FormalParameter::initializer`, NOT inside an `AssignmentPattern`, so +/// callers walk that separately and use this helper to cover the *nested* +/// pattern-default case only. +fn for_each_pattern_default<'a, 'src>( + pat: &'src BindingPattern<'a>, + f: &mut impl FnMut(&'src Expression<'a>), +) { + match pat { + BindingPattern::BindingIdentifier(_) => {} + BindingPattern::ObjectPattern(obj) => { + for prop in &obj.properties { + for_each_pattern_default(&prop.value, f); + } + if let Some(rest) = &obj.rest { + for_each_pattern_default(&rest.argument, f); + } + } + BindingPattern::ArrayPattern(arr) => { + for el in &arr.elements { + if let Some(el) = el { + for_each_pattern_default(el, f); + } + } + if let Some(rest) = &arr.rest { + for_each_pattern_default(&rest.argument, f); + } + } + BindingPattern::AssignmentPattern(assign) => { + f(&assign.right); + for_each_pattern_default(&assign.left, f); + } + } +} + +/// Walk every parameter default expression of a function/arrow's +/// `FormalParameters` and feed the refs / direct callees into the same +/// `out` / `called` sets the body visitor populates. Defaults are +/// evaluated at call time before the body runs, so for an eagerly-called +/// function they're as relevant as body refs. +/// +/// Two default shapes are covered: +/// * `param.initializer` — the top-level default for a `FormalParameter` +/// (e.g. the `= TOKEN` in `function f(token = TOKEN)`). +/// * `AssignmentPattern.right` nested anywhere inside the parameter's +/// `BindingPattern` (e.g. the inner `= X` in +/// `function f({ a = X } = {})`). +/// +/// See PR #302 Codex review (#3311099883). +fn walk_param_defaults<'a>( + params: &FormalParameters<'a>, + semantic: &Semantic<'a>, + out: &mut HashSet, + called: &mut HashSet, +) { + for param in ¶ms.items { + if let Some(init) = ¶m.initializer { + collect_expr_symbols(init, semantic, out, called); + } + for_each_pattern_default(¶m.pattern, &mut |expr| { + collect_expr_symbols(expr, semantic, out, called); + }); + } +} + /// AST visitor that collects every `IdentifierReference` reachable from a /// function body, resolving each to a `SymbolId` via the semantic model, with /// the same "lazy bodies are opaque" rule the existing expression walker @@ -1130,6 +1210,10 @@ fn walk_iife_callee_body<'a>( E::ArrowFunctionExpression(arrow) => { let mut visitor = FunctionBodyIdentVisitor { semantic, out, called }; visitor.visit_function_body(&arrow.body); + // Parameter defaults evaluate at IIFE invocation time, before + // the body runs — symmetric with top-level function decls + // in `collect_top_level_bindings`. See PR #302 Codex P2. + walk_param_defaults(&arrow.params, semantic, out, called); return true; } E::FunctionExpression(func) => { @@ -1137,6 +1221,7 @@ fn walk_iife_callee_body<'a>( let mut visitor = FunctionBodyIdentVisitor { semantic, out, called }; visitor.visit_function_body(body); } + walk_param_defaults(&func.params, semantic, out, called); return true; } E::ParenthesizedExpression(p) => cur = &p.expression, diff --git a/crates/oxc_angular_compiler/tests/integration_test.rs b/crates/oxc_angular_compiler/tests/integration_test.rs index d6982eb05..76b8fa34b 100644 --- a/crates/oxc_angular_compiler/tests/integration_test.rs +++ b/crates/oxc_angular_compiler/tests/integration_test.rs @@ -11082,3 +11082,76 @@ const TOKEN = TestComponent; ); } } + +/// When a hoisted initializer eagerly calls a top-level function whose +/// *parameter default expression* reads a later-declared binding, the +/// param-default reference is just as TDZ-relevant as a body reference: +/// defaults evaluate at call time, before the function body runs. +/// +/// Here, the BFS sees `PROVIDERS = makeProviders()`, marks `makeProviders` +/// as eagerly called, and must chase BOTH `makeProviders`'s body refs AND +/// the refs inside its parameter default `token = TOKEN`. Otherwise `TOKEN` +/// is left below the class and the hoisted `const PROVIDERS = makeProviders()` +/// throws `ReferenceError: Cannot access 'TOKEN' before initialization` when +/// the parameter default fires. +/// +/// Regression test for Codex P2 review on PR #302. +#[test] +fn component_provider_eager_call_chases_param_default_refs() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'x', template: '', providers: PROVIDERS }) +class TestComponent {} +const PROVIDERS = makeProviders(); +function makeProviders(token = TOKEN) { return [{ provide: token }]; } +const TOKEN = 'tok'; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let providers_pos = result.code.find("const PROVIDERS").unwrap_or_else(|| { + panic!("Expected `const PROVIDERS` to be present.\nCode:\n{}", result.code) + }); + let token_pos = result + .code + .find("const TOKEN") + .unwrap_or_else(|| panic!("Expected `const TOKEN` to be present.\nCode:\n{}", result.code)); + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + + assert!( + token_pos < class_pos, + "`const TOKEN` (read by makeProviders's parameter default at call time) \ + must be hoisted above the class to avoid TDZ. \ + token@{token_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + assert!( + providers_pos < class_pos, + "`const PROVIDERS` must be hoisted above the class. \ + providers@{providers_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + assert!( + token_pos < providers_pos, + "`const TOKEN` must precede `const PROVIDERS` so the parameter default \ + `token = TOKEN` can read it when `makeProviders()` runs at module init. \ + token@{token_pos} providers@{providers_pos}\nCode:\n{}", + result.code + ); + + assert_eq!( + result.code.matches("const TOKEN").count(), + 1, + "`const TOKEN` should appear exactly once.\nCode:\n{}", + result.code + ); + assert_eq!( + result.code.matches("const PROVIDERS").count(), + 1, + "`const PROVIDERS` should appear exactly once.\nCode:\n{}", + result.code + ); +} From 437d4da4c80df1cec0b36b92680ef9c1b6e33c69 Mon Sep 17 00:00:00 2001 From: LongYinan Date: Wed, 27 May 2026 22:26:04 +0800 Subject: [PATCH 09/21] fix(hoist): chase destructuring defaults; walk IIFE bodies in eagerly-called fns `collect_top_level_bindings` now walks `AssignmentPattern.right` defaults nested in each declarator's binding pattern, so `const { TOKEN = FALLBACK } = {}` records `FALLBACK` as an init-time dep and keeps the dependency graph TDZ-correct. `FunctionBodyIdentVisitor` now detects IIFE callees in `visit_call_expression` / `visit_new_expression` and walks their bodies eagerly via `walk_iife_callee_body`, so IIFEs inside an eagerly-called top-level function no longer drop their identifier reads. Addresses Codex P2 #3311274924 and Cursor #3311313158 on PR #302. Co-Authored-By: Claude Opus 4.7 --- .../src/component/hoist.rs | 53 ++++++++- .../tests/integration_test.rs | 109 ++++++++++++++++++ 2 files changed, 156 insertions(+), 6 deletions(-) diff --git a/crates/oxc_angular_compiler/src/component/hoist.rs b/crates/oxc_angular_compiler/src/component/hoist.rs index 099cd855a..b7d27dccc 100644 --- a/crates/oxc_angular_compiler/src/component/hoist.rs +++ b/crates/oxc_angular_compiler/src/component/hoist.rs @@ -716,6 +716,23 @@ fn collect_top_level_bindings<'a>( &mut info.init_called_symbols, ); } + // Destructuring defaults (`const { x = FALLBACK } = obj`) + // fire at evaluation time of THIS statement whenever the + // matching property is missing/undefined. They read the + // default expression's identifiers eagerly, so any later- + // declared top-level binding referenced by a default is + // TDZ-relevant exactly like the `init` itself. Walk every + // nested `AssignmentPattern::right` in the binding pattern + // and feed its refs into the statement's eager sets. See + // PR #302 Codex review #3311274924. + for_each_pattern_default(&declarator.id, &mut |expr| { + collect_expr_symbols( + expr, + semantic, + &mut info.init_symbols, + &mut info.init_called_symbols, + ); + }); } stmt_info.insert(stmt_start, info); continue; @@ -769,12 +786,14 @@ fn collect_top_level_bindings<'a>( /// `ArrayPattern` (each element `Option`, plus `rest`), and /// `AssignmentPattern` (the `left` pattern of `const { x = 1 } = obj`). /// Default expressions on `AssignmentPattern` (e.g. `const { x = SOMETHING } -/// = obj`) are nested *inside* the binding pattern but are NOT walked here — -/// a deliberate conservative choice. In the rare case where a default -/// expression references a later-declared top-level binding, that binding -/// will not be transitively hoisted. Decorator metadata almost never uses -/// destructured names with such defaults, so this gap is accepted rather -/// than implemented. +/// = obj`) are NOT visited by this helper — it only enumerates binding +/// identifiers. Those defaults ARE chased separately by +/// [`for_each_pattern_default`], which callers run at every site where a +/// pattern's defaults evaluate eagerly: at declarator sites in +/// `collect_top_level_bindings`, and at IIFE / parameter-default call sites +/// in [`walk_param_defaults`] / [`walk_iife_callee_body`]. Keeping the two +/// concerns split lets each call site decide whether the defaults are +/// TDZ-relevant for that context. fn for_each_binding_identifier<'a>( pat: &BindingPattern<'a>, f: &mut impl FnMut(&oxc_ast::ast::BindingIdentifier<'a>), @@ -903,6 +922,21 @@ impl<'a, 'b> Visit<'a> for FunctionBodyIdentVisitor<'a, 'b> { fn visit_call_expression(&mut self, it: &oxc_ast::ast::CallExpression<'a>) { record_direct_callee(&it.callee, self.semantic, self.called); + // IIFE detection mirrors the `collect_expr_symbols` arm: when the + // callee is `(() => ...)` / `(function() { ... })`, the body runs + // eagerly at this call site, so its identifier reads contribute to + // the eager-evaluation set. Without this, `visit_arrow_function` + // / `visit_function` (intentional no-ops below) would silently drop + // the IIFE body inside an eagerly-called function — TDZ regression. + // See PR #302 Cursor review #3311313158. + if walk_iife_callee_body(&it.callee, self.semantic, self.out, self.called) { + // Body handled; only the arguments still need to flow into + // `self.out` / `self.called`. + for arg in &it.arguments { + self.visit_argument(arg); + } + return; + } // Continue default traversal so identifier references inside callee // and arguments still feed `self.out`. oxc_ast_visit::walk::walk_call_expression(self, it); @@ -910,6 +944,13 @@ impl<'a, 'b> Visit<'a> for FunctionBodyIdentVisitor<'a, 'b> { fn visit_new_expression(&mut self, it: &oxc_ast::ast::NewExpression<'a>) { record_direct_callee(&it.callee, self.semantic, self.called); + // Symmetric IIFE handling for `new (function() { ... })()`. + if walk_iife_callee_body(&it.callee, self.semantic, self.out, self.called) { + for arg in &it.arguments { + self.visit_argument(arg); + } + return; + } oxc_ast_visit::walk::walk_new_expression(self, it); } diff --git a/crates/oxc_angular_compiler/tests/integration_test.rs b/crates/oxc_angular_compiler/tests/integration_test.rs index 76b8fa34b..7b79f2491 100644 --- a/crates/oxc_angular_compiler/tests/integration_test.rs +++ b/crates/oxc_angular_compiler/tests/integration_test.rs @@ -11155,3 +11155,112 @@ const TOKEN = 'tok'; result.code ); } + +/// A destructuring binding `const { TOKEN = FALLBACK } = {}` introduces +/// `TOKEN` (used in decorator metadata) but its initializer is `{}`, so the +/// `FALLBACK` default fires when the destructuring statement evaluates. +/// The hoister must chase defaults inside the binding pattern, otherwise +/// `FALLBACK` stays below the class and the hoisted destructuring throws +/// `ReferenceError: Cannot access 'FALLBACK' before initialization` at +/// runtime. +/// +/// Regression test for Codex P2 review #3311274924 on PR #302. +#[test] +fn component_destructured_default_chases_late_const() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'x', template: '', providers: [{ provide: TOKEN, useValue: 0 }] }) +class TestComponent {} +const { TOKEN = FALLBACK } = {}; +const FALLBACK = 'tok'; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let fallback_pos = result.code.find("const FALLBACK").unwrap_or_else(|| { + panic!("Expected `const FALLBACK` to be present.\nCode:\n{}", result.code) + }); + let token_pos = result.code.find("const { TOKEN").unwrap_or_else(|| { + panic!("Expected `const {{ TOKEN ...` to be present.\nCode:\n{}", result.code) + }); + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + + assert!( + fallback_pos < token_pos, + "`const FALLBACK` must precede `const {{ TOKEN = FALLBACK }} = {{}}` so the \ + destructuring default can read it. fallback@{fallback_pos} token@{token_pos}\nCode:\n{}", + result.code + ); + assert!( + token_pos < class_pos, + "`const {{ TOKEN ... }}` must be hoisted above the class. \ + token@{token_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + assert!( + fallback_pos < class_pos, + "`const FALLBACK` must also be hoisted above the class. \ + fallback@{fallback_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + + assert_eq!( + result.code.matches("const FALLBACK").count(), + 1, + "`const FALLBACK` should appear exactly once.\nCode:\n{}", + result.code + ); + assert_eq!( + result.code.matches("const { TOKEN").count(), + 1, + "destructuring statement should appear exactly once.\nCode:\n{}", + result.code + ); +} + +/// `provideThing` is eagerly called from the decorator. Its body contains an +/// IIFE `(() => [TOKEN])()` whose body executes at the call site, so the +/// `TOKEN` reference is TDZ-relevant. `FunctionBodyIdentVisitor` must walk +/// IIFE callee bodies the same way `collect_expr_symbols` does, or `TOKEN` +/// is left below the class and the eagerly-called function throws at module +/// init. +/// +/// Regression test for Cursor review #3311313158 on PR #302. +#[test] +fn component_eager_fn_body_iife_chases_late_const() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'x', template: '', providers: [provideThing()] }) +class TestComponent {} +function provideThing() { return (() => [TOKEN])(); } +const TOKEN = 'tok'; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let token_pos = result + .code + .find("const TOKEN") + .unwrap_or_else(|| panic!("Expected `const TOKEN` to be present.\nCode:\n{}", result.code)); + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + + assert!( + token_pos < class_pos, + "`const TOKEN` (read inside an IIFE in the body of an eagerly-called \ + function) must be hoisted above the class to avoid TDZ. \ + token@{token_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + assert_eq!( + result.code.matches("const TOKEN").count(), + 1, + "`const TOKEN` should appear exactly once.\nCode:\n{}", + result.code + ); +} From 375382728c88eff18a558e461333fd702f62a392 Mon Sep 17 00:00:00 2001 From: LongYinan Date: Wed, 27 May 2026 23:29:40 +0800 Subject: [PATCH 10/21] fix(hoist): close class-TDZ guard transitively; walk assignment refs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolves the two new inline bot comments on PR #302 and the follow-on adversarial findings the fix surfaced. * Codex P2 #3311493528 — the `stmt_references_later_class` guard now closes `init_called_symbols` through `fn_body_called_symbols` and checks each function's `fn_body_symbol_refs` for class refs, so an initializer that eagerly calls a function whose body reads a later class is no longer hoisted above that class. * Cursor Low #3311551145 — `collect_expr_symbols` gains explicit `AssignmentExpression` / `UpdateExpression` arms plus `collect_assignment_target_symbols` and siblings, so identifier refs on assignment-target lvalues (including pattern targets with defaults and member targets) flow into the dependency graph. * Cascade un-planning ("Step 2c"): when a planned statement's full dep closure reaches a top-level binding that isn't planned at an `insert_at` ≤ the dependent's `insert_at`, the dependent is dropped and the loop iterates to a fixed point — prevents leaving caller hoists stranded when a transitive dep was guard-skipped or planned only for a later class. * Function-valued bindings indexed: each per-declarator `const f = () => …` / `function() { … }` populates `fn_body_*` maps so the guard chases through them like a function declaration. Covers single- and multi-declarator forms. * Indirect call shapes: `fn.call(...)`, `fn.apply(...)`, and `fn.bind(...)()` recorded as eager invocations of `fn` at every call/new visit site. Co-Authored-By: Claude Opus 4.7 --- .../src/component/hoist.rs | 526 +++++++++++++++++- .../tests/integration_test.rs | 385 +++++++++++++ 2 files changed, 909 insertions(+), 2 deletions(-) diff --git a/crates/oxc_angular_compiler/src/component/hoist.rs b/crates/oxc_angular_compiler/src/component/hoist.rs index b7d27dccc..530f901c9 100644 --- a/crates/oxc_angular_compiler/src/component/hoist.rs +++ b/crates/oxc_angular_compiler/src/component/hoist.rs @@ -236,12 +236,38 @@ pub fn collect_hoist_edits<'a>( // scope; this safe-skip is the minimal "no regressions" // defense. // + // The guard ALSO refuses to hoist when the initializer + // eagerly calls a function whose body reads a later class. + // Without this transitive check, `var TOKEN = make()` + // with `function make() { return TestComponent; }` would + // be hoisted above `class TestComponent` — the hoisted + // initializer then invokes `make()` which reads + // `TestComponent` before its class binding is initialized, + // throwing `ReferenceError: Cannot access 'TestComponent' + // before initialization`. We close `info.init_called_symbols` + // under `fn_body_called_symbols` (same shape as the BFS's + // per-class `eagerly_called` set) and consult each + // function's `fn_body_symbol_refs` for class refs. + // // The check uses `>=`: a class declared at exactly // `effective_start` is itself the class we're protecting // — definitely blocking. Regression for Codex review - // #3310709319 on PR #302. + // #3310709319 and Codex P2 review #3311493528 on PR #302. + let mut stmt_called: HashSet = + info.init_called_symbols.iter().copied().collect(); + let mut stmt_call_wl: Vec = stmt_called.iter().copied().collect(); + close_eagerly_called(&mut stmt_called, &mut stmt_call_wl, &fn_body_called_symbols); + let stmt_references_later_class = info.init_symbols.iter().any(|s| { top_level_class_positions.get(s).is_some_and(|&pos| pos >= effective_start) + }) || stmt_called.iter().any(|f| { + fn_body_symbol_refs.get(f).is_some_and(|refs| { + refs.iter().any(|s| { + top_level_class_positions + .get(s) + .is_some_and(|&pos| pos >= effective_start) + }) + }) }); if stmt_references_later_class { continue; @@ -343,6 +369,122 @@ pub fn collect_hoist_edits<'a>( return Vec::new(); } + // Step 2c: cascade un-planning. The per-class BFS plans a statement S + // when S's *immediate* `init_symbols` / closed `init_called_symbols` + // pass the safe-skip guard. But a transitive dependency reached only + // by chasing through `fn_body_symbol_refs` may itself get guard-skipped + // when the BFS later pops it — leaving S planned with a missing dep. + // + // Example (Finding 1 of Codex P2 review on PR #302): + // + // class TestComponent { ... } // ← class C + // var TOKEN = make(); // ← S: passes guard + // function make() { return BACKREF; } // ← make's body + // const BACKREF = TestComponent; // ← D: guard SKIPS + // + // S's immediate `init_called_symbols = {make}`. `make`'s body refs are + // {BACKREF}, which is *not* a class — guard passes, S is planned. Then + // BFS visits `make`, chases body → pushes BACKREF onto worklist. Pop + // BACKREF: its `init_symbols = {TestComponent}` and `TestComponent`'s + // position is `>= effective_start` → guard SKIPS BACKREF. But S + // stayed planned. At runtime, hoisted `var TOKEN = make()` calls + // `make()`, which reads not-yet-initialized `BACKREF`, which reads + // not-yet-initialized `TestComponent`. TDZ. + // + // Fix (Approach B from the review): after BFS, compute each planned + // statement's *full* dependency closure (through eagerly-called fn + // bodies via `combined_eagerly_called`). If any closure symbol resolves + // to a top-level binding whose own statement is NOT in the plan AND + // would have needed hoisting (its position is at-or-after S's + // `insert_at`), drop S. Iterate to fixed point so the un-plan can + // cascade: dropping S may itself orphan another planned T that + // depended only on S. + // + // Function-symbol deps (e.g. `make` itself) are NOT flagged here + // because `symbol_to_stmt.get(&make)` returns None — top-level + // function declarations are JS-hoisted, not handled by the variable + // planner. The chase-through-fn-bodies in `expand_through_functions` + // bridges to the variable bindings they read. + loop { + let plan_starts_snapshot: HashSet = plan.keys().copied().collect(); + let mut to_remove: Vec = Vec::new(); + for (&start, entry) in &plan { + let Some(info) = stmt_info.get(&start) else { continue }; + // Finding 2 (Codex P3 review): use a *per-S* eager-call set — + // the closure of THIS statement's `init_called_symbols` under + // `fn_body_called_symbols` — instead of the global + // `combined_eagerly_called`. The global union over-expands: if + // class B eagerly calls `make` and class A only references + // `makeRef = make` as a *value*, the global set pulls A's + // closure through `make`'s body refs as if A called `make`, + // which can drop A's safe hoist. The per-S set matches the + // shape used by the safe-skip guard's `stmt_called` so the + // cascade un-planning reasons against the same eager-evaluation + // set the planner used. + let mut stmt_called: HashSet = + info.init_called_symbols.iter().copied().collect(); + let mut stmt_call_wl: Vec = stmt_called.iter().copied().collect(); + close_eagerly_called(&mut stmt_called, &mut stmt_call_wl, &fn_body_called_symbols); + let closure = + expand_through_functions(&info.init_symbols, &fn_body_symbol_refs, &stmt_called); + for s in &closure { + // Function symbols and unresolved refs have no + // `symbol_to_stmt` entry — they can't be a variable + // binding the planner would have moved. Skip. + let Some(&dep_start) = symbol_to_stmt.get(s) else { continue }; + // Self-references (multi-declarator stmt referencing its own + // sibling) don't count. + if dep_start == start { + continue; + } + // Dep is in the plan — only safe if its `insert_at` is at + // or before S's `insert_at`. Finding 1 (Codex P3 review): + // two planned statements can target *different* `insert_at` + // positions (one per decorated class). If S targets + // `insert_at = pos_C1` and its dep `D` is planned only for + // class C2 with `D.insert_at = pos_C2 > pos_C1`, hoisted S + // runs before hoisted D at runtime — fresh TDZ on D. The + // snapshot-membership check we used before missed this; we + // must consult the dep's *current* `insert_at` and unplan + // S when the ordering is wrong. + if plan_starts_snapshot.contains(&dep_start) { + if let Some(dep_entry) = plan.get(&dep_start) + && dep_entry.insert_at <= entry.insert_at + { + continue; + } + // Fall through — dep's `insert_at` is *after* S's, + // treat the dep as unsafe and unplan S. (Lowering the + // dep's `insert_at` instead is the alternative; we + // pick "drop S" for simplicity — the user's existing + // TDZ on the dep persists, but we don't introduce a + // fresh class TDZ via partial hoisting.) + to_remove.push(start); + break; + } + // Dep is declared *before* the class we're hoisting in + // front of — already initialized when S evaluates. + if dep_start <= entry.insert_at { + continue; + } + // Dep would have to be hoisted but isn't planned — S is + // unsafe. Flag and stop scanning this S. + to_remove.push(start); + break; + } + } + if to_remove.is_empty() { + break; + } + for s in to_remove { + plan.remove(&s); + } + } + + if plan.is_empty() { + return Vec::new(); + } + // Step 3: topologically sort the planned statements so dependencies are // emitted *before* their dependents in the hoisted prelude. Within a // single bucket (same `insert_at`), this guarantees that e.g. `const @@ -696,6 +838,33 @@ fn collect_top_level_bindings<'a>( }; for declarator in &decl.declarations { + // Per-declarator: if the declarator's `id` is a plain + // `BindingIdentifier` AND its `init` is *directly* an + // arrow/function expression (after peeling parens / TS + // wrappers), index that binding symbol as if it were a + // function declaration — populate `fn_body_symbol_refs` / + // `fn_body_called_symbols` so the BFS safe-skip guard can + // chase eager calls through it. Handles both single- and + // multi-declarator cases (`const make = () => DEP;` and + // `const make = () => DEP, other = 0;`). Destructured / + // patterned bindings are skipped — the function-value shape + // only appears with a plain identifier binding in practice. + // Run this BEFORE collecting `init_symbols` so the indexing + // happens before the normal binding/init flow. See PR #302 + // Codex P2 Finding 3. + if let (BindingPattern::BindingIdentifier(id), Some(init)) = + (&declarator.id, &declarator.init) + && let Some(fn_symbol) = id.symbol_id.get() + { + index_fn_valued_binding( + init, + fn_symbol, + semantic, + &mut fn_body_symbol_refs, + &mut fn_body_called_symbols, + ); + } + // Walk the declarator's `BindingPattern` recursively so that // destructuring forms (`const { TOKEN } = obj;`, `const [a, b] // = arr;`, `const { a: { b } } = obj;`, …) also index every @@ -922,6 +1091,8 @@ impl<'a, 'b> Visit<'a> for FunctionBodyIdentVisitor<'a, 'b> { fn visit_call_expression(&mut self, it: &oxc_ast::ast::CallExpression<'a>) { record_direct_callee(&it.callee, self.semantic, self.called); + record_indirect_callee(&it.callee, self.semantic, self.called); + record_bind_callee(&it.callee, self.semantic, self.called); // IIFE detection mirrors the `collect_expr_symbols` arm: when the // callee is `(() => ...)` / `(function() { ... })`, the body runs // eagerly at this call site, so its identifier reads contribute to @@ -944,6 +1115,8 @@ impl<'a, 'b> Visit<'a> for FunctionBodyIdentVisitor<'a, 'b> { fn visit_new_expression(&mut self, it: &oxc_ast::ast::NewExpression<'a>) { record_direct_callee(&it.callee, self.semantic, self.called); + record_indirect_callee(&it.callee, self.semantic, self.called); + record_bind_callee(&it.callee, self.semantic, self.called); // Symmetric IIFE handling for `new (function() { ... })()`. if walk_iife_callee_body(&it.callee, self.semantic, self.out, self.called) { for arg in &it.arguments { @@ -1071,6 +1244,8 @@ fn collect_expr_symbols<'a>( } E::CallExpression(call) => { record_direct_callee(&call.callee, semantic, called); + record_indirect_callee(&call.callee, semantic, called); + record_bind_callee(&call.callee, semantic, called); // IIFE detection: `(() => ...)()` or `(function() { ... })()` — // the function body runs *eagerly* at this call site, so its // identifier reads contribute to the eager-evaluation set. The @@ -1099,6 +1274,8 @@ fn collect_expr_symbols<'a>( } E::NewExpression(new) => { record_direct_callee(&new.callee, semantic, called); + record_indirect_callee(&new.callee, semantic, called); + record_bind_callee(&new.callee, semantic, called); // Symmetric IIFE handling for `new (function() { ... })()` — // exceedingly rare but covered for consistency. if !walk_iife_callee_body(&new.callee, semantic, out, called) { @@ -1183,17 +1360,209 @@ fn collect_expr_symbols<'a>( E::ChainExpression(chain) => { collect_chain_element_symbols(&chain.expression, semantic, out, called); } + // `(x = TOKEN)` — both sides carry refs that fire at evaluation + // time. The `right` is a regular expression; the `left` is an + // `AssignmentTarget` (bare identifier, member, or pattern-shaped) + // walked via the dedicated helper. Without this, decorator metadata + // shaped `providers: [(cached = TOKEN)]` silently dropped `TOKEN` + // — Cursor Low review #3311551145 on PR #302. + E::AssignmentExpression(assign) => { + collect_expr_symbols(&assign.right, semantic, out, called); + collect_assignment_target_symbols(&assign.left, semantic, out, called); + } + // `x++`, `--y[k]`, etc. The `argument` is a `SimpleAssignmentTarget` + // — bare identifiers and member expressions, never patterns. + E::UpdateExpression(update) => { + collect_simple_assignment_target_symbols(&update.argument, semantic, out, called); + } // Class expressions inside metadata are exceedingly rare and their // bodies aren't eagerly evaluated; treat them as opaque. E::ClassExpression(_) => {} // Function and arrow bodies run lazily — references inside don't // affect class-init evaluation. E::ArrowFunctionExpression(_) | E::FunctionExpression(_) => {} - // Literals and `this`/`super` carry no identifier references. + // Remaining variants carry no identifier references we can resolve + // to a top-level binding: literals (string/number/boolean/null/regex/ + // big-int/template no-substitution), `this`, `Super`, `MetaProperty` + // (`import.meta` / `new.target`), `ImportExpression` (dynamic import + // takes a string literal in practice), and `JSX*` / `V8IntrinsicExpression` + // which aren't valid in TS source we transform. _ => {} } } +/// Walk an `AssignmentTarget` (the `left` of an `AssignmentExpression`, +/// or a nested element inside an array/object pattern target) and feed +/// every identifier reference into `out` / `called`. Member arms mirror +/// the corresponding `Expression::*MemberExpression` arms in +/// [`collect_expr_symbols`]; pattern arms recurse through their nested +/// targets and defaults so e.g. `({ x = TOKEN } = obj)` chases `TOKEN`. +fn collect_assignment_target_symbols<'a>( + target: &oxc_ast::ast::AssignmentTarget<'a>, + semantic: &Semantic<'a>, + out: &mut HashSet, + called: &mut HashSet, +) { + use oxc_ast::ast::AssignmentTarget as T; + match target { + T::AssignmentTargetIdentifier(id) => { + if let Some(symbol) = resolve_symbol(id, semantic) { + out.insert(symbol); + } + } + T::ComputedMemberExpression(member) => { + collect_expr_symbols(&member.object, semantic, out, called); + collect_expr_symbols(&member.expression, semantic, out, called); + } + T::StaticMemberExpression(member) => { + collect_expr_symbols(&member.object, semantic, out, called); + } + T::PrivateFieldExpression(member) => { + collect_expr_symbols(&member.object, semantic, out, called); + } + T::TSAsExpression(ts) => collect_expr_symbols(&ts.expression, semantic, out, called), + T::TSSatisfiesExpression(ts) => collect_expr_symbols(&ts.expression, semantic, out, called), + T::TSNonNullExpression(ts) => collect_expr_symbols(&ts.expression, semantic, out, called), + T::TSTypeAssertion(ts) => collect_expr_symbols(&ts.expression, semantic, out, called), + T::ArrayAssignmentTarget(arr) => { + for el in arr.elements.iter().flatten() { + collect_assignment_target_maybe_default_symbols(el, semantic, out, called); + } + if let Some(rest) = &arr.rest { + collect_assignment_target_symbols(&rest.target, semantic, out, called); + } + } + T::ObjectAssignmentTarget(obj) => { + for prop in &obj.properties { + collect_assignment_target_property_symbols(prop, semantic, out, called); + } + if let Some(rest) = &obj.rest { + collect_assignment_target_symbols(&rest.target, semantic, out, called); + } + } + } +} + +/// `SimpleAssignmentTarget` is the subset of `AssignmentTarget` allowed +/// as the `argument` of `++`/`--`. Same shape minus the pattern variants. +fn collect_simple_assignment_target_symbols<'a>( + target: &oxc_ast::ast::SimpleAssignmentTarget<'a>, + semantic: &Semantic<'a>, + out: &mut HashSet, + called: &mut HashSet, +) { + use oxc_ast::ast::SimpleAssignmentTarget as T; + match target { + T::AssignmentTargetIdentifier(id) => { + if let Some(symbol) = resolve_symbol(id, semantic) { + out.insert(symbol); + } + } + T::ComputedMemberExpression(member) => { + collect_expr_symbols(&member.object, semantic, out, called); + collect_expr_symbols(&member.expression, semantic, out, called); + } + T::StaticMemberExpression(member) => { + collect_expr_symbols(&member.object, semantic, out, called); + } + T::PrivateFieldExpression(member) => { + collect_expr_symbols(&member.object, semantic, out, called); + } + T::TSAsExpression(ts) => collect_expr_symbols(&ts.expression, semantic, out, called), + T::TSSatisfiesExpression(ts) => collect_expr_symbols(&ts.expression, semantic, out, called), + T::TSNonNullExpression(ts) => collect_expr_symbols(&ts.expression, semantic, out, called), + T::TSTypeAssertion(ts) => collect_expr_symbols(&ts.expression, semantic, out, called), + } +} + +/// Helper for array-pattern element / object-pattern property values: +/// either a plain `AssignmentTarget` or an `AssignmentTargetWithDefault` +/// (`[a = X]`, `{ p: a = X }`) whose `init` default evaluates at +/// destructuring time. +fn collect_assignment_target_maybe_default_symbols<'a>( + el: &oxc_ast::ast::AssignmentTargetMaybeDefault<'a>, + semantic: &Semantic<'a>, + out: &mut HashSet, + called: &mut HashSet, +) { + use oxc_ast::ast::AssignmentTargetMaybeDefault as D; + match el { + D::AssignmentTargetWithDefault(with_default) => { + collect_assignment_target_symbols(&with_default.binding, semantic, out, called); + collect_expr_symbols(&with_default.init, semantic, out, called); + } + // The remaining variants inherit from `AssignmentTarget`. The + // `AssignmentTarget` variants are matched implicitly by the parent + // enum's `inherit_variants!` macro; cast back through the helper. + D::AssignmentTargetIdentifier(id) => { + if let Some(symbol) = resolve_symbol(id, semantic) { + out.insert(symbol); + } + } + D::ComputedMemberExpression(member) => { + collect_expr_symbols(&member.object, semantic, out, called); + collect_expr_symbols(&member.expression, semantic, out, called); + } + D::StaticMemberExpression(member) => { + collect_expr_symbols(&member.object, semantic, out, called); + } + D::PrivateFieldExpression(member) => { + collect_expr_symbols(&member.object, semantic, out, called); + } + D::TSAsExpression(ts) => collect_expr_symbols(&ts.expression, semantic, out, called), + D::TSSatisfiesExpression(ts) => collect_expr_symbols(&ts.expression, semantic, out, called), + D::TSNonNullExpression(ts) => collect_expr_symbols(&ts.expression, semantic, out, called), + D::TSTypeAssertion(ts) => collect_expr_symbols(&ts.expression, semantic, out, called), + D::ArrayAssignmentTarget(arr) => { + for el in arr.elements.iter().flatten() { + collect_assignment_target_maybe_default_symbols(el, semantic, out, called); + } + if let Some(rest) = &arr.rest { + collect_assignment_target_symbols(&rest.target, semantic, out, called); + } + } + D::ObjectAssignmentTarget(obj) => { + for prop in &obj.properties { + collect_assignment_target_property_symbols(prop, semantic, out, called); + } + if let Some(rest) = &obj.rest { + collect_assignment_target_symbols(&rest.target, semantic, out, called); + } + } + } +} + +/// `({ foo } = obj)` shorthand vs. `({ foo: bar } = obj)` long form. +/// The shorthand carries an optional `init` default; the long form carries +/// a key (possibly computed — `{ [TOKEN]: x }`) and a `binding` that's a +/// nested target with optional default. +fn collect_assignment_target_property_symbols<'a>( + prop: &oxc_ast::ast::AssignmentTargetProperty<'a>, + semantic: &Semantic<'a>, + out: &mut HashSet, + called: &mut HashSet, +) { + use oxc_ast::ast::AssignmentTargetProperty as P; + match prop { + P::AssignmentTargetPropertyIdentifier(ident) => { + if let Some(symbol) = resolve_symbol(&ident.binding, semantic) { + out.insert(symbol); + } + if let Some(init) = &ident.init { + collect_expr_symbols(init, semantic, out, called); + } + } + P::AssignmentTargetPropertyProperty(prop) => { + if prop.computed + && let Some(key_expr) = prop.name.as_expression() + { + collect_expr_symbols(key_expr, semantic, out, called); + } + collect_assignment_target_maybe_default_symbols(&prop.binding, semantic, out, called); + } + } +} + /// If `callee` is a *direct* identifier reference (peeling through /// parentheses and TS type-only wrappers), record its symbol in `called`. /// Member callees (`foo.bar()`) and other complex expressions are skipped @@ -1225,6 +1594,157 @@ fn record_direct_callee<'a>( } } +/// Recognize a small set of *indirect* call shapes whose immediate effect +/// is to invoke a top-level function: +/// +/// * `fn.call(...)` — `Function.prototype.call` +/// * `fn.apply(...)` — `Function.prototype.apply` +/// +/// In both cases the static member's `object` must be a *direct identifier* +/// (`fn`) — we resolve through the semantic model and record the symbol +/// in `called`. Anything more nested (`obj.fn.call(...)`, +/// `getFn().call(...)`) is out of scope and falls through. +/// +/// The shape `fn.bind(...)()` is handled at the call site by inspecting +/// the *outer* call's callee: if it's a `CallExpression` whose own callee +/// is `Identifier.bind`, the inner identifier is the bound function and +/// will eventually invoke at the outer call site. See [`record_bind_callee`]. +/// +/// Used alongside [`record_direct_callee`] at every call/new site so the +/// guard's `init_called_symbols` reflects the actual eager-invocation set. +/// Regression for Codex P2 review (Finding 3) on PR #302. +fn record_indirect_callee<'a>( + callee: &Expression<'a>, + semantic: &Semantic<'a>, + called: &mut HashSet, +) { + use Expression as E; + let mut cur = callee; + let member = loop { + match cur { + E::StaticMemberExpression(member) => break member, + E::ParenthesizedExpression(p) => cur = &p.expression, + E::TSAsExpression(ts) => cur = &ts.expression, + E::TSSatisfiesExpression(ts) => cur = &ts.expression, + E::TSNonNullExpression(ts) => cur = &ts.expression, + E::TSTypeAssertion(ts) => cur = &ts.expression, + E::TSInstantiationExpression(ts) => cur = &ts.expression, + _ => return, + } + }; + let prop = member.property.name.as_str(); + if prop != "call" && prop != "apply" { + return; + } + let E::Identifier(id) = &member.object else { return }; + if let Some(symbol) = resolve_symbol(id, semantic) { + called.insert(symbol); + } +} + +/// Handle the `fn.bind(...)()` shape. Called from the call site of the +/// *outer* `CallExpression` — its `callee` is the inner `fn.bind(...)` +/// `CallExpression`. If the inner call's callee is `Identifier.bind` +/// (a `StaticMemberExpression` whose `object` is a direct identifier and +/// `property` is `"bind"`), record the identifier's symbol in `called`. +/// Only one level of bind is covered; nested `fn.bind(a).bind(b)()` falls +/// through. +fn record_bind_callee<'a>( + outer_callee: &Expression<'a>, + semantic: &Semantic<'a>, + called: &mut HashSet, +) { + use Expression as E; + let mut cur = outer_callee; + let inner_call = loop { + match cur { + E::CallExpression(call) => break call, + E::ParenthesizedExpression(p) => cur = &p.expression, + E::TSAsExpression(ts) => cur = &ts.expression, + E::TSSatisfiesExpression(ts) => cur = &ts.expression, + E::TSNonNullExpression(ts) => cur = &ts.expression, + E::TSTypeAssertion(ts) => cur = &ts.expression, + E::TSInstantiationExpression(ts) => cur = &ts.expression, + _ => return, + } + }; + let mut bind_callee = &inner_call.callee; + let member = loop { + match bind_callee { + E::StaticMemberExpression(member) => break member, + E::ParenthesizedExpression(p) => bind_callee = &p.expression, + E::TSAsExpression(ts) => bind_callee = &ts.expression, + E::TSSatisfiesExpression(ts) => bind_callee = &ts.expression, + E::TSNonNullExpression(ts) => bind_callee = &ts.expression, + E::TSTypeAssertion(ts) => bind_callee = &ts.expression, + E::TSInstantiationExpression(ts) => bind_callee = &ts.expression, + _ => return, + } + }; + if member.property.name.as_str() != "bind" { + return; + } + let E::Identifier(id) = &member.object else { return }; + if let Some(symbol) = resolve_symbol(id, semantic) { + called.insert(symbol); + } +} + +/// If `init` is *directly* an `ArrowFunctionExpression` or +/// `FunctionExpression` (after peeling parens / TS wrappers), index the +/// binding `fn_symbol` as if it were a function declaration: record body +/// identifier refs into `fn_body_symbol_refs[fn_symbol]`, direct callees +/// into `fn_body_called_symbols[fn_symbol]`, and walk parameter defaults +/// into both. Returns `true` when indexing happened. +/// +/// This makes `const make = () => DEP` visible to the BFS safe-skip guard +/// the same way `function make() { return DEP; }` is. See PR #302 Codex +/// P2 (Finding 2). +fn index_fn_valued_binding<'a>( + init: &Expression<'a>, + fn_symbol: SymbolId, + semantic: &Semantic<'a>, + fn_body_symbol_refs: &mut HashMap>, + fn_body_called_symbols: &mut HashMap>, +) -> bool { + use Expression as E; + let mut cur = init; + loop { + match cur { + E::ArrowFunctionExpression(arrow) => { + let mut refs: HashSet = HashSet::new(); + let mut called: HashSet = HashSet::new(); + let mut visitor = + FunctionBodyIdentVisitor { semantic, out: &mut refs, called: &mut called }; + visitor.visit_function_body(&arrow.body); + walk_param_defaults(&arrow.params, semantic, &mut refs, &mut called); + fn_body_symbol_refs.insert(fn_symbol, refs); + fn_body_called_symbols.insert(fn_symbol, called); + return true; + } + E::FunctionExpression(func) => { + let Some(body) = &func.body else { return false }; + let mut refs: HashSet = HashSet::new(); + let mut called: HashSet = HashSet::new(); + let mut visitor = + FunctionBodyIdentVisitor { semantic, out: &mut refs, called: &mut called }; + visitor.visit_function_body(body); + walk_param_defaults(&func.params, semantic, &mut refs, &mut called); + fn_body_symbol_refs.insert(fn_symbol, refs); + fn_body_called_symbols.insert(fn_symbol, called); + return true; + } + E::ParenthesizedExpression(p) => cur = &p.expression, + E::TSAsExpression(ts) => cur = &ts.expression, + E::TSSatisfiesExpression(ts) => cur = &ts.expression, + E::TSNonNullExpression(ts) => cur = &ts.expression, + E::TSTypeAssertion(ts) => cur = &ts.expression, + E::TSInstantiationExpression(ts) => cur = &ts.expression, + _ => return false, + } + } +} + /// If `callee` is the function expression of an IIFE /// (`(() => …)()` or `(function() {…})()`, after peeling parens and TS /// wrappers), walk its body eagerly via `FunctionBodyIdentVisitor` and @@ -1290,6 +1810,8 @@ fn collect_chain_element_symbols<'a>( match el { ChainElement::CallExpression(call) => { record_direct_callee(&call.callee, semantic, called); + record_indirect_callee(&call.callee, semantic, called); + record_bind_callee(&call.callee, semantic, called); if !walk_iife_callee_body(&call.callee, semantic, out, called) { collect_expr_symbols(&call.callee, semantic, out, called); } diff --git a/crates/oxc_angular_compiler/tests/integration_test.rs b/crates/oxc_angular_compiler/tests/integration_test.rs index 7b79f2491..d2b770185 100644 --- a/crates/oxc_angular_compiler/tests/integration_test.rs +++ b/crates/oxc_angular_compiler/tests/integration_test.rs @@ -11264,3 +11264,388 @@ const TOKEN = 'tok'; result.code ); } + +/// The safe-skip guard must refuse to hoist a `var TOKEN = make()` initializer +/// when the eagerly-called `make()`'s body reads a later-declared top-level +/// class. Without the fix, hoisting `var TOKEN = make()` above +/// `class TestComponent` invents a fresh TDZ on the class: `make()` runs at +/// the hoisted initializer's evaluation time and reads `TestComponent` before +/// the class binding is initialized. +/// +/// The user's existing TDZ on `TOKEN` is NOT our problem to fix — we must +/// just not introduce a NEW class TDZ. So we only assert that `class +/// TestComponent` still precedes `var TOKEN`. +/// +/// Regression test for Codex P2 review #3311493528 on PR #302. +#[test] +fn component_eager_fn_body_class_ref_blocks_hoist() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'x', template: '', providers: [{ provide: 'x', useValue: TOKEN }] }) +class TestComponent {} +var TOKEN = make(); +function make() { return TestComponent; } +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + let token_pos = result + .code + .find("var TOKEN") + .unwrap_or_else(|| panic!("Expected `var TOKEN` to be present.\nCode:\n{}", result.code)); + + assert!( + class_pos < token_pos, + "`var TOKEN = make()` must NOT be hoisted above the class because \ + `make()`'s body reads `TestComponent`. Hoisting would invent a fresh \ + class TDZ. class@{class_pos} token@{token_pos}\nCode:\n{}", + result.code + ); + assert_eq!( + result.code.matches("var TOKEN").count(), + 1, + "`var TOKEN` should appear exactly once.\nCode:\n{}", + result.code + ); +} + +/// A decorator-metadata `AssignmentExpression` (`(cached = TOKEN)`) carries +/// identifier references on both its `left` and `right`. The +/// `collect_expr_symbols` walker must not silently drop these — otherwise +/// `TOKEN` never enters the BFS and stays declared below the class, while +/// the class's emitted Ivy definition reads `TOKEN` eagerly. +/// +/// Regression test for Cursor Low review #3311551145 on PR #302. +#[test] +fn component_assignment_expression_chases_late_const() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +let cached; +@Component({ selector: 'x', template: '', providers: [(cached = TOKEN)] }) +class TestComponent {} +const TOKEN = 'tok'; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let token_pos = result + .code + .find("const TOKEN") + .unwrap_or_else(|| panic!("Expected `const TOKEN` to be present.\nCode:\n{}", result.code)); + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + + assert!( + token_pos < class_pos, + "`const TOKEN` (read by an AssignmentExpression in decorator \ + metadata) must be hoisted above the class to avoid TDZ. \ + token@{token_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + assert_eq!( + result.code.matches("const TOKEN").count(), + 1, + "`const TOKEN` should appear exactly once.\nCode:\n{}", + result.code + ); +} + +/// Finding 1: transitive dependency cascade. The BFS pops `TOKEN` whose +/// only directly-called function is `make()`; the closure of +/// `init_called_symbols` brings in nothing class-relevant from `make`'s +/// body (it just calls `BACKREF` whose binding is a non-function const). +/// So the safe-skip guard at `TOKEN`'s site passes — `TOKEN`'s statement +/// is planned. The BFS then pushes `make`'s body refs onto the worklist, +/// pops `BACKREF`, and *its* guard detects `BACKREF = TestComponent` reading +/// a later class — so `BACKREF` is skipped. But `TOKEN`'s plan entry is +/// still there, leaving the runtime broken: hoisted `var TOKEN = make()` +/// invokes `make()` which reads not-yet-initialized `BACKREF`, which the +/// guard correctly identified would read `TestComponent` if it ran. +/// +/// Required: when a dependency is guard-skipped, every transitively +/// dependent already-planned statement must be un-planned too. Without +/// the fix, `var TOKEN` lands above `class TestComponent` in the output. +/// +/// Regression test for Codex P2 review (Finding 1) on PR #302. +#[test] +fn component_eager_fn_body_transitive_class_ref_unplans_chain() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'x', template: '', providers: [{ provide: 'x', useValue: TOKEN }] }) +class TestComponent {} +var TOKEN = make(); +function make() { return BACKREF; } +const BACKREF = TestComponent; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + let token_pos = result + .code + .find("var TOKEN") + .unwrap_or_else(|| panic!("Expected `var TOKEN` to be present.\nCode:\n{}", result.code)); + let backref_pos = result.code.find("const BACKREF").unwrap_or_else(|| { + panic!("Expected `const BACKREF` to be present.\nCode:\n{}", result.code) + }); + + assert!( + class_pos < token_pos, + "`var TOKEN = make()` must NOT be hoisted above the class because \ + its transitive dep `BACKREF` reads `TestComponent`. class@{class_pos} \ + token@{token_pos}\nCode:\n{}", + result.code + ); + assert!( + class_pos < backref_pos, + "`const BACKREF = TestComponent` must NOT be hoisted above the class. \ + class@{class_pos} backref@{backref_pos}\nCode:\n{}", + result.code + ); +} + +/// Finding 2: function-valued `const`/`let` bindings hide eager class +/// reads. The BFS pops `TOKEN` whose `init_called_symbols = {make}`. +/// `make` is a `const` arrow, not a function decl — so it's missing from +/// `fn_body_*` maps. The closure expansion finds nothing; the guard +/// passes; `TOKEN` gets hoisted above the class. At runtime: hoisted +/// `make()` reads `TestComponent` in TDZ. +/// +/// Required: top-level `const`/`let`/`var` bindings whose initializer is +/// *directly* an `ArrowFunctionExpression` / `FunctionExpression` (after +/// peeling parens / TS wrappers) must be indexed into `fn_body_*` maps +/// keyed by the binding symbol, so the existing safe-skip guard catches +/// the transitive class read. +/// +/// Regression test for Codex P2 review (Finding 2) on PR #302. +#[test] +fn component_eager_fn_value_const_arrow_class_ref_blocks_hoist() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'x', template: '', providers: [{ provide: 'x', useValue: TOKEN }] }) +class TestComponent {} +var TOKEN = make(); +const make = () => TestComponent; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + let token_pos = result + .code + .find("var TOKEN") + .unwrap_or_else(|| panic!("Expected `var TOKEN` to be present.\nCode:\n{}", result.code)); + + assert!( + class_pos < token_pos, + "`var TOKEN = make()` must NOT be hoisted above the class because \ + the `const make = () => TestComponent` arrow body reads the class. \ + class@{class_pos} token@{token_pos}\nCode:\n{}", + result.code + ); +} + +/// Finding 3: member-call shapes `fn.call(...)` / `fn.apply(...)` aren't +/// recognized as eager calls. `record_direct_callee` peels parens / TS +/// wrappers but stops at `StaticMemberExpression`, so `make.call(null)` +/// records nothing in `called`. The guard's `stmt_called` is empty, the +/// transitive class-ref check never inspects `make`'s body, and `TOKEN` +/// gets hoisted above the class. At runtime: hoisted `make.call(null)` +/// reads `TestComponent` in TDZ. +/// +/// Required: extend `record_direct_callee` (or a wrapper) to recognize +/// the static call shapes `fn.call(...)`, `fn.apply(...)`, and +/// `fn.bind(...)()` on top-level function symbols. +/// +/// Regression test for Codex P2 review (Finding 3) on PR #302. +#[test] +fn component_eager_member_call_class_ref_blocks_hoist() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'x', template: '', providers: [{ provide: 'x', useValue: TOKEN }] }) +class TestComponent {} +var TOKEN = make.call(null); +function make() { return TestComponent; } +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + let token_pos = result + .code + .find("var TOKEN") + .unwrap_or_else(|| panic!("Expected `var TOKEN` to be present.\nCode:\n{}", result.code)); + + assert!( + class_pos < token_pos, + "`var TOKEN = make.call(null)` must NOT be hoisted above the class \ + because `make()`'s body reads `TestComponent`. class@{class_pos} \ + token@{token_pos}\nCode:\n{}", + result.code + ); +} + +/// Codex P3 review Finding 1: cross-class `insert_at` ordering. Two +/// `@Component`-decorated classes (C1 first, C2 second) with an +/// undecorated `class Mid` between them. C1 plans `var TOKEN = make()` at +/// `insert_at = pos_C1`; its BFS chases `make`'s body to `X` but the +/// safe-skip guard rejects `X` for C1 because `X = Mid` reads class `Mid` +/// which is declared *after* C1. C2's BFS reaches `X` independently (via +/// `useValue: X`) and the safe-skip passes for C2 (Mid is declared +/// *before* C2). So `X` lands in the plan at `insert_at = pos_C2 > +/// pos_C1`. +/// +/// The cascade un-planning loop previously treated "X is in plan" as a +/// safe dep — but X's `insert_at` is *later* than TOKEN's, so at runtime +/// hoisted TOKEN runs before hoisted X and `make()` TDZ-reads `X`. The +/// fix changes the cascade check to "dep planned at an `insert_at` ≤ S's +/// `insert_at`" (drop S otherwise). +/// +/// We assert `class C1` precedes `var TOKEN = make()`: TOKEN must NOT be +/// hoisted because its dep X can't be hoisted to the same insertion +/// position. (TOKEN's user-authored TDZ on X persists — not our problem; +/// we just must not introduce a fresh hoist-induced TDZ between the two +/// hoisted statements.) +#[test] +fn component_cascade_cross_class_insert_order_unplans_dependent() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'a', template: '', providers: [{ provide: 'x', useValue: TOKEN }] }) +class C1 {} +var TOKEN = make(); +function make() { return X; } +class Mid {} +@Component({ selector: 'b', template: '', providers: [{ provide: 'y', useValue: X }] }) +class C2 {} +const X = Mid; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let c1_pos = result + .code + .find("class C1") + .unwrap_or_else(|| panic!("Expected `class C1` to be present.\nCode:\n{}", result.code)); + let token_pos = result + .code + .find("var TOKEN") + .unwrap_or_else(|| panic!("Expected `var TOKEN` to be present.\nCode:\n{}", result.code)); + + assert!( + c1_pos < token_pos, + "`var TOKEN = make()` must NOT be hoisted above `class C1` because \ + its transitive dep `X` is only planned at `insert_at` for \ + `class C2`, which is *later* in source. Hoisting TOKEN above C1 \ + leaves it running before the hoisted X lands. c1@{c1_pos} \ + token@{token_pos}\nCode:\n{}", + result.code + ); +} + +/// Codex P3 review Finding 2: per-S eager-call set. Class A uses +/// `makeRef` as a value (`useFactory: makeRef`); class B *calls* `make()` +/// (`providers: [make()]`). The cascade pass currently uses +/// `combined_eagerly_called` (the union across all classes) so `make` — +/// only eagerly invoked from B — over-expands A's `makeRef` statement +/// closure through `make`'s body refs. A's safe hoist gets dropped even +/// though A never calls `make`. +/// +/// With the fix, the cascade computes a per-S eager-call set from +/// `info.init_called_symbols` closed under `fn_body_called_symbols`. A's +/// statement `const makeRef = make;` calls nothing, so its eager set is +/// empty and the closure doesn't chase `make`'s body. A's `makeRef` hoist +/// survives. +#[test] +fn component_cascade_value_only_ref_does_not_over_expand() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'a', template: '', providers: [{ provide: 'x', useFactory: makeRef }] }) +class A {} +@Component({ selector: 'b', template: '', providers: [make()] }) +class B {} +const makeRef = make; +function make() { return BACKREF; } +const BACKREF = 'tok'; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let a_pos = result + .code + .find("class A") + .unwrap_or_else(|| panic!("Expected `class A` to be present.\nCode:\n{}", result.code)); + let make_ref_pos = result.code.find("const makeRef").unwrap_or_else(|| { + panic!("Expected `const makeRef` to be present.\nCode:\n{}", result.code) + }); + + assert!( + make_ref_pos < a_pos, + "`const makeRef = make;` must be hoisted above `class A` because A \ + only references `makeRef` as a value — A never *calls* `make`, so \ + `make`'s body refs are irrelevant to A's safe-skip. The cascade \ + must compute a per-S eager-call set so `make`'s eager evaluation \ + from class B doesn't bleed into A's closure. makeRef@{make_ref_pos} \ + a@{a_pos}\nCode:\n{}", + result.code + ); +} + +/// Codex P3 review Finding 3: multi-declarator function-valued bindings. +/// `index_fn_valued_binding` currently only runs when +/// `decl.declarations.len() == 1`. The shape +/// `const make = () => TestComponent, other = 0;` skips indexing, so +/// `make`'s arrow body is never visible to the safe-skip guard. An eager +/// caller (`var TOKEN = make()`) then hoists above the class and TDZ-reads +/// `TestComponent` at runtime. +/// +/// The fix lifts the indexing into the per-declarator loop so each +/// declarator with a plain identifier binding and a direct arrow/function +/// initializer gets indexed regardless of how many siblings share the +/// statement. Assert `class TestComponent` precedes `var TOKEN`. +#[test] +fn component_multi_declarator_fn_valued_binding_blocks_caller_hoist() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'x', template: '', providers: [{ provide: 'x', useValue: TOKEN }] }) +class TestComponent {} +var TOKEN = make(); +const make = () => TestComponent, other = 0; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + let token_pos = result + .code + .find("var TOKEN") + .unwrap_or_else(|| panic!("Expected `var TOKEN` to be present.\nCode:\n{}", result.code)); + + assert!( + class_pos < token_pos, + "`var TOKEN = make()` must NOT be hoisted above the class because \ + the multi-declarator binding `const make = () => TestComponent, \ + other = 0;` declares `make` whose arrow body reads `TestComponent`. \ + class@{class_pos} token@{token_pos}\nCode:\n{}", + result.code + ); +} From 9c3c38d26acdeec916c8de9ef77dc99e0363e276 Mon Sep 17 00:00:00 2001 From: LongYinan Date: Wed, 27 May 2026 23:58:26 +0800 Subject: [PATCH 11/21] fix(hoist): chase fn-valued binding bodies in guard, cascade, and topo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolves the new inline bot comments on PR #302 and the follow-on adversarial findings the fix surfaced. * Codex P2 #3311913006 — when a popped symbol both has `symbol_to_stmt` and `fn_body_symbol_refs`, the BFS branch now plans the statement AND chases body refs (or defers them via `deferred_fns` when the binding isn't yet eagerly called). * Cursor Low #3311962888 — `topological_order` no longer takes the global `combined_eagerly_called`; per-S `stmt_eager_sets` and a shared `stmt_fn_valued_bindings` map drive the dep-edge construction, matching the cascade's reasoning. Adversarial follow-up: the safe-skip guard and cascade un-planning were blind to fn-valued bindings whose arrow body reads a class — for `@Component({providers: make()}) class C {} const make = () => C;` the planner hoisted `make` above `C` and Ivy's eager `make()` then hit TDZ on `C`. The fix lifts `stmt_fn_valued_bindings` to right after `collect_top_level_bindings` and folds each statement's eagerly-called fn-valued binding bodies into both the per-class safe-skip closure (via the BFS's `eagerly_called`) and the cascade closure (via `combined_eagerly_called`), keeping all three passes symmetric. Co-Authored-By: Claude Opus 4.7 --- .../src/component/hoist.rs | 187 +++++++++++- .../tests/integration_test.rs | 269 ++++++++++++++++++ 2 files changed, 445 insertions(+), 11 deletions(-) diff --git a/crates/oxc_angular_compiler/src/component/hoist.rs b/crates/oxc_angular_compiler/src/component/hoist.rs index 530f901c9..70a5b7205 100644 --- a/crates/oxc_angular_compiler/src/component/hoist.rs +++ b/crates/oxc_angular_compiler/src/component/hoist.rs @@ -118,6 +118,25 @@ pub fn collect_hoist_edits<'a>( return Vec::new(); } + // Reverse view of `symbol_to_stmt` ∩ `fn_body_symbol_refs` keyed by + // `stmt_start`. A statement like `const make = () => TestComponent;` + // declares a fn-valued binding `make` whose body refs are indexed in + // `fn_body_symbol_refs[make]`, but the statement's plain `init_symbols` + // is empty (arrow bodies are lazy in `collect_expr_symbols`). When the + // binding's symbol enters some class's `eagerly_called` (because a + // decorator invokes `make()`), the arrow body fires at module load and + // its reads become TDZ-relevant to that statement. + // + // Computed once here so the BFS safe-skip guard, the cascade un-planning + // pass, and the topological sort can all consult the same map. See + // PR #302 Round 6 follow-on review. + let mut stmt_fn_valued_bindings: HashMap> = HashMap::new(); + for (&sym, &stmt_start) in &symbol_to_stmt { + if fn_body_symbol_refs.contains_key(&sym) { + stmt_fn_valued_bindings.entry(stmt_start).or_default().push(sym); + } + } + // Index every top-level class declaration by its binding `SymbolId` → // the class's `span.start`. Used by the BFS to refuse hoisting any // statement whose initializer references a class that lives at-or-after @@ -255,6 +274,25 @@ pub fn collect_hoist_edits<'a>( // #3310709319 and Codex P2 review #3311493528 on PR #302. let mut stmt_called: HashSet = info.init_called_symbols.iter().copied().collect(); + // Fold in any fn-valued binding declared by this statement + // whose binding symbol is in THIS class's `eagerly_called` + // set. When a decorator (or a chain of hoisted initializers) + // invokes such a binding (`const make = () => …` called as + // `make()`), the arrow/function body fires at module load + // — its body refs are TDZ-relevant exactly like body refs of + // a top-level function declaration the initializer calls. + // Without this, `const make = () => TestComponent;` invoked + // by `providers: make()` would slip past the guard because + // the initializer's plain `init_symbols` / `init_called_symbols` + // are empty (arrow bodies are lazy in `collect_expr_symbols`). + // See PR #302 Round 6 follow-on review. + if let Some(fn_syms) = stmt_fn_valued_bindings.get(&stmt_start) { + for &fn_sym in fn_syms { + if eagerly_called.contains(&fn_sym) { + stmt_called.insert(fn_sym); + } + } + } let mut stmt_call_wl: Vec = stmt_called.iter().copied().collect(); close_eagerly_called(&mut stmt_called, &mut stmt_call_wl, &fn_body_called_symbols); @@ -334,6 +372,38 @@ pub fn collect_hoist_edits<'a>( worklist.push(s); } } + + // Function-valued bindings act as BOTH a binding (planned + // above) AND a function (their body refs fire when called). + // `index_fn_valued_binding` populates `fn_body_symbol_refs` + // for `const make = () => …` / `const make = function … {}` + // shapes. If the decorator metadata called `make()` (or a + // hoisted initializer's `init_called_symbols` closure + // promoted `make` into `eagerly_called`), the body refs + // ALSO need chasing — otherwise the arrow body's reads + // (e.g. `TOKEN` inside `() => [{ provide: TOKEN, … }]`) + // never enter the worklist and stay declared below the + // class, throwing TDZ when the hoisted `make()` runs at + // module load. Mirror the `else if eagerly_called` branch + // below; defer otherwise so a later eager-set promotion + // belatedly chases the body via the existing now_eager + // sweep at the top of this match arm. See PR #302 Codex P2 + // review #3311913006. + if fn_body_symbol_refs.contains_key(&symbol) { + if eagerly_called.contains(&symbol) { + if chased_fn_bodies.insert(symbol) { + if let Some(body_refs) = fn_body_symbol_refs.get(&symbol) { + for &s in body_refs { + if !visited.contains(&s) { + worklist.push(s); + } + } + } + } + } else { + deferred_fns.insert(symbol); + } + } } else if eagerly_called.contains(&symbol) { // The symbol resolves to a top-level function declaration // that is *actually called* (transitively) at module load @@ -423,10 +493,30 @@ pub fn collect_hoist_edits<'a>( // set the planner used. let mut stmt_called: HashSet = info.init_called_symbols.iter().copied().collect(); + // Mirror the BFS safe-skip guard: fold in fn-valued bindings + // this statement declares whose binding symbol is in + // `combined_eagerly_called` (no per-class scoping here — the + // cascade runs after the BFS unions per-class eager sets). Then + // seed the closure with those binding symbols so + // `expand_through_functions` descends into their arrow/function + // bodies. Without this, `const make = () => BACKREF;` planned + // for a class that calls `make()` would not see its body's + // dependency on `BACKREF` (whose own statement was guard- + // skipped), so the cascade would fail to drop `make` and the + // hoisted `make()` would TDZ on `BACKREF`. See PR #302 Round 6 + // follow-on review. + let mut seed: HashSet = info.init_symbols.clone(); + if let Some(fn_syms) = stmt_fn_valued_bindings.get(&start) { + for &fn_sym in fn_syms { + if combined_eagerly_called.contains(&fn_sym) { + stmt_called.insert(fn_sym); + seed.insert(fn_sym); + } + } + } let mut stmt_call_wl: Vec = stmt_called.iter().copied().collect(); close_eagerly_called(&mut stmt_called, &mut stmt_call_wl, &fn_body_called_symbols); - let closure = - expand_through_functions(&info.init_symbols, &fn_body_symbol_refs, &stmt_called); + let closure = expand_through_functions(&seed, &fn_body_symbol_refs, &stmt_called); for s in &closure { // Function symbols and unresolved refs have no // `symbol_to_stmt` entry — they can't be a variable @@ -489,12 +579,58 @@ pub fn collect_hoist_edits<'a>( // emitted *before* their dependents in the hoisted prelude. Within a // single bucket (same `insert_at`), this guarantees that e.g. `const // TOKEN` precedes `const PROVIDERS = [{ provide: TOKEN, ... }]`. + // + // Precompute `stmt_eager_sets`: per-S closure of + // `info.init_called_symbols` under `fn_body_called_symbols`. This is + // the same shape the cascade un-planning loop computes for its + // `stmt_called` set — passing it (instead of the global + // `combined_eagerly_called`) into `topological_order` makes the two + // passes reason against the same eager-evaluation set. The global + // union can over-expand: a function `make` eagerly called only by + // class B leaks into class A's `makeRef = make` closure when + // computing topo edges, forming a spurious edge that may invert + // ordering or trigger the cycle-break. See PR #302 Cursor Low review + // #3311962888. + // + // `stmt_fn_valued_bindings` (computed once near the top of this + // function) is consulted here too — see the doc comment on + // `topological_order` for why each statement's fn-valued binding + // symbols seed the topo edges. + let mut stmt_eager_sets: HashMap> = HashMap::with_capacity(plan.len()); + for &start in plan.keys() { + let Some(info) = stmt_info.get(&start) else { + stmt_eager_sets.insert(start, HashSet::new()); + continue; + }; + let mut stmt_called: HashSet = info.init_called_symbols.iter().copied().collect(); + let mut stmt_call_wl: Vec = stmt_called.iter().copied().collect(); + close_eagerly_called(&mut stmt_called, &mut stmt_call_wl, &fn_body_called_symbols); + // Function-valued bindings declared by this statement are part + // of THIS statement's eager-call surface when their initializer + // is invoked at module load (i.e. the binding's symbol is in + // some class's `eagerly_called`). Including them here lets + // `expand_through_functions` chase through the arrow/function + // body's refs to find dependency edges to other planned + // statements — without this the binding's body refs are invisible + // to the topo sort, mirroring the same issue Bug 1 fixed in the + // BFS pop branch. + if let Some(fn_syms) = stmt_fn_valued_bindings.get(&start) { + for &fn_sym in fn_syms { + if combined_eagerly_called.contains(&fn_sym) { + stmt_called.insert(fn_sym); + } + } + } + stmt_eager_sets.insert(start, stmt_called); + } + let order = topological_order( &plan, &symbol_to_stmt, &stmt_info, &fn_body_symbol_refs, - &combined_eagerly_called, + &stmt_eager_sets, + &stmt_fn_valued_bindings, ); // Step 4: emit edits. Group by `insert_at` so multiple statements headed @@ -539,14 +675,30 @@ pub fn collect_hoist_edits<'a>( /// ascending `stmt_start` so the result is deterministic. Cycles (which would /// require ill-formed source where two consts reference each other) are /// broken silently — they can't produce a valid evaluation order anyway. +/// +/// `stmt_eager_sets` is the per-planned-statement closure of +/// `init_called_symbols` under `fn_body_called_symbols`, matching the +/// shape the cascade un-planning loop uses. Passing per-S sets instead of +/// the global `combined_eagerly_called` keeps the cascade and topo +/// passes reasoning against the same eager-evaluation surface — see PR +/// #302 Cursor Low review #3311962888. +/// +/// `stmt_fn_valued_bindings` maps each planned `stmt_start` to the +/// function-valued binding symbols it declares (e.g. `make` for +/// `const make = () => TOKEN;`). Their `fn_body_symbol_refs` entries are +/// chased to surface body-ref dependencies that are invisible to the +/// statement's plain `init_symbols` — see PR #302 Codex P2 review +/// #3311913006. fn topological_order( plan: &HashMap, symbol_to_stmt: &HashMap, stmt_info: &HashMap, fn_body_symbol_refs: &HashMap>, - eagerly_called: &HashSet, + stmt_eager_sets: &HashMap>, + stmt_fn_valued_bindings: &HashMap>, ) -> Vec { let plan_starts: HashSet = plan.keys().copied().collect(); + let empty_eager: HashSet = HashSet::new(); // Adjacency list: stmt_start -> stmt_starts it depends on (must come // *before* it). Filter to only edges that land inside the plan; deps that @@ -555,19 +707,32 @@ fn topological_order( // // The "effective init symbols" of a planned statement are the transitive // closure of its direct `init_symbols` through `fn_body_symbol_refs`, - // **restricted to functions in `eagerly_called`**. If the initializer - // calls a function (directly or transitively), the function body's - // identifier reads count as references that fire when the hoisted - // statement evaluates. Functions only stored as values are NOT expanded - // — their bodies don't run at module load. See PR #302 review (Codex). + // **restricted to functions in this statement's per-S eager-call set**. + // If the initializer calls a function (directly or transitively), the + // function body's identifier reads count as references that fire when + // the hoisted statement evaluates. Functions only stored as values are + // NOT expanded — their bodies don't run at module load. See PR #302 + // review (Codex). + // + // Function-valued binding symbols this statement declares (e.g. `make` + // in `const make = () => TOKEN;`) are added to the seed so the + // expansion descends into their arrow/function bodies — those body + // refs are dependencies of THIS statement at runtime, but invisible + // to plain `init_symbols` because arrow bodies are lazy. let mut deps: HashMap> = HashMap::with_capacity(plan_starts.len()); for &start in &plan_starts { let Some(info) = stmt_info.get(&start) else { deps.insert(start, Vec::new()); continue; }; - let effective = - expand_through_functions(&info.init_symbols, fn_body_symbol_refs, eagerly_called); + let eager = stmt_eager_sets.get(&start).unwrap_or(&empty_eager); + let mut seed: HashSet = info.init_symbols.clone(); + if let Some(fn_syms) = stmt_fn_valued_bindings.get(&start) { + for &fn_sym in fn_syms { + seed.insert(fn_sym); + } + } + let effective = expand_through_functions(&seed, fn_body_symbol_refs, eager); let mut edges: Vec = effective .iter() .filter_map(|s| symbol_to_stmt.get(s)) diff --git a/crates/oxc_angular_compiler/tests/integration_test.rs b/crates/oxc_angular_compiler/tests/integration_test.rs index d2b770185..cff764346 100644 --- a/crates/oxc_angular_compiler/tests/integration_test.rs +++ b/crates/oxc_angular_compiler/tests/integration_test.rs @@ -11649,3 +11649,272 @@ const make = () => TestComponent, other = 0; result.code ); } + +/// Codex P2 review #3311913006: a top-level `const make = () => DEP` +/// populates BOTH `symbol_to_stmt[make]` (binding) AND +/// `fn_body_symbol_refs[make]` (because `index_fn_valued_binding` indexes +/// arrow/function-valued bindings as if they were function declarations). +/// When the BFS pops `make` and `eagerly_called.contains(&make)` (because +/// decorator metadata called `make()`), the `if let Some(&stmt_start) = +/// symbol_to_stmt.get(&make)` branch fires first and plans `make`'s +/// statement — then the `else if eagerly_called.contains(&symbol)` body- +/// chase NEVER runs. Result: `TOKEN`, which `make`'s arrow body reads, is +/// never pushed onto the worklist and stays declared below the class. At +/// runtime, hoisted `makeProviders()` reads `TOKEN` in TDZ. +/// +/// Required: when the BFS pops a symbol that has BOTH a `symbol_to_stmt` +/// entry AND a `fn_body_symbol_refs` entry, AND is in `eagerly_called`, +/// the binding-planning branch must ALSO chase the function body refs — +/// the symbol acts as both a binding AND a function. +/// +/// Assert: `const TOKEN` appears before `const makeProviders` in output, +/// and `const makeProviders` appears before `class TestComponent` — the +/// chase must reach `TOKEN` so it gets hoisted too. +#[test] +fn component_eager_fn_valued_const_chases_body_refs() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'x', template: '', providers: makeProviders() }) +class TestComponent {} +const makeProviders = () => [{ provide: TOKEN, useValue: 0 }]; +const TOKEN = 'tok'; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let token_pos = result + .code + .find("const TOKEN") + .unwrap_or_else(|| panic!("Expected `const TOKEN` to be present.\nCode:\n{}", result.code)); + let make_pos = result.code.find("const makeProviders").unwrap_or_else(|| { + panic!("Expected `const makeProviders` to be present.\nCode:\n{}", result.code) + }); + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + + assert!( + token_pos < make_pos, + "`const TOKEN` (read inside `makeProviders`'s arrow body which is \ + eagerly invoked by decorator metadata) must be hoisted above \ + `const makeProviders`. token@{token_pos} make@{make_pos}\nCode:\n{}", + result.code + ); + assert!( + make_pos < class_pos, + "`const makeProviders` must be hoisted above `class TestComponent`. \ + make@{make_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + assert_eq!( + result.code.matches("const TOKEN").count(), + 1, + "`const TOKEN` should appear exactly once.\nCode:\n{}", + result.code + ); +} + +/// Cursor Low review #3311962888: lock in symmetric per-stmt eager-call +/// reasoning between the cascade un-planning pass and `topological_order`. +/// The cascade was changed to compute a per-S `stmt_called` (closure of +/// `init_called_symbols` under `fn_body_called_symbols`); the topo sort +/// was still passing the global `combined_eagerly_called`. The asymmetry +/// can in principle create spurious dependency edges between planned +/// statements; in practice the cycle-break path is contrived. This test +/// is a regression guardrail: build a case where class A only references +/// `makeRef = make` as a value and class B eagerly calls `make()`. The +/// cascade decides A's hoist is safe; the topological sort must emit A's +/// statement in an order consistent with the cascade's view (i.e. not +/// reorder or drop it). +/// +/// Locks in symmetric per-stmt eager-call reasoning between cascade and +/// topological_order — see PR #302 Cursor review #3311962888. +#[test] +fn component_topo_uses_per_stmt_eager_set() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'a', template: '', providers: [{ provide: 'x', useFactory: makeRef }] }) +class A {} +@Component({ selector: 'b', template: '', providers: [make()] }) +class B {} +const makeRef = make; +function make() { return BACKREF; } +const BACKREF = 'tok'; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let make_ref_pos = result.code.find("const makeRef").unwrap_or_else(|| { + panic!("Expected `const makeRef` to be present.\nCode:\n{}", result.code) + }); + let a_pos = result + .code + .find("class A") + .unwrap_or_else(|| panic!("Expected `class A` to be present.\nCode:\n{}", result.code)); + + // The cascade pass already proves A is safe to hoist; symmetric topo + // must agree — `const makeRef` must precede `class A`. + assert!( + make_ref_pos < a_pos, + "`const makeRef = make;` must be hoisted above `class A` — A only \ + references `makeRef` as a value. The topological sort must reason \ + against the same per-stmt eager-call set the cascade used, so the \ + global `make` eager-call (from class B) doesn't introduce a \ + spurious edge that reorders A's hoist. makeRef@{make_ref_pos} \ + a@{a_pos}\nCode:\n{}", + result.code + ); + // Basic ordering invariant: a single `const makeRef` survives. + assert_eq!( + result.code.matches("const makeRef").count(), + 1, + "`const makeRef` should appear exactly once.\nCode:\n{}", + result.code + ); +} + +/// Follow-on adversarial finding (Round 6): a function-valued `const` +/// binding whose ARROW BODY reads a top-level class can escape BOTH the +/// safe-skip guard AND the cascade un-planning when the binding ITSELF +/// is eagerly called from a decorator. +/// +/// Trace: +/// - `decorator_called = {make}`. Per-class `eagerly_called = {make}`. +/// - BFS pops `make`. `symbol_to_stmt[make]` is present → enter the +/// binding branch. +/// - Safe-skip guard inspects `info.init_symbols` (refs in the +/// *initializer expression*). For `const make = () => TestComponent;`, +/// the initializer is an `ArrowFunctionExpression` — `collect_expr_symbols` +/// treats arrow bodies as lazy, so `init_symbols = {}` and +/// `init_called_symbols = {}`. Guard passes. +/// - Plan adds `const make = () => TestComponent;`. The Round-5 fix then +/// chases `fn_body_symbol_refs[make] = {TestComponent}`, pushing +/// `TestComponent` onto the worklist. BFS pops `TestComponent` — it's +/// a class, not a binding, not in `eagerly_called` — falls through. +/// +/// Result: `make` is hoisted above the class. At Ivy decorator-eval time, +/// hoisted `make()` reads `TestComponent` in TDZ → ReferenceError. +/// +/// Fix: the safe-skip guard must also include the body refs of every +/// fn-valued binding declared by this statement whose binding symbol is +/// in the per-class `eagerly_called` set — those body refs fire when the +/// binding is invoked at module load. +/// +/// Assert: `class TestComponent` precedes `const make` in the output — +/// `make`'s hoisting must be blocked because its body reads +/// `TestComponent`. +#[test] +fn component_eager_fn_valued_const_reading_class_blocks_hoist() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'x', template: '', providers: make() }) +class TestComponent {} +const make = () => TestComponent; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + let make_pos = result + .code + .find("const make") + .unwrap_or_else(|| panic!("Expected `const make` to be present.\nCode:\n{}", result.code)); + + assert!( + class_pos < make_pos, + "`class TestComponent` must precede `const make` — `make`'s arrow \ + body reads `TestComponent`, and the decorator eagerly invokes \ + `make()`. Hoisting `make` above the class introduces a fresh TDZ \ + on `TestComponent`. class@{class_pos} make@{make_pos}\nCode:\n{}", + result.code + ); + assert_eq!( + result.code.matches("const make").count(), + 1, + "`const make` should appear exactly once.\nCode:\n{}", + result.code + ); +} + +/// Round 6 transitive variant: the cascade un-planning loop must also +/// consult fn-valued bindings' body refs. +/// +/// Trace: +/// - `@Component({ providers: make() }) class TestComponent {}`. +/// - `const make = () => BACKREF;` — guard passes (arrow body lazy), +/// `make` planned. +/// - Body chase pushes `BACKREF`. BFS pops `BACKREF`. Its stmt's +/// `init_symbols = {TestComponent}` → safe-skip blocks. `BACKREF` is +/// NOT planned. +/// - Cascade pass for `make`: `info.init_symbols = {}` (arrow body lazy), +/// so `expand_through_functions(init_symbols={}, …)` returns empty +/// closure. The cascade never sees that `make`'s body reads `BACKREF`, +/// which isn't planned → cascade doesn't drop `make`. +/// - Result: `make` is hoisted above the class, `BACKREF` stays below; +/// at runtime hoisted `make()` reads `BACKREF` in TDZ. +/// +/// Fix: the cascade un-planning loop's closure seed must include each +/// fn-valued binding's symbol (so `expand_through_functions` descends +/// into its body), gated by `combined_eagerly_called` — only when the +/// binding's symbol is actually eagerly invoked somewhere. +/// +/// Assert: `class TestComponent` precedes BOTH `const make` and +/// `const BACKREF` in the output — neither got hoisted. +#[test] +fn component_eager_fn_valued_const_transitive_class_ref_unplans() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'x', template: '', providers: make() }) +class TestComponent {} +const make = () => BACKREF; +const BACKREF = TestComponent; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + let make_pos = result + .code + .find("const make") + .unwrap_or_else(|| panic!("Expected `const make` to be present.\nCode:\n{}", result.code)); + let backref_pos = result.code.find("const BACKREF").unwrap_or_else(|| { + panic!("Expected `const BACKREF` to be present.\nCode:\n{}", result.code) + }); + + assert!( + class_pos < make_pos, + "`class TestComponent` must precede `const make` — `make`'s arrow \ + body reads `BACKREF` which transitively reads `TestComponent`. \ + Hoisting `make` introduces a TDZ. class@{class_pos} \ + make@{make_pos}\nCode:\n{}", + result.code + ); + assert!( + class_pos < backref_pos, + "`class TestComponent` must precede `const BACKREF` — `BACKREF` \ + directly reads `TestComponent`. The original guard already \ + blocks `BACKREF`'s hoist; this assertion locks that in. \ + class@{class_pos} backref@{backref_pos}\nCode:\n{}", + result.code + ); + assert_eq!( + result.code.matches("const make").count(), + 1, + "`const make` should appear exactly once.\nCode:\n{}", + result.code + ); + assert_eq!( + result.code.matches("const BACKREF").count(), + 1, + "`const BACKREF` should appear exactly once.\nCode:\n{}", + result.code + ); +} From 0de5f1cc8399103df073bcc6ed1f2d96da7eb5ea Mon Sep 17 00:00:00 2001 From: LongYinan Date: Thu, 28 May 2026 09:12:46 +0800 Subject: [PATCH 12/21] fix(hoist): index class constructors and walk eager class-expr parts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Codex P2 #3312108552 — top-level class declarations are now indexed into `fn_body_symbol_refs` / `fn_body_called_symbols` (with `include_constructor: true`) via a new `walk_class_eager_parts` helper. The helper covers the union of class-definition-time eager refs (super_class, computed keys, static field / accessor initializers, static blocks) and `new`-time eager refs (constructor body + parameter defaults, instance field / accessor initializers). The BFS body-chase branch then chases through `new ClassName()` callers in hoisted initializers — `const PROVIDERS = [new S()]` no longer hoists past `class TestComponent` when `S`'s constructor reads a later const. * Codex P2 #3312108558 — `collect_expr_symbols`'s previously-opaque `E::ClassExpression` arm now calls `walk_class_eager_parts` with `include_constructor: false`, so the eager parts of an inline class expression (super_class, computed keys, static initializers, static blocks) feed the dependency graph. Co-Authored-By: Claude Opus 4.7 --- .../src/component/hoist.rs | 168 +++++++++++++++++- .../tests/integration_test.rs | 154 ++++++++++++++++ 2 files changed, 316 insertions(+), 6 deletions(-) diff --git a/crates/oxc_angular_compiler/src/component/hoist.rs b/crates/oxc_angular_compiler/src/component/hoist.rs index 70a5b7205..4863ac0e1 100644 --- a/crates/oxc_angular_compiler/src/component/hoist.rs +++ b/crates/oxc_angular_compiler/src/component/hoist.rs @@ -35,9 +35,9 @@ use std::collections::{HashMap, HashSet}; use oxc_ast::ast::{ - Argument, ArrayExpressionElement, BindingPattern, ChainElement, Class, Declaration, Decorator, - ExportDefaultDeclarationKind, Expression, FormalParameters, IdentifierReference, - ObjectPropertyKind, Program, Statement, + Argument, ArrayExpressionElement, BindingPattern, ChainElement, Class, ClassElement, + Declaration, Decorator, ExportDefaultDeclarationKind, Expression, FormalParameters, + IdentifierReference, MethodDefinitionKind, ObjectPropertyKind, Program, Statement, }; use oxc_ast_visit::Visit; use oxc_semantic::Semantic; @@ -1108,6 +1108,42 @@ fn collect_top_level_bindings<'a>( fn_body_symbol_refs.insert(fn_symbol, refs); fn_body_called_symbols.insert(fn_symbol, called); } + continue; + } + + // Top-level class declarations. We don't *move* classes (they're + // gated separately and never added to `symbol_to_stmt`), but we + // index the eager parts of each class so the BFS can chase + // TDZ-relevant identifiers through `new ClassName()` callers in + // hoisted initializers. Mirrors the function-declaration arm above: + // `fn_body_symbol_refs[class_symbol]` receives every identifier read + // at class-definition time (super_class, computed keys, static + // initializers, static blocks) AND `new`-time (constructor body + + // params + instance field initializers); `fn_body_called_symbols` + // receives direct callees seen in those parts. + // + // The over-counting (definition-time eager merged with new-time + // eager) is intentional. The BFS only ever uses these maps to + // *block* hoisting that would introduce a fresh TDZ — never to + // greenlight one — so extending the body-ref set can only + // over-block, never under-block. See PR #302 Codex P2 review + // #3312108552. + if let Some((class, _)) = class_of(stmt) { + if let Some(id) = &class.id + && let Some(class_symbol) = id.symbol_id.get() + { + let mut refs: HashSet = HashSet::new(); + let mut called: HashSet = HashSet::new(); + walk_class_eager_parts( + class, + /* include_constructor */ true, + semantic, + &mut refs, + &mut called, + ); + fn_body_symbol_refs.insert(class_symbol, refs); + fn_body_called_symbols.insert(class_symbol, called); + } } } @@ -1200,6 +1236,110 @@ fn for_each_pattern_default<'a, 'src>( } } +/// Walk a class's eager parts and feed identifier refs / direct callees +/// into `out` / `called`. "Eager parts" depend on `include_constructor`: +/// +/// * Class-definition-time eager (always walked): `super_class` expression, +/// computed keys on any member, static field/accessor initializers, +/// static blocks. +/// * `new`-time eager (walked when `include_constructor` is true): +/// constructor body + constructor parameter defaults, instance field / +/// accessor initializers (these run inside the synthesized constructor). +/// +/// For a top-level *class declaration* indexed as if it were a "function" +/// in `fn_body_symbol_refs`, we want `include_constructor = true` — +/// `new ClassName()` triggers both definition-time AND constructor-time +/// eager evaluations. Over-counting the definition-time parts is fine; it +/// only over-blocks hoisting, never under-blocks. +/// +/// For a *class expression* embedded inside an eagerly-evaluated decorator +/// argument (Bug 2 of Codex P2 review #3312108558), the class expression +/// itself is being defined inline — so only the class-definition-time +/// eager parts fire here. Instance methods/fields/constructor bodies are +/// lazy until someone calls `new` on the class, which the metadata can't +/// see. Use `include_constructor = false`. +/// +/// Member decorators and the class's own decorators are skipped — decorator +/// factory invocation is special and out of scope. +fn walk_class_eager_parts<'a>( + class: &Class<'a>, + include_constructor: bool, + semantic: &Semantic<'a>, + out: &mut HashSet, + called: &mut HashSet, +) { + // `super_class` evaluates at class-definition time, before the body + // executes. Always walk it. + if let Some(super_expr) = &class.super_class { + collect_expr_symbols(super_expr, semantic, out, called); + } + for element in &class.body.body { + match element { + ClassElement::MethodDefinition(method) => { + // Computed keys fire at class-definition time regardless of + // method kind. + if method.computed + && let Some(key_expr) = method.key.as_expression() + { + collect_expr_symbols(key_expr, semantic, out, called); + } + // Constructor body + parameter defaults fire at `new`-time. + if include_constructor && method.kind == MethodDefinitionKind::Constructor { + if let Some(body) = &method.value.body { + let mut visitor = FunctionBodyIdentVisitor { semantic, out, called }; + visitor.visit_function_body(body); + } + walk_param_defaults(&method.value.params, semantic, out, called); + } + // Non-constructor instance method bodies are lazy; static + // method bodies are also lazy (they're properties on the + // class object, executed when called). Skip both. + } + ClassElement::PropertyDefinition(prop) => { + if prop.computed + && let Some(key_expr) = prop.key.as_expression() + { + collect_expr_symbols(key_expr, semantic, out, called); + } + // Static field initializers fire at class-definition time. + // Instance field initializers fire at `new`-time inside the + // synthesized constructor. + if let Some(value) = &prop.value { + if prop.r#static { + collect_expr_symbols(value, semantic, out, called); + } else if include_constructor { + collect_expr_symbols(value, semantic, out, called); + } + } + } + ClassElement::AccessorProperty(accessor) => { + if accessor.computed + && let Some(key_expr) = accessor.key.as_expression() + { + collect_expr_symbols(key_expr, semantic, out, called); + } + if let Some(value) = &accessor.value { + if accessor.r#static { + collect_expr_symbols(value, semantic, out, called); + } else if include_constructor { + collect_expr_symbols(value, semantic, out, called); + } + } + } + ClassElement::StaticBlock(block) => { + // `static { … }` body runs once at class-definition time. + // Walk it like an eagerly-evaluated function body. + let mut visitor = FunctionBodyIdentVisitor { semantic, out, called }; + for stmt in &block.body { + visitor.visit_statement(stmt); + } + } + // `TSIndexSignature` is type-only, erased at runtime. + ClassElement::TSIndexSignature(_) => {} + } + } +} + /// Walk every parameter default expression of a function/arrow's /// `FormalParameters` and feed the refs / direct callees into the same /// `out` / `called` sets the body visitor populates. Defaults are @@ -1540,9 +1680,25 @@ fn collect_expr_symbols<'a>( E::UpdateExpression(update) => { collect_simple_assignment_target_symbols(&update.argument, semantic, out, called); } - // Class expressions inside metadata are exceedingly rare and their - // bodies aren't eagerly evaluated; treat them as opaque. - E::ClassExpression(_) => {} + // Class expressions inside an eagerly-evaluated context. Several + // parts of a class expression fire at class-definition time and + // are TDZ-relevant: the `super_class` expression, computed keys + // on any member, static field / accessor initializers, and static + // blocks. Instance methods, instance fields, and the constructor + // body are lazy until someone calls `new` on the class — and the + // metadata can't see that call, so they stay opaque. + // + // Member decorators and the class expression's own decorators are + // skipped here. See PR #302 Codex P2 review #3312108558. + E::ClassExpression(class_expr) => { + walk_class_eager_parts( + class_expr.as_ref(), + /* include_constructor */ false, + semantic, + out, + called, + ); + } // Function and arrow bodies run lazily — references inside don't // affect class-init evaluation. E::ArrowFunctionExpression(_) | E::FunctionExpression(_) => {} diff --git a/crates/oxc_angular_compiler/tests/integration_test.rs b/crates/oxc_angular_compiler/tests/integration_test.rs index cff764346..fee9ddd5e 100644 --- a/crates/oxc_angular_compiler/tests/integration_test.rs +++ b/crates/oxc_angular_compiler/tests/integration_test.rs @@ -11918,3 +11918,157 @@ const BACKREF = TestComponent; result.code ); } + +/// Codex P2 review #3312108552: top-level class declarations' constructor +/// bodies are NOT indexed into `fn_body_symbol_refs` / +/// `fn_body_called_symbols`. When a hoisted initializer eagerly invokes +/// `new ClassName()`, the constructor body runs at module load — and any +/// later-declared top-level binding it reads will TDZ-throw. +/// +/// Trace: +/// - `@Component({ providers: PROVIDERS }) class TestComponent {}` +/// - `class S { constructor() { TOKEN; } }` declared above. +/// - `const PROVIDERS = [new S()];` below the decorated class. +/// - `const TOKEN = 1;` below `PROVIDERS`. +/// +/// BFS pops `PROVIDERS`: `init_symbols = {S}`, `init_called_symbols = {S}` +/// (recorded by `record_direct_callee` on `new S()`). Without class +/// indexing, the closure of `init_called_symbols` under +/// `fn_body_called_symbols` stays `{S}` and `fn_body_symbol_refs.get(&S)` +/// is empty. Safe-skip guard passes. `PROVIDERS` is planned. BFS chases +/// `S` (transitive): not in `symbol_to_stmt`, not in `eagerly_called` +/// (since `S` is a class, not a function decl) → nothing happens. `TOKEN` +/// never enters the worklist; it stays below the class. At runtime, +/// hoisted `new S()` reads `TOKEN` in TDZ. +/// +/// Fix: index every top-level class declaration's constructor body (and +/// eager class parts) into `fn_body_symbol_refs` / `fn_body_called_symbols`. +/// Then `S` becomes `eagerly_called` once `PROVIDERS`'s +/// `init_called_symbols` is folded in, and the BFS chases the class +/// "body" refs (which include `TOKEN`). +/// +/// Assert: `const TOKEN` precedes `const PROVIDERS` AND `const PROVIDERS` +/// precedes `class TestComponent` — both transitively hoisted. +#[test] +fn component_eager_new_class_constructor_chases_late_const() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +class S { constructor() { TOKEN; } } +@Component({ selector: 'x', template: '', providers: PROVIDERS }) +class TestComponent {} +const PROVIDERS = [new S()]; +const TOKEN = 1; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let token_pos = result + .code + .find("const TOKEN") + .unwrap_or_else(|| panic!("Expected `const TOKEN` to be present.\nCode:\n{}", result.code)); + let providers_pos = result.code.find("const PROVIDERS").unwrap_or_else(|| { + panic!("Expected `const PROVIDERS` to be present.\nCode:\n{}", result.code) + }); + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + + assert!( + token_pos < providers_pos, + "`const TOKEN` (read inside `class S`'s constructor body which is \ + eagerly invoked by `new S()` in `PROVIDERS`) must be hoisted above \ + `const PROVIDERS`. token@{token_pos} providers@{providers_pos}\nCode:\n{}", + result.code + ); + assert!( + providers_pos < class_pos, + "`const PROVIDERS` must be hoisted above `class TestComponent`. \ + providers@{providers_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + assert_eq!( + result.code.matches("const TOKEN").count(), + 1, + "`const TOKEN` should appear exactly once.\nCode:\n{}", + result.code + ); + assert_eq!( + result.code.matches("const PROVIDERS").count(), + 1, + "`const PROVIDERS` should appear exactly once.\nCode:\n{}", + result.code + ); +} + +/// Codex P2 review #3312108558: `E::ClassExpression(_) => {}` in +/// `collect_expr_symbols` drops the eager parts of a class expression — +/// the `super_class` expression, computed keys, static field initializers, +/// and static blocks. Those fire when the class expression is *defined*, +/// not lazily when its methods run. +/// +/// Trace: +/// - `@Component({ providers: PROVIDERS }) class TestComponent {}` +/// - `const PROVIDERS = [class extends BASE {}];` +/// - `const BASE = class {};` +/// +/// Without the fix, `PROVIDERS`'s `init_symbols` is empty (class expr is +/// opaque), so `BASE` never enters the worklist. `PROVIDERS` is hoisted +/// above `TestComponent` but `BASE` stays below — at runtime, hoisted +/// `[class extends BASE {}]` evaluates and reads `BASE` in TDZ. +/// +/// Fix: walk `super_class`, computed keys on all members, static field +/// initializers, static accessor initializers, and static blocks. +/// +/// Assert: `const BASE` precedes `const PROVIDERS` AND `const PROVIDERS` +/// precedes `class TestComponent`. +#[test] +fn component_class_expr_super_class_chases_late_const() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'x', template: '', providers: PROVIDERS }) +class TestComponent {} +const PROVIDERS = [class extends BASE {}]; +const BASE = class {}; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let base_pos = result + .code + .find("const BASE") + .unwrap_or_else(|| panic!("Expected `const BASE` to be present.\nCode:\n{}", result.code)); + let providers_pos = result.code.find("const PROVIDERS").unwrap_or_else(|| { + panic!("Expected `const PROVIDERS` to be present.\nCode:\n{}", result.code) + }); + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + + assert!( + base_pos < providers_pos, + "`const BASE` (read by `class extends BASE {{}}` inside `PROVIDERS`) \ + must be hoisted above `const PROVIDERS`. base@{base_pos} \ + providers@{providers_pos}\nCode:\n{}", + result.code + ); + assert!( + providers_pos < class_pos, + "`const PROVIDERS` must be hoisted above `class TestComponent`. \ + providers@{providers_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + assert_eq!( + result.code.matches("const BASE").count(), + 1, + "`const BASE` should appear exactly once.\nCode:\n{}", + result.code + ); + assert_eq!( + result.code.matches("const PROVIDERS").count(), + 1, + "`const PROVIDERS` should appear exactly once.\nCode:\n{}", + result.code + ); +} From 174c796c1d8598ed5b1629cd636286a60d1bc490 Mon Sep 17 00:00:00 2001 From: LongYinan Date: Thu, 28 May 2026 09:28:46 +0800 Subject: [PATCH 13/21] fix(hoist): walk classes inside fn bodies, branch callees, tag template tags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Codex P2 #3314767088 — `FunctionBodyIdentVisitor::visit_class` was a no-op, dropping inline `class extends TOKEN {}` defined inside an eagerly-called function body. It now calls `walk_class_eager_parts` with `include_constructor: true` so super_class, computed keys, static initializers/blocks, and (conservatively) constructor body refs feed `fn_body_symbol_refs` / `fn_body_called_symbols`. * Codex P2 #3314767091 — `record_direct_callee` only recognised bare identifiers under parens / TS wrappers. Restructured as an iterative worklist that also descends into both branches of `ConditionalExpression` / `LogicalExpression` and the last expression of a `SequenceExpression`, so callees like `(cond ? makeA : makeB)()` and `(makeA || makeB)()` enter the eager-call set. * Cursor Low #3314770575 — `FunctionBodyIdentVisitor` gained `visit_tagged_template_expression`, recording the tag via the same direct/indirect/bind callee helpers as `visit_call_expression` so `tag\`…\`` inside a top-level function body chases the tag's body refs. Co-Authored-By: Claude Opus 4.7 --- .../src/component/hoist.rs | 115 ++++++++++++--- .../tests/integration_test.rs | 134 ++++++++++++++++++ 2 files changed, 227 insertions(+), 22 deletions(-) diff --git a/crates/oxc_angular_compiler/src/component/hoist.rs b/crates/oxc_angular_compiler/src/component/hoist.rs index 4863ac0e1..9aa09a928 100644 --- a/crates/oxc_angular_compiler/src/component/hoist.rs +++ b/crates/oxc_angular_compiler/src/component/hoist.rs @@ -1444,9 +1444,42 @@ impl<'a, 'b> Visit<'a> for FunctionBodyIdentVisitor<'a, 'b> { fn visit_arrow_function_expression(&mut self, _it: &oxc_ast::ast::ArrowFunctionExpression<'a>) { } - // Class expressions inside the body define methods that don't run at - // call time of the outer function. Skip. - fn visit_class(&mut self, _it: &Class<'a>) {} + // Class expressions inside an eagerly-called function body evaluate + // their eager parts (`super_class`, computed keys, static field / + // accessor initializers, static blocks) at call time of the outer + // function. The instance / static method bodies and instance field + // initializers are lazy until something `new`s the class — but if the + // surrounding code does call `new` here, the constructor body and + // parameter defaults fire too. Over-counting only over-blocks (it never + // under-blocks), so include the constructor to stay conservative. + // + // Regression for Codex P2 review #3314767088 on PR #302. + fn visit_class(&mut self, it: &Class<'a>) { + walk_class_eager_parts( + it, + /* include_constructor */ true, + self.semantic, + self.out, + self.called, + ); + } + + fn visit_tagged_template_expression( + &mut self, + it: &oxc_ast::ast::TaggedTemplateExpression<'a>, + ) { + // Mirror `visit_call_expression`: a tagged template `tag`...`` invokes + // `tag`, so its direct/indirect/bind callee shapes contribute to + // `called` just like a `CallExpression`. Without this override, the + // default walk reaches `tag` via `visit_identifier_reference` (which + // only feeds `out`), so the tag's body never gets chased through + // `eagerly_called`. Regression for Cursor Low review #3314770575 + // on PR #302. + record_direct_callee(&it.tag, self.semantic, self.called); + record_indirect_callee(&it.tag, self.semantic, self.called); + record_bind_callee(&it.tag, self.semantic, self.called); + oxc_ast_visit::walk::walk_tagged_template_expression(self, it); + } } /// Advance `end` past one trailing line terminator so that deleting the @@ -1884,33 +1917,71 @@ fn collect_assignment_target_property_symbols<'a>( } } -/// If `callee` is a *direct* identifier reference (peeling through -/// parentheses and TS type-only wrappers), record its symbol in `called`. -/// Member callees (`foo.bar()`) and other complex expressions are skipped -/// — only direct callees of `CallExpression`/`NewExpression` count as -/// eager invocations of a top-level function. +/// If `callee` resolves to one or more *direct* identifier references +/// (peeling through parentheses and TS type-only wrappers, and descending +/// into the branches of conditional / logical / sequence callees), record +/// each symbol in `called`. Member callees (`foo.bar()`) and other complex +/// expressions are skipped — only direct callees of +/// `CallExpression`/`NewExpression` count as eager invocations of a +/// top-level function. +/// +/// Conditional (`(cond ? a : b)()`), logical (`(a || b)()`), and sequence +/// (`(x, y, z)()`) callees are first-class shapes: either branch of the +/// conditional/logical may end up invoked, and the last expression in a +/// sequence is the result whose callee is invoked. The worklist below +/// pushes both branches of `?:` / `||`/`&&`/`??` and the LAST expression of +/// a sequence, with a `seen` guard so cycles or shared subtrees in the AST +/// don't loop forever (in practice each `Expression` node is unique, but +/// guarding by raw pointer is cheap insurance against quadratic blow-up on +/// pathological inputs). +/// +/// Regression for Codex P2 review #3314767091 on PR #302. fn record_direct_callee<'a>( callee: &Expression<'a>, semantic: &Semantic<'a>, called: &mut HashSet, ) { use Expression as E; - let mut cur = callee; - loop { - match cur { - E::Identifier(id) => { - if let Some(symbol) = resolve_symbol(id, semantic) { - called.insert(symbol); + let mut worklist: Vec<&Expression<'a>> = vec![callee]; + let mut seen: HashSet<*const Expression<'a>> = HashSet::new(); + while let Some(mut cur) = worklist.pop() { + loop { + let key = cur as *const Expression<'a>; + if !seen.insert(key) { + break; + } + match cur { + E::Identifier(id) => { + if let Some(symbol) = resolve_symbol(id, semantic) { + called.insert(symbol); + } + break; } - return; + E::ParenthesizedExpression(p) => cur = &p.expression, + E::TSAsExpression(ts) => cur = &ts.expression, + E::TSSatisfiesExpression(ts) => cur = &ts.expression, + E::TSNonNullExpression(ts) => cur = &ts.expression, + E::TSTypeAssertion(ts) => cur = &ts.expression, + E::TSInstantiationExpression(ts) => cur = &ts.expression, + E::ConditionalExpression(cond) => { + worklist.push(&cond.consequent); + worklist.push(&cond.alternate); + break; + } + E::LogicalExpression(log) => { + worklist.push(&log.left); + worklist.push(&log.right); + break; + } + E::SequenceExpression(seq) => { + // Only the last expression's value becomes the callee. + if let Some(last) = seq.expressions.last() { + worklist.push(last); + } + break; + } + _ => break, } - E::ParenthesizedExpression(p) => cur = &p.expression, - E::TSAsExpression(ts) => cur = &ts.expression, - E::TSSatisfiesExpression(ts) => cur = &ts.expression, - E::TSNonNullExpression(ts) => cur = &ts.expression, - E::TSTypeAssertion(ts) => cur = &ts.expression, - E::TSInstantiationExpression(ts) => cur = &ts.expression, - _ => return, } } } diff --git a/crates/oxc_angular_compiler/tests/integration_test.rs b/crates/oxc_angular_compiler/tests/integration_test.rs index fee9ddd5e..f5473c710 100644 --- a/crates/oxc_angular_compiler/tests/integration_test.rs +++ b/crates/oxc_angular_compiler/tests/integration_test.rs @@ -12072,3 +12072,137 @@ const BASE = class {}; result.code ); } + +/// `make()` is eagerly invoked by the decorator. Inside `make`'s body an +/// inline class expression `class extends TOKEN {}` evaluates eagerly, so the +/// `super_class` reference to `TOKEN` should flow into the eager-evaluation +/// set. `FunctionBodyIdentVisitor::visit_class` is a no-op which silently +/// drops these refs unless it walks the class's eager parts via +/// `walk_class_eager_parts`. +/// +/// Regression test for Codex P2 review #3314767088 on PR #302. +#[test] +fn component_eager_fn_body_inline_class_extends_chases_late_const() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'x', template: '', providers: make() }) +class TestComponent {} +function make() { return class extends TOKEN {}; } +const TOKEN = class {}; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let token_pos = result + .code + .find("const TOKEN") + .unwrap_or_else(|| panic!("Expected `const TOKEN` to be present.\nCode:\n{}", result.code)); + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + + assert!( + token_pos < class_pos, + "`const TOKEN` (read by `class extends TOKEN {{}}` inside the body of \ + an eagerly-called function) must be hoisted above the class to avoid \ + TDZ. token@{token_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + assert_eq!( + result.code.matches("const TOKEN").count(), + 1, + "`const TOKEN` should appear exactly once.\nCode:\n{}", + result.code + ); +} + +/// `(cond ? makeA : makeB)()` invokes one of `makeA`/`makeB`. Both branches +/// can run, so `record_direct_callee` must descend into the consequent and +/// alternate of a `ConditionalExpression` callee and add both identifiers to +/// `called`. Without this, neither callee body is chased and `TOKEN` stays +/// declared below the class. +/// +/// Regression test for Codex P2 review #3314767091 on PR #302. +#[test] +fn component_eager_conditional_callee_chases_both_branches() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +const cond = true; +@Component({ selector: 'x', template: '', providers: (cond ? makeA : makeB)() }) +class TestComponent {} +function makeA() { return TOKEN; } +function makeB() { return TOKEN; } +const TOKEN = 1; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let token_pos = result + .code + .find("const TOKEN") + .unwrap_or_else(|| panic!("Expected `const TOKEN` to be present.\nCode:\n{}", result.code)); + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + + assert!( + token_pos < class_pos, + "`const TOKEN` (read inside both branches of a conditional callee \ + `(cond ? makeA : makeB)()`) must be hoisted above the class to avoid \ + TDZ. token@{token_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + assert_eq!( + result.code.matches("const TOKEN").count(), + 1, + "`const TOKEN` should appear exactly once.\nCode:\n{}", + result.code + ); +} + +/// Inside an eagerly-called function `outer()`, a tagged template +/// `` tag`hello` `` invokes `tag`. `FunctionBodyIdentVisitor` must override +/// `visit_tagged_template_expression` and record the tag as a callee +/// (direct/indirect/bind) — otherwise the default walk adds `tag` to `out` +/// only, `tag` never enters `eagerly_called`, and the late `TOKEN` reference +/// inside `tag`'s body is never chased. +/// +/// Regression test for Cursor Low review #3314770575 on PR #302. +#[test] +fn component_eager_fn_body_tagged_template_chases_late_const() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'x', template: '', providers: outer() }) +class TestComponent {} +function outer() { return tag`hello`; } +function tag(_strings: TemplateStringsArray) { return TOKEN; } +const TOKEN = 1; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let token_pos = result + .code + .find("const TOKEN") + .unwrap_or_else(|| panic!("Expected `const TOKEN` to be present.\nCode:\n{}", result.code)); + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + + assert!( + token_pos < class_pos, + "`const TOKEN` (read inside the body of a tagged-template tag invoked \ + from an eagerly-called function) must be hoisted above the class to \ + avoid TDZ. token@{token_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + assert_eq!( + result.code.matches("const TOKEN").count(), + 1, + "`const TOKEN` should appear exactly once.\nCode:\n{}", + result.code + ); +} From 730983c3c446ee00a877c0ffb723a3a52831dbea Mon Sep 17 00:00:00 2001 From: LongYinan Date: Thu, 28 May 2026 09:39:06 +0800 Subject: [PATCH 14/21] fix(hoist): record indirect/bind callees on tagged-template tags `collect_expr_symbols::E::TaggedTemplateExpression` only called `record_direct_callee`, while the call/new arms and the body visitor's override already invoke all three callee helpers. Decorator metadata like `make.bind(null)\`...\`` or `make.call(this)\`...\`` therefore recorded `make` as a value reference but never as an eager callee, so its body wasn't chased and a later-declared binding it reads stayed below the class. Now mirrors call/new: direct + indirect + bind. Closes Cursor Low #3314809112 and Codex P2 #3314810080. Co-Authored-By: Claude Opus 4.7 --- .../src/component/hoist.rs | 9 ++++ .../tests/integration_test.rs | 42 +++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/crates/oxc_angular_compiler/src/component/hoist.rs b/crates/oxc_angular_compiler/src/component/hoist.rs index 9aa09a928..4c5c1dd23 100644 --- a/crates/oxc_angular_compiler/src/component/hoist.rs +++ b/crates/oxc_angular_compiler/src/component/hoist.rs @@ -1662,7 +1662,16 @@ fn collect_expr_symbols<'a>( } } E::TaggedTemplateExpression(tagged) => { + // Mirror the call/new arms: a tagged template invokes the tag + // function eagerly, so direct, member-call (`fn.call`, `fn.apply`), + // and `fn.bind(...)` shapes must all enter `called`. Without the + // indirect/bind helpers here, `make.bind(null)\`...\`` in decorator + // metadata would record `make` as a value reference but never chase + // its body. Covers PR #302 Cursor Low #3314809112 / Codex P2 + // #3314810080. record_direct_callee(&tagged.tag, semantic, called); + record_indirect_callee(&tagged.tag, semantic, called); + record_bind_callee(&tagged.tag, semantic, called); collect_expr_symbols(&tagged.tag, semantic, out, called); for e in &tagged.quasi.expressions { collect_expr_symbols(e, semantic, out, called); diff --git a/crates/oxc_angular_compiler/tests/integration_test.rs b/crates/oxc_angular_compiler/tests/integration_test.rs index f5473c710..7909cd544 100644 --- a/crates/oxc_angular_compiler/tests/integration_test.rs +++ b/crates/oxc_angular_compiler/tests/integration_test.rs @@ -12206,3 +12206,45 @@ const TOKEN = 1; result.code ); } + +/// Decorator metadata uses a tagged template whose tag is produced by +/// `.bind` / `.call` / `.apply`. The tag function fires at class-definition +/// time, so its body refs must enter the eagerly-called closure — same +/// treatment `E::CallExpression` / `E::NewExpression` already get. Covers +/// both Cursor Low #3314809112 (consistency with call/new arms) and Codex +/// P2 #3314810080 (bound tagged-template callees) on PR #302. +#[test] +fn component_tagged_template_bind_tag_chases_late_const() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'x', template: '', providers: make.bind(null)`hello` }) +class TestComponent {} +function make() { return [{ provide: TOKEN, useValue: 0 }]; } +const TOKEN = 'tok'; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let token_pos = result + .code + .find("const TOKEN") + .unwrap_or_else(|| panic!("Expected `const TOKEN` to be present.\nCode:\n{}", result.code)); + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + + assert!( + token_pos < class_pos, + "`const TOKEN` (read inside `make`'s body, called via a `.bind`-tagged \ + template in decorator metadata) must be hoisted above the class to avoid \ + TDZ. token@{token_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + assert_eq!( + result.code.matches("const TOKEN").count(), + 1, + "`const TOKEN` should appear exactly once.\nCode:\n{}", + result.code + ); +} From d542220ec54e054fdba97be24043e36d351d5c66 Mon Sep 17 00:00:00 2001 From: LongYinan Date: Thu, 28 May 2026 09:47:18 +0800 Subject: [PATCH 15/21] fix(hoist): chase fn-valued binding bodies for pre-class bindings too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The BFS branch for a `symbol_to_stmt` hit ran the fn-valued binding body-chase only AFTER the `stmt_start < class_body_end` early-continue, so a `const make = () => [{ provide: TOKEN }];` declared *before* the decorated class slipped past the chase entirely. `make`'s binding is indeed initialized at class-eval time, but the arrow body still fires when the decorator calls `make()` — and its later-declared reads stay in TDZ. Moved the body-chase block to BEFORE the early-continue so it runs regardless of stmt position; removed the now-duplicate post-plan copy. Closes Codex P2 #3314836115. Co-Authored-By: Claude Opus 4.7 --- .../src/component/hoist.rs | 66 ++++++++++--------- .../tests/integration_test.rs | 45 +++++++++++++ 2 files changed, 80 insertions(+), 31 deletions(-) diff --git a/crates/oxc_angular_compiler/src/component/hoist.rs b/crates/oxc_angular_compiler/src/component/hoist.rs index 4c5c1dd23..91c353314 100644 --- a/crates/oxc_angular_compiler/src/component/hoist.rs +++ b/crates/oxc_angular_compiler/src/component/hoist.rs @@ -229,6 +229,36 @@ pub fn collect_hoist_edits<'a>( } if let Some(&stmt_start) = symbol_to_stmt.get(&symbol) { let Some(info) = stmt_info.get(&stmt_start) else { continue }; + + // Body chase for eagerly-called fn-valued bindings runs + // BEFORE the pre-class early-continue. Even when the binding + // itself is declared *before* the class (so its binding is + // initialized when the class evaluates), the function body + // it stores still fires when the decorator calls it, and + // that body's later-declared reads are TDZ-relevant. Without + // this, `const make = () => [{ provide: TOKEN }]` followed + // by `@Component({providers: make()}) class C {}` and a + // later `const TOKEN` would skip the chase entirely because + // `make`'s stmt_start < class_body_end. Mirrors the post- + // plan body-chase below; deferred otherwise so a later eager- + // set promotion belatedly chases via the `now_eager` sweep. + // Regression for Codex P2 review #3314836115 on PR #302. + if fn_body_symbol_refs.contains_key(&symbol) { + if eagerly_called.contains(&symbol) { + if chased_fn_bodies.insert(symbol) + && let Some(body_refs) = fn_body_symbol_refs.get(&symbol) + { + for &s in body_refs { + if !visited.contains(&s) { + worklist.push(s); + } + } + } + } else { + deferred_fns.insert(symbol); + } + } + // Skip bindings declared *before* this class — they're // already initialized when the class evaluates. // `class_body_end` is the exclusive end of the class body @@ -373,37 +403,11 @@ pub fn collect_hoist_edits<'a>( } } - // Function-valued bindings act as BOTH a binding (planned - // above) AND a function (their body refs fire when called). - // `index_fn_valued_binding` populates `fn_body_symbol_refs` - // for `const make = () => …` / `const make = function … {}` - // shapes. If the decorator metadata called `make()` (or a - // hoisted initializer's `init_called_symbols` closure - // promoted `make` into `eagerly_called`), the body refs - // ALSO need chasing — otherwise the arrow body's reads - // (e.g. `TOKEN` inside `() => [{ provide: TOKEN, … }]`) - // never enter the worklist and stay declared below the - // class, throwing TDZ when the hoisted `make()` runs at - // module load. Mirror the `else if eagerly_called` branch - // below; defer otherwise so a later eager-set promotion - // belatedly chases the body via the existing now_eager - // sweep at the top of this match arm. See PR #302 Codex P2 - // review #3311913006. - if fn_body_symbol_refs.contains_key(&symbol) { - if eagerly_called.contains(&symbol) { - if chased_fn_bodies.insert(symbol) { - if let Some(body_refs) = fn_body_symbol_refs.get(&symbol) { - for &s in body_refs { - if !visited.contains(&s) { - worklist.push(s); - } - } - } - } - } else { - deferred_fns.insert(symbol); - } - } + // The fn-valued-binding body chase used to live here, after + // the plan/transitive logic, but it must run for pre-class + // bindings too — moved to the top of this branch above the + // `stmt_start < class_body_end` early-continue. Covers PR + // #302 Codex P2 reviews #3311913006 and #3314836115. } else if eagerly_called.contains(&symbol) { // The symbol resolves to a top-level function declaration // that is *actually called* (transitively) at module load diff --git a/crates/oxc_angular_compiler/tests/integration_test.rs b/crates/oxc_angular_compiler/tests/integration_test.rs index 7909cd544..7d52c2581 100644 --- a/crates/oxc_angular_compiler/tests/integration_test.rs +++ b/crates/oxc_angular_compiler/tests/integration_test.rs @@ -12248,3 +12248,48 @@ const TOKEN = 'tok'; result.code ); } + +/// An eagerly-called function-valued binding declared *before* the +/// decorated class is itself already initialized — but the function body +/// it stores still fires when the decorator calls it, and that body's +/// later-declared reads (`TOKEN` below) are TDZ-relevant. The BFS used +/// to skip the body chase entirely when the binding's stmt_start was +/// before the class's body end, leaving `TOKEN` unhoisted and the +/// emitted Ivy definition throwing at module load. +/// +/// Regression test for Codex P2 review #3314836115 on PR #302. +#[test] +fn component_pre_class_fn_valued_binding_chases_body_refs() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +const make = () => [{ provide: TOKEN, useValue: 0 }]; +@Component({ selector: 'x', template: '', providers: make() }) +class TestComponent {} +const TOKEN = 'tok'; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let token_pos = result + .code + .find("const TOKEN") + .unwrap_or_else(|| panic!("Expected `const TOKEN` to be present.\nCode:\n{}", result.code)); + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + + assert!( + token_pos < class_pos, + "`const TOKEN` (read inside the body of a pre-class fn-valued binding \ + called by the decorator) must be hoisted above the class to avoid TDZ. \ + token@{token_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + assert_eq!( + result.code.matches("const TOKEN").count(), + 1, + "`const TOKEN` should appear exactly once.\nCode:\n{}", + result.code + ); +} From b4934906fb2e9f31bd59a9558112e904390c6b3d Mon Sep 17 00:00:00 2001 From: LongYinan Date: Thu, 28 May 2026 09:47:58 +0800 Subject: [PATCH 16/21] chore(hoist): drop stale 'moved from' comment in BFS Co-Authored-By: Claude Opus 4.7 --- crates/oxc_angular_compiler/src/component/hoist.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/crates/oxc_angular_compiler/src/component/hoist.rs b/crates/oxc_angular_compiler/src/component/hoist.rs index 91c353314..a195d8564 100644 --- a/crates/oxc_angular_compiler/src/component/hoist.rs +++ b/crates/oxc_angular_compiler/src/component/hoist.rs @@ -402,12 +402,6 @@ pub fn collect_hoist_edits<'a>( worklist.push(s); } } - - // The fn-valued-binding body chase used to live here, after - // the plan/transitive logic, but it must run for pre-class - // bindings too — moved to the top of this branch above the - // `stmt_start < class_body_end` early-continue. Covers PR - // #302 Codex P2 reviews #3311913006 and #3314836115. } else if eagerly_called.contains(&symbol) { // The symbol resolves to a top-level function declaration // that is *actually called* (transitively) at module load From 73482730d9f74231d5c36889eadc3b69a821750a Mon Sep 17 00:00:00 2001 From: LongYinan Date: Thu, 28 May 2026 10:02:52 +0800 Subject: [PATCH 17/21] chore(hoist): strip PR/issue bookkeeping references from comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removes trailing `See PR #302 ...`, `Regression for/test for ... review #...`, `Round N follow-on review`, `Codex P2/P3 review Finding N`, etc. — the references that belong in PR descriptions and rot as the codebase moves. Substantive technical justifications kept; only the bookkeeping tails removed. Co-Authored-By: Claude Opus 4.7 --- .../src/component/hoist.rs | 84 ++++++------------ .../tests/integration_test.rs | 86 +++++-------------- 2 files changed, 45 insertions(+), 125 deletions(-) diff --git a/crates/oxc_angular_compiler/src/component/hoist.rs b/crates/oxc_angular_compiler/src/component/hoist.rs index a195d8564..ec1ff983d 100644 --- a/crates/oxc_angular_compiler/src/component/hoist.rs +++ b/crates/oxc_angular_compiler/src/component/hoist.rs @@ -128,8 +128,7 @@ pub fn collect_hoist_edits<'a>( // its reads become TDZ-relevant to that statement. // // Computed once here so the BFS safe-skip guard, the cascade un-planning - // pass, and the topological sort can all consult the same map. See - // PR #302 Round 6 follow-on review. + // pass, and the topological sort can all consult the same map. let mut stmt_fn_valued_bindings: HashMap> = HashMap::new(); for (&sym, &stmt_start) in &symbol_to_stmt { if fn_body_symbol_refs.contains_key(&sym) { @@ -141,7 +140,6 @@ pub fn collect_hoist_edits<'a>( // the class's `span.start`. Used by the BFS to refuse hoisting any // statement whose initializer references a class that lives at-or-after // the protect site — see the safe-skip guard near `plan.entry(...)`. - // Regression for Codex review #3310709319 on PR #302. let top_level_class_positions = collect_top_level_class_positionss(program); // Step 2a: gather per-class decorator-metadata symbols (both the full @@ -157,8 +155,7 @@ pub fn collect_hoist_edits<'a>( // (`useFactory: foo`), foo's body does NOT fire when this class // evaluates — and chasing TOKEN would invent a new TDZ on the class // (when `TOKEN = TestComponent`). A global `eagerly_called` (seeded - // from every module-init call site) over-reaches across classes. See - // PR #302 review (Cursor #3310734461). + // from every module-init call site) over-reaches across classes. let mut classes: Vec<(&Class<'a>, u32, HashSet, HashSet)> = Vec::new(); for stmt in &program.body { let Some((class, stmt_start_pos)) = class_of(stmt) else { continue }; @@ -184,7 +181,7 @@ pub fn collect_hoist_edits<'a>( // against the nondeterministic dedup bug where, with `const A = 1, B = 2;` // referenced by two different classes, the surviving entry's `insert_at` // depended on HashMap iteration order and could land *after* the earlier - // class. See PR #302 review. + // class. let mut plan: HashMap = HashMap::new(); // Union of per-class `eagerly_called` sets for all classes that // contributed to the plan. The topological sort's edge expansion @@ -242,7 +239,6 @@ pub fn collect_hoist_edits<'a>( // `make`'s stmt_start < class_body_end. Mirrors the post- // plan body-chase below; deferred otherwise so a later eager- // set promotion belatedly chases via the `now_eager` sweep. - // Regression for Codex P2 review #3314836115 on PR #302. if fn_body_symbol_refs.contains_key(&symbol) { if eagerly_called.contains(&symbol) { if chased_fn_bodies.insert(symbol) @@ -300,8 +296,7 @@ pub fn collect_hoist_edits<'a>( // // The check uses `>=`: a class declared at exactly // `effective_start` is itself the class we're protecting - // — definitely blocking. Regression for Codex review - // #3310709319 and Codex P2 review #3311493528 on PR #302. + // — definitely blocking. let mut stmt_called: HashSet = info.init_called_symbols.iter().copied().collect(); // Fold in any fn-valued binding declared by this statement @@ -315,7 +310,6 @@ pub fn collect_hoist_edits<'a>( // by `providers: make()` would slip past the guard because // the initializer's plain `init_symbols` / `init_called_symbols` // are empty (arrow bodies are lazy in `collect_expr_symbols`). - // See PR #302 Round 6 follow-on review. if let Some(fn_syms) = stmt_fn_valued_bindings.get(&stmt_start) { for &fn_sym in fn_syms { if eagerly_called.contains(&fn_sym) { @@ -396,7 +390,6 @@ pub fn collect_hoist_edits<'a>( // provide: TOKEN, ... }]; const TOKEN = ...;` moves // `PROVIDERS` but leaves `TOKEN` below, so module evaluation // now throws inside the hoisted `PROVIDERS` initializer. - // See PR #302 review. for &s in &info.init_symbols { if !visited.contains(&s) { worklist.push(s); @@ -408,7 +401,6 @@ pub fn collect_hoist_edits<'a>( // *for this class*. Don't hoist the function itself (JS // already hoists fn decls), but its body's identifier // reads fire whenever it runs. Chase those references. - // See PR #302 review (Codex). if chased_fn_bodies.insert(symbol) { if let Some(body_refs) = fn_body_symbol_refs.get(&symbol) { for &s in body_refs { @@ -422,7 +414,7 @@ pub fn collect_hoist_edits<'a>( // Top-level function not (yet) in eagerly_called for this // class. Defer — if a later visit promotes it (because some // planned binding's initializer calls it), we'll belatedly - // chase its body. See PR #302 review (Cursor). + // chase its body. deferred_fns.insert(symbol); } } @@ -443,7 +435,7 @@ pub fn collect_hoist_edits<'a>( // by chasing through `fn_body_symbol_refs` may itself get guard-skipped // when the BFS later pops it — leaving S planned with a missing dep. // - // Example (Finding 1 of Codex P2 review on PR #302): + // Example: // // class TestComponent { ... } // ← class C // var TOKEN = make(); // ← S: passes guard @@ -478,7 +470,7 @@ pub fn collect_hoist_edits<'a>( let mut to_remove: Vec = Vec::new(); for (&start, entry) in &plan { let Some(info) = stmt_info.get(&start) else { continue }; - // Finding 2 (Codex P3 review): use a *per-S* eager-call set — + // Use a *per-S* eager-call set — // the closure of THIS statement's `init_called_symbols` under // `fn_body_called_symbols` — instead of the global // `combined_eagerly_called`. The global union over-expands: if @@ -501,8 +493,7 @@ pub fn collect_hoist_edits<'a>( // for a class that calls `make()` would not see its body's // dependency on `BACKREF` (whose own statement was guard- // skipped), so the cascade would fail to drop `make` and the - // hoisted `make()` would TDZ on `BACKREF`. See PR #302 Round 6 - // follow-on review. + // hoisted `make()` would TDZ on `BACKREF`. let mut seed: HashSet = info.init_symbols.clone(); if let Some(fn_syms) = stmt_fn_valued_bindings.get(&start) { for &fn_sym in fn_syms { @@ -526,7 +517,7 @@ pub fn collect_hoist_edits<'a>( continue; } // Dep is in the plan — only safe if its `insert_at` is at - // or before S's `insert_at`. Finding 1 (Codex P3 review): + // or before S's `insert_at`: // two planned statements can target *different* `insert_at` // positions (one per decorated class). If S targets // `insert_at = pos_C1` and its dep `D` is planned only for @@ -587,8 +578,7 @@ pub fn collect_hoist_edits<'a>( // union can over-expand: a function `make` eagerly called only by // class B leaks into class A's `makeRef = make` closure when // computing topo edges, forming a spurious edge that may invert - // ordering or trigger the cycle-break. See PR #302 Cursor Low review - // #3311962888. + // ordering or trigger the cycle-break. // // `stmt_fn_valued_bindings` (computed once near the top of this // function) is consulted here too — see the doc comment on @@ -678,15 +668,13 @@ pub fn collect_hoist_edits<'a>( /// `init_called_symbols` under `fn_body_called_symbols`, matching the /// shape the cascade un-planning loop uses. Passing per-S sets instead of /// the global `combined_eagerly_called` keeps the cascade and topo -/// passes reasoning against the same eager-evaluation surface — see PR -/// #302 Cursor Low review #3311962888. +/// passes reasoning against the same eager-evaluation surface. /// /// `stmt_fn_valued_bindings` maps each planned `stmt_start` to the /// function-valued binding symbols it declares (e.g. `make` for /// `const make = () => TOKEN;`). Their `fn_body_symbol_refs` entries are /// chased to surface body-ref dependencies that are invisible to the -/// statement's plain `init_symbols` — see PR #302 Codex P2 review -/// #3311913006. +/// statement's plain `init_symbols`. fn topological_order( plan: &HashMap, symbol_to_stmt: &HashMap, @@ -709,8 +697,7 @@ fn topological_order( // If the initializer calls a function (directly or transitively), the // function body's identifier reads count as references that fire when // the hoisted statement evaluates. Functions only stored as values are - // NOT expanded — their bodies don't run at module load. See PR #302 - // review (Codex). + // NOT expanded — their bodies don't run at module load. // // Function-valued binding symbols this statement declares (e.g. `make` // in `const make = () => TOKEN;`) are added to the seed so the @@ -831,8 +818,7 @@ fn expand_through_functions( /// /// Per-class scoping: the seed is THIS class's call graph only. A function /// invoked elsewhere in the module but only referenced as a value in this -/// class's metadata does not enter this class's set. See PR #302 review -/// (Cursor #3310734461). +/// class's metadata does not enter this class's set. fn close_eagerly_called( eagerly_called: &mut HashSet, worklist: &mut Vec, @@ -1013,8 +999,7 @@ fn collect_top_level_bindings<'a>( // patterned bindings are skipped — the function-value shape // only appears with a plain identifier binding in practice. // Run this BEFORE collecting `init_symbols` so the indexing - // happens before the normal binding/init flow. See PR #302 - // Codex P2 Finding 3. + // happens before the normal binding/init flow. if let (BindingPattern::BindingIdentifier(id), Some(init)) = (&declarator.id, &declarator.init) && let Some(fn_symbol) = id.symbol_id.get() @@ -1033,8 +1018,7 @@ fn collect_top_level_bindings<'a>( // = arr;`, `const { a: { b } } = obj;`, …) also index every // binding identifier they introduce. Without this, decorator // metadata referencing such a binding never resolves to its - // declaring statement and the hoist is skipped. See PR #302 - // Codex review. + // declaring statement and the hoist is skipped. for_each_binding_identifier(&declarator.id, &mut |id| { if let Some(symbol_id) = id.symbol_id.get() { symbol_to_stmt.insert(symbol_id, stmt_start); @@ -1055,8 +1039,7 @@ fn collect_top_level_bindings<'a>( // declared top-level binding referenced by a default is // TDZ-relevant exactly like the `init` itself. Walk every // nested `AssignmentPattern::right` in the binding pattern - // and feed its refs into the statement's eager sets. See - // PR #302 Codex review #3311274924. + // and feed its refs into the statement's eager sets. for_each_pattern_default(&declarator.id, &mut |expr| { collect_expr_symbols( expr, @@ -1124,8 +1107,7 @@ fn collect_top_level_bindings<'a>( // eager) is intentional. The BFS only ever uses these maps to // *block* hoisting that would introduce a fresh TDZ — never to // greenlight one — so extending the body-ref set can only - // over-block, never under-block. See PR #302 Codex P2 review - // #3312108552. + // over-block, never under-block. if let Some((class, _)) = class_of(stmt) { if let Some(id) = &class.id && let Some(class_symbol) = id.symbol_id.get() @@ -1251,7 +1233,7 @@ fn for_each_pattern_default<'a, 'src>( /// only over-blocks hoisting, never under-blocks. /// /// For a *class expression* embedded inside an eagerly-evaluated decorator -/// argument (Bug 2 of Codex P2 review #3312108558), the class expression +/// argument, the class expression /// itself is being defined inline — so only the class-definition-time /// eager parts fire here. Instance methods/fields/constructor bodies are /// lazy until someone calls `new` on the class, which the metadata can't @@ -1350,8 +1332,6 @@ fn walk_class_eager_parts<'a>( /// * `AssignmentPattern.right` nested anywhere inside the parameter's /// `BindingPattern` (e.g. the inner `= X` in /// `function f({ a = X } = {})`). -/// -/// See PR #302 Codex review (#3311099883). fn walk_param_defaults<'a>( params: &FormalParameters<'a>, semantic: &Semantic<'a>, @@ -1402,7 +1382,6 @@ impl<'a, 'b> Visit<'a> for FunctionBodyIdentVisitor<'a, 'b> { // the eager-evaluation set. Without this, `visit_arrow_function` // / `visit_function` (intentional no-ops below) would silently drop // the IIFE body inside an eagerly-called function — TDZ regression. - // See PR #302 Cursor review #3311313158. if walk_iife_callee_body(&it.callee, self.semantic, self.out, self.called) { // Body handled; only the arguments still need to flow into // `self.out` / `self.called`. @@ -1450,8 +1429,6 @@ impl<'a, 'b> Visit<'a> for FunctionBodyIdentVisitor<'a, 'b> { // surrounding code does call `new` here, the constructor body and // parameter defaults fire too. Over-counting only over-blocks (it never // under-blocks), so include the constructor to stay conservative. - // - // Regression for Codex P2 review #3314767088 on PR #302. fn visit_class(&mut self, it: &Class<'a>) { walk_class_eager_parts( it, @@ -1471,8 +1448,7 @@ impl<'a, 'b> Visit<'a> for FunctionBodyIdentVisitor<'a, 'b> { // `called` just like a `CallExpression`. Without this override, the // default walk reaches `tag` via `visit_identifier_reference` (which // only feeds `out`), so the tag's body never gets chased through - // `eagerly_called`. Regression for Cursor Low review #3314770575 - // on PR #302. + // `eagerly_called`. record_direct_callee(&it.tag, self.semantic, self.called); record_indirect_callee(&it.tag, self.semantic, self.called); record_bind_callee(&it.tag, self.semantic, self.called); @@ -1588,8 +1564,6 @@ fn collect_expr_symbols<'a>( // default `ArrowFunctionExpression` / `FunctionExpression` // arms below treat bodies as lazy; for IIFEs we walk the body // explicitly via `FunctionBodyIdentVisitor` instead. - // - // Regression for Codex review #3310709326 on PR #302. if !walk_iife_callee_body(&call.callee, semantic, out, called) { collect_expr_symbols(&call.callee, semantic, out, called); } @@ -1665,8 +1639,7 @@ fn collect_expr_symbols<'a>( // and `fn.bind(...)` shapes must all enter `called`. Without the // indirect/bind helpers here, `make.bind(null)\`...\`` in decorator // metadata would record `make` as a value reference but never chase - // its body. Covers PR #302 Cursor Low #3314809112 / Codex P2 - // #3314810080. + // its body. record_direct_callee(&tagged.tag, semantic, called); record_indirect_callee(&tagged.tag, semantic, called); record_bind_callee(&tagged.tag, semantic, called); @@ -1709,8 +1682,7 @@ fn collect_expr_symbols<'a>( // time. The `right` is a regular expression; the `left` is an // `AssignmentTarget` (bare identifier, member, or pattern-shaped) // walked via the dedicated helper. Without this, decorator metadata - // shaped `providers: [(cached = TOKEN)]` silently dropped `TOKEN` - // — Cursor Low review #3311551145 on PR #302. + // shaped `providers: [(cached = TOKEN)]` silently dropped `TOKEN`. E::AssignmentExpression(assign) => { collect_expr_symbols(&assign.right, semantic, out, called); collect_assignment_target_symbols(&assign.left, semantic, out, called); @@ -1729,7 +1701,7 @@ fn collect_expr_symbols<'a>( // metadata can't see that call, so they stay opaque. // // Member decorators and the class expression's own decorators are - // skipped here. See PR #302 Codex P2 review #3312108558. + // skipped here. E::ClassExpression(class_expr) => { walk_class_eager_parts( class_expr.as_ref(), @@ -1941,8 +1913,6 @@ fn collect_assignment_target_property_symbols<'a>( /// don't loop forever (in practice each `Expression` node is unique, but /// guarding by raw pointer is cheap insurance against quadratic blow-up on /// pathological inputs). -/// -/// Regression for Codex P2 review #3314767091 on PR #302. fn record_direct_callee<'a>( callee: &Expression<'a>, semantic: &Semantic<'a>, @@ -2011,7 +1981,6 @@ fn record_direct_callee<'a>( /// /// Used alongside [`record_direct_callee`] at every call/new site so the /// guard's `init_called_symbols` reflects the actual eager-invocation set. -/// Regression for Codex P2 review (Finding 3) on PR #302. fn record_indirect_callee<'a>( callee: &Expression<'a>, semantic: &Semantic<'a>, @@ -2097,8 +2066,7 @@ fn record_bind_callee<'a>( /// into both. Returns `true` when indexing happened. /// /// This makes `const make = () => DEP` visible to the BFS safe-skip guard -/// the same way `function make() { return DEP; }` is. See PR #302 Codex -/// P2 (Finding 2). +/// the same way `function make() { return DEP; }` is. fn index_fn_valued_binding<'a>( init: &Expression<'a>, fn_symbol: SymbolId, @@ -2155,8 +2123,6 @@ fn index_fn_valued_binding<'a>( /// Returns `false` when the callee is not a function/arrow expression; the /// caller then falls through to the normal `collect_expr_symbols` descent /// (which is a no-op for these node kinds anyway, but still correct). -/// -/// Regression for Codex review #3310709326 on PR #302. fn walk_iife_callee_body<'a>( callee: &Expression<'a>, semantic: &Semantic<'a>, @@ -2172,7 +2138,7 @@ fn walk_iife_callee_body<'a>( visitor.visit_function_body(&arrow.body); // Parameter defaults evaluate at IIFE invocation time, before // the body runs — symmetric with top-level function decls - // in `collect_top_level_bindings`. See PR #302 Codex P2. + // in `collect_top_level_bindings`. walk_param_defaults(&arrow.params, semantic, out, called); return true; } diff --git a/crates/oxc_angular_compiler/tests/integration_test.rs b/crates/oxc_angular_compiler/tests/integration_test.rs index 7d52c2581..6739bb899 100644 --- a/crates/oxc_angular_compiler/tests/integration_test.rs +++ b/crates/oxc_angular_compiler/tests/integration_test.rs @@ -5769,7 +5769,7 @@ fn transform_to_r3_nodes(template: &str) -> (std::vec::Vec, std::vec::Ve #[test] fn test_for_block_no_expression_returns_none() { - // Finding 2: @for with no expression should return None (no ForLoopBlock node), + // @for with no expression should return None (no ForLoopBlock node), // matching Angular's behavior where parseForLoopParameters returns null. let (errors, has_for_block) = transform_to_r3("@for {
}"); assert!( @@ -5781,7 +5781,7 @@ fn test_for_block_no_expression_returns_none() { #[test] fn test_for_block_missing_track_returns_none() { - // Finding 2: @for with valid expression but missing track should return None, + // @for with valid expression but missing track should return None, // matching Angular's behavior (params.trackBy === null → node stays null). let (errors, has_for_block) = transform_to_r3("@for (item of items) {
}"); assert!( @@ -5796,7 +5796,7 @@ fn test_for_block_missing_track_returns_none() { #[test] fn test_if_block_no_expression_skips_main_branch() { - // Finding 3: @if with no parameters should not push a main branch, + // @if with no parameters should not push a main branch, // matching Angular where parseConditionalBlockParameters returns null. let (errors, node_types) = transform_to_r3_nodes("@if {
}"); // The IfBlock should have 0 branches (main branch skipped) @@ -6190,7 +6190,7 @@ export class TestComponent { /// a dynamic value, the compiler extracts a pure function constant (e.g., `_c0`). /// This constant must be emitted in the output — not silently dropped. /// -/// Regression test for: host binding pool constants not being emitted in +/// Guards against host binding pool constants not being emitted in /// compile_template_to_js_with_options path. #[test] fn test_host_binding_pure_function_declarations_emitted() { @@ -10557,7 +10557,7 @@ export class TestComponent {} assert!(token_pos < class_pos, "Order should be preserved.\nCode:\n{}", result.code); } -/// Reproducer for PR #302 review feedback: when two bindings from the *same* +/// When two bindings from the *same* /// multi-declarator statement (`const A = 1, B = 2;`) are referenced by /// different decorated classes, the hoist plan keys entries by binding name, /// producing two `HoistEntry` values that share the same `stmt_start` but @@ -10567,7 +10567,7 @@ export class TestComponent {} /// earliest referencing class. That leaves the earlier class still inside the /// TDZ of the hoisted statement. /// -/// Scenario from the Codex review: +/// Scenario: /// * `class A` (decorated) references `B`. /// * `class C` (decorated) references `A`. /// * Both classes are declared *before* `const A = 1, B = 2;`. @@ -10634,7 +10634,7 @@ const Aval = 1, Bval = 2; ); } -/// Regression for transitive TDZ deps: when decorator metadata references an +/// Guards transitive TDZ deps: when decorator metadata references an /// aggregate binding (e.g. `providers: PROVIDERS`) and that aggregate's /// initializer transitively references *another* later-declared top-level /// binding (`TOKEN`), the hoister must pull both bindings above the class. @@ -10712,8 +10712,6 @@ const TOKEN = 'tok'; /// also pull `TOKEN` above the class — otherwise the hoisted `PROVIDERS` /// initializer invokes `makeProviders()` before `TOKEN` is initialized and /// throws `ReferenceError: Cannot access 'TOKEN' before initialization`. -/// -/// Regression test for Codex bot review on PR #302 (line 340 of hoist.rs). #[test] fn component_provider_const_via_function_call_pulls_in_transitive_tdz_dep() { let allocator = Allocator::default(); @@ -10784,8 +10782,6 @@ function makeProviders() { return [{ provide: TOKEN }]; } /// after the class body and is still in the TDZ when the class's static /// fields evaluate. The hoist must move it; using `<=` for the /// "before-class" check accidentally skips this boundary case. -/// -/// Regression test for Cursor bot review on PR #302 (line 124 of hoist.rs). #[test] fn component_provider_const_immediately_after_class_brace_is_hoisted() { let allocator = Allocator::default(); @@ -10825,9 +10821,6 @@ export class TestComponent {}const TOKEN = 'tok';\n"; /// So later-declared bindings reachable only through that function's body /// must NOT be hoisted. Hoisting them would create a NEW TDZ that didn't /// exist in the original source. -/// -/// Regression test for PR #302 Codex review: BFS function-body chasing -/// branch must only fire when the function is eagerly called. #[test] fn component_provider_useFactory_function_value_does_not_hoist_body_deps() { let allocator = Allocator::default(); @@ -10867,9 +10860,6 @@ const TOKEN = TestComponent; /// `Expression::ChainExpression` (optional chaining, `TOKEN?.id` or `f?.()`) /// must contribute identifier references to the decorator-metadata symbol /// scan, so that the referenced top-level binding gets hoisted. -/// -/// Regression test for PR #302 Codex review: the catch-all `_ => {}` arm -/// in `collect_expr_symbols` was silently dropping `ChainExpression`. #[test] fn component_provider_optional_chain_token_is_hoisted() { let allocator = Allocator::default(); @@ -10906,10 +10896,6 @@ const TOKEN = { id: 'tok' }; /// Top-level destructuring patterns must be indexed: `const { TOKEN } = X;` /// binds `TOKEN`, and decorator metadata referencing `TOKEN` must hoist that /// declaration above the class. -/// -/// Regression test for PR #302 Codex review: `collect_top_level_bindings` -/// only handled `BindingPattern::BindingIdentifier`, ignoring object/array -/// destructuring patterns entirely. #[test] fn component_provider_destructured_top_level_token_is_hoisted() { let allocator = Allocator::default(); @@ -10953,8 +10939,6 @@ const { TOKEN } = TOKENS; /// The safe-skip guard refuses to hoist a statement when any of its /// initializer symbols resolves to a top-level class declared at position /// `>= effective_start` of the class being protected. -/// -/// Regression test for Codex review #3310709319 on PR #302. #[test] fn component_provider_multi_declarator_with_class_self_ref_skips_hoist() { let allocator = Allocator::default(); @@ -10997,8 +10981,6 @@ const TOKEN = 'tok', BACKREF = TestComponent; /// the arrow body must be treated as eager. The general lazy-bodies rule /// (skip arrow/function bodies) doesn't apply when the function is its own /// callee. -/// -/// Regression test for Codex review #3310709326 on PR #302. #[test] fn component_provider_iife_metadata_hoists_inner_token() { let allocator = Allocator::default(); @@ -11043,8 +11025,6 @@ const TOKEN = 'tok'; /// /// Per-class eagerly_called scoping (seeded only from THIS class's /// `decorator_called`) prevents this leak. -/// -/// Regression test for Cursor review #3310734461 on PR #302. #[test] fn component_provider_useFactory_value_does_not_chase_global_eager_caller() { let allocator = Allocator::default(); @@ -11094,8 +11074,6 @@ const TOKEN = TestComponent; /// is left below the class and the hoisted `const PROVIDERS = makeProviders()` /// throws `ReferenceError: Cannot access 'TOKEN' before initialization` when /// the parameter default fires. -/// -/// Regression test for Codex P2 review on PR #302. #[test] fn component_provider_eager_call_chases_param_default_refs() { let allocator = Allocator::default(); @@ -11163,8 +11141,6 @@ const TOKEN = 'tok'; /// `FALLBACK` stays below the class and the hoisted destructuring throws /// `ReferenceError: Cannot access 'FALLBACK' before initialization` at /// runtime. -/// -/// Regression test for Codex P2 review #3311274924 on PR #302. #[test] fn component_destructured_default_chases_late_const() { let allocator = Allocator::default(); @@ -11227,8 +11203,6 @@ const FALLBACK = 'tok'; /// IIFE callee bodies the same way `collect_expr_symbols` does, or `TOKEN` /// is left below the class and the eagerly-called function throws at module /// init. -/// -/// Regression test for Cursor review #3311313158 on PR #302. #[test] fn component_eager_fn_body_iife_chases_late_const() { let allocator = Allocator::default(); @@ -11275,8 +11249,6 @@ const TOKEN = 'tok'; /// The user's existing TDZ on `TOKEN` is NOT our problem to fix — we must /// just not introduce a NEW class TDZ. So we only assert that `class /// TestComponent` still precedes `var TOKEN`. -/// -/// Regression test for Codex P2 review #3311493528 on PR #302. #[test] fn component_eager_fn_body_class_ref_blocks_hoist() { let allocator = Allocator::default(); @@ -11318,8 +11290,6 @@ function make() { return TestComponent; } /// `collect_expr_symbols` walker must not silently drop these — otherwise /// `TOKEN` never enters the BFS and stays declared below the class, while /// the class's emitted Ivy definition reads `TOKEN` eagerly. -/// -/// Regression test for Cursor Low review #3311551145 on PR #302. #[test] fn component_assignment_expression_chases_late_const() { let allocator = Allocator::default(); @@ -11356,7 +11326,7 @@ const TOKEN = 'tok'; ); } -/// Finding 1: transitive dependency cascade. The BFS pops `TOKEN` whose +/// Transitive dependency cascade. The BFS pops `TOKEN` whose /// only directly-called function is `make()`; the closure of /// `init_called_symbols` brings in nothing class-relevant from `make`'s /// body (it just calls `BACKREF` whose binding is a non-function const). @@ -11371,8 +11341,6 @@ const TOKEN = 'tok'; /// Required: when a dependency is guard-skipped, every transitively /// dependent already-planned statement must be un-planned too. Without /// the fix, `var TOKEN` lands above `class TestComponent` in the output. -/// -/// Regression test for Codex P2 review (Finding 1) on PR #302. #[test] fn component_eager_fn_body_transitive_class_ref_unplans_chain() { let allocator = Allocator::default(); @@ -11413,7 +11381,7 @@ const BACKREF = TestComponent; ); } -/// Finding 2: function-valued `const`/`let` bindings hide eager class +/// Function-valued `const`/`let` bindings hide eager class /// reads. The BFS pops `TOKEN` whose `init_called_symbols = {make}`. /// `make` is a `const` arrow, not a function decl — so it's missing from /// `fn_body_*` maps. The closure expansion finds nothing; the guard @@ -11425,8 +11393,6 @@ const BACKREF = TestComponent; /// peeling parens / TS wrappers) must be indexed into `fn_body_*` maps /// keyed by the binding symbol, so the existing safe-skip guard catches /// the transitive class read. -/// -/// Regression test for Codex P2 review (Finding 2) on PR #302. #[test] fn component_eager_fn_value_const_arrow_class_ref_blocks_hoist() { let allocator = Allocator::default(); @@ -11457,7 +11423,7 @@ const make = () => TestComponent; ); } -/// Finding 3: member-call shapes `fn.call(...)` / `fn.apply(...)` aren't +/// Member-call shapes `fn.call(...)` / `fn.apply(...)` aren't /// recognized as eager calls. `record_direct_callee` peels parens / TS /// wrappers but stops at `StaticMemberExpression`, so `make.call(null)` /// records nothing in `called`. The guard's `stmt_called` is empty, the @@ -11468,8 +11434,6 @@ const make = () => TestComponent; /// Required: extend `record_direct_callee` (or a wrapper) to recognize /// the static call shapes `fn.call(...)`, `fn.apply(...)`, and /// `fn.bind(...)()` on top-level function symbols. -/// -/// Regression test for Codex P2 review (Finding 3) on PR #302. #[test] fn component_eager_member_call_class_ref_blocks_hoist() { let allocator = Allocator::default(); @@ -11500,7 +11464,7 @@ function make() { return TestComponent; } ); } -/// Codex P3 review Finding 1: cross-class `insert_at` ordering. Two +/// Cross-class `insert_at` ordering. Two /// `@Component`-decorated classes (C1 first, C2 second) with an /// undecorated `class Mid` between them. C1 plans `var TOKEN = make()` at /// `insert_at = pos_C1`; its BFS chases `make`'s body to `X` but the @@ -11558,7 +11522,7 @@ const X = Mid; ); } -/// Codex P3 review Finding 2: per-S eager-call set. Class A uses +/// Per-S eager-call set. Class A uses /// `makeRef` as a value (`useFactory: makeRef`); class B *calls* `make()` /// (`providers: [make()]`). The cascade pass currently uses /// `combined_eagerly_called` (the union across all classes) so `make` — @@ -11607,7 +11571,7 @@ const BACKREF = 'tok'; ); } -/// Codex P3 review Finding 3: multi-declarator function-valued bindings. +/// Multi-declarator function-valued bindings. /// `index_fn_valued_binding` currently only runs when /// `decl.declarations.len() == 1`. The shape /// `const make = () => TestComponent, other = 0;` skips indexing, so @@ -11650,7 +11614,7 @@ const make = () => TestComponent, other = 0; ); } -/// Codex P2 review #3311913006: a top-level `const make = () => DEP` +/// A top-level `const make = () => DEP` /// populates BOTH `symbol_to_stmt[make]` (binding) AND /// `fn_body_symbol_refs[make]` (because `index_fn_valued_binding` indexes /// arrow/function-valued bindings as if they were function declarations). @@ -11715,7 +11679,7 @@ const TOKEN = 'tok'; ); } -/// Cursor Low review #3311962888: lock in symmetric per-stmt eager-call +/// Locks in symmetric per-stmt eager-call /// reasoning between the cascade un-planning pass and `topological_order`. /// The cascade was changed to compute a per-S `stmt_called` (closure of /// `init_called_symbols` under `fn_body_called_symbols`); the topo sort @@ -11729,7 +11693,7 @@ const TOKEN = 'tok'; /// reorder or drop it). /// /// Locks in symmetric per-stmt eager-call reasoning between cascade and -/// topological_order — see PR #302 Cursor review #3311962888. +/// topological_order. #[test] fn component_topo_uses_per_stmt_eager_set() { let allocator = Allocator::default(); @@ -11775,7 +11739,7 @@ const BACKREF = 'tok'; ); } -/// Follow-on adversarial finding (Round 6): a function-valued `const` +/// A function-valued `const` /// binding whose ARROW BODY reads a top-level class can escape BOTH the /// safe-skip guard AND the cascade un-planning when the binding ITSELF /// is eagerly called from a decorator. @@ -11919,7 +11883,7 @@ const BACKREF = TestComponent; ); } -/// Codex P2 review #3312108552: top-level class declarations' constructor +/// Top-level class declarations' constructor /// bodies are NOT indexed into `fn_body_symbol_refs` / /// `fn_body_called_symbols`. When a hoisted initializer eagerly invokes /// `new ClassName()`, the constructor body runs at module load — and any @@ -12001,7 +11965,7 @@ const TOKEN = 1; ); } -/// Codex P2 review #3312108558: `E::ClassExpression(_) => {}` in +/// `E::ClassExpression(_) => {}` in /// `collect_expr_symbols` drops the eager parts of a class expression — /// the `super_class` expression, computed keys, static field initializers, /// and static blocks. Those fire when the class expression is *defined*, @@ -12079,8 +12043,6 @@ const BASE = class {}; /// set. `FunctionBodyIdentVisitor::visit_class` is a no-op which silently /// drops these refs unless it walks the class's eager parts via /// `walk_class_eager_parts`. -/// -/// Regression test for Codex P2 review #3314767088 on PR #302. #[test] fn component_eager_fn_body_inline_class_extends_chases_late_const() { let allocator = Allocator::default(); @@ -12122,8 +12084,6 @@ const TOKEN = class {}; /// alternate of a `ConditionalExpression` callee and add both identifiers to /// `called`. Without this, neither callee body is chased and `TOKEN` stays /// declared below the class. -/// -/// Regression test for Codex P2 review #3314767091 on PR #302. #[test] fn component_eager_conditional_callee_chases_both_branches() { let allocator = Allocator::default(); @@ -12168,8 +12128,6 @@ const TOKEN = 1; /// (direct/indirect/bind) — otherwise the default walk adds `tag` to `out` /// only, `tag` never enters `eagerly_called`, and the late `TOKEN` reference /// inside `tag`'s body is never chased. -/// -/// Regression test for Cursor Low review #3314770575 on PR #302. #[test] fn component_eager_fn_body_tagged_template_chases_late_const() { let allocator = Allocator::default(); @@ -12210,9 +12168,7 @@ const TOKEN = 1; /// Decorator metadata uses a tagged template whose tag is produced by /// `.bind` / `.call` / `.apply`. The tag function fires at class-definition /// time, so its body refs must enter the eagerly-called closure — same -/// treatment `E::CallExpression` / `E::NewExpression` already get. Covers -/// both Cursor Low #3314809112 (consistency with call/new arms) and Codex -/// P2 #3314810080 (bound tagged-template callees) on PR #302. +/// treatment `E::CallExpression` / `E::NewExpression` already get. #[test] fn component_tagged_template_bind_tag_chases_late_const() { let allocator = Allocator::default(); @@ -12256,8 +12212,6 @@ const TOKEN = 'tok'; /// to skip the body chase entirely when the binding's stmt_start was /// before the class's body end, leaving `TOKEN` unhoisted and the /// emitted Ivy definition throwing at module load. -/// -/// Regression test for Codex P2 review #3314836115 on PR #302. #[test] fn component_pre_class_fn_valued_binding_chases_body_refs() { let allocator = Allocator::default(); From 9575f2cb1966e6d4f16c59934a1e4e12ab016690 Mon Sep 17 00:00:00 2001 From: LongYinan Date: Thu, 28 May 2026 10:20:18 +0800 Subject: [PATCH 18/21] fix(hoist): descend through branch receivers; symmetric topo eager close * `record_indirect_callee` / `record_bind_callee` previously required the static-member receiver to be a bare identifier, missing shapes like `(cond ? makeA : makeB).call(null)` and `(cond ? makeA : makeB).bind(null)()`. Both now delegate the receiver descent to `record_direct_callee`, which already peels parens / TS wrappers and walks conditional / logical / sequence branches. * The topo-precompute eager set was closed under `fn_body_called_symbols` BEFORE folding in fn-valued binding symbols, while the cascade did the opposite order. Reordered the precompute to match the cascade so both passes expand through the same transitive callees and dependency edges through fn-valued bindings stay visible to the topological sort. Co-Authored-By: Claude Opus 4.7 --- .../src/component/hoist.rs | 53 +++--- .../tests/integration_test.rs | 154 ++++++++++++++++++ 2 files changed, 183 insertions(+), 24 deletions(-) diff --git a/crates/oxc_angular_compiler/src/component/hoist.rs b/crates/oxc_angular_compiler/src/component/hoist.rs index ec1ff983d..d1ece6338 100644 --- a/crates/oxc_angular_compiler/src/component/hoist.rs +++ b/crates/oxc_angular_compiler/src/component/hoist.rs @@ -590,18 +590,16 @@ pub fn collect_hoist_edits<'a>( stmt_eager_sets.insert(start, HashSet::new()); continue; }; + // Mirror the cascade un-planning pass exactly: seed with + // `init_called_symbols`, fold in fn-valued binding symbols (when + // eagerly called) BEFORE the closure, then close under + // `fn_body_called_symbols`. The closure must include the + // transitive callees reachable through the fn-valued binding's + // body — folding after the close would leave those callees out + // of `stmt_called` for the topo edge-expansion, so dependency + // edges through the binding's chain would be missed and a + // dependent could be emitted before its dependee. let mut stmt_called: HashSet = info.init_called_symbols.iter().copied().collect(); - let mut stmt_call_wl: Vec = stmt_called.iter().copied().collect(); - close_eagerly_called(&mut stmt_called, &mut stmt_call_wl, &fn_body_called_symbols); - // Function-valued bindings declared by this statement are part - // of THIS statement's eager-call surface when their initializer - // is invoked at module load (i.e. the binding's symbol is in - // some class's `eagerly_called`). Including them here lets - // `expand_through_functions` chase through the arrow/function - // body's refs to find dependency edges to other planned - // statements — without this the binding's body refs are invisible - // to the topo sort, mirroring the same issue Bug 1 fixed in the - // BFS pop branch. if let Some(fn_syms) = stmt_fn_valued_bindings.get(&start) { for &fn_sym in fn_syms { if combined_eagerly_called.contains(&fn_sym) { @@ -609,6 +607,8 @@ pub fn collect_hoist_edits<'a>( } } } + let mut stmt_call_wl: Vec = stmt_called.iter().copied().collect(); + close_eagerly_called(&mut stmt_called, &mut stmt_call_wl, &fn_body_called_symbols); stmt_eager_sets.insert(start, stmt_called); } @@ -1969,8 +1969,10 @@ fn record_direct_callee<'a>( /// * `fn.call(...)` — `Function.prototype.call` /// * `fn.apply(...)` — `Function.prototype.apply` /// -/// In both cases the static member's `object` must be a *direct identifier* -/// (`fn`) — we resolve through the semantic model and record the symbol +/// The static member's `object` may also be a `ConditionalExpression`, +/// `LogicalExpression`, or `SequenceExpression`; each candidate identifier +/// reachable through the receiver (after peeling parens / TS wrappers and +/// descending into branches / last seq element) is resolved and recorded /// in `called`. Anything more nested (`obj.fn.call(...)`, /// `getFn().call(...)`) is out of scope and falls through. /// @@ -2004,17 +2006,19 @@ fn record_indirect_callee<'a>( if prop != "call" && prop != "apply" { return; } - let E::Identifier(id) = &member.object else { return }; - if let Some(symbol) = resolve_symbol(id, semantic) { - called.insert(symbol); - } + // Receiver may be a bare identifier OR a conditional / logical / sequence + // expression whose branches each resolve to an identifier (e.g. + // `(cond ? makeA : makeB).call(null)`). Reuse the same descent logic + // `record_direct_callee` uses for the outer callee. + record_direct_callee(&member.object, semantic, called); } /// Handle the `fn.bind(...)()` shape. Called from the call site of the /// *outer* `CallExpression` — its `callee` is the inner `fn.bind(...)` -/// `CallExpression`. If the inner call's callee is `Identifier.bind` -/// (a `StaticMemberExpression` whose `object` is a direct identifier and -/// `property` is `"bind"`), record the identifier's symbol in `called`. +/// `CallExpression`. If the inner call's callee is `.bind` where +/// `` is a direct identifier (after peeling parens / TS wrappers) +/// or a conditional / logical / sequence expression whose branches each +/// resolve to an identifier, record every reachable symbol in `called`. /// Only one level of bind is covered; nested `fn.bind(a).bind(b)()` falls /// through. fn record_bind_callee<'a>( @@ -2052,10 +2056,11 @@ fn record_bind_callee<'a>( if member.property.name.as_str() != "bind" { return; } - let E::Identifier(id) = &member.object else { return }; - if let Some(symbol) = resolve_symbol(id, semantic) { - called.insert(symbol); - } + // Receiver may be a bare identifier OR a conditional / logical / sequence + // expression whose branches each resolve to an identifier (e.g. + // `(cond ? makeA : makeB).bind(null)()`). Reuse `record_direct_callee` + // for symmetric descent. + record_direct_callee(&member.object, semantic, called); } /// If `init` is *directly* an `ArrowFunctionExpression` or diff --git a/crates/oxc_angular_compiler/tests/integration_test.rs b/crates/oxc_angular_compiler/tests/integration_test.rs index 6739bb899..faec505f9 100644 --- a/crates/oxc_angular_compiler/tests/integration_test.rs +++ b/crates/oxc_angular_compiler/tests/integration_test.rs @@ -12247,3 +12247,157 @@ const TOKEN = 'tok'; result.code ); } + +/// Decorator metadata invokes a `.call`-style indirect callee whose +/// receiver is a conditional expression: `(cond ? makeA : makeB).call(null)`. +/// `record_indirect_callee` must descend through the conditional/logical/ +/// sequence wrapper to reach the underlying identifiers — otherwise neither +/// `makeA` nor `makeB` enters the eagerly-called closure and `TOKEN` (read +/// from their bodies) is left unhoisted. +#[test] +fn component_eager_indirect_callee_descends_through_conditional() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +const cond = true; +@Component({ selector: 'x', template: '', providers: (cond ? makeA : makeB).call(null) }) +class TestComponent {} +function makeA() { return [{ provide: TOKEN, useValue: 0 }]; } +function makeB() { return [{ provide: TOKEN, useValue: 1 }]; } +const TOKEN = 'tok'; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let token_pos = result + .code + .find("const TOKEN") + .unwrap_or_else(|| panic!("Expected `const TOKEN` to be present.\nCode:\n{}", result.code)); + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + + assert!( + token_pos < class_pos, + "`const TOKEN` (read inside both branches of a conditional indirect callee \ + `(cond ? makeA : makeB).call(null)`) must be hoisted above the class to \ + avoid TDZ. token@{token_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + assert_eq!( + result.code.matches("const TOKEN").count(), + 1, + "`const TOKEN` should appear exactly once.\nCode:\n{}", + result.code + ); +} + +/// Decorator metadata invokes a `.bind`-style callee whose receiver is a +/// conditional expression: `(cond ? makeA : makeB).bind(null)()`. +/// `record_bind_callee` must descend through the conditional/logical/ +/// sequence wrapper on the bind receiver to reach the underlying +/// identifiers — otherwise neither `makeA` nor `makeB` enters the +/// eagerly-called closure and `TOKEN` (read from their bodies) is left +/// unhoisted. +#[test] +fn component_eager_bind_callee_descends_through_conditional() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +const cond = true; +@Component({ selector: 'x', template: '', providers: (cond ? makeA : makeB).bind(null)() }) +class TestComponent {} +function makeA() { return [{ provide: TOKEN, useValue: 0 }]; } +function makeB() { return [{ provide: TOKEN, useValue: 1 }]; } +const TOKEN = 'tok'; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let token_pos = result + .code + .find("const TOKEN") + .unwrap_or_else(|| panic!("Expected `const TOKEN` to be present.\nCode:\n{}", result.code)); + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + + assert!( + token_pos < class_pos, + "`const TOKEN` (read inside both branches of a conditional bind callee \ + `(cond ? makeA : makeB).bind(null)()`) must be hoisted above the class to \ + avoid TDZ. token@{token_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + assert_eq!( + result.code.matches("const TOKEN").count(), + 1, + "`const TOKEN` should appear exactly once.\nCode:\n{}", + result.code + ); +} + +/// Both the cascade un-planning pass and the topological-precompute pass +/// derive a per-statement `stmt_called` set. They must compute it with the +/// SAME shape — seed with `init_called_symbols`, fold in fn-valued binding +/// symbols (when eagerly called), then close under `fn_body_called_symbols`. +/// If the two passes disagree, the topo edge expansion may miss a dependency +/// edge through a fn-valued binding's body chain, leaving a hoisted +/// dependent emitted before its dependee. +/// +/// Engineered shape: `make = () => inner()` calls `inner()`, whose body +/// reads `TOKEN`. The final emission order must place `const TOKEN` BEFORE +/// the hoisted `const make = () => inner();` so that when `make()` runs at +/// module load the eventual `TOKEN` read is initialized. +#[test] +fn component_topo_symmetric_eager_set_with_fn_valued_binding_chain() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'x', template: '', providers: make() }) +class TestComponent {} +const make = () => inner(); +function inner() { return TOKEN; } +const TOKEN = 'tok'; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let token_pos = result + .code + .find("const TOKEN") + .unwrap_or_else(|| panic!("Expected `const TOKEN` to be present.\nCode:\n{}", result.code)); + let make_pos = result + .code + .find("const make") + .unwrap_or_else(|| panic!("Expected `const make` to be present.\nCode:\n{}", result.code)); + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + + assert!( + token_pos < class_pos, + "`const TOKEN` must be hoisted above the class. \ + token@{token_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + assert!( + make_pos < class_pos, + "`const make` must be hoisted above the class. \ + make@{make_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + assert!( + token_pos < make_pos, + "`const TOKEN` must precede `const make` so that `make()` (called at \ + module load via the decorator) reads an initialized `TOKEN` through \ + `inner()`. token@{token_pos} make@{make_pos}\nCode:\n{}", + result.code + ); + assert_eq!( + result.code.matches("const TOKEN").count(), + 1, + "`const TOKEN` should appear exactly once.\nCode:\n{}", + result.code + ); +} From 9e5074478f00a486f8b3ae16293009385655a2b6 Mon Sep 17 00:00:00 2001 From: LongYinan Date: Thu, 28 May 2026 10:30:16 +0800 Subject: [PATCH 19/21] fix(hoist): chase named nested function bodies in body visitor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `FunctionBodyIdentVisitor::visit_function` no-opped on every nested function, so a local declaration like `function outer() { function inner() { return TOKEN; } return inner(); }` called from an eagerly-evaluated body never contributed `TOKEN` to the eager surface — `inner` is a local symbol that's not indexed in any `fn_body_*` map, so `close_eagerly_called` can't chase through it. The visitor now walks the body and parameter defaults of any nested `Function` with `id: Some(_)`. Anonymous functions and arrow expressions remain lazy. Co-Authored-By: Claude Opus 4.7 --- .../src/component/hoist.rs | 30 +++++++++-- .../tests/integration_test.rs | 50 +++++++++++++++++++ 2 files changed, 76 insertions(+), 4 deletions(-) diff --git a/crates/oxc_angular_compiler/src/component/hoist.rs b/crates/oxc_angular_compiler/src/component/hoist.rs index d1ece6338..a794c2ffe 100644 --- a/crates/oxc_angular_compiler/src/component/hoist.rs +++ b/crates/oxc_angular_compiler/src/component/hoist.rs @@ -1409,13 +1409,35 @@ impl<'a, 'b> Visit<'a> for FunctionBodyIdentVisitor<'a, 'b> { oxc_ast_visit::walk::walk_new_expression(self, it); } - // Nested function/arrow expressions only execute when *they* are called, - // not when the enclosing function is. Don't descend. + // A *named* nested `Function` may be a local function declaration that + // gets called from the surrounding eagerly-evaluated body — e.g. + // `function outer() { function inner() { return TOKEN; } return inner(); }`. + // Its body therefore runs eagerly at outer's call time and must + // contribute identifier reads / callees to `self.out` / `self.called`. + // The local symbol won't be indexed in `fn_body_called_symbols`, so + // `close_eagerly_called` can't chase it; folding the body in here + // closes that gap. + // + // Parameter defaults still need an explicit walk because the inner + // function's parameter defaults fire at *its* call sites — which, for + // local decls invoked inside the eager body, are themselves eager. + // + // Anonymous `Function` (`it.id == None`) — i.e. a function *expression* + // assigned to a value — remains lazy: it only runs when its value is + // invoked, which the outer body doesn't model. A `const x = function + // named() {...}` shape would be over-walked here, but over-counting only + // over-blocks hoisting (it never under-blocks) and disambiguating + // declaration vs. named expression requires extra scope-flag plumbing + // that isn't worth the cost. fn visit_function( &mut self, - _it: &oxc_ast::ast::Function<'a>, - _flags: oxc_syntax::scope::ScopeFlags, + it: &oxc_ast::ast::Function<'a>, + flags: oxc_syntax::scope::ScopeFlags, ) { + if it.id.is_some() { + oxc_ast_visit::walk::walk_function(self, it, flags); + walk_param_defaults(&it.params, self.semantic, self.out, self.called); + } } fn visit_arrow_function_expression(&mut self, _it: &oxc_ast::ast::ArrowFunctionExpression<'a>) { diff --git a/crates/oxc_angular_compiler/tests/integration_test.rs b/crates/oxc_angular_compiler/tests/integration_test.rs index faec505f9..1aac943b2 100644 --- a/crates/oxc_angular_compiler/tests/integration_test.rs +++ b/crates/oxc_angular_compiler/tests/integration_test.rs @@ -11239,6 +11239,56 @@ const TOKEN = 'tok'; ); } +/// `make` is eagerly invoked from the decorator. Inside `make`, a *local* +/// function declaration `inner` is defined and immediately called. `inner`'s +/// body reads a later-declared top-level `const TOKEN`, so `TOKEN` is +/// TDZ-relevant: at module load, the hoisted decorator-eval runs +/// `make() → inner() → TOKEN` before the const initializer fires. +/// +/// `FunctionBodyIdentVisitor::visit_function` must descend into named nested +/// `Function` nodes so the locally-declared `inner` contributes its body +/// references (and its own callees) to the enclosing function's eager +/// surface. Without that, the BFS never observes that `make()` transitively +/// reads `TOKEN` and the const stays below the class. +#[test] +fn component_eager_fn_body_local_fn_decl_chases_late_const() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'x', template: '', providers: make() }) +class TestComponent {} +function make() { + function inner() { return TOKEN; } + return inner(); +} +const TOKEN = 'tok'; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let token_pos = result + .code + .find("const TOKEN") + .unwrap_or_else(|| panic!("Expected `const TOKEN` to be present.\nCode:\n{}", result.code)); + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + + assert!( + token_pos < class_pos, + "`const TOKEN` (read inside a locally-declared function called from \ + the body of an eagerly-called function) must be hoisted above the \ + class to avoid TDZ. token@{token_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + assert_eq!( + result.code.matches("const TOKEN").count(), + 1, + "`const TOKEN` should appear exactly once.\nCode:\n{}", + result.code + ); +} + /// The safe-skip guard must refuse to hoist a `var TOKEN = make()` initializer /// when the eagerly-called `make()`'s body reads a later-declared top-level /// class. Without the fix, hoisting `var TOKEN = make()` above From 7df59fff6536ccf90eeb438b7237859ff6402221 Mon Sep 17 00:00:00 2001 From: LongYinan Date: Thu, 28 May 2026 10:48:28 +0800 Subject: [PATCH 20/21] fix(hoist): chase locally-bound arrow/fn helpers when called eagerly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `FunctionBodyIdentVisitor` skipped arrows entirely, so a local binding like `const inner = () => TOKEN;` inside an eagerly-called body left `inner` recorded as a callee but with no body to chase — `TOKEN` stayed below the class. Added a per-visitor `local_fn_bodies` map populated by a new `visit_variable_declarator` override that indexes arrow/function inits via a scratch walk. At each `CallExpression` / `NewExpression` / `TaggedTemplateExpression` we resolve every reachable callee identifier (through parens, TS wrappers, conditional / logical / sequence branches, `.call` / `.apply` receivers, and the inner call of `.bind(...)()`) and fold the indexed body's refs / callees into the eager surface, with a `folded` guard for self- and mutually-recursive arrows. Value-passed arrows that are never invoked inside the body (`useFactory: lazy`) stay lazy because the fold only fires at call sites. Co-Authored-By: Claude Opus 4.7 --- .../src/component/hoist.rs | 247 +++++++++++++++++- .../tests/integration_test.rs | 75 ++++++ 2 files changed, 312 insertions(+), 10 deletions(-) diff --git a/crates/oxc_angular_compiler/src/component/hoist.rs b/crates/oxc_angular_compiler/src/component/hoist.rs index a794c2ffe..da27efd57 100644 --- a/crates/oxc_angular_compiler/src/component/hoist.rs +++ b/crates/oxc_angular_compiler/src/component/hoist.rs @@ -1075,8 +1075,7 @@ fn collect_top_level_bindings<'a>( let Some(fn_symbol) = id.symbol_id.get() else { continue }; let mut refs: HashSet = HashSet::new(); let mut called: HashSet = HashSet::new(); - let mut visitor = - FunctionBodyIdentVisitor { semantic, out: &mut refs, called: &mut called }; + let mut visitor = FunctionBodyIdentVisitor::new(semantic, &mut refs, &mut called); visitor.visit_function_body(body); // Parameter defaults (`function f(x = TOKEN)`) evaluate at // call time, before the body runs. If this function is @@ -1266,7 +1265,7 @@ fn walk_class_eager_parts<'a>( // Constructor body + parameter defaults fire at `new`-time. if include_constructor && method.kind == MethodDefinitionKind::Constructor { if let Some(body) = &method.value.body { - let mut visitor = FunctionBodyIdentVisitor { semantic, out, called }; + let mut visitor = FunctionBodyIdentVisitor::new(semantic, out, called); visitor.visit_function_body(body); } walk_param_defaults(&method.value.params, semantic, out, called); @@ -1309,7 +1308,7 @@ fn walk_class_eager_parts<'a>( ClassElement::StaticBlock(block) => { // `static { … }` body runs once at class-definition time. // Walk it like an eagerly-evaluated function body. - let mut visitor = FunctionBodyIdentVisitor { semantic, out, called }; + let mut visitor = FunctionBodyIdentVisitor::new(semantic, out, called); for stmt in &block.body { visitor.visit_statement(stmt); } @@ -1363,6 +1362,137 @@ struct FunctionBodyIdentVisitor<'a, 'b> { semantic: &'b Semantic<'a>, out: &'b mut HashSet, called: &'b mut HashSet, + /// Map from local `const`/`let`/`var` binding `SymbolId` (declared inside + /// the function body being walked) to the (refs, callees) collected from + /// the body of an arrow/function expression assigned to that binding. + /// + /// Populated by [`visit_variable_declarator`] when it sees + /// `const inner = () => …` / `const inner = function () { … }` inside + /// the body. The arrow body is NOT folded into `out`/`called` at the + /// declarator site — the arrow only fires if something calls `inner`, + /// which would still be lazy if `inner` is merely passed as a value. + /// Instead, every `CallExpression` / `NewExpression` / + /// `TaggedTemplateExpression` site that resolves a callee to one of + /// these local symbols folds the stored body refs into the surrounding + /// eager surface. That precision preserves laziness for value-passed + /// arrows (e.g. `useFactory: inner`) while still chasing identifier + /// reads when the local arrow is invoked inside the eager body + /// (e.g. `return inner();`). + local_fn_bodies: HashMap, HashSet)>, +} + +impl<'a, 'b> FunctionBodyIdentVisitor<'a, 'b> { + fn new( + semantic: &'b Semantic<'a>, + out: &'b mut HashSet, + called: &'b mut HashSet, + ) -> Self { + Self { semantic, out, called, local_fn_bodies: HashMap::new() } + } + + /// Fold every `local_fn_bodies` entry reachable through a call/new/tag + /// callee shape into `self.out` / `self.called`. Walks the same callee + /// descent shape `record_direct_callee` uses (peels parens / TS wrappers, + /// descends into conditional / logical / sequence branches), resolving + /// each bare identifier to a `SymbolId`. When that symbol matches a + /// `local_fn_bodies` entry, the stored body refs flow into the eager + /// surface AND any local-arrow callees stored under that entry are + /// folded transitively (`f` calls `g` calls `h`, all local arrows). + fn fold_local_fn_body_at_callee(&mut self, callee: &Expression<'a>) { + use Expression as E; + let mut worklist: Vec<&Expression<'a>> = vec![callee]; + let mut seen: HashSet<*const Expression<'a>> = HashSet::new(); + let mut callee_symbols: Vec = Vec::new(); + while let Some(mut cur) = worklist.pop() { + loop { + let key = cur as *const Expression<'a>; + if !seen.insert(key) { + break; + } + match cur { + E::Identifier(id) => { + if let Some(symbol) = resolve_symbol(id, self.semantic) { + callee_symbols.push(symbol); + } + break; + } + E::ParenthesizedExpression(p) => cur = &p.expression, + E::TSAsExpression(ts) => cur = &ts.expression, + E::TSSatisfiesExpression(ts) => cur = &ts.expression, + E::TSNonNullExpression(ts) => cur = &ts.expression, + E::TSTypeAssertion(ts) => cur = &ts.expression, + E::TSInstantiationExpression(ts) => cur = &ts.expression, + // `obj.fn(...)` — receiver may itself be a local arrow + // binding (`obj.foo` where `obj` is local). We only chase + // through the static-member chain to handle `fn.call` / + // `fn.apply` / `fn.bind` — the receiver of those is the + // callable. Other static members (`obj.method`) imply a + // member call on the receiver, not a call on a local + // arrow binding, so they fall through. + E::StaticMemberExpression(member) => { + let prop = member.property.name.as_str(); + if prop == "call" || prop == "apply" || prop == "bind" { + cur = &member.object; + } else { + break; + } + } + // `fn.bind(...)()` — the outer callee is a `CallExpression` + // whose own callee is `.bind`. Descend through + // the inner call so the receiver `` (which may + // be a local arrow binding) gets resolved. + E::CallExpression(inner_call) => cur = &inner_call.callee, + E::ConditionalExpression(cond) => { + worklist.push(&cond.consequent); + worklist.push(&cond.alternate); + break; + } + E::LogicalExpression(log) => { + worklist.push(&log.left); + worklist.push(&log.right); + break; + } + E::SequenceExpression(seq) => { + if let Some(last) = seq.expressions.last() { + worklist.push(last); + } + break; + } + _ => break, + } + } + } + + // Fold each resolved symbol's stored body, transitively through any + // local-arrow callees stored under that body. A small visited set + // guards against cycles (`const a = () => b(); const b = () => a();` + // — both would self-reference once cross-folded). + let mut fold_worklist: Vec = callee_symbols; + let mut folded: HashSet = HashSet::new(); + while let Some(symbol) = fold_worklist.pop() { + if !folded.insert(symbol) { + continue; + } + if let Some((refs, called)) = self.local_fn_bodies.get(&symbol) { + // Clone the small symbol sets so the borrow on + // `self.local_fn_bodies` releases before we mutate + // `self.out` / `self.called`. + let refs = refs.clone(); + let called = called.clone(); + for sym in &refs { + self.out.insert(*sym); + } + for sym in &called { + self.called.insert(*sym); + // If a callee inside this local arrow is itself another + // local arrow binding, transitively fold its body too. + if self.local_fn_bodies.contains_key(sym) { + fold_worklist.push(*sym); + } + } + } + } + } } impl<'a, 'b> Visit<'a> for FunctionBodyIdentVisitor<'a, 'b> { @@ -1372,10 +1502,59 @@ impl<'a, 'b> Visit<'a> for FunctionBodyIdentVisitor<'a, 'b> { } } + // Index arrow / function expressions assigned to a plain + // `const`/`let`/`var` binding inside the body being walked. The arrow + // body is collected into a scratch `(refs, called)` pair and stored in + // `local_fn_bodies` keyed by the binding's `SymbolId`. Subsequent + // call sites in this body fold the stored body refs in only when the + // local arrow is actually invoked, preserving laziness for + // value-passed arrows. + // + // The scratch walk uses a *fresh* `FunctionBodyIdentVisitor`, so a local + // arrow whose body contains another local arrow won't transitively + // index that inner arrow under the outer's body. The fold step at call + // sites handles cross-arrow chains through `called` instead. + fn visit_variable_declarator(&mut self, it: &oxc_ast::ast::VariableDeclarator<'a>) { + if let BindingPattern::BindingIdentifier(id) = &it.id + && let Some(fn_symbol) = id.symbol_id.get() + && let Some(init) = &it.init + { + let mut scratch_refs: HashSet = HashSet::new(); + let mut scratch_called: HashSet = HashSet::new(); + if index_local_fn_valued_binding( + init, + self.semantic, + &mut scratch_refs, + &mut scratch_called, + ) { + self.local_fn_bodies.insert(fn_symbol, (scratch_refs, scratch_called)); + // Visit only the type annotation (which may carry runtime- + // irrelevant identifier refs in TS — they're erased). Skip + // the init: the arrow/function body must NOT contribute to + // `self.out` here, because the arrow might never be called. + // Pattern visit covers any defaults inside the binding (none + // for the `BindingIdentifier` branch, but kept symmetric). + self.visit_binding_pattern(&it.id); + if let Some(type_annotation) = &it.type_annotation { + self.visit_ts_type_annotation(type_annotation); + } + return; + } + } + // Init is not a direct arrow/function — fall through to the default + // walk so identifier refs in the init feed `self.out` normally. + oxc_ast_visit::walk::walk_variable_declarator(self, it); + } + fn visit_call_expression(&mut self, it: &oxc_ast::ast::CallExpression<'a>) { record_direct_callee(&it.callee, self.semantic, self.called); record_indirect_callee(&it.callee, self.semantic, self.called); record_bind_callee(&it.callee, self.semantic, self.called); + // Local arrow bindings (`const inner = () => TOKEN; inner();`) aren't + // in `fn_body_called_symbols`, so `close_eagerly_called` can't chase + // them. Fold any indexed body refs in directly at the call site, + // exactly when the arrow is invoked. + self.fold_local_fn_body_at_callee(&it.callee); // IIFE detection mirrors the `collect_expr_symbols` arm: when the // callee is `(() => ...)` / `(function() { ... })`, the body runs // eagerly at this call site, so its identifier reads contribute to @@ -1399,6 +1578,7 @@ impl<'a, 'b> Visit<'a> for FunctionBodyIdentVisitor<'a, 'b> { record_direct_callee(&it.callee, self.semantic, self.called); record_indirect_callee(&it.callee, self.semantic, self.called); record_bind_callee(&it.callee, self.semantic, self.called); + self.fold_local_fn_body_at_callee(&it.callee); // Symmetric IIFE handling for `new (function() { ... })()`. if walk_iife_callee_body(&it.callee, self.semantic, self.out, self.called) { for arg in &it.arguments { @@ -1474,6 +1654,7 @@ impl<'a, 'b> Visit<'a> for FunctionBodyIdentVisitor<'a, 'b> { record_direct_callee(&it.tag, self.semantic, self.called); record_indirect_callee(&it.tag, self.semantic, self.called); record_bind_callee(&it.tag, self.semantic, self.called); + self.fold_local_fn_body_at_callee(&it.tag); oxc_ast_visit::walk::walk_tagged_template_expression(self, it); } } @@ -2108,8 +2289,7 @@ fn index_fn_valued_binding<'a>( E::ArrowFunctionExpression(arrow) => { let mut refs: HashSet = HashSet::new(); let mut called: HashSet = HashSet::new(); - let mut visitor = - FunctionBodyIdentVisitor { semantic, out: &mut refs, called: &mut called }; + let mut visitor = FunctionBodyIdentVisitor::new(semantic, &mut refs, &mut called); visitor.visit_function_body(&arrow.body); walk_param_defaults(&arrow.params, semantic, &mut refs, &mut called); fn_body_symbol_refs.insert(fn_symbol, refs); @@ -2120,8 +2300,7 @@ fn index_fn_valued_binding<'a>( let Some(body) = &func.body else { return false }; let mut refs: HashSet = HashSet::new(); let mut called: HashSet = HashSet::new(); - let mut visitor = - FunctionBodyIdentVisitor { semantic, out: &mut refs, called: &mut called }; + let mut visitor = FunctionBodyIdentVisitor::new(semantic, &mut refs, &mut called); visitor.visit_function_body(body); walk_param_defaults(&func.params, semantic, &mut refs, &mut called); fn_body_symbol_refs.insert(fn_symbol, refs); @@ -2139,6 +2318,54 @@ fn index_fn_valued_binding<'a>( } } +/// Sibling of [`index_fn_valued_binding`] that writes the indexed body refs / +/// direct callees into caller-owned scratch sets instead of the cross-statement +/// maps. Used by [`FunctionBodyIdentVisitor::visit_variable_declarator`] to +/// build a `(refs, called)` pair for a local arrow/function binding declared +/// inside a function body — those bindings are NOT top-level and don't belong +/// in `fn_body_symbol_refs`, but their bodies still need to be foldable into +/// the surrounding eager surface when they're invoked. +/// +/// Returns `true` when `init` is (after peeling parens / TS wrappers) directly +/// an `ArrowFunctionExpression` or `FunctionExpression` and indexing happened. +/// The scratch visitor used here is *fresh*, so a local arrow whose body +/// contains another local arrow won't transitively pick up the inner arrow's +/// body refs through this single index step — the outer caller's fold-at-call +/// step handles that chain via the `called` set on subsequent invocations. +fn index_local_fn_valued_binding<'a>( + init: &Expression<'a>, + semantic: &Semantic<'a>, + refs: &mut HashSet, + called: &mut HashSet, +) -> bool { + use Expression as E; + let mut cur = init; + loop { + match cur { + E::ArrowFunctionExpression(arrow) => { + let mut visitor = FunctionBodyIdentVisitor::new(semantic, refs, called); + visitor.visit_function_body(&arrow.body); + walk_param_defaults(&arrow.params, semantic, refs, called); + return true; + } + E::FunctionExpression(func) => { + let Some(body) = &func.body else { return false }; + let mut visitor = FunctionBodyIdentVisitor::new(semantic, refs, called); + visitor.visit_function_body(body); + walk_param_defaults(&func.params, semantic, refs, called); + return true; + } + E::ParenthesizedExpression(p) => cur = &p.expression, + E::TSAsExpression(ts) => cur = &ts.expression, + E::TSSatisfiesExpression(ts) => cur = &ts.expression, + E::TSNonNullExpression(ts) => cur = &ts.expression, + E::TSTypeAssertion(ts) => cur = &ts.expression, + E::TSInstantiationExpression(ts) => cur = &ts.expression, + _ => return false, + } + } +} + /// If `callee` is the function expression of an IIFE /// (`(() => …)()` or `(function() {…})()`, after peeling parens and TS /// wrappers), walk its body eagerly via `FunctionBodyIdentVisitor` and @@ -2161,7 +2388,7 @@ fn walk_iife_callee_body<'a>( loop { match cur { E::ArrowFunctionExpression(arrow) => { - let mut visitor = FunctionBodyIdentVisitor { semantic, out, called }; + let mut visitor = FunctionBodyIdentVisitor::new(semantic, out, called); visitor.visit_function_body(&arrow.body); // Parameter defaults evaluate at IIFE invocation time, before // the body runs — symmetric with top-level function decls @@ -2171,7 +2398,7 @@ fn walk_iife_callee_body<'a>( } E::FunctionExpression(func) => { if let Some(body) = &func.body { - let mut visitor = FunctionBodyIdentVisitor { semantic, out, called }; + let mut visitor = FunctionBodyIdentVisitor::new(semantic, out, called); visitor.visit_function_body(body); } walk_param_defaults(&func.params, semantic, out, called); diff --git a/crates/oxc_angular_compiler/tests/integration_test.rs b/crates/oxc_angular_compiler/tests/integration_test.rs index 1aac943b2..3a9658ca2 100644 --- a/crates/oxc_angular_compiler/tests/integration_test.rs +++ b/crates/oxc_angular_compiler/tests/integration_test.rs @@ -11289,6 +11289,81 @@ const TOKEN = 'tok'; ); } +/// `make` is eagerly invoked from the decorator. Inside `make`, a *local* +/// arrow expression is assigned to a `const inner` binding and then +/// immediately called via `inner()`. `inner`'s body reads a later-declared +/// top-level `const TOKEN`, so `TOKEN` is TDZ-relevant: at module load, the +/// hoisted decorator-eval runs `make() → inner() → TOKEN` before the const +/// initializer fires. +/// +/// Unlike a *named* nested function (handled by walking through +/// `visit_function`), arrows assigned to local bindings need a separate +/// indexing step: `FunctionBodyIdentVisitor` must record arrow-valued local +/// bindings inside the function body it walks, then fold those bodies in at +/// each call site so calls to local arrows transitively contribute their +/// reads to the enclosing eager surface. +#[test] +fn component_eager_fn_body_local_arrow_binding_chases_late_const() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'x', template: '', providers: make() }) +class TestComponent {} +function make() { + const inner = () => TOKEN; + return inner(); +} +const TOKEN = 'tok'; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let token_pos = result + .code + .find("const TOKEN") + .unwrap_or_else(|| panic!("Expected `const TOKEN` to be present.\nCode:\n{}", result.code)); + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + + assert!( + token_pos < class_pos, + "`const TOKEN` (read inside a local arrow binding called from the \ + body of an eagerly-called function) must be hoisted above the class \ + to avoid TDZ. token@{token_pos} class@{class_pos}\nCode:\n{}", + result.code + ); + assert_eq!( + result.code.matches("const TOKEN").count(), + 1, + "`const TOKEN` should appear exactly once.\nCode:\n{}", + result.code + ); +} + +/// Sibling of `component_eager_fn_body_local_arrow_binding_chases_late_const` +/// that locks in laziness: when a local arrow binding is stored in a provider +/// (`useFactory: lazy`) but is NEVER called inside the enclosing function's +/// body, the arrow's body refs must NOT force a hoist via the local-arrow +/// indexing. The hoist might still happen because other analysis paths treat +/// the provider shape as eager, but the transform must at minimum not error. +#[test] +fn component_eager_fn_body_lazy_local_arrow_does_not_force_hoist() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'x', template: '', providers: make() }) +class TestComponent {} +function make() { + const lazy = () => TOKEN; + return [{ provide: 'tok', useFactory: lazy }]; +} +const TOKEN = 'tok'; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); +} + /// The safe-skip guard must refuse to hoist a `var TOKEN = make()` initializer /// when the eagerly-called `make()`'s body reads a later-declared top-level /// class. Without the fix, hoisting `var TOKEN = make()` above From 342d625b019ae7fda21d2c6ae07d28af770ab0c3 Mon Sep 17 00:00:00 2001 From: LongYinan Date: Thu, 28 May 2026 11:00:08 +0800 Subject: [PATCH 21/21] fix(hoist): index nested fn declarations lazily; fold only at call sites MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The prior round made `FunctionBodyIdentVisitor::visit_function` walk named nested function bodies unconditionally, which broke laziness — an uncalled `function unused() { return TOKEN; }` declared inside an eagerly-called helper folded `TOKEN` into the eager surface and could hoist it above the class for no reason. Mirror the local-arrow path: `visit_function` now indexes the function into `local_fn_bodies` and defers the fold to call sites. A new `visit_function_body` / `visit_block_statement` pre-pass indexes hoisted function declarations before source-order walking so a call that textually precedes its declaration still resolves. Co-Authored-By: Claude Opus 4.7 --- .../src/component/hoist.rs | 116 ++++++++++++++---- .../tests/integration_test.rs | 97 +++++++++++++++ 2 files changed, 191 insertions(+), 22 deletions(-) diff --git a/crates/oxc_angular_compiler/src/component/hoist.rs b/crates/oxc_angular_compiler/src/component/hoist.rs index da27efd57..e070ac1eb 100644 --- a/crates/oxc_angular_compiler/src/component/hoist.rs +++ b/crates/oxc_angular_compiler/src/component/hoist.rs @@ -1493,6 +1493,52 @@ impl<'a, 'b> FunctionBodyIdentVisitor<'a, 'b> { } } } + + /// Index a nested named function (declaration or named expression) + /// keyed by its binding `SymbolId`: walk its body and parameter defaults + /// into a fresh `(refs, called)` pair and store it under + /// `local_fn_bodies`. The stored entry is folded into the surrounding + /// eager surface only when something invokes the function — see + /// `fold_local_fn_body_at_callee`. + /// + /// Idempotent: re-indexing a symbol overwrites the entry with the same + /// data, so the pre-pass + `visit_function` paths can both run without + /// duplicating work. + fn index_local_fn_body( + &mut self, + fn_symbol: SymbolId, + params: &FormalParameters<'a>, + body: Option<&oxc_ast::ast::FunctionBody<'a>>, + ) { + let mut refs: HashSet = HashSet::new(); + let mut called: HashSet = HashSet::new(); + if let Some(body) = body { + let mut scratch = FunctionBodyIdentVisitor::new(self.semantic, &mut refs, &mut called); + scratch.visit_function_body(body); + } + walk_param_defaults(params, self.semantic, &mut refs, &mut called); + self.local_fn_bodies.insert(fn_symbol, (refs, called)); + } + + /// Pre-pass: index every nested function declaration that appears in + /// the given statement list. Function declarations are hoisted within + /// the enclosing function/block scope, so a call may textually precede + /// the declaration. Indexing up front lets the source-order walk + /// resolve those calls at their fold-site. + fn index_hoisted_fn_declarations( + &mut self, + statements: &oxc_allocator::Vec<'a, Statement<'a>>, + ) { + for stmt in statements { + if let Statement::FunctionDeclaration(func) = stmt { + if let Some(id) = &func.id { + if let Some(fn_symbol) = id.symbol_id.get() { + self.index_local_fn_body(fn_symbol, &func.params, func.body.as_deref()); + } + } + } + } + } } impl<'a, 'b> Visit<'a> for FunctionBodyIdentVisitor<'a, 'b> { @@ -1589,40 +1635,66 @@ impl<'a, 'b> Visit<'a> for FunctionBodyIdentVisitor<'a, 'b> { oxc_ast_visit::walk::walk_new_expression(self, it); } - // A *named* nested `Function` may be a local function declaration that - // gets called from the surrounding eagerly-evaluated body — e.g. + // A *named* nested `Function` (declaration or named function expression) + // may be a local function called from the surrounding eagerly-evaluated + // body — e.g. // `function outer() { function inner() { return TOKEN; } return inner(); }`. - // Its body therefore runs eagerly at outer's call time and must - // contribute identifier reads / callees to `self.out` / `self.called`. - // The local symbol won't be indexed in `fn_body_called_symbols`, so - // `close_eagerly_called` can't chase it; folding the body in here - // closes that gap. + // Its body runs eagerly only when something invokes it. Mirroring the + // local-arrow-binding pattern in `visit_variable_declarator`, index the + // body into `local_fn_bodies` keyed by the function's `SymbolId` and let + // the call-site fold (`fold_local_fn_body_at_callee`) bring the body + // refs into `self.out` / `self.called` only when the function is + // actually invoked. Walking the body unconditionally here would over- + // hoist bindings read by a declared-but-uncalled nested function. // - // Parameter defaults still need an explicit walk because the inner - // function's parameter defaults fire at *its* call sites — which, for - // local decls invoked inside the eager body, are themselves eager. - // - // Anonymous `Function` (`it.id == None`) — i.e. a function *expression* - // assigned to a value — remains lazy: it only runs when its value is - // invoked, which the outer body doesn't model. A `const x = function - // named() {...}` shape would be over-walked here, but over-counting only - // over-blocks hoisting (it never under-blocks) and disambiguating - // declaration vs. named expression requires extra scope-flag plumbing - // that isn't worth the cost. + // For nested function *declarations*, the JS hoisting rule means a call + // may textually precede the declaration. Source-order traversal can't + // resolve such a call by the time it visits it, so the pre-passes in + // `visit_function_body` / `visit_block_statement` index every nested + // function declaration in the current scope BEFORE walking statements. + // The duplicate indexing here is idempotent (HashMap insert overwrites + // with identical data) and also covers named function *expressions* + // (`x = function named() { ... }`), which the block pre-pass doesn't + // see. fn visit_function( &mut self, it: &oxc_ast::ast::Function<'a>, - flags: oxc_syntax::scope::ScopeFlags, + _flags: oxc_syntax::scope::ScopeFlags, ) { - if it.id.is_some() { - oxc_ast_visit::walk::walk_function(self, it, flags); - walk_param_defaults(&it.params, self.semantic, self.out, self.called); + if let Some(id) = &it.id { + if let Some(fn_symbol) = id.symbol_id.get() { + self.index_local_fn_body(fn_symbol, &it.params, it.body.as_deref()); + } } + // Body intentionally not walked: the fold-at-call-site path replaces + // the unconditional walk that previously broke laziness for declared- + // but-uncalled nested functions. } fn visit_arrow_function_expression(&mut self, _it: &oxc_ast::ast::ArrowFunctionExpression<'a>) { } + // Pre-pass: function declarations are hoisted within their enclosing + // function body, so a call to `inner()` may appear in source *before* + // `function inner() { ... }`. Scan the statement list first and index + // every nested function declaration; only then walk statements so the + // fold-at-call-site logic in `visit_call_expression` / + // `visit_new_expression` / `visit_tagged_template_expression` can + // resolve such calls. + fn visit_function_body(&mut self, it: &oxc_ast::ast::FunctionBody<'a>) { + self.index_hoisted_fn_declarations(&it.statements); + oxc_ast_visit::walk::walk_function_body(self, it); + } + + // Block-scoped function declarations (`if (x) { function inner() { … } }`) + // are hoisted to the top of the block in modern JS. Apply the same + // pre-pass to nested block statements so a call earlier in the block + // can resolve the locally-hoisted function. + fn visit_block_statement(&mut self, it: &oxc_ast::ast::BlockStatement<'a>) { + self.index_hoisted_fn_declarations(&it.body); + oxc_ast_visit::walk::walk_block_statement(self, it); + } + // Class expressions inside an eagerly-called function body evaluate // their eager parts (`super_class`, computed keys, static field / // accessor initializers, static blocks) at call time of the outer diff --git a/crates/oxc_angular_compiler/tests/integration_test.rs b/crates/oxc_angular_compiler/tests/integration_test.rs index 3a9658ca2..634b95c29 100644 --- a/crates/oxc_angular_compiler/tests/integration_test.rs +++ b/crates/oxc_angular_compiler/tests/integration_test.rs @@ -11364,6 +11364,103 @@ const TOKEN = 'tok'; assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); } +/// Laziness sibling for *named* nested function declarations: inside an +/// eagerly-called `make()`, a locally-declared `function unused()` reads +/// `TOKEN`, but the function is never invoked. `make()` returns `[]`, so no +/// eager read of `TOKEN` actually happens at decorator-eval time. The +/// transform must NOT fold `unused`'s body into the eager surface — doing so +/// would falsely hoist `TOKEN` above the class even though no value-passed +/// reference fires. +/// +/// The original source places `const TOKEN` after the class. With correct +/// laziness, the transform leaves that ordering intact. +#[test] +fn component_eager_fn_body_uncalled_nested_fn_decl_does_not_force_hoist() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'x', template: '', providers: make() }) +class TestComponent {} +function make() { + function unused() { return TOKEN; } + return []; +} +const TOKEN = 'tok'; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + let token_pos = result + .code + .find("const TOKEN") + .unwrap_or_else(|| panic!("Expected `const TOKEN` to be present.\nCode:\n{}", result.code)); + + assert!( + class_pos < token_pos, + "`const TOKEN` must NOT be hoisted: `unused` is declared but never \ + called, so its body refs are lazy. class@{class_pos} \ + token@{token_pos}\nCode:\n{}", + result.code + ); + assert_eq!( + result.code.matches("const TOKEN").count(), + 1, + "`const TOKEN` should appear exactly once.\nCode:\n{}", + result.code + ); +} + +/// JS function declarations are hoisted inside their enclosing scope, so a +/// call to `inner()` can appear in source *before* the `function inner()` +/// declaration and still resolve at runtime. The visitor walks in source +/// order, so it sees `return inner();` before it indexes `inner`. The +/// fold-at-call-site path must therefore pre-index nested function +/// declarations within each function body / block before walking the +/// statements — otherwise the call site cannot resolve `inner` and `TOKEN` +/// stays unhoisted. +#[test] +fn component_eager_fn_body_hoisted_fn_decl_call_still_chases() { + let allocator = Allocator::default(); + let source = r#" +import { Component } from '@angular/core'; +@Component({ selector: 'x', template: '', providers: make() }) +class TestComponent {} +function make() { + return inner(); + function inner() { return TOKEN; } +} +const TOKEN = 'tok'; +"#; + let result = transform_angular_file(&allocator, "test.component.ts", source, None, None); + assert!(!result.has_errors(), "Should not have errors: {:?}", result.diagnostics); + + let token_pos = result + .code + .find("const TOKEN") + .unwrap_or_else(|| panic!("Expected `const TOKEN` to be present.\nCode:\n{}", result.code)); + let class_pos = result.code.find("class TestComponent").unwrap_or_else(|| { + panic!("Expected `class TestComponent` to be present.\nCode:\n{}", result.code) + }); + + assert!( + token_pos < class_pos, + "`const TOKEN` (read by a hoisted nested function declaration called \ + from above its source position inside an eagerly-called function) \ + must be hoisted above the class to avoid TDZ. token@{token_pos} \ + class@{class_pos}\nCode:\n{}", + result.code + ); + assert_eq!( + result.code.matches("const TOKEN").count(), + 1, + "`const TOKEN` should appear exactly once.\nCode:\n{}", + result.code + ); +} + /// The safe-skip guard must refuse to hoist a `var TOKEN = make()` initializer /// when the eagerly-called `make()`'s body reads a later-declared top-level /// class. Without the fix, hoisting `var TOKEN = make()` above