From 5a3b3cd77e7c11ae5c91d3553a1b7d6bd55ef1d3 Mon Sep 17 00:00:00 2001 From: Ethan Ng Date: Sat, 30 May 2026 16:27:23 -0700 Subject: [PATCH] Remove over-strict softmax mask divisibility assert Summary: `SoftmaxPattern.fuse()` asserts `mask_shape[-1] % 16 == 0`. The softmax mask passed to the fused op is a dummy (`mask_type=0`, no masking is applied), so its trailing dimension does not affect numerics, and the historical `QuantFusion` simply floor-divided without asserting. The assert rejects otherwise-valid shapes (e.g. softmax over a last dim of 17 or 33) and fails `test_quantized_softmax_out_*` (T273477740). Remove the assert and keep floor-dividing the mask's trailing dimension by 16 as before, in both the `fbcode/` and `xplat/` cells. Differential Revision: D106957459 --- backends/cadence/aot/quantizer/patterns.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/backends/cadence/aot/quantizer/patterns.py b/backends/cadence/aot/quantizer/patterns.py index a7026cbf26c..9897d443725 100644 --- a/backends/cadence/aot/quantizer/patterns.py +++ b/backends/cadence/aot/quantizer/patterns.py @@ -1092,9 +1092,6 @@ def fuse(self, gm: fx.GraphModule, anchor_node: fx.Node) -> fx.Node | None: return None mask_shape = list(mask_shape) # Softmax mask is packed 16 elements per int32 word. - assert ( - mask_shape[-1] % 16 == 0 - ), f"Softmax mask dimension must be divisible by 16, got {mask_shape[-1]}" mask_shape[-1] = mask_shape[-1] // 16 mask_tensor = insert_node_with_meta( gm,