From 5a3b3cd77e7c11ae5c91d3553a1b7d6bd55ef1d3 Mon Sep 17 00:00:00 2001
From: Ethan Ng <ethann@meta.com>
Date: Sat, 30 May 2026 16:27:23 -0700
Subject: [PATCH] Remove over-strict softmax mask divisibility assert

Summary: `SoftmaxPattern.fuse()` asserts `mask_shape[-1] % 16 == 0`. The softmax mask passed to the fused op is a dummy (`mask_type=0`, no masking is applied), so its trailing dimension does not affect numerics, and the historical `QuantFusion` simply floor-divided without asserting. The assert rejects otherwise-valid shapes (e.g. softmax over a last dim of 17 or 33) and causes `test_quantized_softmax_out_*` to fail (T273477740). Remove the assert and floor-divide the last dimension of the mask shape by 16 as before, in both the `fbcode/` and `xplat/` cells.

Differential Revision: D106957459
---
 backends/cadence/aot/quantizer/patterns.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/backends/cadence/aot/quantizer/patterns.py b/backends/cadence/aot/quantizer/patterns.py
index a7026cbf26c..9897d443725 100644
--- a/backends/cadence/aot/quantizer/patterns.py
+++ b/backends/cadence/aot/quantizer/patterns.py
@@ -1092,9 +1092,6 @@ def fuse(self, gm: fx.GraphModule, anchor_node: fx.Node) -> fx.Node | None:
             return None
         mask_shape = list(mask_shape)
         # Softmax mask is packed 16 elements per int32 word.
-        assert (
-            mask_shape[-1] % 16 == 0
-        ), f"Softmax mask dimension must be divisible by 16, got {mask_shape[-1]}"
         mask_shape[-1] = mask_shape[-1] // 16
         mask_tensor = insert_node_with_meta(
             gm,