diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index fb5622c99f7a135..1f5620b48469fdd 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -217,6 +217,7 @@ _PyLong_BothAreCompact(const PyLongObject* a, const PyLongObject* b) { static inline bool _PyLong_IsZero(const PyLongObject *op) { + assert(op != NULL); return (op->long_value.lv_tag & SIGN_MASK) == SIGN_ZERO; } @@ -346,6 +347,87 @@ _PyLong_CheckExactAndCompact(PyObject *op) return PyLong_CheckExact(op) && _PyLong_IsCompact((const PyLongObject *)op); } +/* A cheap guard used by the Tier 1 and Tier 2 / JIT integer fast paths. + * + * "Compact" ints are single-digit. Non-compact ints may still fit in int64_t, + * but are limited to a small number of digits (3 for 30-bit digits, 5 for + * 15-bit digits). This is an intentionally cheap filter: callers must still + * do an exact range check during extraction. + */ +#define _PY_LONG_MAX_DIGITS_FOR_INT64 ((64 + PyLong_SHIFT - 1) / PyLong_SHIFT) + +static inline int +_PyLong_MightFitInt64(const PyLongObject *v) +{ + if (_PyLong_IsCompact(v)) { + return 1; + } + Py_ssize_t ndigits = _PyLong_DigitCount(v); + if (ndigits > _PY_LONG_MAX_DIGITS_FOR_INT64) { + return 0; + } + if (ndigits == _PY_LONG_MAX_DIGITS_FOR_INT64) { + unsigned int shift = PyLong_SHIFT * (unsigned int)(ndigits - 1); + uint64_t max_pos_top = (uint64_t)INT64_MAX >> shift; + uint64_t max_neg_top = ((uint64_t)INT64_MAX + 1) >> shift; /* abs(INT64_MIN) */ + uint64_t max_top = ((v->long_value.lv_tag & SIGN_MASK) == SIGN_NEGATIVE) + ? max_neg_top + : max_pos_top; + return (uint64_t)v->long_value.ob_digit[ndigits - 1] <= max_top; + } + return 1; +} + +static inline int +_PyLong_CheckExactAndMightFitInt64(PyObject *op) +{ + return PyLong_CheckExact(op) && + _PyLong_MightFitInt64((const PyLongObject *)op); +} + +/* Extract an exact int to int64_t without raising. 
+ * + * Returns true on success and writes to *out; returns false if the value is + * out of int64_t range. Never sets an exception. + */ +static inline bool +_PyLong_TryAsInt64Exact(PyLongObject *v, int64_t *out) +{ + assert(PyLong_CheckExact((PyObject *)v)); + if (_PyLong_IsCompact(v)) { + *out = (int64_t)_PyLong_CompactValue(v); + return true; + } + Py_ssize_t ndigits = _PyLong_DigitCount(v); + if (ndigits > _PY_LONG_MAX_DIGITS_FOR_INT64) { + return false; + } + uint64_t abs_val = 0; + unsigned int shift = 0; + for (Py_ssize_t i = 0; i < ndigits; i++) { + uint64_t d = (uint64_t)v->long_value.ob_digit[i]; + if (ndigits == _PY_LONG_MAX_DIGITS_FOR_INT64 && + i == ndigits - 1 && + shift != 0 && + (d >> (64 - shift)) != 0) + { + return false; + } + abs_val |= d << shift; + shift += PyLong_SHIFT; + } + int sign = 1 - (v->long_value.lv_tag & SIGN_MASK); + if (abs_val <= (uint64_t)INT64_MAX) { + *out = sign < 0 ? -(int64_t)abs_val : (int64_t)abs_val; + return true; + } + if (sign < 0 && abs_val == (uint64_t)INT64_MAX + 1) { + *out = INT64_MIN; + return true; + } + return false; +} + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index d2e29a1b95ede2f..341cd60a83c0607 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1114,12 +1114,12 @@ PyAPI_DATA(const struct opcode_metadata) _PyOpcode_opcode_metadata[267]; const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [BINARY_OP] = { true, INSTR_FMT_IBC0000, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_RECORDS_VALUE_FLAG }, [BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG }, - [BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG }, + [BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, [BINARY_OP_ADD_UNICODE] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | 
HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG }, [BINARY_OP_EXTEND] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_INPLACE_ADD_UNICODE] = { true, INSTR_FMT_IXC0000, HAS_LOCAL_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG }, - [BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG }, + [BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, [BINARY_OP_SUBSCR_DICT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_RECORDS_VALUE_FLAG }, [BINARY_OP_SUBSCR_GETITEM] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_SYNC_SP_FLAG | HAS_NEEDS_GUARD_IP_FLAG | HAS_RECORDS_VALUE_FLAG }, [BINARY_OP_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, @@ -1128,7 +1128,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [BINARY_OP_SUBSCR_TUPLE_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_SUBSCR_USTR_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG }, [BINARY_OP_SUBTRACT_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG }, - [BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG }, + [BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, [BINARY_SLICE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BUILD_INTERPOLATION] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BUILD_LIST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, @@ -1174,7 +1174,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [CLEANUP_THROW] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, 
[COMPARE_OP] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [COMPARE_OP_FLOAT] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_EXIT_FLAG }, - [COMPARE_OP_INT] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_EXIT_FLAG }, + [COMPARE_OP_INT] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [COMPARE_OP_STR] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_EXIT_FLAG }, [CONTAINS_OP] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [CONTAINS_OP_DICT] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 6713e9bc95f942d..093cd0d67f92971 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -105,15 +105,15 @@ const uint32_t _PyUop_Flags[MAX_UOP_ID+1] = { [_GUARD_TOS_INT] = HAS_EXIT_FLAG, [_GUARD_NOS_OVERFLOWED] = HAS_EXIT_FLAG, [_GUARD_TOS_OVERFLOWED] = HAS_EXIT_FLAG, - [_BINARY_OP_MULTIPLY_INT] = HAS_EXIT_FLAG | HAS_PURE_FLAG, - [_BINARY_OP_ADD_INT] = HAS_EXIT_FLAG | HAS_PURE_FLAG, - [_BINARY_OP_SUBTRACT_INT] = HAS_EXIT_FLAG | HAS_PURE_FLAG, - [_BINARY_OP_ADD_INT_INPLACE] = HAS_EXIT_FLAG, - [_BINARY_OP_SUBTRACT_INT_INPLACE] = HAS_EXIT_FLAG, - [_BINARY_OP_MULTIPLY_INT_INPLACE] = HAS_EXIT_FLAG, - [_BINARY_OP_ADD_INT_INPLACE_RIGHT] = HAS_EXIT_FLAG, - [_BINARY_OP_SUBTRACT_INT_INPLACE_RIGHT] = HAS_EXIT_FLAG, - [_BINARY_OP_MULTIPLY_INT_INPLACE_RIGHT] = HAS_EXIT_FLAG, + [_BINARY_OP_MULTIPLY_INT] = HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_PURE_FLAG, + [_BINARY_OP_ADD_INT] = HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_PURE_FLAG, + [_BINARY_OP_SUBTRACT_INT] = HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_PURE_FLAG, + [_BINARY_OP_ADD_INT_INPLACE] = HAS_EXIT_FLAG | HAS_ERROR_FLAG, + [_BINARY_OP_SUBTRACT_INT_INPLACE] = HAS_EXIT_FLAG | HAS_ERROR_FLAG, + [_BINARY_OP_MULTIPLY_INT_INPLACE] = HAS_EXIT_FLAG | 
HAS_ERROR_FLAG, + [_BINARY_OP_ADD_INT_INPLACE_RIGHT] = HAS_EXIT_FLAG | HAS_ERROR_FLAG, + [_BINARY_OP_SUBTRACT_INT_INPLACE_RIGHT] = HAS_EXIT_FLAG | HAS_ERROR_FLAG, + [_BINARY_OP_MULTIPLY_INT_INPLACE_RIGHT] = HAS_EXIT_FLAG | HAS_ERROR_FLAG, [_GUARD_NOS_FLOAT] = HAS_EXIT_FLAG, [_GUARD_TOS_FLOAT] = HAS_EXIT_FLAG, [_BINARY_OP_MULTIPLY_FLOAT] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_PURE_FLAG, @@ -242,7 +242,7 @@ const uint32_t _PyUop_Flags[MAX_UOP_ID+1] = { [_STORE_ATTR_SLOT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_COMPARE_OP] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_COMPARE_OP_FLOAT] = HAS_ARG_FLAG, - [_COMPARE_OP_INT] = HAS_ARG_FLAG, + [_COMPARE_OP_INT] = HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG, [_COMPARE_OP_STR] = HAS_ARG_FLAG, [_IS_OP] = HAS_ARG_FLAG, [_CONTAINS_OP] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 2248920c266aef5..538bcf2df4a9646 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -4245,6 +4245,52 @@ def testfunc(args): # Verify small int singletons are not corrupted self.assertEqual(7, 3 + 4) + def test_int_add_inplace_noncompact_unique_lhs(self): + # a + b produces a unique non-compact int that still fits in int64_t. 
+ def testfunc(args): + a, b, c, n = args + total = 0 + for _ in range(n): + total += (a + b) + c + return total + + res, ex = self._run_with_optimizer( + testfunc, (5_000_000_000, 6_000_000_000, 7, TIER2_THRESHOLD)) + self.assertEqual(res, TIER2_THRESHOLD * 11_000_000_007) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_BINARY_OP_ADD_INT_INPLACE", uops) + + def test_int_add_inplace_noncompact_int64_boundary(self): + def testfunc(args): + a, b, n = args + total = 0 + for _ in range(n): + total += (a + b) + 1 + return total + + res, ex = self._run_with_optimizer( + testfunc, (4_611_686_018_427_387_903, 4_611_686_018_427_387_903, + TIER2_THRESHOLD)) + self.assertEqual(res, TIER2_THRESHOLD * 9_223_372_036_854_775_807) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_BINARY_OP_ADD_INT_INPLACE", uops) + + def test_int_add_inplace_noncompact_overflow_falls_back(self): + def testfunc(args): + a, b, n = args + total = 0 + for _ in range(n): + total += (a + b) + 1 + return total + + res, ex = self._run_with_optimizer( + testfunc, (4_611_686_018_427_387_904, 4_611_686_018_427_387_904, + TIER2_THRESHOLD)) + self.assertEqual(res, TIER2_THRESHOLD * 9_223_372_036_854_775_809) + self.assertIsNotNone(ex) + def test_int_subtract_inplace_unique_lhs(self): # a * b produces a unique compact int; subtracting c reuses it def testfunc(args): @@ -4275,6 +4321,38 @@ def testfunc(args): uops = get_opnames(ex) self.assertIn("_BINARY_OP_SUBTRACT_INT_INPLACE_RIGHT", uops) + def test_int_subtract_inplace_noncompact_unique_lhs(self): + # a + b produces a unique non-compact int that still fits in int64_t. 
+ def testfunc(args): + a, b, c, n = args + total = 0 + for _ in range(n): + total += (a + b) - c + return total + + res, ex = self._run_with_optimizer( + testfunc, (5_000_000_000, 6_000_000_000, 7, TIER2_THRESHOLD)) + self.assertEqual(res, TIER2_THRESHOLD * 10_999_999_993) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_BINARY_OP_SUBTRACT_INT_INPLACE", uops) + + def test_int_subtract_inplace_noncompact_int64_min_boundary(self): + def testfunc(args): + a, b, n = args + total = 0 + for _ in range(n): + total += (a + b) - 1 + return total + + res, ex = self._run_with_optimizer( + testfunc, (-4_611_686_018_427_387_904, -4_611_686_018_427_387_903, + TIER2_THRESHOLD)) + self.assertEqual(res, TIER2_THRESHOLD * (-9_223_372_036_854_775_808)) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_BINARY_OP_SUBTRACT_INT_INPLACE", uops) + def test_int_multiply_inplace_unique_lhs(self): # (a + b) produces a unique compact int; multiplying by c reuses it def testfunc(args): @@ -4305,6 +4383,69 @@ def testfunc(args): uops = get_opnames(ex) self.assertIn("_BINARY_OP_MULTIPLY_INT_INPLACE_RIGHT", uops) + def test_int_multiply_inplace_noncompact_unique_lhs(self): + # a + b produces a unique non-compact int that still fits in int64_t. 
+ def testfunc(args): + a, b, c, n = args + total = 0 + for _ in range(n): + total += (a + b) * c + return total + + res, ex = self._run_with_optimizer( + testfunc, (5_000_000_000, 6_000_000_000, 3, TIER2_THRESHOLD)) + self.assertEqual(res, TIER2_THRESHOLD * 33_000_000_000) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_BINARY_OP_MULTIPLY_INT_INPLACE", uops) + + def test_int_multiply_inplace_noncompact_overflow_falls_back(self): + def testfunc(args): + a, b, c, n = args + total = 0 + for _ in range(n): + total += (a + b) * c + return total + + res, ex = self._run_with_optimizer( + testfunc, (4_611_686_018_427_387_904, 4_611_686_018_427_387_904, 2, + TIER2_THRESHOLD)) + self.assertEqual(res, TIER2_THRESHOLD * 18_446_744_073_709_551_616) + self.assertIsNotNone(ex) + + def test_compare_int_noncompact(self): + def testfunc(args): + a, b, n = args + total = 0 + for _ in range(n): + if a < b: + total += 1 + return total + + res, ex = self._run_with_optimizer( + testfunc, (5_000_000_000, 6_000_000_000, TIER2_THRESHOLD)) + self.assertEqual(res, TIER2_THRESHOLD) + self.assertIsNotNone(ex) + + def test_compare_int_noncompact_all_predicates(self): + def testfunc(args): + a, b, n = args + total = 0 + for _ in range(n): + total += (a < b) + total += (a <= b) + total += (a == a) + total += (b != a) + total += (b > a) + total += (b >= a) + return total + + res, ex = self._run_with_optimizer( + testfunc, (9_223_372_036_854_775_006, 9_223_372_036_854_775_007, + TIER2_THRESHOLD)) + self.assertEqual(res, TIER2_THRESHOLD * 6) + self.assertIsNotNone(ex) + def test_int_inplace_chain_propagation(self): # a * b + c * d: both products are unique, the + reuses one; # result of + is also unique for the subsequent += diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index 7946550ec0db637..aa356cd8809cb15 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -1375,7 +1375,7 @@ def binary_op_add_int(): 
self.assert_specialized(binary_op_add_int, "BINARY_OP_ADD_INT") self.assert_no_opcode(binary_op_add_int, "BINARY_OP") - def binary_op_int_non_compact(): + def binary_op_int_non_compact_int64(): for _ in range(_testinternalcapi.SPECIALIZATION_THRESHOLD): a, b = 10000000000, 1 c = a + b @@ -1385,10 +1385,25 @@ def binary_op_int_non_compact(): c = a * b self.assertEqual(c, 10000000000) - binary_op_int_non_compact() - self.assert_no_opcode(binary_op_int_non_compact, "BINARY_OP_ADD_INT") - self.assert_no_opcode(binary_op_int_non_compact, "BINARY_OP_SUBTRACT_INT") - self.assert_no_opcode(binary_op_int_non_compact, "BINARY_OP_MULTIPLY_INT") + binary_op_int_non_compact_int64() + self.assert_specialized(binary_op_int_non_compact_int64, "BINARY_OP_ADD_INT") + self.assert_specialized(binary_op_int_non_compact_int64, "BINARY_OP_SUBTRACT_INT") + self.assert_specialized(binary_op_int_non_compact_int64, "BINARY_OP_MULTIPLY_INT") + + def binary_op_int_too_large(): + for _ in range(_testinternalcapi.SPECIALIZATION_THRESHOLD): + a, b = 1 << 200, 1 + c = a + b + self.assertEqual(c, (1 << 200) + 1) + c = a - b + self.assertEqual(c, (1 << 200) - 1) + c = a * b + self.assertEqual(c, 1 << 200) + + binary_op_int_too_large() + self.assert_no_opcode(binary_op_int_too_large, "BINARY_OP_ADD_INT") + self.assert_no_opcode(binary_op_int_too_large, "BINARY_OP_SUBTRACT_INT") + self.assert_no_opcode(binary_op_int_too_large, "BINARY_OP_MULTIPLY_INT") def binary_op_add_unicode(): for _ in range(_testinternalcapi.SPECIALIZATION_THRESHOLD): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-05-28-15-00-00.gh-issue-150424.jit-wide-int-fastpath.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-05-28-15-00-00.gh-issue-150424.jit-wide-int-fastpath.rst new file mode 100644 index 000000000000000..a424b651054b62e --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-05-28-15-00-00.gh-issue-150424.jit-wide-int-fastpath.rst @@ -0,0 +1,2 @@ +Improve interpreter and JIT integer fast paths for exact ``int`` values 
that fit +in the signed 64-bit range but are not compact Python integers. diff --git a/Modules/_testinternalcapi/test_cases.c.h b/Modules/_testinternalcapi/test_cases.c.h index 11dfcc68eb2dacd..374baf81cf3ff48 100644 --- a/Modules/_testinternalcapi/test_cases.c.h +++ b/Modules/_testinternalcapi/test_cases.c.h @@ -188,7 +188,8 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!_PyLong_CheckExactAndCompact(value_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(value_o); + if (!ok) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -198,7 +199,8 @@ { left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - if (!_PyLong_CheckExactAndCompact(left_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(left_o); + if (!ok) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -212,7 +214,6 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); - assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); res = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); if (PyStackRef_IsNull(res)) { @@ -222,6 +223,9 @@ } l = left; r = right; + if (PyStackRef_IsError(res)) { + JUMP_TO_LABEL(pop_2_error); + } } // _POP_TOP_INT { @@ -574,7 +578,8 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!_PyLong_CheckExactAndCompact(value_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(value_o); + if (!ok) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -584,7 +589,8 @@ { left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - if (!_PyLong_CheckExactAndCompact(left_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(left_o); + if 
(!ok) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -598,7 +604,6 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); - assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); res = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); if (PyStackRef_IsNull(res)) { @@ -608,6 +613,9 @@ } l = left; r = right; + if (PyStackRef_IsError(res)) { + JUMP_TO_LABEL(pop_2_error); + } } // _POP_TOP_INT { @@ -807,7 +815,8 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!_PyLong_CheckExactAndCompact(value_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(value_o); + if (!ok) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -981,7 +990,8 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!_PyLong_CheckExactAndCompact(value_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(value_o); + if (!ok) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -1070,7 +1080,8 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!_PyLong_CheckExactAndCompact(value_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(value_o); + if (!ok) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -1162,7 +1173,8 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!_PyLong_CheckExactAndCompact(value_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(value_o); + if (!ok) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -1324,7 +1336,8 @@ { value = stack_pointer[-1]; PyObject 
*value_o = PyStackRef_AsPyObjectBorrow(value); - if (!_PyLong_CheckExactAndCompact(value_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(value_o); + if (!ok) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -1334,7 +1347,8 @@ { left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - if (!_PyLong_CheckExactAndCompact(left_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(left_o); + if (!ok) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -1348,7 +1362,6 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); - assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); res = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); if (PyStackRef_IsNull(res)) { @@ -1358,6 +1371,9 @@ } l = left; r = right; + if (PyStackRef_IsError(res)) { + JUMP_TO_LABEL(pop_2_error); + } } // _POP_TOP_INT { @@ -5151,7 +5167,8 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!_PyLong_CheckExactAndCompact(value_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(value_o); + if (!ok) { UPDATE_MISS_STATS(COMPARE_OP); assert(_PyOpcode_Deopt[opcode] == (COMPARE_OP)); JUMP_TO_PREDICTED(COMPARE_OP); @@ -5161,7 +5178,8 @@ { left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - if (!_PyLong_CheckExactAndCompact(left_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(left_o); + if (!ok) { UPDATE_MISS_STATS(COMPARE_OP); assert(_PyOpcode_Deopt[opcode] == (COMPARE_OP)); JUMP_TO_PREDICTED(COMPARE_OP); @@ -5173,13 +5191,18 @@ right = value; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); - assert(_PyLong_IsCompact((PyLongObject *)left_o)); - 
assert(_PyLong_IsCompact((PyLongObject *)right_o)); STAT_INC(COMPARE_OP, hit); - assert(_PyLong_DigitCount((PyLongObject *)left_o) <= 1 && - _PyLong_DigitCount((PyLongObject *)right_o) <= 1); - Py_ssize_t ileft = _PyLong_CompactValue((PyLongObject *)left_o); - Py_ssize_t iright = _PyLong_CompactValue((PyLongObject *)right_o); + int64_t ileft; + int64_t iright; + _PyFrame_SetStackPointer(frame, stack_pointer); + int ok = _PyLong_TryAsInt64Exact((PyLongObject *)left_o, &ileft) + && _PyLong_TryAsInt64Exact((PyLongObject *)right_o, &iright); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (!ok) { + UPDATE_MISS_STATS(COMPARE_OP); + assert(_PyOpcode_Deopt[opcode] == (COMPARE_OP)); + JUMP_TO_PREDICTED(COMPARE_OP); + } int sign_ish = COMPARISON_BIT(ileft, iright); l = left; r = right; @@ -12189,7 +12212,8 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!_PyLong_CheckExactAndCompact(value_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(value_o); + if (!ok) { UPDATE_MISS_STATS(STORE_SUBSCR); assert(_PyOpcode_Deopt[opcode] == (STORE_SUBSCR)); JUMP_TO_PREDICTED(STORE_SUBSCR); @@ -12426,7 +12450,8 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!_PyLong_CheckExactAndCompact(value_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(value_o); + if (!ok) { UPDATE_MISS_STATS(TO_BOOL); assert(_PyOpcode_Deopt[opcode] == (TO_BOOL)); JUMP_TO_PREDICTED(TO_BOOL); diff --git a/Objects/longobject.c b/Objects/longobject.c index 6e6011cb19aab5f..dcd8c916a7fee24 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -322,31 +322,65 @@ _PyLong_FromSTwoDigits(stwodigits x) return (PyLongObject*)_PyLong_FromLarge(x); } -/* Create a new medium int object from a medium int. - * Do not raise. Return NULL if not medium or can't allocate. 
*/ -static inline _PyStackRef -medium_from_stwodigits(stwodigits x) +static inline bool +_Py_i64_add_overflow(int64_t a, int64_t b, int64_t *out) { - if (IS_SMALL_INT(x)) { - return PyStackRef_FromPyObjectBorrow(get_small_int((sdigit)x)); + if ((b > 0 && a > INT64_MAX - b) || (b < 0 && a < INT64_MIN - b)) { + return true; } - assert(x != 0); - if(!is_medium_int(x)) { - return PyStackRef_NULL; + *out = a + b; + return false; +} + +static inline bool +_Py_i64_sub_overflow(int64_t a, int64_t b, int64_t *out) +{ + if ((b > 0 && a < INT64_MIN + b) || (b < 0 && a > INT64_MAX + b)) { + return true; } - PyLongObject *v = (PyLongObject *)_Py_FREELIST_POP(PyLongObject, ints); - if (v == NULL) { - v = PyObject_Malloc(sizeof(PyLongObject)); - if (v == NULL) { - return PyStackRef_NULL; + *out = a - b; + return false; +} + +#if !defined(__SIZEOF_INT128__) +static inline uint64_t +_Py_uabs_i64(int64_t x) +{ + return x < 0 ? (uint64_t)(0 - (uint64_t)x) : (uint64_t)x; +} +#endif + +static inline bool +_Py_i64_mul_overflow(int64_t a, int64_t b, int64_t *out) +{ +#if defined(__SIZEOF_INT128__) + __int128 prod = (__int128)a * (__int128)b; + if (prod < INT64_MIN || prod > INT64_MAX) { + return true; + } + *out = (int64_t)prod; + return false; +#else + uint64_t ua = _Py_uabs_i64(a); + uint64_t ub = _Py_uabs_i64(b); + uint64_t limit = ((a < 0) ^ (b < 0)) ? (uint64_t)INT64_MAX + 1 : (uint64_t)INT64_MAX; + if (ua != 0 && ub > limit / ua) { + return true; + } + uint64_t uprod = ua * ub; + if ((a < 0) ^ (b < 0)) { + if (uprod == (uint64_t)INT64_MAX + 1) { + *out = INT64_MIN; + } + else { + *out = -(int64_t)uprod; } - _PyObject_Init((PyObject*)v, &PyLong_Type); - _PyLong_InitTag(v); } - digit abs_x = x < 0 ? 
(digit)(-x) : (digit)x; - _PyLong_SetSignAndDigitCount(v, x<0?-1:1, 1); - v->long_value.ob_digit[0] = abs_x; - return PyStackRef_FromPyObjectStealMortal((PyObject *)v); + else { + *out = (int64_t)uprod; + } + return false; +#endif } @@ -1324,7 +1358,6 @@ PyLong_AsNativeBytes(PyObject* vv, void* buffer, Py_ssize_t n, int flags) } if (_PyLong_IsCompact(v)) { - res = 0; cv.v = _PyLong_CompactValue(v); /* Most paths result in res = sizeof(compact value). Only the case * where 0 < n < sizeof(compact value) do we need to check and adjust @@ -2601,6 +2634,7 @@ long_from_binary_base(const char *start, const char *end, Py_ssize_t digits, int /* n <- the number of Python digits needed, = ceiling((digits * bits_per_char) / PyLong_SHIFT). */ + assert(bits_per_char > 0); if (digits > (PY_SSIZE_T_MAX - (PyLong_SHIFT - 1)) / bits_per_char) { PyErr_SetString(PyExc_ValueError, "int string too large to convert"); @@ -3866,9 +3900,19 @@ long_add(PyLongObject *a, PyLongObject *b) _PyStackRef _PyCompactLong_Add(PyLongObject *a, PyLongObject *b) { - assert(_PyLong_BothAreCompact(a, b)); - stwodigits v = medium_value(a) + medium_value(b); - return medium_from_stwodigits(v); + int64_t va, vb; + if (_PyLong_TryAsInt64Exact(a, &va) && _PyLong_TryAsInt64Exact(b, &vb)) { + int64_t v; + if (_Py_i64_add_overflow(va, vb, &v)) { + return PyStackRef_NULL; + } + PyLongObject *result = (PyLongObject *)PyLong_FromInt64(v); + if (result == NULL) { + return PyStackRef_ERROR; + } + return PyStackRef_FromPyObjectSteal((PyObject *)result); + } + return PyStackRef_NULL; } static PyObject * @@ -3911,9 +3955,19 @@ long_sub(PyLongObject *a, PyLongObject *b) _PyStackRef _PyCompactLong_Subtract(PyLongObject *a, PyLongObject *b) { - assert(_PyLong_BothAreCompact(a, b)); - stwodigits v = medium_value(a) - medium_value(b); - return medium_from_stwodigits(v); + int64_t va, vb; + if (_PyLong_TryAsInt64Exact(a, &va) && _PyLong_TryAsInt64Exact(b, &vb)) { + int64_t v; + if (_Py_i64_sub_overflow(va, vb, &v)) { + return 
PyStackRef_NULL; + } + PyLongObject *result = (PyLongObject *)PyLong_FromInt64(v); + if (result == NULL) { + return PyStackRef_ERROR; + } + return PyStackRef_FromPyObjectSteal((PyObject *)result); + } + return PyStackRef_NULL; } static PyObject * @@ -4353,14 +4407,24 @@ long_mul(PyLongObject *a, PyLongObject *b) return z; } -/* This function returns NULL if the result is not compact, - * or if it fails to allocate, but never raises */ +/* Returns PyStackRef_NULL (without raising) when an operand or the result is + * outside int64_t; returns PyStackRef_ERROR if allocating the result fails. */ _PyStackRef _PyCompactLong_Multiply(PyLongObject *a, PyLongObject *b) { - assert(_PyLong_BothAreCompact(a, b)); - stwodigits v = medium_value(a) * medium_value(b); - return medium_from_stwodigits(v); + int64_t va, vb; + if (_PyLong_TryAsInt64Exact(a, &va) && _PyLong_TryAsInt64Exact(b, &vb)) { + int64_t v; + if (_Py_i64_mul_overflow(va, vb, &v)) { + return PyStackRef_NULL; + } + PyLongObject *result = (PyLongObject *)PyLong_FromInt64(v); + if (result == NULL) { + return PyStackRef_ERROR; + } + return PyStackRef_FromPyObjectSteal((PyObject *)result); + } + return PyStackRef_NULL; } static PyObject * diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 993d231751409ba..f19039cccbd2084 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -634,24 +634,28 @@ dummy_func( op(_GUARD_NOS_INT, (left, unused -- left, unused)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - EXIT_IF(!_PyLong_CheckExactAndCompact(left_o)); + int ok = _PyLong_CheckExactAndMightFitInt64(left_o); + EXIT_IF(!ok); } op(_GUARD_TOS_INT, (value -- value)) { PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - EXIT_IF(!_PyLong_CheckExactAndCompact(value_o)); + int ok = _PyLong_CheckExactAndMightFitInt64(value_o); + EXIT_IF(!ok); } op(_GUARD_NOS_OVERFLOWED, (left, unused -- left, unused)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); assert(Py_TYPE(left_o) == &PyLong_Type); - EXIT_IF(!_PyLong_IsCompact((PyLongObject *)left_o)); + int ok = 
_PyLong_MightFitInt64((PyLongObject *)left_o); + EXIT_IF(!ok); } op(_GUARD_TOS_OVERFLOWED, (value -- value)) { PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); assert(Py_TYPE(value_o) == &PyLong_Type); - EXIT_IF(!_PyLong_IsCompact((PyLongObject *)value_o)); + int ok = _PyLong_MightFitInt64((PyLongObject *)value_o); + EXIT_IF(!ok); } pure op(_BINARY_OP_MULTIPLY_INT, (left, right -- res, l, r)) { @@ -659,7 +663,6 @@ dummy_func( PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); - assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); res = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); @@ -667,6 +670,7 @@ dummy_func( l = left; r = right; INPUTS_DEAD(); + ERROR_IF(PyStackRef_IsError(res)); } pure op(_BINARY_OP_ADD_INT, (left, right -- res, l, r)) { @@ -674,7 +678,6 @@ dummy_func( PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); - assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); res = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); @@ -682,6 +685,7 @@ dummy_func( l = left; r = right; INPUTS_DEAD(); + ERROR_IF(PyStackRef_IsError(res)); } pure op(_BINARY_OP_SUBTRACT_INT, (left, right -- res, l, r)) { @@ -689,7 +693,6 @@ dummy_func( PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); - assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); res = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); @@ -697,6 +700,7 @@ dummy_func( l = left; r = right; INPUTS_DEAD(); + ERROR_IF(PyStackRef_IsError(res)); } macro(BINARY_OP_MULTIPLY_INT) = @@ -718,6 +722,7 @@ dummy_func( l = left; r = right; INPUTS_DEAD(); + 
ERROR_IF(PyStackRef_IsError(_int_inplace_res)); } tier2 op(_BINARY_OP_SUBTRACT_INT_INPLACE, (left, right -- res, l, r)) { @@ -727,6 +732,7 @@ dummy_func( l = left; r = right; INPUTS_DEAD(); + ERROR_IF(PyStackRef_IsError(_int_inplace_res)); } tier2 op(_BINARY_OP_MULTIPLY_INT_INPLACE, (left, right -- res, l, r)) { @@ -736,6 +742,7 @@ dummy_func( l = left; r = right; INPUTS_DEAD(); + ERROR_IF(PyStackRef_IsError(_int_inplace_res)); } tier2 op(_BINARY_OP_ADD_INT_INPLACE_RIGHT, (left, right -- res, l, r)) { @@ -745,6 +752,7 @@ dummy_func( l = left; r = right; INPUTS_DEAD(); + ERROR_IF(PyStackRef_IsError(_int_inplace_res)); } tier2 op(_BINARY_OP_SUBTRACT_INT_INPLACE_RIGHT, (left, right -- res, l, r)) { @@ -754,6 +762,7 @@ dummy_func( l = left; r = right; INPUTS_DEAD(); + ERROR_IF(PyStackRef_IsError(_int_inplace_res)); } tier2 op(_BINARY_OP_MULTIPLY_INT_INPLACE_RIGHT, (left, right -- res, l, r)) { @@ -763,6 +772,7 @@ dummy_func( l = left; r = right; INPUTS_DEAD(); + ERROR_IF(PyStackRef_IsError(_int_inplace_res)); } op(_GUARD_NOS_FLOAT, (left, unused -- left, unused)) { @@ -3314,14 +3324,13 @@ dummy_func( PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); - assert(_PyLong_IsCompact((PyLongObject *)left_o)); - assert(_PyLong_IsCompact((PyLongObject *)right_o)); STAT_INC(COMPARE_OP, hit); - assert(_PyLong_DigitCount((PyLongObject *)left_o) <= 1 && - _PyLong_DigitCount((PyLongObject *)right_o) <= 1); - Py_ssize_t ileft = _PyLong_CompactValue((PyLongObject *)left_o); - Py_ssize_t iright = _PyLong_CompactValue((PyLongObject *)right_o); + int64_t ileft; + int64_t iright; + int ok = _PyLong_TryAsInt64Exact((PyLongObject *)left_o, &ileft) + && _PyLong_TryAsInt64Exact((PyLongObject *)right_o, &iright); // 2 if <, 4 if >, 8 if ==; this matches the low 4 bits of the oparg + EXIT_IF(!ok); int sign_ish = COMPARISON_BIT(ileft, iright); l = left; r = right; diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index 
c61690e8bd7240a..b4c94a2a53d05b5 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -618,10 +618,17 @@ gen_try_set_executing(PyGenObject *gen) break; \ } \ assert(_PyObject_IsUniquelyReferenced(target_o)); \ + PyLongObject *left_long = \ + (PyLongObject *)PyStackRef_AsPyObjectBorrow(left); \ + PyLongObject *right_long = \ + (PyLongObject *)PyStackRef_AsPyObjectBorrow(right); \ + if (!_PyLong_BothAreCompact(left_long, right_long)) { \ + break; \ + } \ Py_ssize_t left_val = _PyLong_CompactValue( \ - (PyLongObject *)PyStackRef_AsPyObjectBorrow(left)); \ + left_long); \ Py_ssize_t right_val = _PyLong_CompactValue( \ - (PyLongObject *)PyStackRef_AsPyObjectBorrow(right)); \ + right_long); \ Py_ssize_t result = left_val OP right_val; \ if (!_PY_IS_SMALL_INT(result) \ && ((twodigits)((stwodigits)result) + PyLong_MASK \ diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 9aaf9639b9b9015..7ada13d23d19b6d 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3827,7 +3827,8 @@ _PyStackRef left; left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - if (!_PyLong_CheckExactAndCompact(left_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(left_o); + if (!ok) { UOP_STAT_INC(uopcode, miss); SET_CURRENT_CACHED_VALUES(0); JUMP_TO_JUMP_TARGET(); @@ -3848,7 +3849,8 @@ _PyStackRef _stack_item_0 = _tos_cache0; left = stack_pointer[-1]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - if (!_PyLong_CheckExactAndCompact(left_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(left_o); + if (!ok) { UOP_STAT_INC(uopcode, miss); _tos_cache0 = _stack_item_0; SET_CURRENT_CACHED_VALUES(1); @@ -3871,7 +3873,8 @@ _PyStackRef _stack_item_1 = _tos_cache1; left = _stack_item_0; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - if (!_PyLong_CheckExactAndCompact(left_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(left_o); + if (!ok) { UOP_STAT_INC(uopcode, miss); _tos_cache1 = _stack_item_1; 
_tos_cache0 = left; @@ -3894,7 +3897,8 @@ _PyStackRef _stack_item_2 = _tos_cache2; left = _stack_item_1; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - if (!_PyLong_CheckExactAndCompact(left_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(left_o); + if (!ok) { UOP_STAT_INC(uopcode, miss); _tos_cache2 = _stack_item_2; _tos_cache1 = left; @@ -3916,7 +3920,8 @@ _PyStackRef value; value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!_PyLong_CheckExactAndCompact(value_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(value_o); + if (!ok) { UOP_STAT_INC(uopcode, miss); SET_CURRENT_CACHED_VALUES(0); JUMP_TO_JUMP_TARGET(); @@ -3936,7 +3941,8 @@ _PyStackRef _stack_item_0 = _tos_cache0; value = _stack_item_0; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!_PyLong_CheckExactAndCompact(value_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(value_o); + if (!ok) { UOP_STAT_INC(uopcode, miss); _tos_cache0 = value; SET_CURRENT_CACHED_VALUES(1); @@ -3956,7 +3962,8 @@ _PyStackRef _stack_item_1 = _tos_cache1; value = _stack_item_1; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!_PyLong_CheckExactAndCompact(value_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(value_o); + if (!ok) { UOP_STAT_INC(uopcode, miss); _tos_cache1 = value; _tos_cache0 = _stack_item_0; @@ -3979,7 +3986,8 @@ _PyStackRef _stack_item_2 = _tos_cache2; value = _stack_item_2; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!_PyLong_CheckExactAndCompact(value_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(value_o); + if (!ok) { UOP_STAT_INC(uopcode, miss); _tos_cache2 = value; _tos_cache1 = _stack_item_1; @@ -4002,7 +4010,8 @@ left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); assert(Py_TYPE(left_o) == &PyLong_Type); - if (!_PyLong_IsCompact((PyLongObject *)left_o)) { + int ok = _PyLong_MightFitInt64((PyLongObject *)left_o); + if (!ok) { UOP_STAT_INC(uopcode, miss); 
SET_CURRENT_CACHED_VALUES(0); JUMP_TO_JUMP_TARGET(); @@ -4024,7 +4033,8 @@ left = stack_pointer[-1]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); assert(Py_TYPE(left_o) == &PyLong_Type); - if (!_PyLong_IsCompact((PyLongObject *)left_o)) { + int ok = _PyLong_MightFitInt64((PyLongObject *)left_o); + if (!ok) { UOP_STAT_INC(uopcode, miss); _tos_cache0 = _stack_item_0; SET_CURRENT_CACHED_VALUES(1); @@ -4048,7 +4058,8 @@ left = _stack_item_0; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); assert(Py_TYPE(left_o) == &PyLong_Type); - if (!_PyLong_IsCompact((PyLongObject *)left_o)) { + int ok = _PyLong_MightFitInt64((PyLongObject *)left_o); + if (!ok) { UOP_STAT_INC(uopcode, miss); _tos_cache1 = _stack_item_1; _tos_cache0 = left; @@ -4072,7 +4083,8 @@ left = _stack_item_1; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); assert(Py_TYPE(left_o) == &PyLong_Type); - if (!_PyLong_IsCompact((PyLongObject *)left_o)) { + int ok = _PyLong_MightFitInt64((PyLongObject *)left_o); + if (!ok) { UOP_STAT_INC(uopcode, miss); _tos_cache2 = _stack_item_2; _tos_cache1 = left; @@ -4095,7 +4107,8 @@ value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); assert(Py_TYPE(value_o) == &PyLong_Type); - if (!_PyLong_IsCompact((PyLongObject *)value_o)) { + int ok = _PyLong_MightFitInt64((PyLongObject *)value_o); + if (!ok) { UOP_STAT_INC(uopcode, miss); SET_CURRENT_CACHED_VALUES(0); JUMP_TO_JUMP_TARGET(); @@ -4116,7 +4129,8 @@ value = _stack_item_0; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); assert(Py_TYPE(value_o) == &PyLong_Type); - if (!_PyLong_IsCompact((PyLongObject *)value_o)) { + int ok = _PyLong_MightFitInt64((PyLongObject *)value_o); + if (!ok) { UOP_STAT_INC(uopcode, miss); _tos_cache0 = value; SET_CURRENT_CACHED_VALUES(1); @@ -4137,7 +4151,8 @@ value = _stack_item_1; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); assert(Py_TYPE(value_o) == &PyLong_Type); - if (!_PyLong_IsCompact((PyLongObject *)value_o)) { + int ok = 
_PyLong_MightFitInt64((PyLongObject *)value_o); + if (!ok) { UOP_STAT_INC(uopcode, miss); _tos_cache1 = value; _tos_cache0 = _stack_item_0; @@ -4161,7 +4176,8 @@ value = _stack_item_2; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); assert(Py_TYPE(value_o) == &PyLong_Type); - if (!_PyLong_IsCompact((PyLongObject *)value_o)) { + int ok = _PyLong_MightFitInt64((PyLongObject *)value_o); + if (!ok) { UOP_STAT_INC(uopcode, miss); _tos_cache2 = value; _tos_cache1 = _stack_item_1; @@ -4191,7 +4207,6 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); - assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); res = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); if (PyStackRef_IsNull(res)) { @@ -4201,6 +4216,15 @@ } l = left; r = right; + if (PyStackRef_IsError(res)) { + stack_pointer[-2] = res; + stack_pointer[-1] = l; + stack_pointer[0] = r; + stack_pointer += 1; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + SET_CURRENT_CACHED_VALUES(0); + JUMP_TO_ERROR(); + } _tos_cache2 = r; _tos_cache1 = l; _tos_cache0 = res; @@ -4226,7 +4250,6 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); - assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); res = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); if (PyStackRef_IsNull(res)) { @@ -4237,6 +4260,15 @@ } l = left; r = right; + if (PyStackRef_IsError(res)) { + stack_pointer[-1] = res; + stack_pointer[0] = l; + stack_pointer[1] = r; + stack_pointer += 2; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + SET_CURRENT_CACHED_VALUES(0); + JUMP_TO_ERROR(); + } _tos_cache2 = r; _tos_cache1 = l; _tos_cache0 = res; @@ -4263,7 +4295,6 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); 
assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); - assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); res = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); if (PyStackRef_IsNull(res)) { @@ -4275,6 +4306,10 @@ } l = left; r = right; + if (PyStackRef_IsError(res)) { + SET_CURRENT_CACHED_VALUES(0); + JUMP_TO_ERROR(); + } _tos_cache2 = r; _tos_cache1 = l; _tos_cache0 = res; @@ -4297,7 +4332,6 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); - assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); res = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); if (PyStackRef_IsNull(res)) { @@ -4307,6 +4341,15 @@ } l = left; r = right; + if (PyStackRef_IsError(res)) { + stack_pointer[-2] = res; + stack_pointer[-1] = l; + stack_pointer[0] = r; + stack_pointer += 1; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + SET_CURRENT_CACHED_VALUES(0); + JUMP_TO_ERROR(); + } _tos_cache2 = r; _tos_cache1 = l; _tos_cache0 = res; @@ -4332,7 +4375,6 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); - assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); res = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); if (PyStackRef_IsNull(res)) { @@ -4343,6 +4385,15 @@ } l = left; r = right; + if (PyStackRef_IsError(res)) { + stack_pointer[-1] = res; + stack_pointer[0] = l; + stack_pointer[1] = r; + stack_pointer += 2; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + SET_CURRENT_CACHED_VALUES(0); + JUMP_TO_ERROR(); + } _tos_cache2 = r; _tos_cache1 = l; _tos_cache0 = res; @@ -4369,7 +4420,6 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); 
assert(PyLong_CheckExact(right_o)); - assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); res = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); if (PyStackRef_IsNull(res)) { @@ -4381,6 +4431,10 @@ } l = left; r = right; + if (PyStackRef_IsError(res)) { + SET_CURRENT_CACHED_VALUES(0); + JUMP_TO_ERROR(); + } _tos_cache2 = r; _tos_cache1 = l; _tos_cache0 = res; @@ -4403,7 +4457,6 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); - assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); res = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); if (PyStackRef_IsNull(res)) { @@ -4413,6 +4466,15 @@ } l = left; r = right; + if (PyStackRef_IsError(res)) { + stack_pointer[-2] = res; + stack_pointer[-1] = l; + stack_pointer[0] = r; + stack_pointer += 1; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + SET_CURRENT_CACHED_VALUES(0); + JUMP_TO_ERROR(); + } _tos_cache2 = r; _tos_cache1 = l; _tos_cache0 = res; @@ -4438,7 +4500,6 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); - assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); res = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); if (PyStackRef_IsNull(res)) { @@ -4449,6 +4510,15 @@ } l = left; r = right; + if (PyStackRef_IsError(res)) { + stack_pointer[-1] = res; + stack_pointer[0] = l; + stack_pointer[1] = r; + stack_pointer += 2; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + SET_CURRENT_CACHED_VALUES(0); + JUMP_TO_ERROR(); + } _tos_cache2 = r; _tos_cache1 = l; _tos_cache0 = res; @@ -4475,7 +4545,6 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); - 
assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); res = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); if (PyStackRef_IsNull(res)) { @@ -4487,6 +4556,10 @@ } l = left; r = right; + if (PyStackRef_IsError(res)) { + SET_CURRENT_CACHED_VALUES(0); + JUMP_TO_ERROR(); + } _tos_cache2 = r; _tos_cache1 = l; _tos_cache0 = res; @@ -4514,6 +4587,15 @@ res = _int_inplace_res; l = left; r = right; + if (PyStackRef_IsError(_int_inplace_res)) { + stack_pointer[-2] = res; + stack_pointer[-1] = l; + stack_pointer[0] = r; + stack_pointer += 1; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + SET_CURRENT_CACHED_VALUES(0); + JUMP_TO_ERROR(); + } _tos_cache2 = r; _tos_cache1 = l; _tos_cache0 = res; @@ -4545,6 +4627,15 @@ res = _int_inplace_res; l = left; r = right; + if (PyStackRef_IsError(_int_inplace_res)) { + stack_pointer[-1] = res; + stack_pointer[0] = l; + stack_pointer[1] = r; + stack_pointer += 2; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + SET_CURRENT_CACHED_VALUES(0); + JUMP_TO_ERROR(); + } _tos_cache2 = r; _tos_cache1 = l; _tos_cache0 = res; @@ -4578,6 +4669,10 @@ res = _int_inplace_res; l = left; r = right; + if (PyStackRef_IsError(_int_inplace_res)) { + SET_CURRENT_CACHED_VALUES(0); + JUMP_TO_ERROR(); + } _tos_cache2 = r; _tos_cache1 = l; _tos_cache0 = res; @@ -4605,6 +4700,15 @@ res = _int_inplace_res; l = left; r = right; + if (PyStackRef_IsError(_int_inplace_res)) { + stack_pointer[-2] = res; + stack_pointer[-1] = l; + stack_pointer[0] = r; + stack_pointer += 1; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + SET_CURRENT_CACHED_VALUES(0); + JUMP_TO_ERROR(); + } _tos_cache2 = r; _tos_cache1 = l; _tos_cache0 = res; @@ -4636,6 +4740,15 @@ res = _int_inplace_res; l = left; r = right; + if (PyStackRef_IsError(_int_inplace_res)) { + stack_pointer[-1] = res; + stack_pointer[0] = l; + stack_pointer[1] = r; + stack_pointer += 2; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + 
SET_CURRENT_CACHED_VALUES(0); + JUMP_TO_ERROR(); + } _tos_cache2 = r; _tos_cache1 = l; _tos_cache0 = res; @@ -4669,6 +4782,10 @@ res = _int_inplace_res; l = left; r = right; + if (PyStackRef_IsError(_int_inplace_res)) { + SET_CURRENT_CACHED_VALUES(0); + JUMP_TO_ERROR(); + } _tos_cache2 = r; _tos_cache1 = l; _tos_cache0 = res; @@ -4696,6 +4813,15 @@ res = _int_inplace_res; l = left; r = right; + if (PyStackRef_IsError(_int_inplace_res)) { + stack_pointer[-2] = res; + stack_pointer[-1] = l; + stack_pointer[0] = r; + stack_pointer += 1; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + SET_CURRENT_CACHED_VALUES(0); + JUMP_TO_ERROR(); + } _tos_cache2 = r; _tos_cache1 = l; _tos_cache0 = res; @@ -4727,6 +4853,15 @@ res = _int_inplace_res; l = left; r = right; + if (PyStackRef_IsError(_int_inplace_res)) { + stack_pointer[-1] = res; + stack_pointer[0] = l; + stack_pointer[1] = r; + stack_pointer += 2; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + SET_CURRENT_CACHED_VALUES(0); + JUMP_TO_ERROR(); + } _tos_cache2 = r; _tos_cache1 = l; _tos_cache0 = res; @@ -4760,6 +4895,10 @@ res = _int_inplace_res; l = left; r = right; + if (PyStackRef_IsError(_int_inplace_res)) { + SET_CURRENT_CACHED_VALUES(0); + JUMP_TO_ERROR(); + } _tos_cache2 = r; _tos_cache1 = l; _tos_cache0 = res; @@ -4787,6 +4926,15 @@ res = _int_inplace_res; l = left; r = right; + if (PyStackRef_IsError(_int_inplace_res)) { + stack_pointer[-2] = res; + stack_pointer[-1] = l; + stack_pointer[0] = r; + stack_pointer += 1; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + SET_CURRENT_CACHED_VALUES(0); + JUMP_TO_ERROR(); + } _tos_cache2 = r; _tos_cache1 = l; _tos_cache0 = res; @@ -4818,6 +4966,15 @@ res = _int_inplace_res; l = left; r = right; + if (PyStackRef_IsError(_int_inplace_res)) { + stack_pointer[-1] = res; + stack_pointer[0] = l; + stack_pointer[1] = r; + stack_pointer += 2; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + SET_CURRENT_CACHED_VALUES(0); + JUMP_TO_ERROR(); + } _tos_cache2 = r; 
_tos_cache1 = l; _tos_cache0 = res; @@ -4851,6 +5008,10 @@ res = _int_inplace_res; l = left; r = right; + if (PyStackRef_IsError(_int_inplace_res)) { + SET_CURRENT_CACHED_VALUES(0); + JUMP_TO_ERROR(); + } _tos_cache2 = r; _tos_cache1 = l; _tos_cache0 = res; @@ -4878,6 +5039,15 @@ res = _int_inplace_res; l = left; r = right; + if (PyStackRef_IsError(_int_inplace_res)) { + stack_pointer[-2] = res; + stack_pointer[-1] = l; + stack_pointer[0] = r; + stack_pointer += 1; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + SET_CURRENT_CACHED_VALUES(0); + JUMP_TO_ERROR(); + } _tos_cache2 = r; _tos_cache1 = l; _tos_cache0 = res; @@ -4909,6 +5079,15 @@ res = _int_inplace_res; l = left; r = right; + if (PyStackRef_IsError(_int_inplace_res)) { + stack_pointer[-1] = res; + stack_pointer[0] = l; + stack_pointer[1] = r; + stack_pointer += 2; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + SET_CURRENT_CACHED_VALUES(0); + JUMP_TO_ERROR(); + } _tos_cache2 = r; _tos_cache1 = l; _tos_cache0 = res; @@ -4942,6 +5121,10 @@ res = _int_inplace_res; l = left; r = right; + if (PyStackRef_IsError(_int_inplace_res)) { + SET_CURRENT_CACHED_VALUES(0); + JUMP_TO_ERROR(); + } _tos_cache2 = r; _tos_cache1 = l; _tos_cache0 = res; @@ -4969,6 +5152,15 @@ res = _int_inplace_res; l = left; r = right; + if (PyStackRef_IsError(_int_inplace_res)) { + stack_pointer[-2] = res; + stack_pointer[-1] = l; + stack_pointer[0] = r; + stack_pointer += 1; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + SET_CURRENT_CACHED_VALUES(0); + JUMP_TO_ERROR(); + } _tos_cache2 = r; _tos_cache1 = l; _tos_cache0 = res; @@ -5000,6 +5192,15 @@ res = _int_inplace_res; l = left; r = right; + if (PyStackRef_IsError(_int_inplace_res)) { + stack_pointer[-1] = res; + stack_pointer[0] = l; + stack_pointer[1] = r; + stack_pointer += 2; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + SET_CURRENT_CACHED_VALUES(0); + JUMP_TO_ERROR(); + } _tos_cache2 = r; _tos_cache1 = l; _tos_cache0 = res; @@ -5033,6 +5234,10 @@ res = 
_int_inplace_res; l = left; r = right; + if (PyStackRef_IsError(_int_inplace_res)) { + SET_CURRENT_CACHED_VALUES(0); + JUMP_TO_ERROR(); + } _tos_cache2 = r; _tos_cache1 = l; _tos_cache0 = res; @@ -13259,13 +13464,26 @@ left = _stack_item_0; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); - assert(_PyLong_IsCompact((PyLongObject *)left_o)); - assert(_PyLong_IsCompact((PyLongObject *)right_o)); STAT_INC(COMPARE_OP, hit); - assert(_PyLong_DigitCount((PyLongObject *)left_o) <= 1 && - _PyLong_DigitCount((PyLongObject *)right_o) <= 1); - Py_ssize_t ileft = _PyLong_CompactValue((PyLongObject *)left_o); - Py_ssize_t iright = _PyLong_CompactValue((PyLongObject *)right_o); + int64_t ileft; + int64_t iright; + stack_pointer[0] = left; + stack_pointer[1] = right; + stack_pointer += 2; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + int ok = _PyLong_TryAsInt64Exact((PyLongObject *)left_o, &ileft) + && _PyLong_TryAsInt64Exact((PyLongObject *)right_o, &iright); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (!ok) { + UOP_STAT_INC(uopcode, miss); + _tos_cache1 = right; + _tos_cache0 = left; + SET_CURRENT_CACHED_VALUES(2); + stack_pointer += -2; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + JUMP_TO_JUMP_TARGET(); + } int sign_ish = COMPARISON_BIT(ileft, iright); l = left; r = right; @@ -13274,6 +13492,8 @@ _tos_cache1 = l; _tos_cache0 = res; SET_CURRENT_CACHED_VALUES(3); + stack_pointer += -2; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE()); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 94384d5db3c107f..0559054b7a403e8 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -188,7 +188,8 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!_PyLong_CheckExactAndCompact(value_o)) { + int ok = 
_PyLong_CheckExactAndMightFitInt64(value_o); + if (!ok) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -198,7 +199,8 @@ { left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - if (!_PyLong_CheckExactAndCompact(left_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(left_o); + if (!ok) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -212,7 +214,6 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); - assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); res = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); if (PyStackRef_IsNull(res)) { @@ -222,6 +223,9 @@ } l = left; r = right; + if (PyStackRef_IsError(res)) { + JUMP_TO_LABEL(pop_2_error); + } } // _POP_TOP_INT { @@ -574,7 +578,8 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!_PyLong_CheckExactAndCompact(value_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(value_o); + if (!ok) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -584,7 +589,8 @@ { left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - if (!_PyLong_CheckExactAndCompact(left_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(left_o); + if (!ok) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -598,7 +604,6 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); - assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); res = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); if 
(PyStackRef_IsNull(res)) { @@ -608,6 +613,9 @@ } l = left; r = right; + if (PyStackRef_IsError(res)) { + JUMP_TO_LABEL(pop_2_error); + } } // _POP_TOP_INT { @@ -807,7 +815,8 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!_PyLong_CheckExactAndCompact(value_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(value_o); + if (!ok) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -981,7 +990,8 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!_PyLong_CheckExactAndCompact(value_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(value_o); + if (!ok) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -1070,7 +1080,8 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!_PyLong_CheckExactAndCompact(value_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(value_o); + if (!ok) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -1162,7 +1173,8 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!_PyLong_CheckExactAndCompact(value_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(value_o); + if (!ok) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -1324,7 +1336,8 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!_PyLong_CheckExactAndCompact(value_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(value_o); + if (!ok) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -1334,7 +1347,8 @@ { left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - if (!_PyLong_CheckExactAndCompact(left_o)) { + int ok = 
_PyLong_CheckExactAndMightFitInt64(left_o); + if (!ok) { UPDATE_MISS_STATS(BINARY_OP); assert(_PyOpcode_Deopt[opcode] == (BINARY_OP)); JUMP_TO_PREDICTED(BINARY_OP); @@ -1348,7 +1362,6 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); - assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); res = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); if (PyStackRef_IsNull(res)) { @@ -1358,6 +1371,9 @@ } l = left; r = right; + if (PyStackRef_IsError(res)) { + JUMP_TO_LABEL(pop_2_error); + } } // _POP_TOP_INT { @@ -5151,7 +5167,8 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!_PyLong_CheckExactAndCompact(value_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(value_o); + if (!ok) { UPDATE_MISS_STATS(COMPARE_OP); assert(_PyOpcode_Deopt[opcode] == (COMPARE_OP)); JUMP_TO_PREDICTED(COMPARE_OP); @@ -5161,7 +5178,8 @@ { left = stack_pointer[-2]; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - if (!_PyLong_CheckExactAndCompact(left_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(left_o); + if (!ok) { UPDATE_MISS_STATS(COMPARE_OP); assert(_PyOpcode_Deopt[opcode] == (COMPARE_OP)); JUMP_TO_PREDICTED(COMPARE_OP); @@ -5173,13 +5191,18 @@ right = value; PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); - assert(_PyLong_IsCompact((PyLongObject *)left_o)); - assert(_PyLong_IsCompact((PyLongObject *)right_o)); STAT_INC(COMPARE_OP, hit); - assert(_PyLong_DigitCount((PyLongObject *)left_o) <= 1 && - _PyLong_DigitCount((PyLongObject *)right_o) <= 1); - Py_ssize_t ileft = _PyLong_CompactValue((PyLongObject *)left_o); - Py_ssize_t iright = _PyLong_CompactValue((PyLongObject *)right_o); + int64_t ileft; + int64_t iright; + _PyFrame_SetStackPointer(frame, stack_pointer); + int ok = _PyLong_TryAsInt64Exact((PyLongObject 
*)left_o, &ileft) + && _PyLong_TryAsInt64Exact((PyLongObject *)right_o, &iright); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (!ok) { + UPDATE_MISS_STATS(COMPARE_OP); + assert(_PyOpcode_Deopt[opcode] == (COMPARE_OP)); + JUMP_TO_PREDICTED(COMPARE_OP); + } int sign_ish = COMPARISON_BIT(ileft, iright); l = left; r = right; @@ -12186,7 +12209,8 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!_PyLong_CheckExactAndCompact(value_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(value_o); + if (!ok) { UPDATE_MISS_STATS(STORE_SUBSCR); assert(_PyOpcode_Deopt[opcode] == (STORE_SUBSCR)); JUMP_TO_PREDICTED(STORE_SUBSCR); @@ -12423,7 +12447,8 @@ { value = stack_pointer[-1]; PyObject *value_o = PyStackRef_AsPyObjectBorrow(value); - if (!_PyLong_CheckExactAndCompact(value_o)) { + int ok = _PyLong_CheckExactAndMightFitInt64(value_o); + if (!ok) { UPDATE_MISS_STATS(TO_BOOL); assert(_PyOpcode_Deopt[opcode] == (TO_BOOL)); JUMP_TO_PREDICTED(TO_BOOL); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 8895e02d47b1693..557b68ef15cca16 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -622,7 +622,6 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); - assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); res_stackref = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); if (PyStackRef_IsNull(res_stackref )) { @@ -631,6 +630,9 @@ } l_stackref = left; r_stackref = right; + if (PyStackRef_IsError(res_stackref )) { + goto error; + } /* End of uop copied from bytecodes for constant evaluation */ (void)l_stackref; (void)r_stackref; @@ -693,7 +695,6 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); - assert(_PyLong_BothAreCompact((PyLongObject *)left_o, 
(PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); res_stackref = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); if (PyStackRef_IsNull(res_stackref )) { @@ -702,6 +703,9 @@ } l_stackref = left; r_stackref = right; + if (PyStackRef_IsError(res_stackref )) { + goto error; + } /* End of uop copied from bytecodes for constant evaluation */ (void)l_stackref; (void)r_stackref; @@ -764,7 +768,6 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(PyLong_CheckExact(left_o)); assert(PyLong_CheckExact(right_o)); - assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); STAT_INC(BINARY_OP, hit); res_stackref = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); if (PyStackRef_IsNull(res_stackref )) { @@ -773,6 +776,9 @@ } l_stackref = left; r_stackref = right; + if (PyStackRef_IsError(res_stackref )) { + goto error; + } /* End of uop copied from bytecodes for constant evaluation */ (void)l_stackref; (void)r_stackref; @@ -3063,13 +3069,15 @@ /* Start of uop copied from bytecodes for constant evaluation */ PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); - assert(_PyLong_IsCompact((PyLongObject *)left_o)); - assert(_PyLong_IsCompact((PyLongObject *)right_o)); STAT_INC(COMPARE_OP, hit); - assert(_PyLong_DigitCount((PyLongObject *)left_o) <= 1 && - _PyLong_DigitCount((PyLongObject *)right_o) <= 1); - Py_ssize_t ileft = _PyLong_CompactValue((PyLongObject *)left_o); - Py_ssize_t iright = _PyLong_CompactValue((PyLongObject *)right_o); + int64_t ileft; + int64_t iright; + int ok = _PyLong_TryAsInt64Exact((PyLongObject *)left_o, &ileft) + && _PyLong_TryAsInt64Exact((PyLongObject *)right_o, &iright); + if (!ok) { + ctx->done = true; + break; + } int sign_ish = COMPARISON_BIT(ileft, iright); l_stackref = left; r_stackref = right; diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index 79f81482d247e37..70b45f4e4962d0b 100644 
--- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -272,7 +272,7 @@ _Py_uop_sym_is_safe_const(JitOptContext *ctx, JitOptRef sym) if (const_val == NULL) { return false; } - if (_PyLong_CheckExactAndCompact(const_val)) { + if (_PyLong_CheckExactAndMightFitInt64(const_val)) { return true; } PyTypeObject *typ = Py_TYPE(const_val); @@ -566,7 +566,7 @@ _Py_uop_sym_set_const(JitOptContext *ctx, JitOptRef ref, PyObject *const_val) make_const(sym, const_val); return; case JIT_SYM_COMPACT_INT: - if (_PyLong_CheckExactAndCompact(const_val)) { + if (_PyLong_CheckExactAndMightFitInt64(const_val)) { make_const(sym, const_val); } else { @@ -970,7 +970,7 @@ _Py_uop_sym_is_compact_int(JitOptRef ref) { JitOptSymbol *sym = PyJitRef_Unwrap(ref); if (sym->tag == JIT_SYM_KNOWN_VALUE_TAG) { - return (bool)_PyLong_CheckExactAndCompact(sym->value.value); + return (bool)_PyLong_CheckExactAndMightFitInt64(sym->value.value); } return sym->tag == JIT_SYM_COMPACT_INT; } @@ -1008,7 +1008,7 @@ _Py_uop_sym_set_compact_int(JitOptContext *ctx, JitOptRef ref) } return; case JIT_SYM_KNOWN_VALUE_TAG: - if (!_PyLong_CheckExactAndCompact(sym->value.value)) { + if (!_PyLong_CheckExactAndMightFitInt64(sym->value.value)) { Py_CLEAR(sym->value.value); sym_set_bottom(ctx, sym); } @@ -1954,7 +1954,7 @@ _Py_uop_symbols_test(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored)) TEST_PREDICATE(_Py_uop_sym_is_const(ctx, value) == true, "value is not constant"); TEST_PREDICATE(_Py_uop_sym_get_const(ctx, value) == one_obj, "value is not 1"); - val_big = PyNumber_Lshift(_PyLong_GetOne(), PyLong_FromLong(66)); + val_big = PyNumber_Lshift(_PyLong_GetOne(), PyLong_FromLong(200)); if (val_big == NULL) { goto fail; } @@ -1963,7 +1963,7 @@ _Py_uop_symbols_test(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored)) JitOptRef ref_big = _Py_uop_sym_new_const(ctx, val_big); JitOptRef ref_int = _Py_uop_sym_new_compact_int(ctx); TEST_PREDICATE(_Py_uop_sym_is_compact_int(ref_42), "42 is not a compact int"); 
- TEST_PREDICATE(!_Py_uop_sym_is_compact_int(ref_big), "(1 << 66) is a compact int"); + TEST_PREDICATE(!_Py_uop_sym_is_compact_int(ref_big), "(1 << 200) is a compact int"); TEST_PREDICATE(_Py_uop_sym_is_compact_int(ref_int), "compact int is not a compact int"); TEST_PREDICATE(_Py_uop_sym_matches_type(ref_int, &PyLong_Type), "compact int is not an int"); diff --git a/Python/specialize.c b/Python/specialize.c index 2ff0a9d0072cec3..276a2512d7ba4c6 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2350,7 +2350,9 @@ _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *in specialize(instr, BINARY_OP_ADD_UNICODE); return; } - if (_PyLong_CheckExactAndCompact(lhs) && _PyLong_CheckExactAndCompact(rhs)) { + if (_PyLong_CheckExactAndMightFitInt64(lhs) && + _PyLong_CheckExactAndMightFitInt64(rhs)) + { specialize(instr, BINARY_OP_ADD_INT); return; } @@ -2364,7 +2366,9 @@ _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *in if (!Py_IS_TYPE(lhs, Py_TYPE(rhs))) { break; } - if (_PyLong_CheckExactAndCompact(lhs) && _PyLong_CheckExactAndCompact(rhs)) { + if (_PyLong_CheckExactAndMightFitInt64(lhs) && + _PyLong_CheckExactAndMightFitInt64(rhs)) + { specialize(instr, BINARY_OP_MULTIPLY_INT); return; } @@ -2378,7 +2382,9 @@ _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *in if (!Py_IS_TYPE(lhs, Py_TYPE(rhs))) { break; } - if (_PyLong_CheckExactAndCompact(lhs) && _PyLong_CheckExactAndCompact(rhs)) { + if (_PyLong_CheckExactAndMightFitInt64(lhs) && + _PyLong_CheckExactAndMightFitInt64(rhs)) + { specialize(instr, BINARY_OP_SUBTRACT_INT); return; } diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 22a321b4953de7d..9350033054e0065 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -721,6 +721,8 @@ def has_error_without_pop(op: parser.CodeDef) -> bool: "PyStackRef_Wrap", "PyStackRef_Unwrap",
"_PyLong_CheckExactAndCompact", + "_PyLong_CheckExactAndMightFitInt64", + "_PyLong_MightFitInt64", "_PyExecutor_FromExit", "_PyJit_TryInitializeTracing", "_Py_unset_eval_breaker_bit",
diff --git a/Tools/scripts/jit_int_benchmark.py b/Tools/scripts/jit_int_benchmark.py
new file mode 100644
index 000000000000000..d049171dcf1c0d2
--- /dev/null
+++ b/Tools/scripts/jit_int_benchmark.py
@@ -0,0 +1,170 @@
+"""
+Benchmark for JIT int fast path optimization.
+
+Measures performance of integer arithmetic in hot loops
+where intermediate results overflow the 30-bit compact range.
+
+Usage:
+    python Tools/scripts/jit_int_benchmark.py
+    python -X jit Tools/scripts/jit_int_benchmark.py
+    PYTHON_JIT_STRESS=1 python -X jit Tools/scripts/jit_int_benchmark.py
+
+For statistically rigorous comparison:
+    python -m pyperf timeit "..."  # microbenchmark
+    pyperformance run -b  # application benchmarks
+"""
+
+import os
+import statistics
+import sys
+import time
+
+
+# Benchmark configurations
+N = 5_000_000
+WARMUP = 3
+RUNS = 7
+
+
+def bench_intermediate_overflow():
+    """a + b exceeds 30-bit compact range, but (a+b)-c fits back in.
+    Tests the case where JIT would exit on the intermediate result."""
+    a = (1 << 30) - 1
+    b = (1 << 30) - 1000
+    c = 1 << 30
+    total = 0
+    for i in range(N):
+        total += a + b - c
+    return total
+
+
+def bench_double_add():
+    """Two adds in a row: (a+b)+c where intermediate is non-compact."""
+    a = (1 << 30) - 1
+    b = (1 << 30) - 1000
+    c = -500
+    total = 0
+    for i in range(N):
+        total += a + b + c
+    return total
+
+
+def bench_accumulate():
+    """Values slowly grow through the compact boundary.
+    Tests the case where a non-compact value is accumulated."""
+    total = 0
+    for i in range(N):
+        total += 1000000
+    return total
+
+
+def bench_always_large():
+    """Both inputs are compact, but the result always exceeds compact range.
+    Tests the result widening optimization."""
+    total = 0
+    a = 1 << 29
+    b = 1 << 29
+    for i in range(N):
+        total += a + b
+        total &= (1 << 62) - 1
+    return total
+
+
+def bench_mixed():
+    """Mix of small and medium ints alternating.
+    Tests ability to stay in JIT trace across varying int sizes."""
+    total = 0
+    for i in range(N):
+        if i & 1:
+            total += (1 << 30) + (i & 0xFF)
+        else:
+            total += i & 0xFFF
+        total &= (1 << 61) - 1
+    return total
+
+
+def bench_chain():
+    """Chained operations: a + b + c + d.
+    Tests consecutive int ops without boxing between them."""
+    total = 0
+    for i in range(N // 4):
+        total += i + i + i + i
+    return total
+
+
+def _jit_enabled():
+    """Return True when the JIT is enabled for this process."""
+    jit_info = getattr(sys, "_jit", None)
+    if jit_info is not None:
+        # Ask the interpreter itself where it exposes sys._jit.
+        return jit_info.is_enabled()
+    # Fallback for builds without sys._jit: PYTHON_JIT=0 means "off".
+    if "jit" in sys._xoptions:
+        return True
+    return os.environ.get("PYTHON_JIT", "0") not in ("", "0")
+
+
+def _format_result(name, times, iterations, last_result):
+    """Build the one-line report for a benchmark.
+
+    times holds the wall-clock seconds of each timed run; iterations is the
+    number of loop iterations a single run executes, used for ns/iter.
+    """
+    mean = statistics.mean(times)
+    stdev = statistics.stdev(times) if len(times) > 1 else 0
+    ns_per = mean / iterations * 1e9
+    # A zero mean (clock too coarse for the workload) has no relative spread.
+    spread = stdev / mean * 100 if mean else 0.0
+    return (
+        f" {name:30s} {mean * 1000:7.2f} ms {ns_per:5.1f} ns/iter"
+        f" ±{spread:4.1f}% (verify={str(last_result)[:12]})"
+    )
+
+
+def run_benchmark(name, fn, runs=RUNS, warmup=WARMUP, iterations=N):
+    """Time fn and print its mean run time, ns/iter and relative spread.
+
+    fn is called warmup times untimed, then runs times under
+    time.perf_counter().  iterations is the loop count of one fn() call;
+    it defaults to N and must be overridden for benchmarks that loop a
+    different number of times (bench_chain runs N // 4 iterations).
+    """
+    # Warmup
+    for _ in range(warmup):
+        fn()
+
+    # Timed runs
+    times = []
+    last_result = None
+    for _ in range(runs):
+        start = time.perf_counter()
+        last_result = fn()
+        elapsed = time.perf_counter() - start
+        times.append(elapsed)
+
+    print(_format_result(name, times, iterations, last_result))
+
+
+if __name__ == "__main__":
+    print(
+        f"Python {sys.version.split()[0]} ({sys.platform})"
+        f" JIT={'on' if _jit_enabled() else 'off'} PYTHON_JIT_STRESS="
+        f"{os.environ.get('PYTHON_JIT_STRESS', '0')}"
+    )
+    print()
+
+    # (name, function, loop iterations per call)
+    benchmarks = [
+        ("intermediate_overflow", bench_intermediate_overflow, N),
+        ("double_add", bench_double_add, N),
+        ("always_large", bench_always_large, N),
+        ("accumulate", bench_accumulate, N),
+        ("mixed", bench_mixed, N),
+        ("chain", bench_chain, N // 4),
+    ]
+
+    for name, fn, iterations in benchmarks:
+        run_benchmark(name, fn, iterations=iterations)
+
+    print()
+    print("Done.")
diff --git a/Tools/scripts/jit_int_benchmark_pyperf.py b/Tools/scripts/jit_int_benchmark_pyperf.py new file mode 100644 index 000000000000000..2caf615d556b2c1 ---
/dev/null
+++ b/Tools/scripts/jit_int_benchmark_pyperf.py
@@ -0,0 +1,321 @@
+"""pyperf microbenchmark for JIT int fast path.
+
+Measures int arithmetic performance in hot JIT loops where
+intermediate results overflow the 30-bit compact range.
+
+Usage:
+    ./python Tools/scripts/jit_int_benchmark_pyperf.py -o nojit.json
+    ./python -X jit Tools/scripts/jit_int_benchmark_pyperf.py -o jit.json
+    ./python -m pyperf compare_to nojit.json jit.json
+"""
+
+import time
+
+# Every bench_* function below follows pyperf's time-function protocol: it
+# runs its unrolled workload ``loops`` times and returns the elapsed
+# wall-clock seconds measured with time.perf_counter().
+
+
+def bench_intermediate_overflow(loops):
+    """a + b exceeds 30-bit range, (a+b)-c fits back in."""
+    a = (1 << 30) - 1
+    b = (1 << 30) - 1000
+    c = 1 << 30
+    t0 = time.perf_counter()
+    for _ in range(loops):
+        # inner loop: 50 ops to amplify the signal
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+        x = a + b - c
+    return time.perf_counter() - t0
+
+
+def bench_double_add(loops):
+    """(a+b)+c where a+b is non-compact but a,b,c are compact."""
+    a = (1 << 30) - 1
+    b = (1 << 30) - 1000
+    c = -500
+    t0 = time.perf_counter()
+    for _ in range(loops):
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+        x = a + b + c
+    return time.perf_counter() - t0
+
+
+def bench_accumulate(loops):
+    """Values grow through compact boundary (2^30)."""
+    total = 0
+    t0 = time.perf_counter()
+    for _ in range(loops):
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+        total += 1000000
+    return time.perf_counter() - t0
+
+
+def bench_always_large(loops):
+    """Compact inputs, non-compact result."""
+    total = 0
+    va = 1 << 29
+    vb = 1 << 29
+    t0 = time.perf_counter()
+    for _ in range(loops):
+        total += va + vb
+        total &= (1 << 62) - 1
+        total += va + vb
+        total &= (1 << 62) - 1
+        total += va + vb
+        total &= (1 << 62) - 1
+        total += va + vb
+        total &= (1 << 62) - 1
+        total += va + vb
+        total &= (1 << 62) - 1
+        total += va + vb
+        total &= (1 << 62) - 1
+        total += va + vb
+        total &= (1 << 62) - 1
+        total += va + vb
+        total &= (1 << 62) - 1
+        total += va + vb
+        total &= (1 << 62) - 1
+        total += va + vb
+        total &= (1 << 62) - 1
+    return time.perf_counter() - t0
+
+
+def bench_mixed(loops):
+    """Alternating small/medium ints."""
+    total = 0
+    t0 = time.perf_counter()
+    for _ in range(loops):
+        total += (1 << 30) + 1
+        total += 100
+        total += (1 << 30) + 2
+        total += 200
+        total += (1 << 30) + 3
+        total += 300
+        total += (1 << 30) + 4
+        total += 400
+        total += (1 << 30) + 5
+        total += 500
+    return time.perf_counter() - t0
+
+
+def bench_small(loops):
+    """All small ints, bounded to stay in the small-int cache."""
+    total = 0
+    t0 = time.perf_counter()
+    for _ in range(loops):
+        total = (total + 1) & 255
+        total = (total + 2) & 255
+        total = (total + 3) & 255
+        total = (total + 4) & 255
+        total = (total + 5) & 255
+        total = (total + 6) & 255
+        total = (total + 7) & 255
+        total = (total + 8) & 255
+        total = (total + 9) & 255
+        total = (total + 10) & 255
+        total = (total + 11) & 255
+        total = (total + 12) & 255
+        total = (total + 13) & 255
+        total = (total + 14) & 255
+        total = (total + 15) & 255
+        total = (total + 16) & 255
+        total = (total + 17) & 255
+        total = (total + 18) & 255
+        total = (total + 19) & 255
+        total = (total + 20) & 255
+    return time.perf_counter() - t0
+
+
+def bench_compact(loops):
+    """Compact ints outside the small-int cache, bounded below 2**30."""
+    total = 1 << 20
+    mask = (1 << 29) - 1
+    t0 = time.perf_counter()
+    for _ in range(loops):
+        total = (total + 10_000_001) & mask
+        total = (total + 10_000_003) & mask
+        total = (total + 10_000_019) & mask
+        total = (total + 10_000_079) & mask
+        total = (total + 10_000_103) & mask
+        total = (total + 10_000_121) & mask
+        total = (total + 10_000_123) & mask
+        total = (total + 10_000_133) & mask
+        total = (total + 10_000_139) & mask
+        total = (total + 10_000_159) & mask
+    return time.perf_counter() - t0
+
+
+BENCHMARKS = [
+    ("jit_int_small", bench_small),
+    ("jit_int_compact", bench_compact),
+    ("jit_int_intermediate_overflow", bench_intermediate_overflow),
+    ("jit_int_double_add", bench_double_add),
+    ("jit_int_accumulate", bench_accumulate),
+    ("jit_int_always_large", bench_always_large),
+    ("jit_int_mixed", bench_mixed),
+]
+
+# Unrolled repetitions inside each benchmark's loop body.  Passed to pyperf
+# as inner_loops so results are reported per repetition rather than per
+# unrolled batch, which keeps benchmarks with different unroll counts
+# comparable.
+INNER_LOOPS = {
+    "jit_int_small": 20,
+    "jit_int_compact": 10,
+    "jit_int_intermediate_overflow": 50,
+    "jit_int_double_add": 50,
+    "jit_int_accumulate": 50,
+    "jit_int_always_large": 10,
+    "jit_int_mixed": 10,
+}
+
+if __name__ == "__main__":
+    import pyperf
+    runner = pyperf.Runner()
+    for name, fn in BENCHMARKS:
+        runner.bench_time_func(name, fn, inner_loops=INNER_LOOPS[name])