From 5b6079ba2b6ea7fef098acc117628b9ac05f332d Mon Sep 17 00:00:00 2001 From: Alexander Ljungberg Date: Wed, 18 Oct 2023 13:17:03 +0100 Subject: [PATCH 1/5] gh-110941: Fix `json.dump` encoding dict subclasses as empty. See #110941 for full details but the tldr is that the C optimised JSON encoder did not handle `dict` subclasses correctly and encoded them as empty if they did not put something into `super()`'s storage. --- Lib/test/test_json/test_default.py | 39 ++++++++++++++++++++++++++++++ Modules/_json.c | 10 ++++++-- 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_json/test_default.py b/Lib/test/test_json/test_default.py index 3ce16684a08272d..ecb234a6a7102b5 100644 --- a/Lib/test/test_json/test_default.py +++ b/Lib/test/test_json/test_default.py @@ -1,7 +1,29 @@ import collections +import collections.abc from test.test_json import PyTest, CTest +class CustomIndexDict(collections.abc.Mapping, dict): + # Using `Mapping` here to make this a complete dict subclass, but note the bug in #110941 + # is not specific to `Mapping` subclasses and the outcome would have been the same with a + # fully manually implemented dict subclass. + + def __init__(self, keys: tuple = ()): + self._keys = keys + + def __getitem__(self, k): + try: + return self._keys.index(k) + except ValueError: + raise KeyError(k) + + def __iter__(self): + return iter(self._keys) + + def __len__(self): + return len(self._keys) + + class TestDefault: def test_default(self): self.assertEqual( @@ -18,6 +40,23 @@ def test_ordereddict(self): self.dumps(od, sort_keys=True), '{"a": 1, "b": 2, "c": 3, "d": 4}') + # This should behave identically for PyTest and CTest: see #110941. + def test_custom_dict(self): + cd = CustomIndexDict(("b", "a")) + self.assertEqual( + self.dumps(cd), + '{"b": 0, "a": 1}') + self.assertEqual( + self.dumps(cd, sort_keys=True), + '{"a": 1, "b": 0}') + + def test_empty_custom_dict(self): + # Exercise the fast path when a dict subclass is empty. + cd = CustomIndexDict() + self.assertEqual( + self.dumps(cd), + '{}') + class TestPyDefault(TestDefault, PyTest): pass class TestCDefault(TestDefault, CTest): pass diff --git a/Modules/_json.c b/Modules/_json.c index 41495e2012f1522..18db70430f4182e 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1518,8 +1518,14 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *key, *value; bool first = true; - if (PyDict_GET_SIZE(dct) == 0) /* Fast path */ - return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2); + if (PyDict_CheckExact(dct)) { + if (PyDict_GET_SIZE(dct) == 0) /* Fast path */ + return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2); + } else { + // Note that as noted in #55186, we can't use `PyDict_Size` here since we're dealing with a subclass. + if (PyMapping_Size(dct) == 0) /* Fast path for subclasses */ + return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2); + } if (s->markers != Py_None) { int has_key; From 40a62970e4d12d79295ba80a65af7e33e564a9c3 Mon Sep 17 00:00:00 2001 From: Alexander Ljungberg Date: Wed, 18 Oct 2023 13:31:52 +0100 Subject: [PATCH 2/5] gh-110941: Add NEWS blurb. --- .../next/Library/2023-10-18-13-31-10.gh-issue-110941.UVrYGE.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2023-10-18-13-31-10.gh-issue-110941.UVrYGE.rst diff --git a/Misc/NEWS.d/next/Library/2023-10-18-13-31-10.gh-issue-110941.UVrYGE.rst b/Misc/NEWS.d/next/Library/2023-10-18-13-31-10.gh-issue-110941.UVrYGE.rst new file mode 100644 index 000000000000000..5824b593460e9be --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-10-18-13-31-10.gh-issue-110941.UVrYGE.rst @@ -0,0 +1,2 @@ +Fix ``json.dump`` and ``json.dumps`` encoding certain dict subclasses as +empty. From cd34712b98c1b4e03fac981bf93467c7b568f83f Mon Sep 17 00:00:00 2001 From: Alexander Ljungberg Date: Mon, 20 Nov 2023 13:25:24 +0000 Subject: [PATCH 3/5] Make implicit curly braces explicit as per PEP 0007. Co-authored-by: Pieter Eendebak --- Modules/_json.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Modules/_json.c b/Modules/_json.c index 18db70430f4182e..d2bd2bf7c10d419 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1519,8 +1519,9 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, bool first = true; if (PyDict_CheckExact(dct)) { - if (PyDict_GET_SIZE(dct) == 0) /* Fast path */ + if (PyDict_GET_SIZE(dct) == 0) { /* Fast path */ return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2); + } } else { // Note that as noted in #55186, we can't use `PyDict_Size` here since we're dealing with a subclass. if (PyMapping_Size(dct) == 0) /* Fast path for subclasses */ From 74ded68f2d427babea35a8f38b5b2a5b1ce28e88 Mon Sep 17 00:00:00 2001 From: Alexander Ljungberg Date: Mon, 20 Nov 2023 13:26:03 +0000 Subject: [PATCH 4/5] Make implicit curly braces explicit as per PEP 0007. Co-authored-by: Pieter Eendebak --- Modules/_json.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Modules/_json.c b/Modules/_json.c index d2bd2bf7c10d419..107e45a1f21e5ce 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1524,8 +1524,9 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, } } else { // Note that as noted in #55186, we can't use `PyDict_Size` here since we're dealing with a subclass. - if (PyMapping_Size(dct) == 0) /* Fast path for subclasses */ + if (PyMapping_Size(dct) == 0) { /* Fast path for subclasses */ return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2); + } } if (s->markers != Py_None) { From ff4d89863c388aaedf498058b41a971818a0c965 Mon Sep 17 00:00:00 2001 From: Alexander Ljungberg Date: Mon, 20 Nov 2023 13:26:44 +0000 Subject: [PATCH 5/5] Rewording. Co-authored-by: Pieter Eendebak --- Modules/_json.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_json.c b/Modules/_json.c index 107e45a1f21e5ce..04bf88172823017 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1523,7 +1523,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2); } } else { - // Note that as noted in #55186, we can't use `PyDict_Size` here since we're dealing with a subclass. + // we can't use `PyDict_Size` here since we're dealing with a subclass, see #55186 if (PyMapping_Size(dct) == 0) { /* Fast path for subclasses */ return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2); }