From 24c6bbc92b6dd0ce9b7ff799049498299f70f97d Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 27 May 2026 20:56:38 +0300 Subject: [PATCH 1/2] gh-84353: Preserve non-UTF-8 filenames when appending to ZipFile (GH-150091) Preserve non-UTF-8 filenames when appending to a ZipFile. --------- Co-authored-by: Gregory P. Smith --- Lib/test/test_zipfile/test_core.py | 40 +++++++++++-------- Lib/zipfile/__init__.py | 8 +++- ...6-05-19-19-00-49.gh-issue-84353.ZU5zaQ.rst | 5 +++ 3 files changed, 35 insertions(+), 18 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2026-05-19-19-00-49.gh-issue-84353.ZU5zaQ.rst diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index 30550263ad50aab..ffed328b171fda2 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -3640,29 +3640,23 @@ def test_read_with_unsuitable_metadata_encoding(self): def test_read_after_append(self): newname = '\u56db' # Han 'four' - expected_names = [name.encode('shift_jis').decode('cp437') - for name in self.file_names[:2]] + self.file_names[2:] - expected_names.append(newname) - expected_content = (*self.file_content, b"newcontent") + newname2 = 'fünf' # representable in cp437, but still stored as UTF-8 + expected_names = [*self.file_names, newname, newname2] + mojibake_expected_names = [name.encode('shift_jis').decode('cp437') + if i < 2 else name + for i, name in enumerate(expected_names)] + expected_content = (*self.file_content, b"newcontent", b"newcontent2") with zipfile.ZipFile(TESTFN, "a") as zipfp: zipfp.writestr(newname, "newcontent") - self.assertEqual(sorted(zipfp.namelist()), sorted(expected_names)) + zipfp.writestr(newname2, "newcontent2") + self.assertEqual(sorted(zipfp.namelist()), sorted(mojibake_expected_names)) with zipfile.ZipFile(TESTFN, "r") as zipfp: - self._test_read(zipfp, expected_names, expected_content) + self._test_read(zipfp, mojibake_expected_names, expected_content) with zipfile.ZipFile(TESTFN, "r", metadata_encoding='shift_jis') as zipfp: - self.assertEqual(sorted(zipfp.namelist()), sorted(expected_names)) - for i, (name, content) in enumerate(zip(expected_names, expected_content)): - info = zipfp.getinfo(name) - self.assertEqual(info.filename, name) - self.assertEqual(info.file_size, len(content)) - if i < 2: - with self.assertRaises(zipfile.BadZipFile): - zipfp.read(name) - else: - self.assertEqual(zipfp.read(name), content) + self._test_read(zipfp, expected_names, expected_content) def test_write_with_metadata_encoding(self): ZF = zipfile.ZipFile @@ -3671,6 +3665,20 @@ def test_write_with_metadata_encoding(self): "^metadata_encoding is only"): ZF("nonesuch.zip", mode, metadata_encoding="shift_jis") + def test_add_comment(self): + with zipfile.ZipFile(TESTFN, "r") as zipfp: + mojibake_expected_names = zipfp.namelist() + + with zipfile.ZipFile(TESTFN, "a") as zipfp: + zipfp.comment = b'comment' + self.assertEqual(zipfp.namelist(), mojibake_expected_names) + + with zipfile.ZipFile(TESTFN, "r") as zipfp: + self._test_read(zipfp, mojibake_expected_names, self.file_content) + + with zipfile.ZipFile(TESTFN, "r", metadata_encoding='shift_jis') as zipfp: + self._test_read(zipfp, self.file_names, self.file_content) + def test_cli_with_metadata_encoding(self): errmsg = "Non-conforming encodings not supported with -c." args = ["--metadata-encoding=shift_jis", "-c", "nonesuch", "nonesuch"] diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index d91cb509a6ff4ff..71e4dd4f6f625ce 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -566,8 +566,12 @@ def FileHeader(self, zip64=None): return header + filename + extra def _encodeFilenameFlags(self): + if self.flag_bits & _MASK_UTF_FILENAME: + encoding = 'ascii' + else: + encoding = 'cp437' try: - return self.filename.encode('ascii'), self.flag_bits + return self.filename.encode(encoding), self.flag_bits & ~_MASK_UTF_FILENAME except UnicodeEncodeError: return self.filename.encode('utf-8'), self.flag_bits | _MASK_UTF_FILENAME @@ -1812,7 +1816,7 @@ def _open_to_write(self, zinfo, force_zip64=False): zinfo.compress_size = 0 zinfo.CRC = 0 - zinfo.flag_bits = 0x00 + zinfo.flag_bits = _MASK_UTF_FILENAME if zinfo.compress_type == ZIP_LZMA: # Compressed data includes an end-of-stream (EOS) marker zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1 diff --git a/Misc/NEWS.d/next/Library/2026-05-19-19-00-49.gh-issue-84353.ZU5zaQ.rst b/Misc/NEWS.d/next/Library/2026-05-19-19-00-49.gh-issue-84353.ZU5zaQ.rst new file mode 100644 index 000000000000000..84fb12e2abd81a0 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-05-19-19-00-49.gh-issue-84353.ZU5zaQ.rst @@ -0,0 +1,5 @@ +Preserve non-UTF-8 encoded filenames when appending to a +:class:`zipfile.ZipFile`. Previously, non-ASCII names stored in a legacy +encoding (without the UTF-8 flag bit set) could be corrupted when the +central directory was rewritten: they were decoded as cp437 and then +re-stored as UTF-8. From 9242700c149c490c56d2a415b395b5f51d94a49a Mon Sep 17 00:00:00 2001 From: Adam Johnson Date: Wed, 27 May 2026 23:03:34 +0100 Subject: [PATCH 2/2] gh-149029: Update SQLite to 3.53.1 for binary releases (#149767) --- Mac/BuildScript/build-installer.py | 6 +++--- .../2026-04-26-23-14-45.gh-issue-149029.oPTXP4.rst | 1 + .../macOS/2026-04-26-23-15-09.gh-issue-149029.Lsx--T.rst | 1 + Misc/externals.spdx.json | 8 ++++---- PCbuild/get_externals.bat | 2 +- PCbuild/python.props | 2 +- PCbuild/readme.txt | 2 +- Platforms/Android/__main__.py | 2 +- 8 files changed, 13 insertions(+), 11 deletions(-) create mode 100644 Misc/NEWS.d/next/Windows/2026-04-26-23-14-45.gh-issue-149029.oPTXP4.rst create mode 100644 Misc/NEWS.d/next/macOS/2026-04-26-23-15-09.gh-issue-149029.Lsx--T.rst diff --git a/Mac/BuildScript/build-installer.py b/Mac/BuildScript/build-installer.py index d533723a502e8e0..d4df8fbc42ddcba 100755 --- a/Mac/BuildScript/build-installer.py +++ b/Mac/BuildScript/build-installer.py @@ -359,9 +359,9 @@ def library_recipes(): ), ), dict( - name="SQLite 3.50.4", - url="https://www.sqlite.org/2025/sqlite-autoconf-3500400.tar.gz", - checksum="a3db587a1b92ee5ddac2f66b3edb41b26f9c867275782d46c3a088977d6a5b18", + name="SQLite 3.53.1", + url="https://www.sqlite.org/2026/sqlite-autoconf-3530100.tar.gz", + checksum="83e6b2020a034e9a7ad4a72feea59e1ad52f162e09cbd26735a3ffb98359fc4f", extra_cflags=('-Os ' '-DSQLITE_ENABLE_FTS5 ' '-DSQLITE_ENABLE_FTS4 ' diff --git a/Misc/NEWS.d/next/Windows/2026-04-26-23-14-45.gh-issue-149029.oPTXP4.rst b/Misc/NEWS.d/next/Windows/2026-04-26-23-14-45.gh-issue-149029.oPTXP4.rst new file mode 100644 index 000000000000000..6c4c6403b989847 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2026-04-26-23-14-45.gh-issue-149029.oPTXP4.rst @@ -0,0 +1 @@ +Update Windows installer to ship with SQLite 3.53.1. diff --git a/Misc/NEWS.d/next/macOS/2026-04-26-23-15-09.gh-issue-149029.Lsx--T.rst b/Misc/NEWS.d/next/macOS/2026-04-26-23-15-09.gh-issue-149029.Lsx--T.rst new file mode 100644 index 000000000000000..157a70f5e3cefc9 --- /dev/null +++ b/Misc/NEWS.d/next/macOS/2026-04-26-23-15-09.gh-issue-149029.Lsx--T.rst @@ -0,0 +1 @@ +Update macOS installer to ship with SQLite version 3.53.1. diff --git a/Misc/externals.spdx.json b/Misc/externals.spdx.json index 9a571fba732ab4a..080330c1cb75a53 100644 --- a/Misc/externals.spdx.json +++ b/Misc/externals.spdx.json @@ -91,21 +91,21 @@ "checksums": [ { "algorithm": "SHA256", - "checksumValue": "fb5ab81f27612b0a7b4861ba655906c76dc85ee969e7a4905d2075aff931e8d0" + "checksumValue": "15e8fc7dc059f7b156e53629540951c2691acd71e027f6f8f66dacab5c66c884" } ], - "downloadLocation": "https://github.com/python/cpython-source-deps/archive/refs/tags/sqlite-3.50.4.0.tar.gz", + "downloadLocation": "https://github.com/python/cpython-source-deps/archive/refs/tags/sqlite-3.53.1.0.tar.gz", "externalRefs": [ { "referenceCategory": "SECURITY", - "referenceLocator": "cpe:2.3:a:sqlite:sqlite:3.50.4.0:*:*:*:*:*:*:*", + "referenceLocator": "cpe:2.3:a:sqlite:sqlite:3.53.1.0:*:*:*:*:*:*:*", "referenceType": "cpe23Type" } ], "licenseConcluded": "NOASSERTION", "name": "sqlite", "primaryPackagePurpose": "SOURCE", - "versionInfo": "3.50.4.0" + "versionInfo": "3.53.1.0" }, { "SPDXID": "SPDXRef-PACKAGE-tcl", diff --git a/PCbuild/get_externals.bat b/PCbuild/get_externals.bat index 368bc489bfa9680..f6ba3d0fef3a60b 100644 --- a/PCbuild/get_externals.bat +++ b/PCbuild/get_externals.bat @@ -56,7 +56,7 @@ set libraries=%libraries% bzip2-1.0.8 if NOT "%IncludeLibffiSrc%"=="false" set libraries=%libraries% libffi-3.4.4 if NOT "%IncludeSSLSrc%"=="false" set libraries=%libraries% openssl-3.5.6 set libraries=%libraries% mpdecimal-4.0.0 -set libraries=%libraries% sqlite-3.50.4.0 +set libraries=%libraries% sqlite-3.53.1.0 if NOT "%IncludeTkinterSrc%"=="false" set libraries=%libraries% tcl-9.0.3.0 if NOT "%IncludeTkinterSrc%"=="false" set libraries=%libraries% tk-9.0.3.1 set libraries=%libraries% xz-5.8.1.1 diff --git a/PCbuild/python.props b/PCbuild/python.props index f70321f887ef8c0..edcda8fd8fc55d9 100644 --- a/PCbuild/python.props +++ b/PCbuild/python.props @@ -98,7 +98,7 @@ - $(ExternalsDir)sqlite-3.50.4.0\ + $(ExternalsDir)sqlite-3.53.1.0\ $(ExternalsDir)bzip2-1.0.8\ $(ExternalsDir)xz-5.8.1.1\ $(ExternalsDir)libffi-3.4.4\ diff --git a/PCbuild/readme.txt b/PCbuild/readme.txt index 6aecbfff182dcb4..ea8adf21c279a68 100644 --- a/PCbuild/readme.txt +++ b/PCbuild/readme.txt @@ -242,7 +242,7 @@ _ssl again when building. _sqlite3 - Wraps SQLite 3.50.4, which is itself built by sqlite3.vcxproj + Wraps SQLite 3.53.1, which is itself built by sqlite3.vcxproj Homepage: https://www.sqlite.org/ diff --git a/Platforms/Android/__main__.py b/Platforms/Android/__main__.py index d2546cf76c206b0..5c41aaca6ebf0b4 100755 --- a/Platforms/Android/__main__.py +++ b/Platforms/Android/__main__.py @@ -220,7 +220,7 @@ def unpack_deps(host, prefix_dir, cache_dir): "bzip2-1.0.8-3", "libffi-3.4.4-3", "openssl-3.5.6-0", - "sqlite-3.50.4-0", + "sqlite-3.53.1-0", "xz-5.4.6-1", "zstd-1.5.7-2" ]: