From e51580105c2eaae81c6da3b2785fec0690de6ae7 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 1 Jun 2026 19:21:01 +0300 Subject: [PATCH] gh-93417: Improve test_mimetypes * Separate tests for module-level API and for the MimeTypes class. * Add tests for mimetypes.init() and MimeTypes() with knownfiles and with explicitly passed files. --- Lib/test/mime.types2 | 3 + Lib/test/test_mimetypes.py | 359 +++++++++++++++++++++---------------- 2 files changed, 206 insertions(+), 156 deletions(-) create mode 100644 Lib/test/mime.types2 diff --git a/Lib/test/mime.types2 b/Lib/test/mime.types2 new file mode 100644 index 00000000000000..b05c318d1c05c2 --- /dev/null +++ b/Lib/test/mime.types2 @@ -0,0 +1,3 @@ +# MIME type Extensions +testing/test2 test test2 +testing/test3 test test3 diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py index eccdc5bdf70795..1a3b49b87b121f 100644 --- a/Lib/test/test_mimetypes.py +++ b/Lib/test/test_mimetypes.py @@ -31,50 +31,11 @@ def tearDownModule(): mimetypes.knownfiles = knownfiles -class MimeTypesTestCase(unittest.TestCase): +class MimeTypesModuleTestCase(unittest.TestCase): def setUp(self): - self.db = mimetypes.MimeTypes() - - def test_case_sensitivity(self): - eq = self.assertEqual - eq(self.db.guess_file_type("foobar.html"), ("text/html", None)) - eq(self.db.guess_type("scheme:foobar.html"), ("text/html", None)) - eq(self.db.guess_file_type("foobar.HTML"), ("text/html", None)) - eq(self.db.guess_type("scheme:foobar.HTML"), ("text/html", None)) - eq(self.db.guess_file_type("foobar.tgz"), ("application/x-tar", "gzip")) - eq(self.db.guess_type("scheme:foobar.tgz"), ("application/x-tar", "gzip")) - eq(self.db.guess_file_type("foobar.TGZ"), ("application/x-tar", "gzip")) - eq(self.db.guess_type("scheme:foobar.TGZ"), ("application/x-tar", "gzip")) - eq(self.db.guess_file_type("foobar.tar.Z"), ("application/x-tar", "compress")) - eq(self.db.guess_type("scheme:foobar.tar.Z"), ("application/x-tar", "compress")) - eq(self.db.guess_file_type("foobar.tar.z"), (None, None)) - eq(self.db.guess_type("scheme:foobar.tar.z"), (None, None)) - - def test_default_data(self): - eq = self.assertEqual - eq(self.db.guess_file_type("foo.html"), ("text/html", None)) - eq(self.db.guess_file_type("foo.HTML"), ("text/html", None)) - eq(self.db.guess_file_type("foo.tgz"), ("application/x-tar", "gzip")) - eq(self.db.guess_file_type("foo.tar.gz"), ("application/x-tar", "gzip")) - eq(self.db.guess_file_type("foo.tar.Z"), ("application/x-tar", "compress")) - eq(self.db.guess_file_type("foo.tar.bz2"), ("application/x-tar", "bzip2")) - eq(self.db.guess_file_type("foo.tar.xz"), ("application/x-tar", "xz")) - - def test_data_urls(self): - eq = self.assertEqual - guess_type = self.db.guess_type - eq(guess_type("data:invalidDataWithoutComma"), (None, None)) - eq(guess_type("data:,thisIsTextPlain"), ("text/plain", None)) - eq(guess_type("data:;base64,thisIsTextPlain"), ("text/plain", None)) - eq(guess_type("data:text/x-foo,thisIsTextXFoo"), ("text/x-foo", None)) - - def test_file_parsing(self): - eq = self.assertEqual - sio = io.StringIO("x-application/x-unittest pyunit\n") - self.db.readfp(sio) - eq(self.db.guess_file_type("foo.pyunit"), - ("x-application/x-unittest", None)) - eq(self.db.guess_extension("x-application/x-unittest"), ".pyunit") + mimetypes.inited = False + mimetypes._default_mime_types() + mimetypes._db = None def test_read_mime_types(self): eq = self.assertEqual @@ -109,100 +70,6 @@ def test_read_mime_types(self): mock_open.assert_called_with(filename, encoding='utf-8') eq(mime_dict[".Français"], "application/no-mans-land") - def test_non_standard_types(self): - eq = self.assertEqual - # First try strict - eq(self.db.guess_file_type('foo.xul', strict=True), (None, None)) - # And then non-strict - eq(self.db.guess_file_type('foo.xul', strict=False), ('text/xul', None)) - eq(self.db.guess_file_type('foo.XUL', strict=False), ('text/xul', None)) - eq(self.db.guess_file_type('foo.invalid', strict=False), (None, None)) - eq(self.db.guess_extension('image/jpeg', strict=False), '.jpg') - eq(self.db.guess_extension('image/JPEG', strict=False), '.jpg') - - def test_filename_with_url_delimiters(self): - # bpo-38449: URL delimiters cases should be handled also. - # They would have different mime types if interpreted as URL as - # compared to when interpreted as filename because of the semicolon. - eq = self.assertEqual - gzip_expected = ('application/x-tar', 'gzip') - for name in ( - ';1.tar.gz', - '?1.tar.gz', - '#1.tar.gz', - '#1#.tar.gz', - ';1#.tar.gz', - ';&1=123;?.tar.gz', - '?k1=v1&k2=v2.tar.gz', - ): - for prefix in ('', '/', '\\', - 'c:', 'c:/', 'c:\\', 'c:/d/', 'c:\\d\\', - '//share/server/', '\\\\share\\server\\'): - path = prefix + name - with self.subTest(path=path): - eq(self.db.guess_file_type(path), gzip_expected) - eq(self.db.guess_type(path), gzip_expected) - expected = (None, None) if os.name == 'nt' else gzip_expected - for prefix in ('//', '\\\\', '//share/', '\\\\share\\'): - path = prefix + name - with self.subTest(path=path): - eq(self.db.guess_file_type(path), expected) - eq(self.db.guess_type(path), expected) - eq(self.db.guess_file_type(r" \"\`;b&b&c |.tar.gz"), gzip_expected) - eq(self.db.guess_type(r" \"\`;b&b&c |.tar.gz"), gzip_expected) - - eq(self.db.guess_file_type(r'foo/.tar.gz'), (None, 'gzip')) - eq(self.db.guess_type(r'foo/.tar.gz'), (None, 'gzip')) - expected = (None, 'gzip') if os.name == 'nt' else gzip_expected - eq(self.db.guess_file_type(r'foo\.tar.gz'), expected) - eq(self.db.guess_type(r'foo\.tar.gz'), expected) - eq(self.db.guess_type(r'scheme:foo\.tar.gz'), gzip_expected) - - def test_url(self): - result = self.db.guess_type('http://example.com/host.html') - result = self.db.guess_type('http://host.html') - msg = 'URL only has a host name, not a file' - self.assertSequenceEqual(result, (None, None), msg) - result = self.db.guess_type('http://example.com/host.html') - msg = 'Should be text/html' - self.assertSequenceEqual(result, ('text/html', None), msg) - result = self.db.guess_type('http://example.com/host.html#x.tar') - self.assertSequenceEqual(result, ('text/html', None)) - result = self.db.guess_type('http://example.com/host.html?q=x.tar') - self.assertSequenceEqual(result, ('text/html', None)) - - def test_guess_all_types(self): - # First try strict. Use a set here for testing the results because if - # test_urllib2 is run before test_mimetypes, global state is modified - # such that the 'all' set will have more items in it. - all = self.db.guess_all_extensions('text/plain', strict=True) - self.assertTrue(set(all) >= {'.bat', '.c', '.h', '.ksh', '.pl', '.txt'}) - self.assertEqual(len(set(all)), len(all)) # no duplicates - # And now non-strict - all = self.db.guess_all_extensions('image/jpeg', strict=False) - self.assertEqual(all, ['.jpg', '.jpe', '.jpeg']) - # And now for no hits - all = self.db.guess_all_extensions('image/jpg', strict=True) - self.assertEqual(all, []) - # And now for type existing in both strict and non-strict mappings. - self.db.add_type('test-type', '.strict-ext') - self.db.add_type('test-type', '.non-strict-ext', strict=False) - all = self.db.guess_all_extensions('test-type', strict=False) - self.assertEqual(all, ['.strict-ext', '.non-strict-ext']) - all = self.db.guess_all_extensions('test-type') - self.assertEqual(all, ['.strict-ext']) - # Test that changing the result list does not affect the global state - all.append('.no-such-ext') - all = self.db.guess_all_extensions('test-type') - self.assertNotIn('.no-such-ext', all) - - def test_encoding(self): - filename = support.findfile("mime.types") - mimes = mimetypes.MimeTypes([filename]) - exts = mimes.guess_all_extensions('application/vnd.geocube+xml', - strict=True) - self.assertEqual(exts, ['.g3', '.g\xb3']) - def test_init_reinitializes(self): # Issue 4936: make sure an init starts clean # First, put some poison into the types table @@ -341,6 +208,205 @@ def test_init_stability(self): self.assertEqual(types_map, mimetypes.types_map) self.assertEqual(common_types, mimetypes.common_types) + def test_init_files(self): + guess_file_type = mimetypes.guess_file_type + self.assertEqual(guess_file_type("file.test2")[0], None) + + filename = support.findfile("mime.types2") + mimetypes.init([filename]) + self.assertEqual(guess_file_type("file.test2")[0], "testing/test2") + + mimetypes.init() + self.assertEqual(guess_file_type("file.test2")[0], None) + + def test_init_knownfiles(self): + guess_file_type = mimetypes.guess_file_type + self.assertEqual(guess_file_type("file.test2")[0], None) + + filename = support.findfile("mime.types2") + mimetypes.knownfiles = [filename, "non-existent"] + self.addCleanup(mimetypes.knownfiles.clear) + + mimetypes.init() + self.assertEqual(guess_file_type("file.test2")[0], "testing/test2") + + def test_added_types_are_used(self): + mimetypes.add_type('testing/default-type', '') + mime_type, _ = mimetypes.guess_type('') + self.assertEqual(mime_type, 'testing/default-type') + + mime_type, _ = mimetypes.guess_type('test.myext') + self.assertEqual(mime_type, None) + + mimetypes.add_type('testing/type', '.myext') + mime_type, _ = mimetypes.guess_type('test.myext') + self.assertEqual(mime_type, 'testing/type') + + def test_add_type_with_undotted_extension_not_supported(self): + msg = "Extension 'undotted' must start with '.'" + with self.assertRaisesRegex(ValueError, msg): + mimetypes.add_type("testing/type", "undotted") + with self.assertRaisesRegex(ValueError, msg): + mimetypes.add_type("", "undotted") + + +class MimeTypesClassTestCase(unittest.TestCase): + def setUp(self): + self.db = mimetypes.MimeTypes() + + def test_init_files(self): + guess_file_type = self.db.guess_file_type + self.assertEqual(guess_file_type("file.test2")[0], None) + + filename = support.findfile("mime.types2") + db = mimetypes.MimeTypes([filename]) + guess_file_type = db.guess_file_type + self.assertEqual(guess_file_type("file.test2")[0], "testing/test2") + + def test_init_knownfiles(self): + filename = support.findfile("mime.types2") + mimetypes.knownfiles = [filename, "non-existent"] + self.addCleanup(mimetypes.knownfiles.clear) + + db = mimetypes.MimeTypes() + guess_file_type = db.guess_file_type + self.assertEqual(guess_file_type("file.test2")[0], None) + + def test_case_sensitivity(self): + eq = self.assertEqual + eq(self.db.guess_file_type("foobar.html"), ("text/html", None)) + eq(self.db.guess_type("scheme:foobar.html"), ("text/html", None)) + eq(self.db.guess_file_type("foobar.HTML"), ("text/html", None)) + eq(self.db.guess_type("scheme:foobar.HTML"), ("text/html", None)) + eq(self.db.guess_file_type("foobar.tgz"), ("application/x-tar", "gzip")) + eq(self.db.guess_type("scheme:foobar.tgz"), ("application/x-tar", "gzip")) + eq(self.db.guess_file_type("foobar.TGZ"), ("application/x-tar", "gzip")) + eq(self.db.guess_type("scheme:foobar.TGZ"), ("application/x-tar", "gzip")) + eq(self.db.guess_file_type("foobar.tar.Z"), ("application/x-tar", "compress")) + eq(self.db.guess_type("scheme:foobar.tar.Z"), ("application/x-tar", "compress")) + eq(self.db.guess_file_type("foobar.tar.z"), (None, None)) + eq(self.db.guess_type("scheme:foobar.tar.z"), (None, None)) + + def test_default_data(self): + eq = self.assertEqual + eq(self.db.guess_file_type("foo.html"), ("text/html", None)) + eq(self.db.guess_file_type("foo.HTML"), ("text/html", None)) + eq(self.db.guess_file_type("foo.tgz"), ("application/x-tar", "gzip")) + eq(self.db.guess_file_type("foo.tar.gz"), ("application/x-tar", "gzip")) + eq(self.db.guess_file_type("foo.tar.Z"), ("application/x-tar", "compress")) + eq(self.db.guess_file_type("foo.tar.bz2"), ("application/x-tar", "bzip2")) + eq(self.db.guess_file_type("foo.tar.xz"), ("application/x-tar", "xz")) + + def test_data_urls(self): + eq = self.assertEqual + guess_type = self.db.guess_type + eq(guess_type("data:invalidDataWithoutComma"), (None, None)) + eq(guess_type("data:,thisIsTextPlain"), ("text/plain", None)) + eq(guess_type("data:;base64,thisIsTextPlain"), ("text/plain", None)) + eq(guess_type("data:text/x-foo,thisIsTextXFoo"), ("text/x-foo", None)) + + def test_file_parsing(self): + eq = self.assertEqual + sio = io.StringIO("x-application/x-unittest pyunit\n") + self.db.readfp(sio) + eq(self.db.guess_file_type("foo.pyunit"), + ("x-application/x-unittest", None)) + eq(self.db.guess_extension("x-application/x-unittest"), ".pyunit") + + def test_non_standard_types(self): + eq = self.assertEqual + # First try strict + eq(self.db.guess_file_type('foo.xul', strict=True), (None, None)) + # And then non-strict + eq(self.db.guess_file_type('foo.xul', strict=False), ('text/xul', None)) + eq(self.db.guess_file_type('foo.XUL', strict=False), ('text/xul', None)) + eq(self.db.guess_file_type('foo.invalid', strict=False), (None, None)) + eq(self.db.guess_extension('image/jpeg', strict=False), '.jpg') + eq(self.db.guess_extension('image/JPEG', strict=False), '.jpg') + + def test_filename_with_url_delimiters(self): + # bpo-38449: URL delimiters cases should be handled also. + # They would have different mime types if interpreted as URL as + # compared to when interpreted as filename because of the semicolon. + eq = self.assertEqual + gzip_expected = ('application/x-tar', 'gzip') + for name in ( + ';1.tar.gz', + '?1.tar.gz', + '#1.tar.gz', + '#1#.tar.gz', + ';1#.tar.gz', + ';&1=123;?.tar.gz', + '?k1=v1&k2=v2.tar.gz', + ): + for prefix in ('', '/', '\\', + 'c:', 'c:/', 'c:\\', 'c:/d/', 'c:\\d\\', + '//share/server/', '\\\\share\\server\\'): + path = prefix + name + with self.subTest(path=path): + eq(self.db.guess_file_type(path), gzip_expected) + eq(self.db.guess_type(path), gzip_expected) + expected = (None, None) if os.name == 'nt' else gzip_expected + for prefix in ('//', '\\\\', '//share/', '\\\\share\\'): + path = prefix + name + with self.subTest(path=path): + eq(self.db.guess_file_type(path), expected) + eq(self.db.guess_type(path), expected) + eq(self.db.guess_file_type(r" \"\`;b&b&c |.tar.gz"), gzip_expected) + eq(self.db.guess_type(r" \"\`;b&b&c |.tar.gz"), gzip_expected) + + eq(self.db.guess_file_type(r'foo/.tar.gz'), (None, 'gzip')) + eq(self.db.guess_type(r'foo/.tar.gz'), (None, 'gzip')) + expected = (None, 'gzip') if os.name == 'nt' else gzip_expected + eq(self.db.guess_file_type(r'foo\.tar.gz'), expected) + eq(self.db.guess_type(r'foo\.tar.gz'), expected) + eq(self.db.guess_type(r'scheme:foo\.tar.gz'), gzip_expected) + + def test_url(self): + result = self.db.guess_type('http://example.com/host.html') + result = self.db.guess_type('http://host.html') + msg = 'URL only has a host name, not a file' + self.assertSequenceEqual(result, (None, None), msg) + result = self.db.guess_type('http://example.com/host.html') + msg = 'Should be text/html' + self.assertSequenceEqual(result, ('text/html', None), msg) + result = self.db.guess_type('http://example.com/host.html#x.tar') + self.assertSequenceEqual(result, ('text/html', None)) + result = self.db.guess_type('http://example.com/host.html?q=x.tar') + self.assertSequenceEqual(result, ('text/html', None)) + + def test_guess_all_types(self): + # First try strict. Use a set here for testing the results because if + # test_urllib2 is run before test_mimetypes, global state is modified + # such that the 'all' set will have more items in it. + all = self.db.guess_all_extensions('text/plain', strict=True) + self.assertTrue(set(all) >= {'.bat', '.c', '.h', '.ksh', '.pl', '.txt'}) + self.assertEqual(len(set(all)), len(all)) # no duplicates + # And now non-strict + all = self.db.guess_all_extensions('image/jpeg', strict=False) + self.assertEqual(all, ['.jpg', '.jpe', '.jpeg']) + # And now for no hits + all = self.db.guess_all_extensions('image/jpg', strict=True) + self.assertEqual(all, []) + # And now for type existing in both strict and non-strict mappings. + self.db.add_type('test-type', '.strict-ext') + self.db.add_type('test-type', '.non-strict-ext', strict=False) + all = self.db.guess_all_extensions('test-type', strict=False) + self.assertEqual(all, ['.strict-ext', '.non-strict-ext']) + all = self.db.guess_all_extensions('test-type') + self.assertEqual(all, ['.strict-ext']) + # Test that changing the result list does not affect the global state + all.append('.no-such-ext') + all = self.db.guess_all_extensions('test-type') + self.assertNotIn('.no-such-ext', all) + + def test_encoding(self): + filename = support.findfile("mime.types") + mimes = mimetypes.MimeTypes([filename]) + exts = mimes.guess_all_extensions('application/vnd.geocube+xml', + strict=True) + self.assertEqual(exts, ['.g3', '.g\xb3']) + def test_path_like_ob(self): filename = "LICENSE.txt" filepath = os_helper.FakePath(filename) @@ -378,25 +444,6 @@ def test_keywords_args_api(self): self.assertEqual(self.db.guess_all_extensions( type='image/jpeg', strict=True), ['.jpg', '.jpe', '.jpeg']) - def test_added_types_are_used(self): - mimetypes.add_type('testing/default-type', '') - mime_type, _ = mimetypes.guess_type('') - self.assertEqual(mime_type, 'testing/default-type') - - mime_type, _ = mimetypes.guess_type('test.myext') - self.assertEqual(mime_type, None) - - mimetypes.add_type('testing/type', '.myext') - mime_type, _ = mimetypes.guess_type('test.myext') - self.assertEqual(mime_type, 'testing/type') - - def test_add_type_with_undotted_extension_not_supported(self): - msg = "Extension 'undotted' must start with '.'" - with self.assertRaisesRegex(ValueError, msg): - mimetypes.add_type("testing/type", "undotted") - with self.assertRaisesRegex(ValueError, msg): - mimetypes.add_type("", "undotted") - @unittest.skipUnless(sys.platform.startswith("win"), "Windows only") class Win32MimeTypesTestCase(unittest.TestCase):