From fffa4e07480b1275538a96787ded69fd9a380833 Mon Sep 17 00:00:00 2001 From: Carl Bordum Hansen Date: Tue, 10 May 2022 21:46:53 +0200 Subject: [PATCH 1/4] gh-91400: make sure email parsing doesn't unquote realnames with spaces --- Lib/email/utils.py | 2 +- Lib/test/test_email/test_email.py | 10 +++++++++- .../2022-05-10-21-46-28.gh-issue-91400.usUzm_.rst | 1 + 3 files changed, 11 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-05-10-21-46-28.gh-issue-91400.usUzm_.rst diff --git a/Lib/email/utils.py b/Lib/email/utils.py index cfdfeb3f1a86e4..1d6ee1572d8d6c 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -45,7 +45,7 @@ CRLF = '\r\n' TICK = "'" -specialsre = re.compile(r'[][\\()<>@,:;".]') +specialsre = re.compile(r'[][\\()<>@,:;". ]') escapesre = re.compile(r'[\\"]') def _has_surrogates(s): diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 69f883a3673f26..a417385b645790 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -3143,7 +3143,7 @@ def test_parseaddr_multiple_domains(self): def test_noquote_dump(self): self.assertEqual( utils.formataddr(('A Silly Person', 'person@dom.ain')), - 'A Silly Person <person@dom.ain>') + '"A Silly Person" <person@dom.ain>') def test_escape_dump(self): self.assertEqual( @@ -3164,6 +3164,14 @@ def test_escape_backslashes(self): b = 'person@dom.ain' self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b)) + def test_parseaddr_formataddr_inverse(self): + # gh-91400 + identity = '"foo bar" ' + single = utils.formataddr(utils.parseaddr(identity)) + double = utils.formataddr(utils.parseaddr(single)) + self.assertEqual(identity, single) + self.assertEqual(single, double) + def test_quotes_unicode_names(self): # issue 1690608. email.utils.formataddr() should be rfc2047 aware. 
name = "H\u00e4ns W\u00fcrst" diff --git a/Misc/NEWS.d/next/Library/2022-05-10-21-46-28.gh-issue-91400.usUzm_.rst b/Misc/NEWS.d/next/Library/2022-05-10-21-46-28.gh-issue-91400.usUzm_.rst new file mode 100644 index 00000000000000..ca3f225cf172cc --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-05-10-21-46-28.gh-issue-91400.usUzm_.rst @@ -0,0 +1 @@ +Make email.utils.formataddr and email.utils.parseaddr inverse of each other From 7aa20b8c29d32a64f16415dcd3b40913e795f7cc Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Thu, 16 Mar 2023 17:10:09 -0700 Subject: [PATCH 2/4] Restrict NEWS wording to the exact behavior change. This way it describes the observable behavior change, rather than using broader wording that would make a guarantee we don't actually test exhaustively. --- .../next/Library/2022-05-10-21-46-28.gh-issue-91400.usUzm_.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2022-05-10-21-46-28.gh-issue-91400.usUzm_.rst b/Misc/NEWS.d/next/Library/2022-05-10-21-46-28.gh-issue-91400.usUzm_.rst index ca3f225cf172cc..e74020fdb19c80 100644 --- a/Misc/NEWS.d/next/Library/2022-05-10-21-46-28.gh-issue-91400.usUzm_.rst +++ b/Misc/NEWS.d/next/Library/2022-05-10-21-46-28.gh-issue-91400.usUzm_.rst @@ -1 +1,2 @@ -Make email.utils.formataddr and email.utils.parseaddr inverse of each other +Make :func:`email.utils.formataddr` "quote" the name portion of an email address if +it contains spaces. From 5723107ff9d9504f5e50484b867f1bfb221aaf0b Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Thu, 16 Mar 2023 17:11:32 -0700 Subject: [PATCH 3/4] simplify the test. Running it a second time when a round-tripped identical value has already been guaranteed doesn't do anything. Though it was meaningful during the original bug report, in correct code it is not. 
--- Lib/test/test_email/test_email.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index a417385b645790..b965854ad4a1b7 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -3168,9 +3168,7 @@ def test_parseaddr_formataddr_inverse(self): # gh-91400 identity = '"foo bar" ' single = utils.formataddr(utils.parseaddr(identity)) - double = utils.formataddr(utils.parseaddr(single)) self.assertEqual(identity, single) - self.assertEqual(single, double) def test_quotes_unicode_names(self): # issue 1690608. email.utils.formataddr() should be rfc2047 aware. From 027aa0ac025fe4d370aa689f5d34cc0e55ee417a Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Thu, 16 Mar 2023 17:14:52 -0700 Subject: [PATCH 4/4] Add a comment describing the purpose of the regexes. --- Lib/email/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Lib/email/utils.py b/Lib/email/utils.py index 1d6ee1572d8d6c..21488a318c283b 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -45,6 +45,8 @@ CRLF = '\r\n' TICK = "'" +# These are used by formataddr() to understand what characters require a name +# field to be quoted and what characters within that must be \escaped. specialsre = re.compile(r'[][\\()<>@,:;". ]') escapesre = re.compile(r'[\\"]')