summaryrefslogtreecommitdiff
path: root/tests/mail
diff options
context:
space:
mode:
authorMike Edmunds <medmunds@gmail.com>2025-06-24 18:24:41 -0700
committerSarah Boyce <42296566+sarahboyce@users.noreply.github.com>2025-07-16 15:33:14 +0200
commit40d6eb2c0104f2119d5260b77402737cebf3f85c (patch)
treeafc5cca8089d7da6336a551f3f2b78e22871bb06 /tests/mail
parent5ca470a52e99b84446c838416fbdac921bfb8146 (diff)
Refs #35581 -- Added workaround for Python bug in mail tests.
See python/cpython#128110.
Diffstat (limited to 'tests/mail')
-rw-r--r--tests/mail/tests.py72
1 files changed, 71 insertions, 1 deletions
diff --git a/tests/mail/tests.py b/tests/mail/tests.py
index 9483f4a6c1..8501fc97e7 100644
--- a/tests/mail/tests.py
+++ b/tests/mail/tests.py
@@ -49,12 +49,76 @@ except ImportError:
HAS_AIOSMTPD = False
+# Check whether python/cpython#128110 has been fixed by seeing if space between
+# encoded-words is ignored (as required by RFC 2047 section 6.2).
+NEEDS_CPYTHON_128110_WORKAROUND = (
+ _message_from_bytes(b"To: =??q?a?= =??q?b?= <to@ex>", policy=policy.default)
+)["To"].addresses[0].display_name != "ab"
+
+RFC2047_PREFIX = "=?" # start of an encoded-word.
+
+
+def _apply_cpython_128110_workaround(message, msg_bytes):
+ """
+ Updates message in place to correct misparsed rfc2047 display-names in
+ address headers caused by https://github.com/python/cpython/issues/128110.
+ """
+ from email.header import decode_header
+ from email.headerregistry import AddressHeader
+ from email.parser import BytesHeaderParser
+ from email.utils import getaddresses
+
+ def rfc2047_decode(s):
+ # Decode using legacy decode_header() (which doesn't have the bug).
+ return "".join(
+ (
+ segment
+ if charset is None and isinstance(segment, str)
+ else segment.decode(charset or "ascii")
+ )
+ for segment, charset in decode_header(s)
+ )
+
+ def build_address(name, address):
+ if "@" in address:
+ return Address(display_name=name, addr_spec=address)
+ return Address(display_name=name, username=address, domain="")
+
+ # This workaround only applies to messages parsed with a modern policy.
+ assert not isinstance(message.policy, policy.Compat32)
+
+ # Reparse with compat32 to get access to raw (undecoded) headers.
+ raw_headers = BytesHeaderParser(policy=policy.compat32).parsebytes(msg_bytes)
+ for header, modern_value in message.items():
+ if not isinstance(modern_value, AddressHeader):
+ # The bug only affects structured address headers.
+ continue
+ raw_value = raw_headers[header]
+ if RFC2047_PREFIX in raw_value:
+ # Headers should not appear more than once.
+ assert len(message.get_all(header)) == 1
+ # Reconstruct Address objects using legacy APIs.
+ unfolded = raw_value.replace("\r\n", "").replace("\n", "")
+ corrected_addresses = (
+ build_address(rfc2047_decode(name), address)
+ for name, address in getaddresses([unfolded])
+ )
+ message.replace_header(header, corrected_addresses)
+
+
def message_from_bytes(s):
"""
email.message_from_bytes() using modern email.policy.default.
Returns a modern email.message.EmailMessage.
"""
- return _message_from_bytes(s, policy=policy.default)
+ # The modern email parser has a bug with adjacent rfc2047 encoded-words.
+ # This doesn't affect django.core.mail (which doesn't parse messages),
+ # but it can confuse our tests that try to verify sent content by reparsing
+ # the generated message. Apply a workaround if needed.
+ message = _message_from_bytes(s, policy=policy.default)
+ if NEEDS_CPYTHON_128110_WORKAROUND and RFC2047_PREFIX.encode() in s:
+ _apply_cpython_128110_workaround(message, s)
+ return message
class MailTestsMixin:
@@ -1447,6 +1511,12 @@ class MailTests(MailTestsMixin, SimpleTestCase):
"To Example",
'"to@other.com"@example.com',
),
+ # Addresses with long non-ASCII display names.
+ (
+ "Tó Example very long" * 4 + " <to@example.com>",
+ "Tó Example very long" * 4,
+ "to@example.com",
+ ),
# Address with long display name and non-ASCII domain.
(
"To Example very long" * 4 + " <to@exampl€.com>",