summaryrefslogtreecommitdiff
path: root/django/utils
diff options
context:
space:
mode:
authorMike Edmunds <medmunds@gmail.com>2024-12-14 16:54:42 -0800
committerSarah Boyce <42296566+sarahboyce@users.noreply.github.com>2025-01-23 10:38:15 +0100
commit29ba75e6e57414f0e6f9528d08a520b8b931fb28 (patch)
tree59757ea3ef4cb0cffa14ee1a499209c83e982110 /django/utils
parent23c960a98e0d054d51dadda7049a54a083ef1155 (diff)
Fixed #36013 -- Removed use of IDNA-2003 in django.utils.html.
Removed obsolete and potentially problematic IDNA 2003 ("punycode") encoding of international domain names in smart_urlquote() and Urlizer, which are used (only) by AdminURLFieldWidget and the urlize/urlizetrunc template filters. Changed to use percent-encoded UTF-8, which defers IDNA details to the browser (like other URLs rendered by Django).
Diffstat (limited to 'django/utils')
-rw-r--r--django/utils/html.py16
1 files changed, 6 insertions, 10 deletions
diff --git a/django/utils/html.py b/django/utils/html.py
index 5671f39db4..182b7d4cec 100644
--- a/django/utils/html.py
+++ b/django/utils/html.py
@@ -9,7 +9,6 @@ from urllib.parse import parse_qsl, quote, unquote, urlencode, urlsplit, urlunsp
from django.core.exceptions import SuspiciousOperation, ValidationError
from django.core.validators import EmailValidator
-from django.utils.encoding import punycode
from django.utils.functional import Promise, cached_property, keep_lazy, keep_lazy_text
from django.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS
from django.utils.regex_helper import _lazy_re_compile
@@ -237,17 +236,16 @@ def smart_urlquote(url):
# see also https://bugs.python.org/issue16285
return quote(segment, safe=RFC3986_SUBDELIMS + RFC3986_GENDELIMS + "~")
- # Handle IDN before quoting.
try:
scheme, netloc, path, query, fragment = urlsplit(url)
except ValueError:
# invalid IPv6 URL (normally square brackets in hostname part).
return unquote_quote(url)
- try:
- netloc = punycode(netloc) # IDN -> ACE
- except UnicodeError: # invalid domain part
- return unquote_quote(url)
+ # Handle IDN as percent-encoded UTF-8 octets, per WHATWG URL Specification
+ # section 3.5 and RFC 3986 section 3.2.2. Defer any IDNA to the user agent.
+ # See #36013.
+ netloc = unquote_quote(netloc)
if query:
# Separately unquoting key/value, so as to not mix querystring separators
@@ -348,10 +346,8 @@ class Urlizer:
url = smart_urlquote("http://%s" % html.unescape(middle))
elif ":" not in middle and self.is_email_simple(middle):
local, domain = middle.rsplit("@", 1)
- try:
- domain = punycode(domain)
- except UnicodeError:
- return word
+ # Encode per RFC 6068 Section 2 (items 1, 4, 5). Defer any IDNA
+ # to the user agent. See #36013.
local = quote(local, safe="")
domain = quote(domain, safe="")
url = self.mailto_template.format(local=local, domain=domain)