summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Mike Edmunds <medmunds@gmail.com> 2024-12-14 16:54:42 -0800
committer Sarah Boyce <42296566+sarahboyce@users.noreply.github.com> 2025-01-23 10:38:15 +0100
commit 29ba75e6e57414f0e6f9528d08a520b8b931fb28 (patch)
tree 59757ea3ef4cb0cffa14ee1a499209c83e982110
parent 23c960a98e0d054d51dadda7049a54a083ef1155 (diff)
Fixed #36013 -- Removed use of IDNA-2003 in django.utils.html.
Removed obsolete and potentially problematic IDNA 2003 ("punycode") encoding of international domain names in smart_urlquote() and Urlizer, which are used (only) by AdminURLFieldWidget and the urlize/urlizetrunc template filters. Changed to use percent-encoded UTF-8, which defers IDNA details to the browser (like other URLs rendered by Django).
-rw-r--r-- AUTHORS 1
-rw-r--r-- django/utils/html.py 16
-rw-r--r-- tests/admin_widgets/tests.py 7
-rw-r--r-- tests/template_tests/filter_tests/test_urlize.py 23
-rw-r--r-- tests/utils_tests/test_html.py 47
5 files changed, 74 insertions, 20 deletions
diff --git a/AUTHORS b/AUTHORS
index c9a26fa6c8..9d8956bda0 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -735,6 +735,7 @@ answer newbie questions, and generally made Django that much better:
Mihai Preda <mihai_preda@yahoo.com>
Mikaël Barbero <mikael.barbero nospam at nospam free.fr>
Mike Axiak <axiak@mit.edu>
+ Mike Edmunds <medmunds@gmail.com>
Mike Grouchy <https://mikegrouchy.com/>
Mike Malone <mjmalone@gmail.com>
Mike Richardson
diff --git a/django/utils/html.py b/django/utils/html.py
index 5671f39db4..182b7d4cec 100644
--- a/django/utils/html.py
+++ b/django/utils/html.py
@@ -9,7 +9,6 @@ from urllib.parse import parse_qsl, quote, unquote, urlencode, urlsplit, urlunsp
from django.core.exceptions import SuspiciousOperation, ValidationError
from django.core.validators import EmailValidator
-from django.utils.encoding import punycode
from django.utils.functional import Promise, cached_property, keep_lazy, keep_lazy_text
from django.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS
from django.utils.regex_helper import _lazy_re_compile
@@ -237,17 +236,16 @@ def smart_urlquote(url):
# see also https://bugs.python.org/issue16285
return quote(segment, safe=RFC3986_SUBDELIMS + RFC3986_GENDELIMS + "~")
- # Handle IDN before quoting.
try:
scheme, netloc, path, query, fragment = urlsplit(url)
except ValueError:
# invalid IPv6 URL (normally square brackets in hostname part).
return unquote_quote(url)
- try:
- netloc = punycode(netloc) # IDN -> ACE
- except UnicodeError: # invalid domain part
- return unquote_quote(url)
+ # Handle IDN as percent-encoded UTF-8 octets, per WHATWG URL Specification
+ # section 3.5 and RFC 3986 section 3.2.2. Defer any IDNA to the user agent.
+ # See #36013.
+ netloc = unquote_quote(netloc)
if query:
# Separately unquoting key/value, so as to not mix querystring separators
@@ -348,10 +346,8 @@ class Urlizer:
url = smart_urlquote("http://%s" % html.unescape(middle))
elif ":" not in middle and self.is_email_simple(middle):
local, domain = middle.rsplit("@", 1)
- try:
- domain = punycode(domain)
- except UnicodeError:
- return word
+ # Encode per RFC 6068 Section 2 (items 1, 4, 5). Defer any IDNA
+ # to the user agent. See #36013.
local = quote(local, safe="")
domain = quote(domain, safe="")
url = self.mailto_template.format(local=local, domain=domain)
diff --git a/tests/admin_widgets/tests.py b/tests/admin_widgets/tests.py
index fb296c8655..efff4e47d7 100644
--- a/tests/admin_widgets/tests.py
+++ b/tests/admin_widgets/tests.py
@@ -486,11 +486,13 @@ class AdminURLWidgetTest(SimpleTestCase):
w = widgets.AdminURLFieldWidget()
self.assertHTMLEqual(
w.render("test", "http://example-äüö.com"),
- '<p class="url">Currently: <a href="http://xn--example--7za4pnc.com">'
+ '<p class="url">Currently: <a href="http://example-%C3%A4%C3%BC%C3%B6.com">'
"http://example-äüö.com</a><br>"
'Change:<input class="vURLField" name="test" type="url" '
'value="http://example-äüö.com"></p>',
)
+ # Does not use obsolete IDNA-2003 encoding (#36013).
+ self.assertNotIn("fass.example.com", w.render("test", "http://faß.example.com"))
def test_render_quoting(self):
"""
@@ -517,7 +519,8 @@ class AdminURLWidgetTest(SimpleTestCase):
output = w.render("test", "http://example-äüö.com/<sometag>some-text</sometag>")
self.assertEqual(
HREF_RE.search(output)[1],
- "http://xn--example--7za4pnc.com/%3Csometag%3Esome-text%3C/sometag%3E",
+ "http://example-%C3%A4%C3%BC%C3%B6.com/"
+ "%3Csometag%3Esome-text%3C/sometag%3E",
)
self.assertEqual(
TEXT_RE.search(output)[1],
diff --git a/tests/template_tests/filter_tests/test_urlize.py b/tests/template_tests/filter_tests/test_urlize.py
index 80dd94cd9f..c186acd948 100644
--- a/tests/template_tests/filter_tests/test_urlize.py
+++ b/tests/template_tests/filter_tests/test_urlize.py
@@ -229,19 +229,34 @@ class FunctionTests(SimpleTestCase):
"""
#13704 - Check urlize handles IDN correctly
"""
+ # The "✶" below is \N{SIX POINTED BLACK STAR}, not "*" \N{ASTERISK}.
self.assertEqual(
urlize("http://c✶.ws"),
- '<a href="http://xn--c-lgq.ws" rel="nofollow">http://c✶.ws</a>',
+ '<a href="http://c%E2%9C%B6.ws" rel="nofollow">http://c✶.ws</a>',
)
self.assertEqual(
urlize("www.c✶.ws"),
- '<a href="http://www.xn--c-lgq.ws" rel="nofollow">www.c✶.ws</a>',
+ '<a href="http://www.c%E2%9C%B6.ws" rel="nofollow">www.c✶.ws</a>',
)
self.assertEqual(
- urlize("c✶.org"), '<a href="http://xn--c-lgq.org" rel="nofollow">c✶.org</a>'
+ urlize("c✶.org"),
+ '<a href="http://c%E2%9C%B6.org" rel="nofollow">c✶.org</a>',
)
self.assertEqual(
- urlize("info@c✶.org"), '<a href="mailto:info@xn--c-lgq.org">info@c✶.org</a>'
+ urlize("info@c✶.org"),
+ '<a href="mailto:info@c%E2%9C%B6.org">info@c✶.org</a>',
+ )
+
+ # Pre-encoded IDNA is urlized but not re-encoded.
+ self.assertEqual(
+ urlize("www.xn--iny-zx5a.com/idna2003"),
+ '<a href="http://www.xn--iny-zx5a.com/idna2003"'
+ ' rel="nofollow">www.xn--iny-zx5a.com/idna2003</a>',
+ )
+ self.assertEqual(
+ urlize("www.xn--fa-hia.com/idna2008"),
+ '<a href="http://www.xn--fa-hia.com/idna2008"'
+ ' rel="nofollow">www.xn--fa-hia.com/idna2008</a>',
)
def test_malformed(self):
diff --git a/tests/utils_tests/test_html.py b/tests/utils_tests/test_html.py
index b7d89bfe59..4db3816c72 100644
--- a/tests/utils_tests/test_html.py
+++ b/tests/utils_tests/test_html.py
@@ -264,8 +264,26 @@ class TestUtilsHtml(SimpleTestCase):
def test_smart_urlquote(self):
items = (
- ("http://öäü.com/", "http://xn--4ca9at.com/"),
- ("http://öäü.com/öäü/", "http://xn--4ca9at.com/%C3%B6%C3%A4%C3%BC/"),
+ # IDN is encoded as percent-encoded ("quoted") UTF-8 (#36013).
+ ("http://öäü.com/", "http://%C3%B6%C3%A4%C3%BC.com/"),
+ ("https://faß.example.com", "https://fa%C3%9F.example.com"),
+ (
+ "http://öäü.com/öäü/",
+ "http://%C3%B6%C3%A4%C3%BC.com/%C3%B6%C3%A4%C3%BC/",
+ ),
+ (
+ # Valid under IDNA 2008, but was invalid in IDNA 2003.
+ "https://މިހާރު.com",
+ "https://%DE%89%DE%A8%DE%80%DE%A7%DE%83%DE%AA.com",
+ ),
+ (
+ # Valid under WHATWG URL Specification but not IDNA 2008.
+ "http://👓.ws",
+ "http://%F0%9F%91%93.ws",
+ ),
+ # Pre-encoded IDNA is left unchanged.
+ ("http://xn--iny-zx5a.com/idna2003", "http://xn--iny-zx5a.com/idna2003"),
+ ("http://xn--fa-hia.com/idna2008", "http://xn--fa-hia.com/idna2008"),
# Everything unsafe is quoted, !*'();:@&=+$,/?#[]~ is considered
# safe as per RFC.
(
@@ -287,8 +305,10 @@ class TestUtilsHtml(SimpleTestCase):
"django",
),
("http://.www.f oo.bar/", "http://.www.f%20oo.bar/"),
+ ('http://example.com">', "http://example.com%22%3E"),
+ ("http://10.22.1.1/", "http://10.22.1.1/"),
+ ("http://[fd00::1]/", "http://[fd00::1]/"),
)
- # IDNs are properly quoted
for value, output in items:
with self.subTest(value=value, output=output):
self.assertEqual(smart_urlquote(value), output)
@@ -361,11 +381,21 @@ class TestUtilsHtml(SimpleTestCase):
lazystr("Search for google.com/?q=!"),
'Search for <a href="http://google.com/?q=">google.com/?q=</a>!',
),
+ (
+ "http://www.foo.bar/",
+ '<a href="http://www.foo.bar/">http://www.foo.bar/</a>',
+ ),
+ (
+ "Look on www.نامه‌ای.com.",
+ "Look on <a "
+ 'href="http://www.%D9%86%D8%A7%D9%85%D9%87%E2%80%8C%D8%A7%DB%8C.com"'
+ ">www.نامه‌ای.com</a>.",
+ ),
("foo@example.com", '<a href="mailto:foo@example.com">foo@example.com</a>'),
(
"test@" + "한.글." * 15 + "aaa",
'<a href="mailto:test@'
- + "xn--6q8b.xn--bj0b." * 15
+ + "%ED%95%9C.%EA%B8%80." * 15
+ 'aaa">'
+ "test@"
+ "한.글." * 15
@@ -378,6 +408,15 @@ class TestUtilsHtml(SimpleTestCase):
'<a href="mailto:yes%2Bthis%3Dis%26a%25valid%21email@example.com"'
">yes+this=is&a%valid!email@example.com</a>",
),
+ (
+ "foo@faß.example.com",
+ '<a href="mailto:foo@fa%C3%9F.example.com">foo@faß.example.com</a>',
+ ),
+ (
+ "idna-2008@މިހާރު.example.mv",
+ '<a href="mailto:idna-2008@%DE%89%DE%A8%DE%80%DE%A7%DE%83%DE%AA.ex'
+ 'ample.mv">idna-2008@މިހާރު.example.mv</a>',
+ ),
)
for value, output in tests:
with self.subTest(value=value):