diff options
| author | Mike Edmunds <medmunds@gmail.com> | 2024-12-14 16:54:42 -0800 |
|---|---|---|
| committer | Sarah Boyce <42296566+sarahboyce@users.noreply.github.com> | 2025-01-23 10:38:15 +0100 |
| commit | 29ba75e6e57414f0e6f9528d08a520b8b931fb28 (patch) | |
| tree | 59757ea3ef4cb0cffa14ee1a499209c83e982110 | |
| parent | 23c960a98e0d054d51dadda7049a54a083ef1155 (diff) | |
Fixed #36013 -- Removed use of IDNA-2003 in django.utils.html.
Removed the obsolete and potentially problematic IDNA 2003 ("punycode")
encoding of internationalized domain names in smart_urlquote() and Urlizer,
which are used only by AdminURLFieldWidget and the urlize/urlizetrunc
template filters. Hostnames are now emitted as percent-encoded UTF-8, which
defers IDNA details to the browser (like other URLs rendered by Django).
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | AUTHORS | 1 |
| -rw-r--r-- | django/utils/html.py | 16 |
| -rw-r--r-- | tests/admin_widgets/tests.py | 7 |
| -rw-r--r-- | tests/template_tests/filter_tests/test_urlize.py | 23 |
| -rw-r--r-- | tests/utils_tests/test_html.py | 47 |
5 files changed, 74 insertions, 20 deletions
@@ -735,6 +735,7 @@ answer newbie questions, and generally made Django that much better: Mihai Preda <mihai_preda@yahoo.com> Mikaël Barbero <mikael.barbero nospam at nospam free.fr> Mike Axiak <axiak@mit.edu> + Mike Edmunds <medmunds@gmail.com> Mike Grouchy <https://mikegrouchy.com/> Mike Malone <mjmalone@gmail.com> Mike Richardson diff --git a/django/utils/html.py b/django/utils/html.py index 5671f39db4..182b7d4cec 100644 --- a/django/utils/html.py +++ b/django/utils/html.py @@ -9,7 +9,6 @@ from urllib.parse import parse_qsl, quote, unquote, urlencode, urlsplit, urlunsp from django.core.exceptions import SuspiciousOperation, ValidationError from django.core.validators import EmailValidator -from django.utils.encoding import punycode from django.utils.functional import Promise, cached_property, keep_lazy, keep_lazy_text from django.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS from django.utils.regex_helper import _lazy_re_compile @@ -237,17 +236,16 @@ def smart_urlquote(url): # see also https://bugs.python.org/issue16285 return quote(segment, safe=RFC3986_SUBDELIMS + RFC3986_GENDELIMS + "~") - # Handle IDN before quoting. try: scheme, netloc, path, query, fragment = urlsplit(url) except ValueError: # invalid IPv6 URL (normally square brackets in hostname part). return unquote_quote(url) - try: - netloc = punycode(netloc) # IDN -> ACE - except UnicodeError: # invalid domain part - return unquote_quote(url) + # Handle IDN as percent-encoded UTF-8 octets, per WHATWG URL Specification + # section 3.5 and RFC 3986 section 3.2.2. Defer any IDNA to the user agent. + # See #36013. 
+ netloc = unquote_quote(netloc) if query: # Separately unquoting key/value, so as to not mix querystring separators @@ -348,10 +346,8 @@ class Urlizer: url = smart_urlquote("http://%s" % html.unescape(middle)) elif ":" not in middle and self.is_email_simple(middle): local, domain = middle.rsplit("@", 1) - try: - domain = punycode(domain) - except UnicodeError: - return word + # Encode per RFC 6068 Section 2 (items 1, 4, 5). Defer any IDNA + # to the user agent. See #36013. local = quote(local, safe="") domain = quote(domain, safe="") url = self.mailto_template.format(local=local, domain=domain) diff --git a/tests/admin_widgets/tests.py b/tests/admin_widgets/tests.py index fb296c8655..efff4e47d7 100644 --- a/tests/admin_widgets/tests.py +++ b/tests/admin_widgets/tests.py @@ -486,11 +486,13 @@ class AdminURLWidgetTest(SimpleTestCase): w = widgets.AdminURLFieldWidget() self.assertHTMLEqual( w.render("test", "http://example-äüö.com"), - '<p class="url">Currently: <a href="http://xn--example--7za4pnc.com">' + '<p class="url">Currently: <a href="http://example-%C3%A4%C3%BC%C3%B6.com">' "http://example-äüö.com</a><br>" 'Change:<input class="vURLField" name="test" type="url" ' 'value="http://example-äüö.com"></p>', ) + # Does not use obsolete IDNA-2003 encoding (#36013). 
+ self.assertNotIn("fass.example.com", w.render("test", "http://faß.example.com")) def test_render_quoting(self): """ @@ -517,7 +519,8 @@ class AdminURLWidgetTest(SimpleTestCase): output = w.render("test", "http://example-äüö.com/<sometag>some-text</sometag>") self.assertEqual( HREF_RE.search(output)[1], - "http://xn--example--7za4pnc.com/%3Csometag%3Esome-text%3C/sometag%3E", + "http://example-%C3%A4%C3%BC%C3%B6.com/" + "%3Csometag%3Esome-text%3C/sometag%3E", ) self.assertEqual( TEXT_RE.search(output)[1], diff --git a/tests/template_tests/filter_tests/test_urlize.py b/tests/template_tests/filter_tests/test_urlize.py index 80dd94cd9f..c186acd948 100644 --- a/tests/template_tests/filter_tests/test_urlize.py +++ b/tests/template_tests/filter_tests/test_urlize.py @@ -229,19 +229,34 @@ class FunctionTests(SimpleTestCase): """ #13704 - Check urlize handles IDN correctly """ + # The "✶" below is \N{SIX POINTED BLACK STAR}, not "*" \N{ASTERISK}. self.assertEqual( urlize("http://c✶.ws"), - '<a href="http://xn--c-lgq.ws" rel="nofollow">http://c✶.ws</a>', + '<a href="http://c%E2%9C%B6.ws" rel="nofollow">http://c✶.ws</a>', ) self.assertEqual( urlize("www.c✶.ws"), - '<a href="http://www.xn--c-lgq.ws" rel="nofollow">www.c✶.ws</a>', + '<a href="http://www.c%E2%9C%B6.ws" rel="nofollow">www.c✶.ws</a>', ) self.assertEqual( - urlize("c✶.org"), '<a href="http://xn--c-lgq.org" rel="nofollow">c✶.org</a>' + urlize("c✶.org"), + '<a href="http://c%E2%9C%B6.org" rel="nofollow">c✶.org</a>', ) self.assertEqual( - urlize("info@c✶.org"), '<a href="mailto:info@xn--c-lgq.org">info@c✶.org</a>' + urlize("info@c✶.org"), + '<a href="mailto:info@c%E2%9C%B6.org">info@c✶.org</a>', + ) + + # Pre-encoded IDNA is urlized but not re-encoded. 
+ self.assertEqual( + urlize("www.xn--iny-zx5a.com/idna2003"), + '<a href="http://www.xn--iny-zx5a.com/idna2003"' + ' rel="nofollow">www.xn--iny-zx5a.com/idna2003</a>', + ) + self.assertEqual( + urlize("www.xn--fa-hia.com/idna2008"), + '<a href="http://www.xn--fa-hia.com/idna2008"' + ' rel="nofollow">www.xn--fa-hia.com/idna2008</a>', ) def test_malformed(self): diff --git a/tests/utils_tests/test_html.py b/tests/utils_tests/test_html.py index b7d89bfe59..4db3816c72 100644 --- a/tests/utils_tests/test_html.py +++ b/tests/utils_tests/test_html.py @@ -264,8 +264,26 @@ class TestUtilsHtml(SimpleTestCase): def test_smart_urlquote(self): items = ( - ("http://öäü.com/", "http://xn--4ca9at.com/"), - ("http://öäü.com/öäü/", "http://xn--4ca9at.com/%C3%B6%C3%A4%C3%BC/"), + # IDN is encoded as percent-encoded ("quoted") UTF-8 (#36013). + ("http://öäü.com/", "http://%C3%B6%C3%A4%C3%BC.com/"), + ("https://faß.example.com", "https://fa%C3%9F.example.com"), + ( + "http://öäü.com/öäü/", + "http://%C3%B6%C3%A4%C3%BC.com/%C3%B6%C3%A4%C3%BC/", + ), + ( + # Valid under IDNA 2008, but was invalid in IDNA 2003. + "https://މިހާރު.com", + "https://%DE%89%DE%A8%DE%80%DE%A7%DE%83%DE%AA.com", + ), + ( + # Valid under WHATWG URL Specification but not IDNA 2008. + "http://👓.ws", + "http://%F0%9F%91%93.ws", + ), + # Pre-encoded IDNA is left unchanged. + ("http://xn--iny-zx5a.com/idna2003", "http://xn--iny-zx5a.com/idna2003"), + ("http://xn--fa-hia.com/idna2008", "http://xn--fa-hia.com/idna2008"), # Everything unsafe is quoted, !*'();:@&=+$,/?#[]~ is considered # safe as per RFC. 
( @@ -287,8 +305,10 @@ class TestUtilsHtml(SimpleTestCase): "django", ), ("http://.www.f oo.bar/", "http://.www.f%20oo.bar/"), + ('http://example.com">', "http://example.com%22%3E"), + ("http://10.22.1.1/", "http://10.22.1.1/"), + ("http://[fd00::1]/", "http://[fd00::1]/"), ) - # IDNs are properly quoted for value, output in items: with self.subTest(value=value, output=output): self.assertEqual(smart_urlquote(value), output) @@ -361,11 +381,21 @@ class TestUtilsHtml(SimpleTestCase): lazystr("Search for google.com/?q=!"), 'Search for <a href="http://google.com/?q=">google.com/?q=</a>!', ), + ( + "http://www.foo.bar/", + '<a href="http://www.foo.bar/">http://www.foo.bar/</a>', + ), + ( + "Look on www.نامهای.com.", + "Look on <a " + 'href="http://www.%D9%86%D8%A7%D9%85%D9%87%E2%80%8C%D8%A7%DB%8C.com"' + ">www.نامهای.com</a>.", + ), ("foo@example.com", '<a href="mailto:foo@example.com">foo@example.com</a>'), ( "test@" + "한.글." * 15 + "aaa", '<a href="mailto:test@' - + "xn--6q8b.xn--bj0b." * 15 + + "%ED%95%9C.%EA%B8%80." * 15 + 'aaa">' + "test@" + "한.글." * 15 @@ -378,6 +408,15 @@ class TestUtilsHtml(SimpleTestCase): '<a href="mailto:yes%2Bthis%3Dis%26a%25valid%21email@example.com"' ">yes+this=is&a%valid!email@example.com</a>", ), + ( + "foo@faß.example.com", + '<a href="mailto:foo@fa%C3%9F.example.com">foo@faß.example.com</a>', + ), + ( + "idna-2008@މިހާރު.example.mv", + '<a href="mailto:idna-2008@%DE%89%DE%A8%DE%80%DE%A7%DE%83%DE%AA.ex' + 'ample.mv">idna-2008@މިހާރު.example.mv</a>', + ), ) for value, output in tests: with self.subTest(value=value): |
