summary | refs | log | tree | commit | diff
path: root/tests/utils_tests/test_html.py
diff options
context:
space:
mode:
author: Mike Edmunds <medmunds@gmail.com> 2024-12-15 01:54:42 +0100
committer: Sarah Boyce <42296566+sarahboyce@users.noreply.github.com> 2025-01-23 10:40:58 +0100
commit: 698d05c11c27d4ed5fd75194ac0edcf133bd7600 (patch)
tree: 32887587eba7b1c86f15af0bf50f64d3d39d4683 /tests/utils_tests/test_html.py
parent: d6c2b6788405d0370b29a7ee9aa81ee8ead6a25b (diff)
[5.2.x] Fixed #36013 -- Removed use of IDNA-2003 in django.utils.html.
Removed obsolete and potentially problematic IDNA 2003 ("punycode") encoding of international domain names in smart_urlquote() and Urlizer, which are used (only) by AdminURLFieldWidget and the urlize/urlizetrunc template filters. Changed to use percent-encoded UTF-8, which defers IDNA details to the browser (like other URLs rendered by Django).

Backport of 29ba75e6e57414f0e6f9528d08a520b8b931fb28 from main.
Diffstat (limited to 'tests/utils_tests/test_html.py')
-rw-r--r-- tests/utils_tests/test_html.py 47
1 files changed, 43 insertions, 4 deletions
diff --git a/tests/utils_tests/test_html.py b/tests/utils_tests/test_html.py
index 341e211c96..6d259d76d7 100644
--- a/tests/utils_tests/test_html.py
+++ b/tests/utils_tests/test_html.py
@@ -269,8 +269,26 @@ class TestUtilsHtml(SimpleTestCase):
def test_smart_urlquote(self):
items = (
- ("http://öäü.com/", "http://xn--4ca9at.com/"),
- ("http://öäü.com/öäü/", "http://xn--4ca9at.com/%C3%B6%C3%A4%C3%BC/"),
+ # IDN is encoded as percent-encoded ("quoted") UTF-8 (#36013).
+ ("http://öäü.com/", "http://%C3%B6%C3%A4%C3%BC.com/"),
+ ("https://faß.example.com", "https://fa%C3%9F.example.com"),
+ (
+ "http://öäü.com/öäü/",
+ "http://%C3%B6%C3%A4%C3%BC.com/%C3%B6%C3%A4%C3%BC/",
+ ),
+ (
+ # Valid under IDNA 2008, but was invalid in IDNA 2003.
+ "https://މިހާރު.com",
+ "https://%DE%89%DE%A8%DE%80%DE%A7%DE%83%DE%AA.com",
+ ),
+ (
+ # Valid under WHATWG URL Specification but not IDNA 2008.
+ "http://👓.ws",
+ "http://%F0%9F%91%93.ws",
+ ),
+ # Pre-encoded IDNA is left unchanged.
+ ("http://xn--iny-zx5a.com/idna2003", "http://xn--iny-zx5a.com/idna2003"),
+ ("http://xn--fa-hia.com/idna2008", "http://xn--fa-hia.com/idna2008"),
# Everything unsafe is quoted, !*'();:@&=+$,/?#[]~ is considered
# safe as per RFC.
(
@@ -292,8 +310,10 @@ class TestUtilsHtml(SimpleTestCase):
"django",
),
("http://.www.f oo.bar/", "http://.www.f%20oo.bar/"),
+ ('http://example.com">', "http://example.com%22%3E"),
+ ("http://10.22.1.1/", "http://10.22.1.1/"),
+ ("http://[fd00::1]/", "http://[fd00::1]/"),
)
- # IDNs are properly quoted
for value, output in items:
with self.subTest(value=value, output=output):
self.assertEqual(smart_urlquote(value), output)
@@ -366,11 +386,21 @@ class TestUtilsHtml(SimpleTestCase):
lazystr("Search for google.com/?q=!"),
'Search for <a href="http://google.com/?q=">google.com/?q=</a>!',
),
+ (
+ "http://www.foo.bar/",
+ '<a href="http://www.foo.bar/">http://www.foo.bar/</a>',
+ ),
+ (
+ "Look on www.نامه‌ای.com.",
+ "Look on <a "
+ 'href="http://www.%D9%86%D8%A7%D9%85%D9%87%E2%80%8C%D8%A7%DB%8C.com"'
+ ">www.نامه‌ای.com</a>.",
+ ),
("foo@example.com", '<a href="mailto:foo@example.com">foo@example.com</a>'),
(
"test@" + "한.글." * 15 + "aaa",
'<a href="mailto:test@'
- + "xn--6q8b.xn--bj0b." * 15
+ + "%ED%95%9C.%EA%B8%80." * 15
+ 'aaa">'
+ "test@"
+ "한.글." * 15
@@ -383,6 +413,15 @@ class TestUtilsHtml(SimpleTestCase):
'<a href="mailto:yes%2Bthis%3Dis%26a%25valid%21email@example.com"'
">yes+this=is&a%valid!email@example.com</a>",
),
+ (
+ "foo@faß.example.com",
+ '<a href="mailto:foo@fa%C3%9F.example.com">foo@faß.example.com</a>',
+ ),
+ (
+ "idna-2008@މިހާރު.example.mv",
+ '<a href="mailto:idna-2008@%DE%89%DE%A8%DE%80%DE%A7%DE%83%DE%AA.ex'
+ 'ample.mv">idna-2008@މިހާރު.example.mv</a>',
+ ),
)
for value, output in tests:
with self.subTest(value=value):