summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Michael Manfre <mike@manfre.net> 2022-06-29 20:39:51 -0400
committer Mariusz Felisiak <felisiak.mariusz@gmail.com> 2022-07-01 08:48:38 +0200
commit 03eec9ff6cc78e7c1bcf88bb76ecd11f0d433c72 (patch)
tree c9ad05e93f1ce711e13e72bda616367988f453a5
parent 5c93a84f44054034f495267ff2400a5de69a4fc1 (diff)
Updated vendored _urlsplit() to strip newline and tabs.
Refs Python CVE-2022-0391. Django is not affected, but others who incorrectly use the internal function url_has_allowed_host_and_scheme() with unsanitized input could be at risk.
-rw-r--r-- django/utils/http.py 19
-rw-r--r-- docs/releases/4.2.txt 7
-rw-r--r-- tests/utils_tests/test_http.py 1
3 files changed, 25 insertions, 2 deletions
diff --git a/django/utils/http.py b/django/utils/http.py
index 51fdc4b149..d2ec2638b0 100644
--- a/django/utils/http.py
+++ b/django/utils/http.py
@@ -46,6 +46,10 @@ ASCTIME_DATE = _lazy_re_compile(r"^\w{3} %s %s %s %s$" % (__M, __D2, __T, __Y))
RFC3986_GENDELIMS = ":/?#[]@"
RFC3986_SUBDELIMS = "!$&'()*+,;="
+# TODO: Remove when dropping support for PY38.
+# Unsafe bytes to be removed per WHATWG spec.
+_UNSAFE_URL_BYTES_TO_REMOVE = ["\t", "\r", "\n"]
+
def urlencode(query, doseq=False):
"""
@@ -278,6 +282,7 @@ def url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False):
)
+# TODO: Remove when dropping support for PY38.
# Copied from urllib.parse.urlparse() but uses fixed urlsplit() function.
def _urlparse(url, scheme="", allow_fragments=True):
"""Parse a URL into 6 components:
@@ -296,8 +301,15 @@ def _urlparse(url, scheme="", allow_fragments=True):
return _coerce_result(result)
-# Copied from urllib.parse.urlsplit() with
-# https://github.com/python/cpython/pull/661 applied.
+# TODO: Remove when dropping support for PY38.
+def _remove_unsafe_bytes_from_url(url):
+ for b in _UNSAFE_URL_BYTES_TO_REMOVE:
+ url = url.replace(b, "")
+ return url
+
+
+# TODO: Remove when dropping support for PY38.
+# Backport of urllib.parse.urlsplit() from Python 3.9.
def _urlsplit(url, scheme="", allow_fragments=True):
"""Parse a URL into 5 components:
<scheme>://<netloc>/<path>?<query>#<fragment>
@@ -305,6 +317,9 @@ def _urlsplit(url, scheme="", allow_fragments=True):
Note that we don't break the components up in smaller bits
(e.g. netloc is a single string) and we don't expand % escapes."""
url, scheme, _coerce_result = _coerce_args(url, scheme)
+ url = _remove_unsafe_bytes_from_url(url)
+ scheme = _remove_unsafe_bytes_from_url(scheme)
+
netloc = query = fragment = ""
i = url.find(":")
if i > 0:
diff --git a/docs/releases/4.2.txt b/docs/releases/4.2.txt
index fad89ea008..34f8362be9 100644
--- a/docs/releases/4.2.txt
+++ b/docs/releases/4.2.txt
@@ -217,6 +217,13 @@ Utilities
* The new ``encoder`` parameter for :meth:`django.utils.html.json_script`
function allows customizing a JSON encoder class.
+* The private internal vendored copy of ``urllib.parse.urlsplit()`` now strips
+ ``'\r'``, ``'\n'``, and ``'\t'`` (see :cve:`2022-0391` and :bpo:`43882`).
+ This is to protect projects that may be incorrectly using the internal
+ ``url_has_allowed_host_and_scheme()`` function, instead of using one of the
+ documented functions for handling URL redirects. The Django functions were
+ not affected.
+
Validators
~~~~~~~~~~
diff --git a/tests/utils_tests/test_http.py b/tests/utils_tests/test_http.py
index b2754b4ddb..9978c7bb52 100644
--- a/tests/utils_tests/test_http.py
+++ b/tests/utils_tests/test_http.py
@@ -177,6 +177,7 @@ class URLHasAllowedHostAndSchemeTests(unittest.TestCase):
r"http:/\example.com",
'javascript:alert("XSS")',
"\njavascript:alert(x)",
+ "java\nscript:alert(x)",
"\x08//example.com",
r"http://otherserver\@example.com",
r"http:\\testserver\@example.com",