summaryrefslogtreecommitdiff
path: root/django
diff options
context:
space:
mode:
authorJake Howard <RealOrangeOne@users.noreply.github.com>2024-05-29 14:48:27 +0100
committerGitHub <noreply@github.com>2024-05-29 10:48:27 -0300
commitff308a06047cd60806d604a7cf612e5656ee2ac9 (patch)
treef2139fbf020cbdf33bad64a3377700623c18a44f /django
parent02dab94c7b8585c7ae3854465574d768e1df75d3 (diff)
Fixed 35467 -- Replaced urlparse with urlsplit where appropriate.
This work should not generate any change of functionality, and `urlsplit` is approximately 6x faster. Most use cases of `urlparse` didn't touch the path, so they can be converted to `urlsplit` without any issue. Most of those which do use `.path`, simply parse the URL, mutate the querystring, then put them back together, which is also fine (so long as urlunsplit is used).
Diffstat (limited to 'django')
-rw-r--r--django/contrib/admin/options.py4
-rw-r--r--django/contrib/admin/templatetags/admin_urls.py10
-rw-r--r--django/contrib/auth/decorators.py6
-rw-r--r--django/contrib/auth/middleware.py6
-rw-r--r--django/contrib/auth/mixins.py6
-rw-r--r--django/contrib/auth/views.py10
-rw-r--r--django/contrib/staticfiles/handlers.py4
-rw-r--r--django/forms/fields.py4
-rw-r--r--django/http/response.py4
-rw-r--r--django/middleware/common.py4
-rw-r--r--django/middleware/csrf.py10
-rw-r--r--django/test/client.py17
-rw-r--r--django/test/testcases.py12
-rw-r--r--django/utils/http.py6
14 files changed, 48 insertions, 55 deletions
diff --git a/django/contrib/admin/options.py b/django/contrib/admin/options.py
index 12467de74d..9cc891d807 100644
--- a/django/contrib/admin/options.py
+++ b/django/contrib/admin/options.py
@@ -6,7 +6,7 @@ import warnings
from functools import partial, update_wrapper
from urllib.parse import parse_qsl
from urllib.parse import quote as urlquote
-from urllib.parse import urlparse
+from urllib.parse import urlsplit
from django import forms
from django.conf import settings
@@ -1384,7 +1384,7 @@ class ModelAdmin(BaseModelAdmin):
)
def _get_preserved_qsl(self, request, preserved_filters):
- query_string = urlparse(request.build_absolute_uri()).query
+ query_string = urlsplit(request.build_absolute_uri()).query
return parse_qsl(query_string.replace(preserved_filters, ""))
def response_add(self, request, obj, post_url_continue=None):
diff --git a/django/contrib/admin/templatetags/admin_urls.py b/django/contrib/admin/templatetags/admin_urls.py
index 871b0d5f20..176e7a49ed 100644
--- a/django/contrib/admin/templatetags/admin_urls.py
+++ b/django/contrib/admin/templatetags/admin_urls.py
@@ -1,4 +1,4 @@
-from urllib.parse import parse_qsl, unquote, urlparse, urlunparse
+from urllib.parse import parse_qsl, unquote, urlsplit, urlunsplit
from django import template
from django.contrib.admin.utils import quote
@@ -24,8 +24,8 @@ def add_preserved_filters(context, url, popup=False, to_field=None):
preserved_filters = context.get("preserved_filters")
preserved_qsl = context.get("preserved_qsl")
- parsed_url = list(urlparse(url))
- parsed_qs = dict(parse_qsl(parsed_url[4]))
+ parsed_url = list(urlsplit(url))
+ parsed_qs = dict(parse_qsl(parsed_url[3]))
merged_qs = {}
if preserved_qsl:
@@ -66,5 +66,5 @@ def add_preserved_filters(context, url, popup=False, to_field=None):
merged_qs.update(parsed_qs)
- parsed_url[4] = urlencode(merged_qs)
- return urlunparse(parsed_url)
+ parsed_url[3] = urlencode(merged_qs)
+ return urlunsplit(parsed_url)
diff --git a/django/contrib/auth/decorators.py b/django/contrib/auth/decorators.py
index ea1cef0795..78e76a9ae9 100644
--- a/django/contrib/auth/decorators.py
+++ b/django/contrib/auth/decorators.py
@@ -1,6 +1,6 @@
import asyncio
from functools import wraps
-from urllib.parse import urlparse
+from urllib.parse import urlsplit
from asgiref.sync import async_to_sync, sync_to_async
@@ -25,8 +25,8 @@ def user_passes_test(
resolved_login_url = resolve_url(login_url or settings.LOGIN_URL)
# If the login url is the same scheme and net location then just
# use the path as the "next" url.
- login_scheme, login_netloc = urlparse(resolved_login_url)[:2]
- current_scheme, current_netloc = urlparse(path)[:2]
+ login_scheme, login_netloc = urlsplit(resolved_login_url)[:2]
+ current_scheme, current_netloc = urlsplit(path)[:2]
if (not login_scheme or login_scheme == current_scheme) and (
not login_netloc or login_netloc == current_netloc
):
diff --git a/django/contrib/auth/middleware.py b/django/contrib/auth/middleware.py
index 761929d67d..cb409ee778 100644
--- a/django/contrib/auth/middleware.py
+++ b/django/contrib/auth/middleware.py
@@ -1,5 +1,5 @@
from functools import partial
-from urllib.parse import urlparse
+from urllib.parse import urlsplit
from django.conf import settings
from django.contrib import auth
@@ -74,8 +74,8 @@ class LoginRequiredMiddleware(MiddlewareMixin):
resolved_login_url = resolve_url(self.get_login_url(view_func))
# If the login url is the same scheme and net location then use the
# path as the "next" url.
- login_scheme, login_netloc = urlparse(resolved_login_url)[:2]
- current_scheme, current_netloc = urlparse(path)[:2]
+ login_scheme, login_netloc = urlsplit(resolved_login_url)[:2]
+ current_scheme, current_netloc = urlsplit(path)[:2]
if (not login_scheme or login_scheme == current_scheme) and (
not login_netloc or login_netloc == current_netloc
):
diff --git a/django/contrib/auth/mixins.py b/django/contrib/auth/mixins.py
index 0e46000d97..1f2e95ff00 100644
--- a/django/contrib/auth/mixins.py
+++ b/django/contrib/auth/mixins.py
@@ -1,4 +1,4 @@
-from urllib.parse import urlparse
+from urllib.parse import urlsplit
from django.conf import settings
from django.contrib.auth import REDIRECT_FIELD_NAME
@@ -51,8 +51,8 @@ class AccessMixin:
resolved_login_url = resolve_url(self.get_login_url())
# If the login url is the same scheme and net location then use the
# path as the "next" url.
- login_scheme, login_netloc = urlparse(resolved_login_url)[:2]
- current_scheme, current_netloc = urlparse(path)[:2]
+ login_scheme, login_netloc = urlsplit(resolved_login_url)[:2]
+ current_scheme, current_netloc = urlsplit(path)[:2]
if (not login_scheme or login_scheme == current_scheme) and (
not login_netloc or login_netloc == current_netloc
):
diff --git a/django/contrib/auth/views.py b/django/contrib/auth/views.py
index 9a6d18bcd2..a18cfdb347 100644
--- a/django/contrib/auth/views.py
+++ b/django/contrib/auth/views.py
@@ -1,4 +1,4 @@
-from urllib.parse import urlparse, urlunparse
+from urllib.parse import urlsplit, urlunsplit
from django.conf import settings
@@ -183,13 +183,13 @@ def redirect_to_login(next, login_url=None, redirect_field_name=REDIRECT_FIELD_N
"""
resolved_url = resolve_url(login_url or settings.LOGIN_URL)
- login_url_parts = list(urlparse(resolved_url))
+ login_url_parts = list(urlsplit(resolved_url))
if redirect_field_name:
- querystring = QueryDict(login_url_parts[4], mutable=True)
+ querystring = QueryDict(login_url_parts[3], mutable=True)
querystring[redirect_field_name] = next
- login_url_parts[4] = querystring.urlencode(safe="/")
+ login_url_parts[3] = querystring.urlencode(safe="/")
- return HttpResponseRedirect(urlunparse(login_url_parts))
+ return HttpResponseRedirect(urlunsplit(login_url_parts))
# Class-based password reset views
diff --git a/django/contrib/staticfiles/handlers.py b/django/contrib/staticfiles/handlers.py
index 7394eff818..686718a355 100644
--- a/django/contrib/staticfiles/handlers.py
+++ b/django/contrib/staticfiles/handlers.py
@@ -36,13 +36,13 @@ class StaticFilesHandlerMixin:
* the host is provided as part of the base_url
* the request's path isn't under the media path (or equal)
"""
- return path.startswith(self.base_url[2]) and not self.base_url[1]
+ return path.startswith(self.base_url.path) and not self.base_url.netloc
def file_path(self, url):
"""
Return the relative path to the media file on disk for the given URL.
"""
- relative_url = url.removeprefix(self.base_url[2])
+ relative_url = url.removeprefix(self.base_url.path)
return url2pathname(relative_url)
def serve(self, request):
diff --git a/django/forms/fields.py b/django/forms/fields.py
index 4ec7b7aee7..1a58a60743 100644
--- a/django/forms/fields.py
+++ b/django/forms/fields.py
@@ -792,13 +792,13 @@ class URLField(CharField):
def to_python(self, value):
def split_url(url):
"""
- Return a list of url parts via urlparse.urlsplit(), or raise
+ Return a list of url parts via urlsplit(), or raise
ValidationError for some malformed URLs.
"""
try:
return list(urlsplit(url))
except ValueError:
- # urlparse.urlsplit can raise a ValueError with some
+ # urlsplit can raise a ValueError with some
# misformatted URLs.
raise ValidationError(self.error_messages["invalid"], code="invalid")
diff --git a/django/http/response.py b/django/http/response.py
index eecd972cd6..0d756403db 100644
--- a/django/http/response.py
+++ b/django/http/response.py
@@ -9,7 +9,7 @@ import time
import warnings
from email.header import Header
from http.client import responses
-from urllib.parse import urlparse
+from urllib.parse import urlsplit
from asgiref.sync import async_to_sync, sync_to_async
@@ -616,7 +616,7 @@ class HttpResponseRedirectBase(HttpResponse):
def __init__(self, redirect_to, *args, **kwargs):
super().__init__(*args, **kwargs)
self["Location"] = iri_to_uri(redirect_to)
- parsed = urlparse(str(redirect_to))
+ parsed = urlsplit(str(redirect_to))
if parsed.scheme and parsed.scheme not in self.allowed_schemes:
raise DisallowedRedirect(
"Unsafe redirect to URL with protocol '%s'" % parsed.scheme
diff --git a/django/middleware/common.py b/django/middleware/common.py
index 9f71b9d278..bf22d00f01 100644
--- a/django/middleware/common.py
+++ b/django/middleware/common.py
@@ -1,5 +1,5 @@
import re
-from urllib.parse import urlparse
+from urllib.parse import urlsplit
from django.conf import settings
from django.core.exceptions import PermissionDenied
@@ -171,7 +171,7 @@ class BrokenLinkEmailsMiddleware(MiddlewareMixin):
# The referer is equal to the current URL, ignoring the scheme (assumed
# to be a poorly implemented bot).
- parsed_referer = urlparse(referer)
+ parsed_referer = urlsplit(referer)
if parsed_referer.netloc in ["", domain] and parsed_referer.path == uri:
return True
diff --git a/django/middleware/csrf.py b/django/middleware/csrf.py
index f7943494ba..5ae1aae5c6 100644
--- a/django/middleware/csrf.py
+++ b/django/middleware/csrf.py
@@ -8,7 +8,7 @@ against request forgeries from other sites.
import logging
import string
from collections import defaultdict
-from urllib.parse import urlparse
+from urllib.parse import urlsplit
from django.conf import settings
from django.core.exceptions import DisallowedHost, ImproperlyConfigured
@@ -174,7 +174,7 @@ class CsrfViewMiddleware(MiddlewareMixin):
@cached_property
def csrf_trusted_origins_hosts(self):
return [
- urlparse(origin).netloc.lstrip("*")
+ urlsplit(origin).netloc.lstrip("*")
for origin in settings.CSRF_TRUSTED_ORIGINS
]
@@ -190,7 +190,7 @@ class CsrfViewMiddleware(MiddlewareMixin):
"""
allowed_origin_subdomains = defaultdict(list)
for parsed in (
- urlparse(origin)
+ urlsplit(origin)
for origin in settings.CSRF_TRUSTED_ORIGINS
if "*" in origin
):
@@ -284,7 +284,7 @@ class CsrfViewMiddleware(MiddlewareMixin):
if request_origin in self.allowed_origins_exact:
return True
try:
- parsed_origin = urlparse(request_origin)
+ parsed_origin = urlsplit(request_origin)
except ValueError:
return False
request_scheme = parsed_origin.scheme
@@ -300,7 +300,7 @@ class CsrfViewMiddleware(MiddlewareMixin):
raise RejectRequest(REASON_NO_REFERER)
try:
- referer = urlparse(referer)
+ referer = urlsplit(referer)
except ValueError:
raise RejectRequest(REASON_MALFORMED_REFERER)
diff --git a/django/test/client.py b/django/test/client.py
index aa42c1f60a..a755aae05c 100644
--- a/django/test/client.py
+++ b/django/test/client.py
@@ -8,7 +8,7 @@ from functools import partial
from http import HTTPStatus
from importlib import import_module
from io import BytesIO, IOBase
-from urllib.parse import unquote_to_bytes, urljoin, urlparse, urlsplit
+from urllib.parse import unquote_to_bytes, urljoin, urlsplit
from asgiref.sync import sync_to_async
@@ -458,11 +458,7 @@ class RequestFactory:
return json.dumps(data, cls=self.json_encoder) if should_encode else data
def _get_path(self, parsed):
- path = parsed.path
- # If there are parameters, add them
- if parsed.params:
- path += ";" + parsed.params
- path = unquote_to_bytes(path)
+ path = unquote_to_bytes(parsed.path)
# Replace the behavior where non-ASCII values in the WSGI environ are
# arbitrarily decoded with ISO-8859-1.
# Refs comment in `get_bytes_from_wsgi()`.
@@ -647,7 +643,7 @@ class RequestFactory:
**extra,
):
"""Construct an arbitrary HTTP request."""
- parsed = urlparse(str(path)) # path can be lazy
+ parsed = urlsplit(str(path)) # path can be lazy
data = force_bytes(data, settings.DEFAULT_CHARSET)
r = {
"PATH_INFO": self._get_path(parsed),
@@ -671,8 +667,7 @@ class RequestFactory:
# If QUERY_STRING is absent or empty, we want to extract it from the URL.
if not r.get("QUERY_STRING"):
# WSGI requires latin-1 encoded strings. See get_path_info().
- query_string = parsed[4].encode().decode("iso-8859-1")
- r["QUERY_STRING"] = query_string
+ r["QUERY_STRING"] = parsed.query.encode().decode("iso-8859-1")
return self.request(**r)
@@ -748,7 +743,7 @@ class AsyncRequestFactory(RequestFactory):
**extra,
):
"""Construct an arbitrary HTTP request."""
- parsed = urlparse(str(path)) # path can be lazy.
+ parsed = urlsplit(str(path)) # path can be lazy.
data = force_bytes(data, settings.DEFAULT_CHARSET)
s = {
"method": method,
@@ -772,7 +767,7 @@ class AsyncRequestFactory(RequestFactory):
else:
# If QUERY_STRING is absent or empty, we want to extract it from
# the URL.
- s["query_string"] = parsed[4]
+ s["query_string"] = parsed.query
if headers:
extra.update(HttpHeaders.to_asgi_names(headers))
s["headers"] += [
diff --git a/django/test/testcases.py b/django/test/testcases.py
index 0a802c887b..f1c6b5ae9c 100644
--- a/django/test/testcases.py
+++ b/django/test/testcases.py
@@ -21,7 +21,7 @@ from urllib.parse import (
urljoin,
urlparse,
urlsplit,
- urlunparse,
+ urlunsplit,
)
from urllib.request import url2pathname
@@ -541,11 +541,9 @@ class SimpleTestCase(unittest.TestCase):
def normalize(url):
"""Sort the URL's query string parameters."""
url = str(url) # Coerce reverse_lazy() URLs.
- scheme, netloc, path, params, query, fragment = urlparse(url)
+ scheme, netloc, path, query, fragment = urlsplit(url)
query_parts = sorted(parse_qsl(query))
- return urlunparse(
- (scheme, netloc, path, params, urlencode(query_parts), fragment)
- )
+ return urlunsplit((scheme, netloc, path, urlencode(query_parts), fragment))
if msg_prefix:
msg_prefix += ": "
@@ -1637,11 +1635,11 @@ class FSFilesHandler(WSGIHandler):
* the host is provided as part of the base_url
* the request's path isn't under the media path (or equal)
"""
- return path.startswith(self.base_url[2]) and not self.base_url[1]
+ return path.startswith(self.base_url.path) and not self.base_url.netloc
def file_path(self, url):
"""Return the relative path to the file on disk for the given URL."""
- relative_url = url.removeprefix(self.base_url[2])
+ relative_url = url.removeprefix(self.base_url.path)
return url2pathname(relative_url)
def get_response(self, request):
diff --git a/django/utils/http.py b/django/utils/http.py
index 78dfee7fee..bf783562dd 100644
--- a/django/utils/http.py
+++ b/django/utils/http.py
@@ -6,7 +6,7 @@ from datetime import datetime, timezone
from email.utils import formatdate
from urllib.parse import quote, unquote
from urllib.parse import urlencode as original_urlencode
-from urllib.parse import urlparse
+from urllib.parse import urlsplit
from django.utils.datastructures import MultiValueDict
from django.utils.regex_helper import _lazy_re_compile
@@ -271,11 +271,11 @@ def url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False):
def _url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False):
# Chrome considers any URL with more than two slashes to be absolute, but
- # urlparse is not so flexible. Treat any url with three slashes as unsafe.
+ # urlsplit is not so flexible. Treat any url with three slashes as unsafe.
if url.startswith("///"):
return False
try:
- url_info = urlparse(url)
+ url_info = urlsplit(url)
except ValueError: # e.g. invalid IPv6 addresses
return False
# Forbid URLs like http:///example.com - with a scheme, but without a hostname.