Fixed #35467 -- Replaced urlparse with urlsplit where appropriate.

This work should not generate any change of functionality, and `urlsplit` is approximately 6x faster. Most use cases of `urlparse` didn't touch the path, so they can be converted to `urlsplit` without any issue (the only difference is that `urlsplit` leaves any `;params` in the path instead of splitting them out). Most of those which do use `.path` simply parse the URL, mutate the query string, then put it back together, which is also fine (so long as `urlunsplit` is used).
author: Jake Howard <RealOrangeOne@users.noreply.github.com> 2024-05-29 14:48:27 +0100
committer: GitHub <noreply@github.com> 2024-05-29 10:48:27 -0300
commit: ff308a06047cd60806d604a7cf612e5656ee2ac9 (patch)
tree: f2139fbf020cbdf33bad64a3377700623c18a44f /django/test
parent: 02dab94c7b8585c7ae3854465574d768e1df75d3 (diff)
2 files changed, 11 insertions, 18 deletions
diff --git a/django/test/client.py b/django/test/client.py
index aa42c1f60a..a755aae05c 100644
--- a/django/test/client.py
+++ b/django/test/client.py
@@ -8,7 +8,7 @@ from functools import partial
 from http import HTTPStatus
 from importlib import import_module
 from io import BytesIO, IOBase
-from urllib.parse import unquote_to_bytes, urljoin, urlparse, urlsplit
+from urllib.parse import unquote_to_bytes, urljoin, urlsplit
 
 from asgiref.sync import sync_to_async
 
@@ -458,11 +458,7 @@ class RequestFactory:
         return json.dumps(data, cls=self.json_encoder) if should_encode else data
 
     def _get_path(self, parsed):
-        path = parsed.path
-        # If there are parameters, add them
-        if parsed.params:
-            path += ";" + parsed.params
-        path = unquote_to_bytes(path)
+        path = unquote_to_bytes(parsed.path)
         # Replace the behavior where non-ASCII values in the WSGI environ are
         # arbitrarily decoded with ISO-8859-1.
         # Refs comment in `get_bytes_from_wsgi()`.
@@ -647,7 +643,7 @@ class RequestFactory:
         **extra,
     ):
         """Construct an arbitrary HTTP request."""
-        parsed = urlparse(str(path))  # path can be lazy
+        parsed = urlsplit(str(path))  # path can be lazy
         data = force_bytes(data, settings.DEFAULT_CHARSET)
         r = {
             "PATH_INFO": self._get_path(parsed),
@@ -671,8 +667,7 @@ class RequestFactory:
         # If QUERY_STRING is absent or empty, we want to extract it from the URL.
         if not r.get("QUERY_STRING"):
             # WSGI requires latin-1 encoded strings. See get_path_info().
-            query_string = parsed[4].encode().decode("iso-8859-1")
-            r["QUERY_STRING"] = query_string
+            r["QUERY_STRING"] = parsed.query.encode().decode("iso-8859-1")
         return self.request(**r)
 
 
@@ -748,7 +743,7 @@ class AsyncRequestFactory(RequestFactory):
         **extra,
     ):
         """Construct an arbitrary HTTP request."""
-        parsed = urlparse(str(path))  # path can be lazy.
+        parsed = urlsplit(str(path))  # path can be lazy.
         data = force_bytes(data, settings.DEFAULT_CHARSET)
         s = {
             "method": method,
@@ -772,7 +767,7 @@ class AsyncRequestFactory(RequestFactory):
         else:
             # If QUERY_STRING is absent or empty, we want to extract it from
             # the URL.
-            s["query_string"] = parsed[4]
+            s["query_string"] = parsed.query
         if headers:
             extra.update(HttpHeaders.to_asgi_names(headers))
         s["headers"] += [
diff --git a/django/test/testcases.py b/django/test/testcases.py
index 0a802c887b..f1c6b5ae9c 100644
--- a/django/test/testcases.py
+++ b/django/test/testcases.py
@@ -21,7 +21,7 @@ from urllib.parse import (
     urljoin,
     urlparse,
     urlsplit,
-    urlunparse,
+    urlunsplit,
 )
 from urllib.request import url2pathname
 
@@ -541,11 +541,9 @@ class SimpleTestCase(unittest.TestCase):
         def normalize(url):
             """Sort the URL's query string parameters."""
             url = str(url)  # Coerce reverse_lazy() URLs.
-            scheme, netloc, path, params, query, fragment = urlparse(url)
+            scheme, netloc, path, query, fragment = urlsplit(url)
             query_parts = sorted(parse_qsl(query))
-            return urlunparse(
-                (scheme, netloc, path, params, urlencode(query_parts), fragment)
-            )
+            return urlunsplit((scheme, netloc, path, urlencode(query_parts), fragment))
 
         if msg_prefix:
             msg_prefix += ": "
@@ -1637,11 +1635,11 @@ class FSFilesHandler(WSGIHandler):
         * the host is provided as part of the base_url
         * the request's path isn't under the media path (or equal)
         """
-        return path.startswith(self.base_url[2]) and not self.base_url[1]
+        return path.startswith(self.base_url.path) and not self.base_url.netloc
 
     def file_path(self, url):
         """Return the relative path to the file on disk for the given URL."""
-        relative_url = url.removeprefix(self.base_url[2])
+        relative_url = url.removeprefix(self.base_url.path)
         return url2pathname(relative_url)
 
     def get_response(self, request):
author	Jake Howard <RealOrangeOne@users.noreply.github.com>	2024-05-29 14:48:27 +0100
committer	GitHub <noreply@github.com>	2024-05-29 10:48:27 -0300
commit	ff308a06047cd60806d604a7cf612e5656ee2ac9 (patch)
tree	f2139fbf020cbdf33bad64a3377700623c18a44f /django/test
parent	02dab94c7b8585c7ae3854465574d768e1df75d3 (diff)