Refs #33697 -- Used django.utils.http.parse_header_parameters() for parsing boundary streams.

This also removes the now-unused parse_header() and _parse_header_params() helpers from django.http.multipartparser.
author: Mehrdad <mhrddmoradii@gmail.com> 2022-06-24 14:46:34 -0400
committer: Mariusz Felisiak <felisiak.mariusz@gmail.com> 2022-06-28 09:42:47 +0200
commit: d4d5427571b4bf3a21c902276c2a00215c2a37cc (patch)
tree: 59cc1bc214b414636b57b0e61fc6515a6e528f5f
parent: bff5c114be2b7a3fbc735c232abcc6ad4db89a9d (diff)
4 files changed, 52 insertions, 95 deletions
diff --git a/django/http/multipartparser.py b/django/http/multipartparser.py
index 73ef074744..b3e0925a42 100644
--- a/django/http/multipartparser.py
+++ b/django/http/multipartparser.py
@@ -8,7 +8,6 @@ import base64
 import binascii
 import collections
 import html
-from urllib.parse import unquote
 
 from django.conf import settings
 from django.core.exceptions import (
@@ -675,8 +674,9 @@ def parse_boundary_stream(stream, max_header_size):
         # This terminology ("main value" and "dictionary of
         # parameters") is from the Python docs.
         try:
-            main_value_pair, params = parse_header(line)
+            main_value_pair, params = parse_header_parameters(line.decode())
             name, value = main_value_pair.split(":", 1)
+            params = {k: v.encode() for k, v in params.items()}
         except ValueError:  # Invalid header.
             continue
 
@@ -703,50 +703,3 @@ class Parser:
         for sub_stream in boundarystream:
             # Iterate over each part
             yield parse_boundary_stream(sub_stream, 1024)
-
-
-def parse_header(line):
-    """
-    Parse the header into a key-value.
-
-    Input (line): bytes, output: str for key/name, bytes for values which
-    will be decoded later.
-    """
-    plist = _parse_header_params(b";" + line)
-    key = plist.pop(0).lower().decode("ascii")
-    pdict = {}
-    for p in plist:
-        i = p.find(b"=")
-        if i >= 0:
-            has_encoding = False
-            name = p[:i].strip().lower().decode("ascii")
-            if name.endswith("*"):
-                # Lang/encoding embedded in the value (like "filename*=UTF-8''file.ext")
-                # https://tools.ietf.org/html/rfc2231#section-4
-                name = name[:-1]
-                if p.count(b"'") == 2:
-                    has_encoding = True
-            value = p[i + 1 :].strip()
-            if len(value) >= 2 and value[:1] == value[-1:] == b'"':
-                value = value[1:-1]
-                value = value.replace(b"\\\\", b"\\").replace(b'\\"', b'"')
-            if has_encoding:
-                encoding, lang, value = value.split(b"'")
-                value = unquote(value.decode(), encoding=encoding.decode())
-            pdict[name] = value
-    return key, pdict
-
-
-def _parse_header_params(s):
-    plist = []
-    while s[:1] == b";":
-        s = s[1:]
-        end = s.find(b";")
-        while end > 0 and (s.count(b'"', 0, end) - s.count(b'\\"', 0, end)) % 2:
-            end = s.find(b";", end + 1)
-        if end < 0:
-            end = len(s)
-        f = s[:end]
-        plist.append(f.strip())
-        s = s[end:]
-    return plist
diff --git a/django/utils/http.py b/django/utils/http.py
index 6e2091bf52..51fdc4b149 100644
--- a/django/utils/http.py
+++ b/django/utils/http.py
@@ -11,6 +11,7 @@ from urllib.parse import (
     _splitnetloc,
     _splitparams,
     scheme_chars,
+    unquote,
 )
 from urllib.parse import urlencode as original_urlencode
 from urllib.parse import uses_params
@@ -387,15 +388,25 @@ def parse_header_parameters(line):
     Return the main content-type and a dictionary of options.
     """
     parts = _parseparam(";" + line)
-    key = parts.__next__()
+    key = parts.__next__().lower()
     pdict = {}
     for p in parts:
         i = p.find("=")
         if i >= 0:
+            has_encoding = False
             name = p[:i].strip().lower()
+            if name.endswith("*"):
+                # Lang/encoding embedded in the value (like "filename*=UTF-8''file.ext")
+                # https://tools.ietf.org/html/rfc2231#section-4
+                name = name[:-1]
+                if p.count("'") == 2:
+                    has_encoding = True
             value = p[i + 1 :].strip()
             if len(value) >= 2 and value[0] == value[-1] == '"':
                 value = value[1:-1]
                 value = value.replace("\\\\", "\\").replace('\\"', '"')
+            if has_encoding:
+                encoding, lang, value = value.split("'")
+                value = unquote(value, encoding=encoding)
             pdict[name] = value
     return key, pdict
diff --git a/tests/file_uploads/tests.py b/tests/file_uploads/tests.py
index 44c54d908e..c6d76aa4c9 100644
--- a/tests/file_uploads/tests.py
+++ b/tests/file_uploads/tests.py
@@ -17,7 +17,6 @@ from django.http.multipartparser import (
     MultiPartParser,
     MultiPartParserError,
     Parser,
-    parse_header,
 )
 from django.test import SimpleTestCase, TestCase, client, override_settings
 
@@ -906,47 +905,3 @@ class MultiParserTests(SimpleTestCase):
         for file_name in CANDIDATE_INVALID_FILE_NAMES:
             with self.subTest(file_name=file_name):
                 self.assertIsNone(parser.sanitize_file_name(file_name))
-
-    def test_rfc2231_parsing(self):
-        test_data = (
-            (
-                b"Content-Type: application/x-stuff; "
-                b"title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A",
-                "This is ***fun***",
-            ),
-            (
-                b"Content-Type: application/x-stuff; title*=UTF-8''foo-%c3%a4.html",
-                "foo-ä.html",
-            ),
-            (
-                b"Content-Type: application/x-stuff; title*=iso-8859-1''foo-%E4.html",
-                "foo-ä.html",
-            ),
-        )
-        for raw_line, expected_title in test_data:
-            parsed = parse_header(raw_line)
-            self.assertEqual(parsed[1]["title"], expected_title)
-
-    def test_rfc2231_wrong_title(self):
-        """
-        Test wrongly formatted RFC 2231 headers (missing double single quotes).
-        Parsing should not crash (#24209).
-        """
-        test_data = (
-            (
-                b"Content-Type: application/x-stuff; "
-                b"title*='This%20is%20%2A%2A%2Afun%2A%2A%2A",
-                b"'This%20is%20%2A%2A%2Afun%2A%2A%2A",
-            ),
-            (b"Content-Type: application/x-stuff; title*='foo.html", b"'foo.html"),
-            (b"Content-Type: application/x-stuff; title*=bar.html", b"bar.html"),
-        )
-        for raw_line, expected_title in test_data:
-            parsed = parse_header(raw_line)
-            self.assertEqual(parsed[1]["title"], expected_title)
-
-    def test_parse_header_with_double_quotes_and_semicolon(self):
-        self.assertEqual(
-            parse_header(b'form-data; name="files"; filename="fo\\"o;bar"'),
-            ("form-data", {"name": b"files", "filename": b'fo"o;bar'}),
-        )
diff --git a/tests/utils_tests/test_http.py b/tests/utils_tests/test_http.py
index 2ba617dfc9..b2754b4ddb 100644
--- a/tests/utils_tests/test_http.py
+++ b/tests/utils_tests/test_http.py
@@ -472,3 +472,41 @@ class ParseHeaderParameterTests(unittest.TestCase):
         for header, expected in tests:
             with self.subTest(header=header):
                 self.assertEqual(parse_header_parameters(header), expected)
+
+    def test_rfc2231_parsing(self):
+        test_data = (
+            (
+                "Content-Type: application/x-stuff; "
+                "title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A",
+                "This is ***fun***",
+            ),
+            (
+                "Content-Type: application/x-stuff; title*=UTF-8''foo-%c3%a4.html",
+                "foo-ä.html",
+            ),
+            (
+                "Content-Type: application/x-stuff; title*=iso-8859-1''foo-%E4.html",
+                "foo-ä.html",
+            ),
+        )
+        for raw_line, expected_title in test_data:
+            parsed = parse_header_parameters(raw_line)
+            self.assertEqual(parsed[1]["title"], expected_title)
+
+    def test_rfc2231_wrong_title(self):
+        """
+        Test wrongly formatted RFC 2231 headers (missing double single quotes).
+        Parsing should not crash (#24209).
+        """
+        test_data = (
+            (
+                "Content-Type: application/x-stuff; "
+                "title*='This%20is%20%2A%2A%2Afun%2A%2A%2A",
+                "'This%20is%20%2A%2A%2Afun%2A%2A%2A",
+            ),
+            ("Content-Type: application/x-stuff; title*='foo.html", "'foo.html"),
+            ("Content-Type: application/x-stuff; title*=bar.html", "bar.html"),
+        )
+        for raw_line, expected_title in test_data:
+            parsed = parse_header_parameters(raw_line)
+            self.assertEqual(parsed[1]["title"], expected_title)
author	Mehrdad <mhrddmoradii@gmail.com>	2022-06-24 14:46:34 -0400
committer	Mariusz Felisiak <felisiak.mariusz@gmail.com>	2022-06-28 09:42:47 +0200
commit	d4d5427571b4bf3a21c902276c2a00215c2a37cc (patch)
tree	59cc1bc214b414636b57b0e61fc6515a6e528f5f
parent	bff5c114be2b7a3fbc735c232abcc6ad4db89a9d (diff)