summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Mehrdad <mhrddmoradii@gmail.com> 2022-06-24 14:46:34 -0400
committer: Mariusz Felisiak <felisiak.mariusz@gmail.com> 2022-06-28 09:42:47 +0200
commit: d4d5427571b4bf3a21c902276c2a00215c2a37cc (patch)
tree: 59cc1bc214b414636b57b0e61fc6515a6e528f5f
parent: bff5c114be2b7a3fbc735c232abcc6ad4db89a9d (diff)
Refs #33697 -- Used django.utils.http.parse_header_parameters() for parsing boundary streams.
This also removes unused parse_header() and _parse_header_params() helpers in django.http.multipartparser.
-rw-r--r-- django/http/multipartparser.py | 51
-rw-r--r-- django/utils/http.py | 13
-rw-r--r-- tests/file_uploads/tests.py | 45
-rw-r--r-- tests/utils_tests/test_http.py | 38
4 files changed, 52 insertions, 95 deletions
diff --git a/django/http/multipartparser.py b/django/http/multipartparser.py
index 73ef074744..b3e0925a42 100644
--- a/django/http/multipartparser.py
+++ b/django/http/multipartparser.py
@@ -8,7 +8,6 @@ import base64
import binascii
import collections
import html
-from urllib.parse import unquote
from django.conf import settings
from django.core.exceptions import (
@@ -675,8 +674,9 @@ def parse_boundary_stream(stream, max_header_size):
# This terminology ("main value" and "dictionary of
# parameters") is from the Python docs.
try:
- main_value_pair, params = parse_header(line)
+ main_value_pair, params = parse_header_parameters(line.decode())
name, value = main_value_pair.split(":", 1)
+ params = {k: v.encode() for k, v in params.items()}
except ValueError: # Invalid header.
continue
@@ -703,50 +703,3 @@ class Parser:
for sub_stream in boundarystream:
# Iterate over each part
yield parse_boundary_stream(sub_stream, 1024)
-
-
-def parse_header(line):
- """
- Parse the header into a key-value.
-
- Input (line): bytes, output: str for key/name, bytes for values which
- will be decoded later.
- """
- plist = _parse_header_params(b";" + line)
- key = plist.pop(0).lower().decode("ascii")
- pdict = {}
- for p in plist:
- i = p.find(b"=")
- if i >= 0:
- has_encoding = False
- name = p[:i].strip().lower().decode("ascii")
- if name.endswith("*"):
- # Lang/encoding embedded in the value (like "filename*=UTF-8''file.ext")
- # https://tools.ietf.org/html/rfc2231#section-4
- name = name[:-1]
- if p.count(b"'") == 2:
- has_encoding = True
- value = p[i + 1 :].strip()
- if len(value) >= 2 and value[:1] == value[-1:] == b'"':
- value = value[1:-1]
- value = value.replace(b"\\\\", b"\\").replace(b'\\"', b'"')
- if has_encoding:
- encoding, lang, value = value.split(b"'")
- value = unquote(value.decode(), encoding=encoding.decode())
- pdict[name] = value
- return key, pdict
-
-
-def _parse_header_params(s):
- plist = []
- while s[:1] == b";":
- s = s[1:]
- end = s.find(b";")
- while end > 0 and (s.count(b'"', 0, end) - s.count(b'\\"', 0, end)) % 2:
- end = s.find(b";", end + 1)
- if end < 0:
- end = len(s)
- f = s[:end]
- plist.append(f.strip())
- s = s[end:]
- return plist
diff --git a/django/utils/http.py b/django/utils/http.py
index 6e2091bf52..51fdc4b149 100644
--- a/django/utils/http.py
+++ b/django/utils/http.py
@@ -11,6 +11,7 @@ from urllib.parse import (
_splitnetloc,
_splitparams,
scheme_chars,
+ unquote,
)
from urllib.parse import urlencode as original_urlencode
from urllib.parse import uses_params
@@ -387,15 +388,25 @@ def parse_header_parameters(line):
Return the main content-type and a dictionary of options.
"""
parts = _parseparam(";" + line)
- key = parts.__next__()
+ key = parts.__next__().lower()
pdict = {}
for p in parts:
i = p.find("=")
if i >= 0:
+ has_encoding = False
name = p[:i].strip().lower()
+ if name.endswith("*"):
+ # Lang/encoding embedded in the value (like "filename*=UTF-8''file.ext")
+ # https://tools.ietf.org/html/rfc2231#section-4
+ name = name[:-1]
+ if p.count("'") == 2:
+ has_encoding = True
value = p[i + 1 :].strip()
if len(value) >= 2 and value[0] == value[-1] == '"':
value = value[1:-1]
value = value.replace("\\\\", "\\").replace('\\"', '"')
+ if has_encoding:
+ encoding, lang, value = value.split("'")
+ value = unquote(value, encoding=encoding)
pdict[name] = value
return key, pdict
diff --git a/tests/file_uploads/tests.py b/tests/file_uploads/tests.py
index 44c54d908e..c6d76aa4c9 100644
--- a/tests/file_uploads/tests.py
+++ b/tests/file_uploads/tests.py
@@ -17,7 +17,6 @@ from django.http.multipartparser import (
MultiPartParser,
MultiPartParserError,
Parser,
- parse_header,
)
from django.test import SimpleTestCase, TestCase, client, override_settings
@@ -906,47 +905,3 @@ class MultiParserTests(SimpleTestCase):
for file_name in CANDIDATE_INVALID_FILE_NAMES:
with self.subTest(file_name=file_name):
self.assertIsNone(parser.sanitize_file_name(file_name))
-
- def test_rfc2231_parsing(self):
- test_data = (
- (
- b"Content-Type: application/x-stuff; "
- b"title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A",
- "This is ***fun***",
- ),
- (
- b"Content-Type: application/x-stuff; title*=UTF-8''foo-%c3%a4.html",
- "foo-ä.html",
- ),
- (
- b"Content-Type: application/x-stuff; title*=iso-8859-1''foo-%E4.html",
- "foo-ä.html",
- ),
- )
- for raw_line, expected_title in test_data:
- parsed = parse_header(raw_line)
- self.assertEqual(parsed[1]["title"], expected_title)
-
- def test_rfc2231_wrong_title(self):
- """
- Test wrongly formatted RFC 2231 headers (missing double single quotes).
- Parsing should not crash (#24209).
- """
- test_data = (
- (
- b"Content-Type: application/x-stuff; "
- b"title*='This%20is%20%2A%2A%2Afun%2A%2A%2A",
- b"'This%20is%20%2A%2A%2Afun%2A%2A%2A",
- ),
- (b"Content-Type: application/x-stuff; title*='foo.html", b"'foo.html"),
- (b"Content-Type: application/x-stuff; title*=bar.html", b"bar.html"),
- )
- for raw_line, expected_title in test_data:
- parsed = parse_header(raw_line)
- self.assertEqual(parsed[1]["title"], expected_title)
-
- def test_parse_header_with_double_quotes_and_semicolon(self):
- self.assertEqual(
- parse_header(b'form-data; name="files"; filename="fo\\"o;bar"'),
- ("form-data", {"name": b"files", "filename": b'fo"o;bar'}),
- )
diff --git a/tests/utils_tests/test_http.py b/tests/utils_tests/test_http.py
index 2ba617dfc9..b2754b4ddb 100644
--- a/tests/utils_tests/test_http.py
+++ b/tests/utils_tests/test_http.py
@@ -472,3 +472,41 @@ class ParseHeaderParameterTests(unittest.TestCase):
for header, expected in tests:
with self.subTest(header=header):
self.assertEqual(parse_header_parameters(header), expected)
+
+ def test_rfc2231_parsing(self):
+ test_data = (
+ (
+ "Content-Type: application/x-stuff; "
+ "title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A",
+ "This is ***fun***",
+ ),
+ (
+ "Content-Type: application/x-stuff; title*=UTF-8''foo-%c3%a4.html",
+ "foo-ä.html",
+ ),
+ (
+ "Content-Type: application/x-stuff; title*=iso-8859-1''foo-%E4.html",
+ "foo-ä.html",
+ ),
+ )
+ for raw_line, expected_title in test_data:
+ parsed = parse_header_parameters(raw_line)
+ self.assertEqual(parsed[1]["title"], expected_title)
+
+ def test_rfc2231_wrong_title(self):
+ """
+ Test wrongly formatted RFC 2231 headers (missing double single quotes).
+ Parsing should not crash (#24209).
+ """
+ test_data = (
+ (
+ "Content-Type: application/x-stuff; "
+ "title*='This%20is%20%2A%2A%2Afun%2A%2A%2A",
+ "'This%20is%20%2A%2A%2Afun%2A%2A%2A",
+ ),
+ ("Content-Type: application/x-stuff; title*='foo.html", "'foo.html"),
+ ("Content-Type: application/x-stuff; title*=bar.html", "bar.html"),
+ )
+ for raw_line, expected_title in test_data:
+ parsed = parse_header_parameters(raw_line)
+ self.assertEqual(parsed[1]["title"], expected_title)