diff options
| author | django-bot <ops@djangoproject.com> | 2022-02-03 20:24:19 +0100 |
|---|---|---|
| committer | Mariusz Felisiak <felisiak.mariusz@gmail.com> | 2022-02-07 20:37:05 +0100 |
| commit | 9c19aff7c7561e3a82978a272ecdaad40dda5c00 (patch) | |
| tree | f0506b668a013d0063e5fba3dbf4863b466713ba /django/http/multipartparser.py | |
| parent | f68fa8b45dfac545cfc4111d4e52804c86db68d3 (diff) | |
Refs #33476 -- Reformatted code with Black.
Diffstat (limited to 'django/http/multipartparser.py')
| -rw-r--r-- | django/http/multipartparser.py | 180 |
1 files changed, 107 insertions, 73 deletions
diff --git a/django/http/multipartparser.py b/django/http/multipartparser.py index ef0b339d1b..13ed2fa4cd 100644 --- a/django/http/multipartparser.py +++ b/django/http/multipartparser.py @@ -13,15 +13,15 @@ from urllib.parse import unquote from django.conf import settings from django.core.exceptions import ( - RequestDataTooBig, SuspiciousMultipartForm, TooManyFieldsSent, -) -from django.core.files.uploadhandler import ( - SkipFile, StopFutureHandlers, StopUpload, + RequestDataTooBig, + SuspiciousMultipartForm, + TooManyFieldsSent, ) +from django.core.files.uploadhandler import SkipFile, StopFutureHandlers, StopUpload from django.utils.datastructures import MultiValueDict from django.utils.encoding import force_str -__all__ = ('MultiPartParser', 'MultiPartParserError', 'InputStreamExhausted') +__all__ = ("MultiPartParser", "MultiPartParserError", "InputStreamExhausted") class MultiPartParserError(Exception): @@ -32,6 +32,7 @@ class InputStreamExhausted(Exception): """ No more reads are allowed from this device. """ + pass @@ -47,6 +48,7 @@ class MultiPartParser: ``MultiValueDict.parse()`` reads the input stream in ``chunk_size`` chunks and returns a tuple of ``(MultiValueDict(POST), MultiValueDict(FILES))``. """ + def __init__(self, META, input_data, upload_handlers, encoding=None): """ Initialize the MultiPartParser object. @@ -62,23 +64,28 @@ class MultiPartParser: The encoding with which to treat the incoming data. """ # Content-Type should contain multipart and the boundary information. - content_type = META.get('CONTENT_TYPE', '') - if not content_type.startswith('multipart/'): - raise MultiPartParserError('Invalid Content-Type: %s' % content_type) + content_type = META.get("CONTENT_TYPE", "") + if not content_type.startswith("multipart/"): + raise MultiPartParserError("Invalid Content-Type: %s" % content_type) # Parse the header to get the boundary to split the parts. try: - ctypes, opts = parse_header(content_type.encode('ascii')) + ctypes, opts = parse_header(content_type.encode("ascii")) except UnicodeEncodeError: - raise MultiPartParserError('Invalid non-ASCII Content-Type in multipart: %s' % force_str(content_type)) - boundary = opts.get('boundary') + raise MultiPartParserError( + "Invalid non-ASCII Content-Type in multipart: %s" + % force_str(content_type) + ) + boundary = opts.get("boundary") if not boundary or not cgi.valid_boundary(boundary): - raise MultiPartParserError('Invalid boundary in multipart: %s' % force_str(boundary)) + raise MultiPartParserError( + "Invalid boundary in multipart: %s" % force_str(boundary) + ) # Content-Length should contain the length of the body we are about # to receive. try: - content_length = int(META.get('CONTENT_LENGTH', 0)) + content_length = int(META.get("CONTENT_LENGTH", 0)) except (ValueError, TypeError): content_length = 0 @@ -87,14 +94,14 @@ class MultiPartParser: raise MultiPartParserError("Invalid content length: %r" % content_length) if isinstance(boundary, str): - boundary = boundary.encode('ascii') + boundary = boundary.encode("ascii") self._boundary = boundary self._input_data = input_data # For compatibility with low-level network APIs (with 32-bit integers), # the chunk size should be < 2^31, but still divisible by 4. possible_sizes = [x.chunk_size for x in upload_handlers if x.chunk_size] - self._chunk_size = min([2 ** 31 - 4] + possible_sizes) + self._chunk_size = min([2**31 - 4] + possible_sizes) self._meta = META self._encoding = encoding or settings.DEFAULT_CHARSET @@ -163,32 +170,36 @@ class MultiPartParser: uploaded_file = True try: - disposition = meta_data['content-disposition'][1] - field_name = disposition['name'].strip() + disposition = meta_data["content-disposition"][1] + field_name = disposition["name"].strip() except (KeyError, IndexError, AttributeError): continue - transfer_encoding = meta_data.get('content-transfer-encoding') + transfer_encoding = meta_data.get("content-transfer-encoding") if transfer_encoding is not None: transfer_encoding = transfer_encoding[0].strip() - field_name = force_str(field_name, encoding, errors='replace') + field_name = force_str(field_name, encoding, errors="replace") if item_type == FIELD: # Avoid storing more than DATA_UPLOAD_MAX_NUMBER_FIELDS. num_post_keys += 1 - if (settings.DATA_UPLOAD_MAX_NUMBER_FIELDS is not None and - settings.DATA_UPLOAD_MAX_NUMBER_FIELDS < num_post_keys): + if ( + settings.DATA_UPLOAD_MAX_NUMBER_FIELDS is not None + and settings.DATA_UPLOAD_MAX_NUMBER_FIELDS < num_post_keys + ): raise TooManyFieldsSent( - 'The number of GET/POST parameters exceeded ' - 'settings.DATA_UPLOAD_MAX_NUMBER_FIELDS.' + "The number of GET/POST parameters exceeded " + "settings.DATA_UPLOAD_MAX_NUMBER_FIELDS." ) # Avoid reading more than DATA_UPLOAD_MAX_MEMORY_SIZE. if settings.DATA_UPLOAD_MAX_MEMORY_SIZE is not None: - read_size = settings.DATA_UPLOAD_MAX_MEMORY_SIZE - num_bytes_read + read_size = ( + settings.DATA_UPLOAD_MAX_MEMORY_SIZE - num_bytes_read + ) # This is a post field, we can just set it in the post - if transfer_encoding == 'base64': + if transfer_encoding == "base64": raw_data = field_stream.read(size=read_size) num_bytes_read += len(raw_data) try: @@ -202,26 +213,34 @@ class MultiPartParser: # Add two here to make the check consistent with the # x-www-form-urlencoded check that includes '&='. num_bytes_read += len(field_name) + 2 - if (settings.DATA_UPLOAD_MAX_MEMORY_SIZE is not None and - num_bytes_read > settings.DATA_UPLOAD_MAX_MEMORY_SIZE): - raise RequestDataTooBig('Request body exceeded settings.DATA_UPLOAD_MAX_MEMORY_SIZE.') + if ( + settings.DATA_UPLOAD_MAX_MEMORY_SIZE is not None + and num_bytes_read > settings.DATA_UPLOAD_MAX_MEMORY_SIZE + ): + raise RequestDataTooBig( + "Request body exceeded settings.DATA_UPLOAD_MAX_MEMORY_SIZE." + ) - self._post.appendlist(field_name, force_str(data, encoding, errors='replace')) + self._post.appendlist( + field_name, force_str(data, encoding, errors="replace") + ) elif item_type == FILE: # This is a file, use the handler... - file_name = disposition.get('filename') + file_name = disposition.get("filename") if file_name: - file_name = force_str(file_name, encoding, errors='replace') + file_name = force_str(file_name, encoding, errors="replace") file_name = self.sanitize_file_name(file_name) if not file_name: continue - content_type, content_type_extra = meta_data.get('content-type', ('', {})) + content_type, content_type_extra = meta_data.get( + "content-type", ("", {}) + ) content_type = content_type.strip() - charset = content_type_extra.get('charset') + charset = content_type_extra.get("charset") try: - content_length = int(meta_data.get('content-length')[0]) + content_length = int(meta_data.get("content-length")[0]) except (IndexError, TypeError, ValueError): content_length = None @@ -231,14 +250,18 @@ class MultiPartParser: for handler in handlers: try: handler.new_file( - field_name, file_name, content_type, - content_length, charset, content_type_extra, + field_name, + file_name, + content_type, + content_length, + charset, + content_type_extra, ) except StopFutureHandlers: break for chunk in field_stream: - if transfer_encoding == 'base64': + if transfer_encoding == "base64": # We only special-case base64 transfer encoding # We should always decode base64 chunks by multiple of 4, # ignoring whitespace. @@ -257,7 +280,9 @@ class MultiPartParser: chunk = base64.b64decode(stripped_chunk) except Exception as exc: # Since this is only a chunk, any error is an unfixable error. - raise MultiPartParserError("Could not decode base64 data.") from exc + raise MultiPartParserError( + "Could not decode base64 data." + ) from exc for i, handler in enumerate(handlers): chunk_length = len(chunk) @@ -303,7 +328,10 @@ class MultiPartParser: file_obj = handler.file_complete(counters[i]) if file_obj: # If it returns a file object, then set the files dict. - self._files.appendlist(force_str(old_field_name, self._encoding, errors='replace'), file_obj) + self._files.appendlist( + force_str(old_field_name, self._encoding, errors="replace"), + file_obj, + ) break def sanitize_file_name(self, file_name): @@ -320,12 +348,12 @@ class MultiPartParser: resulting filename should still be considered as untrusted user input. """ file_name = html.unescape(file_name) - file_name = file_name.rsplit('/')[-1] - file_name = file_name.rsplit('\\')[-1] + file_name = file_name.rsplit("/")[-1] + file_name = file_name.rsplit("\\")[-1] # Remove non-printable characters. - file_name = ''.join([char for char in file_name if char.isprintable()]) + file_name = "".join([char for char in file_name if char.isprintable()]) - if file_name in {'', '.', '..'}: + if file_name in {"", ".", ".."}: return None return file_name @@ -336,7 +364,7 @@ class MultiPartParser: # FIXME: this currently assumes that upload handlers store the file as 'file' # We should document that... (Maybe add handler.free_file to complement new_file) for handler in self._upload_handlers: - if hasattr(handler, 'file'): + if hasattr(handler, "file"): handler.file.close() @@ -348,6 +376,7 @@ class LazyStream: LazyStream object will support iteration, reading, and keeping a "look-back" variable in case you need to "unget" some bytes. """ + def __init__(self, producer, length=None): """ Every LazyStream must have a producer when instantiated. @@ -357,7 +386,7 @@ class LazyStream: """ self._producer = producer self._empty = False - self._leftover = b'' + self._leftover = b"" self.length = length self.position = 0 self._remaining = length @@ -371,14 +400,14 @@ class LazyStream: remaining = self._remaining if size is None else size # do the whole thing in one shot if no limit was provided. if remaining is None: - yield b''.join(self) + yield b"".join(self) return # otherwise do some bookkeeping to return exactly enough # of the stream and stashing any extra content we get from # the producer while remaining != 0: - assert remaining > 0, 'remaining bytes to read should never go negative' + assert remaining > 0, "remaining bytes to read should never go negative" try: chunk = next(self) @@ -390,7 +419,7 @@ class LazyStream: remaining -= len(emitting) yield emitting - return b''.join(parts()) + return b"".join(parts()) def __next__(self): """ @@ -401,7 +430,7 @@ class LazyStream: """ if self._leftover: output = self._leftover - self._leftover = b'' + self._leftover = b"" else: output = next(self._producer) self._unget_history = [] @@ -442,10 +471,13 @@ class LazyStream: maliciously-malformed MIME request. """ self._unget_history = [num_bytes] + self._unget_history[:49] - number_equal = len([ - current_number for current_number in self._unget_history - if current_number == num_bytes - ]) + number_equal = len( + [ + current_number + for current_number in self._unget_history + if current_number == num_bytes + ] + ) if number_equal > 40: raise SuspiciousMultipartForm( @@ -460,6 +492,7 @@ class ChunkIter: An iterable that will yield chunks of data. Given a file-like object as the constructor, yield chunks of read operations from that object. """ + def __init__(self, flo, chunk_size=64 * 1024): self.flo = flo self.chunk_size = chunk_size @@ -482,6 +515,7 @@ class InterBoundaryIter: """ A Producer that will iterate over boundaries. """ + def __init__(self, stream, boundary): self._stream = stream self._boundary = boundary @@ -548,7 +582,7 @@ class BoundaryIter: if not chunks: raise StopIteration() - chunk = b''.join(chunks) + chunk = b"".join(chunks) boundary = self._find_boundary(chunk) if boundary: @@ -584,10 +618,10 @@ class BoundaryIter: next = index + len(self._boundary) # backup over CRLF last = max(0, end - 1) - if data[last:last + 1] == b'\n': + if data[last : last + 1] == b"\n": end -= 1 last = max(0, end - 1) - if data[last:last + 1] == b'\r': + if data[last : last + 1] == b"\r": end -= 1 return end, next @@ -613,12 +647,12 @@ def parse_boundary_stream(stream, max_header_size): # 'find' returns the top of these four bytes, so we'll # need to munch them later to prevent them from polluting # the payload. - header_end = chunk.find(b'\r\n\r\n') + header_end = chunk.find(b"\r\n\r\n") def _parse_header(line): main_value_pair, params = parse_header(line) try: - name, value = main_value_pair.split(':', 1) + name, value = main_value_pair.split(":", 1) except ValueError: raise ValueError("Invalid header: %r" % line) return name, (value, params) @@ -633,13 +667,13 @@ def parse_boundary_stream(stream, max_header_size): # here we place any excess chunk back onto the stream, as # well as throwing away the CRLFCRLF bytes from above. - stream.unget(chunk[header_end + 4:]) + stream.unget(chunk[header_end + 4 :]) TYPE = RAW outdict = {} # Eliminate blank lines - for line in header.split(b'\r\n'): + for line in header.split(b"\r\n"): # This terminology ("main value" and "dictionary of # parameters") is from the Python docs. try: @@ -647,9 +681,9 @@ def parse_boundary_stream(stream, max_header_size): except ValueError: continue - if name == 'content-disposition': + if name == "content-disposition": TYPE = FIELD - if params.get('filename'): + if params.get("filename"): TYPE = FILE outdict[name] = value, params @@ -663,7 +697,7 @@ def parse_boundary_stream(stream, max_header_size): class Parser: def __init__(self, stream, boundary): self._stream = stream - self._separator = b'--' + boundary + self._separator = b"--" + boundary def __iter__(self): boundarystream = InterBoundaryIter(self._stream, self._separator) @@ -679,24 +713,24 @@ def parse_header(line): Input (line): bytes, output: str for key/name, bytes for values which will be decoded later. """ - plist = _parse_header_params(b';' + line) - key = plist.pop(0).lower().decode('ascii') + plist = _parse_header_params(b";" + line) + key = plist.pop(0).lower().decode("ascii") pdict = {} for p in plist: - i = p.find(b'=') + i = p.find(b"=") if i >= 0: has_encoding = False - name = p[:i].strip().lower().decode('ascii') - if name.endswith('*'): + name = p[:i].strip().lower().decode("ascii") + if name.endswith("*"): # Lang/encoding embedded in the value (like "filename*=UTF-8''file.ext") # https://tools.ietf.org/html/rfc2231#section-4 name = name[:-1] if p.count(b"'") == 2: has_encoding = True - value = p[i + 1:].strip() + value = p[i + 1 :].strip() if len(value) >= 2 and value[:1] == value[-1:] == b'"': value = value[1:-1] - value = value.replace(b'\\\\', b'\\').replace(b'\\"', b'"') + value = value.replace(b"\\\\", b"\\").replace(b'\\"', b'"') if has_encoding: encoding, lang, value = value.split(b"'") value = unquote(value.decode(), encoding=encoding.decode()) @@ -706,11 +740,11 @@ def parse_header(line): def _parse_header_params(s): plist = [] - while s[:1] == b';': + while s[:1] == b";": s = s[1:] - end = s.find(b';') + end = s.find(b";") while end > 0 and s.count(b'"', 0, end) % 2: - end = s.find(b';', end + 1) + end = s.find(b";", end + 1) if end < 0: end = len(s) f = s[:end] |
