diff options
Diffstat (limited to 'django/utils/http.py')
| -rw-r--r-- | django/utils/http.py | 57 |
1 files changed, 54 insertions, 3 deletions
diff --git a/django/utils/http.py b/django/utils/http.py index bd2daad4b2..1433df4ff0 100644 --- a/django/utils/http.py +++ b/django/utils/http.py @@ -7,8 +7,9 @@ import warnings from binascii import Error as BinasciiError from email.utils import formatdate from urllib.parse import ( - quote, quote_plus, unquote, unquote_plus, urlencode as original_urlencode, - urlparse, + ParseResult, SplitResult, _coerce_args, _splitnetloc, _splitparams, quote, + quote_plus, scheme_chars, unquote, unquote_plus, + urlencode as original_urlencode, uses_params, ) from django.core.exceptions import TooManyFieldsSent @@ -293,12 +294,62 @@ def is_safe_url(url, host=None, allowed_hosts=None, require_https=False): _is_safe_url(url.replace('\\', '/'), allowed_hosts, require_https=require_https)) +# Copied from urllib.parse.urlparse() but uses fixed urlsplit() function. +def _urlparse(url, scheme='', allow_fragments=True): + """Parse a URL into 6 components: + <scheme>://<netloc>/<path>;<params>?<query>#<fragment> + Return a 6-tuple: (scheme, netloc, path, params, query, fragment). + Note that we don't break the components up in smaller bits + (e.g. netloc is a single string) and we don't expand % escapes.""" + url, scheme, _coerce_result = _coerce_args(url, scheme) + splitresult = _urlsplit(url, scheme, allow_fragments) + scheme, netloc, url, query, fragment = splitresult + if scheme in uses_params and ';' in url: + url, params = _splitparams(url) + else: + params = '' + result = ParseResult(scheme, netloc, url, params, query, fragment) + return _coerce_result(result) + + +# Copied from urllib.parse.urlsplit() with +# https://github.com/python/cpython/pull/661 applied. +def _urlsplit(url, scheme='', allow_fragments=True): + """Parse a URL into 5 components: + <scheme>://<netloc>/<path>?<query>#<fragment> + Return a 5-tuple: (scheme, netloc, path, query, fragment). + Note that we don't break the components up in smaller bits + (e.g. netloc is a single string) and we don't expand % escapes.""" + url, scheme, _coerce_result = _coerce_args(url, scheme) + allow_fragments = bool(allow_fragments) + netloc = query = fragment = '' + i = url.find(':') + if i > 0: + for c in url[:i]: + if c not in scheme_chars: + break + else: + scheme, url = url[:i].lower(), url[i + 1:] + + if url[:2] == '//': + netloc, url = _splitnetloc(url, 2) + if (('[' in netloc and ']' not in netloc) or + (']' in netloc and '[' not in netloc)): + raise ValueError("Invalid IPv6 URL") + if allow_fragments and '#' in url: + url, fragment = url.split('#', 1) + if '?' in url: + url, query = url.split('?', 1) + v = SplitResult(scheme, netloc, url, query, fragment) + return _coerce_result(v) + + def _is_safe_url(url, allowed_hosts, require_https=False): # Chrome considers any URL with more than two slashes to be absolute, but # urlparse is not so flexible. Treat any url with three slashes as unsafe. if url.startswith('///'): return False - url_info = urlparse(url) + url_info = _urlparse(url) # Forbid URLs like http:///example.com - with a scheme, but without a hostname. # In that URL, example.com is not the hostname but, a path component. However, # Chrome will still consider example.com to be the hostname, so we must not |
