diff options
| author | Claude Paroz <claude@2xlibre.net> | 2014-06-26 21:14:30 +0200 |
|---|---|---|
| committer | Claude Paroz <claude@2xlibre.net> | 2014-09-09 21:58:07 +0200 |
| commit | 4b8a1d2c0d1a8c5107f3aef01597db78d2a2a5ce (patch) | |
| tree | f23e274b5f112fadf6f4df992db65c8c91a73083 /django | |
| parent | 9562ffea9719f2b1795e676718c6d433a1f8d4ed (diff) | |
Fixed #22267 -- Fixed unquote/quote in smart_urlquote
Thanks Md. Enzam Hossain for the report and initial patch, and
Tim Graham for the review.
Diffstat (limited to 'django')
| -rw-r--r-- | django/utils/html.py | 37 |
1 file changed, 25 insertions, 12 deletions
```diff
diff --git a/django/utils/html.py b/django/utils/html.py
index 58165ff84c..6eed0b0192 100644
--- a/django/utils/html.py
+++ b/django/utils/html.py
@@ -12,7 +12,7 @@ from django.utils.functional import allow_lazy
 from django.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS
 from django.utils.safestring import SafeData, mark_safe
 from django.utils import six
-from django.utils.six.moves.urllib.parse import quote, unquote, urlsplit, urlunsplit
+from django.utils.six.moves.urllib.parse import parse_qsl, quote, unquote, urlencode, urlsplit, urlunsplit
 from django.utils.text import normalize_newlines
 
 from .html_parser import HTMLParser, HTMLParseError
@@ -218,25 +218,38 @@ strip_entities = allow_lazy(strip_entities, six.text_type)
 
 def smart_urlquote(url):
     "Quotes a URL if it isn't already quoted."
+    def unquote_quote(segment):
+        segment = unquote(force_str(segment))
+        # Tilde is part of RFC3986 Unreserved Characters
+        # http://tools.ietf.org/html/rfc3986#section-2.3
+        # See also http://bugs.python.org/issue16285
+        segment = quote(segment, safe=RFC3986_SUBDELIMS + RFC3986_GENDELIMS + str('~'))
+        return force_text(segment)
+
     # Handle IDN before quoting.
     try:
         scheme, netloc, path, query, fragment = urlsplit(url)
-        try:
-            netloc = netloc.encode('idna').decode('ascii')  # IDN -> ACE
-        except UnicodeError:  # invalid domain part
-            pass
-        else:
-            url = urlunsplit((scheme, netloc, path, query, fragment))
     except ValueError:
         # invalid IPv6 URL (normally square brackets in hostname part).
-        pass
+        return unquote_quote(url)
+
+    try:
+        netloc = netloc.encode('idna').decode('ascii')  # IDN -> ACE
+    except UnicodeError:  # invalid domain part
+        return unquote_quote(url)
 
-    url = unquote(force_str(url))
-    # See http://bugs.python.org/issue2637
-    url = quote(url, safe=RFC3986_SUBDELIMS + RFC3986_GENDELIMS + str('~'))
+    if query:
+        # Separately unquoting key/value, so as to not mix querystring separators
+        # included in query values. See #22267.
+        query_parts = [(unquote(force_str(q[0])), unquote(force_str(q[1])))
+                       for q in parse_qsl(query, keep_blank_values=True)]
+        # urlencode will take care of quoting
+        query = urlencode(query_parts)
 
-    return force_text(url)
+    path = unquote_quote(path)
+    fragment = unquote_quote(fragment)
+    return urlunsplit((scheme, netloc, path, query, fragment))
 
 
 def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
     """
```
