summary | refs | log | tree | commit | diff
path: root/django/utils/html.py
diff options
context:
space:
mode:
author: Claude Paroz <claude@2xlibre.net>, 2014-06-26 21:14:30 +0200
committer: Claude Paroz <claude@2xlibre.net>, 2014-09-09 21:58:07 +0200
commit: 4b8a1d2c0d1a8c5107f3aef01597db78d2a2a5ce (patch)
tree: f23e274b5f112fadf6f4df992db65c8c91a73083 /django/utils/html.py
parent: 9562ffea9719f2b1795e676718c6d433a1f8d4ed (diff)
Fixed #22267 -- Fixed unquote/quote in smart_urlquote
Thanks Md. Enzam Hossain for the report and initial patch, and Tim Graham for the review.
Diffstat (limited to 'django/utils/html.py')
-rw-r--r-- django/utils/html.py 37
1 file changed, 25 insertions, 12 deletions
diff --git a/django/utils/html.py b/django/utils/html.py
index 58165ff84c..6eed0b0192 100644
--- a/django/utils/html.py
+++ b/django/utils/html.py
@@ -12,7 +12,7 @@ from django.utils.functional import allow_lazy
from django.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS
from django.utils.safestring import SafeData, mark_safe
from django.utils import six
-from django.utils.six.moves.urllib.parse import quote, unquote, urlsplit, urlunsplit
+from django.utils.six.moves.urllib.parse import parse_qsl, quote, unquote, urlencode, urlsplit, urlunsplit
from django.utils.text import normalize_newlines
from .html_parser import HTMLParser, HTMLParseError
@@ -218,25 +218,38 @@ strip_entities = allow_lazy(strip_entities, six.text_type)
def smart_urlquote(url):
"Quotes a URL if it isn't already quoted."
+ def unquote_quote(segment):
+ segment = unquote(force_str(segment))
+ # Tilde is part of RFC3986 Unreserved Characters
+ # http://tools.ietf.org/html/rfc3986#section-2.3
+ # See also http://bugs.python.org/issue16285
+ segment = quote(segment, safe=RFC3986_SUBDELIMS + RFC3986_GENDELIMS + str('~'))
+ return force_text(segment)
+
# Handle IDN before quoting.
try:
scheme, netloc, path, query, fragment = urlsplit(url)
- try:
- netloc = netloc.encode('idna').decode('ascii') # IDN -> ACE
- except UnicodeError: # invalid domain part
- pass
- else:
- url = urlunsplit((scheme, netloc, path, query, fragment))
except ValueError:
# invalid IPv6 URL (normally square brackets in hostname part).
- pass
+ return unquote_quote(url)
+
+ try:
+ netloc = netloc.encode('idna').decode('ascii') # IDN -> ACE
+ except UnicodeError: # invalid domain part
+ return unquote_quote(url)
- url = unquote(force_str(url))
- # See http://bugs.python.org/issue2637
- url = quote(url, safe=RFC3986_SUBDELIMS + RFC3986_GENDELIMS + str('~'))
+ if query:
+ # Separately unquoting key/value, so as to not mix querystring separators
+ # included in query values. See #22267.
+ query_parts = [(unquote(force_str(q[0])), unquote(force_str(q[1])))
+ for q in parse_qsl(query, keep_blank_values=True)]
+ # urlencode will take care of quoting
+ query = urlencode(query_parts)
- return force_text(url)
+ path = unquote_quote(path)
+ fragment = unquote_quote(fragment)
+ return urlunsplit((scheme, netloc, path, query, fragment))
def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
"""