summary | refs | log | tree | commit | diff
path: root/django/utils/html.py
diff options
context:
space:
mode:
author: Claude Paroz <claude@2xlibre.net> 2014-08-09 12:44:48 +0200
committer: Claude Paroz <claude@2xlibre.net> 2014-09-09 21:59:35 +0200
commit: b9d9287f59eb5c33dd8bc81179b4cf197fd54456 (patch)
tree: d0160d7e39b06d95abb2404aed16a6c8191ce29f /django/utils/html.py
parent: 4b8a1d2c0d1a8c5107f3aef01597db78d2a2a5ce (diff)
Fixed urlize after smart_urlquote rewrite
Refs #22267.
Diffstat (limited to 'django/utils/html.py')
-rw-r--r-- django/utils/html.py 31
1 file changed, 27 insertions, 4 deletions
diff --git a/django/utils/html.py b/django/utils/html.py
index 6eed0b0192..3974bbbc22 100644
--- a/django/utils/html.py
+++ b/django/utils/html.py
@@ -251,6 +251,7 @@ def smart_urlquote(url):
return urlunsplit((scheme, netloc, path, query, fragment))
+
def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
"""
Converts any URLs in text into clickable links.
@@ -268,11 +269,31 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
If autoescape is True, the link text and URLs will be autoescaped.
"""
+ safe_input = isinstance(text, SafeData)
+
def trim_url(x, limit=trim_url_limit):
if limit is None or len(x) <= limit:
return x
return '%s...' % x[:max(0, limit - 3)]
- safe_input = isinstance(text, SafeData)
+
+ def unescape(text, trail):
+ """
+ If input URL is HTML-escaped, unescape it so as we can safely feed it to
+ smart_urlquote. For example:
+ http://example.com?x=1&amp;y=&lt;2&gt; => http://example.com?x=1&y=<2>
+ """
+ if not safe_input:
+ return text, text, trail
+ unescaped = (text + trail).replace('&amp;', '&').replace('&lt;', '<'
+ ).replace('&gt;', '>').replace('&quot;', '"'
+ ).replace('&#39;', "'")
+ # ';' in trail can be either trailing punctuation or end-of-entity marker
+ if unescaped.endswith(';'):
+ return text, unescaped[:-1], trail
+ else:
+ text += trail
+ return text, unescaped, ''
+
words = word_split_re.split(force_text(text))
for i, word in enumerate(words):
if '.' in word or '@' in word or ':' in word:
@@ -296,9 +317,11 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
url = None
nofollow_attr = ' rel="nofollow"' if nofollow else ''
if simple_url_re.match(middle):
- url = smart_urlquote(middle)
+ middle, middle_unescaped, trail = unescape(middle, trail)
+ url = smart_urlquote(middle_unescaped)
elif simple_url_2_re.match(middle):
- url = smart_urlquote('http://%s' % middle)
+ middle, middle_unescaped, trail = unescape(middle, trail)
+ url = smart_urlquote('http://%s' % middle_unescaped)
elif ':' not in middle and simple_email_re.match(middle):
local, domain = middle.rsplit('@', 1)
try:
@@ -313,7 +336,7 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
trimmed = trim_url(middle)
if autoescape and not safe_input:
lead, trail = escape(lead), escape(trail)
- url, trimmed = escape(url), escape(trimmed)
+ trimmed = escape(trimmed)
middle = '<a href="%s"%s>%s</a>' % (url, nofollow_attr, trimmed)
words[i] = mark_safe('%s%s%s' % (lead, middle, trail))
else: