summaryrefslogtreecommitdiff
path: root/django/utils/html.py
diff options
context:
space:
mode:
authorTim Graham <timograham@gmail.com>2018-10-12 12:15:26 -0400
committerGitHub <noreply@github.com>2018-10-12 12:15:26 -0400
commit1e3cd5116367a15e2c9855dced062de885e1f0f0 (patch)
tree83cf94c2dd11a400a82b3f452bcd06e2c3b46f32 /django/utils/html.py
parent910548634a23f7a3346158e93de0ab308ae52c0c (diff)
Simplified django.utils.html.urlize().
Diffstat (limited to 'django/utils/html.py')
-rw-r--r--django/utils/html.py36
1 files changed, 13 insertions, 23 deletions
diff --git a/django/utils/html.py b/django/utils/html.py
index 24754553b6..44a3f16459 100644
--- a/django/utils/html.py
+++ b/django/utils/html.py
@@ -259,23 +259,14 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
return x
return '%s…' % x[:max(0, limit - 1)]
- def unescape(text, trail):
+ def unescape(text):
"""
If input URL is HTML-escaped, unescape it so that it can be safely fed
to smart_urlquote. For example:
http://example.com?x=1&amp;y=&lt;2&gt; => http://example.com?x=1&y=<2>
"""
- unescaped = (text + trail).replace(
- '&amp;', '&').replace('&lt;', '<').replace(
+ return text.replace('&amp;', '&').replace('&lt;', '<').replace(
'&gt;', '>').replace('&quot;', '"').replace('&#39;', "'")
- if trail and unescaped.endswith(trail):
- # Remove trail for unescaped if it was not consumed by unescape
- unescaped = unescaped[:-len(trail)]
- elif trail == ';':
- # Trail was consumed by unescape (as end-of-entity marker), move it to text
- text += trail
- trail = ''
- return text, unescaped, trail
def trim_punctuation(lead, middle, trail):
"""
@@ -286,14 +277,6 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
trimmed_something = True
while trimmed_something:
trimmed_something = False
-
- # Trim trailing punctuation.
- stripped = middle.rstrip(TRAILING_PUNCTUATION_CHARS)
- if middle != stripped:
- trail = middle[len(stripped):] + trail
- middle = stripped
- trimmed_something = True
-
# Trim wrapping punctuation.
for opening, closing in WRAPPING_PUNCTUATION:
if middle.startswith(opening):
@@ -306,6 +289,15 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
middle = middle[:-len(closing)]
trail = closing + trail
trimmed_something = True
+ # Trim trailing punctuation (after trimming wrapping punctuation,
+ # as encoded entities contain ';'). Unescape entites to avoid
+ # breaking them by removing ';'.
+ middle_unescaped = unescape(middle)
+ stripped = middle_unescaped.rstrip(TRAILING_PUNCTUATION_CHARS)
+ if middle_unescaped != stripped:
+ trail = middle[len(stripped):] + trail
+ middle = middle[:len(stripped) - len(middle_unescaped)]
+ trimmed_something = True
return lead, middle, trail
def is_email_simple(value):
@@ -337,11 +329,9 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
url = None
nofollow_attr = ' rel="nofollow"' if nofollow else ''
if simple_url_re.match(middle):
- middle, middle_unescaped, trail = unescape(middle, trail)
- url = smart_urlquote(middle_unescaped)
+ url = smart_urlquote(unescape(middle))
elif simple_url_2_re.match(middle):
- middle, middle_unescaped, trail = unescape(middle, trail)
- url = smart_urlquote('http://%s' % middle_unescaped)
+ url = smart_urlquote('http://%s' % unescape(middle))
elif ':' not in middle and is_email_simple(middle):
local, domain = middle.rsplit('@', 1)
try: