Simplified django.utils.html.urlize().

author: Tim Graham <timograham@gmail.com> 2018-10-12 12:15:26 -0400
committer: GitHub <noreply@github.com> 2018-10-12 12:15:26 -0400
commit: 1e3cd5116367a15e2c9855dced062de885e1f0f0 (patch)
tree: 83cf94c2dd11a400a82b3f452bcd06e2c3b46f32 /django/utils/html.py
parent: 910548634a23f7a3346158e93de0ab308ae52c0c (diff)
1 files changed, 13 insertions, 23 deletions
diff --git a/django/utils/html.py b/django/utils/html.py
index 24754553b6..44a3f16459 100644
--- a/django/utils/html.py
+++ b/django/utils/html.py
@@ -259,23 +259,14 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
             return x
         return '%s…' % x[:max(0, limit - 1)]
 
-    def unescape(text, trail):
+    def unescape(text):
         """
         If input URL is HTML-escaped, unescape it so that it can be safely fed
         to smart_urlquote. For example:
         http://example.com?x=1&amp;y=&lt;2&gt; => http://example.com?x=1&y=<2>
         """
-        unescaped = (text + trail).replace(
-            '&amp;', '&').replace('&lt;', '<').replace(
+        return text.replace('&amp;', '&').replace('&lt;', '<').replace(
             '&gt;', '>').replace('&quot;', '"').replace('&#39;', "'")
-        if trail and unescaped.endswith(trail):
-            # Remove trail for unescaped if it was not consumed by unescape
-            unescaped = unescaped[:-len(trail)]
-        elif trail == ';':
-            # Trail was consumed by unescape (as end-of-entity marker), move it to text
-            text += trail
-            trail = ''
-        return text, unescaped, trail
 
     def trim_punctuation(lead, middle, trail):
         """
@@ -286,14 +277,6 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
         trimmed_something = True
         while trimmed_something:
             trimmed_something = False
-
-            # Trim trailing punctuation.
-            stripped = middle.rstrip(TRAILING_PUNCTUATION_CHARS)
-            if middle != stripped:
-                trail = middle[len(stripped):] + trail
-                middle = stripped
-                trimmed_something = True
-
             # Trim wrapping punctuation.
             for opening, closing in WRAPPING_PUNCTUATION:
                 if middle.startswith(opening):
@@ -306,6 +289,15 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
                     middle = middle[:-len(closing)]
                     trail = closing + trail
                     trimmed_something = True
+            # Trim trailing punctuation (after trimming wrapping punctuation,
+            # as encoded entities contain ';'). Unescape entities to avoid
+            # breaking them by removing ';'.
+            middle_unescaped = unescape(middle)
+            stripped = middle_unescaped.rstrip(TRAILING_PUNCTUATION_CHARS)
+            if middle_unescaped != stripped:
+                trail = middle[len(stripped):] + trail
+                middle = middle[:len(stripped) - len(middle_unescaped)]
+                trimmed_something = True
         return lead, middle, trail
 
     def is_email_simple(value):
@@ -337,11 +329,9 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
             url = None
             nofollow_attr = ' rel="nofollow"' if nofollow else ''
             if simple_url_re.match(middle):
-                middle, middle_unescaped, trail = unescape(middle, trail)
-                url = smart_urlquote(middle_unescaped)
+                url = smart_urlquote(unescape(middle))
             elif simple_url_2_re.match(middle):
-                middle, middle_unescaped, trail = unescape(middle, trail)
-                url = smart_urlquote('http://%s' % middle_unescaped)
+                url = smart_urlquote('http://%s' % unescape(middle))
             elif ':' not in middle and is_email_simple(middle):
                 local, domain = middle.rsplit('@', 1)
                 try:
author	Tim Graham <timograham@gmail.com>	2018-10-12 12:15:26 -0400
committer	GitHub <noreply@github.com>	2018-10-12 12:15:26 -0400
commit	1e3cd5116367a15e2c9855dced062de885e1f0f0 (patch)
tree	83cf94c2dd11a400a82b3f452bcd06e2c3b46f32 /django/utils/html.py
parent	910548634a23f7a3346158e93de0ab308ae52c0c (diff)