Fixed #11911 -- Made the urlize filter smarter with closing punctuation.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@17362 bcc190cf-cafb-0310-a4f2-bffc1f526a37
author: Aymeric Augustin <aymeric.augustin@m4x.org> 2012-01-08 19:42:14 +0000
committer: Aymeric Augustin <aymeric.augustin@m4x.org> 2012-01-08 19:42:14 +0000
commit: 15d10a5210378bba88c7dfa1f45a4d3528ddfc3f (patch)
tree: 1c6ea78522f302b6bb7792d0ac54f6605c36e6de /django/utils/html.py
parent: 78c92c416790b22eb837dbf852f93134e354173d (diff)
1 file changed, 19 insertions, 8 deletions
diff --git a/django/utils/html.py b/django/utils/html.py
index 5e39ac9183..d04e75f856 100644
--- a/django/utils/html.py
+++ b/django/utils/html.py
@@ -11,8 +11,8 @@ from django.utils.functional import allow_lazy
 from django.utils.text import normalize_newlines
 
 # Configuration for urlize() function.
-LEADING_PUNCTUATION  = ['(', '<', '&lt;']
-TRAILING_PUNCTUATION = ['.', ',', ')', '>', '\n', '&gt;']
+TRAILING_PUNCTUATION = ['.', ',', ':', ';']
+WRAPPING_PUNCTUATION = [('(', ')'), ('<', '>'), ('&lt;', '&gt;')]
 
 # List of possible strings used for bullets in bulleted lists.
 DOTS = [u'&middot;', u'*', u'\u2022', u'&#149;', u'&bull;', u'&#8226;']
@@ -20,9 +20,6 @@ DOTS = [u'&middot;', u'*', u'\u2022', u'&#149;', u'&bull;', u'&#8226;']
 unencoded_ampersands_re = re.compile(r'&(?!(\w+|#\d+);)')
 unquoted_percents_re = re.compile(r'%(?![0-9A-Fa-f]{2})')
 word_split_re = re.compile(r'(\s+)')
-punctuation_re = re.compile('^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % \
-    ('|'.join([re.escape(x) for x in LEADING_PUNCTUATION]),
-    '|'.join([re.escape(x) for x in TRAILING_PUNCTUATION])))
 simple_url_re = re.compile(r'^https?://\w')
 simple_url_2_re = re.compile(r'^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org|[a-z]{2})$')
 simple_email_re = re.compile(r'^\S+@\S+\.\S+$')
@@ -147,9 +144,22 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
     for i, word in enumerate(words):
         match = None
         if '.' in word or '@' in word or ':' in word:
-            match = punctuation_re.match(word)
-        if match:
-            lead, middle, trail = match.groups()
+            # Deal with punctuation.
+            lead, middle, trail = '', word, ''
+            for punctuation in TRAILING_PUNCTUATION:
+                if middle.endswith(punctuation):
+                    middle = middle[:-len(punctuation)]
+                    trail = punctuation + trail
+            for opening, closing in WRAPPING_PUNCTUATION:
+                if middle.startswith(opening):
+                    middle = middle[len(opening):]
+                    lead = lead + opening
+                # Keep parentheses at the end only if they're balanced.
+                if (middle.endswith(closing)
+                    and middle.count(closing) == middle.count(opening) + 1):
+                    middle = middle[:-len(closing)]
+                    trail = closing + trail
+
             # Make URL we want to point to.
             url = None
             nofollow_attr = ' rel="nofollow"' if nofollow else ''
@@ -162,6 +172,7 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
                 domain = domain.encode('idna')
                 url = 'mailto:%s@%s' % (local, domain)
                 nofollow_attr = ''
+
             # Make link.
             if url:
                 trimmed = trim_url(middle)
author	Aymeric Augustin <aymeric.augustin@m4x.org>	2012-01-08 19:42:14 +0000
committer	Aymeric Augustin <aymeric.augustin@m4x.org>	2012-01-08 19:42:14 +0000
commit	15d10a5210378bba88c7dfa1f45a4d3528ddfc3f (patch)
tree	1c6ea78522f302b6bb7792d0ac54f6605c36e6de /django/utils/html.py
parent	78c92c416790b22eb837dbf852f93134e354173d (diff)