summaryrefslogtreecommitdiff
path: root/django/utils/html.py
diff options
context:
space:
mode:
author: Justin Bronn <jbronn@gmail.com> 2007-08-26 01:10:53 +0000
committer: Justin Bronn <jbronn@gmail.com> 2007-08-26 01:10:53 +0000
commit: 2052b508eb92c62fc0678efd4936c5ec1e0e735b (patch)
tree: e510109b74b28c8ccef5f6955727cb9dce3da655 /django/utils/html.py
parent: a7297a255f4bb86f608ea251e00253d18c31d9d4 (diff)
gis: Made necessary modifications for unicode, manage refactor, backend refactor and merged 5584-6000 via svnmerge from [repos:django/trunk trunk].
git-svn-id: http://code.djangoproject.com/svn/django/branches/gis@6018 bcc190cf-cafb-0310-a4f2-bffc1f526a37
Diffstat (limited to 'django/utils/html.py')
-rw-r--r-- django/utils/html.py 80
1 files changed, 45 insertions, 35 deletions
diff --git a/django/utils/html.py b/django/utils/html.py
index e1860627ce..ebd04d1b3c 100644
--- a/django/utils/html.py
+++ b/django/utils/html.py
@@ -1,7 +1,10 @@
"HTML utilities suitable for global use."
-import re, string
-from django.utils.encoding import smart_unicode
+import re
+import string
+
+from django.utils.encoding import force_unicode
+from django.utils.functional import allow_lazy
# Configuration for urlize() function
LEADING_PUNCTUATION = ['(', '<', '&lt;']
@@ -23,40 +26,45 @@ trailing_empty_content_re = re.compile(r'(?:<p>(?:&nbsp;|\s|<br \/>)*?</p>\s*)+\
del x # Temporary variable
def escape(html):
- "Returns the given HTML with ampersands, quotes and carets encoded"
- if not isinstance(html, basestring):
- html = str(html)
- return html.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;').replace("'", '&#39;')
+ "Return the given HTML with ampersands, quotes and angle brackets encoded."
+ return force_unicode(html).replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;').replace("'", '&#39;')
+escape = allow_lazy(escape, unicode)
def linebreaks(value):
- "Converts newlines into <p> and <br />s"
- value = re.sub(r'\r\n|\r|\n', '\n', value) # normalize newlines
+ "Convert newlines into <p> and <br />s."
+ value = re.sub(r'\r\n|\r|\n', '\n', force_unicode(value)) # normalize newlines
paras = re.split('\n{2,}', value)
- paras = ['<p>%s</p>' % p.strip().replace('\n', '<br />') for p in paras]
- return '\n\n'.join(paras)
+ paras = [u'<p>%s</p>' % p.strip().replace('\n', '<br />') for p in paras]
+ return u'\n\n'.join(paras)
+linebreaks = allow_lazy(linebreaks, unicode)
def strip_tags(value):
- "Returns the given HTML with all tags stripped"
- return re.sub(r'<[^>]*?>', '', value)
+ "Return the given HTML with all tags stripped."
+ return re.sub(r'<[^>]*?>', '', force_unicode(value))
+strip_tags = allow_lazy(strip_tags)
def strip_spaces_between_tags(value):
- "Returns the given HTML with spaces between tags removed"
- return re.sub(r'>\s+<', '><', value)
+ "Return the given HTML with spaces between tags removed."
+ return re.sub(r'>\s+<', '><', force_unicode(value))
+strip_spaces_between_tags = allow_lazy(strip_spaces_between_tags, unicode)
def strip_entities(value):
- "Returns the given HTML with all entities (&something;) stripped"
- return re.sub(r'&(?:\w+|#\d);', '', value)
+ "Return the given HTML with all entities (&something;) stripped."
+ return re.sub(r'&(?:\w+|#\d+);', '', force_unicode(value))
+strip_entities = allow_lazy(strip_entities, unicode)
def fix_ampersands(value):
- "Returns the given HTML with all unencoded ampersands encoded correctly"
- return unencoded_ampersands_re.sub('&amp;', value)
+ "Return the given HTML with all unencoded ampersands encoded correctly."
+ return unencoded_ampersands_re.sub('&amp;', force_unicode(value))
+fix_ampersands = allow_lazy(fix_ampersands, unicode)
def urlize(text, trim_url_limit=None, nofollow=False):
"""
- Converts any URLs in text into clickable links. Works on http://, https://
- and www. links. Links can have trailing punctuation (periods, commas,
- close-parens) and leading punctuation (opening parens) and it'll still do
- the right thing.
+ Convert any URLs in text into clickable links.
+
+ Works on http://, https://, and www. links. Links can have trailing
+ punctuation (periods, commas, close-parens) and leading punctuation
+ (opening parens) and it'll still do the right thing.
If trim_url_limit is not None, the URLs in link text longer than this limit
will be truncated to trim_url_limit-3 characters and appended with an ellipsis.
@@ -65,7 +73,7 @@ def urlize(text, trim_url_limit=None, nofollow=False):
attribute.
"""
trim_url = lambda x, limit=trim_url_limit: limit is not None and (len(x) > limit and ('%s...' % x[:max(0, limit - 3)])) or x
- words = word_split_re.split(text)
+ words = word_split_re.split(force_unicode(text))
nofollow_attr = nofollow and ' rel="nofollow"' or ''
for i, word in enumerate(words):
match = punctuation_re.match(word)
@@ -82,22 +90,23 @@ def urlize(text, trim_url_limit=None, nofollow=False):
middle = '<a href="mailto:%s">%s</a>' % (middle, middle)
if lead + middle + trail != word:
words[i] = lead + middle + trail
- return ''.join(words)
+ return u''.join(words)
+urlize = allow_lazy(urlize, unicode)
def clean_html(text):
"""
- Cleans the given HTML. Specifically, it does the following:
- * Converts <b> and <i> to <strong> and <em>.
- * Encodes all ampersands correctly.
- * Removes all "target" attributes from <a> tags.
- * Removes extraneous HTML, such as presentational tags that open and
+ Clean the given HTML. Specifically, do the following:
+ * Convert <b> and <i> to <strong> and <em>.
+ * Encode all ampersands correctly.
+ * Remove all "target" attributes from <a> tags.
+ * Remove extraneous HTML, such as presentational tags that open and
immediately close and <br clear="all">.
- * Converts hard-coded bullets into HTML unordered lists.
- * Removes stuff like "<p>&nbsp;&nbsp;</p>", but only if it's at the
+ * Convert hard-coded bullets into HTML unordered lists.
+ * Remove stuff like "<p>&nbsp;&nbsp;</p>", but only if it's at the
bottom of the text.
"""
from django.utils.text import normalize_newlines
- text = normalize_newlines(text)
+ text = normalize_newlines(force_unicode(text))
text = re.sub(r'<(/?)\s*b\s*>', '<\\1strong>', text)
text = re.sub(r'<(/?)\s*i\s*>', '<\\1em>', text)
text = fix_ampersands(text)
@@ -110,9 +119,10 @@ def clean_html(text):
s = match.group().replace('</p>', '</li>')
for d in DOTS:
s = s.replace('<p>%s' % d, '<li>')
- return '<ul>\n%s\n</ul>' % s
+ return u'<ul>\n%s\n</ul>' % s
text = hard_coded_bullets_re.sub(replace_p_tags, text)
- # Remove stuff like "<p>&nbsp;&nbsp;</p>", but only if it's at the bottom of the text.
+ # Remove stuff like "<p>&nbsp;&nbsp;</p>", but only if it's at the bottom
+ # of the text.
text = trailing_empty_content_re.sub('', text)
return text
-
+clean_html = allow_lazy(clean_html, unicode)