summaryrefslogtreecommitdiff
path: root/django/utils/html.py
diff options
context:
space:
mode:
authorAndrew Godwin <andrew@aeracode.org>2013-06-07 11:15:34 +0100
committerAndrew Godwin <andrew@aeracode.org>2013-06-07 11:15:34 +0100
commit3c296382b8dea5de7f4e1e11b66bd7cecaf2ee51 (patch)
tree0ca12593be82971691ffca01a836d00d3fcb3bd4 /django/utils/html.py
parent7609e0b42e0014a6ad0adf9dafc7018cb268070e (diff)
parent357d62d9f2972bf1bc21e5835c12c849143e06af (diff)
Merge remote-tracking branch 'core/master' into schema-alteration
Conflicts: django/db/models/fields/related.py
Diffstat (limited to 'django/utils/html.py')
-rw-r--r--django/utils/html.py35
1 files changed, 33 insertions, 2 deletions
diff --git a/django/utils/html.py b/django/utils/html.py
index 8b28d97d13..0d28c77a61 100644
--- a/django/utils/html.py
+++ b/django/utils/html.py
@@ -16,6 +16,9 @@ from django.utils.functional import allow_lazy
from django.utils import six
from django.utils.text import normalize_newlines
+from .html_parser import HTMLParser, HTMLParseError
+
+
# Configuration for urlize() function.
TRAILING_PUNCTUATION = ['.', ',', ':', ';', '.)']
WRAPPING_PUNCTUATION = [('(', ')'), ('<', '>'), ('[', ']'), ('&lt;', '&gt;')]
@@ -33,7 +36,6 @@ link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+')
html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE)
hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL)
trailing_empty_content_re = re.compile(r'(?:<p>(?:&nbsp;|\s|<br \/>)*?</p>\s*)+\Z')
-strip_tags_re = re.compile(r'</?\S([^=>]*=(\s*"[^"]*"|\s*\'[^\']*\'|\S*)|[^>])*?>', re.IGNORECASE)
def escape(text):
@@ -116,9 +118,31 @@ def linebreaks(value, autoescape=False):
return '\n\n'.join(paras)
linebreaks = allow_lazy(linebreaks, six.text_type)
+
+class MLStripper(HTMLParser):
+ def __init__(self):
+ HTMLParser.__init__(self)
+ self.reset()
+ self.fed = []
+ def handle_data(self, d):
+ self.fed.append(d)
+ def handle_entityref(self, name):
+ self.fed.append('&%s;' % name)
+ def handle_charref(self, name):
+ self.fed.append('&#%s;' % name)
+ def get_data(self):
+ return ''.join(self.fed)
+
def strip_tags(value):
"""Returns the given HTML with all tags stripped."""
- return strip_tags_re.sub('', force_text(value))
+ s = MLStripper()
+ try:
+ s.feed(value)
+ s.close()
+ except HTMLParseError:
+ return value
+ else:
+ return s.get_data()
strip_tags = allow_lazy(strip_tags)
def remove_tags(html, tags):
@@ -281,3 +305,10 @@ def clean_html(text):
text = trailing_empty_content_re.sub('', text)
return text
clean_html = allow_lazy(clean_html, six.text_type)
+
+def avoid_wrapping(value):
+ """
+ Avoid text wrapping in the middle of a phrase by adding non-breaking
+ spaces where there previously were normal spaces.
+ """
+ return value.replace(" ", "\xa0")