summary | refs | log | tree | commit | diff
path: root/django/utils/html.py
diff options
context:
space:
mode:
author: Claude Paroz <claude@2xlibre.net> 2014-03-20 16:50:50 +0100
committer: Claude Paroz <claude@2xlibre.net> 2014-03-22 10:59:18 +0100
commit: 6ca6c36f82b97eafeada61384b2e2f1d0587da86 (patch)
tree: f0fd507b152148ac683d539c6d23ca9821ac1124 /django/utils/html.py
parent: aaa21102592e96c543d60513755d6c81f639f122 (diff)
Improved strip_tags and clarified documentation
It was not clear enough that strip_tags cannot guarantee that all unsafe HTML content is actually stripped. See also: https://www.djangoproject.com/weblog/2014/mar/22/strip-tags-advisory/
Diffstat (limited to 'django/utils/html.py')
-rw-r--r-- django/utils/html.py 31
1 file changed, 27 insertions, 4 deletions
diff --git a/django/utils/html.py b/django/utils/html.py
index b9444fc01f..8be7fd5153 100644
--- a/django/utils/html.py
+++ b/django/utils/html.py
@@ -118,7 +118,10 @@ linebreaks = allow_lazy(linebreaks, six.text_type)
class MLStripper(HTMLParser):
def __init__(self):
- HTMLParser.__init__(self)
+ if six.PY2:
+ HTMLParser.__init__(self)
+ else:
+ HTMLParser.__init__(self, strict=False)
self.reset()
self.fed = []
@@ -135,16 +138,36 @@ class MLStripper(HTMLParser):
return ''.join(self.fed)
-def strip_tags(value):
- """Returns the given HTML with all tags stripped."""
+def _strip_once(value):
+ """
+ Internal tag stripping utility used by strip_tags.
+ """
s = MLStripper()
try:
s.feed(value)
- s.close()
except HTMLParseError:
return value
+ try:
+ s.close()
+ except (HTMLParseError, UnboundLocalError) as err:
+ # UnboundLocalError because of http://bugs.python.org/issue17802
+ # on Python 3.2, triggered by strict=False mode of HTMLParser
+ return s.get_data() + s.rawdata
else:
return s.get_data()
+
+
+def strip_tags(value):
+ """Returns the given HTML with all tags stripped."""
+ while True:
+ if not ('<' in value or '>' in value):
+ return value
+ new_value = _strip_once(value)
+ if new_value == value:
+ # _strip_once was not able to detect more tags
+ return value
+ else:
+ value = new_value
strip_tags = allow_lazy(strip_tags)