diff options
| author | Claude Paroz <claude@2xlibre.net> | 2014-03-20 16:50:50 +0100 |
|---|---|---|
| committer | Claude Paroz <claude@2xlibre.net> | 2014-03-22 10:59:18 +0100 |
| commit | 6ca6c36f82b97eafeada61384b2e2f1d0587da86 (patch) | |
| tree | f0fd507b152148ac683d539c6d23ca9821ac1124 /django/utils/html.py | |
| parent | aaa21102592e96c543d60513755d6c81f639f122 (diff) | |
Improved strip_tags and clarified documentation
The fact that strip_tags cannot guarantee to really strip all
non-safe HTML content was not clear enough. Also see:
https://www.djangoproject.com/weblog/2014/mar/22/strip-tags-advisory/
Diffstat (limited to 'django/utils/html.py')
| -rw-r--r-- | django/utils/html.py | 31 |
1 files changed, 27 insertions, 4 deletions
```diff
diff --git a/django/utils/html.py b/django/utils/html.py
index b9444fc01f..8be7fd5153 100644
--- a/django/utils/html.py
+++ b/django/utils/html.py
@@ -118,7 +118,10 @@ linebreaks = allow_lazy(linebreaks, six.text_type)
 
 class MLStripper(HTMLParser):
     def __init__(self):
-        HTMLParser.__init__(self)
+        if six.PY2:
+            HTMLParser.__init__(self)
+        else:
+            HTMLParser.__init__(self, strict=False)
         self.reset()
         self.fed = []
 
@@ -135,16 +138,36 @@ class MLStripper(HTMLParser):
         return ''.join(self.fed)
 
 
-def strip_tags(value):
-    """Returns the given HTML with all tags stripped."""
+def _strip_once(value):
+    """
+    Internal tag stripping utility used by strip_tags.
+    """
     s = MLStripper()
     try:
         s.feed(value)
-        s.close()
     except HTMLParseError:
         return value
+    try:
+        s.close()
+    except (HTMLParseError, UnboundLocalError) as err:
+        # UnboundLocalError because of http://bugs.python.org/issue17802
+        # on Python 3.2, triggered by strict=False mode of HTMLParser
+        return s.get_data() + s.rawdata
     else:
         return s.get_data()
+
+
+def strip_tags(value):
+    """Returns the given HTML with all tags stripped."""
+    while True:
+        if not ('<' in value or '>' in value):
+            return value
+        new_value = _strip_once(value)
+        if new_value == value:
+            # _strip_once was not able to detect more tags
+            return value
+        else:
+            value = new_value
 strip_tags = allow_lazy(strip_tags)
 
 
```
