Fixed #19237 -- Used HTML parser to strip tags

The regex method used until now for the strip_tags utility is fast, but subject to flaws and security issues. Consensus and good practice lead us to use a slower but safer method.
author: Claude Paroz <claude@2xlibre.net> 2013-05-22 17:29:16 +0200
committer: Claude Paroz <claude@2xlibre.net> 2013-05-22 17:34:02 +0200
commit: dc51ec8bc214cf60ebb99732363624c23df8005f (patch)
tree: 5b870ff55500ee2b3bed9547bafee290e86a29fe /tests/utils_tests/test_html.py
parent: 01948e384f5508c126c7216e43db3654bf6330f0 (diff)
1 files changed, 6 insertions, 2 deletions
diff --git a/tests/utils_tests/test_html.py b/tests/utils_tests/test_html.py
index 090cc32d1c..c3e9f7c878 100644
--- a/tests/utils_tests/test_html.py
+++ b/tests/utils_tests/test_html.py
@@ -5,6 +5,7 @@ import os
 
 from django.utils import html
 from django.utils._os import upath
+from django.utils.encoding import force_text
 from django.utils.unittest import TestCase
 
 
@@ -63,10 +64,12 @@ class TestUtilsHtml(TestCase):
     def test_strip_tags(self):
         f = html.strip_tags
         items = (
+            ('<p>See: &#39;&eacute; is an apostrophe followed by e acute</p>',
+             'See: &#39;&eacute; is an apostrophe followed by e acute'),
             ('<adf>a', 'a'),
             ('</adf>a', 'a'),
             ('<asdf><asdf>e', 'e'),
-            ('<f', '<f'),
+            ('hi, <f x', 'hi, <f x'),
             ('</fe', '</fe'),
             ('<x>b<y>', 'b'),
             ('a<p onclick="alert(\'<test>\')">b</p>c', 'abc'),
@@ -81,8 +84,9 @@ class TestUtilsHtml(TestCase):
         for filename in ('strip_tags1.html', 'strip_tags2.txt'):
             path = os.path.join(os.path.dirname(upath(__file__)), 'files', filename)
             with open(path, 'r') as fp:
+                content = force_text(fp.read())
                 start = datetime.now()
-                stripped = html.strip_tags(fp.read())
+                stripped = html.strip_tags(content)
                 elapsed = datetime.now() - start
             self.assertEqual(elapsed.seconds, 0)
             self.assertIn("Please try again.", stripped)
author	Claude Paroz <claude@2xlibre.net>	2013-05-22 17:29:16 +0200
committer	Claude Paroz <claude@2xlibre.net>	2013-05-22 17:34:02 +0200
commit	dc51ec8bc214cf60ebb99732363624c23df8005f (patch)
tree	5b870ff55500ee2b3bed9547bafee290e86a29fe /tests/utils_tests/test_html.py
parent	01948e384f5508c126c7216e43db3654bf6330f0 (diff)