summaryrefslogtreecommitdiff
path: root/django/utils/text.py
diff options
context:
space:
mode:
authorShai Berger <shai@platonix.com>2024-02-19 13:56:37 +0100
committerMariusz Felisiak <felisiak.mariusz@gmail.com>2024-03-04 08:22:40 +0100
commit3394fc6132436eca89e997083bae9985fb7e761e (patch)
tree55e9e0e4fc0d62c5064841a4bbe78aaf1d8bb590 /django/utils/text.py
parent80761c3b01fbbbe2da1761937edd20251a86fbee (diff)
[5.0.x] Fixed CVE-2024-27351 -- Prevented potential ReDoS in Truncator.words().
Thanks Seokchan Yoon for the report. Co-Authored-By: Mariusz Felisiak <felisiak.mariusz@gmail.com>
Diffstat (limited to 'django/utils/text.py')
-rw-r--r--django/utils/text.py57
1 files changed, 55 insertions, 2 deletions
diff --git a/django/utils/text.py b/django/utils/text.py
index 295f919b51..d992f80dd2 100644
--- a/django/utils/text.py
+++ b/django/utils/text.py
@@ -23,8 +23,61 @@ def capfirst(x):
return x[0].upper() + x[1:]
-# Set up regular expressions
-re_words = _lazy_re_compile(r"<[^>]+?>|([^<>\s]+)", re.S)
+# ----- Begin security-related performance workaround -----
+
+# We used to have, below
+#
+# re_words = _lazy_re_compile(r"<[^>]+?>|([^<>\s]+)", re.S)
+#
+# But it was shown that this regex, in the way we use it here, has some
+# catastrophic edge-case performance features. Namely, when it is applied to
+# text with only open brackets "<<<...". The class below provides the services
+# and correct answers for the use cases, but in these edge cases does it much
+# faster.
+re_notag = _lazy_re_compile(r"([^<>\s]+)", re.S)
+re_prt = _lazy_re_compile(r"<|([^<>\s]+)", re.S)
+
+
+class WordsRegex:
+ @staticmethod
+ def search(text, pos):
+ # Look for "<" or a non-tag word.
+ partial = re_prt.search(text, pos)
+ if partial is None or partial[1] is not None:
+ return partial
+
+ # "<" was found, look for a closing ">".
+ end = text.find(">", partial.end(0))
+ if end < 0:
+ # ">" cannot be found, look for a word.
+ return re_notag.search(text, pos + 1)
+ else:
+ # "<" followed by a ">" was found -- fake a match.
+ end += 1
+ return FakeMatch(text[partial.start(0) : end], end)
+
+
+class FakeMatch:
+ __slots__ = ["_text", "_end"]
+
+ def end(self, group=0):
+ assert group == 0, "This specific object takes only group=0"
+ return self._end
+
+ def __getitem__(self, group):
+ if group == 1:
+ return None
+ assert group == 0, "This specific object takes only group in {0,1}"
+ return self._text
+
+ def __init__(self, text, end):
+ self._text, self._end = text, end
+
+
+# ----- End security-related performance workaround -----
+
+# Set up regular expressions.
+re_words = WordsRegex
re_chars = _lazy_re_compile(r"<[^>]+?>|(.)", re.S)
re_tag = _lazy_re_compile(r"<(/)?(\S+?)(?:(\s*/)|\s.*?)?>", re.S)
re_newlines = _lazy_re_compile(r"\r\n|\r") # Used in normalize_newlines