summary | refs | log | tree | commit | diff
path: root/django/core/serializers/xml_serializer.py
diff options
context:
space:
mode:
author: Shai Berger <shai@platonix.com>, 2025-10-11 21:42:56 +0300
committer: Natalia <124304+nessita@users.noreply.github.com>, 2025-12-02 09:44:40 -0300
commit: 4d2b8803bebcdefd2b76e9e8fc528d5fddea93f0 (patch)
tree: a8931c086dd3830c827f2a090c4c7c7e655f21cf /django/core/serializers/xml_serializer.py
parent: f997037b235f6b5c9e7c4a501491ec45f3400f3d (diff)
[4.2.x] Fixed CVE-2025-64460 -- Corrected quadratic inner text accumulation in XML serializer.
Previously, `getInnerText()` recursively used `list.extend()` on strings, which added each character from child nodes as a separate list element. On deeply nested XML content, this caused the overall deserialization work to grow quadratically with input size, potentially allowing disproportionate CPU consumption for crafted XML.

The fix separates collection of inner texts from joining them, so that each subtree is joined only once, reducing the complexity to linear in the size of the input. These changes also include a mitigation for an xml.dom.minidom performance issue.

Thanks Seokchan Yoon (https://ch4n3.kr/) for the report.

Co-authored-by: Jacob Walls <jacobtylerwalls@gmail.com>
Co-authored-by: Natalia <124304+nessita@users.noreply.github.com>

Backport of 50efb718b31333051bc2dcb06911b8fa1358c98c from main.
Diffstat (limited to 'django/core/serializers/xml_serializer.py')
-rw-r--r-- django/core/serializers/xml_serializer.py | 39
1 files changed, 33 insertions, 6 deletions
diff --git a/django/core/serializers/xml_serializer.py b/django/core/serializers/xml_serializer.py
index 3f9955aa23..5db8c067c4 100644
--- a/django/core/serializers/xml_serializer.py
+++ b/django/core/serializers/xml_serializer.py
@@ -2,7 +2,8 @@
XML serializer.
"""
import json
-from xml.dom import pulldom
+from contextlib import contextmanager
+from xml.dom import minidom, pulldom
from xml.sax import handler
from xml.sax.expatreader import ExpatParser as _ExpatParser
@@ -14,6 +15,25 @@ from django.db import DEFAULT_DB_ALIAS, models
from django.utils.xmlutils import SimplerXMLGenerator, UnserializableContentError
+@contextmanager
+def fast_cache_clearing():
+ """Workaround for performance issues in minidom document checks.
+
+ Speeds up repeated DOM operations by skipping unnecessary full traversal
+ of the DOM tree.
+ """
+ module_helper_was_lambda = False
+ if original_fn := getattr(minidom, "_in_document", None):
+ module_helper_was_lambda = original_fn.__name__ == "<lambda>"
+ if not module_helper_was_lambda:
+ minidom._in_document = lambda node: bool(node.ownerDocument)
+ try:
+ yield
+ finally:
+ if original_fn and not module_helper_was_lambda:
+ minidom._in_document = original_fn
+
+
class Serializer(base.Serializer):
"""Serialize a QuerySet to XML."""
@@ -208,7 +228,8 @@ class Deserializer(base.Deserializer):
def __next__(self):
for event, node in self.event_stream:
if event == "START_ELEMENT" and node.nodeName == "object":
- self.event_stream.expandNode(node)
+ with fast_cache_clearing():
+ self.event_stream.expandNode(node)
return self._handle_object(node)
raise StopIteration
@@ -392,19 +413,25 @@ class Deserializer(base.Deserializer):
def getInnerText(node):
"""Get all the inner text of a DOM node (recursively)."""
+ inner_text_list = getInnerTextList(node)
+ return "".join(inner_text_list)
+
+
+def getInnerTextList(node):
+ """Return a list of the inner texts of a DOM node (recursively)."""
# inspired by https://mail.python.org/pipermail/xml-sig/2005-March/011022.html
- inner_text = []
+ result = []
for child in node.childNodes:
if (
child.nodeType == child.TEXT_NODE
or child.nodeType == child.CDATA_SECTION_NODE
):
- inner_text.append(child.data)
+ result.append(child.data)
elif child.nodeType == child.ELEMENT_NODE:
- inner_text.extend(getInnerText(child))
+ result.extend(getInnerTextList(child))
else:
pass
- return "".join(inner_text)
+ return result
# Below code based on Christian Heimes' defusedxml