summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--django/core/serializers/xml_serializer.py39
-rw-r--r--docs/releases/4.2.27.txt10
-rw-r--r--docs/topics/serialization.txt2
-rw-r--r--tests/serializers/test_xml.py55
4 files changed, 99 insertions, 7 deletions
diff --git a/django/core/serializers/xml_serializer.py b/django/core/serializers/xml_serializer.py
index 3f9955aa23..5db8c067c4 100644
--- a/django/core/serializers/xml_serializer.py
+++ b/django/core/serializers/xml_serializer.py
@@ -2,7 +2,8 @@
XML serializer.
"""
import json
-from xml.dom import pulldom
+from contextlib import contextmanager
+from xml.dom import minidom, pulldom
from xml.sax import handler
from xml.sax.expatreader import ExpatParser as _ExpatParser
@@ -14,6 +15,25 @@ from django.db import DEFAULT_DB_ALIAS, models
from django.utils.xmlutils import SimplerXMLGenerator, UnserializableContentError
+@contextmanager
+def fast_cache_clearing():
+ """Workaround for performance issues in minidom document checks.
+
+ Speeds up repeated DOM operations by skipping unnecessary full traversal
+ of the DOM tree.
+ """
+ module_helper_was_lambda = False
+ if original_fn := getattr(minidom, "_in_document", None):
+ module_helper_was_lambda = original_fn.__name__ == "<lambda>"
+ if not module_helper_was_lambda:
+ minidom._in_document = lambda node: bool(node.ownerDocument)
+ try:
+ yield
+ finally:
+ if original_fn and not module_helper_was_lambda:
+ minidom._in_document = original_fn
+
+
class Serializer(base.Serializer):
"""Serialize a QuerySet to XML."""
@@ -208,7 +228,8 @@ class Deserializer(base.Deserializer):
def __next__(self):
for event, node in self.event_stream:
if event == "START_ELEMENT" and node.nodeName == "object":
- self.event_stream.expandNode(node)
+ with fast_cache_clearing():
+ self.event_stream.expandNode(node)
return self._handle_object(node)
raise StopIteration
@@ -392,19 +413,25 @@ class Deserializer(base.Deserializer):
def getInnerText(node):
"""Get all the inner text of a DOM node (recursively)."""
+ inner_text_list = getInnerTextList(node)
+ return "".join(inner_text_list)
+
+
+def getInnerTextList(node):
+ """Return a list of the inner texts of a DOM node (recursively)."""
# inspired by https://mail.python.org/pipermail/xml-sig/2005-March/011022.html
- inner_text = []
+ result = []
for child in node.childNodes:
if (
child.nodeType == child.TEXT_NODE
or child.nodeType == child.CDATA_SECTION_NODE
):
- inner_text.append(child.data)
+ result.append(child.data)
elif child.nodeType == child.ELEMENT_NODE:
- inner_text.extend(getInnerText(child))
+ result.extend(getInnerTextList(child))
else:
pass
- return "".join(inner_text)
+ return result
# Below code based on Christian Heimes' defusedxml
diff --git a/docs/releases/4.2.27.txt b/docs/releases/4.2.27.txt
index e95dc63f74..b843f6a443 100644
--- a/docs/releases/4.2.27.txt
+++ b/docs/releases/4.2.27.txt
@@ -15,6 +15,16 @@ using a suitably crafted dictionary, with dictionary expansion, as the
``**kwargs`` passed to :meth:`.QuerySet.annotate` or :meth:`.QuerySet.alias` on
PostgreSQL.
+CVE-2025-64460: Potential denial-of-service vulnerability in XML ``Deserializer``
+=================================================================================
+
+:ref:`XML Serialization <serialization-formats-xml>` was subject to a potential
+denial-of-service attack due to quadratic time complexity when deserializing
+crafted documents containing many nested invalid elements. The internal helper
+``django.core.serializers.xml_serializer.getInnerText()`` previously
+accumulated inner text inefficiently during recursion. It now collects text per
+element, avoiding excessive resource usage.
+
Bugfixes
========
diff --git a/docs/topics/serialization.txt b/docs/topics/serialization.txt
index 0bb57642ab..dc403ca1d4 100644
--- a/docs/topics/serialization.txt
+++ b/docs/topics/serialization.txt
@@ -173,6 +173,8 @@ Identifier Information
.. _jsonl: https://jsonlines.org/
.. _PyYAML: https://pyyaml.org/
+.. _serialization-formats-xml:
+
XML
---
diff --git a/tests/serializers/test_xml.py b/tests/serializers/test_xml.py
index c9df2f2a5b..03462cfed5 100644
--- a/tests/serializers/test_xml.py
+++ b/tests/serializers/test_xml.py
@@ -1,7 +1,10 @@
+import gc
+import time
from xml.dom import minidom
from django.core import serializers
-from django.core.serializers.xml_serializer import DTDForbidden
+from django.core.serializers.xml_serializer import Deserializer, DTDForbidden
+from django.db import models
from django.test import TestCase, TransactionTestCase
from .tests import SerializersTestBase, SerializersTransactionTestBase
@@ -90,6 +93,56 @@ class XmlSerializerTestCase(SerializersTestBase, TestCase):
with self.assertRaises(DTDForbidden):
next(serializers.deserialize("xml", xml))
+ def test_crafted_xml_performance(self):
+ """The time to process invalid inputs is not quadratic."""
+
+ def build_crafted_xml(depth, leaf_text_len):
+ nested_open = "<nested>" * depth
+ nested_close = "</nested>" * depth
+ leaf = "x" * leaf_text_len
+ field_content = f"{nested_open}{leaf}{nested_close}"
+ return f"""
+ <django-objects version="1.0">
+ <object model="contenttypes.contenttype" pk="1">
+ <field name="app_label">{field_content}</field>
+ <field name="model">m</field>
+ </object>
+ </django-objects>
+ """
+
+ def deserialize(crafted_xml):
+ iterator = Deserializer(crafted_xml)
+ gc.collect()
+
+ start_time = time.perf_counter()
+ result = list(iterator)
+ end_time = time.perf_counter()
+
+ self.assertEqual(len(result), 1)
+ self.assertIsInstance(result[0].object, models.Model)
+ return end_time - start_time
+
+ def assertFactor(label, params, factor=2):
+ factors = []
+ prev_time = None
+ for depth, length in params:
+ crafted_xml = build_crafted_xml(depth, length)
+ elapsed = deserialize(crafted_xml)
+ if prev_time is not None:
+ factors.append(elapsed / prev_time)
+ prev_time = elapsed
+
+ with self.subTest(label):
+ # Assert based on the average factor to reduce test flakiness.
+ self.assertLessEqual(sum(factors) / len(factors), factor)
+
+ assertFactor(
+ "varying depth, varying length",
+ [(50, 2000), (100, 4000), (200, 8000), (400, 16000), (800, 32000)],
+ 2,
+ )
+ assertFactor("constant depth, varying length", [(100, 1), (100, 1000)], 2)
+
class XmlSerializerTransactionTestCase(
SerializersTransactionTestBase, TransactionTestCase