import gc
import time
from xml.dom import minidom
from django.core import serializers
from django.core.serializers.xml_serializer import Deserializer, DTDForbidden
from django.db import models
from django.test import TestCase, TransactionTestCase
from .tests import SerializersTestBase, SerializersTransactionTestBase
class XmlSerializerTestCase(SerializersTestBase, TestCase):
    """XML-specific serializer tests layered on ``SerializersTestBase``.

    Besides its own tests, the class supplies the serializer name, two string
    fixtures and three static helpers that inspect serialized output with
    ``minidom`` (presumably consumed by the base class -- verify there).
    """

    serializer_name = "xml"
    # NOTE(review): both fixtures below contain only a newline; confirm that
    # their XML markup was not lost somewhere upstream.
    pkless_str = """
"""
    mapping_ordering_str = """
"""  # NOQA

    @staticmethod
    def _validate_output(serial_str):
        """Return True if ``serial_str`` can be parsed by minidom, else False."""
        try:
            minidom.parseString(serial_str)
        except Exception:
            # Any failure while parsing, whatever its type, counts as invalid.
            return False
        return True

    @staticmethod
    def _get_pk_values(serial_str):
        """Return the ``pk`` attribute of every ``object`` element found."""
        document = minidom.parseString(serial_str)
        return [
            node.getAttribute("pk")
            for node in document.getElementsByTagName("object")
        ]

    @staticmethod
    def _get_field_values(serial_str, field_name):
        """Return the text of each ``field`` element named ``field_name``.

        Every entry is the concatenation of the ``nodeValue`` of the field's
        direct children. Children whose ``nodeValue`` is ``None`` (nested
        elements, for instance) are skipped so that ``str.join`` cannot fail
        with ``TypeError``.
        """
        document = minidom.parseString(serial_str)
        return [
            "".join(
                child.nodeValue
                for child in field.childNodes
                if child.nodeValue is not None
            )
            for field in document.getElementsByTagName("field")
            if field.getAttribute("name") == field_name
        ]

    def test_control_char_failure(self):
        """
        Serializing control characters with XML should fail as those characters
        are not supported in the XML 1.0 standard (except HT, LF, CR).
        """
        self.a1.headline = "This contains \u0001 control \u0011 chars"
        msg = "Article.headline (pk:%s) contains unserializable characters" % self.a1.pk
        with self.assertRaisesMessage(ValueError, msg):
            serializers.serialize(self.serializer_name, [self.a1])

        # The three whitespace control characters must round-trip untouched.
        self.a1.headline = "HT \u0009, LF \u000A, and CR \u000D are allowed"
        self.assertIn(
            "HT \t, LF \n, and CR \r are allowed",
            serializers.serialize(self.serializer_name, [self.a1]),
        )

    def test_no_dtd(self):
        """
        The XML deserializer shouldn't allow a DTD.
        This is the most straightforward way to prevent all entity definitions
        and avoid both external entities and entity-expansion attacks.
        """
        # NOTE(review): both literals are empty, so this payload carries no
        # DOCTYPE declaration at all -- confirm the markup was not lost before
        # relying on this test.
        xml = (
            ''
            ''
        )
        with self.assertRaises(DTDForbidden):
            next(serializers.deserialize("xml", xml))

    def test_crafted_xml_performance(self):
        """The time to process invalid inputs is not quadratic."""

        def build_crafted_xml(depth, leaf_text_len):
            """Build the payload for one measurement."""
            # NOTE(review): the repeated literals are empty, so ``depth``
            # currently has no effect on the result -- confirm the element
            # markup was not lost.
            nested_open = "" * depth
            nested_close = "" * depth
            leaf = "x" * leaf_text_len
            field_content = f"{nested_open}{leaf}{nested_close}"
            return f"""
{field_content}
m
"""

        def deserialize(crafted_xml):
            """Deserialize ``crafted_xml`` and return the elapsed seconds."""
            iterator = Deserializer(crafted_xml)
            # Collect garbage first so a GC pause is less likely to land
            # inside the timed region.
            gc.collect()
            start_time = time.perf_counter()
            result = list(iterator)
            end_time = time.perf_counter()
            self.assertEqual(len(result), 1)
            self.assertIsInstance(result[0].object, models.Model)
            return end_time - start_time

        def assertFactor(label, params, factor=2):
            """Assert the mean slowdown between consecutive runs is <= factor."""
            timings = [
                deserialize(build_crafted_xml(depth, length))
                for depth, length in params
            ]
            factors = [
                current / previous
                for previous, current in zip(timings, timings[1:])
            ]
            with self.subTest(label):
                # Assert based on the average factor to reduce test flakiness.
                self.assertLessEqual(sum(factors) / len(factors), factor)

        assertFactor(
            "varying depth, varying length",
            [(50, 2000), (100, 4000), (200, 8000), (400, 16000), (800, 32000)],
            2,
        )
        assertFactor("constant depth, varying length", [(100, 1), (100, 1000)], 2)
class XmlSerializerTransactionTestCase(
    SerializersTransactionTestBase,
    TransactionTestCase,
):
    """XML variant of the tests defined on ``SerializersTransactionTestBase``.

    Only the serializer name and a string fixture are declared here.
    """

    serializer_name = "xml"
    # Fixture text; presumably read by the base class's forward-reference
    # test -- verify against SerializersTransactionTestBase.
    fwd_ref_str = """
1
Forward references pose no problem
2006-06-16T15:00:00
Agnes
Reference
"""  # NOQA