import gc
import time
from xml.dom import minidom

from django.core import serializers
from django.core.serializers.xml_serializer import Deserializer, DTDForbidden
from django.db import models
from django.test import TestCase, TransactionTestCase

from .tests import SerializersTestBase, SerializersTransactionTestBase


class XmlSerializerTestCase(SerializersTestBase, TestCase):
    """Serializer tests for the "xml" format, built on SerializersTestBase."""

    serializer_name = "xml"
    # NOTE(review): the fixture literals below hold only text content and no
    # XML markup. They look like XML documents whose tags were stripped at some
    # point -- confirm against the canonical fixtures before relying on them.
    pkless_str = """ Reference Non-fiction """
    mapping_ordering_str = """ %(author_pk)s Poker has no place on ESPN 2006-06-16T11:00:00 """  # NOQA

    @staticmethod
    def _validate_output(serial_str):
        """Return True if minidom can parse ``serial_str``, False otherwise."""
        try:
            minidom.parseString(serial_str)
        except Exception:
            # Any parsing failure simply means the output is not valid.
            return False
        return True

    @staticmethod
    def _get_pk_values(serial_str):
        """Return the "pk" attribute of every "object" element found."""
        document = minidom.parseString(serial_str)
        return [
            node.getAttribute("pk")
            for node in document.getElementsByTagName("object")
        ]

    @staticmethod
    def _get_field_values(serial_str, field_name):
        """
        Return one string per "field" element whose "name" attribute equals
        ``field_name``; each string is the concatenation of the node values of
        that element's direct children.
        """
        document = minidom.parseString(serial_str)
        return [
            "".join(child.nodeValue for child in node.childNodes)
            for node in document.getElementsByTagName("field")
            if node.getAttribute("name") == field_name
        ]

    def test_control_char_failure(self):
        """
        Control characters other than HT, LF and CR are not supported by the
        XML 1.0 standard, so serializing them must raise ValueError; the three
        allowed ones must be serialized unchanged.
        """
        self.a1.headline = "This contains \u0001 control \u0011 chars"
        expected_msg = (
            "Article.headline (pk:%s) contains unserializable characters" % self.a1.pk
        )
        with self.assertRaisesMessage(ValueError, expected_msg):
            serializers.serialize(self.serializer_name, [self.a1])

        self.a1.headline = "HT \u0009, LF \u000A, and CR \u000D are allowed"
        serialized = serializers.serialize(self.serializer_name, [self.a1])
        self.assertIn("HT \t, LF \n, and CR \r are allowed", serialized)

    def test_no_dtd(self):
        """
        The XML deserializer must reject documents carrying a DTD.

        Refusing DTDs outright is the simplest way to rule out every entity
        definition, which avoids both external-entity and entity-expansion
        attacks.
        """
        # NOTE(review): both fragments are empty strings, so no DTD is actually
        # present in the payload -- presumably the markup was lost; verify.
        xml = (
            ''
            ''
        )
        with self.assertRaises(DTDForbidden):
            next(serializers.deserialize("xml", xml))

    def test_crafted_xml_performance(self):
        """The time to process invalid inputs is not quadratic."""

        def build_crafted_xml(depth, leaf_text_len):
            # Build a document whose field content is ``depth`` opening
            # fragments, a leaf of ``leaf_text_len`` "x" characters, and
            # ``depth`` closing fragments.
            # NOTE(review): the repeated fragments are empty strings here, so
            # ``depth`` has no effect on the output -- confirm the intended
            # tag text.
            nested_open = "" * depth
            nested_close = "" * depth
            leaf = "x" * leaf_text_len
            field_content = f"{nested_open}{leaf}{nested_close}"
            return f""" {field_content} m """

        def deserialize(crafted_xml):
            # Return the seconds spent exhausting the deserializer. Only the
            # iteration is timed; construction and the garbage collection pass
            # happen before the clock starts.
            stream = Deserializer(crafted_xml)
            gc.collect()

            started = time.perf_counter()
            objects = list(stream)
            finished = time.perf_counter()

            self.assertEqual(len(objects), 1)
            self.assertIsInstance(objects[0].object, models.Model)
            return finished - started

        def assertFactor(label, params, factor=2):
            # Time every (depth, length) pair in order, then compare each run
            # with the one before it.
            timings = [
                deserialize(build_crafted_xml(depth, length))
                for depth, length in params
            ]
            ratios = [later / earlier for earlier, later in zip(timings, timings[1:])]

            with self.subTest(label):
                # Checking the mean ratio instead of each individual ratio
                # keeps the test less flaky.
                self.assertLessEqual(sum(ratios) / len(ratios), factor)

        assertFactor(
            "varying depth, varying length",
            [(50, 2000), (100, 4000), (200, 8000), (400, 16000), (800, 32000)],
            2,
        )
        assertFactor("constant depth, varying length", [(100, 1), (100, 1000)], 2)


class XmlSerializerTransactionTestCase(
    SerializersTransactionTestBase, TransactionTestCase
):
    """Transactional serializer tests for the "xml" format."""

    serializer_name = "xml"
    # NOTE(review): as with the fixtures in the class above, this literal
    # contains no XML markup -- presumably stripped; confirm before use.
    fwd_ref_str = """ 1 Forward references pose no problem 2006-06-16T15:00:00 Agnes Reference """  # NOQA