XML deserialization / Deserialization of untrusted input
Description
The XML deserializer in Django previously allowed nested tags inside field content when deserializing XML fixtures. The commit adds a guard that raises SuspiciousOperation when an element has child nodes (i.e., unexpected nested tags such as <em> inside a field value). This prevents crafting XML fixtures that might be misinterpreted or bypass validation during deserialization. The fix updates the XML deserializer (check_element_type) and gracefully propagates SuspiciousOperation through existing code paths, and adds tests/docs to reflect the change.
Proof of Concept
from django.core.serializers.xml_serializer import Deserializer as XMLDeserializer
crafted_xml = """<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<django-objects version=\"1.0\">\n <object model=\"fixtures.person\" pk=\"1\">\n <field type=\"CharField\" name=\"name\">Django <em>pony</em></field>\n </object>\n</django-objects>\n"""
# This should raise SuspiciousOperation due to nested tag inside a field value
list(XMLDeserializer(crafted_xml))
Commit Details
Author: Jacob Walls
Date: 2026-01-07 21:23 UTC
Message:
Refs #36769 -- Raised SuspiciousOperation for unexpected nested tags in XML Deserializer.
Thanks Shai Berger and Natalia Bidart for reviews.
Triage Assessment
Vulnerability Type: Deserialization vulnerability
Confidence: HIGH
Reasoning:
The commit introduces a security-related validation in the XML deserializer: it raises SuspiciousOperation when encountering unexpected nested tags, and propagates this through the code path. This prevents potentially crafted XML from being deserialized in unsafe ways, and includes tests/docs to reflect the change.
Verification Assessment
Vulnerability Type: XML deserialization / Deserialization of untrusted input
Confidence: HIGH
Affected Versions: stable/5.1.x and earlier in the 5.x line; backported/fixed in this commit (73c5e94521c5...)
Code Diff
diff --git a/django/core/serializers/xml_serializer.py b/django/core/serializers/xml_serializer.py
index e159180e17b4..d8ffbdf00a98 100644
--- a/django/core/serializers/xml_serializer.py
+++ b/django/core/serializers/xml_serializer.py
@@ -10,7 +10,7 @@
from django.apps import apps
from django.conf import settings
-from django.core.exceptions import ObjectDoesNotExist
+from django.core.exceptions import ObjectDoesNotExist, SuspiciousOperation
from django.core.serializers import base
from django.db import DEFAULT_DB_ALIAS, models
from django.utils.xmlutils import SimplerXMLGenerator, UnserializableContentError
@@ -411,6 +411,8 @@ def m2m_convert(n):
try:
for c in node.getElementsByTagName("object"):
values.append(m2m_convert(c))
+ except SuspiciousOperation:
+ raise
except Exception as e:
if isinstance(e, ObjectDoesNotExist) and self.handle_forward_references:
return base.DEFER_FIELD
@@ -440,6 +442,8 @@ def _get_model_from_node(self, node, attr):
def check_element_type(element):
+ if element.childNodes:
+ raise SuspiciousOperation(f"Unexpected element: {element.tagName!r}")
return element.nodeType in (element.TEXT_NODE, element.CDATA_SECTION_NODE)
diff --git a/docs/releases/6.1.txt b/docs/releases/6.1.txt
index b5b47a11d836..57129bfb666f 100644
--- a/docs/releases/6.1.txt
+++ b/docs/releases/6.1.txt
@@ -303,6 +303,10 @@ Serialization
``()``. This ensures primary keys are serialized when using
:option:`dumpdata --natural-primary`.
+* The XML deserializer now raises
+ :exc:`~django.core.exceptions.SuspiciousOperation` when it encounters
+ unexpected nested tags.
+
Signals
~~~~~~~
diff --git a/docs/topics/serialization.txt b/docs/topics/serialization.txt
index f801df05617d..1a129072e09c 100644
--- a/docs/topics/serialization.txt
+++ b/docs/topics/serialization.txt
@@ -198,7 +198,8 @@ lowercase name of the model ("session") separated by a dot.
Each field of the object is serialized as a ``<field>``-element sporting the
fields "type" and "name". The text content of the element represents the value
-that should be stored.
+that should be stored. (If the element contains child tags,
+:exc:`~django.core.exceptions.SuspiciousOperation` is raised.)
Foreign keys and other relational fields are treated a little bit differently:
@@ -237,6 +238,11 @@ This example links the given user with the permission models with PKs 46 and
XHTML, XML and Control Codes
<https://www.w3.org/International/questions/qa-controls>`_.
+.. versionchanged:: 6.1
+
+ :exc:`~django.core.exceptions.SuspiciousOperation` is raised when
+ unexpected nested tags are found.
+
.. _serialization-formats-json:
JSON
diff --git a/tests/fixtures/fixtures/invalid_deeply_nested_elements.xml b/tests/fixtures/fixtures/invalid_deeply_nested_elements.xml
index 2ed39c3d36eb..e3ed67f32061 100644
--- a/tests/fixtures/fixtures/invalid_deeply_nested_elements.xml
+++ b/tests/fixtures/fixtures/invalid_deeply_nested_elements.xml
@@ -3,6 +3,7 @@
<object pk="1" model="fixtures.person">
<field type="CharField" name="name">
+ <!-- This <em> is unexpected & invalid -->
Django <em>pony</em>
</field>
</object>
diff --git a/tests/fixtures/fixtures/invalid_deeply_nested_elements_natural_key.xml b/tests/fixtures/fixtures/invalid_deeply_nested_elements_natural_key.xml
new file mode 100644
index 000000000000..7433fad6c7f6
--- /dev/null
+++ b/tests/fixtures/fixtures/invalid_deeply_nested_elements_natural_key.xml
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="utf-8"?>
+<django-objects version="1.0">
+
+ <object pk="1" model="fixtures.book">
+ <field type="CharField" name="name">Music for all ages</field>
+ <field to="fixtures.person" name="authors" rel="ManyToManyRel">
+ <object>
+ <!-- This <em> is unexpected & invalid -->
+ <natural>Artist formerly known as <em>Prince</em></natural>
+ </object>
+ </field>
+ </object>
+
+</django-objects>
diff --git a/tests/fixtures/tests.py b/tests/fixtures/tests.py
index 7511569d2102..dd03343027f3 100644
--- a/tests/fixtures/tests.py
+++ b/tests/fixtures/tests.py
@@ -10,6 +10,7 @@
from django.apps import apps
from django.contrib.sites.models import Site
from django.core import management
+from django.core.exceptions import SuspiciousOperation
from django.core.files.temp import NamedTemporaryFile
from django.core.management import CommandError
from django.core.management.commands.dumpdata import ProxyModelWarning
@@ -23,7 +24,6 @@
CircularA,
CircularB,
NaturalKeyThing,
- Person,
PrimaryKeyUUIDModel,
ProxySpy,
Spy,
@@ -522,12 +522,13 @@ def test_loading_and_dumping(self):
)
def test_deeply_nested_elements(self):
- """Text inside deeply-nested tags is skipped."""
- management.call_command(
- "loaddata", "invalid_deeply_nested_elements.xml", verbosity=0
- )
- person = Person.objects.get(pk=1)
- self.assertEqual(person.name, "Django") # not "Django pony"
+ """Text inside deeply-nested tags raises SuspiciousOperation."""
+ for file in [
+ "invalid_deeply_nested_elements.xml",
+ "invalid_deeply_nested_elements_natural_key.xml",
+ ]:
+ with self.subTest(file=file), self.assertRaises(SuspiciousOperation):
+ management.call_command("loaddata", file, verbosity=0)
def test_dumpdata_with_excludes(self):
# Load fixture1 which has a site, two articles, and a category
diff --git a/tests/serializers/test_deserialization.py b/tests/serializers/test_deserialization.py
index a718a990385a..b6a0818c427a 100644
--- a/tests/serializers/test_deserialization.py
+++ b/tests/serializers/test_deserialization.py
@@ -1,15 +1,14 @@
import json
-import time
+import textwrap
import unittest
+from django.core.exceptions import SuspiciousOperation
from django.core.serializers.base import DeserializationError, DeserializedObject
from django.core.serializers.json import Deserializer as JsonDeserializer
from django.core.serializers.jsonl import Deserializer as JsonlDeserializer
from django.core.serializers.python import Deserializer
from django.core.serializers.xml_serializer import Deserializer as XMLDeserializer
-from django.db import models
from django.test import SimpleTestCase
-from django.test.utils import garbage_collect
from .models import Author
@@ -138,52 +137,23 @@ def test_yaml_bytes_input(self):
self.assertEqual(first_item.object, self.jane)
self.assertEqual(second_item.object, self.joe)
- def test_crafted_xml_performance(self):
- """The time to process invalid inputs is not quadratic."""
-
- def build_crafted_xml(depth, leaf_text_len):
- nested_open = "<nested>" * depth
- nested_close = "</nested>" * depth
- leaf = "x" * leaf_text_len
- field_content = f"{nested_open}{leaf}{nested_close}"
- return f"""
- <django-objects version="1.0">
- <object model="contenttypes.contenttype" pk="1">
- <field name="app_label">{field_content}</field>
- <field name="model">m</field>
- </object>
- </django-objects>
- """
-
- def deserialize(crafted_xml):
- iterator = XMLDeserializer(crafted_xml)
- garbage_collect()
-
- start_time = time.perf_counter()
- result = list(iterator)
- end_time = time.perf_counter()
-
- self.assertEqual(len(result), 1)
- self.assertIsInstance(result[0].object, models.Model)
- return end_time - start_time
-
- def assertFactor(label, params, factor=2):
- factors = []
- prev_time = None
- for depth, length in params:
- crafted_xml = build_crafted_xml(depth, length)
- elapsed = deserialize(crafted_xml)
- if prev_time is not None:
- factors.append(elapsed / prev_time)
- prev_time = elapsed
-
- with self.subTest(label):
- # Assert based on the average factor to reduce test flakiness.
- self.assertLessEqual(sum(factors) / len(factors), factor)
-
- assertFactor(
- "varying depth, varying length",
- [(50, 2000), (100, 4000), (200, 8000), (400, 16000), (800, 32000)],
- 2,
+ def test_crafted_xml_rejected(self):
+ depth = 100
+ leaf_text_len = 1000
+ nested_open = "<nested>" * depth
+ nested_close = "</nested>" * depth
+ leaf = "x" * leaf_text_len
+ field_content = f"{nested_open}{leaf}{nested_close}"
+ crafted_xml = textwrap.dedent(
+ f"""
+ <django-objects version="1.0">
+ <object model="contenttypes.contenttype" pk="1">
+ <field name="app_label">{field_content}</field>
+ <field name="model">m</field>
+ </object>
+ </django-objects>"""
)
- assertFactor("constant depth, varying length", [(100, 1), (100, 1000)], 2)
+
+ msg = "Unexpected element: 'nested'"
+ with self.assertRaisesMessage(SuspiciousOperation, msg):
+ list(XMLDeserializer(crafted_xml))