python-django/CVE-2024-39614.patch

From 2f128b1865bc43f6cf3583b1255bf1bd8be29e57 Mon Sep 17 00:00:00 2001
From: nkrapp <nico.krapp@suse.com>
Date: Mon, 22 Jul 2024 11:23:29 +0200
Subject: [PATCH] Fixed CVE-2024-39614 -- Mitigated potential DoS in
 get_supported_language_variant().

Language codes are now parsed with a maximum length limit of 500 chars.

Thanks to MProgrammer for the report.
---
 django/utils/translation/trans_real.py | 29 ++++++++++---
 docs/ref/utils.txt                     | 25 +++++++++++
 tests/i18n/tests.py                    | 59 ++++++++++++++++++++++++++
 3 files changed, 107 insertions(+), 6 deletions(-)

diff --git a/django/utils/translation/trans_real.py b/django/utils/translation/trans_real.py
index ecd701f3d8..0a237a5afc 100644
--- a/django/utils/translation/trans_real.py
+++ b/django/utils/translation/trans_real.py
@@ -30,9 +30,10 @@ _default = None
 CONTEXT_SEPARATOR = "\x04"

 # Maximum number of characters that will be parsed from the Accept-Language
-# header to prevent possible denial of service or memory exhaustion attacks.
-# About 10x longer than the longest value shown on MDN’s Accept-Language page.
-ACCEPT_LANGUAGE_HEADER_MAX_LENGTH = 500
+# header or cookie to prevent possible denial of service or memory exhaustion
+# attacks. About 10x longer than the longest value shown on MDN’s
+# Accept-Language page.
+LANGUAGE_CODE_MAX_LENGTH = 500

 # Format of Accept-Language header values. From RFC 2616, section 14.4 and 3.9
 # and RFC 3066, section 2.1
@@ -473,12 +474,28 @@ def get_supported_language_variant(lang_code, strict=False):
     If `strict` is False (the default), look for a country-specific variant
     when neither the language code nor its generic variant is found.

+    The language code is truncated to a maximum length to avoid potential
+    denial of service attacks.
+
     lru_cache should have a maxsize to prevent from memory exhaustion attacks,
     as the provided language codes are taken from the HTTP request. See also
     <https://www.djangoproject.com/weblog/2007/oct/26/security-fix/>.
     """
     if lang_code:
-        # If 'fr-ca' is not supported, try special fallback or language-only 'fr'.
+        # Truncate the language code to a maximum length to avoid potential
+        # denial of service attacks.
+        if len(lang_code) > LANGUAGE_CODE_MAX_LENGTH:
+            index = lang_code.rfind("-", 0, LANGUAGE_CODE_MAX_LENGTH)
+            if (
+                not strict
+                and index > 0
+            ):
+                # There is a generic variant under the maximum accepted length.
+                lang_code = lang_code[:index]
+            else:
+                raise ValueError("'lang_code' exceeds the maximum accepted length")
+        # If 'zh-hant-tw' is not supported, try special fallback or subsequent
+        # language codes i.e. 'zh-hant' and 'zh'.
         possible_lang_codes = [lang_code]
         try:
             possible_lang_codes.extend(LANG_INFO[lang_code]['fallback'])
@@ -599,13 +616,13 @@ def parse_accept_lang_header(lang_string):
     functools.lru_cache() to avoid repetitive parsing of common header values.
     """
     # If the header value doesn't exceed the maximum allowed length, parse it.
-    if len(lang_string) <= ACCEPT_LANGUAGE_HEADER_MAX_LENGTH:
+    if len(lang_string) <= LANGUAGE_CODE_MAX_LENGTH:
         return _parse_accept_lang_header(lang_string)

     # If there is at least one comma in the value, parse up to the last comma
     # before the max length, skipping any truncated parts at the end of the
     # header value.
-    index = lang_string.rfind(",", 0, ACCEPT_LANGUAGE_HEADER_MAX_LENGTH)
+    index = lang_string.rfind(",", 0, LANGUAGE_CODE_MAX_LENGTH)
     if index > 0:
         return _parse_accept_lang_header(lang_string[:index])

diff --git a/docs/ref/utils.txt b/docs/ref/utils.txt
index 390f167ce2..d0a8e8c1f3 100644
--- a/docs/ref/utils.txt
+++ b/docs/ref/utils.txt
@@ -1150,6 +1150,31 @@ functions without the ``u``.

     Raises :exc:`LookupError` if nothing is found.

+.. function:: get_supported_language_variant(lang_code, strict=False)
+
+    Returns ``lang_code`` if it's in the :setting:`LANGUAGES` setting, possibly
+    selecting a more generic variant. For example, ``'es'`` is returned if
+    ``lang_code`` is ``'es-ar'`` and ``'es'`` is in :setting:`LANGUAGES` but
+    ``'es-ar'`` isn't.
+
+    ``lang_code`` has a maximum accepted length of 500 characters. A
+    :exc:`ValueError` is raised if ``lang_code`` exceeds this limit and either
+    ``strict`` is ``True``, or ``strict`` is ``False`` and no generic variant
+    fits within the limit.
+
+    If ``strict`` is ``False`` (the default), a country-specific variant may
+    be returned when neither the language code nor its generic variant is found.
+    For example, if only ``'es-co'`` is in :setting:`LANGUAGES`, that's
+    returned for ``lang_code``\s like ``'es'`` and ``'es-ar'``. Those matches
+    aren't returned if ``strict=True``.
+
+    Raises :exc:`LookupError` if nothing is found.
+
+    .. versionchanged:: 4.2.14
+
+        In older versions, ``lang_code`` values over 500 characters were
+        processed without raising a :exc:`ValueError`.
+
 .. function:: to_locale(language)

     Turns a language name (en-us) into a locale name (en_US).
diff --git a/tests/i18n/tests.py b/tests/i18n/tests.py
index 6efc3a5ae3..3087e5b6a6 100644
--- a/tests/i18n/tests.py
+++ b/tests/i18n/tests.py
@@ -39,6 +39,7 @@ from django.utils.translation import (
 from django.utils.translation.reloader import (
     translation_file_changed, watch_for_translation_changes,
 )
+from django.utils.translation.trans_real import LANGUAGE_CODE_MAX_LENGTH

 from .forms import CompanyForm, I18nForm, SelectDateForm
 from .models import Company, TestModel
@@ -1434,6 +1435,64 @@ class MiscTests(SimpleTestCase):
         r.COOKIES = {settings.LANGUAGE_COOKIE_NAME: 'zh-hans'}
         r.META = {'HTTP_ACCEPT_LANGUAGE': 'de'}
         self.assertEqual(g(r), 'zh-hans')
+
+    @override_settings(
+        USE_I18N=True,
+        LANGUAGES=[
+            ("en", "English"),
+            ("ar-dz", "Algerian Arabic"),
+            ("de", "German"),
+            ("de-at", "Austrian German"),
+            ("pt-BR", "Portuguese (Brazil)"),
+        ],
+    )
+    def test_get_supported_language_variant_real(self):
+        g = trans_real.get_supported_language_variant
+        self.assertEqual(g("en"), "en")
+        self.assertEqual(g("en-gb"), "en")
+        self.assertEqual(g("de"), "de")
+        self.assertEqual(g("de-at"), "de-at")
+        self.assertEqual(g("de-ch"), "de")
+        self.assertEqual(g("pt-br"), "pt-br")
+        self.assertEqual(g("pt-BR"), "pt-BR")
+        self.assertEqual(g("pt"), "pt-br")
+        self.assertEqual(g("pt-pt"), "pt-br")
+        self.assertEqual(g("ar-dz"), "ar-dz")
+        self.assertEqual(g("ar-DZ"), "ar-DZ")
+        with self.assertRaises(LookupError):
+            g("pt", strict=True)
+        with self.assertRaises(LookupError):
+            g("pt-pt", strict=True)
+        with self.assertRaises(LookupError):
+            g("xyz")
+        with self.assertRaises(LookupError):
+            g("xy-zz")
+        msg = "'lang_code' exceeds the maximum accepted length"
+        with self.assertRaises(LookupError):
+            g("x" * LANGUAGE_CODE_MAX_LENGTH)
+        with self.assertRaisesMessage(ValueError, msg):
+            g("x" * (LANGUAGE_CODE_MAX_LENGTH + 1))
+        # 167 * 3 = 501 which is LANGUAGE_CODE_MAX_LENGTH + 1.
+        self.assertEqual(g("en-" * 167), "en")
+        with self.assertRaisesMessage(ValueError, msg):
+            g("en-" * 167, strict=True)
+        self.assertEqual(g("en-" * 30000), "en")  # catastrophic test
+
+    def test_get_supported_language_variant_null(self):
+        g = trans_null.get_supported_language_variant
+        self.assertEqual(g(settings.LANGUAGE_CODE), settings.LANGUAGE_CODE)
+        with self.assertRaises(LookupError):
+            g("pt")
+        with self.assertRaises(LookupError):
+            g("de")
+        with self.assertRaises(LookupError):
+            g("de-at")
+        with self.assertRaises(LookupError):
+            g("de", strict=True)
+        with self.assertRaises(LookupError):
+            g("de-at", strict=True)
+        with self.assertRaises(LookupError):
+            g("xyz")

     @override_settings(
         USE_I18N=True,
--
2.45.2