!96 Fixed changelog date not sorted

From: @jackssir Reviewed-by: @dillon_chen Signed-off-by: @dillon_chen
update python-lxml.spec.
2024-08-21 02:20:37 +00:00 · 2024-08-21 01:48:35 +00:00 · 2024-08-19 15:17:03 +08:00 · 2024-06-14 02:05:03 +00:00 · 2024-06-13 15:22:52 +08:00 · 2022-07-25 08:48:59 +00:00
5 changed files with 706 additions and 2 deletions
--- a/backport-CVE-2022-2309.patch
+++ b/backport-CVE-2022-2309.patch
@@ -0,0 +1,92 @@
+From 86368e9cf70a0ad23cccd5ee32de847149af0c6f Mon Sep 17 00:00:00 2001
+From: Stefan Behnel <stefan_ml@behnel.de>
+Date: Fri, 1 Jul 2022 21:06:10 +0200
+Subject: [PATCH] Fix a crash when incorrect parser input occurs together with
+ usages of iterwalk() on trees generated by the same parser.
+
+---
+ src/lxml/apihelpers.pxi      |  7 ++++---
+ src/lxml/iterparse.pxi       | 11 ++++++-----
+ src/lxml/tests/test_etree.py | 20 ++++++++++++++++++++
+ 3 files changed, 30 insertions(+), 8 deletions(-)
+
+diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi
+index c166276..9fae9fb 100644
+--- a/src/lxml/apihelpers.pxi
+++ b/src/lxml/apihelpers.pxi
+@@ -246,9 +246,10 @@ cdef dict _build_nsmap(xmlNode* c_node):
+     while c_node is not NULL and c_node.type == tree.XML_ELEMENT_NODE:
+         c_ns = c_node.nsDef
+         while c_ns is not NULL:
+-            prefix = funicodeOrNone(c_ns.prefix)
+-            if prefix not in nsmap:
+-                nsmap[prefix] = funicodeOrNone(c_ns.href)
+            if c_ns.prefix or c_ns.href:
+                prefix = funicodeOrNone(c_ns.prefix)
+                if prefix not in nsmap:
+                    nsmap[prefix] = funicodeOrNone(c_ns.href)
+             c_ns = c_ns.next
+         c_node = c_node.parent
+     return nsmap
+diff --git a/src/lxml/iterparse.pxi b/src/lxml/iterparse.pxi
+index 138c23a..a7299da 100644
+--- a/src/lxml/iterparse.pxi
+++ b/src/lxml/iterparse.pxi
+@@ -420,7 +420,7 @@ cdef int _countNsDefs(xmlNode* c_node):
+     count = 0
+     c_ns = c_node.nsDef
+     while c_ns is not NULL:
+-        count += 1
+        count += (c_ns.href is not NULL)
+         c_ns = c_ns.next
+     return count
+ 
+@@ -431,9 +431,10 @@ cdef int _appendStartNsEvents(xmlNode* c_node, list event_list) except -1:
+     count = 0
+     c_ns = c_node.nsDef
+     while c_ns is not NULL:
+-        ns_tuple = (funicode(c_ns.prefix) if c_ns.prefix is not NULL else '',
+-                    funicode(c_ns.href))
+-        event_list.append( (u"start-ns", ns_tuple) )
+-        count += 1
+        if c_ns.href:
+            ns_tuple = (funicodeOrEmpty(c_ns.prefix),
+                        funicode(c_ns.href))
+            event_list.append( (u"start-ns", ns_tuple) )
+            count += 1
+         c_ns = c_ns.next
+     return count
+diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py
+index e5f0846..285313f 100644
+--- a/src/lxml/tests/test_etree.py
+++ b/src/lxml/tests/test_etree.py
+@@ -1460,6 +1460,26 @@ class ETreeOnlyTestCase(HelperTestCase):
+             [1,2,1,4],
+             counts)
+ 
+    def test_walk_after_parse_failure(self):
+        # This used to be an issue because libxml2 can leak empty namespaces
+        # between failed parser runs.  iterwalk() failed to handle such a tree.
+        try:
+            etree.XML('''<anot xmlns="1">''')
+        except etree.XMLSyntaxError:
+            pass
+        else:
+            assert False, "invalid input did not fail to parse"
+
+        et = etree.XML('''<root>  </root>''')
+        try:
+            ns = next(etree.iterwalk(et, events=('start-ns',)))
+        except StopIteration:
+            # This would be the expected result, because there was no namespace
+            pass
+        else:
+            # This is a bug in libxml2
+            assert not ns, repr(ns)
+
+     def test_itertext_comment_pi(self):
+         # https://bugs.launchpad.net/lxml/+bug/1844674
+         XML = self.etree.XML
+-- 
+1.8.3.1
+
--- a/backport-CVE-2024-37388.patch
+++ b/backport-CVE-2024-37388.patch
@@ -0,0 +1,372 @@
+From b38cebf2f846e92bd63de4488fd3d1c8b568f397 Mon Sep 17 00:00:00 2001
+From: scoder <stefan_ml@behnel.de>
+Date: Fri, 29 Dec 2023 14:21:23 +0100
+Subject: [PATCH] Disable external entity resolution (XXE) by default (GH-391)
+
+This prevents security risks that would allow loading arbitrary external files.
+
+Closes https://bugs.launchpad.net/lxml/+bug/1742885
+Supersedes https://github.com/lxml/lxml/pull/130
+---
+ doc/FAQ.txt                     | 12 +++--
+ src/lxml/includes/xmlparser.pxd | 18 +++++++-
+ src/lxml/parser.pxi             | 70 ++++++++++++++++++++++++++--
+ src/lxml/tests/test_etree.py    | 81 +++++++++++++++++++++++++++++++++
+ 4 files changed, 170 insertions(+), 11 deletions(-)
+
+diff --git a/doc/FAQ.txt b/doc/FAQ.txt
+index 48f69a6..7f3a524 100644
+--- a/doc/FAQ.txt
+++ b/doc/FAQ.txt
+@@ -1107,9 +1107,9 @@ useless for the data commonly sent through web services and
+ can simply be disabled, which rules out several types of
+ denial of service attacks at once.  This also involves an attack
+ that reads local files from the server, as XML entities can be
+-defined to expand into their content.   Consequently, version
+-1.2 of the SOAP standard explicitly disallows entity references
+-in the XML stream.
+defined to expand into the content of external resources.
+Consequently, version 1.2 of the SOAP standard explicitly
+disallows entity references in the XML stream.
+ 
+ To disable entity expansion, use an XML parser that is configured
+ with the option ``resolve_entities=False``.  Then, after (or
+@@ -1117,7 +1117,11 @@ while) parsing the document, use ``root.iter(etree.Entity)`` to
+ recursively search for entity references.  If it contains any,
+ reject the entire input document with a suitable error response.
+ In lxml 3.x, you can also use the new DTD introspection API to
+-apply your own restrictions on input documents.
+apply your own restrictions on input documents.  Since version 5.x,
+lxml disables the expansion of external entities (XXE) by default.
+If you really want to allow loading external files into XML documents
+using this functionality, you have to explicitly set
+``resolve_entities=True``.
+ 
+ Another attack to consider is compression bombs.  If you allow
+ compressed input into your web service, attackers can try to send
+diff --git a/src/lxml/includes/xmlparser.pxd b/src/lxml/includes/xmlparser.pxd
+index 45acfc8..3945495 100644
+--- a/src/lxml/includes/xmlparser.pxd
+++ b/src/lxml/includes/xmlparser.pxd
+@@ -1,9 +1,9 @@
+ from libc.string cimport const_char
+ 
+ from lxml.includes.tree cimport (
+-    xmlDoc, xmlNode, xmlDict, xmlDtd, xmlChar, const_xmlChar)
+    xmlDoc, xmlNode, xmlEntity, xmlDict, xmlDtd, xmlChar, const_xmlChar)
+ from lxml.includes.tree cimport xmlInputReadCallback, xmlInputCloseCallback
+-from lxml.includes.xmlerror cimport xmlError, xmlStructuredErrorFunc
+from lxml.includes.xmlerror cimport xmlError, xmlStructuredErrorFunc, xmlErrorLevel
+ 
+ 
+ cdef extern from "libxml/parser.h":
+@@ -47,11 +47,14 @@ cdef extern from "libxml/parser.h":
+ 
+     ctypedef void (*referenceSAXFunc)(void * ctx, const_xmlChar* name)
+ 
+    ctypedef xmlEntity* (*getEntitySAXFunc)(void* ctx, const_xmlChar* name)
+
+     cdef int XML_SAX2_MAGIC
+ 
+ cdef extern from "libxml/tree.h":
+     ctypedef struct xmlParserInput:
+         int line
+        int col
+         int length
+         const_xmlChar* base
+         const_xmlChar* cur
+@@ -76,6 +79,7 @@ cdef extern from "libxml/tree.h":
+         charactersSAXFunc               characters
+         cdataBlockSAXFunc               cdataBlock
+         referenceSAXFunc                reference
+        getEntitySAXFunc                getEntity
+         commentSAXFunc                  comment
+         processingInstructionSAXFunc	processingInstruction
+         startDocumentSAXFunc            startDocument
+@@ -150,6 +154,8 @@ cdef extern from "libxml/parser.h":
+         int inSubset
+         int charset
+         xmlParserInput* input
+        int inputNr
+        xmlParserInput** inputTab
+ 
+     ctypedef enum xmlParserOption:
+         XML_PARSE_RECOVER = 1 # recover on errors
+@@ -212,6 +218,12 @@ cdef extern from "libxml/parser.h":
+                                    char* filename, const_char* encoding,
+                                    int options) nogil
+ 
+    cdef void xmlErrParser(xmlParserCtxt* ctxt, xmlNode* node,
+                           int domain, int code, xmlErrorLevel level,
+                           const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
+                           int int1, const char *msg, ...)
+
+
+ # iterparse:
+ 
+     cdef xmlParserCtxt* xmlCreatePushParserCtxt(xmlSAXHandler* sax,
+@@ -233,6 +245,8 @@ cdef extern from "libxml/parser.h":
+     cdef xmlExternalEntityLoader xmlGetExternalEntityLoader() nogil
+     cdef void xmlSetExternalEntityLoader(xmlExternalEntityLoader f) nogil
+ 
+    cdef xmlEntity* xmlSAX2GetEntity(void* ctxt, const_xmlChar* name) nogil
+
+ # DTDs:
+ 
+     cdef xmlDtd* xmlParseDTD(const_xmlChar* ExternalID, const_xmlChar* SystemID) nogil
+diff --git a/src/lxml/parser.pxi b/src/lxml/parser.pxi
+index 3187a38..2f0ce80 100644
+--- a/src/lxml/parser.pxi
+++ b/src/lxml/parser.pxi
+@@ -794,6 +794,7 @@ cdef inline int _fixHtmlDictNodeNames(tree.xmlDict* c_dict,
+         c_attr = c_attr.next
+     return 0
+ 
+
+ @cython.internal
+ cdef class _BaseParser:
+     cdef ElementClassLookup _class_lookup
+@@ -806,6 +807,7 @@ cdef class _BaseParser:
+     cdef bint _remove_pis
+     cdef bint _strip_cdata
+     cdef bint _collect_ids
+    cdef bint _resolve_external_entities
+     cdef XMLSchema _schema
+     cdef bytes _filename
+     cdef readonly object target
+@@ -814,7 +816,7 @@ cdef class _BaseParser:
+ 
+     def __init__(self, int parse_options, bint for_html, XMLSchema schema,
+                  remove_comments, remove_pis, strip_cdata, collect_ids,
+-                 target, encoding):
+                 target, encoding, bint resolve_external_entities=True):
+         cdef tree.xmlCharEncodingHandler* enchandler
+         cdef int c_encoding
+         if not isinstance(self, (XMLParser, HTMLParser)):
+@@ -827,6 +829,7 @@ cdef class _BaseParser:
+         self._remove_pis = remove_pis
+         self._strip_cdata = strip_cdata
+         self._collect_ids = collect_ids
+        self._resolve_external_entities = resolve_external_entities
+         self._schema = schema
+ 
+         self._resolvers = _ResolverRegistry()
+@@ -906,6 +909,8 @@ cdef class _BaseParser:
+         if self._strip_cdata:
+             # hard switch-off for CDATA nodes => makes them plain text
+             pctxt.sax.cdataBlock = NULL
+        if not self._resolve_external_entities:
+            pctxt.sax.getEntity = _getInternalEntityOnly
+ 
+     cdef int _registerHtmlErrorHandler(self, xmlparser.xmlParserCtxt* c_ctxt) except -1:
+         cdef xmlparser.xmlSAXHandler* sax = c_ctxt.sax
+@@ -1206,6 +1211,56 @@ cdef class _BaseParser:
+         finally:
+             context.cleanup()
+ 
+cdef tree.xmlEntity* _getInternalEntityOnly(void* ctxt, const_xmlChar* name):
+    """
+    Callback function to intercept the entity resolution when external entity loading is disabled.
+    """
+    cdef tree.xmlEntity* entity = xmlparser.xmlSAX2GetEntity(ctxt, name)
+    if not entity:
+        return NULL
+    if entity.etype not in (
+            tree.xmlEntityType.XML_EXTERNAL_GENERAL_PARSED_ENTITY,
+            tree.xmlEntityType.XML_EXTERNAL_GENERAL_UNPARSED_ENTITY,
+            tree.xmlEntityType.XML_EXTERNAL_PARAMETER_ENTITY):
+        return entity
+
+    # Reject all external entities and fail the parsing instead. There is currently
+    # no way in libxml2 to just prevent the entity resolution in this case.
+    cdef xmlerror.xmlError c_error
+    cdef xmlerror.xmlStructuredErrorFunc err_func
+    cdef xmlparser.xmlParserInput* parser_input
+    cdef void* err_context
+
+    c_ctxt = <xmlparser.xmlParserCtxt *> ctxt
+    err_func = xmlerror.xmlStructuredError
+    if err_func:
+        parser_input = c_ctxt.input
+        # Copied from xmlVErrParser() in libxml2: get current input from stack.
+        if parser_input and parser_input.filename is NULL and c_ctxt.inputNr > 1:
+            parser_input = c_ctxt.inputTab[c_ctxt.inputNr - 2]
+
+        c_error = xmlerror.xmlError(
+            domain=xmlerror.xmlErrorDomain.XML_FROM_PARSER,
+            code=xmlerror.xmlParserErrors.XML_ERR_EXT_ENTITY_STANDALONE,
+            level=xmlerror.xmlErrorLevel.XML_ERR_FATAL,
+            message=b"External entity resolution is disabled for security reasons "
+                    b"when resolving '&%s;'. Use 'XMLParser(resolve_entities=True)' "
+                    b"if you consider it safe to enable it.",
+            file=parser_input.filename,
+            node=entity,
+            str1=<char*> name,
+            str2=NULL,
+            str3=NULL,
+            line=parser_input.line if parser_input else 0,
+            int1=0,
+            int2=parser_input.col if parser_input else 0,
+        )
+        err_context = xmlerror.xmlStructuredErrorContext
+        err_func(err_context, &c_error)
+
+    c_ctxt.wellFormed = 0
+    # The entity was looked up and does not need to be freed.
+    return NULL
+ 
+ cdef void _initSaxDocument(void* ctxt) with gil:
+     xmlparser.xmlSAX2StartDocument(ctxt)
+@@ -1508,12 +1563,14 @@ cdef class XMLParser(_FeedParser):
+     - strip_cdata        - replace CDATA sections by normal text content (default: True)
+     - compact            - save memory for short text content (default: True)
+     - collect_ids        - use a hash table of XML IDs for fast access (default: True, always True with DTD validation)
+-    - resolve_entities   - replace entities by their text value (default: True)
+     - huge_tree          - disable security restrictions and support very deep trees
+                            and very long text content (only affects libxml2 2.7+)
+ 
+     Other keyword arguments:
+-
+    - resolve_entities - replace entities by their text value: False for keeping the
+          entity references, True for resolving them, and 'internal' for resolving
+          internal definitions only (no external file/URL access).
+          The default used to be True and was changed to 'internal' in lxml 5.0.
+     - encoding - override the document encoding
+     - target   - a parser target object that will receive the parse events
+     - schema   - an XMLSchema to validate against
+@@ -1525,10 +1582,11 @@ cdef class XMLParser(_FeedParser):
+     def __init__(self, *, encoding=None, attribute_defaults=False,
+                  dtd_validation=False, load_dtd=False, no_network=True,
+                  ns_clean=False, recover=False, XMLSchema schema=None,
+-                 huge_tree=False, remove_blank_text=False, resolve_entities=True,
+                 huge_tree=False, remove_blank_text=False, resolve_entities='internal',
+                  remove_comments=False, remove_pis=False, strip_cdata=True,
+                  collect_ids=True, target=None, compact=True):
+         cdef int parse_options
+        cdef bint resolve_external = True
+         parse_options = _XML_DEFAULT_PARSE_OPTIONS
+         if load_dtd:
+             parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD
+@@ -1553,12 +1611,14 @@ cdef class XMLParser(_FeedParser):
+             parse_options = parse_options ^ xmlparser.XML_PARSE_COMPACT
+         if not resolve_entities:
+             parse_options = parse_options ^ xmlparser.XML_PARSE_NOENT
+        elif resolve_entities == 'internal':
+            resolve_external = False
+         if not strip_cdata:
+             parse_options = parse_options ^ xmlparser.XML_PARSE_NOCDATA
+ 
+         _BaseParser.__init__(self, parse_options, 0, schema,
+                              remove_comments, remove_pis, strip_cdata,
+-                             collect_ids, target, encoding)
+                             collect_ids, target, encoding, resolve_external)
+ 
+ 
+ cdef class XMLPullParser(XMLParser):
+diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py
+index 14b21f7..bc7548f 100644
+--- a/src/lxml/tests/test_etree.py
+++ b/src/lxml/tests/test_etree.py
+@@ -12,11 +12,14 @@ from __future__ import absolute_import
+ from collections import OrderedDict
+ import os.path
+ import unittest
+import contextlib
+ import copy
+ import sys
+ import re
+ import gc
+ import operator
+import shutil
+import tempfile
+ import textwrap
+ import zlib
+ import gzip
+@@ -1675,6 +1678,84 @@ class ETreeOnlyTestCase(HelperTestCase):
+         self.assertEqual(_bytes('<doc>&myentity;</doc>'),
+                           tostring(root))
+ 
+    @contextlib.contextmanager
+    def _xml_test_file(self, name, content=b'<evil>XML</evil>'):
+        temp_dir = tempfile.mkdtemp()
+        try:
+            xml_file = os.path.join(temp_dir, name)
+            with open(xml_file, 'wb') as tmpfile:
+                tmpfile.write(content)
+            yield xml_file
+        finally:
+            shutil.rmtree(temp_dir)
+
+    def test_entity_parse_external(self):
+        fromstring = self.etree.fromstring
+        tostring = self.etree.tostring
+        parser = self.etree.XMLParser(resolve_entities=True)
+
+        with self._xml_test_file("entity.xml") as entity_file:
+            xml = '''
+            <!DOCTYPE doc [
+                <!ENTITY my_external_entity SYSTEM "%s">
+            ]>
+            <doc>&my_external_entity;</doc>
+            ''' % path2url(entity_file)
+            root = fromstring(xml, parser)
+
+        self.assertEqual(_bytes('<doc><evil>XML</evil></doc>'),
+                          tostring(root))
+        self.assertEqual(root.tag, 'doc')
+        self.assertEqual(root[0].tag, 'evil')
+        self.assertEqual(root[0].text, 'XML')
+        self.assertEqual(root[0].tail, None)
+
+    def test_entity_parse_external_no_resolve(self):
+        fromstring = self.etree.fromstring
+        parser = self.etree.XMLParser(resolve_entities=False)
+        Entity = self.etree.Entity
+
+        with self._xml_test_file("entity.xml") as entity_file:
+            xml = '''
+            <!DOCTYPE doc [
+                <!ENTITY my_external_entity SYSTEM "%s">
+            ]>
+            <doc>&my_external_entity;</doc>
+            ''' % path2url(entity_file)
+            root = fromstring(xml, parser)
+
+        self.assertEqual(root[0].tag, Entity)
+        self.assertEqual(root[0].text, "&my_external_entity;")
+
+    def test_entity_parse_no_external_default(self):
+        fromstring = self.etree.fromstring
+
+        with self._xml_test_file("entity.xml") as entity_file:
+            xml = '''
+            <!DOCTYPE doc [
+                <!ENTITY my_failing_external_entity SYSTEM "%s">
+            ]>
+            <doc>&my_failing_external_entity;</doc>
+            ''' % path2url(entity_file)
+
+            try:
+                fromstring(xml)
+            except self.etree.XMLSyntaxError as exc:
+                exception = exc
+            else:
+                self.assertTrue(False, "XMLSyntaxError was not raised")
+
+        self.assertIn("my_failing_external_entity", str(exception))
+        self.assertTrue(exception.error_log)
+        # Depending on the libxml2 version, we get different errors here,
+        # not necessarily the one that lxml produced. But it should fail either way.
+        for error in exception.error_log:
+            if "my_failing_external_entity" in error.message:
+                self.assertEqual(5, error.line)
+                break
+        else:
+            self.assertFalse("entity error not found in parser error log")
+
+     def test_entity_restructure(self):
+         xml = _bytes('''<!DOCTYPE root [ <!ENTITY nbsp "&#160;"> ]>
+             <root>
+-- 
+2.33.0
+
--- a/backport-Cleaner-cover-some-more-cases-where-scripts-could-sn.patch
+++ b/backport-Cleaner-cover-some-more-cases-where-scripts-could-sn.patch
@@ -0,0 +1,163 @@
+From 69a747356655158fdf9abaecea5feafb3bd6b5f5 Mon Sep 17 00:00:00 2001
+From: Stefan Behnel <stefan_ml@behnel.de>
+Date: Sat, 11 Dec 2021 12:19:21 +0100
+Subject: [PATCH] Cleaner: cover some more cases where scripts could sneak
+ through in specially crafted style content.
+
+---
+ src/lxml/html/clean.py            | 20 ++++++------
+ src/lxml/html/tests/test_clean.py | 65 ++++++++++++++++++++++++++++++++++++++-
+ 2 files changed, 73 insertions(+), 12 deletions(-)
+
+diff --git a/src/lxml/html/clean.py b/src/lxml/html/clean.py
+index 4df10c2..0e96627 100644
+--- a/src/lxml/html/clean.py
+++ b/src/lxml/html/clean.py
+@@ -74,22 +74,20 @@ _looks_like_tag_content = re.compile(
+ # All kinds of schemes besides just javascript: that can cause
+ # execution:
+ _find_image_dataurls = re.compile(
+-    r'^data:image/(.+);base64,', re.I).findall
+-_is_possibly_malicious_scheme = re.compile(
+    r'data:image/(.+);base64,', re.I).findall
+_possibly_malicious_schemes = re.compile(
+     r'(javascript|jscript|livescript|vbscript|data|about|mocha):',
+     re.I).findall
+ # SVG images can contain script content
+-_is_unsafe_image_type = re.compile(r"(xml|svg)", re.I).findall
+_is_unsafe_image_type = re.compile(r"(xml|svg)", re.I).search
+ 
+-def _is_javascript_scheme(s):
+-    is_image_url = False
+def _has_javascript_scheme(s):
+    safe_image_urls = 0
+     for image_type in _find_image_dataurls(s):
+-        is_image_url = True
+         if _is_unsafe_image_type(image_type):
+             return True
+-    if is_image_url:
+-        return False
+-    return bool(_is_possibly_malicious_scheme(s))
+        safe_image_urls += 1
+    return len(_possibly_malicious_schemes(s)) > safe_image_urls
+ 
+ _substitute_whitespace = re.compile(r'[\s\x00-\x08\x0B\x0C\x0E-\x19]+').sub
+ 
+@@ -521,7 +519,7 @@ class Cleaner(object):
+     def _remove_javascript_link(self, link):
+         # links like "j a v a s c r i p t:" might be interpreted in IE
+         new = _substitute_whitespace('', unquote_plus(link))
+-        if _is_javascript_scheme(new):
+        if _has_javascript_scheme(new):
+             # FIXME: should this be None to delete?
+             return ''
+         return link
+@@ -543,7 +541,7 @@ class Cleaner(object):
+         style = style.replace('\\', '')
+         style = _substitute_whitespace('', style)
+         style = style.lower()
+-        if 'javascript:' in style:
+        if _has_javascript_scheme(style):
+             return True
+         if 'expression(' in style:
+             return True
+diff --git a/src/lxml/html/tests/test_clean.py b/src/lxml/html/tests/test_clean.py
+index a05d967..aec87cd 100644
+--- a/src/lxml/html/tests/test_clean.py
+++ b/src/lxml/html/tests/test_clean.py
+@@ -126,7 +126,7 @@ class CleanerTest(unittest.TestCase):
+             lxml.html.tostring(clean_html(s)))
+ 
+     def test_sneaky_import_in_style(self):
+-        # Prevent "@@importimport" -> "@import" replacement.
+        # Prevent "@@importimport" -> "@import" replacement etc.
+         style_codes = [
+             "@@importimport(extstyle.css)",
+             "@ @  import import(extstyle.css)",
+@@ -134,6 +134,11 @@ class CleanerTest(unittest.TestCase):
+             "@@  import import(extstyle.css)",
+             "@ @import import(extstyle.css)",
+             "@@importimport()",
+            "@@importimport()  ()",
+            "@/* ... */import()",
+            "@im/* ... */port()",
+            "@ @import/* ... */import()",
+            "@    /* ... */      import()",
+         ]
+         for style_code in style_codes:
+             html = '<style>%s</style>' % style_code
+@@ -145,6 +150,41 @@ class CleanerTest(unittest.TestCase):
+                 cleaned,
+                 "%s  ->  %s" % (style_code, cleaned))
+ 
+    def test_sneaky_schemes_in_style(self):
+        style_codes = [
+            "javasjavascript:cript:",
+            "javascriptjavascript::",
+            "javascriptjavascript:: :",
+            "vbjavascript:cript:",
+        ]
+        for style_code in style_codes:
+            html = '<style>%s</style>' % style_code
+            s = lxml.html.fragment_fromstring(html)
+
+            cleaned = lxml.html.tostring(clean_html(s))
+            self.assertEqual(
+                b'<style>/* deleted */</style>',
+                cleaned,
+                "%s  ->  %s" % (style_code, cleaned))
+
+    def test_sneaky_urls_in_style(self):
+        style_codes = [
+            "url(data:image/svg+xml;base64,...)",
+            "url(javasjavascript:cript:)",
+            "url(javasjavascript:cript: ::)",
+            "url(vbjavascript:cript:)",
+            "url(vbjavascript:cript: :)",
+        ]
+        for style_code in style_codes:
+            html = '<style>%s</style>' % style_code
+            s = lxml.html.fragment_fromstring(html)
+
+            cleaned = lxml.html.tostring(clean_html(s))
+            self.assertEqual(
+                b'<style>url()</style>',
+                cleaned,
+                "%s  ->  %s" % (style_code, cleaned))
+
+     def test_svg_data_links(self):
+         # Remove SVG images with potentially insecure content.
+         svg = b'<svg onload="alert(123)" />'
+@@ -188,6 +228,29 @@ class CleanerTest(unittest.TestCase):
+                 cleaned,
+                 "%s  ->  %s" % (url, cleaned))
+ 
+    def test_image_data_links_in_style(self):
+        data = b'123'
+        data_b64 = base64.b64encode(data).decode('ASCII')
+        urls = [
+            "data:image/jpeg;base64," + data_b64,
+            "data:image/apng;base64," + data_b64,
+            "data:image/png;base64," + data_b64,
+            "data:image/gif;base64," + data_b64,
+            "data:image/webp;base64," + data_b64,
+            "data:image/bmp;base64," + data_b64,
+            "data:image/tiff;base64," + data_b64,
+            "data:image/x-icon;base64," + data_b64,
+        ]
+        for url in urls:
+            html = '<style> url(%s) </style>' % url
+            s = lxml.html.fragment_fromstring(html)
+
+            cleaned = lxml.html.tostring(clean_html(s))
+            self.assertEqual(
+                html.encode("UTF-8"),
+                cleaned,
+                "%s  ->  %s" % (url, cleaned))
+
+     def test_formaction_attribute_in_button_input(self):
+         # The formaction attribute overrides the form's action and should be
+         # treated as a malicious link attribute
+-- 
+2.13.7
+
--- a/backport-Work-around-libxml2-bug-in-affected-versions.patch
+++ b/backport-Work-around-libxml2-bug-in-affected-versions.patch
@@ -0,0 +1,56 @@
+From c742576c105f40fc8b754fcae56fee4aa35840a3 Mon Sep 17 00:00:00 2001
+From: Stefan Behnel <stefan_ml@behnel.de>
+Date: Tue, 19 Jul 2022 08:25:20 +0200
+Subject: [PATCH] Work around libxml2 bug in affected versions that failed to
+ reset the namespace count in the parser context.
+
+See https://gitlab.gnome.org/GNOME/libxml2/-/issues/378
+---
+ src/lxml/includes/xmlparser.pxd | 1 +
+ src/lxml/parser.pxi             | 3 +++
+ src/lxml/tests/test_etree.py    | 3 +--
+ 3 files changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/src/lxml/includes/xmlparser.pxd b/src/lxml/includes/xmlparser.pxd
+index a196e34..45acfc8 100644
+--- a/src/lxml/includes/xmlparser.pxd
+++ b/src/lxml/includes/xmlparser.pxd
+@@ -144,6 +144,7 @@ cdef extern from "libxml/parser.h":
+         void* userData
+         int* spaceTab
+         int spaceMax
+        int nsNr
+         bint html
+         bint progressive
+         int inSubset
+diff --git a/src/lxml/parser.pxi b/src/lxml/parser.pxi
+index f5baf29..f0c8c6b 100644
+--- a/src/lxml/parser.pxi
+++ b/src/lxml/parser.pxi
+@@ -569,6 +569,9 @@ cdef class _ParserContext(_ResolverContext):
+                 self._c_ctxt.disableSAX = 0 # work around bug in libxml2
+             else:
+                 xmlparser.xmlClearParserCtxt(self._c_ctxt)
+                # work around bug in libxml2 [2.9.10 .. 2.9.14]:
+                # https://gitlab.gnome.org/GNOME/libxml2/-/issues/378
+                self._c_ctxt.nsNr = 0
+ 
+     cdef int prepare(self, bint set_document_loader=True) except -1:
+         cdef int result
+diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py
+index 8bf82c0..0339796 100644
+--- a/src/lxml/tests/test_etree.py
+++ b/src/lxml/tests/test_etree.py
+@@ -1491,8 +1491,7 @@ class ETreeOnlyTestCase(HelperTestCase):
+             # This would be the expected result, because there was no namespace
+             pass
+         else:
+-            # This is a bug in libxml2
+-            assert not ns, repr(ns)
+            assert False, "Found unexpected namespace '%s'" % ns
+ 
+     def test_itertext_comment_pi(self):
+         # https://bugs.launchpad.net/lxml/+bug/1844674
+-- 
+1.8.3.1
+
--- a/python-lxml.spec
+++ b/python-lxml.spec
@@ -7,7 +7,7 @@ The latest release works with all CPython versions from 2.7 to 3.7.

 Name:           python-%{modname}
 Version:        4.5.2
-Release:        5
+Release:        9
 Summary:        XML processing library combining libxml2/libxslt with the ElementTree API
 License:        BSD
 URL:            http://lxml.de
@@ -18,6 +18,10 @@ Patch6001:      backport-CVE-2020-27783-2.patch
 Patch6002:      backport-CVE-2021-28957.patch
 Patch6003:      backport-0001-CVE-2021-43818.patch
 Patch6004:      backport-0002-CVE-2021-43818.patch
+Patch6005:      backport-Cleaner-cover-some-more-cases-where-scripts-could-sn.patch
+Patch6006:      backport-CVE-2022-2309.patch
+Patch6007:      backport-Work-around-libxml2-bug-in-affected-versions.patch
+Patch6008:      backport-CVE-2024-37388.patch

 BuildRequires:  gcc libxml2-devel libxslt-devel

@@ -41,6 +45,8 @@ BuildRequires:  python3-devel python3-setuptools python3-Cython

 %prep
 %autosetup -n %{modname}-%{version} -p1
+# Remove pregenerated Cython C sources
+find -type f -name '*.c' -print -delete

 %build
 export WITH_CYTHON=true
@@ -68,6 +74,21 @@ make test3
 %doc README.rst src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt

 %changelog
+* Wed Jun 12 2024 zhuofeng <zhuofeng2@huawei.com> - 4.5.2-9
+- Type:CVE
+- CVE:CVE-2024-37388
+- SUG:NA
+- DESC:fix CVE-2024-37388
+
+* Mon Jul 25 2022 shixuantong <shixuantong@h-partners.com> - 4.5.2-8
+- Remove pregenerated Cython C sources
+
+* Thu Jul 21 2022 renhongxun <renhongxun@h-partners.com> - 4.5.2-7
+- fix CVE-2022-2309
+
+* Sat Jan 22 2022 shixuantong <shixuantong@huawei.com> - 4.5.2-6
+- Cleaner: cover some more cases where scripts could sneak through in specially crafted style content.
+
 * Wed Jan 19 2022 shixuantong <shixuantong@huawei.com> - 4.5.2-5
 - enable check

@@ -80,7 +101,7 @@ make test3
 * Fri Feb 05 2021 shixuantong <shixuantong@huawei.com> - 4.5.2-2
 - fix CVE-2020-27783

-* Tue Jan 05 2020 shixuantong <shixuantong@huawei.com> - 4.5.2-1
+* Tue Jan 05 2021 shixuantong <shixuantong@huawei.com> - 4.5.2-1
 - update version to 4.5.2

 * Fri Aug 21 2020 shixuantong <shixuantong@huawei.com> - 4.2.3-5
Author	SHA1	Message	Date
openeuler-ci-bot	72883bff79	!96 Fixed changelog date not sorted From: @jackssir Reviewed-by: @dillon_chen Signed-off-by: @dillon_chen	2024-08-21 02:20:37 +00:00
lvfei	d31c18ab7d	update python-lxml.spec. Signed-off-by: lvfei <lvfei@kylinos.cn>	2024-08-21 01:48:35 +00:00
lvfei	69fd3ee9bc	Fixed changelog date not sorted	2024-08-19 15:17:03 +08:00
openeuler-ci-bot	ba231e969f	!89 fix CVE-2024-37388 From: @zhuofeng6 Reviewed-by: @hubin95, @gaoruoshu Signed-off-by: @hubin95, @gaoruoshu	2024-06-14 02:05:03 +00:00
zhuofeng	76da917069	fix CVE-2024-37388	2024-06-13 15:22:52 +08:00
openeuler-ci-bot	a48a35752f	!52 删除源码包中etree.c等文件，构建过程自动生成 From: @tong_1001 Reviewed-by: @xiezhipeng1 Signed-off-by: @xiezhipeng1	2022-07-25 08:48:59 +00:00
shixuantong	bddafeb6e5	Remove pregenerated Cython C sources	2022-07-25 16:10:14 +08:00
openeuler-ci-bot	6479fbc60b	!48 Fix CVE-2022-2309 From: @renxichen Reviewed-by: @xiezhipeng1 Signed-off-by: @xiezhipeng1	2022-07-21 09:14:24 +00:00
rwx403335	a1ea60f874	Fix CVE-2022-2309	2022-07-21 15:52:11 +08:00
openeuler-ci-bot	fdc6554a9f	!42 [sync] PR-39: Cleaner: cover some more cases where scripts could sneak through in specially crafted style content. Merge pull request !42 from openeuler-sync-bot/sync-pr39-openEuler-20.03-LTS-Next-to-openEuler-20.03-LTS-SP3	2022-01-28 01:57:09 +00:00
shixuantong	3735c909d9	Cleaner: cover some more cases where scripts could sneak through in specially crafted style content. (cherry picked from commit b4f89041f532543487ec5a1b916faf83d98b6b6b)	2022-01-27 10:35:35 +08:00