Compare commits
11 Commits
17aed4af77
...
72883bff79
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
72883bff79 | ||
|
|
d31c18ab7d | ||
|
|
69fd3ee9bc | ||
|
|
ba231e969f | ||
|
|
76da917069 | ||
|
|
a48a35752f | ||
|
|
bddafeb6e5 | ||
|
|
6479fbc60b | ||
|
|
a1ea60f874 | ||
|
|
fdc6554a9f | ||
|
|
3735c909d9 |
92
backport-CVE-2022-2309.patch
Normal file
92
backport-CVE-2022-2309.patch
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
From 86368e9cf70a0ad23cccd5ee32de847149af0c6f Mon Sep 17 00:00:00 2001
|
||||||
|
From: Stefan Behnel <stefan_ml@behnel.de>
|
||||||
|
Date: Fri, 1 Jul 2022 21:06:10 +0200
|
||||||
|
Subject: [PATCH] Fix a crash when incorrect parser input occurs together with
|
||||||
|
usages of iterwalk() on trees generated by the same parser.
|
||||||
|
|
||||||
|
---
|
||||||
|
src/lxml/apihelpers.pxi | 7 ++++---
|
||||||
|
src/lxml/iterparse.pxi | 11 ++++++-----
|
||||||
|
src/lxml/tests/test_etree.py | 20 ++++++++++++++++++++
|
||||||
|
3 files changed, 30 insertions(+), 8 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi
|
||||||
|
index c166276..9fae9fb 100644
|
||||||
|
--- a/src/lxml/apihelpers.pxi
|
||||||
|
+++ b/src/lxml/apihelpers.pxi
|
||||||
|
@@ -246,9 +246,10 @@ cdef dict _build_nsmap(xmlNode* c_node):
|
||||||
|
while c_node is not NULL and c_node.type == tree.XML_ELEMENT_NODE:
|
||||||
|
c_ns = c_node.nsDef
|
||||||
|
while c_ns is not NULL:
|
||||||
|
- prefix = funicodeOrNone(c_ns.prefix)
|
||||||
|
- if prefix not in nsmap:
|
||||||
|
- nsmap[prefix] = funicodeOrNone(c_ns.href)
|
||||||
|
+ if c_ns.prefix or c_ns.href:
|
||||||
|
+ prefix = funicodeOrNone(c_ns.prefix)
|
||||||
|
+ if prefix not in nsmap:
|
||||||
|
+ nsmap[prefix] = funicodeOrNone(c_ns.href)
|
||||||
|
c_ns = c_ns.next
|
||||||
|
c_node = c_node.parent
|
||||||
|
return nsmap
|
||||||
|
diff --git a/src/lxml/iterparse.pxi b/src/lxml/iterparse.pxi
|
||||||
|
index 138c23a..a7299da 100644
|
||||||
|
--- a/src/lxml/iterparse.pxi
|
||||||
|
+++ b/src/lxml/iterparse.pxi
|
||||||
|
@@ -420,7 +420,7 @@ cdef int _countNsDefs(xmlNode* c_node):
|
||||||
|
count = 0
|
||||||
|
c_ns = c_node.nsDef
|
||||||
|
while c_ns is not NULL:
|
||||||
|
- count += 1
|
||||||
|
+ count += (c_ns.href is not NULL)
|
||||||
|
c_ns = c_ns.next
|
||||||
|
return count
|
||||||
|
|
||||||
|
@@ -431,9 +431,10 @@ cdef int _appendStartNsEvents(xmlNode* c_node, list event_list) except -1:
|
||||||
|
count = 0
|
||||||
|
c_ns = c_node.nsDef
|
||||||
|
while c_ns is not NULL:
|
||||||
|
- ns_tuple = (funicode(c_ns.prefix) if c_ns.prefix is not NULL else '',
|
||||||
|
- funicode(c_ns.href))
|
||||||
|
- event_list.append( (u"start-ns", ns_tuple) )
|
||||||
|
- count += 1
|
||||||
|
+ if c_ns.href:
|
||||||
|
+ ns_tuple = (funicodeOrEmpty(c_ns.prefix),
|
||||||
|
+ funicode(c_ns.href))
|
||||||
|
+ event_list.append( (u"start-ns", ns_tuple) )
|
||||||
|
+ count += 1
|
||||||
|
c_ns = c_ns.next
|
||||||
|
return count
|
||||||
|
diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py
|
||||||
|
index e5f0846..285313f 100644
|
||||||
|
--- a/src/lxml/tests/test_etree.py
|
||||||
|
+++ b/src/lxml/tests/test_etree.py
|
||||||
|
@@ -1460,6 +1460,26 @@ class ETreeOnlyTestCase(HelperTestCase):
|
||||||
|
[1,2,1,4],
|
||||||
|
counts)
|
||||||
|
|
||||||
|
+ def test_walk_after_parse_failure(self):
|
||||||
|
+ # This used to be an issue because libxml2 can leak empty namespaces
|
||||||
|
+ # between failed parser runs. iterwalk() failed to handle such a tree.
|
||||||
|
+ try:
|
||||||
|
+ etree.XML('''<anot xmlns="1">''')
|
||||||
|
+ except etree.XMLSyntaxError:
|
||||||
|
+ pass
|
||||||
|
+ else:
|
||||||
|
+ assert False, "invalid input did not fail to parse"
|
||||||
|
+
|
||||||
|
+ et = etree.XML('''<root> </root>''')
|
||||||
|
+ try:
|
||||||
|
+ ns = next(etree.iterwalk(et, events=('start-ns',)))
|
||||||
|
+ except StopIteration:
|
||||||
|
+ # This would be the expected result, because there was no namespace
|
||||||
|
+ pass
|
||||||
|
+ else:
|
||||||
|
+ # This is a bug in libxml2
|
||||||
|
+ assert not ns, repr(ns)
|
||||||
|
+
|
||||||
|
def test_itertext_comment_pi(self):
|
||||||
|
# https://bugs.launchpad.net/lxml/+bug/1844674
|
||||||
|
XML = self.etree.XML
|
||||||
|
--
|
||||||
|
1.8.3.1
|
||||||
|
|
||||||
372
backport-CVE-2024-37388.patch
Normal file
372
backport-CVE-2024-37388.patch
Normal file
@@ -0,0 +1,372 @@
|
|||||||
|
From b38cebf2f846e92bd63de4488fd3d1c8b568f397 Mon Sep 17 00:00:00 2001
|
||||||
|
From: scoder <stefan_ml@behnel.de>
|
||||||
|
Date: Fri, 29 Dec 2023 14:21:23 +0100
|
||||||
|
Subject: [PATCH] Disable external entity resolution (XXE) by default (GH-391)
|
||||||
|
|
||||||
|
This prevents security risks that would allow loading arbitrary external files.
|
||||||
|
|
||||||
|
Closes https://bugs.launchpad.net/lxml/+bug/1742885
|
||||||
|
Supersedes https://github.com/lxml/lxml/pull/130
|
||||||
|
---
|
||||||
|
doc/FAQ.txt | 12 +++--
|
||||||
|
src/lxml/includes/xmlparser.pxd | 18 +++++++-
|
||||||
|
src/lxml/parser.pxi | 70 ++++++++++++++++++++++++++--
|
||||||
|
src/lxml/tests/test_etree.py | 81 +++++++++++++++++++++++++++++++++
|
||||||
|
4 files changed, 170 insertions(+), 11 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/doc/FAQ.txt b/doc/FAQ.txt
|
||||||
|
index 48f69a6..7f3a524 100644
|
||||||
|
--- a/doc/FAQ.txt
|
||||||
|
+++ b/doc/FAQ.txt
|
||||||
|
@@ -1107,9 +1107,9 @@ useless for the data commonly sent through web services and
|
||||||
|
can simply be disabled, which rules out several types of
|
||||||
|
denial of service attacks at once. This also involves an attack
|
||||||
|
that reads local files from the server, as XML entities can be
|
||||||
|
-defined to expand into their content. Consequently, version
|
||||||
|
-1.2 of the SOAP standard explicitly disallows entity references
|
||||||
|
-in the XML stream.
|
||||||
|
+defined to expand into the content of external resources.
|
||||||
|
+Consequently, version 1.2 of the SOAP standard explicitly
|
||||||
|
+disallows entity references in the XML stream.
|
||||||
|
|
||||||
|
To disable entity expansion, use an XML parser that is configured
|
||||||
|
with the option ``resolve_entities=False``. Then, after (or
|
||||||
|
@@ -1117,7 +1117,11 @@ while) parsing the document, use ``root.iter(etree.Entity)`` to
|
||||||
|
recursively search for entity references. If it contains any,
|
||||||
|
reject the entire input document with a suitable error response.
|
||||||
|
In lxml 3.x, you can also use the new DTD introspection API to
|
||||||
|
-apply your own restrictions on input documents.
|
||||||
|
+apply your own restrictions on input documents. Since version 5.x,
|
||||||
|
+lxml disables the expansion of external entities (XXE) by default.
|
||||||
|
+If you really want to allow loading external files into XML documents
|
||||||
|
+using this functionality, you have to explicitly set
|
||||||
|
+``resolve_entities=True``.
|
||||||
|
|
||||||
|
Another attack to consider is compression bombs. If you allow
|
||||||
|
compressed input into your web service, attackers can try to send
|
||||||
|
diff --git a/src/lxml/includes/xmlparser.pxd b/src/lxml/includes/xmlparser.pxd
|
||||||
|
index 45acfc8..3945495 100644
|
||||||
|
--- a/src/lxml/includes/xmlparser.pxd
|
||||||
|
+++ b/src/lxml/includes/xmlparser.pxd
|
||||||
|
@@ -1,9 +1,9 @@
|
||||||
|
from libc.string cimport const_char
|
||||||
|
|
||||||
|
from lxml.includes.tree cimport (
|
||||||
|
- xmlDoc, xmlNode, xmlDict, xmlDtd, xmlChar, const_xmlChar)
|
||||||
|
+ xmlDoc, xmlNode, xmlEntity, xmlDict, xmlDtd, xmlChar, const_xmlChar)
|
||||||
|
from lxml.includes.tree cimport xmlInputReadCallback, xmlInputCloseCallback
|
||||||
|
-from lxml.includes.xmlerror cimport xmlError, xmlStructuredErrorFunc
|
||||||
|
+from lxml.includes.xmlerror cimport xmlError, xmlStructuredErrorFunc, xmlErrorLevel
|
||||||
|
|
||||||
|
|
||||||
|
cdef extern from "libxml/parser.h":
|
||||||
|
@@ -47,11 +47,14 @@ cdef extern from "libxml/parser.h":
|
||||||
|
|
||||||
|
ctypedef void (*referenceSAXFunc)(void * ctx, const_xmlChar* name)
|
||||||
|
|
||||||
|
+ ctypedef xmlEntity* (*getEntitySAXFunc)(void* ctx, const_xmlChar* name)
|
||||||
|
+
|
||||||
|
cdef int XML_SAX2_MAGIC
|
||||||
|
|
||||||
|
cdef extern from "libxml/tree.h":
|
||||||
|
ctypedef struct xmlParserInput:
|
||||||
|
int line
|
||||||
|
+ int col
|
||||||
|
int length
|
||||||
|
const_xmlChar* base
|
||||||
|
const_xmlChar* cur
|
||||||
|
@@ -76,6 +79,7 @@ cdef extern from "libxml/tree.h":
|
||||||
|
charactersSAXFunc characters
|
||||||
|
cdataBlockSAXFunc cdataBlock
|
||||||
|
referenceSAXFunc reference
|
||||||
|
+ getEntitySAXFunc getEntity
|
||||||
|
commentSAXFunc comment
|
||||||
|
processingInstructionSAXFunc processingInstruction
|
||||||
|
startDocumentSAXFunc startDocument
|
||||||
|
@@ -150,6 +154,8 @@ cdef extern from "libxml/parser.h":
|
||||||
|
int inSubset
|
||||||
|
int charset
|
||||||
|
xmlParserInput* input
|
||||||
|
+ int inputNr
|
||||||
|
+ xmlParserInput** inputTab
|
||||||
|
|
||||||
|
ctypedef enum xmlParserOption:
|
||||||
|
XML_PARSE_RECOVER = 1 # recover on errors
|
||||||
|
@@ -212,6 +218,12 @@ cdef extern from "libxml/parser.h":
|
||||||
|
char* filename, const_char* encoding,
|
||||||
|
int options) nogil
|
||||||
|
|
||||||
|
+ cdef void xmlErrParser(xmlParserCtxt* ctxt, xmlNode* node,
|
||||||
|
+ int domain, int code, xmlErrorLevel level,
|
||||||
|
+ const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
|
||||||
|
+ int int1, const char *msg, ...)
|
||||||
|
+
|
||||||
|
+
|
||||||
|
# iterparse:
|
||||||
|
|
||||||
|
cdef xmlParserCtxt* xmlCreatePushParserCtxt(xmlSAXHandler* sax,
|
||||||
|
@@ -233,6 +245,8 @@ cdef extern from "libxml/parser.h":
|
||||||
|
cdef xmlExternalEntityLoader xmlGetExternalEntityLoader() nogil
|
||||||
|
cdef void xmlSetExternalEntityLoader(xmlExternalEntityLoader f) nogil
|
||||||
|
|
||||||
|
+ cdef xmlEntity* xmlSAX2GetEntity(void* ctxt, const_xmlChar* name) nogil
|
||||||
|
+
|
||||||
|
# DTDs:
|
||||||
|
|
||||||
|
cdef xmlDtd* xmlParseDTD(const_xmlChar* ExternalID, const_xmlChar* SystemID) nogil
|
||||||
|
diff --git a/src/lxml/parser.pxi b/src/lxml/parser.pxi
|
||||||
|
index 3187a38..2f0ce80 100644
|
||||||
|
--- a/src/lxml/parser.pxi
|
||||||
|
+++ b/src/lxml/parser.pxi
|
||||||
|
@@ -794,6 +794,7 @@ cdef inline int _fixHtmlDictNodeNames(tree.xmlDict* c_dict,
|
||||||
|
c_attr = c_attr.next
|
||||||
|
return 0
|
||||||
|
|
||||||
|
+
|
||||||
|
@cython.internal
|
||||||
|
cdef class _BaseParser:
|
||||||
|
cdef ElementClassLookup _class_lookup
|
||||||
|
@@ -806,6 +807,7 @@ cdef class _BaseParser:
|
||||||
|
cdef bint _remove_pis
|
||||||
|
cdef bint _strip_cdata
|
||||||
|
cdef bint _collect_ids
|
||||||
|
+ cdef bint _resolve_external_entities
|
||||||
|
cdef XMLSchema _schema
|
||||||
|
cdef bytes _filename
|
||||||
|
cdef readonly object target
|
||||||
|
@@ -814,7 +816,7 @@ cdef class _BaseParser:
|
||||||
|
|
||||||
|
def __init__(self, int parse_options, bint for_html, XMLSchema schema,
|
||||||
|
remove_comments, remove_pis, strip_cdata, collect_ids,
|
||||||
|
- target, encoding):
|
||||||
|
+ target, encoding, bint resolve_external_entities=True):
|
||||||
|
cdef tree.xmlCharEncodingHandler* enchandler
|
||||||
|
cdef int c_encoding
|
||||||
|
if not isinstance(self, (XMLParser, HTMLParser)):
|
||||||
|
@@ -827,6 +829,7 @@ cdef class _BaseParser:
|
||||||
|
self._remove_pis = remove_pis
|
||||||
|
self._strip_cdata = strip_cdata
|
||||||
|
self._collect_ids = collect_ids
|
||||||
|
+ self._resolve_external_entities = resolve_external_entities
|
||||||
|
self._schema = schema
|
||||||
|
|
||||||
|
self._resolvers = _ResolverRegistry()
|
||||||
|
@@ -906,6 +909,8 @@ cdef class _BaseParser:
|
||||||
|
if self._strip_cdata:
|
||||||
|
# hard switch-off for CDATA nodes => makes them plain text
|
||||||
|
pctxt.sax.cdataBlock = NULL
|
||||||
|
+ if not self._resolve_external_entities:
|
||||||
|
+ pctxt.sax.getEntity = _getInternalEntityOnly
|
||||||
|
|
||||||
|
cdef int _registerHtmlErrorHandler(self, xmlparser.xmlParserCtxt* c_ctxt) except -1:
|
||||||
|
cdef xmlparser.xmlSAXHandler* sax = c_ctxt.sax
|
||||||
|
@@ -1206,6 +1211,56 @@ cdef class _BaseParser:
|
||||||
|
finally:
|
||||||
|
context.cleanup()
|
||||||
|
|
||||||
|
+cdef tree.xmlEntity* _getInternalEntityOnly(void* ctxt, const_xmlChar* name):
|
||||||
|
+ """
|
||||||
|
+ Callback function to intercept the entity resolution when external entity loading is disabled.
|
||||||
|
+ """
|
||||||
|
+ cdef tree.xmlEntity* entity = xmlparser.xmlSAX2GetEntity(ctxt, name)
|
||||||
|
+ if not entity:
|
||||||
|
+ return NULL
|
||||||
|
+ if entity.etype not in (
|
||||||
|
+ tree.xmlEntityType.XML_EXTERNAL_GENERAL_PARSED_ENTITY,
|
||||||
|
+ tree.xmlEntityType.XML_EXTERNAL_GENERAL_UNPARSED_ENTITY,
|
||||||
|
+ tree.xmlEntityType.XML_EXTERNAL_PARAMETER_ENTITY):
|
||||||
|
+ return entity
|
||||||
|
+
|
||||||
|
+ # Reject all external entities and fail the parsing instead. There is currently
|
||||||
|
+ # no way in libxml2 to just prevent the entity resolution in this case.
|
||||||
|
+ cdef xmlerror.xmlError c_error
|
||||||
|
+ cdef xmlerror.xmlStructuredErrorFunc err_func
|
||||||
|
+ cdef xmlparser.xmlParserInput* parser_input
|
||||||
|
+ cdef void* err_context
|
||||||
|
+
|
||||||
|
+ c_ctxt = <xmlparser.xmlParserCtxt *> ctxt
|
||||||
|
+ err_func = xmlerror.xmlStructuredError
|
||||||
|
+ if err_func:
|
||||||
|
+ parser_input = c_ctxt.input
|
||||||
|
+ # Copied from xmlVErrParser() in libxml2: get current input from stack.
|
||||||
|
+ if parser_input and parser_input.filename is NULL and c_ctxt.inputNr > 1:
|
||||||
|
+ parser_input = c_ctxt.inputTab[c_ctxt.inputNr - 2]
|
||||||
|
+
|
||||||
|
+ c_error = xmlerror.xmlError(
|
||||||
|
+ domain=xmlerror.xmlErrorDomain.XML_FROM_PARSER,
|
||||||
|
+ code=xmlerror.xmlParserErrors.XML_ERR_EXT_ENTITY_STANDALONE,
|
||||||
|
+ level=xmlerror.xmlErrorLevel.XML_ERR_FATAL,
|
||||||
|
+ message=b"External entity resolution is disabled for security reasons "
|
||||||
|
+ b"when resolving '&%s;'. Use 'XMLParser(resolve_entities=True)' "
|
||||||
|
+ b"if you consider it safe to enable it.",
|
||||||
|
+ file=parser_input.filename,
|
||||||
|
+ node=entity,
|
||||||
|
+ str1=<char*> name,
|
||||||
|
+ str2=NULL,
|
||||||
|
+ str3=NULL,
|
||||||
|
+ line=parser_input.line if parser_input else 0,
|
||||||
|
+ int1=0,
|
||||||
|
+ int2=parser_input.col if parser_input else 0,
|
||||||
|
+ )
|
||||||
|
+ err_context = xmlerror.xmlStructuredErrorContext
|
||||||
|
+ err_func(err_context, &c_error)
|
||||||
|
+
|
||||||
|
+ c_ctxt.wellFormed = 0
|
||||||
|
+ # The entity was looked up and does not need to be freed.
|
||||||
|
+ return NULL
|
||||||
|
|
||||||
|
cdef void _initSaxDocument(void* ctxt) with gil:
|
||||||
|
xmlparser.xmlSAX2StartDocument(ctxt)
|
||||||
|
@@ -1508,12 +1563,14 @@ cdef class XMLParser(_FeedParser):
|
||||||
|
- strip_cdata - replace CDATA sections by normal text content (default: True)
|
||||||
|
- compact - save memory for short text content (default: True)
|
||||||
|
- collect_ids - use a hash table of XML IDs for fast access (default: True, always True with DTD validation)
|
||||||
|
- - resolve_entities - replace entities by their text value (default: True)
|
||||||
|
- huge_tree - disable security restrictions and support very deep trees
|
||||||
|
and very long text content (only affects libxml2 2.7+)
|
||||||
|
|
||||||
|
Other keyword arguments:
|
||||||
|
-
|
||||||
|
+ - resolve_entities - replace entities by their text value: False for keeping the
|
||||||
|
+ entity references, True for resolving them, and 'internal' for resolving
|
||||||
|
+ internal definitions only (no external file/URL access).
|
||||||
|
+ The default used to be True and was changed to 'internal' in lxml 5.0.
|
||||||
|
- encoding - override the document encoding
|
||||||
|
- target - a parser target object that will receive the parse events
|
||||||
|
- schema - an XMLSchema to validate against
|
||||||
|
@@ -1525,10 +1582,11 @@ cdef class XMLParser(_FeedParser):
|
||||||
|
def __init__(self, *, encoding=None, attribute_defaults=False,
|
||||||
|
dtd_validation=False, load_dtd=False, no_network=True,
|
||||||
|
ns_clean=False, recover=False, XMLSchema schema=None,
|
||||||
|
- huge_tree=False, remove_blank_text=False, resolve_entities=True,
|
||||||
|
+ huge_tree=False, remove_blank_text=False, resolve_entities='internal',
|
||||||
|
remove_comments=False, remove_pis=False, strip_cdata=True,
|
||||||
|
collect_ids=True, target=None, compact=True):
|
||||||
|
cdef int parse_options
|
||||||
|
+ cdef bint resolve_external = True
|
||||||
|
parse_options = _XML_DEFAULT_PARSE_OPTIONS
|
||||||
|
if load_dtd:
|
||||||
|
parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD
|
||||||
|
@@ -1553,12 +1611,14 @@ cdef class XMLParser(_FeedParser):
|
||||||
|
parse_options = parse_options ^ xmlparser.XML_PARSE_COMPACT
|
||||||
|
if not resolve_entities:
|
||||||
|
parse_options = parse_options ^ xmlparser.XML_PARSE_NOENT
|
||||||
|
+ elif resolve_entities == 'internal':
|
||||||
|
+ resolve_external = False
|
||||||
|
if not strip_cdata:
|
||||||
|
parse_options = parse_options ^ xmlparser.XML_PARSE_NOCDATA
|
||||||
|
|
||||||
|
_BaseParser.__init__(self, parse_options, 0, schema,
|
||||||
|
remove_comments, remove_pis, strip_cdata,
|
||||||
|
- collect_ids, target, encoding)
|
||||||
|
+ collect_ids, target, encoding, resolve_external)
|
||||||
|
|
||||||
|
|
||||||
|
cdef class XMLPullParser(XMLParser):
|
||||||
|
diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py
|
||||||
|
index 14b21f7..bc7548f 100644
|
||||||
|
--- a/src/lxml/tests/test_etree.py
|
||||||
|
+++ b/src/lxml/tests/test_etree.py
|
||||||
|
@@ -12,11 +12,14 @@ from __future__ import absolute_import
|
||||||
|
from collections import OrderedDict
|
||||||
|
import os.path
|
||||||
|
import unittest
|
||||||
|
+import contextlib
|
||||||
|
import copy
|
||||||
|
import sys
|
||||||
|
import re
|
||||||
|
import gc
|
||||||
|
import operator
|
||||||
|
+import shutil
|
||||||
|
+import tempfile
|
||||||
|
import textwrap
|
||||||
|
import zlib
|
||||||
|
import gzip
|
||||||
|
@@ -1675,6 +1678,84 @@ class ETreeOnlyTestCase(HelperTestCase):
|
||||||
|
self.assertEqual(_bytes('<doc>&myentity;</doc>'),
|
||||||
|
tostring(root))
|
||||||
|
|
||||||
|
+ @contextlib.contextmanager
|
||||||
|
+ def _xml_test_file(self, name, content=b'<evil>XML</evil>'):
|
||||||
|
+ temp_dir = tempfile.mkdtemp()
|
||||||
|
+ try:
|
||||||
|
+ xml_file = os.path.join(temp_dir, name)
|
||||||
|
+ with open(xml_file, 'wb') as tmpfile:
|
||||||
|
+ tmpfile.write(content)
|
||||||
|
+ yield xml_file
|
||||||
|
+ finally:
|
||||||
|
+ shutil.rmtree(temp_dir)
|
||||||
|
+
|
||||||
|
+ def test_entity_parse_external(self):
|
||||||
|
+ fromstring = self.etree.fromstring
|
||||||
|
+ tostring = self.etree.tostring
|
||||||
|
+ parser = self.etree.XMLParser(resolve_entities=True)
|
||||||
|
+
|
||||||
|
+ with self._xml_test_file("entity.xml") as entity_file:
|
||||||
|
+ xml = '''
|
||||||
|
+ <!DOCTYPE doc [
|
||||||
|
+ <!ENTITY my_external_entity SYSTEM "%s">
|
||||||
|
+ ]>
|
||||||
|
+ <doc>&my_external_entity;</doc>
|
||||||
|
+ ''' % path2url(entity_file)
|
||||||
|
+ root = fromstring(xml, parser)
|
||||||
|
+
|
||||||
|
+ self.assertEqual(_bytes('<doc><evil>XML</evil></doc>'),
|
||||||
|
+ tostring(root))
|
||||||
|
+ self.assertEqual(root.tag, 'doc')
|
||||||
|
+ self.assertEqual(root[0].tag, 'evil')
|
||||||
|
+ self.assertEqual(root[0].text, 'XML')
|
||||||
|
+ self.assertEqual(root[0].tail, None)
|
||||||
|
+
|
||||||
|
+ def test_entity_parse_external_no_resolve(self):
|
||||||
|
+ fromstring = self.etree.fromstring
|
||||||
|
+ parser = self.etree.XMLParser(resolve_entities=False)
|
||||||
|
+ Entity = self.etree.Entity
|
||||||
|
+
|
||||||
|
+ with self._xml_test_file("entity.xml") as entity_file:
|
||||||
|
+ xml = '''
|
||||||
|
+ <!DOCTYPE doc [
|
||||||
|
+ <!ENTITY my_external_entity SYSTEM "%s">
|
||||||
|
+ ]>
|
||||||
|
+ <doc>&my_external_entity;</doc>
|
||||||
|
+ ''' % path2url(entity_file)
|
||||||
|
+ root = fromstring(xml, parser)
|
||||||
|
+
|
||||||
|
+ self.assertEqual(root[0].tag, Entity)
|
||||||
|
+ self.assertEqual(root[0].text, "&my_external_entity;")
|
||||||
|
+
|
||||||
|
+ def test_entity_parse_no_external_default(self):
|
||||||
|
+ fromstring = self.etree.fromstring
|
||||||
|
+
|
||||||
|
+ with self._xml_test_file("entity.xml") as entity_file:
|
||||||
|
+ xml = '''
|
||||||
|
+ <!DOCTYPE doc [
|
||||||
|
+ <!ENTITY my_failing_external_entity SYSTEM "%s">
|
||||||
|
+ ]>
|
||||||
|
+ <doc>&my_failing_external_entity;</doc>
|
||||||
|
+ ''' % path2url(entity_file)
|
||||||
|
+
|
||||||
|
+ try:
|
||||||
|
+ fromstring(xml)
|
||||||
|
+ except self.etree.XMLSyntaxError as exc:
|
||||||
|
+ exception = exc
|
||||||
|
+ else:
|
||||||
|
+ self.assertTrue(False, "XMLSyntaxError was not raised")
|
||||||
|
+
|
||||||
|
+ self.assertIn("my_failing_external_entity", str(exception))
|
||||||
|
+ self.assertTrue(exception.error_log)
|
||||||
|
+ # Depending on the libxml2 version, we get different errors here,
|
||||||
|
+ # not necessarily the one that lxml produced. But it should fail either way.
|
||||||
|
+ for error in exception.error_log:
|
||||||
|
+ if "my_failing_external_entity" in error.message:
|
||||||
|
+ self.assertEqual(5, error.line)
|
||||||
|
+ break
|
||||||
|
+ else:
|
||||||
|
+ self.assertFalse("entity error not found in parser error log")
|
||||||
|
+
|
||||||
|
def test_entity_restructure(self):
|
||||||
|
xml = _bytes('''<!DOCTYPE root [ <!ENTITY nbsp " "> ]>
|
||||||
|
<root>
|
||||||
|
--
|
||||||
|
2.33.0
|
||||||
|
|
||||||
@@ -0,0 +1,163 @@
|
|||||||
|
From 69a747356655158fdf9abaecea5feafb3bd6b5f5 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Stefan Behnel <stefan_ml@behnel.de>
|
||||||
|
Date: Sat, 11 Dec 2021 12:19:21 +0100
|
||||||
|
Subject: [PATCH] Cleaner: cover some more cases where scripts could sneak
|
||||||
|
through in specially crafted style content.
|
||||||
|
|
||||||
|
---
|
||||||
|
src/lxml/html/clean.py | 20 ++++++------
|
||||||
|
src/lxml/html/tests/test_clean.py | 65 ++++++++++++++++++++++++++++++++++++++-
|
||||||
|
2 files changed, 73 insertions(+), 12 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/src/lxml/html/clean.py b/src/lxml/html/clean.py
|
||||||
|
index 4df10c2..0e96627 100644
|
||||||
|
--- a/src/lxml/html/clean.py
|
||||||
|
+++ b/src/lxml/html/clean.py
|
||||||
|
@@ -74,22 +74,20 @@ _looks_like_tag_content = re.compile(
|
||||||
|
# All kinds of schemes besides just javascript: that can cause
|
||||||
|
# execution:
|
||||||
|
_find_image_dataurls = re.compile(
|
||||||
|
- r'^data:image/(.+);base64,', re.I).findall
|
||||||
|
-_is_possibly_malicious_scheme = re.compile(
|
||||||
|
+ r'data:image/(.+);base64,', re.I).findall
|
||||||
|
+_possibly_malicious_schemes = re.compile(
|
||||||
|
r'(javascript|jscript|livescript|vbscript|data|about|mocha):',
|
||||||
|
re.I).findall
|
||||||
|
# SVG images can contain script content
|
||||||
|
-_is_unsafe_image_type = re.compile(r"(xml|svg)", re.I).findall
|
||||||
|
+_is_unsafe_image_type = re.compile(r"(xml|svg)", re.I).search
|
||||||
|
|
||||||
|
-def _is_javascript_scheme(s):
|
||||||
|
- is_image_url = False
|
||||||
|
+def _has_javascript_scheme(s):
|
||||||
|
+ safe_image_urls = 0
|
||||||
|
for image_type in _find_image_dataurls(s):
|
||||||
|
- is_image_url = True
|
||||||
|
if _is_unsafe_image_type(image_type):
|
||||||
|
return True
|
||||||
|
- if is_image_url:
|
||||||
|
- return False
|
||||||
|
- return bool(_is_possibly_malicious_scheme(s))
|
||||||
|
+ safe_image_urls += 1
|
||||||
|
+ return len(_possibly_malicious_schemes(s)) > safe_image_urls
|
||||||
|
|
||||||
|
_substitute_whitespace = re.compile(r'[\s\x00-\x08\x0B\x0C\x0E-\x19]+').sub
|
||||||
|
|
||||||
|
@@ -521,7 +519,7 @@ class Cleaner(object):
|
||||||
|
def _remove_javascript_link(self, link):
|
||||||
|
# links like "j a v a s c r i p t:" might be interpreted in IE
|
||||||
|
new = _substitute_whitespace('', unquote_plus(link))
|
||||||
|
- if _is_javascript_scheme(new):
|
||||||
|
+ if _has_javascript_scheme(new):
|
||||||
|
# FIXME: should this be None to delete?
|
||||||
|
return ''
|
||||||
|
return link
|
||||||
|
@@ -543,7 +541,7 @@ class Cleaner(object):
|
||||||
|
style = style.replace('\\', '')
|
||||||
|
style = _substitute_whitespace('', style)
|
||||||
|
style = style.lower()
|
||||||
|
- if 'javascript:' in style:
|
||||||
|
+ if _has_javascript_scheme(style):
|
||||||
|
return True
|
||||||
|
if 'expression(' in style:
|
||||||
|
return True
|
||||||
|
diff --git a/src/lxml/html/tests/test_clean.py b/src/lxml/html/tests/test_clean.py
|
||||||
|
index a05d967..aec87cd 100644
|
||||||
|
--- a/src/lxml/html/tests/test_clean.py
|
||||||
|
+++ b/src/lxml/html/tests/test_clean.py
|
||||||
|
@@ -126,7 +126,7 @@ class CleanerTest(unittest.TestCase):
|
||||||
|
lxml.html.tostring(clean_html(s)))
|
||||||
|
|
||||||
|
def test_sneaky_import_in_style(self):
|
||||||
|
- # Prevent "@@importimport" -> "@import" replacement.
|
||||||
|
+ # Prevent "@@importimport" -> "@import" replacement etc.
|
||||||
|
style_codes = [
|
||||||
|
"@@importimport(extstyle.css)",
|
||||||
|
"@ @ import import(extstyle.css)",
|
||||||
|
@@ -134,6 +134,11 @@ class CleanerTest(unittest.TestCase):
|
||||||
|
"@@ import import(extstyle.css)",
|
||||||
|
"@ @import import(extstyle.css)",
|
||||||
|
"@@importimport()",
|
||||||
|
+ "@@importimport() ()",
|
||||||
|
+ "@/* ... */import()",
|
||||||
|
+ "@im/* ... */port()",
|
||||||
|
+ "@ @import/* ... */import()",
|
||||||
|
+ "@ /* ... */ import()",
|
||||||
|
]
|
||||||
|
for style_code in style_codes:
|
||||||
|
html = '<style>%s</style>' % style_code
|
||||||
|
@@ -145,6 +150,41 @@ class CleanerTest(unittest.TestCase):
|
||||||
|
cleaned,
|
||||||
|
"%s -> %s" % (style_code, cleaned))
|
||||||
|
|
||||||
|
+ def test_sneaky_schemes_in_style(self):
|
||||||
|
+ style_codes = [
|
||||||
|
+ "javasjavascript:cript:",
|
||||||
|
+ "javascriptjavascript::",
|
||||||
|
+ "javascriptjavascript:: :",
|
||||||
|
+ "vbjavascript:cript:",
|
||||||
|
+ ]
|
||||||
|
+ for style_code in style_codes:
|
||||||
|
+ html = '<style>%s</style>' % style_code
|
||||||
|
+ s = lxml.html.fragment_fromstring(html)
|
||||||
|
+
|
||||||
|
+ cleaned = lxml.html.tostring(clean_html(s))
|
||||||
|
+ self.assertEqual(
|
||||||
|
+ b'<style>/* deleted */</style>',
|
||||||
|
+ cleaned,
|
||||||
|
+ "%s -> %s" % (style_code, cleaned))
|
||||||
|
+
|
||||||
|
+ def test_sneaky_urls_in_style(self):
|
||||||
|
+ style_codes = [
|
||||||
|
+ "url(data:image/svg+xml;base64,...)",
|
||||||
|
+ "url(javasjavascript:cript:)",
|
||||||
|
+ "url(javasjavascript:cript: ::)",
|
||||||
|
+ "url(vbjavascript:cript:)",
|
||||||
|
+ "url(vbjavascript:cript: :)",
|
||||||
|
+ ]
|
||||||
|
+ for style_code in style_codes:
|
||||||
|
+ html = '<style>%s</style>' % style_code
|
||||||
|
+ s = lxml.html.fragment_fromstring(html)
|
||||||
|
+
|
||||||
|
+ cleaned = lxml.html.tostring(clean_html(s))
|
||||||
|
+ self.assertEqual(
|
||||||
|
+ b'<style>url()</style>',
|
||||||
|
+ cleaned,
|
||||||
|
+ "%s -> %s" % (style_code, cleaned))
|
||||||
|
+
|
||||||
|
def test_svg_data_links(self):
|
||||||
|
# Remove SVG images with potentially insecure content.
|
||||||
|
svg = b'<svg onload="alert(123)" />'
|
||||||
|
@@ -188,6 +228,29 @@ class CleanerTest(unittest.TestCase):
|
||||||
|
cleaned,
|
||||||
|
"%s -> %s" % (url, cleaned))
|
||||||
|
|
||||||
|
+ def test_image_data_links_in_style(self):
|
||||||
|
+ data = b'123'
|
||||||
|
+ data_b64 = base64.b64encode(data).decode('ASCII')
|
||||||
|
+ urls = [
|
||||||
|
+ "data:image/jpeg;base64," + data_b64,
|
||||||
|
+ "data:image/apng;base64," + data_b64,
|
||||||
|
+ "data:image/png;base64," + data_b64,
|
||||||
|
+ "data:image/gif;base64," + data_b64,
|
||||||
|
+ "data:image/webp;base64," + data_b64,
|
||||||
|
+ "data:image/bmp;base64," + data_b64,
|
||||||
|
+ "data:image/tiff;base64," + data_b64,
|
||||||
|
+ "data:image/x-icon;base64," + data_b64,
|
||||||
|
+ ]
|
||||||
|
+ for url in urls:
|
||||||
|
+ html = '<style> url(%s) </style>' % url
|
||||||
|
+ s = lxml.html.fragment_fromstring(html)
|
||||||
|
+
|
||||||
|
+ cleaned = lxml.html.tostring(clean_html(s))
|
||||||
|
+ self.assertEqual(
|
||||||
|
+ html.encode("UTF-8"),
|
||||||
|
+ cleaned,
|
||||||
|
+ "%s -> %s" % (url, cleaned))
|
||||||
|
+
|
||||||
|
def test_formaction_attribute_in_button_input(self):
|
||||||
|
# The formaction attribute overrides the form's action and should be
|
||||||
|
# treated as a malicious link attribute
|
||||||
|
--
|
||||||
|
2.13.7
|
||||||
|
|
||||||
56
backport-Work-around-libxml2-bug-in-affected-versions.patch
Normal file
56
backport-Work-around-libxml2-bug-in-affected-versions.patch
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
From c742576c105f40fc8b754fcae56fee4aa35840a3 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Stefan Behnel <stefan_ml@behnel.de>
|
||||||
|
Date: Tue, 19 Jul 2022 08:25:20 +0200
|
||||||
|
Subject: [PATCH] Work around libxml2 bug in affected versions that failed to
|
||||||
|
reset the namespace count in the parser context.
|
||||||
|
|
||||||
|
See https://gitlab.gnome.org/GNOME/libxml2/-/issues/378
|
||||||
|
---
|
||||||
|
src/lxml/includes/xmlparser.pxd | 1 +
|
||||||
|
src/lxml/parser.pxi | 3 +++
|
||||||
|
src/lxml/tests/test_etree.py | 3 +--
|
||||||
|
3 files changed, 5 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/src/lxml/includes/xmlparser.pxd b/src/lxml/includes/xmlparser.pxd
|
||||||
|
index a196e34..45acfc8 100644
|
||||||
|
--- a/src/lxml/includes/xmlparser.pxd
|
||||||
|
+++ b/src/lxml/includes/xmlparser.pxd
|
||||||
|
@@ -144,6 +144,7 @@ cdef extern from "libxml/parser.h":
|
||||||
|
void* userData
|
||||||
|
int* spaceTab
|
||||||
|
int spaceMax
|
||||||
|
+ int nsNr
|
||||||
|
bint html
|
||||||
|
bint progressive
|
||||||
|
int inSubset
|
||||||
|
diff --git a/src/lxml/parser.pxi b/src/lxml/parser.pxi
|
||||||
|
index f5baf29..f0c8c6b 100644
|
||||||
|
--- a/src/lxml/parser.pxi
|
||||||
|
+++ b/src/lxml/parser.pxi
|
||||||
|
@@ -569,6 +569,9 @@ cdef class _ParserContext(_ResolverContext):
|
||||||
|
self._c_ctxt.disableSAX = 0 # work around bug in libxml2
|
||||||
|
else:
|
||||||
|
xmlparser.xmlClearParserCtxt(self._c_ctxt)
|
||||||
|
+ # work around bug in libxml2 [2.9.10 .. 2.9.14]:
|
||||||
|
+ # https://gitlab.gnome.org/GNOME/libxml2/-/issues/378
|
||||||
|
+ self._c_ctxt.nsNr = 0
|
||||||
|
|
||||||
|
cdef int prepare(self, bint set_document_loader=True) except -1:
|
||||||
|
cdef int result
|
||||||
|
diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py
|
||||||
|
index 8bf82c0..0339796 100644
|
||||||
|
--- a/src/lxml/tests/test_etree.py
|
||||||
|
+++ b/src/lxml/tests/test_etree.py
|
||||||
|
@@ -1491,8 +1491,7 @@ class ETreeOnlyTestCase(HelperTestCase):
|
||||||
|
# This would be the expected result, because there was no namespace
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
- # This is a bug in libxml2
|
||||||
|
- assert not ns, repr(ns)
|
||||||
|
+ assert False, "Found unexpected namespace '%s'" % ns
|
||||||
|
|
||||||
|
def test_itertext_comment_pi(self):
|
||||||
|
# https://bugs.launchpad.net/lxml/+bug/1844674
|
||||||
|
--
|
||||||
|
1.8.3.1
|
||||||
|
|
||||||
@@ -7,7 +7,7 @@ The latest release works with all CPython versions from 2.7 to 3.7.
|
|||||||
|
|
||||||
Name: python-%{modname}
|
Name: python-%{modname}
|
||||||
Version: 4.5.2
|
Version: 4.5.2
|
||||||
Release: 5
|
Release: 9
|
||||||
Summary: XML processing library combining libxml2/libxslt with the ElementTree API
|
Summary: XML processing library combining libxml2/libxslt with the ElementTree API
|
||||||
License: BSD
|
License: BSD
|
||||||
URL: http://lxml.de
|
URL: http://lxml.de
|
||||||
@@ -18,6 +18,10 @@ Patch6001: backport-CVE-2020-27783-2.patch
|
|||||||
Patch6002: backport-CVE-2021-28957.patch
|
Patch6002: backport-CVE-2021-28957.patch
|
||||||
Patch6003: backport-0001-CVE-2021-43818.patch
|
Patch6003: backport-0001-CVE-2021-43818.patch
|
||||||
Patch6004: backport-0002-CVE-2021-43818.patch
|
Patch6004: backport-0002-CVE-2021-43818.patch
|
||||||
|
Patch6005: backport-Cleaner-cover-some-more-cases-where-scripts-could-sn.patch
|
||||||
|
Patch6006: backport-CVE-2022-2309.patch
|
||||||
|
Patch6007: backport-Work-around-libxml2-bug-in-affected-versions.patch
|
||||||
|
Patch6008: backport-CVE-2024-37388.patch
|
||||||
|
|
||||||
BuildRequires: gcc libxml2-devel libxslt-devel
|
BuildRequires: gcc libxml2-devel libxslt-devel
|
||||||
|
|
||||||
@@ -41,6 +45,8 @@ BuildRequires: python3-devel python3-setuptools python3-Cython
|
|||||||
|
|
||||||
%prep
|
%prep
|
||||||
%autosetup -n %{modname}-%{version} -p1
|
%autosetup -n %{modname}-%{version} -p1
|
||||||
|
# Remove pregenerated Cython C sources
|
||||||
|
find -type f -name '*.c' -print -delete
|
||||||
|
|
||||||
%build
|
%build
|
||||||
export WITH_CYTHON=true
|
export WITH_CYTHON=true
|
||||||
@@ -68,6 +74,21 @@ make test3
|
|||||||
%doc README.rst src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt
|
%doc README.rst src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Wed Jun 12 2024 zhuofeng <zhuofeng2@huawei.com> - 4.5.2-9
|
||||||
|
- Type:CVE
|
||||||
|
- CVE:CVE-2024-37388
|
||||||
|
- SUG:NA
|
||||||
|
- DESC:fix CVE-2024-37388
|
||||||
|
|
||||||
|
* Mon Jul 25 2022 shixuantong <shixuantong@h-partners.com> - 4.5.2-8
|
||||||
|
- Remove pregenerated Cython C sources
|
||||||
|
|
||||||
|
* Thu Jul 21 2022 renhongxun <renhongxun@h-partners.com> - 4.5.2-7
|
||||||
|
- fix CVE-2022-2309
|
||||||
|
|
||||||
|
* Sat Jan 22 2022 shixuantong <shixuantong@huawei.com> - 4.5.2-6
|
||||||
|
- Cleaner: cover some more cases where scripts could sneak through in specially crafted style content.
|
||||||
|
|
||||||
* Wed Jan 19 2022 shixuantong <shixuantong@huawei.com> - 4.5.2-5
|
* Wed Jan 19 2022 shixuantong <shixuantong@huawei.com> - 4.5.2-5
|
||||||
- enable check
|
- enable check
|
||||||
|
|
||||||
@@ -80,7 +101,7 @@ make test3
|
|||||||
* Fri Feb 05 2021 shixuantong <shixuantong@huawei.com> - 4.5.2-2
|
* Fri Feb 05 2021 shixuantong <shixuantong@huawei.com> - 4.5.2-2
|
||||||
- fix CVE-2020-27783
|
- fix CVE-2020-27783
|
||||||
|
|
||||||
* Tue Jan 05 2020 shixuantong <shixuantong@huawei.com> - 4.5.2-1
|
* Tue Jan 05 2021 shixuantong <shixuantong@huawei.com> - 4.5.2-1
|
||||||
- update version to 4.5.2
|
- update version to 4.5.2
|
||||||
|
|
||||||
* Fri Aug 21 2020 shixuantong <shixuantong@huawei.com> - 4.2.3-5
|
* Fri Aug 21 2020 shixuantong <shixuantong@huawei.com> - 4.2.3-5
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user