From a105ab8dc262ec6735977c25c13f0bdfcdec72a7 Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Thu, 26 Nov 2020 09:20:52 +0100 Subject: [PATCH] Prevent combinations of and ' + return True return False def clean_html(self, html): diff --git a/src/lxml/html/tests/test_clean.py b/src/lxml/html/tests/test_clean.py index 3c8ee25..0e669f9 100644 --- a/src/lxml/html/tests/test_clean.py +++ b/src/lxml/html/tests/test_clean.py @@ -113,6 +113,16 @@ class CleanerTest(unittest.TestCase): b'', lxml.html.tostring(clean_html(s))) + def test_sneaky_js_in_math_style(self): + # This gets parsed as -> + # thus passing any tag/script/whatever content through into the output. + html = '' + s = lxml.html.fragment_fromstring(html) + + self.assertEqual( + b'', + lxml.html.tostring(clean_html(s))) + def test_suite(): suite = unittest.TestSuite() diff --git a/src/lxml/html/tests/test_clean.txt b/src/lxml/html/tests/test_clean.txt index 275be07..18e6c7e 100644 --- a/src/lxml/html/tests/test_clean.txt +++ b/src/lxml/html/tests/test_clean.txt @@ -104,7 +104,11 @@ >>> print(Cleaner(page_structure=False, comments=False).clean_html(doc)) - + @@ -126,7 +130,11 @@ >>> print(Cleaner(page_structure=False, safe_attrs_only=False).clean_html(doc)) - + a link @@ -190,7 +198,11 @@ - + a link -- 1.8.3.1