50 lines
1.7 KiB
Diff
50 lines
1.7 KiB
Diff
From 89e7aad6e7ff9ecd88678ff25f885988b184b26e Mon Sep 17 00:00:00 2001
|
|
From: Stefan Behnel <stefan_ml@behnel.de>
|
|
Date: Sun, 18 Oct 2020 10:06:46 +0200
|
|
Subject: [PATCH] Prevent combinations of <noscript> and <style> from sneaking
|
|
JavaScript through the HTML cleaner.
|
|
|
|
---
|
|
src/lxml/html/clean.py | 3 +++
|
|
src/lxml/html/tests/test_clean.py | 10 ++++++++++
|
|
2 files changed, 13 insertions(+)
|
|
|
|
diff --git a/src/lxml/html/clean.py b/src/lxml/html/clean.py
|
|
index 6b19213..6775ac3 100644
|
|
--- a/src/lxml/html/clean.py
|
|
+++ b/src/lxml/html/clean.py
|
|
@@ -537,6 +537,9 @@ class Cleaner(object):
|
|
return True
|
|
if 'expression(' in style:
|
|
return True
|
|
+ if '</noscript' in style:
|
|
+ # e.g. '<noscript><style><a title="</noscript><img src=x onerror=alert(1)>">'
|
|
+ return True
|
|
return False
|
|
|
|
def clean_html(self, html):
|
|
diff --git a/src/lxml/html/tests/test_clean.py b/src/lxml/html/tests/test_clean.py
|
|
index 4477337..3c8ee25 100644
|
|
--- a/src/lxml/html/tests/test_clean.py
|
|
+++ b/src/lxml/html/tests/test_clean.py
|
|
@@ -103,6 +103,16 @@ class CleanerTest(unittest.TestCase):
|
|
'<p><span>Cy<!-- xx -->an</span><!-- XXX --></p>',
|
|
cleaner.clean_html(html))
|
|
|
|
+ def test_sneaky_noscript_in_style(self):
|
|
+ # This gets parsed as <noscript> -> <style>"...</noscript>..."</style>
|
|
+ # thus passing the </noscript> through into the output.
|
|
+ html = '<noscript><style><a title="</noscript><img src=x onerror=alert(1)>">'
|
|
+ s = lxml.html.fragment_fromstring(html)
|
|
+
|
|
+ self.assertEqual(
|
|
+ b'<noscript><style>/* deleted */</style></noscript>',
|
|
+ lxml.html.tostring(clean_html(s)))
|
|
+
|
|
|
|
def test_suite():
|
|
suite = unittest.TestSuite()
|
|
--
|
|
1.8.3.1
|
|
|