From 1bad5b2ebc2f3cb663ce425b9979b4ec4dce27b2 Mon Sep 17 00:00:00 2001
From: shixuantong
Date: Thu, 6 Apr 2023 03:30:44 +0000
Subject: [PATCH] fix CVE-2023-24329

---
 Lib/test/test_urlparse.py | 7 +++++++
 Lib/urllib/parse.py       | 1 +
 2 files changed, 8 insertions(+)

diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index 5655fc3..bddc7b4 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -694,6 +694,13 @@ class UrlParseTestCase(unittest.TestCase):
                         else:
                             self.assertEqual(p.scheme, "")
 
+    def test_attributes_bad_scheme_CVE_2023_24329(self):
+        """Check handling of invalid schemes that start with blank characters."""
+        for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
+            url = " https://www.example.net"
+            p = parse(url)
+            self.assertEqual(p.scheme, "https")
+
     def test_attributes_without_netloc(self):
         # This example is straight from RFC 3261. It looks like it
         # should allow the username, hostname, and port to be filled
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index fc4d8b7..9a867a0 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -423,6 +423,7 @@ def urlsplit(url, scheme='', allow_fragments=True):
     Return a 5-tuple: (scheme, netloc, path, query, fragment).
     Note that we don't break the components up in smaller bits
     (e.g. netloc is a single string) and we don't expand % escapes."""
+    url = url.lstrip()
     url, scheme, _coerce_result = _coerce_args(url, scheme)
     url = _remove_unsafe_bytes_from_url(url)
     scheme = _remove_unsafe_bytes_from_url(scheme)
-- 
2.27.0