python-ldap/backport-0001-CVE-2021-46823.patch

From 7e084aec1ba9ced25b44fd3db77e65242a827806 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ond=C5=99ej=20Kuzn=C3=ADk?= <ondra@mistotebe.net>
Date: Wed, 6 May 2020 15:48:46 +0100
Subject: [PATCH] Get rid of expected failures in tokenizer tests

https://github.com/python-ldap/python-ldap/pull/283
---
 Lib/ldap/schema/tokenizer.py     | 10 +++++++---
 Tests/t_ldap_schema_tokenizer.py |  6 ++----
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/Lib/ldap/schema/tokenizer.py b/Lib/ldap/schema/tokenizer.py
index 20958c09..69823f2b 100644
--- a/Lib/ldap/schema/tokenizer.py
+++ b/Lib/ldap/schema/tokenizer.py
@@ -13,12 +13,16 @@
     r"|"              # or
     r"([^'$()\s]+)"   # string of length >= 1 without '$() or whitespace
     r"|"              # or
-    r"('.*?'(?!\w))"  # any string or empty string surrounded by single quotes
-                      # except if right quote is succeeded by alphanumeric char
+    r"('(?:[^'\\]|\\\\|\\.)*?'(?!\w))"
+                      # any string or empty string surrounded by unescaped
+                      # single quotes except if right quote is succeeded by
+                      # alphanumeric char
     r"|"              # or
     r"([^\s]+?)",     # residue, all non-whitespace strings
 ).findall

+UNESCAPE_PATTERN = re.compile(r"\\(.)")
+

 def split_tokens(s):
     """
@@ -30,7 +34,7 @@ def split_tokens(s):
         if unquoted:
             parts.append(unquoted)
         elif quoted:
-            parts.append(quoted[1:-1])
+            parts.append(UNESCAPE_PATTERN.sub(r'\1', quoted[1:-1]))
         elif opar:
             parens += 1
             parts.append(opar)
diff --git a/Tests/t_ldap_schema_tokenizer.py b/Tests/t_ldap_schema_tokenizer.py
index c8581771..0890379a 100644
--- a/Tests/t_ldap_schema_tokenizer.py
+++ b/Tests/t_ldap_schema_tokenizer.py
@@ -44,8 +44,8 @@

 # broken schema of Oracle Internet Directory
 TESTCASES_BROKEN_OID = (
-    ("BLUBB DI 'BLU B B ER'MUST 'BLAH' ", ['BLUBB', 'DI', 'BLU B B ER', 'MUST', 'BLAH']),
-    ("BLUBBER DI 'BLU'BB ER' DA 'BLAH' ", ["BLUBBER", "DI", "BLU'BB ER", "DA", "BLAH"]),
+    "BLUBB DI 'BLU B B ER'MUST 'BLAH' ", #['BLUBB', 'DI', 'BLU B B ER', 'MUST', 'BLAH']
+    "BLUBBER DI 'BLU'BB ER' DA 'BLAH' ", #["BLUBBER", "DI", "BLU'BB ER", "DA", "BLAH"]
 )

 # for quoted single quotes inside string values
@@ -104,14 +104,12 @@ def test_utf8(self):
         """
         self._run_split_tokens_tests(TESTCASES_UTF8)

-    @unittest.expectedFailure
     def test_broken_oid(self):
         """
         run test cases specified in constant TESTCASES_BROKEN_OID
         """
         self._run_failure_tests(TESTCASES_BROKEN_OID)

-    @unittest.expectedFailure
     def test_escaped_quotes(self):
         """
         run test cases specified in constant TESTCASES_ESCAPED_QUOTES