From b7c0e5ea530e2c07cd4d106505f02ca2cb7799c6 Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Mon, 4 May 2020 13:41:51 -0500 Subject: [PATCH 1/5] bpo-40480 "fnmatch" exponential execution time --- Lib/fnmatch.py | 64 +++++++++++++++++++++++++++++++++++----- Lib/test/test_fnmatch.py | 17 +++++++++++ 2 files changed, 74 insertions(+), 7 deletions(-) diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py index b98e6413295e1c..547695377fc828 100644 --- a/Lib/fnmatch.py +++ b/Lib/fnmatch.py @@ -77,15 +77,19 @@ def translate(pat): There is no way to quote meta-characters. """ + STAR = object() + res = [] + add = res.append i, n = 0, len(pat) - res = '' while i < n: c = pat[i] i = i+1 if c == '*': - res = res + '.*' + # compress consecutive `*` into one + if (not res) or res[-1] is not STAR: + add(STAR) elif c == '?': - res = res + '.' + add('.') elif c == '[': j = i if j < n and pat[j] == '!': @@ -95,7 +99,7 @@ def translate(pat): while j < n and pat[j] != ']': j = j+1 if j >= n: - res = res + '\\[' + add('\\[') else: stuff = pat[i:j] if '--' not in stuff: @@ -122,7 +126,53 @@ def translate(pat): stuff = '^' + stuff[1:] elif stuff[0] in ('^', '['): stuff = '\\' + stuff - res = '%s[%s]' % (res, stuff) + add(f'[{stuff}]') else: - res = res + re.escape(c) - return r'(?s:%s)\Z' % res + add(re.escape(c)) + assert i == n + + # Deal with STARs. + inp = res + res = [] + add = res.append + i, n = 0, len(inp) + # Fixed piece at the start? + fixed = [] + while i < n and inp[i] is not STAR: + add(inp[i]) + i += 1 + if fixed: + add("".join(fixed)) + # Now deal with STAR fixed STAR fixed ... + # For an interior `STAR fixed` pairing, we want to do a minimal + # .*? match followed by `fixed`, with no possibility of backtracking. + # We can't spell that directly, but can trick it into working by + # by matching + # .*?fixed + # in a lookahead assertion, save the matched part in a group, then + # consume that group via a backreference. 
If the overall match fails, + # the lookahead assertion won't try alternatives. So the translation is: + # (?=(?P<name>.*?fixed))(?P=name) + # Group names are created as needed: g1, g2, g3, ... + groupnum = 0 + while i < n: + assert inp[i] is STAR + i += 1 + if i == n: + add(".*") + break + assert inp[i] is not STAR + fixed = [] + while i < n and inp[i] is not STAR: + fixed.append(inp[i]) + i += 1 + fixed = "".join(fixed) + if i == n: + add(".*") + add(fixed) + else: + groupnum += 1 + add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})") + assert i == n + res = "".join(res) + return fr'(?s:{res})\Z' diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py index 55f9f0d3a5425a..4cec840d4cde17 100644 --- a/Lib/test/test_fnmatch.py +++ b/Lib/test/test_fnmatch.py @@ -44,6 +44,23 @@ def test_fnmatch(self): check('foo\nbar\n', 'foo*') check('\nfoo', 'foo*', False) check('\n', '*') + # from the docs + self.assertEqual(translate('*.txt'), r'(?s:.*\.txt)\Z') + # squash consecutive stars + self.assertEqual(translate('*********'), r'(?s:.*)\Z') + self.assertEqual(translate('A*********'), r'(?s:A.*)\Z') + self.assertEqual(translate('*********A'), r'(?s:.*A)\Z') + self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z') + # fancy translation to prevent exponential-time match failure + self.assertEqual(translate('**a*a****a'), + r'(?s:(?=(?P<g1>.*?a))(?P=g1)(?=(?P<g2>.*?a))(?P=g2).*a)\Z') + + def test_slow_fnmatch(self): + check = self.check_match + check('a' * 50, '*a*a*a*a*a*a*a*a*a*a') + # The next "takes forever" if the regexp translation is + # straightforward.
+ check('a' * 50 + 'b', '*a*a*a*a*a*a*a*a*a*a', False) def test_mix_bytes_str(self): self.assertRaises(TypeError, fnmatch, 'test', b'*') From a158f625dc51cdff1f7bdbc4146e8540572e78d5 Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Mon, 4 May 2020 13:48:28 -0500 Subject: [PATCH 2/5] trivial edits --- Lib/fnmatch.py | 3 +-- Lib/test/test_fnmatch.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py index 547695377fc828..03d369778271fe 100644 --- a/Lib/fnmatch.py +++ b/Lib/fnmatch.py @@ -146,8 +146,7 @@ def translate(pat): # Now deal with STAR fixed STAR fixed ... # For an interior `STAR fixed` pairing, we want to do a minimal # .*? match followed by `fixed`, with no possibility of backtracking. - # We can't spell that directly, but can trick it into working by - # by matching + # We can't spell that directly, but can trick it into working by matching # .*?fixed # in a lookahead assertion, save the matched part in a group, then # consume that group via a backreference. If the overall match fails, diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py index 4cec840d4cde17..1e0f9c0fdb0bf2 100644 --- a/Lib/test/test_fnmatch.py +++ b/Lib/test/test_fnmatch.py @@ -59,7 +59,7 @@ def test_slow_fnmatch(self): check = self.check_match check('a' * 50, '*a*a*a*a*a*a*a*a*a*a') # The next "takes forever" if the regexp translation is - # straightforward. + # straightforward. See bpo-40480. 
check('a' * 50 + 'b', '*a*a*a*a*a*a*a*a*a*a', False) def test_mix_bytes_str(self): From 1d61b7fa9120e59b3b03637b00219b4b643eee08 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Mon, 4 May 2020 21:21:46 +0000 Subject: [PATCH 3/5] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../NEWS.d/next/Library/2020-05-04-21-21-43.bpo-40480.mjldWa.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2020-05-04-21-21-43.bpo-40480.mjldWa.rst diff --git a/Misc/NEWS.d/next/Library/2020-05-04-21-21-43.bpo-40480.mjldWa.rst b/Misc/NEWS.d/next/Library/2020-05-04-21-21-43.bpo-40480.mjldWa.rst new file mode 100644 index 00000000000000..d046b1422419d7 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-05-04-21-21-43.bpo-40480.mjldWa.rst @@ -0,0 +1 @@ +``fnmatch.fnmatch()`` could take exponential time in the presence of multiple ``*`` pattern characters. This was repaired by generating more elaborate regular expressions to avoid futile backtracking. \ No newline at end of file From fbb7048393041e4a691cb87572212ea8257dd745 Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Mon, 4 May 2020 16:27:27 -0500 Subject: [PATCH 4/5] Moved the block of new translate() tests into the right test function. 
--- Lib/test/test_fnmatch.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py index 1e0f9c0fdb0bf2..4c173069503cc6 100644 --- a/Lib/test/test_fnmatch.py +++ b/Lib/test/test_fnmatch.py @@ -44,16 +44,6 @@ def test_fnmatch(self): check('foo\nbar\n', 'foo*') check('\nfoo', 'foo*', False) check('\n', '*') - # from the docs - self.assertEqual(translate('*.txt'), r'(?s:.*\.txt)\Z') - # squash consecutive stars - self.assertEqual(translate('*********'), r'(?s:.*)\Z') - self.assertEqual(translate('A*********'), r'(?s:A.*)\Z') - self.assertEqual(translate('*********A'), r'(?s:.*A)\Z') - self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z') - # fancy translation to prevent exponential-time match failure - self.assertEqual(translate('**a*a****a'), - r'(?s:(?=(?P<g1>.*?a))(?P=g1)(?=(?P<g2>.*?a))(?P=g2).*a)\Z') def test_slow_fnmatch(self): check = self.check_match @@ -124,6 +114,16 @@ def test_translate(self): self.assertEqual(translate('[!x]'), r'(?s:[^x])\Z') self.assertEqual(translate('[^x]'), r'(?s:[\^x])\Z') self.assertEqual(translate('[x'), r'(?s:\[x)\Z') + # from the docs + self.assertEqual(translate('*.txt'), r'(?s:.*\.txt)\Z') + # squash consecutive stars + self.assertEqual(translate('*********'), r'(?s:.*)\Z') + self.assertEqual(translate('A*********'), r'(?s:A.*)\Z') + self.assertEqual(translate('*********A'), r'(?s:.*A)\Z') + self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z') + # fancy translation to prevent exponential-time match failure + self.assertEqual(translate('**a*a****a'), + r'(?s:(?=(?P<g1>.*?a))(?P=g1)(?=(?P<g2>.*?a))(?P=g2).*a)\Z') class FilterTestCase(unittest.TestCase): From 86d1d0d817168d54ecec08cffec3b9b5eb092982 Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Mon, 4 May 2020 16:35:39 -0500 Subject: [PATCH 5/5] Removed useless code from an earlier version.
--- Lib/fnmatch.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py index 03d369778271fe..d7d915d51314da 100644 --- a/Lib/fnmatch.py +++ b/Lib/fnmatch.py @@ -136,13 +136,10 @@ def translate(pat): res = [] add = res.append i, n = 0, len(inp) - # Fixed piece at the start? - fixed = [] + # Fixed pieces at the start? while i < n and inp[i] is not STAR: add(inp[i]) i += 1 - if fixed: - add("".join(fixed)) # Now deal with STAR fixed STAR fixed ... # For an interior `STAR fixed` pairing, we want to do a minimal # .*? match followed by `fixed`, with no possibility of backtracking.