Skip to content

Commit b1b4c79

Browse files
authored
bpo-40480: restore ability to join fnmatch.translate() results (GH-20049)
In translate(), generate unique group names across calls. This restores the undocumented ability to get a valid regexp by joining multiple translate() results via `|`.
1 parent d0919f0 commit b1b4c79

File tree

2 files changed

+34
-7
lines changed

2 files changed

+34
-7
lines changed

Lib/fnmatch.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@
1616

1717
__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
1818

19+
# Build a thread-safe incrementing counter to help create unique regexp group
20+
# names across calls.
21+
from itertools import count
22+
_nextgroupnum = count().__next__
23+
del count
24+
1925
def fnmatch(name, pat):
2026
"""Test whether FILENAME matches PATTERN.
2127
@@ -148,9 +154,12 @@ def translate(pat):
148154
# in a lookahead assertion, save the matched part in a group, then
149155
# consume that group via a backreference. If the overall match fails,
150156
# the lookahead assertion won't try alternatives. So the translation is:
151-
# (?=(P<name>.*?fixed))(?P=name)
152-
# Group names are created as needed: g1, g2, g3, ...
153-
groupnum = 0
157+
# (?=(?P<name>.*?fixed))(?P=name)
158+
# Group names are created as needed: g0, g1, g2, ...
159+
# The numbers are obtained from _nextgroupnum() to ensure they're unique
160+
# across calls and across threads. This is because people rely on the
161+
# undocumented ability to join multiple translate() results together via
162+
# "|" to build large regexps matching "one of many" shell patterns.
154163
while i < n:
155164
assert inp[i] is STAR
156165
i += 1
@@ -167,7 +176,7 @@ def translate(pat):
167176
add(".*")
168177
add(fixed)
169178
else:
170-
groupnum += 1
179+
groupnum = _nextgroupnum()
171180
add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})")
172181
assert i == n
173182
res = "".join(res)

Lib/test/test_fnmatch.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ def test_warnings(self):
106106
class TranslateTestCase(unittest.TestCase):
107107

108108
def test_translate(self):
109+
import re
109110
self.assertEqual(translate('*'), r'(?s:.*)\Z')
110111
self.assertEqual(translate('?'), r'(?s:.)\Z')
111112
self.assertEqual(translate('a?b*'), r'(?s:a.b.*)\Z')
@@ -122,9 +123,26 @@ def test_translate(self):
122123
self.assertEqual(translate('*********A'), r'(?s:.*A)\Z')
123124
self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z')
124125
# fancy translation to prevent exponential-time match failure
125-
self.assertEqual(translate('**a*a****a'),
126-
r'(?s:(?=(?P<g1>.*?a))(?P=g1)(?=(?P<g2>.*?a))(?P=g2).*a)\Z')
127-
126+
t = translate('**a*a****a')
127+
digits = re.findall(r'\d+', t)
128+
self.assertEqual(len(digits), 4)
129+
self.assertEqual(digits[0], digits[1])
130+
self.assertEqual(digits[2], digits[3])
131+
g1 = f"g{digits[0]}" # e.g., group name "g4"
132+
g2 = f"g{digits[2]}" # e.g., group name "g5"
133+
self.assertEqual(t,
134+
fr'(?s:(?=(?P<{g1}>.*?a))(?P={g1})(?=(?P<{g2}>.*?a))(?P={g2}).*a)\Z')
135+
# and try pasting multiple translate results - it's an undocumented
136+
# feature that this works; all the pain of generating unique group
137+
# names across calls exists to support this
138+
r1 = translate('**a**a**a*')
139+
r2 = translate('**b**b**b*')
140+
r3 = translate('*c*c*c*')
141+
fatre = "|".join([r1, r2, r3])
142+
self.assertTrue(re.match(fatre, 'abaccad'))
143+
self.assertTrue(re.match(fatre, 'abxbcab'))
144+
self.assertTrue(re.match(fatre, 'cbabcaxc'))
145+
self.assertFalse(re.match(fatre, 'dabccbad'))
128146

129147
class FilterTestCase(unittest.TestCase):
130148

0 commit comments

Comments
 (0)