Skip to content

Commit 612e422

Browse files
authored
bpo-46576: Speed up test_peg_generator by using a static library for shared sources (GH-32338)
Speed up test_peg_generator by using a static library for shared sources to avoid recompiling as much code.
1 parent 1ba82d4 commit 612e422

File tree

3 files changed

+123
-44
lines changed

3 files changed

+123
-44
lines changed

Lib/test/test_peg_generator/test_c_parser.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,13 +72,30 @@ def test_parse(self):
7272

7373
@support.requires_subprocess()
7474
class TestCParser(unittest.TestCase):
75+
76+
@classmethod
77+
def setUpClass(cls):
78+
# When running under regtest, a separate tempdir is used
79+
# as the current directory and watched for left-overs.
80+
# Reusing that as the base for temporary directories
81+
# ensures everything is cleaned up properly and
82+
# cleans up afterwards if not (with warnings).
83+
cls.tmp_base = os.getcwd()
84+
if os.path.samefile(cls.tmp_base, os_helper.SAVEDCWD):
85+
cls.tmp_base = None
86+
# Create a directory for the reusable static library part of
87+
# the pegen extension build process. This greatly reduces the
88+
# runtime overhead of spawning compiler processes.
89+
cls.library_dir = tempfile.mkdtemp(dir=cls.tmp_base)
90+
cls.addClassCleanup(shutil.rmtree, cls.library_dir)
91+
7592
def setUp(self):
7693
self._backup_config_vars = dict(sysconfig._CONFIG_VARS)
7794
cmd = support.missing_compiler_executable()
7895
if cmd is not None:
7996
self.skipTest("The %r command is not found" % cmd)
8097
self.old_cwd = os.getcwd()
81-
self.tmp_path = tempfile.mkdtemp()
98+
self.tmp_path = tempfile.mkdtemp(dir=self.tmp_base)
8299
change_cwd = os_helper.change_cwd(self.tmp_path)
83100
change_cwd.__enter__()
84101
self.addCleanup(change_cwd.__exit__, None, None, None)
@@ -91,7 +108,10 @@ def tearDown(self):
91108

92109
def build_extension(self, grammar_source):
93110
grammar = parse_string(grammar_source, GrammarParser)
94-
generate_parser_c_extension(grammar, Path(self.tmp_path))
111+
# Because setUp() already changes the current directory to the
112+
# temporary path, use a relative path here to prevent excessive
113+
# path lengths when compiling.
114+
generate_parser_c_extension(grammar, Path('.'), library_dir=self.library_dir)
95115

96116
def run_test(self, grammar_source, test_source):
97117
self.build_extension(grammar_source)

Tools/peg_generator/pegen/build.py

Lines changed: 91 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import itertools
22
import pathlib
3-
import shutil
43
import sys
54
import sysconfig
65
import tempfile
@@ -33,7 +32,8 @@ def compile_c_extension(
3332
build_dir: Optional[str] = None,
3433
verbose: bool = False,
3534
keep_asserts: bool = True,
36-
disable_optimization: bool = True, # Significant test_peg_generator speedup.
35+
disable_optimization: bool = False,
36+
library_dir: Optional[str] = None,
3737
) -> str:
3838
"""Compile the generated source for a parser generator into an extension module.
3939
@@ -44,15 +44,21 @@ def compile_c_extension(
4444
4545
If *build_dir* is provided, that path will be used as the temporary build directory
4646
of distutils (this is useful in case you want to use a temporary directory).
47+
48+
If *library_dir* is provided, that path will be used as the directory for a
49+
static library of the common parser sources (this is useful in case you are
50+
creating multiple extensions).
4751
"""
4852
import distutils.log
49-
from distutils.command.build_ext import build_ext # type: ignore
50-
from distutils.command.clean import clean # type: ignore
5153
from distutils.core import Distribution, Extension
5254
from distutils.tests.support import fixup_build_ext # type: ignore
5355

56+
from distutils.ccompiler import new_compiler
57+
from distutils.dep_util import newer_group
58+
from distutils.sysconfig import customize_compiler
59+
5460
if verbose:
55-
distutils.log.set_verbosity(distutils.log.DEBUG)
61+
distutils.log.set_threshold(distutils.log.DEBUG)
5662

5763
source_file_path = pathlib.Path(generated_source_path)
5864
extension_name = source_file_path.stem
@@ -71,46 +77,92 @@ def compile_c_extension(
7177
extra_compile_args.append("-O0")
7278
if sysconfig.get_config_var("GNULD") == "yes":
7379
extra_link_args.append("-fno-lto")
74-
extension = [
75-
Extension(
76-
extension_name,
77-
sources=[
78-
str(MOD_DIR.parent.parent.parent / "Python" / "Python-ast.c"),
79-
str(MOD_DIR.parent.parent.parent / "Python" / "asdl.c"),
80-
str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer.c"),
81-
str(MOD_DIR.parent.parent.parent / "Parser" / "pegen.c"),
82-
str(MOD_DIR.parent.parent.parent / "Parser" / "pegen_errors.c"),
83-
str(MOD_DIR.parent.parent.parent / "Parser" / "action_helpers.c"),
84-
str(MOD_DIR.parent.parent.parent / "Parser" / "string_parser.c"),
85-
str(MOD_DIR.parent / "peg_extension" / "peg_extension.c"),
86-
generated_source_path,
87-
],
88-
include_dirs=[
89-
str(MOD_DIR.parent.parent.parent / "Include" / "internal"),
90-
str(MOD_DIR.parent.parent.parent / "Parser"),
91-
],
92-
extra_compile_args=extra_compile_args,
93-
extra_link_args=extra_link_args,
94-
)
80+
81+
common_sources = [
82+
str(MOD_DIR.parent.parent.parent / "Python" / "Python-ast.c"),
83+
str(MOD_DIR.parent.parent.parent / "Python" / "asdl.c"),
84+
str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer.c"),
85+
str(MOD_DIR.parent.parent.parent / "Parser" / "pegen.c"),
86+
str(MOD_DIR.parent.parent.parent / "Parser" / "pegen_errors.c"),
87+
str(MOD_DIR.parent.parent.parent / "Parser" / "action_helpers.c"),
88+
str(MOD_DIR.parent.parent.parent / "Parser" / "string_parser.c"),
89+
str(MOD_DIR.parent / "peg_extension" / "peg_extension.c"),
90+
]
91+
include_dirs = [
92+
str(MOD_DIR.parent.parent.parent / "Include" / "internal"),
93+
str(MOD_DIR.parent.parent.parent / "Parser"),
9594
]
96-
dist = Distribution({"name": extension_name, "ext_modules": extension})
97-
cmd = build_ext(dist)
95+
extension = Extension(
96+
extension_name,
97+
sources=[generated_source_path],
98+
extra_compile_args=extra_compile_args,
99+
extra_link_args=extra_link_args,
100+
)
101+
dist = Distribution({"name": extension_name, "ext_modules": [extension]})
102+
cmd = dist.get_command_obj("build_ext")
98103
fixup_build_ext(cmd)
99-
cmd.inplace = True
104+
cmd.build_lib = str(source_file_path.parent)
105+
cmd.include_dirs = include_dirs
100106
if build_dir:
101107
cmd.build_temp = build_dir
102-
cmd.build_lib = build_dir
103108
cmd.ensure_finalized()
104-
cmd.run()
105-
106-
extension_path = source_file_path.parent / cmd.get_ext_filename(extension_name)
107-
shutil.move(cmd.get_ext_fullpath(extension_name), extension_path)
108-
109-
cmd = clean(dist)
110-
cmd.finalize_options()
111-
cmd.run()
112109

113-
return extension_path
110+
compiler = new_compiler()
111+
customize_compiler(compiler)
112+
compiler.set_include_dirs(cmd.include_dirs)
113+
compiler.set_library_dirs(cmd.library_dirs)
114+
# build static lib
115+
if library_dir:
116+
library_filename = compiler.library_filename(extension_name,
117+
output_dir=library_dir)
118+
if newer_group(common_sources, library_filename, 'newer'):
119+
if sys.platform == 'win32':
120+
pdb = compiler.static_lib_format % (extension_name, '.pdb')
121+
compile_opts = [f"/Fd{library_dir}\\{pdb}"]
122+
compile_opts.extend(extra_compile_args)
123+
else:
124+
compile_opts = extra_compile_args
125+
objects = compiler.compile(common_sources,
126+
output_dir=library_dir,
127+
debug=cmd.debug,
128+
extra_postargs=compile_opts)
129+
compiler.create_static_lib(objects, extension_name,
130+
output_dir=library_dir,
131+
debug=cmd.debug)
132+
if sys.platform == 'win32':
133+
compiler.add_library_dir(library_dir)
134+
extension.libraries = [extension_name]
135+
elif sys.platform == 'darwin':
136+
compiler.set_link_objects([
137+
'-Wl,-force_load', library_filename,
138+
])
139+
else:
140+
compiler.set_link_objects([
141+
'-Wl,--whole-archive', library_filename, '-Wl,--no-whole-archive',
142+
])
143+
else:
144+
extension.sources[0:0] = common_sources
145+
146+
# Compile the source code to object files.
147+
ext_path = cmd.get_ext_fullpath(extension_name)
148+
if newer_group(extension.sources, ext_path, 'newer'):
149+
objects = compiler.compile(extension.sources,
150+
output_dir=cmd.build_temp,
151+
debug=cmd.debug,
152+
extra_postargs=extra_compile_args)
153+
else:
154+
objects = compiler.object_filenames(extension.sources,
155+
output_dir=cmd.build_temp)
156+
# Now link the object files together into a "shared object"
157+
compiler.link_shared_object(
158+
objects, ext_path,
159+
libraries=cmd.get_libraries(extension),
160+
extra_postargs=extra_link_args,
161+
export_symbols=cmd.get_export_symbols(extension),
162+
debug=cmd.debug,
163+
build_temp=cmd.build_temp)
164+
165+
return pathlib.Path(ext_path)
114166

115167

116168
def build_parser(

Tools/peg_generator/pegen/testutil.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import textwrap
77
import token
88
import tokenize
9-
from typing import IO, Any, Dict, Final, Type, cast
9+
from typing import IO, Any, Dict, Final, Optional, Type, cast
1010

1111
from pegen.build import compile_c_extension
1212
from pegen.c_generator import CParserGenerator
@@ -83,7 +83,8 @@ def generate_c_parser_source(grammar: Grammar) -> str:
8383

8484

8585
def generate_parser_c_extension(
86-
grammar: Grammar, path: pathlib.PurePath, debug: bool = False
86+
grammar: Grammar, path: pathlib.PurePath, debug: bool = False,
87+
library_dir: Optional[str] = None,
8788
) -> Any:
8889
"""Generate a parser c extension for the given grammar in the given path
8990
@@ -101,7 +102,13 @@ def generate_parser_c_extension(
101102
grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, file, debug=debug
102103
)
103104
genr.generate("parse.c")
104-
compile_c_extension(str(source), build_dir=str(path))
105+
compile_c_extension(
106+
str(source),
107+
build_dir=str(path),
108+
# Significant test_peg_generator speedups
109+
disable_optimization=True,
110+
library_dir=library_dir,
111+
)
105112

106113

107114
def print_memstats() -> bool:

0 commit comments

Comments
 (0)