Skip to content

bpo-45653: Freeze parts of the encodings package #30030

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions Makefile.pre.in
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,10 @@ DEEPFREEZE_OBJS = \
Python/deepfreeze/zipimport.o \
Python/deepfreeze/abc.o \
Python/deepfreeze/codecs.o \
Python/deepfreeze/encodings.o \
Python/deepfreeze/encodings.aliases.o \
Python/deepfreeze/encodings.ascii.o \
Python/deepfreeze/encodings.utf_8.o \
Python/deepfreeze/io.o \
Python/deepfreeze/_collections_abc.o \
Python/deepfreeze/_sitebuiltins.o \
Expand Down Expand Up @@ -1008,6 +1012,18 @@ Python/deepfreeze/abc.c: Python/frozen_modules/abc.h $(DEEPFREEZE_DEPS)
Python/deepfreeze/codecs.c: Python/frozen_modules/codecs.h $(DEEPFREEZE_DEPS)
$(PYTHON_FOR_FREEZE) $(srcdir)/Tools/scripts/deepfreeze.py Python/frozen_modules/codecs.h -m codecs -o Python/deepfreeze/codecs.c

Python/deepfreeze/encodings.c: Python/frozen_modules/encodings.h $(DEEPFREEZE_DEPS)
$(PYTHON_FOR_FREEZE) $(srcdir)/Tools/scripts/deepfreeze.py Python/frozen_modules/encodings.h -m encodings -o Python/deepfreeze/encodings.c

Python/deepfreeze/encodings.aliases.c: Python/frozen_modules/encodings.aliases.h $(DEEPFREEZE_DEPS)
$(PYTHON_FOR_FREEZE) $(srcdir)/Tools/scripts/deepfreeze.py Python/frozen_modules/encodings.aliases.h -m encodings.aliases -o Python/deepfreeze/encodings.aliases.c

Python/deepfreeze/encodings.ascii.c: Python/frozen_modules/encodings.ascii.h $(DEEPFREEZE_DEPS)
$(PYTHON_FOR_FREEZE) $(srcdir)/Tools/scripts/deepfreeze.py Python/frozen_modules/encodings.ascii.h -m encodings.ascii -o Python/deepfreeze/encodings.ascii.c

Python/deepfreeze/encodings.utf_8.c: Python/frozen_modules/encodings.utf_8.h $(DEEPFREEZE_DEPS)
$(PYTHON_FOR_FREEZE) $(srcdir)/Tools/scripts/deepfreeze.py Python/frozen_modules/encodings.utf_8.h -m encodings.utf_8 -o Python/deepfreeze/encodings.utf_8.c

Python/deepfreeze/io.c: Python/frozen_modules/io.h $(DEEPFREEZE_DEPS)
$(PYTHON_FOR_FREEZE) $(srcdir)/Tools/scripts/deepfreeze.py Python/frozen_modules/io.h -m io -o Python/deepfreeze/io.c

Expand Down Expand Up @@ -1098,6 +1114,10 @@ FROZEN_FILES_IN = \
Lib/zipimport.py \
Lib/abc.py \
Lib/codecs.py \
Lib/encodings/__init__.py \
Lib/encodings/aliases.py \
Lib/encodings/ascii.py \
Lib/encodings/utf_8.py \
Lib/io.py \
Lib/_collections_abc.py \
Lib/_sitebuiltins.py \
Expand All @@ -1123,6 +1143,10 @@ FROZEN_FILES_OUT = \
Python/frozen_modules/zipimport.h \
Python/frozen_modules/abc.h \
Python/frozen_modules/codecs.h \
Python/frozen_modules/encodings.h \
Python/frozen_modules/encodings.aliases.h \
Python/frozen_modules/encodings.ascii.h \
Python/frozen_modules/encodings.utf_8.h \
Python/frozen_modules/io.h \
Python/frozen_modules/_collections_abc.h \
Python/frozen_modules/_sitebuiltins.h \
Expand Down Expand Up @@ -1171,6 +1195,18 @@ Python/frozen_modules/abc.h: Lib/abc.py $(FREEZE_MODULE_DEPS)
Python/frozen_modules/codecs.h: Lib/codecs.py $(FREEZE_MODULE_DEPS)
$(FREEZE_MODULE) codecs $(srcdir)/Lib/codecs.py Python/frozen_modules/codecs.h

Python/frozen_modules/encodings.h: Lib/encodings/__init__.py $(FREEZE_MODULE_DEPS)
$(FREEZE_MODULE) encodings $(srcdir)/Lib/encodings/__init__.py Python/frozen_modules/encodings.h

Python/frozen_modules/encodings.aliases.h: Lib/encodings/aliases.py $(FREEZE_MODULE_DEPS)
$(FREEZE_MODULE) encodings.aliases $(srcdir)/Lib/encodings/aliases.py Python/frozen_modules/encodings.aliases.h

Python/frozen_modules/encodings.ascii.h: Lib/encodings/ascii.py $(FREEZE_MODULE_DEPS)
$(FREEZE_MODULE) encodings.ascii $(srcdir)/Lib/encodings/ascii.py Python/frozen_modules/encodings.ascii.h

Python/frozen_modules/encodings.utf_8.h: Lib/encodings/utf_8.py $(FREEZE_MODULE_DEPS)
$(FREEZE_MODULE) encodings.utf_8 $(srcdir)/Lib/encodings/utf_8.py Python/frozen_modules/encodings.utf_8.h

Python/frozen_modules/io.h: Lib/io.py $(FREEZE_MODULE_DEPS)
$(FREEZE_MODULE) io $(srcdir)/Lib/io.py Python/frozen_modules/io.h

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Parts of the :mod:`encodings` package are now frozen.
28 changes: 28 additions & 0 deletions PCbuild/_freeze_module.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,34 @@
<DeepIntFile>$(IntDir)codecs.g.c</DeepIntFile>
<DeepOutFile>$(PySourcePath)Python\deepfreeze\df.codecs.c</DeepOutFile>
</None>
<None Include="..\Lib\encodings\__init__.py">
<ModName>encodings</ModName>
<IntFile>$(IntDir)encodings.g.h</IntFile>
<OutFile>$(PySourcePath)Python\frozen_modules\encodings.h</OutFile>
<DeepIntFile>$(IntDir)encodings.g.c</DeepIntFile>
<DeepOutFile>$(PySourcePath)Python\deepfreeze\df.encodings.c</DeepOutFile>
</None>
<None Include="..\Lib\encodings\aliases.py">
<ModName>encodings.aliases</ModName>
<IntFile>$(IntDir)encodings.aliases.g.h</IntFile>
<OutFile>$(PySourcePath)Python\frozen_modules\encodings.aliases.h</OutFile>
<DeepIntFile>$(IntDir)encodings.aliases.g.c</DeepIntFile>
<DeepOutFile>$(PySourcePath)Python\deepfreeze\df.encodings.aliases.c</DeepOutFile>
</None>
<None Include="..\Lib\encodings\ascii.py">
<ModName>encodings.ascii</ModName>
<IntFile>$(IntDir)encodings.ascii.g.h</IntFile>
<OutFile>$(PySourcePath)Python\frozen_modules\encodings.ascii.h</OutFile>
<DeepIntFile>$(IntDir)encodings.ascii.g.c</DeepIntFile>
<DeepOutFile>$(PySourcePath)Python\deepfreeze\df.encodings.ascii.c</DeepOutFile>
</None>
<None Include="..\Lib\encodings\utf_8.py">
<ModName>encodings.utf_8</ModName>
<IntFile>$(IntDir)encodings.utf_8.g.h</IntFile>
<OutFile>$(PySourcePath)Python\frozen_modules\encodings.utf_8.h</OutFile>
<DeepIntFile>$(IntDir)encodings.utf_8.g.c</DeepIntFile>
<DeepOutFile>$(PySourcePath)Python\deepfreeze\df.encodings.utf_8.c</DeepOutFile>
</None>
<None Include="..\Lib\io.py">
<ModName>io</ModName>
<IntFile>$(IntDir)io.g.h</IntFile>
Expand Down
12 changes: 12 additions & 0 deletions PCbuild/_freeze_module.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,18 @@
<None Include="..\Lib\codecs.py">
<Filter>Python Files</Filter>
</None>
<None Include="..\Lib\encodings\__init__.py">
<Filter>Python Files</Filter>
</None>
<None Include="..\Lib\encodings\aliases.py">
<Filter>Python Files</Filter>
</None>
<None Include="..\Lib\encodings\ascii.py">
<Filter>Python Files</Filter>
</None>
<None Include="..\Lib\encodings\utf_8.py">
<Filter>Python Files</Filter>
</None>
<None Include="..\Lib\io.py">
<Filter>Python Files</Filter>
</None>
Expand Down
4 changes: 4 additions & 0 deletions PCbuild/pythoncore.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,10 @@
<ClCompile Include="..\Python\deepfreeze\df.zipimport.c" />
<ClCompile Include="..\Python\deepfreeze\df.abc.c" />
<ClCompile Include="..\Python\deepfreeze\df.codecs.c" />
<ClCompile Include="..\Python\deepfreeze\df.encodings.c" />
<ClCompile Include="..\Python\deepfreeze\df.encodings.aliases.c" />
<ClCompile Include="..\Python\deepfreeze\df.encodings.ascii.c" />
<ClCompile Include="..\Python\deepfreeze\df.encodings.utf_8.c" />
<ClCompile Include="..\Python\deepfreeze\df.io.c" />
<ClCompile Include="..\Python\deepfreeze\df._collections_abc.c" />
<ClCompile Include="..\Python\deepfreeze\df._sitebuiltins.c" />
Expand Down
66 changes: 66 additions & 0 deletions Python/codecs.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Copyright (c) Corporation for National Research Initiatives.
#include "pycore_interp.h" // PyInterpreterState.codec_search_path
#include "pycore_pystate.h" // _PyInterpreterState_GET()
#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
#include "pycore_fileutils.h" // _Py_join_relfile()
#include <ctype.h>

const char *Py_hexdigits = "0123456789abcdef";
Expand Down Expand Up @@ -1405,6 +1406,66 @@ static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc)
return PyCodec_SurrogateEscapeErrors(exc);
}

/* Set encodings.__path__ for frozen encodings package
*
* The encodings package is frozen but most encodings modules are not. The
* __path__ attribute of the encodings package must be reset so importlib is
* able to find the pure Python modules.
* Returns -1 on error
*/
static int
_set_encodings_path(PyObject *mod) {
Comment on lines +1409 to +1417
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm... This should not be necessary. It is already handled by FrozenImporter.find_spec() (in Lib/importlib/_bootstrap.py). If __path__ is not getting set then something went wrong and needs to be fixed.

Is it here to provide a fallback for the config->stdlib_dir == NULL case?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Something is not working right when embedding Python.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

😞

I'll try to take a look this week.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI, I spent some time looking at this today. I take back what I said: "This should not be necessary." The approach you took is probably good enough until we can find a better solution. I plan on troubleshooting the test_embed failures if you don't figure them out first.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem is that sys._stdlib_dir is set to None. This can happen in some embedding scenarios (for now). This is the reason why FrozenImporter.find_spec() doesn't populate encodings.__path__ in the failing tests. It is why _set_encodings_path() is failing.

The problem is that sys._stdlib_dir is set to None. sys._stdlib_dir is set from _Py_GetStdlibDir(), which returns the value calculated by the getpath.c code during runtime init. In some embedding cases that code refuses to extrapolate the stdlib dir, so it ends up None.

FrozenImporter.find_spec() uses sys._stdlib_dir to figure out the encodings.__path__ entry to add. If the stdlib dir is unknown then it doesn't add any. This is also why _set_encodings_path() isn't working.

We have several options:

  1. extrapolate sys._stdlib_dir some other way
  2. fall back to using the non-frozen encodings module
  3. freeze all "encodings" submodules

(2) effectively accomplishes the same thing as (1), though it doesn't actually update sys._stdlib_dir. Furthermore, we already know it works. (2) also has the benefit of being very simple, since we'd use the normal import machinery unchanged. (Note that (1) and (2) are not guaranteed to find the stdlib dir. However, with (2) that failure mode already exists, so embedders would already have to deal with it.) (3) would get what we want but would make the compiled binary bigger and would add a bunch of noise to make output when building.

So I recommend (2).


(2) involves 2 things: drop _set_encodings_path() here, and update FrozenImporter.find_spec().

diff --git a/Lib/importlib/_bootstrap.py b/Lib/importlib/_bootstrap.py
index afb95f4e1d..4200afae7c 100644
--- a/Lib/importlib/_bootstrap.py
+++ b/Lib/importlib/_bootstrap.py
@@ -941,6 +941,9 @@ def find_spec(cls, fullname, path=None, target=None):
                                 origin=cls._ORIGIN,
                                 is_package=ispkg)
         filename, pkgdir = cls._resolve_filename(origname, fullname, ispkg)
+        if ispkg and not pkgdir:
+            # We can't resolve __path__, so fall back to the path finder.
+            return None
         spec.loader_state = type(sys.implementation)(
             filename=filename,
             origname=origname,

int rc = -1;
PyObject *path = NULL;
PyObject *encodings_dir = NULL;

// PySys_GetObject() returns a borrowed reference and does not set an
// exception when the attribute is missing.
PyObject *stdlib_dir = PySys_GetObject("_stdlib_dir");
if (stdlib_dir == NULL || stdlib_dir == Py_None) {
    // sys._stdlib_dir is missing or None, so the stdlib location is
    // unknown.  There is nothing to join "encodings" onto; leave
    // encodings.__path__ untouched and report success rather than
    // failing on the str conversion below.
    rc = 0;
    goto exit;
}

// Any other non-str value makes PyUnicode_AsWideCharString() fail with an
// exception set, which is propagated to the caller as -1.
Py_ssize_t size;
wchar_t *stdlib_dirw = PyUnicode_AsWideCharString(stdlib_dir, &size);
if (stdlib_dirw == NULL) {
    goto exit;
}

// encodings_dir = os.path.join(sys._stdlib_dir, "encodings")
wchar_t *encodings_dirw = _Py_join_relfile(stdlib_dirw, L"encodings");
// The buffer from PyUnicode_AsWideCharString() is released with PyMem_Free().
PyMem_Free((void *)stdlib_dirw);
if (encodings_dirw == NULL) {
    // A NULL result is reported to the caller as MemoryError.
    PyErr_NoMemory();
    goto exit;
}

encodings_dir = PyUnicode_FromWideChar(encodings_dirw, -1);
// The joined path is released with PyMem_RawFree(), unlike stdlib_dirw.
PyMem_RawFree((void *)encodings_dirw);
if (encodings_dir == NULL) {
    goto exit;
}

path = PyList_New(0);
if (path == NULL) {
    goto exit;
}
if (PyList_Append(path, encodings_dir) < 0) {
    goto exit;
}
// encodings.__path__ = [encodings_dir]
if (PyObject_SetAttrString(mod, "__path__", path) < 0) {
    goto exit;
}

rc = 0;

exit:
// Both may still be NULL when an early step bailed out.
Py_XDECREF(path);
Py_XDECREF(encodings_dir);
return rc;
}

static int _PyCodecRegistry_Init(void)
{
static struct {
Expand Down Expand Up @@ -1531,6 +1592,11 @@ static int _PyCodecRegistry_Init(void)
if (mod == NULL) {
return -1;
}

if (_set_encodings_path(mod) < 0) {
Py_DECREF(mod);
return -1;
}
Py_DECREF(mod);
interp->codecs_initialized = 1;
return 0;
Expand Down
12 changes: 12 additions & 0 deletions Python/frozen.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@
#include "frozen_modules/zipimport.h"
#include "frozen_modules/abc.h"
#include "frozen_modules/codecs.h"
#include "frozen_modules/encodings.h"
#include "frozen_modules/encodings.aliases.h"
#include "frozen_modules/encodings.ascii.h"
#include "frozen_modules/encodings.utf_8.h"
#include "frozen_modules/io.h"
#include "frozen_modules/_collections_abc.h"
#include "frozen_modules/_sitebuiltins.h"
Expand Down Expand Up @@ -72,6 +76,10 @@ extern PyObject *_Py_get_importlib__bootstrap_external_toplevel(void);
extern PyObject *_Py_get_zipimport_toplevel(void);
extern PyObject *_Py_get_abc_toplevel(void);
extern PyObject *_Py_get_codecs_toplevel(void);
extern PyObject *_Py_get_encodings_toplevel(void);
extern PyObject *_Py_get_encodings_aliases_toplevel(void);
extern PyObject *_Py_get_encodings_ascii_toplevel(void);
extern PyObject *_Py_get_encodings_utf_8_toplevel(void);
extern PyObject *_Py_get_io_toplevel(void);
extern PyObject *_Py_get__collections_abc_toplevel(void);
extern PyObject *_Py_get__sitebuiltins_toplevel(void);
Expand Down Expand Up @@ -110,6 +118,10 @@ static const struct _frozen stdlib_modules[] = {
/* stdlib - startup, without site (python -S) */
{"abc", _Py_M__abc, (int)sizeof(_Py_M__abc), GET_CODE(abc)},
{"codecs", _Py_M__codecs, (int)sizeof(_Py_M__codecs), GET_CODE(codecs)},
{"encodings", _Py_M__encodings, -(int)sizeof(_Py_M__encodings), GET_CODE(encodings)},
{"encodings.aliases", _Py_M__encodings_aliases, (int)sizeof(_Py_M__encodings_aliases), GET_CODE(encodings_aliases)},
{"encodings.ascii", _Py_M__encodings_ascii, (int)sizeof(_Py_M__encodings_ascii), GET_CODE(encodings_ascii)},
{"encodings.utf_8", _Py_M__encodings_utf_8, (int)sizeof(_Py_M__encodings_utf_8), GET_CODE(encodings_utf_8)},
{"io", _Py_M__io, (int)sizeof(_Py_M__io), GET_CODE(io)},

/* stdlib - startup, with site */
Expand Down
8 changes: 4 additions & 4 deletions Tools/scripts/freeze_modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@
('stdlib - startup, without site (python -S)', [
'abc',
'codecs',
# For now we do not freeze the encodings, due # to the noise all
# those extra modules add to the text printed during the build.
# (See https://github.com/python/cpython/pull/28398#pullrequestreview-756856469.)
#'<encodings.*>',
'<encodings>', # encodings.__init__
'encodings.aliases',
'encodings.ascii',
'encodings.utf_8',
'io',
]),
('stdlib - startup, with site', [
Expand Down