Skip to content

Commit b744ba1

Browse files
author
Victor Stinner
committed
Issue #8610: Load file system codec at startup, and display a fatal error on
failure. Set the file system encoding to utf-8 (instead of None) if getting the locale encoding failed, or if nl_langinfo(CODESET) function is missing.
1 parent 06ba9ad commit b744ba1

File tree

4 files changed

+62
-27
lines changed

4 files changed

+62
-27
lines changed

Doc/library/sys.rst

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -298,15 +298,13 @@ always available.
298298

299299
.. function:: getfilesystemencoding()
300300

301-
Return the name of the encoding used to convert Unicode filenames into system
302-
file names, or ``None`` if the system default encoding is used. The result value
303-
depends on the operating system:
301+
Return the name of the encoding used to convert Unicode filenames into
302+
system file names. The result value depends on the operating system:
304303

305304
* On Mac OS X, the encoding is ``'utf-8'``.
306305

307306
* On Unix, the encoding is the user's preference according to the result of
308-
nl_langinfo(CODESET), or ``None`` if the ``nl_langinfo(CODESET)``
309-
failed.
307+
``nl_langinfo(CODESET)``, or ``'utf-8'`` if ``nl_langinfo(CODESET)`` failed.
310308

311309
* On Windows NT+, file names are Unicode natively, so no conversion is
312310
performed. :func:`getfilesystemencoding` still returns ``'mbcs'``, as
@@ -316,6 +314,10 @@ always available.
316314

317315
* On Windows 9x, the encoding is ``'mbcs'``.
318316

317+
.. versionchanged:: 3.2
318+
On Unix, use ``'utf-8'`` instead of ``None`` if ``nl_langinfo(CODESET)``
319+
failed. The result of :func:`getfilesystemencoding` can no longer be ``None``.
320+
319321

320322
.. function:: getrefcount(object)
321323

Misc/NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ What's New in Python 3.2 Alpha 1?
1212
Core and Builtins
1313
-----------------
1414

15+
- Issue #8610: Load file system codec at startup, and display a fatal error on
16+
failure. Set the file system encoding to utf-8 (instead of None) if getting
17+
the locale encoding failed, or if nl_langinfo(CODESET) function is missing.
18+
1519
- PyFile_FromFd() uses PyUnicode_DecodeFSDefault() instead of
1620
PyUnicode_FromString() to support surrogates in the filename and use the
1721
right encoding

Python/bltinmodule.c

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@
99

1010
#include <ctype.h>
1111

12+
#ifdef HAVE_LANGINFO_H
13+
#include <langinfo.h> /* CODESET */
14+
#endif
15+
1216
/* The default encoding used by the platform file system APIs
1317
Can remain NULL for all platforms that don't have such a concept
1418
@@ -21,9 +25,12 @@ int Py_HasFileSystemDefaultEncoding = 1;
2125
#elif defined(__APPLE__)
2226
const char *Py_FileSystemDefaultEncoding = "utf-8";
2327
int Py_HasFileSystemDefaultEncoding = 1;
24-
#else
25-
const char *Py_FileSystemDefaultEncoding = NULL; /* use default */
28+
#elif defined(HAVE_LANGINFO_H) && defined(CODESET)
29+
const char *Py_FileSystemDefaultEncoding = NULL; /* set by initfsencoding() */
2630
int Py_HasFileSystemDefaultEncoding = 0;
31+
#else
32+
const char *Py_FileSystemDefaultEncoding = "utf-8";
33+
int Py_HasFileSystemDefaultEncoding = 1;
2734
#endif
2835

2936
int

Python/pythonrun.c

Lines changed: 42 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ extern grammar _PyParser_Grammar; /* From graminit.c */
5757

5858
/* Forward */
5959
static void initmain(void);
60+
static void initfsencoding(void);
6061
static void initsite(void);
6162
static int initstdio(void);
6263
static void flush_io(void);
@@ -159,7 +160,6 @@ get_codeset(void)
159160

160161
error:
161162
Py_XDECREF(codec);
162-
PyErr_Clear();
163163
return NULL;
164164
}
165165
#endif
@@ -171,9 +171,6 @@ Py_InitializeEx(int install_sigs)
171171
PyThreadState *tstate;
172172
PyObject *bimod, *sysmod, *pstderr;
173173
char *p;
174-
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
175-
char *codeset;
176-
#endif
177174
extern void _Py_ReadyTypes(void);
178175

179176
if (initialized)
@@ -264,21 +261,7 @@ Py_InitializeEx(int install_sigs)
264261

265262
_PyImportHooks_Init();
266263

267-
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
268-
/* On Unix, set the file system encoding according to the
269-
user's preference, if the CODESET names a well-known
270-
Python codec, and Py_FileSystemDefaultEncoding isn't
271-
initialized by other means. Also set the encoding of
272-
stdin and stdout if these are terminals. */
273-
274-
codeset = get_codeset();
275-
if (codeset) {
276-
if (!Py_FileSystemDefaultEncoding)
277-
Py_FileSystemDefaultEncoding = codeset;
278-
else
279-
free(codeset);
280-
}
281-
#endif
264+
initfsencoding();
282265

283266
if (install_sigs)
284267
initsigs(); /* Signal handling stuff, including initintr() */
@@ -496,7 +479,7 @@ Py_Finalize(void)
496479
_PyUnicode_Fini();
497480

498481
/* reset file system default encoding */
499-
if (!Py_HasFileSystemDefaultEncoding) {
482+
if (!Py_HasFileSystemDefaultEncoding && Py_FileSystemDefaultEncoding) {
500483
free((char*)Py_FileSystemDefaultEncoding);
501484
Py_FileSystemDefaultEncoding = NULL;
502485
}
@@ -707,6 +690,45 @@ initmain(void)
707690
}
708691
}
709692

693+
static void
694+
initfsencoding(void)
695+
{
696+
PyObject *codec;
697+
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
698+
char *codeset;
699+
700+
/* On Unix, set the file system encoding according to the
701+
user's preference, if the CODESET names a well-known
702+
Python codec, and Py_FileSystemDefaultEncoding isn't
703+
initialized by other means. Also set the encoding of
704+
stdin and stdout if these are terminals. */
705+
codeset = get_codeset();
706+
if (codeset != NULL) {
707+
Py_FileSystemDefaultEncoding = codeset;
708+
Py_HasFileSystemDefaultEncoding = 0;
709+
return;
710+
}
711+
712+
PyErr_Clear();
713+
fprintf(stderr,
714+
"Unable to get the locale encoding: "
715+
"fallback to utf-8\n");
716+
Py_FileSystemDefaultEncoding = "utf-8";
717+
Py_HasFileSystemDefaultEncoding = 1;
718+
#endif
719+
720+
/* the encoding is mbcs, utf-8 or ascii */
721+
codec = _PyCodec_Lookup(Py_FileSystemDefaultEncoding);
722+
if (!codec) {
723+
/* Such error can only occurs in critical situations: no more
724+
* memory, import a module of the standard library failed,
725+
* etc. */
726+
Py_FatalError("Py_Initialize: unable to load the file system codec");
727+
} else {
728+
Py_DECREF(codec);
729+
}
730+
}
731+
710732
/* Import the site module (not into __main__ though) */
711733

712734
static void

0 commit comments

Comments
 (0)