Skip to content

Commit 73d538b

Browse files
committed
Always initialize Py_FileSystemDefaultEncoding on Unix in Py_Initialize,
and not as a side effect of setlocale. Expose it as sys.getfilesystemencoding. Adjust test case.
1 parent 620c083 commit 73d538b

File tree

6 files changed

+67
-22
lines changed

6 files changed

+67
-22
lines changed

Doc/lib/libsys.tex

+16
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,22 @@ \section{\module{sys} ---
211211
\versionadded{2.2}
212212
\end{funcdesc}
213213

214+
\begin{funcdesc}{getfilesystemencoding}{}
215+
Return the name of the encoding used to convert Unicode filenames
216+
into system file names, or \code{None} if the system default encoding
217+
is used. The result value depends on the operating system:
218+
\begin{itemize}
219+
\item On Windows 9x, the encoding is ``mbcs''.
220+
\item On Mac OS X, the encoding is ``utf-8''.
221+
\item On Unix, the encoding is the user's preference
222+
according to the result of \code{nl_langinfo(CODESET)}, or \code{None} if
223+
the call to \code{nl_langinfo(CODESET)} failed.
224+
\item On Windows NT+, file names are Unicode natively, so no conversion
225+
is performed.
226+
\end{itemize}
227+
\versionadded{2.3}
228+
\end{funcdesc}
229+
214230
\begin{funcdesc}{getrefcount}{object}
215231
Return the reference count of the \var{object}. The count returned
216232
is generally one higher than you might expect, because it includes

Lib/test/test_support.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,7 @@ def fcmp(x, y): # fuzzy comparison function
109109
TESTFN_UNICODE="@test-\xe0\xf2"
110110
else:
111111
TESTFN_UNICODE=unicode("@test-\xe0\xf2", "latin-1") # 2 latin characters.
112-
if os.name=="nt":
113-
TESTFN_ENCODING="mbcs"
112+
TESTFN_ENCODING=sys.getfilesystemencoding()
114113
else:
115114
TESTFN = 'test'
116115

Misc/NEWS

+2
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ What's New in Python 2.3 beta 1?
1212
Core and builtins
1313
-----------------
1414

15+
- sys.getfilesystemencoding() was added to expose
16+
Py_FileSystemDefaultEncoding.
1517

1618
- New function sys.exc_clear() clears the current exception. This is
1719
rarely needed, but can sometimes be useful to release objects

Modules/_localemodule.c

-20
Original file line numberDiff line numberDiff line change
@@ -161,10 +161,6 @@ fixup_ulcase(void)
161161
Py_DECREF(ulo);
162162
}
163163

164-
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
165-
static int fileencoding_uses_locale = 0;
166-
#endif
167-
168164
static PyObject*
169165
PyLocale_setlocale(PyObject* self, PyObject* args)
170166
{
@@ -213,22 +209,6 @@ PyLocale_setlocale(PyObject* self, PyObject* args)
213209
fixup_ulcase();
214210
/* things that went wrong up to here are ignored */
215211
PyErr_Clear();
216-
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
217-
if (Py_FileSystemDefaultEncoding == NULL)
218-
fileencoding_uses_locale = 1;
219-
if (fileencoding_uses_locale) {
220-
char *codeset = nl_langinfo(CODESET);
221-
PyObject *enc = NULL;
222-
if (*codeset && (enc = PyCodec_Encoder(codeset))) {
223-
/* Release previous file encoding */
224-
if (Py_FileSystemDefaultEncoding)
225-
free((char *)Py_FileSystemDefaultEncoding);
226-
Py_FileSystemDefaultEncoding = strdup(codeset);
227-
Py_DECREF(enc);
228-
} else
229-
PyErr_Clear();
230-
}
231-
#endif
232212
} else {
233213
/* get locale */
234214
/* restore LC_NUMERIC first, if appropriate */

Python/pythonrun.c

+28
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@
1717
#include <signal.h>
1818
#endif
1919

20+
#ifdef HAVE_LANGINFO_H
21+
#include <locale.h>
22+
#include <langinfo.h>
23+
#endif
24+
2025
#ifdef MS_WINDOWS
2126
#undef BYTE
2227
#include "windows.h"
@@ -181,6 +186,29 @@ Py_Initialize(void)
181186
initsite(); /* Module site */
182187

183188
PyModule_WarningsModule = PyImport_ImportModule("warnings");
189+
190+
#if defined(Py_USING_UNICODE) && defined(HAVE_LANGINFO_H) && defined(CODESET)
191+
/* On Unix, set the file system encoding according to the
192+
user's preference, if the CODESET names a well-known
193+
Python codec, and Py_FileSystemDefaultEncoding isn't
194+
initialized by other means. */
195+
if (!Py_FileSystemDefaultEncoding) {
196+
char *saved_locale = setlocale(LC_CTYPE, NULL);
197+
char *codeset;
198+
setlocale(LC_CTYPE, "");
199+
codeset = nl_langinfo(CODESET);
200+
PyObject *enc = NULL;
201+
if (*codeset) {
202+
enc = PyCodec_Encoder(codeset);
203+
if (enc) {
204+
Py_FileSystemDefaultEncoding = strdup(codeset);
205+
Py_DECREF(enc);
206+
} else
207+
PyErr_Clear();
208+
}
209+
setlocale(LC_CTYPE, saved_locale);
210+
}
211+
#endif
184212
}
185213

186214
#ifdef COUNT_ALLOCS

Python/sysmodule.c

+20
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,22 @@ PyDoc_STRVAR(setdefaultencoding_doc,
236236
Set the current default string encoding used by the Unicode implementation."
237237
);
238238

239+
static PyObject *
240+
sys_getfilesystemencoding(PyObject *self)
241+
{
242+
if (Py_FileSystemDefaultEncoding)
243+
return PyString_FromString(Py_FileSystemDefaultEncoding);
244+
Py_INCREF(Py_None);
245+
return Py_None;
246+
}
247+
248+
PyDoc_STRVAR(getfilesystemencoding_doc,
249+
"getfilesystemencoding() -> string\n\
250+
\n\
251+
Return the encoding used to convert Unicode filenames in\n\
252+
operating system filenames."
253+
);
254+
239255
#endif
240256

241257
/*
@@ -649,6 +665,10 @@ static PyMethodDef sys_methods[] = {
649665
#ifdef DYNAMIC_EXECUTION_PROFILE
650666
{"getdxp", _Py_GetDXProfile, METH_VARARGS},
651667
#endif
668+
#ifdef Py_USING_UNICODE
669+
{"getfilesystemencoding", (PyCFunction)sys_getfilesystemencoding,
670+
METH_NOARGS, getfilesystemencoding_doc},
671+
#endif
652672
#ifdef Py_TRACE_REFS
653673
{"getobjects", _Py_GetObjects, METH_VARARGS},
654674
#endif

0 commit comments

Comments
 (0)