[Python-checkins] gh-91156: Use `locale.getencoding()` instead of getpreferredencoding (GH-91732)

Thu Apr 21 21:39:39 EDT 2022

https://github.com/python/cpython/commit/1317b70f89606bd14597116b7ab68a968ea6c017
commit: 1317b70f89606bd14597116b7ab68a968ea6c017
branch: main
author: Inada Naoki <songofacandy at gmail.com>
committer: methane <songofacandy at gmail.com>
date: 2022-04-22T10:39:24+09:00
summary:
gh-91156: Use `locale.getencoding()` instead of getpreferredencoding (GH-91732)
Co-authored-by: Victor Stinner <vstinner at python.org>
files:
M Doc/howto/curses.rst
M Doc/library/csv.rst
M Doc/library/curses.rst
M Doc/library/functions.rst
M Doc/library/os.rst
M Lib/test/libregrtest/main.py
M Lib/test/pythoninfo.py
M Lib/test/support/__init__.py
M Lib/test/test__locale.py
M Lib/test/test_builtin.py
M Lib/test/test_cmd_line.py
M Lib/test/test_io.py
M Lib/test/test_locale.py
M Lib/test/test_mimetypes.py

diff --git a/Doc/howto/curses.rst b/Doc/howto/curses.rst
index c0149ffff3771..26c4ece5ae6df 100644
--- a/Doc/howto/curses.rst
+++ b/Doc/howto/curses.rst
@@ -299,8 +299,7 @@ The :meth:`~curses.window.addstr` method takes a Python string or
 bytestring as the value to be displayed. The contents of bytestrings
 are sent to the terminal as-is. Strings are encoded to bytes using
 the value of the window's :attr:`encoding` attribute; this defaults to
-the default system encoding as returned by
-:func:`locale.getpreferredencoding`.
+the default system encoding as returned by :func:`locale.getencoding`.
 
 The :meth:`~curses.window.addch` methods take a character, which can be
 either a string of length 1, a bytestring of length 1, or an integer.
diff --git a/Doc/library/csv.rst b/Doc/library/csv.rst
index 3a7817cfdfad8..9dec7240d9c50 100644
--- a/Doc/library/csv.rst
+++ b/Doc/library/csv.rst
@@ -542,7 +542,7 @@ The corresponding simplest possible writing example is::
 
 Since :func:`open` is used to open a CSV file for reading, the file
 will by default be decoded into unicode using the system default
-encoding (see :func:`locale.getpreferredencoding`). To decode a file
+encoding (see :func:`locale.getencoding`). To decode a file
 using a different encoding, use the ``encoding`` argument of open::
 
 import csv
diff --git a/Doc/library/curses.rst b/Doc/library/curses.rst
index 37e822c0e2b20..a7cc495277801 100644
--- a/Doc/library/curses.rst
+++ b/Doc/library/curses.rst
@@ -27,20 +27,6 @@ Linux and the BSD variants of Unix.
 Whenever the documentation mentions a *character string* it can be specified
 as a Unicode string or a byte string.
 
-.. note::
-
- Since version 5.4, the ncurses library decides how to interpret non-ASCII data
- using the ``nl_langinfo`` function. That means that you have to call
- :func:`locale.setlocale` in the application and encode Unicode strings
- using one of the system's available encodings. This example uses the
- system's default encoding::
-
- import locale
- locale.setlocale(locale.LC_ALL, '')
- code = locale.getpreferredencoding()
-
- Then use *code* as the encoding for :meth:`str.encode` calls.
-
 .. seealso::
 
 Module :mod:`curses.ascii`
@@ -923,8 +909,8 @@ the following methods and attributes:
 
 Encoding used to encode method arguments (Unicode strings and characters).
 The encoding attribute is inherited from the parent window when a subwindow
- is created, for example with :meth:`window.subwin`. By default, the locale
- encoding is used (see :func:`locale.getpreferredencoding`).
+ is created, for example with :meth:`window.subwin`.
+ By default, current locale encoding is used (see :func:`locale.getencoding`).
 
 .. versionadded:: 3.3
 
diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst
index e6fd0bb5eeef9..f3b8e40babbd8 100644
--- a/Doc/library/functions.rst
+++ b/Doc/library/functions.rst
@@ -1123,8 +1123,8 @@ are always available. They are listed here in alphabetical order.
 (which on *some* Unix systems, means that *all* writes append to the end of
 the file regardless of the current seek position). In text mode, if
 *encoding* is not specified the encoding used is platform-dependent:
- ``locale.getpreferredencoding(False)`` is called to get the current locale
- encoding. (For reading and writing raw bytes use binary mode and leave
+ :func:`locale.getencoding()` is called to get the current locale encoding.
+ (For reading and writing raw bytes use binary mode and leave
 *encoding* unspecified.) The available modes are:
 
 .. _filemodes:
@@ -1183,10 +1183,9 @@ are always available. They are listed here in alphabetical order.
 
 *encoding* is the name of the encoding used to decode or encode the file.
 This should only be used in text mode. The default encoding is platform
- dependent (whatever :func:`locale.getpreferredencoding` returns), but any
- :term:`text encoding` supported by Python
- can be used. See the :mod:`codecs` module for
- the list of supported encodings.
+ dependent (whatever :func:`locale.getencoding` returns), but any
+ :term:`text encoding` supported by Python can be used.
+ See the :mod:`codecs` module for the list of supported encodings.
 
 *errors* is an optional string that specifies how encoding and decoding
 errors are to be handled—this cannot be used in binary mode.
diff --git a/Doc/library/os.rst b/Doc/library/os.rst
index c22bf56a9f2cd..471890e74c8e5 100644
--- a/Doc/library/os.rst
+++ b/Doc/library/os.rst
@@ -105,15 +105,15 @@ of the UTF-8 encoding:
 
 * Use UTF-8 as the :term:`filesystem encoding <filesystem encoding and error
 handler>`.
-* :func:`sys.getfilesystemencoding()` returns ``'UTF-8'``.
-* :func:`locale.getpreferredencoding()` returns ``'UTF-8'`` (the *do_setlocale*
+* :func:`sys.getfilesystemencoding()` returns ``'utf-8'``.
+* :func:`locale.getpreferredencoding()` returns ``'utf-8'`` (the *do_setlocale*
 argument has no effect).
 * :data:`sys.stdin`, :data:`sys.stdout`, and :data:`sys.stderr` all use
 UTF-8 as their text encoding, with the ``surrogateescape``
 :ref:`error handler <error-handlers>` being enabled for :data:`sys.stdin`
 and :data:`sys.stdout` (:data:`sys.stderr` continues to use
 ``backslashreplace`` as it does in the default locale-aware mode)
-* On Unix, :func:`os.device_encoding` returns ``'UTF-8'`` rather than the
+* On Unix, :func:`os.device_encoding` returns ``'utf-8'`` rather than the
 device encoding.
 
 Note that the standard stream settings in UTF-8 mode can be overridden by
diff --git a/Lib/test/libregrtest/main.py b/Lib/test/libregrtest/main.py
index e7e3dde0b0a66..0cacccfc0b5e3 100644
--- a/Lib/test/libregrtest/main.py
+++ b/Lib/test/libregrtest/main.py
@@ -482,8 +482,7 @@ def display_header(self):
 if cpu_count:
 print("== CPU count:", cpu_count)
 print("== encodings: locale=%s, FS=%s"
- % (locale.getpreferredencoding(False),
- sys.getfilesystemencoding()))
+ % (locale.getencoding(), sys.getfilesystemencoding()))
 
 def get_tests_result(self):
 result = []
diff --git a/Lib/test/pythoninfo.py b/Lib/test/pythoninfo.py
index b00830c279e87..39301e6397aab 100644
--- a/Lib/test/pythoninfo.py
+++ b/Lib/test/pythoninfo.py
@@ -155,7 +155,7 @@ def collect_platform(info_add):
 def collect_locale(info_add):
 import locale
 
- info_add('locale.encoding', locale.getpreferredencoding(False))
+ info_add('locale.getencoding', locale.getencoding())
 
 
 def collect_builtins(info_add):
diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py
index c5666d66f4782..3b2f33979db9a 100644
--- a/Lib/test/support/__init__.py
+++ b/Lib/test/support/__init__.py
@@ -1445,7 +1445,7 @@ def skip_if_buggy_ucrt_strfptime(test):
 global _buggy_ucrt
 if _buggy_ucrt is None:
 if(sys.platform == 'win32' and
- locale.getpreferredencoding(False) == 'cp65001' and
+ locale.getencoding() == 'cp65001' and
 time.localtime().tm_zone == ''):
 _buggy_ucrt = True
 else:
diff --git a/Lib/test/test__locale.py b/Lib/test/test__locale.py
index e25c92c2c82c5..b3bc54cd55104 100644
--- a/Lib/test/test__locale.py
+++ b/Lib/test/test__locale.py
@@ -43,7 +43,7 @@ def setUpModule():
 locale.setlocale(locale.LC_ALL, loc)
 except Error:
 continue
- encoding = locale.getpreferredencoding(False)
+ encoding = locale.getencoding()
 try:
 localeconv()
 except Exception as err:
diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py
index a601a524d6eb7..29039230201ac 100644
--- a/Lib/test/test_builtin.py
+++ b/Lib/test/test_builtin.py
@@ -1204,7 +1204,7 @@ def test_open_default_encoding(self):
 del os.environ[key]
 
 self.write_testfile()
- current_locale_encoding = locale.getpreferredencoding(False)
+ current_locale_encoding = locale.getencoding()
 with warnings.catch_warnings():
 warnings.simplefilter("ignore", EncodingWarning)
 fp = open(TESTFN, 'w')
diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py
index 84eab71f97701..e8f1964c2a40d 100644
--- a/Lib/test/test_cmd_line.py
+++ b/Lib/test/test_cmd_line.py
@@ -216,7 +216,7 @@ def test_undecodable_code(self):
 code = (
 b'import locale; '
 b'print(ascii("' + undecodable + b'"), '
- b'locale.getpreferredencoding())')
+ b'locale.getencoding())')
 p = subprocess.Popen(
 [sys.executable, "-c", code],
 stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
index 45bf81b61f416..5528c461e58ae 100644
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -2726,7 +2726,7 @@ def test_default_encoding(self):
 if key in os.environ:
 del os.environ[key]
 
- current_locale_encoding = locale.getpreferredencoding(False)
+ current_locale_encoding = locale.getencoding()
 b = self.BytesIO()
 with warnings.catch_warnings():
 warnings.simplefilter("ignore", EncodingWarning)
diff --git a/Lib/test/test_locale.py b/Lib/test/test_locale.py
index 774b0fcd33344..5cb6edc52d777 100644
--- a/Lib/test/test_locale.py
+++ b/Lib/test/test_locale.py
@@ -363,7 +363,7 @@ class TestEnUSCollation(BaseLocalizedTest, TestCollation):
 locale_type = locale.LC_ALL
 
 def setUp(self):
- enc = codecs.lookup(locale.getpreferredencoding(False) or 'ascii').name
+ enc = codecs.lookup(locale.getencoding() or 'ascii').name
 if enc not in ('utf-8', 'iso8859-1', 'cp1252'):
 raise unittest.SkipTest('encoding not suitable')
 if enc != 'iso8859-1' and (sys.platform == 'darwin' or is_android or
@@ -533,6 +533,14 @@ def test_defaults_UTF8(self):
 if orig_getlocale is not None:
 _locale._getdefaultlocale = orig_getlocale
 
+ def test_getencoding(self):
+ # Invoke getencoding to make sure it does not cause exceptions.
+ enc = locale.getencoding()
+ self.assertIsInstance(enc, str)
+ self.assertNotEqual(enc, "")
+ # make sure it is valid
+ codecs.lookup(enc)
+
 def test_getpreferredencoding(self):
 # Invoke getpreferredencoding to make sure it does not cause exceptions.
 enc = locale.getpreferredencoding()
diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py
index 3477b18376a4f..f2b103693a9b2 100644
--- a/Lib/test/test_mimetypes.py
+++ b/Lib/test/test_mimetypes.py
@@ -1,5 +1,4 @@
 import io
-import locale
 import mimetypes
 import pathlib
 import sys
@@ -33,7 +32,7 @@ def tearDownModule():
 class MimeTypesTestCase(unittest.TestCase):
 def setUp(self):
 self.db = mimetypes.MimeTypes()
- 
+
 def test_case_sensitivity(self):
 eq = self.assertEqual
 eq(self.db.guess_type("foobar.HTML"), self.db.guess_type("foobar.html"))
@@ -145,11 +144,6 @@ def test_guess_all_types(self):
 self.assertNotIn('.no-such-ext', all)
 
 def test_encoding(self):
- getpreferredencoding = locale.getpreferredencoding
- self.addCleanup(setattr, locale, 'getpreferredencoding',
- getpreferredencoding)
- locale.getpreferredencoding = lambda: 'ascii'
-
 filename = support.findfile("mime.types")
 mimes = mimetypes.MimeTypes([filename])
 exts = mimes.guess_all_extensions('application/vnd.geocube+xml',