[Python-checkins] cpython: Issue #27959: Adds oem encoding, alias ansi to mbcs, move aliasmbcs to codec lookup

steve.dower python-checkins at python.org
Tue Sep 6 22:42:59 EDT 2016


https://hg.python.org/cpython/rev/c499690f606c
changeset: 103199:c499690f606c
user: Steve Dower <steve.dower at microsoft.com>
date: Tue Sep 06 19:42:27 2016 -0700
summary:
 Issue #27959: Adds oem encoding, alias ansi to mbcs, move aliasmbcs to codec lookup
files:
 Include/unicodeobject.h | 2 +-
 Lib/encodings/__init__.py | 10 ++
 Lib/encodings/aliases.py | 1 +
 Lib/encodings/oem.py | 41 ++++++++++
 Lib/site.py | 16 ---
 Lib/test/test_codecs.py | 62 +++++++--------
 Modules/_codecsmodule.c | 36 ++++++++
 Modules/clinic/_codecsmodule.c.h | 81 +++++++++++++++++++-
 8 files changed, 198 insertions(+), 51 deletions(-)
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -1663,7 +1663,7 @@
 
 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
 const char *string, /* MBCS encoded string */
- Py_ssize_t length, /* size of string */
+ Py_ssize_t length, /* size of string */
 const char *errors /* error handling */
 );
 
diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py
--- a/Lib/encodings/__init__.py
+++ b/Lib/encodings/__init__.py
@@ -29,6 +29,7 @@
 """#"
 
 import codecs
+import sys
 from . import aliases
 
 _cache = {}
@@ -151,3 +152,12 @@
 
 # Register the search_function in the Python codec registry
 codecs.register(search_function)
+
+if sys.platform == 'win32':
+ def _alias_mbcs(encoding):
+ import _bootlocale
+ if encoding == _bootlocale.getpreferredencoding(False):
+ import encodings.mbcs
+ return encodings.mbcs.getregentry()
+
+ codecs.register(_alias_mbcs)
diff --git a/Lib/encodings/aliases.py b/Lib/encodings/aliases.py
--- a/Lib/encodings/aliases.py
+++ b/Lib/encodings/aliases.py
@@ -458,6 +458,7 @@
 'macturkish' : 'mac_turkish',
 
 # mbcs codec
+ 'ansi' : 'mbcs',
 'dbcs' : 'mbcs',
 
 # ptcp154 codec
diff --git a/Lib/encodings/oem.py b/Lib/encodings/oem.py
new file mode 100644
--- /dev/null
+++ b/Lib/encodings/oem.py
@@ -0,0 +1,41 @@
+""" Python 'oem' Codec for Windows
+
+"""
+# Import them explicitly to cause an ImportError
+# on non-Windows systems
+from codecs import oem_encode, oem_decode
+# for IncrementalDecoder, IncrementalEncoder, ...
+import codecs
+
+### Codec APIs
+
+encode = oem_encode
+
+def decode(input, errors='strict'):
+ return oem_decode(input, errors, True)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return oem_encode(input, self.errors)[0]
+
+class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
+ _buffer_decode = oem_decode
+
+class StreamWriter(codecs.StreamWriter):
+ encode = oem_encode
+
+class StreamReader(codecs.StreamReader):
+ decode = oem_decode
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='oem',
+ encode=encode,
+ decode=decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
diff --git a/Lib/site.py b/Lib/site.py
--- a/Lib/site.py
+++ b/Lib/site.py
@@ -423,21 +423,6 @@
 
 sys.__interactivehook__ = register_readline
 
-def aliasmbcs():
- """On Windows, some default encodings are not provided by Python,
- while they are always available as "mbcs" in each locale. Make
- them usable by aliasing to "mbcs" in such a case."""
- if sys.platform == 'win32':
- import _bootlocale, codecs
- enc = _bootlocale.getpreferredencoding(False)
- if enc.startswith('cp'): # "cp***" ?
- try:
- codecs.lookup(enc)
- except LookupError:
- import encodings
- encodings._cache[enc] = encodings._unknown
- encodings.aliases.aliases[enc] = 'mbcs'
-
 CONFIG_LINE = r'^(?P<key>(\w|[-_])+)\s*=\s*(?P<value>.*)\s*$'
 
 def venv(known_paths):
@@ -560,7 +545,6 @@
 setcopyright()
 sethelper()
 enablerlcompleter()
- aliasmbcs()
 execsitecustomize()
 if ENABLE_USER_SITE:
 execusercustomize()
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -8,11 +8,6 @@
 
 from test import support
 
-if sys.platform == 'win32':
- VISTA_OR_LATER = (sys.getwindowsversion().major >= 6)
-else:
- VISTA_OR_LATER = False
-
 try:
 import ctypes
 except ImportError:
@@ -841,18 +836,13 @@
 ('abc', 'strict', b'abc'),
 ('\xe9\u20ac', 'strict', b'\xc3\xa9\xe2\x82\xac'),
 ('\U0010ffff', 'strict', b'\xf4\x8f\xbf\xbf'),
+ ('\udc80', 'strict', None),
+ ('\udc80', 'ignore', b''),
+ ('\udc80', 'replace', b'?'),
+ ('\udc80', 'backslashreplace', b'\\udc80'),
+ ('\udc80', 'namereplace', b'\\udc80'),
+ ('\udc80', 'surrogatepass', b'\xed\xb2\x80'),
 ]
- if VISTA_OR_LATER:
- tests.extend((
- ('\udc80', 'strict', None),
- ('\udc80', 'ignore', b''),
- ('\udc80', 'replace', b'?'),
- ('\udc80', 'backslashreplace', b'\\udc80'),
- ('\udc80', 'namereplace', b'\\udc80'),
- ('\udc80', 'surrogatepass', b'\xed\xb2\x80'),
- ))
- else:
- tests.append(('\udc80', 'strict', b'\xed\xb2\x80'))
 for text, errors, expected in tests:
 if expected is not None:
 try:
@@ -879,17 +869,10 @@
 (b'[\xff]', 'ignore', '[]'),
 (b'[\xff]', 'replace', '[\ufffd]'),
 (b'[\xff]', 'surrogateescape', '[\udcff]'),
+ (b'[\xed\xb2\x80]', 'strict', None),
+ (b'[\xed\xb2\x80]', 'ignore', '[]'),
+ (b'[\xed\xb2\x80]', 'replace', '[\ufffd\ufffd\ufffd]'),
 ]
- if VISTA_OR_LATER:
- tests.extend((
- (b'[\xed\xb2\x80]', 'strict', None),
- (b'[\xed\xb2\x80]', 'ignore', '[]'),
- (b'[\xed\xb2\x80]', 'replace', '[\ufffd\ufffd\ufffd]'),
- ))
- else:
- tests.extend((
- (b'[\xed\xb2\x80]', 'strict', '[\udc80]'),
- ))
 for raw, errors, expected in tests:
 if expected is not None:
 try:
@@ -904,7 +887,6 @@
 self.assertRaises(UnicodeDecodeError,
 raw.decode, 'cp65001', errors)
 
- @unittest.skipUnless(VISTA_OR_LATER, 'require Windows Vista or later')
 def test_lone_surrogates(self):
 self.assertRaises(UnicodeEncodeError, "\ud800".encode, "cp65001")
 self.assertRaises(UnicodeDecodeError, b"\xed\xa0\x80".decode, "cp65001")
@@ -921,7 +903,6 @@
 self.assertEqual("[\uDC80]".encode("cp65001", "replace"),
 b'[?]')
 
- @unittest.skipUnless(VISTA_OR_LATER, 'require Windows Vista or later')
 def test_surrogatepass_handler(self):
 self.assertEqual("abc\ud800def".encode("cp65001", "surrogatepass"),
 b"abc\xed\xa0\x80def")
@@ -1951,6 +1932,8 @@
 
 if hasattr(codecs, "mbcs_encode"):
 all_unicode_encodings.append("mbcs")
+if hasattr(codecs, "oem_encode"):
+ all_unicode_encodings.append("oem")
 
 # The following encoding is not tested, because it's not supposed
 # to work:
@@ -3119,11 +3102,10 @@
 (b'\xff\xf4\x8f\xbf\xbf', 'ignore', '\U0010ffff'),
 (b'\xff\xf4\x8f\xbf\xbf', 'replace', '\ufffd\U0010ffff'),
 ))
- if VISTA_OR_LATER:
- self.check_encode(self.CP_UTF8, (
- ('[\U0010ffff\uDC80]', 'ignore', b'[\xf4\x8f\xbf\xbf]'),
- ('[\U0010ffff\uDC80]', 'replace', b'[\xf4\x8f\xbf\xbf?]'),
- ))
+ self.check_encode(self.CP_UTF8, (
+ ('[\U0010ffff\uDC80]', 'ignore', b'[\xf4\x8f\xbf\xbf]'),
+ ('[\U0010ffff\uDC80]', 'replace', b'[\xf4\x8f\xbf\xbf?]'),
+ ))
 
 def test_incremental(self):
 decoded = codecs.code_page_decode(932, b'\x82', 'strict', False)
@@ -3144,6 +3126,20 @@
 False)
 self.assertEqual(decoded, ('abc', 3))
 
+ def test_mbcs_alias(self):
+ # Check that looking up our 'default' codepage will return
+ # mbcs when we don't have a more specific one available
+ import _bootlocale
+ def _get_fake_codepage(*a):
+ return 'cp123'
+ old_getpreferredencoding = _bootlocale.getpreferredencoding
+ _bootlocale.getpreferredencoding = _get_fake_codepage
+ try:
+ codec = codecs.lookup('cp123')
+ self.assertEqual(codec.name, 'mbcs')
+ finally:
+ _bootlocale.getpreferredencoding = old_getpreferredencoding
+
 
 class ASCIITest(unittest.TestCase):
 def test_encode(self):
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -626,6 +626,25 @@
 }
 
 /*[clinic input]
+_codecs.oem_decode
+ data: Py_buffer
+ errors: str(accept={str, NoneType}) = NULL
+ final: int(c_default="0") = False
+ /
+[clinic start generated code]*/
+
+static PyObject *
+_codecs_oem_decode_impl(PyObject *module, Py_buffer *data,
+ const char *errors, int final)
+/*[clinic end generated code: output=da1617612f3fcad8 input=95b8a92c446b03cd]*/
+{
+ Py_ssize_t consumed = data->len;
+ PyObject *decoded = PyUnicode_DecodeCodePageStateful(CP_OEMCP,
+ data->buf, data->len, errors, final ? NULL : &consumed);
+ return codec_tuple(decoded, consumed);
+}
+
+/*[clinic input]
 _codecs.code_page_decode
 codepage: int
 data: Py_buffer
@@ -971,6 +990,21 @@
 }
 
 /*[clinic input]
+_codecs.oem_encode
+ str: unicode
+ errors: str(accept={str, NoneType}) = NULL
+ /
+[clinic start generated code]*/
+
+static PyObject *
+_codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors)
+/*[clinic end generated code: output=65d5982c737de649 input=3fc5f0028aad3cda]*/
+{
+ return codec_tuple(PyUnicode_EncodeCodePage(CP_OEMCP, str, errors),
+ PyUnicode_GET_LENGTH(str));
+}
+
+/*[clinic input]
 _codecs.code_page_encode
 code_page: int
 str: unicode
@@ -1075,6 +1109,8 @@
 _CODECS_READBUFFER_ENCODE_METHODDEF
 _CODECS_MBCS_ENCODE_METHODDEF
 _CODECS_MBCS_DECODE_METHODDEF
+ _CODECS_OEM_ENCODE_METHODDEF
+ _CODECS_OEM_DECODE_METHODDEF
 _CODECS_CODE_PAGE_ENCODE_METHODDEF
 _CODECS_CODE_PAGE_DECODE_METHODDEF
 _CODECS_REGISTER_ERROR_METHODDEF
diff --git a/Modules/clinic/_codecsmodule.c.h b/Modules/clinic/_codecsmodule.c.h
--- a/Modules/clinic/_codecsmodule.c.h
+++ b/Modules/clinic/_codecsmodule.c.h
@@ -805,6 +805,45 @@
 
 #if defined(HAVE_MBCS)
 
+PyDoc_STRVAR(_codecs_oem_decode__doc__,
+"oem_decode($module, data, errors=None, final=False, /)\n"
+"--\n"
+"\n");
+
+#define _CODECS_OEM_DECODE_METHODDEF \
+ {"oem_decode", (PyCFunction)_codecs_oem_decode, METH_VARARGS, _codecs_oem_decode__doc__},
+
+static PyObject *
+_codecs_oem_decode_impl(PyObject *module, Py_buffer *data,
+ const char *errors, int final);
+
+static PyObject *
+_codecs_oem_decode(PyObject *module, PyObject *args)
+{
+ PyObject *return_value = NULL;
+ Py_buffer data = {NULL, NULL};
+ const char *errors = NULL;
+ int final = 0;
+
+ if (!PyArg_ParseTuple(args, "y*|zi:oem_decode",
+ &data, &errors, &final)) {
+ goto exit;
+ }
+ return_value = _codecs_oem_decode_impl(module, &data, errors, final);
+
+exit:
+ /* Cleanup for data */
+ if (data.obj) {
+ PyBuffer_Release(&data);
+ }
+
+ return return_value;
+}
+
+#endif /* defined(HAVE_MBCS) */
+
+#if defined(HAVE_MBCS)
+
 PyDoc_STRVAR(_codecs_code_page_decode__doc__,
 "code_page_decode($module, codepage, data, errors=None, final=False, /)\n"
 "--\n"
@@ -1346,6 +1385,38 @@
 
 #if defined(HAVE_MBCS)
 
+PyDoc_STRVAR(_codecs_oem_encode__doc__,
+"oem_encode($module, str, errors=None, /)\n"
+"--\n"
+"\n");
+
+#define _CODECS_OEM_ENCODE_METHODDEF \
+ {"oem_encode", (PyCFunction)_codecs_oem_encode, METH_VARARGS, _codecs_oem_encode__doc__},
+
+static PyObject *
+_codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors);
+
+static PyObject *
+_codecs_oem_encode(PyObject *module, PyObject *args)
+{
+ PyObject *return_value = NULL;
+ PyObject *str;
+ const char *errors = NULL;
+
+ if (!PyArg_ParseTuple(args, "U|z:oem_encode",
+ &str, &errors)) {
+ goto exit;
+ }
+ return_value = _codecs_oem_encode_impl(module, str, errors);
+
+exit:
+ return return_value;
+}
+
+#endif /* defined(HAVE_MBCS) */
+
+#if defined(HAVE_MBCS)
+
 PyDoc_STRVAR(_codecs_code_page_encode__doc__,
 "code_page_encode($module, code_page, str, errors=None, /)\n"
 "--\n"
@@ -1446,6 +1517,10 @@
 #define _CODECS_MBCS_DECODE_METHODDEF
 #endif /* !defined(_CODECS_MBCS_DECODE_METHODDEF) */
 
+#ifndef _CODECS_OEM_DECODE_METHODDEF
+ #define _CODECS_OEM_DECODE_METHODDEF
+#endif /* !defined(_CODECS_OEM_DECODE_METHODDEF) */
+
 #ifndef _CODECS_CODE_PAGE_DECODE_METHODDEF
 #define _CODECS_CODE_PAGE_DECODE_METHODDEF
 #endif /* !defined(_CODECS_CODE_PAGE_DECODE_METHODDEF) */
@@ -1454,7 +1529,11 @@
 #define _CODECS_MBCS_ENCODE_METHODDEF
 #endif /* !defined(_CODECS_MBCS_ENCODE_METHODDEF) */
 
+#ifndef _CODECS_OEM_ENCODE_METHODDEF
+ #define _CODECS_OEM_ENCODE_METHODDEF
+#endif /* !defined(_CODECS_OEM_ENCODE_METHODDEF) */
+
 #ifndef _CODECS_CODE_PAGE_ENCODE_METHODDEF
 #define _CODECS_CODE_PAGE_ENCODE_METHODDEF
 #endif /* !defined(_CODECS_CODE_PAGE_ENCODE_METHODDEF) */
-/*[clinic end generated code: output=0221e4eece62c905 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=7874e2d559d49368 input=a9049054013a1b77]*/
-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list

AltStyle によって変換されたページ (->オリジナル) /