diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -384,32 +384,11 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_T
 ((PyASCIIObject*)op)->length : \
 ((PyCompactUnicodeObject*)op)->wstr_length)
 
-/* Returns the deprecated Py_UNICODE representation's size in code units
- (this includes surrogate pairs as 2 units).
- If the Py_UNICODE representation is not available, it will be computed
- on request. Use PyUnicode_GET_LENGTH() for the length in code points. */
-
-#define PyUnicode_GET_SIZE(op) \
- (assert(PyUnicode_Check(op)), \
- (((PyASCIIObject *)(op))->wstr) ? \
- PyUnicode_WSTR_LENGTH(op) : \
- ((void)PyUnicode_AsUnicode((PyObject *)(op)), \
- assert(((PyASCIIObject *)(op))->wstr), \
- PyUnicode_WSTR_LENGTH(op)))
-
-#define PyUnicode_GET_DATA_SIZE(op) \
- (PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE)
-
 /* Alias for PyUnicode_AsUnicode(). This will create a wchar_t/Py_UNICODE
 representation on demand. Using this macro is very inefficient now,
 try to port your code to use the new PyUnicode_*BYTE_DATA() macros or
 use PyUnicode_WRITE() and PyUnicode_READ(). */
 
-#define PyUnicode_AS_UNICODE(op) \
- (assert(PyUnicode_Check(op)), \
- (((PyASCIIObject *)(op))->wstr) ? (((PyASCIIObject *)(op))->wstr) : \
- PyUnicode_AsUnicode((PyObject *)(op)))
-
 #define PyUnicode_AS_DATA(op) \
 ((const char *)(PyUnicode_AS_UNICODE(op)))
 
@@ -710,6 +689,25 @@ PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Cop
 PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
 PyObject *unicode /* Unicode object */
 );
+
+PyAPI_FUNC(Py_UNICODE *) PyUnicode_AS_UNICODE(
+ PyObject *unicode /* Unicode object */
+ );
+
+/* Returns the deprecated Py_UNICODE representation's size in code units
+ (this includes surrogate pairs as 2 units).
+ If the Py_UNICODE representation is not available, it will be computed
+ on request.
+ Return -1 on error.
+
+ Use PyUnicode_GET_LENGTH() for the length in code points. */
+
+PyAPI_FUNC(Py_ssize_t) PyUnicode_GET_SIZE(
+ PyObject *unicode /* Unicode object */
+ );
+
+#define PyUnicode_GET_DATA_SIZE(op) \
+ (PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE)
 #endif
 
 /* Return a read-only pointer to the Unicode object's internal
@@ -862,7 +860,7 @@ PyAPI_FUNC(void) _Py_ReleaseInternedUnic
 The buffer is copied into the new object. */
 
 PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
- register const wchar_t *w, /* wchar_t buffer */
+ const wchar_t *w, /* wchar_t buffer */
 Py_ssize_t size /* size of buffer */
 );
 
@@ -880,12 +878,12 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromWide
 
 PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
 PyObject *unicode, /* Unicode object */
- register wchar_t *w, /* wchar_t buffer */
+ wchar_t *w, /* wchar_t buffer */
 Py_ssize_t size /* size of buffer */
 );
 
 /* Convert the Unicode object to a wide character string. The output string
- always ends with a nul character. If size is not NULL, write the number of
+ always ends with a null character. If size is not NULL, write the number of
 wide characters (excluding the null character) into *size.
 
 Returns a buffer allocated by PyMem_Alloc() (use PyMem_Free() to free it)
@@ -897,6 +895,23 @@ PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCha
 Py_ssize_t *size /* number of characters of the result */
 );
 
+/* Convert the Unicode object to a wide character string. The output string
+ always ends with a null character. If size is not NULL, write the number of
+ wide characters (excluding the null character) into *size.
+
+ This function caches the wide character string in the Unicode object
+ and subsequent calls will return the same string. The memory is released
+ when the Unicode object is deallocated.
+
+ On error, returns NULL, *size is undefined and an exception is raised. */
+
+#ifndef Py_LIMITED_API
+PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharAndSize(
+ PyObject *unicode, /* Unicode object */
+ Py_ssize_t *size /* number of characters of the result */
+ );
+#endif
+
 #ifndef Py_LIMITED_API
 PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind);
 #endif
@@ -2020,7 +2035,7 @@ PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrc
 Py_UNICODE c
 );
 
-/* Create a copy of a unicode string ending with a nul character. Return NULL
+/* Create a copy of a unicode string ending with a null character. Return NULL
 and raise a MemoryError exception on memory allocation failure, otherwise
 return a new allocated buffer (use PyMem_Free() to free the buffer). */
 
diff --git a/Lib/test/test_capi.py b/Lib/test/test_capi.py
--- a/Lib/test/test_capi.py
+++ b/Lib/test/test_capi.py
@@ -8,6 +8,7 @@ import subprocess
 import sys
 import time
 import unittest
+import warnings
 from test import support
 try:
 import threading
@@ -179,10 +180,16 @@ def test_main():
 support.run_unittest(CAPITest, TestPendingCalls, Test6012, EmbeddingTest)
 
 for name in dir(_testcapi):
- if name.startswith('test_'):
- test = getattr(_testcapi, name)
- if support.verbose:
- print("internal", name)
+ if not name.startswith('test_'):
+ continue
+ test = getattr(_testcapi, name)
+ if support.verbose:
+ print("internal", name)
+ if name in ('test_u_code', 'test_Z_code'):
+ with warnings.catch_warnings():
+ warnings.simplefilter("ignore", DeprecationWarning)
+ test()
+ else:
 test()
 
 # some extra thread-state tests driven via _testcapi
diff --git a/Lib/test/test_getargs2.py b/Lib/test/test_getargs2.py
--- a/Lib/test/test_getargs2.py
+++ b/Lib/test/test_getargs2.py
@@ -1,4 +1,5 @@
 import unittest
+import warnings
 from test import support
 from _testcapi import getargs_keywords
 
@@ -400,39 +401,51 @@ class Bytes_TestCase(unittest.TestCase):
 class Unicode_TestCase(unittest.TestCase):
 def test_u(self):
 from _testcapi import getargs_u
- self.assertEqual(getargs_u('abc\xe9'), 'abc\xe9')
- self.assertRaises(TypeError, getargs_u, 'nul:0円')
- self.assertRaises(TypeError, getargs_u, b'bytes')
- self.assertRaises(TypeError, getargs_u, bytearray(b'bytearray'))
- self.assertRaises(TypeError, getargs_u, memoryview(b'memoryview'))
- self.assertRaises(TypeError, getargs_u, None)
+ with warnings.catch_warnings():
+ warnings.simplefilter("ignore", DeprecationWarning)
+
+ self.assertEqual(getargs_u('abc\xe9'), 'abc\xe9')
+ self.assertRaises(TypeError, getargs_u, 'nul:0円')
+ self.assertRaises(TypeError, getargs_u, b'bytes')
+ self.assertRaises(TypeError, getargs_u, bytearray(b'bytearray'))
+ self.assertRaises(TypeError, getargs_u, memoryview(b'memoryview'))
+ self.assertRaises(TypeError, getargs_u, None)
 
 def test_u_hash(self):
 from _testcapi import getargs_u_hash
- self.assertEqual(getargs_u_hash('abc\xe9'), 'abc\xe9')
- self.assertEqual(getargs_u_hash('nul:0円'), 'nul:0円')
- self.assertRaises(TypeError, getargs_u_hash, b'bytes')
- self.assertRaises(TypeError, getargs_u_hash, bytearray(b'bytearray'))
- self.assertRaises(TypeError, getargs_u_hash, memoryview(b'memoryview'))
- self.assertRaises(TypeError, getargs_u_hash, None)
+ with warnings.catch_warnings():
+ warnings.simplefilter("ignore", DeprecationWarning)
+
+ self.assertEqual(getargs_u_hash('abc\xe9'), 'abc\xe9')
+ self.assertEqual(getargs_u_hash('nul:0円'), 'nul:0円')
+ self.assertRaises(TypeError, getargs_u_hash, b'bytes')
+ self.assertRaises(TypeError, getargs_u_hash, bytearray(b'bytearray'))
+ self.assertRaises(TypeError, getargs_u_hash, memoryview(b'memoryview'))
+ self.assertRaises(TypeError, getargs_u_hash, None)
 
 def test_Z(self):
 from _testcapi import getargs_Z
- self.assertEqual(getargs_Z('abc\xe9'), 'abc\xe9')
- self.assertRaises(TypeError, getargs_Z, 'nul:0円')
- self.assertRaises(TypeError, getargs_Z, b'bytes')
- self.assertRaises(TypeError, getargs_Z, bytearray(b'bytearray'))
- self.assertRaises(TypeError, getargs_Z, memoryview(b'memoryview'))
- self.assertIsNone(getargs_Z(None))
+ with warnings.catch_warnings():
+ warnings.simplefilter("ignore", DeprecationWarning)
+
+ self.assertEqual(getargs_Z('abc\xe9'), 'abc\xe9')
+ self.assertRaises(TypeError, getargs_Z, 'nul:0円')
+ self.assertRaises(TypeError, getargs_Z, b'bytes')
+ self.assertRaises(TypeError, getargs_Z, bytearray(b'bytearray'))
+ self.assertRaises(TypeError, getargs_Z, memoryview(b'memoryview'))
+ self.assertIsNone(getargs_Z(None))
 
 def test_Z_hash(self):
 from _testcapi import getargs_Z_hash
- self.assertEqual(getargs_Z_hash('abc\xe9'), 'abc\xe9')
- self.assertEqual(getargs_Z_hash('nul:0円'), 'nul:0円')
- self.assertRaises(TypeError, getargs_Z_hash, b'bytes')
- self.assertRaises(TypeError, getargs_Z_hash, bytearray(b'bytearray'))
- self.assertRaises(TypeError, getargs_Z_hash, memoryview(b'memoryview'))
- self.assertIsNone(getargs_Z_hash(None))
+ with warnings.catch_warnings():
+ warnings.simplefilter("ignore", DeprecationWarning)
+
+ self.assertEqual(getargs_Z_hash('abc\xe9'), 'abc\xe9')
+ self.assertEqual(getargs_Z_hash('nul:0円'), 'nul:0円')
+ self.assertRaises(TypeError, getargs_Z_hash, b'bytes')
+ self.assertRaises(TypeError, getargs_Z_hash, bytearray(b'bytearray'))
+ self.assertRaises(TypeError, getargs_Z_hash, memoryview(b'memoryview'))
+ self.assertIsNone(getargs_Z_hash(None))
 
 
 def test_main():
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -1808,29 +1808,36 @@ class UnicodeTest(string_tests.CommonTes
 
 def test_encode_decimal(self):
 from _testcapi import unicode_encodedecimal
- self.assertEqual(unicode_encodedecimal('123'),
- b'123')
- self.assertEqual(unicode_encodedecimal('\u0663.\u0661\u0664'),
- b'3.14')
- self.assertEqual(unicode_encodedecimal("\N{EM SPACE}3.14\N{EN SPACE}"),
- b' 3.14 ')
- self.assertRaises(UnicodeEncodeError,
- unicode_encodedecimal, "123\u20ac", "strict")
- self.assertRaisesRegex(
- ValueError,
- "^'decimal' codec can't encode character",
- unicode_encodedecimal, "123\u20ac", "replace")
+ with warnings.catch_warnings():
+ warnings.simplefilter("ignore", DeprecationWarning)
+
+ self.assertEqual(unicode_encodedecimal('123'),
+ b'123')
+ self.assertEqual(unicode_encodedecimal('\u0663.\u0661\u0664'),
+ b'3.14')
+ self.assertEqual(unicode_encodedecimal("\N{EM SPACE}3.14\N{EN SPACE}"),
+ b' 3.14 ')
+ self.assertRaises(UnicodeEncodeError,
+ unicode_encodedecimal, "123\u20ac", "strict")
+ self.assertRaisesRegex(
+ ValueError,
+ "^'decimal' codec can't encode character",
+ unicode_encodedecimal, "123\u20ac", "replace")
 
 def test_transform_decimal(self):
 from _testcapi import unicode_transformdecimaltoascii as transform_decimal
- self.assertEqual(transform_decimal('123'),
- '123')
- self.assertEqual(transform_decimal('\u0663.\u0661\u0664'),
- '3.14')
- self.assertEqual(transform_decimal("\N{EM SPACE}3.14\N{EN SPACE}"),
- "\N{EM SPACE}3.14\N{EN SPACE}")
- self.assertEqual(transform_decimal('123\u20ac'),
- '123\u20ac')
+ with warnings.catch_warnings():
+ warnings.simplefilter("ignore", DeprecationWarning)
+
+
+ self.assertEqual(transform_decimal('123'),
+ '123')
+ self.assertEqual(transform_decimal('\u0663.\u0661\u0664'),
+ '3.14')
+ self.assertEqual(transform_decimal("\N{EM SPACE}3.14\N{EN SPACE}"),
+ "\N{EM SPACE}3.14\N{EN SPACE}")
+ self.assertEqual(transform_decimal('123\u20ac'),
+ '123\u20ac')
 
 def test_getnewargs(self):
 text = 'abc'
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -687,18 +687,15 @@ unicode_internal_encode(PyObject *self,
 return NULL;
 
 if (PyUnicode_Check(obj)) {
- Py_UNICODE *u;
+ wchar_t *wstr;
 
- if (PyUnicode_READY(obj) < 0) + wstr = PyUnicode_AsWideCharAndSize(obj, &len); + if (wstr == NULL) return NULL; - - u = PyUnicode_AsUnicodeAndSize(obj, &len); - if (u == NULL) - return NULL; - if (len> PY_SSIZE_T_MAX / sizeof(Py_UNICODE))
+ if (len> PY_SSIZE_T_MAX / sizeof(wchar_t))
 return PyErr_NoMemory();
- size = len * sizeof(Py_UNICODE);
- return codec_tuple(PyBytes_FromStringAndSize((const char*)u, size),
+ size = len * sizeof(wchar_t);
+ return codec_tuple(PyBytes_FromStringAndSize((const char*)wstr, size),
 PyUnicode_GET_LENGTH(obj));
 }
 else {
diff --git a/Modules/_ctypes/_ctypes.c b/Modules/_ctypes/_ctypes.c
--- a/Modules/_ctypes/_ctypes.c
+++ b/Modules/_ctypes/_ctypes.c
@@ -1142,7 +1142,7 @@ static int
 WCharArray_set_value(CDataObject *self, PyObject *value)
 {
 Py_ssize_t result = 0;
- Py_UNICODE *wstr;
+ wchar_t *wstr;
 Py_ssize_t len;
 
 if (value == NULL) {
@@ -1158,7 +1158,7 @@ WCharArray_set_value(CDataObject *self, 
 } else
 Py_INCREF(value);
 
- wstr = PyUnicode_AsUnicodeAndSize(value, &len);
+ wstr = PyUnicode_AsWideCharAndSize(value, &len);
 if (wstr == NULL)
 return -1;
 if ((unsigned)len> self->b_size/sizeof(wchar_t)) {
@@ -3130,10 +3130,10 @@ _validate_paramflags(PyTypeObject *type,
 for (i = 0; i < len; ++i) { PyObject *item = PyTuple_GET_ITEM(paramflags, i); int flag; - char *name; + PyObject *nameobj; PyObject *defval; PyObject *typ; - if (!PyArg_ParseTuple(item, "i&#124;ZO", &flag, &name, &defval)) { + if (!PyArg_ParseTuple(item, "i&#124;UO", &flag, &nameobj, &defval)) { PyErr_SetString(PyExc_TypeError, "paramflags must be a sequence of (int [,string [,value]]) tuples"); return 0; diff --git a/Modules/_ctypes/callproc.c b/Modules/_ctypes/callproc.c --- a/Modules/_ctypes/callproc.c +++ b/Modules/_ctypes/callproc.c @@ -1234,7 +1234,7 @@ static PyObject *load_library(PyObject * if (!PyArg_ParseTuple(args, "O&#124;O:LoadLibrary", &nameobj, &ignored)) return NULL; - name = PyUnicode_AsUnicode(nameobj); + name = PyUnicode_AsWideCharAndSize(nameobj, NULL); if (!name) return NULL; diff --git a/Modules/_ctypes/cfield.c b/Modules/_ctypes/cfield.c --- a/Modules/_ctypes/cfield.c +++ b/Modules/_ctypes/cfield.c @@ -1259,7 +1259,7 @@ U_get(void *ptr, Py_ssize_t size) static PyObject * U_set(void *ptr, PyObject *value, Py_ssize_t length) { - Py_UNICODE *wstr; + wchar_t *wstr; Py_ssize_t size; /* It's easier to calculate in characters than in bytes */ @@ -1273,7 +1273,7 @@ U_set(void *ptr, PyObject *value, Py_ssi } else Py_INCREF(value); - wstr = PyUnicode_AsUnicodeAndSize(value, &size); + wstr = PyUnicode_AsWideCharAndSize(value, &size); if (wstr == NULL) return NULL; if (size> length) {
@@ -1477,7 +1477,7 @@ BSTR_set(void *ptr, PyObject *value, Py_
 if (value) {
 wchar_t* wvalue;
 Py_ssize_t size;
- wvalue = PyUnicode_AsUnicodeAndSize(value, &size);
+ wvalue = PyUnicode_AsWideCharAndSize(value, &size);
 if (wvalue == NULL)
 return NULL;
 if ((unsigned) size != size) {
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -1502,6 +1502,7 @@ unicode_aswidecharstring(PyObject *self,
 static PyObject *
 unicode_encodedecimal(PyObject *self, PyObject *args)
 {
+ PyObject *obj;
 Py_UNICODE *unicode;
 Py_ssize_t length;
 char *errors = NULL;
@@ -1509,7 +1510,11 @@ unicode_encodedecimal(PyObject *self, Py
 Py_ssize_t decimal_length, new_length;
 int res;
 
- if (!PyArg_ParseTuple(args, "u#&#124;s", &unicode, &length, &errors))
+ if (!PyArg_ParseTuple(args, "U&#124;s", &obj, &errors))
+ return NULL;
+
+ unicode = PyUnicode_AsWideCharAndSize(obj, &length);
+ if (unicode == NULL)
 return NULL;
 
 decimal_length = length * 7; /* len('&#8364;') */
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1639,14 +1639,14 @@ get_latin1_char(unsigned char ch)
 return unicode;
 }
 
-PyObject *
-PyUnicode_FromUnicode(const Py_UNICODE *u, Py_ssize_t size)
+static PyObject *
+unicode_fromwidechar(const Py_UNICODE *wstr, Py_ssize_t size)
 {
 PyObject *unicode;
 Py_UCS4 maxchar = 0;
 Py_ssize_t num_surrogates;
 
- if (u == NULL)
+ if (wstr == NULL)
 return (PyObject*)_PyUnicode_New(size);
 
 /* If the Unicode data is known at construction time, we can apply
@@ -1660,12 +1660,12 @@ PyUnicode_FromUnicode(const Py_UNICODE *
 
 /* Single character Unicode objects in the Latin-1 range are
 shared when using this constructor */
- if (size == 1 && *u < 256) - return get_latin1_char((unsigned char)*u); + if (size == 1 && wstr[0] < 256) + return get_latin1_char((unsigned char)wstr[0]); /* If not empty and not single character, copy the Unicode data into the new object */ - if (find_maxchar_surrogates(u, u + size, + if (find_maxchar_surrogates(wstr, wstr + size, &maxchar, &num_surrogates) == -1) return NULL; @@ -1674,27 +1674,30 @@ PyUnicode_FromUnicode(const Py_UNICODE * if (!unicode) return NULL; - switch (PyUnicode_KIND(unicode)) { + switch (PyUnicode_KIND(unicode)) + { case PyUnicode_1BYTE_KIND: _PyUnicode_CONVERT_BYTES(Py_UNICODE, unsigned char, - u, u + size, PyUnicode_1BYTE_DATA(unicode)); + wstr, + wstr + size, PyUnicode_1BYTE_DATA(unicode)); break; case PyUnicode_2BYTE_KIND: #if Py_UNICODE_SIZE == 2 - Py_MEMCPY(PyUnicode_2BYTE_DATA(unicode), u, size * 2); + Py_MEMCPY(PyUnicode_2BYTE_DATA(unicode), wstr, size * 2); #else _PyUnicode_CONVERT_BYTES(Py_UNICODE, Py_UCS2, - u, u + size, PyUnicode_2BYTE_DATA(unicode)); + wstr, + wstr + size, PyUnicode_2BYTE_DATA(unicode)); #endif break; case PyUnicode_4BYTE_KIND: #if SIZEOF_WCHAR_T == 2 /* This is the only case which has to process surrogates, thus a simple copy loop is not enough and we need a function. */ - unicode_convert_wchar_to_ucs4(u, u + size, unicode); + unicode_convert_wchar_to_ucs4(wstr, wstr + size, unicode); #else assert(num_surrogates == 0); - Py_MEMCPY(PyUnicode_4BYTE_DATA(unicode), u, size * 4); + Py_MEMCPY(PyUnicode_4BYTE_DATA(unicode), wstr, size * 4); #endif break; default: @@ -1705,6 +1708,17 @@ PyUnicode_FromUnicode(const Py_UNICODE * } PyObject * +PyUnicode_FromUnicode(const Py_UNICODE *u, Py_ssize_t size) +{ + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "PyUnicode_FromUnicode() has been deprecated", + 1)) + return NULL; + + return unicode_fromwidechar(u, size); +} + +PyObject * PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size) { if (size < 0) { @@ -2113,7 +2127,7 @@ PyUnicode_AsUCS4Copy(PyObject *string) #ifdef HAVE_WCHAR_H PyObject * -PyUnicode_FromWideChar(register const wchar_t *w, Py_ssize_t size) +PyUnicode_FromWideChar(const wchar_t *w, Py_ssize_t size) { if (w == NULL) { if (size == 0) @@ -2126,7 +2140,7 @@ PyUnicode_FromWideChar(register const wc size = wcslen(w); } - return PyUnicode_FromUnicode(w, size); + return unicode_fromwidechar(w, size); } #endif /* HAVE_WCHAR_H */ @@ -2729,7 +2743,7 @@ unicode_aswidechar(PyObject *unicode, Py_ssize_t res; const wchar_t *wstr; - wstr = PyUnicode_AsUnicodeAndSize(unicode, &res); + wstr = PyUnicode_AsWideCharAndSize(unicode, &res); if (wstr == NULL) return -1; @@ -3421,8 +3435,8 @@ static int unicode_as_unicode_calls = 0; #endif -Py_UNICODE * -PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size) +wchar_t* +PyUnicode_AsWideCharAndSize(PyObject *unicode, Py_ssize_t *size) { const unsigned char *one_byte; #if SIZEOF_WCHAR_T == 4 @@ -3439,105 +3453,150 @@ PyUnicode_AsUnicodeAndSize(PyObject *uni PyErr_BadArgument(); return NULL; } - if (_PyUnicode_WSTR(unicode) == NULL) { - /* Non-ASCII compact unicode object */ - assert(_PyUnicode_KIND(unicode) != 0); - assert(PyUnicode_IS_READY(unicode)); + + if (_PyUnicode_WSTR(unicode) != NULL) { + if (size != NULL) + *size = PyUnicode_WSTR_LENGTH(unicode); + return _PyUnicode_WSTR(unicode); + } + + /* Non-ASCII compact unicode object */ + assert(_PyUnicode_KIND(unicode) != 0); + assert(PyUnicode_IS_READY(unicode)); #ifdef Py_DEBUG - ++unicode_as_unicode_calls; -#endif - - if (PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND) { + ++unicode_as_unicode_calls; +#endif + + if (PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND) { #if SIZEOF_WCHAR_T == 2 - four_bytes = PyUnicode_4BYTE_DATA(unicode); - ucs4_end = four_bytes + _PyUnicode_LENGTH(unicode); - num_surrogates = 0; - - for (; four_bytes < ucs4_end; ++four_bytes) { - if (*four_bytes> 0xFFFF)
- ++num_surrogates;
- }
-
- _PyUnicode_WSTR(unicode) = (wchar_t *) PyObject_MALLOC(
- sizeof(wchar_t) * (_PyUnicode_LENGTH(unicode) + 1 + num_surrogates));
- if (!_PyUnicode_WSTR(unicode)) {
- PyErr_NoMemory();
- return NULL;
- }
- _PyUnicode_WSTR_LENGTH(unicode) = _PyUnicode_LENGTH(unicode) + num_surrogates;
-
- w = _PyUnicode_WSTR(unicode);
- wchar_end = w + _PyUnicode_WSTR_LENGTH(unicode);
- four_bytes = PyUnicode_4BYTE_DATA(unicode);
- for (; four_bytes < ucs4_end; ++four_bytes, ++w) { - if (*four_bytes> 0xFFFF) {
- assert(*four_bytes <= 0x10FFFF); - /* encode surrogate pair in this case */ - *w++ = Py_UNICODE_HIGH_SURROGATE(*four_bytes); - *w = Py_UNICODE_LOW_SURROGATE(*four_bytes); - } - else - *w = *four_bytes; - - if (w> wchar_end) {
- assert(0 && "Miscalculated string end");
- }
- }
+ four_bytes = PyUnicode_4BYTE_DATA(unicode);
+ ucs4_end = four_bytes + _PyUnicode_LENGTH(unicode);
+ num_surrogates = 0;
+
+ for (; four_bytes < ucs4_end; ++four_bytes) { + if (*four_bytes> 0xFFFF)
+ ++num_surrogates;
+ }
+
+ _PyUnicode_WSTR(unicode) = (wchar_t *) PyObject_MALLOC(
+ sizeof(wchar_t) * (_PyUnicode_LENGTH(unicode) + 1 + num_surrogates));
+ if (!_PyUnicode_WSTR(unicode)) {
+ PyErr_NoMemory();
+ return NULL;
+ }
+ _PyUnicode_WSTR_LENGTH(unicode) = _PyUnicode_LENGTH(unicode) + num_surrogates;
+
+ w = _PyUnicode_WSTR(unicode);
+ wchar_end = w + _PyUnicode_WSTR_LENGTH(unicode);
+ four_bytes = PyUnicode_4BYTE_DATA(unicode);
+ for (; four_bytes < ucs4_end; ++four_bytes, ++w) { + if (*four_bytes> 0xFFFF) {
+ assert(*four_bytes <= 0x10FFFF); + /* encode surrogate pair in this case */ + *w++ = Py_UNICODE_HIGH_SURROGATE(*four_bytes); + *w = Py_UNICODE_LOW_SURROGATE(*four_bytes); + } + else + *w = *four_bytes; + + if (w> wchar_end) {
+ assert(0 && "Miscalculated string end");
+ }
+ }
+ *w = 0;
+#else
+ /* sizeof(wchar_t) == 4 */
+ Py_FatalError("Impossible unicode object state, wstr and str "
+ "should share memory already.");
+ return NULL;
+#endif
+ }
+ else {
+ _PyUnicode_WSTR(unicode) = (wchar_t *) PyObject_MALLOC(sizeof(wchar_t) *
+ (_PyUnicode_LENGTH(unicode) + 1));
+ if (!_PyUnicode_WSTR(unicode)) {
+ PyErr_NoMemory();
+ return NULL;
+ }
+ if (!PyUnicode_IS_COMPACT_ASCII(unicode))
+ _PyUnicode_WSTR_LENGTH(unicode) = _PyUnicode_LENGTH(unicode);
+ w = _PyUnicode_WSTR(unicode);
+ wchar_end = w + _PyUnicode_LENGTH(unicode);
+
+ if (PyUnicode_KIND(unicode) == PyUnicode_1BYTE_KIND) {
+ one_byte = PyUnicode_1BYTE_DATA(unicode);
+ for (; w < wchar_end; ++one_byte, ++w)
+ *w = *one_byte;
+ /* null-terminate the wstr */
+ *w = 0;
+ }
+ else if (PyUnicode_KIND(unicode) == PyUnicode_2BYTE_KIND) {
+#if SIZEOF_WCHAR_T == 4
+ two_bytes = PyUnicode_2BYTE_DATA(unicode);
+ for (; w < wchar_end; ++two_bytes, ++w)
+ *w = *two_bytes;
+ /* null-terminate the wstr */
 *w = 0;
 #else
- /* sizeof(wchar_t) == 4 */
- Py_FatalError("Impossible unicode object state, wstr and str "
- "should share memory already.");
+ /* sizeof(wchar_t) == 2 */
+ PyObject_FREE(_PyUnicode_WSTR(unicode));
+ _PyUnicode_WSTR(unicode) = NULL;
+ Py_FatalError("Impossible unicode object state, wstr "
+ "and str should share memory already.");
 return NULL;
 #endif
 }
 else {
- _PyUnicode_WSTR(unicode) = (wchar_t *) PyObject_MALLOC(sizeof(wchar_t) *
- (_PyUnicode_LENGTH(unicode) + 1));
- if (!_PyUnicode_WSTR(unicode)) {
- PyErr_NoMemory();
- return NULL;
- }
- if (!PyUnicode_IS_COMPACT_ASCII(unicode))
- _PyUnicode_WSTR_LENGTH(unicode) = _PyUnicode_LENGTH(unicode);
- w = _PyUnicode_WSTR(unicode);
- wchar_end = w + _PyUnicode_LENGTH(unicode);
-
- if (PyUnicode_KIND(unicode) == PyUnicode_1BYTE_KIND) {
- one_byte = PyUnicode_1BYTE_DATA(unicode);
- for (; w < wchar_end; ++one_byte, ++w)
- *w = *one_byte;
- /* null-terminate the wstr */
- *w = 0;
- }
- else if (PyUnicode_KIND(unicode) == PyUnicode_2BYTE_KIND) {
-#if SIZEOF_WCHAR_T == 4
- two_bytes = PyUnicode_2BYTE_DATA(unicode);
- for (; w < wchar_end; ++two_bytes, ++w)
- *w = *two_bytes;
- /* null-terminate the wstr */
- *w = 0;
-#else
- /* sizeof(wchar_t) == 2 */
- PyObject_FREE(_PyUnicode_WSTR(unicode));
- _PyUnicode_WSTR(unicode) = NULL;
- Py_FatalError("Impossible unicode object state, wstr "
- "and str should share memory already.");
- return NULL;
-#endif
- }
- else {
- assert(0 && "This should never happen.");
- }
- }
- }
+ assert(0 && "This should never happen.");
+ }
+ }
+
 if (size != NULL)
 *size = PyUnicode_WSTR_LENGTH(unicode);
 return _PyUnicode_WSTR(unicode);
 }
 
 Py_UNICODE *
+PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size)
+{
+ if (PyErr_WarnEx(PyExc_DeprecationWarning,
+ "PyUnicode_AsUnicodeAndSize() has been deprecated",
+ 1))
+ return NULL;
+ return PyUnicode_AsWideCharAndSize(unicode, size);
+}
+
+Py_UNICODE *
+PyUnicode_AS_UNICODE(PyObject *unicode)
+{
+ return PyUnicode_AsUnicodeAndSize(unicode, NULL);
+}
+
+Py_ssize_t
+PyUnicode_GET_SIZE(PyObject *unicode)
+{
+ Py_UNICODE *u;
+ assert(PyUnicode_Check(unicode));
+
+ if (PyErr_WarnEx(PyExc_DeprecationWarning,
+ "PyUnicode_GET_SIZE() has been deprecated",
+ 1))
+ return -1;
+
+ if (_PyUnicode_WSTR(unicode))
+ return PyUnicode_WSTR_LENGTH(unicode);
+ u = PyUnicode_AsUnicode(unicode);
+#ifdef Py_DEBUG
+ assert(u != NULL);
+#else
+ if (u == NULL)
+ return -1;
+#endif
+ return PyUnicode_WSTR_LENGTH(unicode);
+}
+
+Py_UNICODE *
 PyUnicode_AsUnicode(PyObject *unicode)
 {
 return PyUnicode_AsUnicodeAndSize(unicode, NULL);
</div><div class="naked_ctrl">
<form action="/index.cgi/contrast" method="get" name="gate">
<p><a href="http://altstyle.alfasado.net">AltStyle</a> によって変換されたページ <a href="https://bugs.python.org/file23845/unicode_warn_deprecate.patch">(-&gt;オリジナル)</a>
/ <label>アドレス: <input type="text" name="naked_post_url" value="https://bugs.python.org/file23845/unicode_warn_deprecate.patch" size="22" /></label> <label>モード: <select name="naked_post_mode">
<option value="default">デフォルト</option>
<option value="speech">音声ブラウザ</option>
<option value="ruby">ルビ付き</option>
<option value="contrast" selected="selected">配色反転</option>
<option value="larger-text">文字拡大</option>
<option value="mobile">モバイル</option>
</select>
<input type="submit" value="表示" />
</p>
</form>
</div>