[Python-checkins] bpo-40943: PY_SSIZE_T_CLEAN required for '#' formats (GH-20784)

Fri Jun 19 05:45:40 EDT 2020

https://github.com/python/cpython/commit/37bb2895561d3e63a631f10875567b4e33b30c07
commit: 37bb2895561d3e63a631f10875567b4e33b30c07
branch: master
author: Victor Stinner <vstinner at python.org>
committer: GitHub <noreply at github.com>
date: 2020-06-19T11:45:31+02:00
summary:
bpo-40943: PY_SSIZE_T_CLEAN required for '#' formats (GH-20784)
The PY_SSIZE_T_CLEAN macro must now be defined to use
PyArg_ParseTuple() and Py_BuildValue() "#" formats: "es#", "et#",
"s#", "u#", "y#", "z#", "U#" and "Z#". See the PEP 353.
Update _testcapi.test_buildvalue_issue38913().
files:
A Misc/NEWS.d/next/C API/2020-06-10-18-37-26.bpo-40943.i4q7rK.rst
M Doc/c-api/arg.rst
M Doc/whatsnew/3.10.rst
M Modules/_testcapimodule.c
M Python/getargs.c
M Python/modsupport.c

diff --git a/Doc/c-api/arg.rst b/Doc/c-api/arg.rst
index b7baad589a72c..26e872c5a348e 100644
--- a/Doc/c-api/arg.rst
+++ b/Doc/c-api/arg.rst
@@ -55,13 +55,11 @@ which disallows mutable objects such as :class:`bytearray`.
 
 .. note::
 
- For all ``#`` variants of formats (``s#``, ``y#``, etc.), the type of
- the length argument (int or :c:type:`Py_ssize_t`) is controlled by
- defining the macro :c:macro:`PY_SSIZE_T_CLEAN` before including
- :file:`Python.h`. If the macro was defined, length is a
- :c:type:`Py_ssize_t` rather than an :c:type:`int`. This behavior will change
- in a future Python version to only support :c:type:`Py_ssize_t` and
- drop :c:type:`int` support. It is best to always define :c:macro:`PY_SSIZE_T_CLEAN`.
+ For all ``#`` variants of formats (``s#``, ``y#``, etc.), the macro
+ :c:macro:`PY_SSIZE_T_CLEAN` must be defined before including
+ :file:`Python.h`. On Python 3.9 and older, the type of the length argument
+ is :c:type:`Py_ssize_t` if the :c:macro:`PY_SSIZE_T_CLEAN` macro is defined,
+ or int otherwise.
 
 
 ``s`` (:class:`str`) [const char \*]
@@ -90,7 +88,7 @@ which disallows mutable objects such as :class:`bytearray`.
 In this case the resulting C string may contain embedded NUL bytes.
 Unicode objects are converted to C strings using ``'utf-8'`` encoding.
 
-``s#`` (:class:`str`, read-only :term:`bytes-like object`) [const char \*, int or :c:type:`Py_ssize_t`]
+``s#`` (:class:`str`, read-only :term:`bytes-like object`) [const char \*, :c:type:`Py_ssize_t`]
 Like ``s*``, except that it doesn't accept mutable objects.
 The result is stored into two C variables,
 the first one a pointer to a C string, the second one its length.
@@ -105,7 +103,7 @@ which disallows mutable objects such as :class:`bytearray`.
 Like ``s*``, but the Python object may also be ``None``, in which case the
 ``buf`` member of the :c:type:`Py_buffer` structure is set to ``NULL``.
 
-``z#`` (:class:`str`, read-only :term:`bytes-like object` or ``None``) [const char \*, int or :c:type:`Py_ssize_t`]
+``z#`` (:class:`str`, read-only :term:`bytes-like object` or ``None``) [const char \*, :c:type:`Py_ssize_t`]
 Like ``s#``, but the Python object may also be ``None``, in which case the C
 pointer is set to ``NULL``.
 
@@ -124,7 +122,7 @@ which disallows mutable objects such as :class:`bytearray`.
 bytes-like objects. **This is the recommended way to accept
 binary data.**
 
-``y#`` (read-only :term:`bytes-like object`) [const char \*, int or :c:type:`Py_ssize_t`]
+``y#`` (read-only :term:`bytes-like object`) [const char \*, :c:type:`Py_ssize_t`]
 This variant on ``s#`` doesn't accept Unicode objects, only bytes-like
 objects.
 
@@ -155,7 +153,7 @@ which disallows mutable objects such as :class:`bytearray`.
 Part of the old-style :c:type:`Py_UNICODE` API; please migrate to using
 :c:func:`PyUnicode_AsWideCharString`.
 
-``u#`` (:class:`str`) [const Py_UNICODE \*, int or :c:type:`Py_ssize_t`]
+``u#`` (:class:`str`) [const Py_UNICODE \*, :c:type:`Py_ssize_t`]
 This variant on ``u`` stores into two C variables, the first one a pointer to a
 Unicode data buffer, the second one its length. This variant allows
 null code points.
@@ -172,7 +170,7 @@ which disallows mutable objects such as :class:`bytearray`.
 Part of the old-style :c:type:`Py_UNICODE` API; please migrate to using
 :c:func:`PyUnicode_AsWideCharString`.
 
-``Z#`` (:class:`str` or ``None``) [const Py_UNICODE \*, int or :c:type:`Py_ssize_t`]
+``Z#`` (:class:`str` or ``None``) [const Py_UNICODE \*, :c:type:`Py_ssize_t`]
 Like ``u#``, but the Python object may also be ``None``, in which case the
 :c:type:`Py_UNICODE` pointer is set to ``NULL``.
 
@@ -213,7 +211,7 @@ which disallows mutable objects such as :class:`bytearray`.
 recoding them. Instead, the implementation assumes that the byte string object uses
 the encoding passed in as parameter.
 
-``es#`` (:class:`str`) [const char \*encoding, char \*\*buffer, int or :c:type:`Py_ssize_t` \*buffer_length]
+``es#`` (:class:`str`) [const char \*encoding, char \*\*buffer, :c:type:`Py_ssize_t` \*buffer_length]
 This variant on ``s#`` is used for encoding Unicode into a character buffer.
 Unlike the ``es`` format, this variant allows input data which contains NUL
 characters.
@@ -244,7 +242,7 @@ which disallows mutable objects such as :class:`bytearray`.
 In both cases, *\*buffer_length* is set to the length of the encoded data
 without the trailing NUL byte.
 
-``et#`` (:class:`str`, :class:`bytes` or :class:`bytearray`) [const char \*encoding, char \*\*buffer, int or :c:type:`Py_ssize_t` \*buffer_length]
+``et#`` (:class:`str`, :class:`bytes` or :class:`bytearray`) [const char \*encoding, char \*\*buffer, :c:type:`Py_ssize_t` \*buffer_length]
 Same as ``es#`` except that byte string objects are passed through without recoding
 them. Instead, the implementation assumes that the byte string object uses the
 encoding passed in as parameter.
@@ -549,7 +547,7 @@ Building values
 Convert a null-terminated C string to a Python :class:`str` object using ``'utf-8'``
 encoding. If the C string pointer is ``NULL``, ``None`` is used.
 
- ``s#`` (:class:`str` or ``None``) [const char \*, int or :c:type:`Py_ssize_t`]
+ ``s#`` (:class:`str` or ``None``) [const char \*, :c:type:`Py_ssize_t`]
 Convert a C string and its length to a Python :class:`str` object using ``'utf-8'``
 encoding. If the C string pointer is ``NULL``, the length is ignored and
 ``None`` is returned.
@@ -558,14 +556,14 @@ Building values
 This converts a C string to a Python :class:`bytes` object. If the C
 string pointer is ``NULL``, ``None`` is returned.
 
- ``y#`` (:class:`bytes`) [const char \*, int or :c:type:`Py_ssize_t`]
+ ``y#`` (:class:`bytes`) [const char \*, :c:type:`Py_ssize_t`]
 This converts a C string and its lengths to a Python object. If the C
 string pointer is ``NULL``, ``None`` is returned.
 
 ``z`` (:class:`str` or ``None``) [const char \*]
 Same as ``s``.
 
- ``z#`` (:class:`str` or ``None``) [const char \*, int or :c:type:`Py_ssize_t`]
+ ``z#`` (:class:`str` or ``None``) [const char \*, :c:type:`Py_ssize_t`]
 Same as ``s#``.
 
 ``u`` (:class:`str`) [const wchar_t \*]
@@ -573,7 +571,7 @@ Building values
 data to a Python Unicode object. If the Unicode buffer pointer is ``NULL``,
 ``None`` is returned.
 
- ``u#`` (:class:`str`) [const wchar_t \*, int or :c:type:`Py_ssize_t`]
+ ``u#`` (:class:`str`) [const wchar_t \*, :c:type:`Py_ssize_t`]
 Convert a Unicode (UTF-16 or UCS-4) data buffer and its length to a Python
 Unicode object. If the Unicode buffer pointer is ``NULL``, the length is ignored
 and ``None`` is returned.
@@ -581,7 +579,7 @@ Building values
 ``U`` (:class:`str` or ``None``) [const char \*]
 Same as ``s``.
 
- ``U#`` (:class:`str` or ``None``) [const char \*, int or :c:type:`Py_ssize_t`]
+ ``U#`` (:class:`str` or ``None``) [const char \*, :c:type:`Py_ssize_t`]
 Same as ``s#``.
 
 ``i`` (:class:`int`) [int]
diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst
index 566827bf90ff3..9c1dca1152a64 100644
--- a/Doc/whatsnew/3.10.rst
+++ b/Doc/whatsnew/3.10.rst
@@ -155,6 +155,13 @@ New Features
 Porting to Python 3.10
 ----------------------
 
+* The ``PY_SSIZE_T_CLEAN`` macro must now be defined to use
+ :c:func:`PyArg_ParseTuple` and :c:func:`Py_BuildValue` formats which use
+ ``#``: ``es#``, ``et#``, ``s#``, ``u#``, ``y#``, ``z#``, ``U#`` and ``Z#``.
+ See :ref:`Parsing arguments and building values
+ <arg-parsing>` and the :pep:`353`.
+ (Contributed by Victor Stinner in :issue:`40943`.)
+
 * Since :c:func:`Py_TYPE()` is changed to the inline static function,
 ``Py_TYPE(obj) = new_type`` must be replaced with ``Py_SET_TYPE(obj, new_type)``:
 see :c:func:`Py_SET_TYPE()` (available since Python 3.9). For backward
diff --git a/Misc/NEWS.d/next/C API/2020-06-10-18-37-26.bpo-40943.i4q7rK.rst b/Misc/NEWS.d/next/C API/2020-06-10-18-37-26.bpo-40943.i4q7rK.rst
new file mode 100644
index 0000000000000..360ddae34cb96
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2020-06-10-18-37-26.bpo-40943.i4q7rK.rst	
@@ -0,0 +1,5 @@
+The ``PY_SSIZE_T_CLEAN`` macro must now be defined to use
+:c:func:`PyArg_ParseTuple` and :c:func:`Py_BuildValue` formats which use ``#``:
+``es#``, ``et#``, ``s#``, ``u#``, ``y#``, ``z#``, ``U#`` and ``Z#``.
+See :ref:`Parsing arguments and building values <arg-parsing>` and the
+:pep:`353`.
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c
index 5302641a9a37e..808483ebd7bf4 100644
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -6868,29 +6868,36 @@ test_buildvalue_issue38913(PyObject *self, PyObject *Py_UNUSED(ignored))
 PyObject *res;
 const char str[] = "string";
 const Py_UNICODE unicode[] = L"unicode";
- PyErr_SetNone(PyExc_ZeroDivisionError);
+ assert(!PyErr_Occurred());
 
 res = Py_BuildValue("(s#O)", str, 1, Py_None);
 assert(res == NULL);
- if (!PyErr_ExceptionMatches(PyExc_ZeroDivisionError)) {
+ if (!PyErr_ExceptionMatches(PyExc_SystemError)) {
 return NULL;
 }
+ PyErr_Clear();
+
 res = Py_BuildValue("(z#O)", str, 1, Py_None);
 assert(res == NULL);
- if (!PyErr_ExceptionMatches(PyExc_ZeroDivisionError)) {
+ if (!PyErr_ExceptionMatches(PyExc_SystemError)) {
 return NULL;
 }
+ PyErr_Clear();
+
 res = Py_BuildValue("(y#O)", str, 1, Py_None);
 assert(res == NULL);
- if (!PyErr_ExceptionMatches(PyExc_ZeroDivisionError)) {
+ if (!PyErr_ExceptionMatches(PyExc_SystemError)) {
 return NULL;
 }
+ PyErr_Clear();
+
 res = Py_BuildValue("(u#O)", unicode, 1, Py_None);
 assert(res == NULL);
- if (!PyErr_ExceptionMatches(PyExc_ZeroDivisionError)) {
+ if (!PyErr_ExceptionMatches(PyExc_SystemError)) {
 return NULL;
 }
-
 PyErr_Clear();
+
+
 Py_RETURN_NONE;
 }
diff --git a/Python/getargs.c b/Python/getargs.c
index cf0cc0783687a..aaf687a46b7f6 100644
--- a/Python/getargs.c
+++ b/Python/getargs.c
@@ -656,27 +656,12 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
 char *msgbuf, size_t bufsize, freelist_t *freelist)
 {
 /* For # codes */
-#define FETCH_SIZE int *q=NULL;Py_ssize_t *q2=NULL;\
- if (flags & FLAG_SIZE_T) q2=va_arg(*p_va, Py_ssize_t*); \
- else { \
- if (PyErr_WarnEx(PyExc_DeprecationWarning, \
- "PY_SSIZE_T_CLEAN will be required for '#' formats", 1)) { \
- return NULL; \
- } \
- q=va_arg(*p_va, int*); \
- }
-#define STORE_SIZE(s) \
- if (flags & FLAG_SIZE_T) \
- *q2=s; \
- else { \
- if (INT_MAX < s) { \
- PyErr_SetString(PyExc_OverflowError, \
- "size does not fit in an int"); \
- return converterr("", arg, msgbuf, bufsize); \
- } \
- *q = (int)s; \
- }
-#define BUFFER_LEN ((flags & FLAG_SIZE_T) ? *q2:*q)
+#define REQUIRE_PY_SSIZE_T_CLEAN \
+ if (!(flags & FLAG_SIZE_T)) { \
+ PyErr_SetString(PyExc_SystemError, \
+ "PY_SSIZE_T_CLEAN macro must be defined for '#' formats"); \
+ return NULL; \
+ }
 #define RETURN_ERR_OCCURRED return msgbuf
 
 const char *format = *p_format;
@@ -931,8 +916,9 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
 if (count < 0)
 return converterr(buf, arg, msgbuf, bufsize);
 if (*format == '#') {
- FETCH_SIZE;
- STORE_SIZE(count);
+ REQUIRE_PY_SSIZE_T_CLEAN;
+ Py_ssize_t *psize = va_arg(*p_va, Py_ssize_t*);
+ *psize = count;
 format++;
 } else {
 if (strlen(*p) != (size_t)count) {
@@ -974,11 +960,12 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
 } else if (*format == '#') { /* a string or read-only bytes-like object */
 /* "s#" or "z#" */
 const void **p = (const void **)va_arg(*p_va, const char **);
- FETCH_SIZE;
+ REQUIRE_PY_SSIZE_T_CLEAN;
+ Py_ssize_t *psize = va_arg(*p_va, Py_ssize_t*);
 
 if (c == 'z' && arg == Py_None) {
 *p = NULL;
- STORE_SIZE(0);
+ *psize = 0;
 }
 else if (PyUnicode_Check(arg)) {
 Py_ssize_t len;
@@ -987,7 +974,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
 return converterr(CONV_UNICODE,
 arg, msgbuf, bufsize);
 *p = sarg;
- STORE_SIZE(len);
+ *psize = len;
 }
 else { /* read-only bytes-like object */
 /* XXX Really? */
@@ -995,7 +982,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
 Py_ssize_t count = convertbuffer(arg, p, &buf);
 if (count < 0)
 return converterr(buf, arg, msgbuf, bufsize);
- STORE_SIZE(count);
+ *psize = count;
 }
 format++;
 } else {
@@ -1034,18 +1021,19 @@ _Py_COMP_DIAG_IGNORE_DEPR_DECLS
 
 if (*format == '#') {
 /* "u#" or "Z#" */
- FETCH_SIZE;
+ REQUIRE_PY_SSIZE_T_CLEAN;
+ Py_ssize_t *psize = va_arg(*p_va, Py_ssize_t*);
 
 if (c == 'Z' && arg == Py_None) {
 *p = NULL;
- STORE_SIZE(0);
+ *psize = 0;
 }
 else if (PyUnicode_Check(arg)) {
 Py_ssize_t len;
 *p = PyUnicode_AsUnicodeAndSize(arg, &len);
 if (*p == NULL)
 RETURN_ERR_OCCURRED;
- STORE_SIZE(len);
+ *psize = len;
 }
 else
 return converterr(c == 'Z' ? "str or None" : "str",
@@ -1160,22 +1148,11 @@ _Py_COMP_DIAG_POP
 trailing 0-byte
 
 */
- int *q = NULL; Py_ssize_t *q2 = NULL;
- if (flags & FLAG_SIZE_T) {
- q2 = va_arg(*p_va, Py_ssize_t*);
- }
- else {
- if (PyErr_WarnEx(PyExc_DeprecationWarning,
- "PY_SSIZE_T_CLEAN will be required for '#' formats", 1))
- {
- Py_DECREF(s);
- return NULL;
- }
- q = va_arg(*p_va, int*);
- }
+ REQUIRE_PY_SSIZE_T_CLEAN;
+ Py_ssize_t *psize = va_arg(*p_va, Py_ssize_t*);
 
 format++;
- if (q == NULL && q2 == NULL) {
+ if (psize == NULL) {
 Py_DECREF(s);
 return converterr(
 "(buffer_len is NULL)",
@@ -1195,30 +1172,20 @@ _Py_COMP_DIAG_POP
 arg, msgbuf, bufsize);
 }
 } else {
- if (size + 1 > BUFFER_LEN) {
+ if (size + 1 > *psize) {
 Py_DECREF(s);
 PyErr_Format(PyExc_ValueError,
 "encoded string too long "
 "(%zd, maximum length %zd)",
- (Py_ssize_t)size, (Py_ssize_t)(BUFFER_LEN-1));
+ (Py_ssize_t)size, (Py_ssize_t)(*psize - 1));
 RETURN_ERR_OCCURRED;
 }
 }
 memcpy(*buffer, ptr, size+1);
 
- if (flags & FLAG_SIZE_T) {
- *q2 = size;
- }
- else {
- if (INT_MAX < size) {
- Py_DECREF(s);
- PyErr_SetString(PyExc_OverflowError,
- "size does not fit in an int");
- return converterr("", arg, msgbuf, bufsize);
- }
- *q = (int)size;
- }
- } else {
+ *psize = size;
+ }
+ else {
 /* Using a 0-terminated buffer:
 
 - the encoded string has to be 0-terminated
@@ -1356,9 +1323,7 @@ _Py_COMP_DIAG_POP
 *p_format = format;
 return NULL;
 
-#undef FETCH_SIZE
-#undef STORE_SIZE
-#undef BUFFER_LEN
+#undef REQUIRE_PY_SSIZE_T_CLEAN
 #undef RETURN_ERR_OCCURRED
 }
 
diff --git a/Python/modsupport.c b/Python/modsupport.c
index 845bdcb2b6f1b..2637039d4a151 100644
--- a/Python/modsupport.c
+++ b/Python/modsupport.c
@@ -283,6 +283,13 @@ do_mktuple(const char **p_format, va_list *p_va, char endchar, Py_ssize_t n, int
 static PyObject *
 do_mkvalue(const char **p_format, va_list *p_va, int flags)
 {
+#define ERROR_NEED_PY_SSIZE_T_CLEAN \
+ { \
+ PyErr_SetString(PyExc_SystemError, \
+ "PY_SSIZE_T_CLEAN macro must be defined for '#' formats"); \
+ return NULL; \
+ }
+
 for (;;) {
 switch (*(*p_format)++) {
 case '(':
@@ -341,14 +348,12 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags)
 Py_ssize_t n;
 if (**p_format == '#') {
 ++*p_format;
- if (flags & FLAG_SIZE_T)
+ if (flags & FLAG_SIZE_T) {
 n = va_arg(*p_va, Py_ssize_t);
+ }
 else {
 n = va_arg(*p_va, int);
- if (PyErr_WarnEx(PyExc_DeprecationWarning,
- "PY_SSIZE_T_CLEAN will be required for '#' formats", 1)) {
- return NULL;
- }
+ ERROR_NEED_PY_SSIZE_T_CLEAN;
 }
 }
 else
@@ -394,14 +399,12 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags)
 Py_ssize_t n;
 if (**p_format == '#') {
 ++*p_format;
- if (flags & FLAG_SIZE_T)
+ if (flags & FLAG_SIZE_T) {
 n = va_arg(*p_va, Py_ssize_t);
+ }
 else {
 n = va_arg(*p_va, int);
- if (PyErr_WarnEx(PyExc_DeprecationWarning,
- "PY_SSIZE_T_CLEAN will be required for '#' formats", 1)) {
- return NULL;
- }
+ ERROR_NEED_PY_SSIZE_T_CLEAN;
 }
 }
 else
@@ -432,14 +435,12 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags)
 Py_ssize_t n;
 if (**p_format == '#') {
 ++*p_format;
- if (flags & FLAG_SIZE_T)
+ if (flags & FLAG_SIZE_T) {
 n = va_arg(*p_va, Py_ssize_t);
+ }
 else {
 n = va_arg(*p_va, int);
- if (PyErr_WarnEx(PyExc_DeprecationWarning,
- "PY_SSIZE_T_CLEAN will be required for '#' formats", 1)) {
- return NULL;
- }
+ ERROR_NEED_PY_SSIZE_T_CLEAN;
 }
 }
 else
@@ -507,6 +508,8 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags)
 
 }
 }
+
+#undef ERROR_NEED_PY_SSIZE_T_CLEAN
 }