[Python-checkins] [3.10] bpo-38256: Fix binascii.crc32 large input. (GH-32000) (GH-32013)
gpshead
webhook-mailer at python.org
Sun Mar 20 17:47:06 EDT 2022
https://github.com/python/cpython/commit/4c989e19c84ec224655bbbde9422e16d4a838a80
commit: 4c989e19c84ec224655bbbde9422e16d4a838a80
branch: 3.10
author: Gregory P. Smith <greg at krypto.org>
committer: gpshead <greg at krypto.org>
date: 2022年03月20日T14:46:52-07:00
summary:
[3.10] bpo-38256: Fix binascii.crc32 large input. (GH-32000) (GH-32013)
Inputs >= 4GiB to `binascii.crc32(...)` when compiled to use the zlib
crc32 implementation (the norm on POSIX) no longer return the wrong
result.
files:
A Misc/NEWS.d/next/Library/2022-03-19-15-54-41.bpo-38256.FoMbjE.rst
M Lib/test/test_binascii.py
M Modules/binascii.c
diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py
index 4d1bf2cce1f1e..13d4e67e586e9 100644
--- a/Lib/test/test_binascii.py
+++ b/Lib/test/test_binascii.py
@@ -4,7 +4,7 @@
import binascii
import array
import re
-from test.support import warnings_helper
+from test.support import bigmemtest, _1G, _4G, warnings_helper
# Note: "*_hex" functions are aliases for "(un)hexlify"
@@ -449,6 +449,14 @@ class BytearrayBinASCIITest(BinASCIITest):
class MemoryviewBinASCIITest(BinASCIITest):
type2test = memoryview
+class ChecksumBigBufferTestCase(unittest.TestCase):
+ """bpo-38256 - check that inputs >=4 GiB are handled correctly."""
+
+ @bigmemtest(size=_4G + 4, memuse=1, dry_run=False)
+ def test_big_buffer(self, size):
+ data = b"nyan" * (_1G + 1)
+ self.assertEqual(binascii.crc32(data), 1044521549)
+
if __name__ == "__main__":
unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2022-03-19-15-54-41.bpo-38256.FoMbjE.rst b/Misc/NEWS.d/next/Library/2022-03-19-15-54-41.bpo-38256.FoMbjE.rst
new file mode 100644
index 0000000000000..d9b57513b0631
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-03-19-15-54-41.bpo-38256.FoMbjE.rst
@@ -0,0 +1,5 @@
+Fix :func:`binascii.crc32` when it is compiled to use zlib'c crc32 to
+work properly on inputs 4+GiB in length instead of returning the wrong
+result. The workaround prior to this was to always feed the function
+data in increments smaller than 4GiB or to just call the zlib module
+function.
diff --git a/Modules/binascii.c b/Modules/binascii.c
index 1f3248b6049b3..3777580a79f2a 100644
--- a/Modules/binascii.c
+++ b/Modules/binascii.c
@@ -1120,16 +1120,20 @@ binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc)
/*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/
#ifdef USE_ZLIB_CRC32
-/* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
+/* The same core as zlibmodule.c zlib_crc32_impl. */
{
- const Byte *buf;
- Py_ssize_t len;
- int signed_val;
-
- buf = (Byte*)data->buf;
- len = data->len;
- signed_val = crc32(crc, buf, len);
- return (unsigned int)signed_val & 0xffffffffU;
+ unsigned char *buf = data->buf;
+ Py_ssize_t len = data->len;
+
+ /* Avoid truncation of length for very large buffers. crc32() takes
+ length as an unsigned int, which may be narrower than Py_ssize_t. */
+ while ((size_t)len > UINT_MAX) {
+ crc = crc32(crc, buf, UINT_MAX);
+ buf += (size_t) UINT_MAX;
+ len -= (size_t) UINT_MAX;
+ }
+ crc = crc32(crc, buf, (unsigned int)len);
+ return crc & 0xffffffff;
}
#else /* USE_ZLIB_CRC32 */
{ /* By Jim Ahlstrom; All rights transferred to CNRI */
More information about the Python-checkins
mailing list