[Python-checkins] cpython: Fix 64-bit safety issue in BZ2Compressor and BZ2Decompressor.

nadeem.vawda python-checkins at python.org
Tue Apr 12 23:05:40 CEST 2011


http://hg.python.org/cpython/rev/0010cc5f22d4
changeset: 69275:0010cc5f22d4
user: Nadeem Vawda <nadeem.vawda at gmail.com>
date: Tue Apr 12 23:02:42 2011 +0200
summary:
 Fix 64-bit safety issue in BZ2Compressor and BZ2Decompressor.
files:
 Lib/test/test_bz2.py | 36 +++++++++++++++++++++++++++++++-
 Modules/_bz2module.c | 33 +++++++++++++++++++++--------
 2 files changed, 59 insertions(+), 10 deletions(-)
diff --git a/Lib/test/test_bz2.py b/Lib/test/test_bz2.py
--- a/Lib/test/test_bz2.py
+++ b/Lib/test/test_bz2.py
@@ -1,10 +1,11 @@
 #!/usr/bin/env python3
 from test import support
-from test.support import TESTFN
+from test.support import TESTFN, precisionbigmemtest, _4G
 
 import unittest
 from io import BytesIO
 import os
+import random
 import subprocess
 import sys
 
@@ -415,6 +416,23 @@
         data += bz2c.flush()
         self.assertEqual(self.decompress(data), self.TEXT)
 
+    @precisionbigmemtest(size=_4G + 100, memuse=2)
+    def testCompress4G(self, size):
+        # "Test BZ2Compressor.compress()/flush() with >4GiB input"
+        bz2c = BZ2Compressor()
+        data = b"x" * size
+        try:
+            compressed = bz2c.compress(data)
+            compressed += bz2c.flush()
+        finally:
+            data = None  # Release memory
+        data = bz2.decompress(compressed)
+        try:
+            self.assertEqual(len(data), size)
+            self.assertEqual(len(data.strip(b"x")), 0)
+        finally:
+            data = None
+
 class BZ2DecompressorTest(BaseTest):
     def test_Constructor(self):
         self.assertRaises(TypeError, BZ2Decompressor, 42)
@@ -453,6 +471,22 @@
         text = bz2d.decompress(self.DATA)
         self.assertRaises(EOFError, bz2d.decompress, b"anything")
 
+    @precisionbigmemtest(size=_4G + 100, memuse=3)
+    def testDecompress4G(self, size):
+        # "Test BZ2Decompressor.decompress() with >4GiB input"
+        blocksize = 10 * 1024 * 1024
+        block = random.getrandbits(blocksize * 8).to_bytes(blocksize, 'little')
+        try:
+            data = block * (size // blocksize + 1)
+            compressed = bz2.compress(data)
+            bz2d = BZ2Decompressor()
+            decompressed = bz2d.decompress(compressed)
+            self.assertTrue(decompressed == data)
+        finally:
+            data = None
+            compressed = None
+            decompressed = None
+
 
 class FuncTest(BaseTest):
     "Test module functions"
diff --git a/Modules/_bz2module.c b/Modules/_bz2module.c
--- a/Modules/_bz2module.c
+++ b/Modules/_bz2module.c
@@ -36,6 +36,8 @@
 #define RELEASE_LOCK(obj)
 #endif
 
+#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
+
 
 typedef struct {
 PyObject_HEAD
@@ -145,8 +147,10 @@
 if (result == NULL)
 return NULL;
 c->bzs.next_in = data;
- /* FIXME This is not 64-bit clean - avail_in is an int. */
- c->bzs.avail_in = len;
+ /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
+ Do compression in chunks of no more than UINT_MAX bytes each. */
+ c->bzs.avail_in = MIN(len, UINT_MAX);
+ len -= c->bzs.avail_in;
 c->bzs.next_out = PyBytes_AS_STRING(result);
 c->bzs.avail_out = PyBytes_GET_SIZE(result);
 for (;;) {
@@ -161,6 +165,11 @@
 if (catch_bz2_error(bzerror))
 goto error;
 
+ if (c->bzs.avail_in == 0 && len > 0) {
+ c->bzs.avail_in = MIN(len, UINT_MAX);
+ len -= c->bzs.avail_in;
+ }
+
 /* In regular compression mode, stop when input data is exhausted.
 In flushing mode, stop when all buffered data has been flushed. */
 if ((action == BZ_RUN && c->bzs.avail_in == 0) ||
@@ -354,8 +363,10 @@
 if (result == NULL)
 return result;
 d->bzs.next_in = data;
- /* FIXME This is not 64-bit clean - avail_in is an int. */
- d->bzs.avail_in = len;
+ /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
+ Do decompression in chunks of no more than UINT_MAX bytes each. */
+ d->bzs.avail_in = MIN(len, UINT_MAX);
+ len -= d->bzs.avail_in;
 d->bzs.next_out = PyBytes_AS_STRING(result);
 d->bzs.avail_out = PyBytes_GET_SIZE(result);
 for (;;) {
@@ -371,17 +382,21 @@
 goto error;
 if (bzerror == BZ_STREAM_END) {
 d->eof = 1;
- if (d->bzs.avail_in > 0) { /* Save leftover input to unused_data */
+ len += d->bzs.avail_in;
+ if (len > 0) { /* Save leftover input to unused_data */
 Py_CLEAR(d->unused_data);
- d->unused_data = PyBytes_FromStringAndSize(d->bzs.next_in,
- d->bzs.avail_in);
+ d->unused_data = PyBytes_FromStringAndSize(d->bzs.next_in, len);
 if (d->unused_data == NULL)
 goto error;
 }
 break;
 }
- if (d->bzs.avail_in == 0)
- break;
+ if (d->bzs.avail_in == 0) {
+ if (len == 0)
+ break;
+ d->bzs.avail_in = MIN(len, UINT_MAX);
+ len -= d->bzs.avail_in;
+ }
 if (d->bzs.avail_out == 0) {
 if (grow_buffer(&result) < 0)
 goto error;
-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list

AltStyle によって変換されたページ (->オリジナル) /