diff --git a/Doc/library/lzma.rst b/Doc/library/lzma.rst --- a/Doc/library/lzma.rst +++ b/Doc/library/lzma.rst @@ -221,13 +221,32 @@ decompress a multi-stream input with :class:`LZMADecompressor`, you must create a new decompressor for each stream. - .. method:: decompress(data) + .. method:: decompress(data, max_length=-1) - Decompress *data* (a :class:`bytes` object), returning a :class:`bytes` - object containing the decompressed data for at least part of the input. - Some of *data* may be buffered internally, for use in later calls to - :meth:`decompress`. The returned data should be concatenated with the - output of any previous calls to :meth:`decompress`. + Decompresses *data* (a :term:`bytes-like object`), returning + uncompressed data as bytes. Some of *data* may be buffered + internally, for use in later calls to :meth:`decompress`. The + returned data should be concatenated with the output of any + previous calls to :meth:`decompress`. + + If *max_length* is nonnegative, returns at most *max_length* + bytes of decompressed data. If this limit is reached and further + output can be produced, the :attr:`~.needs_input` attribute will + be set to ``False``. In this case, the next call to + :meth:`~.decompress` may provide *data* as ``b''`` to obtain + more of the output. + + If all of the input data was decompressed and returned (either + because this was less than *max_length* bytes, or because + *max_length* was negative), the :attr:`~.needs_input` attribute + will be set to ``True``. + + Attempting to decompress data after the end of stream is reached + raises an :exc:`EOFError`. Any data found after the end of the + stream is ignored and saved in the :attr:`~.unused_data` attribute. + + .. versionchanged:: 3.5 + Added the *max_length* parameter. .. attribute:: check @@ -245,6 +264,12 @@ Before the end of the stream is reached, this will be ``b""``. + .. 
attribute:: needs_input + + ``False`` if the :meth:`.decompress` method can provide more + decompressed data before requiring new compressed input. + + .. versionadded:: 3.5 .. function:: compress(data, format=FORMAT_XZ, check=-1, preset=None, filters=None) diff --git a/Lib/test/test_lzma.py b/Lib/test/test_lzma.py --- a/Lib/test/test_lzma.py +++ b/Lib/test/test_lzma.py @@ -135,6 +135,41 @@ self.assertTrue(lzd.eof) self.assertEqual(lzd.unused_data, b"") + def test_decompressor_chunks_maxsize(self): + lzd = LZMADecompressor() + max_length = 100 + out = [] + + # Feed first half the input + len_ = len(COMPRESSED_XZ) // 2 + out.append(lzd.decompress(COMPRESSED_XZ[:len_], + max_length=max_length)) + self.assertFalse(lzd.needs_input) + self.assertLessEqual(len(out[-1]), max_length) + + # Retrieve more data without providing more input + out.append(lzd.decompress(b'', max_length=max_length)) + self.assertFalse(lzd.needs_input) + self.assertLessEqual(len(out[-1]), max_length) + + # Retrieve more data while providing more input + out.append(lzd.decompress(COMPRESSED_XZ[len_:], + max_length=max_length)) + self.assertLessEqual(len(out[-1]), max_length) + + # Retrieve remaining uncompressed data + while True: + if lzd.eof: + break + out.append(lzd.decompress(b'', max_length=max_length)) + self.assertLessEqual(len(out[-1]), max_length) + + out = b"".join(out) + self.assertEqual(out, INPUT) + self.assertEqual(lzd.check, lzma.CHECK_CRC64) + self.assertTrue(lzd.eof) + self.assertEqual(lzd.unused_data, b"") + def test_decompressor_unused_data(self): lzd = LZMADecompressor() extra = b"fooblibar" diff --git a/Modules/_lzmamodule.c b/Modules/_lzmamodule.c --- a/Modules/_lzmamodule.c +++ b/Modules/_lzmamodule.c @@ -66,6 +66,7 @@ int check; char eof; PyObject *unused_data; + PyObject *needs_input; #ifdef WITH_THREAD PyThread_type_lock lock; #endif @@ -142,10 +143,15 @@ #endif static int -grow_buffer(PyObject **buf) +grow_buffer(PyObject **buf, Py_ssize_t maxsize) { size_t size = 
PyBytes_GET_SIZE(*buf); - return _PyBytes_Resize(buf, size + (size>> 3) + 6); + size_t newsize = size + (size>> 3) + 6; + + if (maxsize>= 0 && newsize> maxsize) + newsize = maxsize; + + return _PyBytes_Resize(buf, newsize); } @@ -527,7 +533,7 @@ (action == LZMA_FINISH && lzret == LZMA_STREAM_END)) { break; } else if (c->lzs.avail_out == 0) { - if (grow_buffer(&result) == -1) + if (grow_buffer(&result, -1) == -1) goto error; c->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size; c->lzs.avail_out = PyBytes_GET_SIZE(result) - data_size; @@ -889,16 +895,37 @@ /* LZMADecompressor class. */ static PyObject * -decompress(Decompressor *d, uint8_t *data, size_t len) +decompress(Decompressor *d, uint8_t *data, size_t len, Py_ssize_t max_length) { size_t data_size = 0; PyObject *result; - result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE); + if (max_length < 0 || max_length>= INITIAL_BUFFER_SIZE) + result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE); + else + result = PyBytes_FromStringAndSize(NULL, max_length); if (result == NULL) return NULL; - d->lzs.next_in = data; - d->lzs.avail_in = len; + + // There may be some unconsumed input left + if (d->lzs.next_in != NULL && len != 0) { + uint8_t *data_full; + data_full = PyMem_Malloc(len + d->lzs.avail_in); + if (data_full == NULL) { + PyErr_SetNone(PyExc_MemoryError); + goto error; + } + memcpy(data_full, d->lzs.next_in, d->lzs.avail_in); + memcpy(data_full + d->lzs.avail_in, data, len); + PyMem_Free((void*)d->lzs.next_in); + d->lzs.next_in = data_full; + d->lzs.avail_in += len; + } + else if (len != 0) { + d->lzs.next_in = data; + d->lzs.avail_in = len; + } + d->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result); d->lzs.avail_out = PyBytes_GET_SIZE(result); for (;;) { @@ -908,6 +935,13 @@ lzret = lzma_code(&d->lzs, LZMA_RUN); data_size = (char *)d->lzs.next_out - PyBytes_AS_STRING(result); Py_END_ALLOW_THREADS + if (max_length>= 0) { + max_length -= data_size; + if (max_length < 0) { + 
PyErr_SetString(Error, "Internal error in lzma module"); + goto error; + } + } if (catch_lzma_error(lzret)) goto error; if (lzret == LZMA_GET_CHECK || lzret == LZMA_NO_CHECK) @@ -921,11 +955,34 @@ if (d->unused_data == NULL) goto error; } + d->lzs.next_in = NULL; break; } else if (d->lzs.avail_in == 0) { + d->lzs.next_in = NULL; + Py_DECREF(d->needs_input); + d->needs_input = Py_True; + Py_INCREF(d->needs_input); break; } else if (d->lzs.avail_out == 0) { - if (grow_buffer(&result) == -1) + if (max_length == 0) { + /* Reached maximum length of uncompressed data, need + to preserve remainder of compressed input for next + invocation */ + uint8_t *tail; + tail = PyMem_Malloc(d->lzs.avail_in); + if (tail == NULL) { + PyErr_SetNone(PyExc_MemoryError); + goto error; + } + memcpy(tail, d->lzs.next_in, d->lzs.avail_in); + d->lzs.next_in = tail; + + Py_DECREF(d->needs_input); + d->needs_input = Py_False; + Py_INCREF(d->needs_input); + break; + } + if (grow_buffer(&result, max_length) == -1) goto error; d->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size; d->lzs.avail_out = PyBytes_GET_SIZE(result) - data_size; @@ -937,6 +994,7 @@ return result; error: + d->lzs.next_in = NULL; Py_XDECREF(result); return NULL; } @@ -946,20 +1004,27 @@ self: self(type="Decompressor *") data: Py_buffer - / + max_length: Py_ssize_t=-1 -Provide data to the decompressor object. +Decompresses *data*, returning uncompressed data as bytes. -Returns a chunk of decompressed data if possible, or b'' otherwise. +If *max_length* is nonnegative, returns at most *max_length* bytes of +decompressed data. If this limit is reached and further output can be +produced, *self.needs_input* will be set to ``False``. In this case, the next +call to *decompress()* may provide *data* as b'' to obtain more of the output. -Attempting to decompress data after the end of stream is reached -raises an EOFError. Any data found after the end of the stream -is ignored and saved in the unused_data attribute. 
+If all of the input data was decompressed and returned (either because this +was less than *max_length* bytes, or because *max_length* was negative), +*self.needs_input* will be set to True. + +Attempting to decompress data after the end of stream is reached raises an +EOFError. Any data found after the end of the stream is ignored and saved in +the unused_data attribute. [clinic start generated code]*/ static PyObject * -_lzma_LZMADecompressor_decompress_impl(Decompressor *self, Py_buffer *data) -/*[clinic end generated code: output=d86e78da7ff0ff21 input=50c4768b821bf0ef]*/ +_lzma_LZMADecompressor_decompress_impl(Decompressor *self, Py_buffer *data, Py_ssize_t max_length) +/*[clinic end generated code: output=1532a5bb23629001 input=262e4e217f49039b]*/ { PyObject *result = NULL; @@ -967,7 +1032,7 @@ if (self->eof) PyErr_SetString(PyExc_EOFError, "Already at end of stream"); else - result = decompress(self, data->buf, data->len); + result = decompress(self, data->buf, data->len, max_length); RELEASE_LOCK(self); return result; } @@ -1055,6 +1120,7 @@ self->alloc.alloc = PyLzma_Malloc; self->alloc.free = PyLzma_Free; self->lzs.allocator = &self->alloc; + self->lzs.next_in = NULL; #ifdef WITH_THREAD self->lock = PyThread_allocate_lock(); @@ -1065,6 +1131,8 @@ #endif self->check = LZMA_CHECK_UNKNOWN; + self->needs_input = Py_True; + Py_INCREF(self->needs_input); self->unused_data = PyBytes_FromStringAndSize(NULL, 0); if (self->unused_data == NULL) goto error; @@ -1113,8 +1181,12 @@ static void Decompressor_dealloc(Decompressor *self) { + if(self->lzs.next_in != NULL) + PyMem_Free((void*)self->lzs.next_in); + lzma_end(&self->lzs); Py_CLEAR(self->unused_data); + Py_CLEAR(self->needs_input); #ifdef WITH_THREAD if (self->lock != NULL) PyThread_free_lock(self->lock); @@ -1134,6 +1206,9 @@ PyDoc_STRVAR(Decompressor_eof_doc, "True if the end-of-stream marker has been reached."); +PyDoc_STRVAR(Decompressor_needs_input_doc, +"True more input is needed before more decompressed 
data can be provided"); + PyDoc_STRVAR(Decompressor_unused_data_doc, "Data found after the end of the compressed stream."); @@ -1142,6 +1217,8 @@ Decompressor_check_doc}, {"eof", T_BOOL, offsetof(Decompressor, eof), READONLY, Decompressor_eof_doc}, + {"needs_input", T_BOOL, offsetof(Decompressor, needs_input), READONLY, + Decompressor_needs_input_doc}, {"unused_data", T_OBJECT_EX, offsetof(Decompressor, unused_data), READONLY, Decompressor_unused_data_doc}, {NULL} diff --git a/Modules/clinic/_lzmamodule.c.h b/Modules/clinic/_lzmamodule.c.h --- a/Modules/clinic/_lzmamodule.c.h +++ b/Modules/clinic/_lzmamodule.c.h @@ -62,34 +62,43 @@ } PyDoc_STRVAR(_lzma_LZMADecompressor_decompress__doc__, -"decompress($self, data, /)\n" +"decompress($self, /, data, max_length=-1)\n" "--\n" "\n" -"Provide data to the decompressor object.\n" +"Decompresses *data*, returning uncompressed data as bytes.\n" "\n" -"Returns a chunk of decompressed data if possible, or b\'\' otherwise.\n" +"If *max_length* is nonnegative, returns at most *max_length* bytes of\n" +"decompressed data. If this limit is reached and further output can be\n" +"produced, *self.needs_input* will be set to ``False``. In this case, the next\n" +"call to *decompress()* may provide *data* as b\'\' to obtain more of the output.\n" "\n" -"Attempting to decompress data after the end of stream is reached\n" -"raises an EOFError. Any data found after the end of the stream\n" -"is ignored and saved in the unused_data attribute."); +"If all of the input data was decompressed and returned (either because this\n" +"was less than *max_length* bytes, or because *max_length* was negative),\n" +"*self.needs_input* will be set to True.\n" +"\n" +"Attempting to decompress data after the end of stream is reached raises an\n" +"EOFError. 
Any data found after the end of the stream is ignored and saved in\n" +"the unused_data attribute."); #define _LZMA_LZMADECOMPRESSOR_DECOMPRESS_METHODDEF \ - {"decompress", (PyCFunction)_lzma_LZMADecompressor_decompress, METH_VARARGS, _lzma_LZMADecompressor_decompress__doc__}, + {"decompress", (PyCFunction)_lzma_LZMADecompressor_decompress, METH_VARARGS|METH_KEYWORDS, _lzma_LZMADecompressor_decompress__doc__}, static PyObject * -_lzma_LZMADecompressor_decompress_impl(Decompressor *self, Py_buffer *data); +_lzma_LZMADecompressor_decompress_impl(Decompressor *self, Py_buffer *data, Py_ssize_t max_length); static PyObject * -_lzma_LZMADecompressor_decompress(Decompressor *self, PyObject *args) +_lzma_LZMADecompressor_decompress(Decompressor *self, PyObject *args, PyObject *kwargs) { PyObject *return_value = NULL; + static char *_keywords[] = {"data", "max_length", NULL}; Py_buffer data = {NULL, NULL}; + Py_ssize_t max_length = -1; - if (!PyArg_ParseTuple(args, - "y*:decompress", - &data)) + if (!PyArg_ParseTupleAndKeywords(args, kwargs, + "y*|n:decompress", _keywords, + &data, &max_length)) goto exit; - return_value = _lzma_LZMADecompressor_decompress_impl(self, &data); + return_value = _lzma_LZMADecompressor_decompress_impl(self, &data, max_length); exit: /* Cleanup for data */ @@ -242,4 +251,4 @@ return return_value; } -/*[clinic end generated code: output=808fec8216ac712b input=a9049054013a1b77]*/ +/*[clinic end generated code: output=d17fac38b09626d8 input=a9049054013a1b77]*/

AltStyle によって変換されたページ (->オリジナル) /