[Python-checkins] cpython: Add lzma.{encode,decode}_filter_properties().

Sun May 6 23:07:51 CEST 2012

http://hg.python.org/cpython/rev/9118ef2b651a
changeset: 76808:9118ef2b651a
parent: 76806:6e799c0091db
user: Nadeem Vawda <nadeem.vawda at gmail.com>
date: Sun May 06 23:01:27 2012 +0200
summary:
 Add lzma.{encode,decode}_filter_properties().
files:
 Doc/library/lzma.rst | 26 ++++
 Lib/lzma.py | 1 +
 Lib/test/test_lzma.py | 43 ++++++
 Modules/_lzmamodule.c | 186 +++++++++++++++++++++++++++++-
 4 files changed, 252 insertions(+), 4 deletions(-)

diff --git a/Doc/library/lzma.rst b/Doc/library/lzma.rst
--- a/Doc/library/lzma.rst
+++ b/Doc/library/lzma.rst
@@ -235,6 +235,32 @@
 feature set.
 
 
+.. function:: encode_filter_properties(filter)
+
+   Return a :class:`bytes` object encoding the options (properties) of the
+   filter specified by *filter* (a dictionary).
+
+   *filter* is interpreted as a filter specifier, as described in
+   :ref:`filter-chain-specs`.
+
+   The returned data does not include the filter ID itself, only the options.
+
+   This function is primarily of interest to users implementing custom file
+   formats.
+
+
+.. function:: decode_filter_properties(filter_id, encoded_props)
+
+   Return a dictionary describing a filter with ID *filter_id*, and options
+   (properties) decoded from the :class:`bytes` object *encoded_props*.
+
+   The returned dictionary is a filter specifier, as described in
+   :ref:`filter-chain-specs`.
+
+   This function is primarily of interest to users implementing custom file
+   formats.
+
+
 .. _filter-chain-specs:
 
 Specifying custom filter chains
diff --git a/Lib/lzma.py b/Lib/lzma.py
--- a/Lib/lzma.py
+++ b/Lib/lzma.py
@@ -19,6 +19,7 @@
 
 "LZMACompressor", "LZMADecompressor", "LZMAFile", "LZMAError",
 "compress", "decompress", "check_is_supported",
+ "encode_filter_properties", "decode_filter_properties",
 ]
 
 import io
diff --git a/Lib/test/test_lzma.py b/Lib/test/test_lzma.py
--- a/Lib/test/test_lzma.py
+++ b/Lib/test/test_lzma.py
@@ -944,6 +944,49 @@
 # This value should not be a valid check ID.
 self.assertFalse(lzma.check_is_supported(lzma.CHECK_UNKNOWN))
 
+ def test_encode_filter_properties(self):
+ with self.assertRaises(TypeError):
+ lzma.encode_filter_properties(b"not a dict")
+ with self.assertRaises(ValueError):
+ lzma.encode_filter_properties({"id": 0x100})
+ with self.assertRaises(ValueError):
+ lzma.encode_filter_properties({"id": lzma.FILTER_LZMA2, "junk": 12})
+ with self.assertRaises(lzma.LZMAError):
+ lzma.encode_filter_properties({"id": lzma.FILTER_DELTA,
+ "dist": 9001})
+
+ # Test with parameters used by zipfile module.
+ props = lzma.encode_filter_properties({
+ "id": lzma.FILTER_LZMA1,
+ "pb": 2,
+ "lp": 0,
+ "lc": 3,
+ "dict_size": 8 << 20,
+ })
+ self.assertEqual(props, b"]\x00\x00\x80\x00")
+
+ def test_decode_filter_properties(self):
+ with self.assertRaises(TypeError):
+ lzma.decode_filter_properties(lzma.FILTER_X86, {"should be": bytes})
+ with self.assertRaises(lzma.LZMAError):
+ lzma.decode_filter_properties(lzma.FILTER_DELTA, b"too long")
+
+ # Test with parameters used by zipfile module.
+ filterspec = lzma.decode_filter_properties(
+ lzma.FILTER_LZMA1, b"]\x00\x00\x80\x00")
+ self.assertEqual(filterspec["id"], lzma.FILTER_LZMA1)
+ self.assertEqual(filterspec["pb"], 2)
+ self.assertEqual(filterspec["lp"], 0)
+ self.assertEqual(filterspec["lc"], 3)
+ self.assertEqual(filterspec["dict_size"], 8 << 20)
+
+ def test_filter_properties_roundtrip(self):
+ spec1 = lzma.decode_filter_properties(
+ lzma.FILTER_LZMA1, b"]\x00\x00\x80\x00")
+ reencoded = lzma.encode_filter_properties(spec1)
+ spec2 = lzma.decode_filter_properties(lzma.FILTER_LZMA1, reencoded)
+ self.assertEqual(spec1, spec2)
+
 
 # Test data:
 
diff --git a/Modules/_lzmamodule.c b/Modules/_lzmamodule.c
--- a/Modules/_lzmamodule.c
+++ b/Modules/_lzmamodule.c
@@ -137,6 +137,9 @@
 uint32_t - the "I" (unsigned int) specifier is the right size, but
 silently ignores overflows on conversion.
 
+ lzma_vli - the "K" (unsigned PY_LONG_LONG) specifier is the right
+ size, but like "I" it silently ignores overflows on conversion.
+
 lzma_mode and lzma_match_finder - these are enumeration types, and
 so the size of each is implementation-defined. Worse, different
 enum types can be of different sizes within the same program, so
@@ -147,12 +150,12 @@
 static int \
 FUNCNAME(PyObject *obj, void *ptr) \
 { \
- unsigned long val; \
+ unsigned PY_LONG_LONG val; \
 \
- val = PyLong_AsUnsignedLong(obj); \
+ val = PyLong_AsUnsignedLongLong(obj); \
 if (PyErr_Occurred()) \
 return 0; \
- if ((unsigned long)(TYPE)val != val) { \
+ if ((unsigned PY_LONG_LONG)(TYPE)val != val) { \
 PyErr_SetString(PyExc_OverflowError, \
 "Value too large for " #TYPE " type"); \
 return 0; \
@@ -162,13 +165,17 @@
 }
 
 INT_TYPE_CONVERTER_FUNC(uint32_t, uint32_converter)
+INT_TYPE_CONVERTER_FUNC(lzma_vli, lzma_vli_converter)
 INT_TYPE_CONVERTER_FUNC(lzma_mode, lzma_mode_converter)
 INT_TYPE_CONVERTER_FUNC(lzma_match_finder, lzma_mf_converter)
 
 #undef INT_TYPE_CONVERTER_FUNC
 
 
-/* Filter specifier parsing functions. */
+/* Filter specifier parsing.
+
+ This code handles converting filter specifiers (Python dicts) into
+ the C lzma_filter structs expected by liblzma. */
 
 static void *
 parse_filter_spec_lzma(PyObject *spec)
@@ -358,6 +365,88 @@
 }
 
 
+/* Filter specifier construction.
+
+ This code handles converting C lzma_filter structs into
+ Python-level filter specifiers (represented as dicts). */
+
+static int
+spec_add_field(PyObject *spec, _Py_Identifier *key, unsigned PY_LONG_LONG value)
+{
+ int status;
+ PyObject *value_object;
+
+ value_object = PyLong_FromUnsignedLongLong(value);
+ if (value_object == NULL)
+ return -1;
+
+ status = _PyDict_SetItemId(spec, key, value_object);
+ Py_DECREF(value_object);
+ return status;
+}
+
+static PyObject *
+build_filter_spec(const lzma_filter *f)
+{
+ PyObject *spec;
+
+ spec = PyDict_New();
+ if (spec == NULL)
+ return NULL;
+
+#define ADD_FIELD(SOURCE, FIELD) \
+ do { \
+ _Py_IDENTIFIER(FIELD); \
+ if (spec_add_field(spec, &PyId_##FIELD, SOURCE->FIELD) == -1) \
+ goto error;\
+ } while (0)
+
+ ADD_FIELD(f, id);
+
+ switch (f->id) {
+ case LZMA_FILTER_LZMA1:
+ case LZMA_FILTER_LZMA2: {
+ lzma_options_lzma *options = f->options;
+ ADD_FIELD(options, dict_size);
+ ADD_FIELD(options, lc);
+ ADD_FIELD(options, lp);
+ ADD_FIELD(options, pb);
+ ADD_FIELD(options, mode);
+ ADD_FIELD(options, nice_len);
+ ADD_FIELD(options, mf);
+ ADD_FIELD(options, depth);
+ break;
+ }
+ case LZMA_FILTER_DELTA: {
+ lzma_options_delta *options = f->options;
+ ADD_FIELD(options, dist);
+ break;
+ }
+ case LZMA_FILTER_X86:
+ case LZMA_FILTER_POWERPC:
+ case LZMA_FILTER_IA64:
+ case LZMA_FILTER_ARM:
+ case LZMA_FILTER_ARMTHUMB:
+ case LZMA_FILTER_SPARC: {
+ lzma_options_bcj *options = f->options;
+ ADD_FIELD(options, start_offset);
+ break;
+ }
+ default:
+ PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
+ goto error;
+ }
+
+#undef ADD_FIELD
+
+ return spec;
+
+error:
+ Py_DECREF(spec);
+ return NULL;
+}
+
+
 /* LZMACompressor class. */
 
 static PyObject *
@@ -1005,11 +1094,100 @@
 }
 
 
+PyDoc_STRVAR(encode_filter_properties_doc,
+"encode_filter_properties(filter) -> bytes\n"
+"\n"
+"Return a bytes object encoding the options (properties) of the filter\n"
+"specified by *filter* (a dict).\n"
+"\n"
+"The result does not include the filter ID itself, only the options.\n"
+"\n"
+"This function is primarily of interest to users implementing custom\n"
+"file formats.\n");
+
+static PyObject *
+encode_filter_properties(PyObject *self, PyObject *args)
+{
+ PyObject *filterspec;
+ lzma_filter filter;
+ lzma_ret lzret;
+ uint32_t encoded_size;
+ PyObject *result = NULL;
+
+ if (!PyArg_ParseTuple(args, "O:encode_filter_properties", &filterspec))
+ return NULL;
+
+ if (parse_filter_spec(&filter, filterspec) == NULL)
+ return NULL;
+
+ lzret = lzma_properties_size(&encoded_size, &filter);
+ if (catch_lzma_error(lzret))
+ goto error;
+
+ result = PyBytes_FromStringAndSize(NULL, encoded_size);
+ if (result == NULL)
+ goto error;
+
+ lzret = lzma_properties_encode(
+ &filter, (uint8_t *)PyBytes_AS_STRING(result));
+ if (catch_lzma_error(lzret))
+ goto error;
+
+ PyMem_Free(filter.options);
+ return result;
+
+error:
+ Py_XDECREF(result);
+ PyMem_Free(filter.options);
+ return NULL;
+}
+
+
+PyDoc_STRVAR(decode_filter_properties_doc,
+"decode_filter_properties(filter_id, encoded_props) -> dict\n"
+"\n"
+"Return a dict describing a filter with ID *filter_id*, and options\n"
+"(properties) decoded from the bytes object *encoded_props*.\n"
+"\n"
+"This function is primarily of interest to users implementing custom\n"
+"file formats.\n");
+
+static PyObject *
+decode_filter_properties(PyObject *self, PyObject *args)
+{
+ Py_buffer encoded_props;
+ lzma_filter filter;
+ lzma_ret lzret;
+ PyObject *result = NULL;
+
+ if (!PyArg_ParseTuple(args, "O&y*:decode_filter_properties",
+ lzma_vli_converter, &filter.id, &encoded_props))
+ return NULL;
+
+ lzret = lzma_properties_decode(
+ &filter, NULL, encoded_props.buf, encoded_props.len);
+ PyBuffer_Release(&encoded_props);
+ if (catch_lzma_error(lzret))
+ return NULL;
+
+ result = build_filter_spec(&filter);
+
+ /* We use vanilla free() here instead of PyMem_Free() - filter.options was
+ allocated by lzma_properties_decode() using the default allocator. */
+ free(filter.options);
+ return result;
+}
+
+
 /* Module initialization. */
 
 static PyMethodDef module_methods[] = {
 {"check_is_supported", (PyCFunction)check_is_supported,
 METH_VARARGS, check_is_supported_doc},
+ {"encode_filter_properties", (PyCFunction)encode_filter_properties,
+ METH_VARARGS, encode_filter_properties_doc},
+ {"decode_filter_properties", (PyCFunction)decode_filter_properties,
+ METH_VARARGS, decode_filter_properties_doc},
 {NULL}
 };
 
-- 
Repository URL: http://hg.python.org/cpython