[Python-checkins] bpo-32503: Avoid creating too small frames in pickles. (#5127)

Serhiy Storchaka webhook-mailer at python.org
Sat Jan 20 09:42:47 EST 2018


https://github.com/python/cpython/commit/1211c9a9897a174b7261ca258cabf289815a40d8
commit: 1211c9a9897a174b7261ca258cabf289815a40d8
branch: master
author: Serhiy Storchaka <storchaka at gmail.com>
committer: GitHub <noreply at github.com>
date: 2018年01月20日T16:42:44+02:00
summary:
bpo-32503: Avoid creating too small frames in pickles. (#5127)
files:
A Misc/NEWS.d/next/Library/2018-01-07-09-22-26.bpo-32503.ViMxpD.rst
M Lib/pickle.py
M Lib/test/pickletester.py
M Modules/_pickle.c
diff --git a/Lib/pickle.py b/Lib/pickle.py
index 301e8cf5589..e6d003787ba 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -183,6 +183,7 @@ def __init__(self, value):
 
 class _Framer:
 
+ _FRAME_SIZE_MIN = 4
 _FRAME_SIZE_TARGET = 64 * 1024
 
 def __init__(self, file_write):
@@ -203,11 +204,12 @@ def commit_frame(self, force=False):
 if f.tell() >= self._FRAME_SIZE_TARGET or force:
 data = f.getbuffer()
 write = self.file_write
- # Issue a single call to the write method of the underlying
- # file object for the frame opcode with the size of the
- # frame. The concatenation is expected to be less expensive
- # than issuing an additional call to write.
- write(FRAME + pack("<Q", len(data)))
+ if len(data) >= self._FRAME_SIZE_MIN:
+ # Issue a single call to the write method of the underlying
+ # file object for the frame opcode with the size of the
+ # frame. The concatenation is expected to be less expensive
+ # than issuing an additional call to write.
+ write(FRAME + pack("<Q", len(data)))
 
 # Issue a separate call to write to append the frame
 # contents without concatenation to the above to avoid a
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index 062ec5ae72b..b84b87861d0 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -2037,6 +2037,7 @@ def test_setitems_on_non_dicts(self):
 
 # Exercise framing (proto >= 4) for significant workloads
 
+ FRAME_SIZE_MIN = 4
 FRAME_SIZE_TARGET = 64 * 1024
 
 def check_frame_opcodes(self, pickled):
@@ -2047,36 +2048,43 @@ def check_frame_opcodes(self, pickled):
 framed by default and are therefore considered a frame by themselves in
 the following consistency check.
 """
- last_arg = last_pos = last_frame_opcode_size = None
- frameless_opcode_sizes = {
- 'BINBYTES': 5,
- 'BINUNICODE': 5,
- 'BINBYTES8': 9,
- 'BINUNICODE8': 9,
- }
+ frame_end = frameless_start = None
+ frameless_opcodes = {'BINBYTES', 'BINUNICODE', 'BINBYTES8', 'BINUNICODE8'}
 for op, arg, pos in pickletools.genops(pickled):
- if op.name in frameless_opcode_sizes:
- if len(arg) > self.FRAME_SIZE_TARGET:
- frame_opcode_size = frameless_opcode_sizes[op.name]
- arg = len(arg)
- else:
- continue
- elif op.name == 'FRAME':
- frame_opcode_size = 9
- else:
- continue
-
- if last_pos is not None:
- # The previous frame's size should be equal to the number
- # of bytes up to the current frame.
- frame_size = pos - last_pos - last_frame_opcode_size
- self.assertEqual(frame_size, last_arg)
- last_arg, last_pos = arg, pos
- last_frame_opcode_size = frame_opcode_size
- # The last frame's size should be equal to the number of bytes up
- # to the pickle's end.
- frame_size = len(pickled) - last_pos - last_frame_opcode_size
- self.assertEqual(frame_size, last_arg)
+ if frame_end is not None:
+ self.assertLessEqual(pos, frame_end)
+ if pos == frame_end:
+ frame_end = None
+
+ if frame_end is not None: # framed
+ self.assertNotEqual(op.name, 'FRAME')
+ if op.name in frameless_opcodes:
+ # Only short bytes and str objects should be written
+ # in a frame
+ self.assertLessEqual(len(arg), self.FRAME_SIZE_TARGET)
+
+ else: # not framed
+ if (op.name == 'FRAME' or
+ (op.name in frameless_opcodes and
+ len(arg) > self.FRAME_SIZE_TARGET)):
+ # Frame or large bytes or str object
+ if frameless_start is not None:
+ # Only short data should be written outside of a frame
+ self.assertLess(pos - frameless_start,
+ self.FRAME_SIZE_MIN)
+ frameless_start = None
+ elif frameless_start is None and op.name != 'PROTO':
+ frameless_start = pos
+
+ if op.name == 'FRAME':
+ self.assertGreaterEqual(arg, self.FRAME_SIZE_MIN)
+ frame_end = pos + 9 + arg
+
+ pos = len(pickled)
+ if frame_end is not None:
+ self.assertEqual(frame_end, pos)
+ elif frameless_start is not None:
+ self.assertLess(pos - frameless_start, self.FRAME_SIZE_MIN)
 
 def test_framing_many_objects(self):
 obj = list(range(10**5))
@@ -2095,7 +2103,8 @@ def test_framing_many_objects(self):
 
 def test_framing_large_objects(self):
 N = 1024 * 1024
- obj = [b'x' * N, b'y' * N, 'z' * N]
+ small_items = [[i] for i in range(10)]
+ obj = [b'x' * N, *small_items, b'y' * N, 'z' * N]
 for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
 for fast in [False, True]:
 with self.subTest(proto=proto, fast=fast):
@@ -2119,12 +2128,9 @@ def test_framing_large_objects(self):
 # Perform full equality check if the lengths match.
 self.assertEqual(obj, unpickled)
 n_frames = count_opcode(pickle.FRAME, pickled)
- if not fast:
- # One frame per memoize for each large object.
- self.assertGreaterEqual(n_frames, len(obj))
- else:
- # One frame at the beginning and one at the end.
- self.assertGreaterEqual(n_frames, 2)
+ # A single frame for small objects between
+ # first two large objects.
+ self.assertEqual(n_frames, 1)
 self.check_frame_opcodes(pickled)
 
 def test_optional_frames(self):
@@ -2152,7 +2158,9 @@ def remove_frames(pickled, keep_frame=None):
 
 frame_size = self.FRAME_SIZE_TARGET
 num_frames = 20
- obj = [bytes([i]) * frame_size for i in range(num_frames)]
+ # Large byte objects (dict values) intermitted with small objects
+ # (dict keys)
+ obj = {i: bytes([i]) * frame_size for i in range(num_frames)}
 
 for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
 pickled = self.dumps(obj, proto)
diff --git a/Misc/NEWS.d/next/Library/2018-01-07-09-22-26.bpo-32503.ViMxpD.rst b/Misc/NEWS.d/next/Library/2018-01-07-09-22-26.bpo-32503.ViMxpD.rst
new file mode 100644
index 00000000000..609c59308e6
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2018-01-07-09-22-26.bpo-32503.ViMxpD.rst
@@ -0,0 +1 @@
+Pickling with protocol 4 no longer creates too small frames.
diff --git a/Modules/_pickle.c b/Modules/_pickle.c
index f8cbea12e80..f06a87d04f2 100644
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -119,8 +119,8 @@ enum {
 /* Prefetch size when unpickling (disabled on unpeekable streams) */
 PREFETCH = 8192 * 16,
 
+ FRAME_SIZE_MIN = 4,
 FRAME_SIZE_TARGET = 64 * 1024,
-
 FRAME_HEADER_SIZE = 9
 };
 
@@ -949,13 +949,6 @@ _write_size64(char *out, size_t value)
 }
 }
 
-static void
-_Pickler_WriteFrameHeader(PicklerObject *self, char *qdata, size_t frame_len)
-{
- qdata[0] = FRAME;
- _write_size64(qdata + 1, frame_len);
-}
-
 static int
 _Pickler_CommitFrame(PicklerObject *self)
 {
@@ -966,7 +959,14 @@ _Pickler_CommitFrame(PicklerObject *self)
 return 0;
 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
 qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start;
- _Pickler_WriteFrameHeader(self, qdata, frame_len);
+ if (frame_len >= FRAME_SIZE_MIN) {
+ qdata[0] = FRAME;
+ _write_size64(qdata + 1, frame_len);
+ }
+ else {
+ memmove(qdata, qdata + FRAME_HEADER_SIZE, frame_len);
+ self->output_len -= FRAME_HEADER_SIZE;
+ }
 self->frame_start = -1;
 return 0;
 }


More information about the Python-checkins mailing list

AltStyle によって変換されたページ (->オリジナル) /