[Python-checkins] cpython (merge 3.3 -> 3.3): Merge heads.

alexandre.vassalotti python-checkins at python.org
Sun Apr 14 12:45:25 CEST 2013


http://hg.python.org/cpython/rev/fdb8e5028c0d
changeset: 83361:fdb8e5028c0d
branch: 3.3
parent: 83357:ca5fc67e0ad1
parent: 83360:4ced30417300
user: Alexandre Vassalotti <alexandre at peadrop.com>
date: Sun Apr 14 03:31:40 2013 -0700
summary:
 Merge heads.
files:
 Lib/pickletools.py | 127 +++++++++++++++++++++++++++-----
 Misc/NEWS | 3 +
 2 files changed, 109 insertions(+), 21 deletions(-)
diff --git a/Lib/pickletools.py b/Lib/pickletools.py
--- a/Lib/pickletools.py
+++ b/Lib/pickletools.py
@@ -13,6 +13,7 @@
 import codecs
 import pickle
 import re
+import sys
 
 __all__ = ['dis', 'genops', 'optimize']
 
@@ -165,8 +166,9 @@
 
 # Represents the number of bytes consumed by a two-argument opcode where
 # the first argument gives the number of bytes in the second argument.
-TAKEN_FROM_ARGUMENT1 = -2 # num bytes is 1-byte unsigned int
-TAKEN_FROM_ARGUMENT4 = -3 # num bytes is 4-byte signed little-endian int
+TAKEN_FROM_ARGUMENT1 = -2 # num bytes is 1-byte unsigned int
+TAKEN_FROM_ARGUMENT4 = -3 # num bytes is 4-byte signed little-endian int
+TAKEN_FROM_ARGUMENT4U = -4 # num bytes is 4-byte unsigned little-endian int
 
 class ArgumentDescriptor(object):
 __slots__ = (
@@ -194,7 +196,8 @@
 assert isinstance(n, int) and (n >= 0 or
 n in (UP_TO_NEWLINE,
 TAKEN_FROM_ARGUMENT1,
- TAKEN_FROM_ARGUMENT4))
+ TAKEN_FROM_ARGUMENT4,
+ TAKEN_FROM_ARGUMENT4U))
 self.n = n
 
 self.reader = reader
@@ -265,6 +268,27 @@
 doc="Four-byte signed integer, little-endian, 2's complement.")
 
 
+def read_uint4(f):
+ r"""
+ >>> import io
+ >>> read_uint4(io.BytesIO(b'\xff\x00\x00\x00'))
+ 255
+ >>> read_uint4(io.BytesIO(b'\x00\x00\x00\x80')) == 2**31
+ True
+ """
+
+ data = f.read(4)
+ if len(data) == 4:
+ return _unpack("<I", data)[0]
+ raise ValueError("not enough data in stream to read uint4")
+
+uint4 = ArgumentDescriptor(
+ name='uint4',
+ n=4,
+ reader=read_uint4,
+ doc="Four-byte unsigned integer, little-endian.")
+
+
 def read_stringnl(f, decode=True, stripquotes=True):
 r"""
 >>> import io
@@ -421,6 +445,67 @@
 """)
 
 
+def read_bytes1(f):
+ r"""
+ >>> import io
+ >>> read_bytes1(io.BytesIO(b"\x00"))
+ b''
+ >>> read_bytes1(io.BytesIO(b"\x03abcdef"))
+ b'abc'
+ """
+
+ n = read_uint1(f)
+ assert n >= 0
+ data = f.read(n)
+ if len(data) == n:
+ return data
+ raise ValueError("expected %d bytes in a bytes1, but only %d remain" %
+ (n, len(data)))
+
+bytes1 = ArgumentDescriptor(
+ name="bytes1",
+ n=TAKEN_FROM_ARGUMENT1,
+ reader=read_bytes1,
+ doc="""A counted bytes string.
+
+ The first argument is a 1-byte unsigned int giving the number
+ of bytes, and the second argument is that many bytes.
+ """)
+
+
+def read_bytes4(f):
+ r"""
+ >>> import io
+ >>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x00abc"))
+ b''
+ >>> read_bytes4(io.BytesIO(b"\x03\x00\x00\x00abcdef"))
+ b'abc'
+ >>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x03abcdef"))
+ Traceback (most recent call last):
+ ...
+ ValueError: expected 50331648 bytes in a bytes4, but only 6 remain
+ """
+
+ n = read_uint4(f)
+ if n > sys.maxsize:
+ raise ValueError("bytes4 byte count > sys.maxsize: %d" % n)
+ data = f.read(n)
+ if len(data) == n:
+ return data
+ raise ValueError("expected %d bytes in a bytes4, but only %d remain" %
+ (n, len(data)))
+
+bytes4 = ArgumentDescriptor(
+ name="bytes4",
+ n=TAKEN_FROM_ARGUMENT4U,
+ reader=read_bytes4,
+ doc="""A counted bytes string.
+
+ The first argument is a 4-byte little-endian unsigned int giving
+ the number of bytes, and the second argument is that many bytes.
+ """)
+
+
 def read_unicodestringnl(f):
 r"""
 >>> import io
@@ -464,9 +549,9 @@
 ValueError: expected 7 bytes in a unicodestring4, but only 6 remain
 """
 
- n = read_int4(f)
- if n < 0:
- raise ValueError("unicodestring4 byte count < 0: %d" % n)
+ n = read_uint4(f)
+ if n > sys.maxsize:
+ raise ValueError("unicodestring4 byte count > sys.maxsize: %d" % n)
 data = f.read(n)
 if len(data) == n:
 return str(data, 'utf-8', 'surrogatepass')
@@ -475,7 +560,7 @@
 
 unicodestring4 = ArgumentDescriptor(
 name="unicodestring4",
- n=TAKEN_FROM_ARGUMENT4,
+ n=TAKEN_FROM_ARGUMENT4U,
 reader=read_unicodestring4,
 doc="""A counted Unicode string.
 
@@ -872,7 +957,7 @@
 assert isinstance(x, StackObject)
 self.stack_after = stack_after
 
- assert isinstance(proto, int) and 0 <= proto <= 3
+ assert isinstance(proto, int) and 0 <= proto <= pickle.HIGHEST_PROTOCOL
 self.proto = proto
 
 assert isinstance(doc, str)
@@ -1038,28 +1123,28 @@
 
 I(name='BINBYTES',
 code='B',
- arg=string4,
+ arg=bytes4,
 stack_before=[],
 stack_after=[pybytes],
 proto=3,
 doc="""Push a Python bytes object.
 
- There are two arguments: the first is a 4-byte little-endian signed int
- giving the number of bytes in the string, and the second is that many
- bytes, which are taken literally as the bytes content.
+ There are two arguments: the first is a 4-byte little-endian unsigned int
+ giving the number of bytes, and the second is that many bytes, which are
+ taken literally as the bytes content.
 """),
 
 I(name='SHORT_BINBYTES',
 code='C',
- arg=string1,
+ arg=bytes1,
 stack_before=[],
 stack_after=[pybytes],
 proto=3,
- doc="""Push a Python string object.
+ doc="""Push a Python bytes object.
 
 There are two arguments: the first is a 1-byte unsigned int giving
- the number of bytes in the string, and the second is that many bytes,
- which are taken literally as the string content.
+ the number of bytes, and the second is that many bytes, which are taken
+ literally as the string content.
 """),
 
 # Ways to spell None.
@@ -1118,7 +1203,7 @@
 proto=1,
 doc="""Push a Python Unicode string object.
 
- There are two arguments: the first is a 4-byte little-endian signed int
+ There are two arguments: the first is a 4-byte little-endian unsigned int
 giving the number of bytes in the string. The second is that many
 bytes, and is the UTF-8 encoding of the Unicode string.
 """),
@@ -1422,13 +1507,13 @@
 
 I(name='LONG_BINGET',
 code='j',
- arg=int4,
+ arg=uint4,
 stack_before=[],
 stack_after=[anyobject],
 proto=1,
 doc="""Read an object from the memo and push it on the stack.
 
- The index of the memo object to push is given by the 4-byte signed
+ The index of the memo object to push is given by the 4-byte unsigned
 little-endian integer following.
 """),
 
@@ -1459,14 +1544,14 @@
 
 I(name='LONG_BINPUT',
 code='r',
- arg=int4,
+ arg=uint4,
 stack_before=[],
 stack_after=[],
 proto=1,
 doc="""Store the stack top into the memo. The stack is not popped.
 
 The index of the memo location to write into is given by the 4-byte
- signed little-endian integer following.
+ unsigned little-endian integer following.
 """),
 
 # Access the extension registry (predefined objects). Akin to the GET
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -58,6 +58,9 @@
 - Issue #17526: fix an IndexError raised while passing code without filename to
 inspect.findsource(). Initial patch by Tyler Doyle.
 
+- Issue #16550: Update the opcode descriptions of pickletools to use unsigned
+ integers where appropriate. Initial patch by Serhiy Storchaka.
+
 IDLE
 ----
 
-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list

AltStyle によって変換されたページ (->オリジナル) /