[Python-checkins] python/dist/src/Lib codecs.py,1.48,1.49

Sun Sep 18 10:34:43 CEST 2005

Update of /cvsroot/python/python/dist/src/Lib
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv18277/Lib
Modified Files:
	codecs.py 
Log Message:
Patch #1268314: Cache lines in StreamReader.readlines for performance.
Will backport to Python 2.4.
Index: codecs.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/codecs.py,v
retrieving revision 1.48
retrieving revision 1.49
diff -u -d -r1.48 -r1.49

--- codecs.py	1 Sep 2005 11:56:53 -0000	1.48
+++ codecs.py	18 Sep 2005 08:34:39 -0000	1.49
@@ -232,6 +232,7 @@
 # For str->str decoding this will stay a str
 # For str->unicode decoding the first read will promote it to unicode
 self.charbuffer = ""
+ self.linebuffer = None
 
 def decode(self, input, errors='strict'):
 raise NotImplementedError
@@ -264,6 +265,11 @@
 optional encoding endings or state markers are available
 on the stream, these should be read too.
 """
+ # If we have lines cached, first merge them back into characters
+ if self.linebuffer:
+ self.charbuffer = "".join(self.linebuffer)
+ self.linebuffer = None
+ 
 # read until we get the required number of characters (if available)
 while True:
 # can the request be satisfied from the character buffer?
@@ -316,6 +322,20 @@
 read() method.
 
 """
+ # If we have lines cached from an earlier read, return
+ # them unconditionally
+ if self.linebuffer:
+ line = self.linebuffer[0]
+ del self.linebuffer[0]
+ if len(self.linebuffer) == 1:
+ # revert to charbuffer mode; we might need more data
+ # next time
+ self.charbuffer = self.linebuffer[0]
+ self.linebuffer = None
+ if not keepends:
+ line = line.splitlines(False)[0]
+ return line
+ 
 readsize = size or 72
 line = ""
 # If size is given, we call read() only once
@@ -331,6 +351,22 @@
 line += data
 lines = line.splitlines(True)
 if lines:
+ if len(lines) > 1:
+ # More than one line result; the first line is a full line
+ # to return
+ line = lines[0]
+ del lines[0]
+ if len(lines) > 1:
+ # cache the remaining lines
+ lines[-1] += self.charbuffer
+ self.linebuffer = lines
+ self.charbuffer = None
+ else:
+ # only one remaining line, put it back into charbuffer
+ self.charbuffer = lines[0] + self.charbuffer
+ if not keepends:
+ line = line.splitlines(False)[0]
+ break
 line0withend = lines[0]
 line0withoutend = lines[0].splitlines(False)[0]
 if line0withend != line0withoutend: # We really have a line end
@@ -376,6 +412,7 @@
 """
 self.bytebuffer = ""
 self.charbuffer = u""
+ self.linebuffer = None
 
 def seek(self, offset, whence=0):
 """ Set the input stream's current position.