[Python-checkins] cpython (merge 3.4 -> default): Issue #22982: Improve BOM handling when seeking to multiple positions of a writable text file.

antoine.pitrou python-checkins at python.org
Mon Apr 13 20:04:51 CEST 2015


https://hg.python.org/cpython/rev/3583e5191b96
changeset: 95591:3583e5191b96
parent: 95589:18429ff02fa3
parent: 95590:946740824eaf
user: Antoine Pitrou <solipsis at pitrou.net>
date: Mon Apr 13 20:02:33 2015 +0200
summary:
 Issue #22982: Improve BOM handling when seeking to multiple positions of a writable text file.
files:
 Lib/_pyio.py | 26 +++++++++++++++-----------
 Lib/test/test_io.py | 13 +++++++++++++
 Misc/NEWS | 3 +++
 Modules/_io/textio.c | 25 +++++++++++++++++++++----
 4 files changed, 52 insertions(+), 15 deletions(-)
diff --git a/Lib/_pyio.py b/Lib/_pyio.py
--- a/Lib/_pyio.py
+++ b/Lib/_pyio.py
@@ -2275,6 +2275,19 @@
 return buffer
 
 def seek(self, cookie, whence=0):
+ def _reset_encoder(position):
+ """Reset the encoder (merely useful for proper BOM handling)"""
+ try:
+ encoder = self._encoder or self._get_encoder()
+ except LookupError:
+ # Sometimes the encoder doesn't exist
+ pass
+ else:
+ if position != 0:
+ encoder.setstate(0)
+ else:
+ encoder.reset()
+
 if self.closed:
 raise ValueError("tell on closed file")
 if not self._seekable:
@@ -2295,6 +2308,7 @@
 self._snapshot = None
 if self._decoder:
 self._decoder.reset()
+ _reset_encoder(position)
 return position
 if whence != 0:
 raise ValueError("unsupported whence (%r)" % (whence,))
@@ -2332,17 +2346,7 @@
 raise OSError("can't restore logical file position")
 self._decoded_chars_used = chars_to_skip
 
- # Finally, reset the encoder (merely useful for proper BOM handling)
- try:
- encoder = self._encoder or self._get_encoder()
- except LookupError:
- # Sometimes the encoder doesn't exist
- pass
- else:
- if cookie != 0:
- encoder.setstate(0)
- else:
- encoder.reset()
+ _reset_encoder(cookie)
 return cookie
 
 def read(self, size=None):
diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -2730,6 +2730,19 @@
 with self.open(filename, 'rb') as f:
 self.assertEqual(f.read(), 'bbbzzz'.encode(charset))
 
+ def test_seek_append_bom(self):
+ # Same test, but first seek to the start and then to the end
+ filename = support.TESTFN
+ for charset in ('utf-8-sig', 'utf-16', 'utf-32'):
+ with self.open(filename, 'w', encoding=charset) as f:
+ f.write('aaa')
+ with self.open(filename, 'a', encoding=charset) as f:
+ f.seek(0)
+ f.seek(0, self.SEEK_END)
+ f.write('xxx')
+ with self.open(filename, 'rb') as f:
+ self.assertEqual(f.read(), 'aaaxxx'.encode(charset))
+
 def test_errors_property(self):
 with self.open(support.TESTFN, "w") as f:
 self.assertEqual(f.errors, "strict")
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -24,6 +24,9 @@
 Library
 -------
 
+- Issue #22982: Improve BOM handling when seeking to multiple positions of
+ a writable text file.
+
 - Issue #23464: Removed deprecated asyncio JoinableQueue.
 
 - Issue #23529: Limit the size of decompressed data when reading from
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c
--- a/Modules/_io/textio.c
+++ b/Modules/_io/textio.c
@@ -2048,11 +2048,10 @@
 }
 
 static int
-_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
+_textiowrapper_encoder_reset(textio *self, int start_of_stream)
 {
 PyObject *res;
- /* Same as _textiowrapper_decoder_setstate() above. */
- if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
+ if (start_of_stream) {
 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
 self->encoding_start_of_stream = 1;
 }
@@ -2067,6 +2066,14 @@
 return 0;
 }
 
+static int
+_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
+{
+ /* Same as _textiowrapper_decoder_setstate() above. */
+ return _textiowrapper_encoder_reset(
+ self, cookie->start_pos == 0 && cookie->dec_flags == 0);
+}
+
 static PyObject *
 textiowrapper_seek(textio *self, PyObject *args)
 {
@@ -2134,7 +2141,17 @@
 }
 
 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
- Py_XDECREF(cookieObj);
+ Py_CLEAR(cookieObj);
+ if (res == NULL)
+ goto fail;
+ if (self->encoder) {
+ /* If seek() == 0, we are at the start of stream, otherwise not */
+ cmp = PyObject_RichCompareBool(res, _PyIO_zero, Py_EQ);
+ if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
+ Py_DECREF(res);
+ goto fail;
+ }
+ }
 return res;
 }
 else if (whence != 0) {
-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list

AltStyle によって変換されたページ (->オリジナル) /