[Python-checkins] r70587 - in python/branches/py3k/Lib: linecache.py tokenize.py

benjamin.peterson python-checkins at python.org
Tue Mar 24 23:30:15 CET 2009


Author: benjamin.peterson
Date: Tue Mar 24 23:30:15 2009
New Revision: 70587
Log:
reuse tokenize.detect_encoding in linecache instead of a custom solution
patch by Victor Stinner #4016
Modified:
 python/branches/py3k/Lib/linecache.py
 python/branches/py3k/Lib/tokenize.py
Modified: python/branches/py3k/Lib/linecache.py
==============================================================================
--- python/branches/py3k/Lib/linecache.py	(original)
+++ python/branches/py3k/Lib/linecache.py	Tue Mar 24 23:30:15 2009
@@ -7,7 +7,7 @@
 
 import sys
 import os
-import re
+import tokenize
 
 __all__ = ["getline", "clearcache", "checkcache"]
 
@@ -120,27 +120,11 @@
 pass
 else:
 # No luck
-## print '*** Cannot stat', filename, ':', msg
 return []
-## print("Refreshing cache for %s..." % fullname)
- try:
- fp = open(fullname, 'rU')
+ with open(fullname, 'rb') as fp:
+ coding, line = tokenize.detect_encoding(fp.readline)
+ with open(fullname, 'r', encoding=coding) as fp:
 lines = fp.readlines()
- fp.close()
- except Exception as msg:
-## print '*** Cannot open', fullname, ':', msg
- return []
- coding = "utf-8"
- for line in lines[:2]:
- m = re.search(r"coding[:=]\s*([-\w.]+)", line)
- if m:
- coding = m.group(1)
- break
- try:
- lines = [line if isinstance(line, str) else str(line, coding)
- for line in lines]
- except:
- pass # Hope for the best
 size, mtime = stat.st_size, stat.st_mtime
 cache[filename] = size, mtime, lines, fullname
 return lines
Modified: python/branches/py3k/Lib/tokenize.py
==============================================================================
--- python/branches/py3k/Lib/tokenize.py	(original)
+++ python/branches/py3k/Lib/tokenize.py	Tue Mar 24 23:30:15 2009
@@ -27,7 +27,6 @@
 import re, string, sys
 from token import *
 from codecs import lookup, BOM_UTF8
-from itertools import chain, repeat
 cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
 
 import token
@@ -327,13 +326,15 @@
 which tells you which encoding was used to decode the bytes stream.
 """
 encoding, consumed = detect_encoding(readline)
- def readline_generator():
+ def readline_generator(consumed):
+ for line in consumed:
+ yield line
 while True:
 try:
 yield readline()
 except StopIteration:
 return
- chained = chain(consumed, readline_generator())
+ chained = readline_generator(consumed)
 return _tokenize(chained.__next__, encoding)
 
 


More information about the Python-checkins mailing list

Page converted by AltStyle (-> original) /