# File: timeIO.py
"""
test various read and write file I/O modes for speed
in the version of Python that is running this script;
runs most common and valid read/write coding patterns;
tests ascii and binary, but not wide-char unicode files;
printed results can be parsed later for comparisons
"""
######################################################################
# generic timer
######################################################################
import time
def timeOnce(func, *args):
    """Call func(*args) once and return the elapsed time in seconds.

    Any value returned by func is ignored.  The clock is chosen at call
    time: time.perf_counter where it exists (Python 3.3+), else the
    older time.clock (Python 2.6 through 3.2).
    """
    # time.clock() was removed in Python 3.8, so relying on it alone makes
    # every test fail with AttributeError on current interpreters.
    clock = getattr(time, 'perf_counter', None) or time.clock
    start = clock()
    func(*args)                     # ignore any return value
    return clock() - start
def timerAvg(func, *args):
    """Return the mean time of three timed calls to func(*args).

    One extra call is made first and its time is discarded, so the
    measured runs all start after an initial pass over the data.
    """
    timeOnce(func, *args)           # warm-up run, result not used
    repeats = 3
    total = sum(timeOnce(func, *args) for _ in range(repeats))
    return total / repeats          # average of the timed runs
# The following may be a bit better, but isn't directly comparable
def timerBest(func, *args):
    """Return the shortest time of three timed calls to func(*args).

    One extra call is made first and its time is discarded, to make
    sure disk caches are active before the measured runs.
    """
    timeOnce(func, *args)           # warm-up run, result not used
    repeats = 3
    return min([timeOnce(func, *args) for _ in range(repeats)])
# Timing strategy applied to every test by timePython(): minimum of the runs
timer = timerBest # CHANGED
######################################################################
# file read tests
######################################################################
#=====================================================================
# all of the following are probably valid use cases for 2.6 and 3.0,
# though lines/text and blocks/binary combos seem more typical in 3.0
# (programs will pick str xor bytes for text or binary data in 3.0); 
# truly binary files can only be read in binary mode in 3.0, because 
# they cannot be decoded into characters in text mode, and it makes 
# no sense to read truly binary files by lines: they have no lineends;
# the allAtOnce modes may fail for pathologically large files;
#
# 3.0 has str + bytes; 2.6 has just str, plus binary files
# open mode default = 'r' = 'rt' in 3.0, and 'r' in 2.6
# (both mean text mode input when the mode argument is omitted)
#=====================================================================
# Default size passed to read() by the read_byBlocks_* tests (32K)
blocksize = 1024 * 32
def read_byLines_textMode(filename):
    """Read filename line by line in default (text) mode, discarding lines.

    2.6 text mode returns str and does not decode (use codecs.open);
    3.0 text mode returns str, after decoding content.
    """
    # Close explicitly instead of leaving the open file to the garbage
    # collector, which is not prompt on every Python implementation.
    with open(filename) as infile:
        for line in infile:
            pass
def read_byLines_binaryMode(filename):     # less common in 3.0?
    """Read filename line by line in binary mode, discarding lines.

    2.6 binary mode returns str and does not decode;
    3.0 binary mode returns bytes and does not decode.
    """
    # Close explicitly instead of leaving the open file to the garbage
    # collector, which is not prompt on every Python implementation.
    with open(filename, 'rb') as infile:
        for line in infile:
            pass
def read_byBlocks_textMode(filename, size=blocksize):
    """Read filename in default (text) mode, size units per read() call.

    Blocks are discarded; reading stops at the first empty result,
    which marks end of file.  Less common in 3.0?
    """
    # Close explicitly instead of leaving the open file to the garbage
    # collector, which is not prompt on every Python implementation.
    with open(filename) as infile:
        while True:
            block = infile.read(size)
            if not block:
                break
def read_byBlocks_binaryMode(filename, size=blocksize):
    """Read filename in binary mode, up to size bytes per read() call.

    Blocks are discarded; reading stops at the first empty result,
    which marks end of file.
    """
    # Close explicitly instead of leaving the open file to the garbage
    # collector, which is not prompt on every Python implementation.
    with open(filename, 'rb') as infile:
        while True:
            block = infile.read(size)
            if not block:
                break
def read_allAtOnce_textMode(filename):     # not for very large files
    """Load all of filename with one read() in default (text) mode.

    The content is discarded; nothing is returned.
    """
    # Close explicitly instead of leaving the open file to the garbage
    # collector, which is not prompt on every Python implementation.
    with open(filename) as infile:
        infile.read()
def read_allAtOnce_binaryMode(filename):   # not for very large files
    """Load all of filename with one read() in binary mode.

    The content is discarded; nothing is returned.
    """
    # Close explicitly instead of leaving the open file to the garbage
    # collector, which is not prompt on every Python implementation.
    with open(filename, 'rb') as infile:
        infile.read()
######################################################################
# file write tests
######################################################################
#=====================================================================
# all the following work, but tests "write_byLines_binaryMode" and
# "write_byBlocks_textMode" are probably invalid use cases for 3.0,
# where programs are more likely to pick str xor bytes for text
# or binary data, and not convert to str or bytes just to write in 
# text or binary mode; portability issues: 3.0's encoding arg required 
# by 3.0's bytes() converter is not allowed in 2.6's bytes(), and 
# 2.6's str.decode() creates a unicode object which adds some cost;
#
# hoist set-up ops out to avoid charging to test funcs
# 'xx' / b'xx' are str / bytes in 3.0, both are str in 2.6
# 'xx' == b'xx' and bytes(x) == str(X) in 2.6
# 2.6: str is a seq of bytes, unicode a distinct type
# 3.0: str is seq of Unicode chars, bytes is seq of ints
#=====================================================================
# Output data shared by all write tests; built once here, at import time,
# so that its construction is not charged to the timed functions.
oneMeg = 1024 * 1024
halfMeg = oneMeg // 2 # use truncating division in both 2.6 and 3.0
repsList = list(range(halfMeg)) # force list in both 2.6 and 3.0
aLine = '*' * 49 + '\n' # 50 chars: 25M in file ((50+\r?) * ((1024 * 1024) / 2))
aBlock = b'1\x0234\x05' * 10 # 50 bytes: 25M in file ((5 * 10) * (1M / 2))
aFileStr = aLine * halfMeg # 25M characters
aFileBin = aBlock * halfMeg # 25M bytes
# Report the sizes of the objects just built (list, line, block, both files)
print ('\nOutput data sizes: %s %s %s %s %s' % 
 (len(repsList), len(aLine), len(aBlock), len(aFileStr), len(aFileBin)) )
def write_byLines_textMode(filename):      # writing by blocks in text mode is similar
    """Write aLine to filename once per item of repsList, in text mode.

    3.0 text mode takes str, encodes content, and translates newlines
    (an open() flag controls line ends); 2.6 text mode takes str and
    translates newlines.
    """
    # 'with' closes (and so flushes) the file even if a write fails; the
    # local name also no longer shadows the 2.x builtin 'file'.
    with open(filename, 'w') as outfile:
        for i in repsList:
            outfile.write(aLine)
def write_byLines_binaryMode(filename):    # less common in 3.0?
    """Write the encoded aLine to filename once per item of repsList.

    3.0 binary mode takes bytes and does not decode or translate;
    2.6 binary mode takes str and does not translate newlines.
    """
    # 'with' closes (and so flushes) the file even if a write fails; the
    # local name also no longer shadows the 2.x builtin 'file'.
    with open(filename, 'wb') as outfile:
        for i in repsList:
            # encode() makes bytes in 3.0, same str in 2.6; it is kept
            # inside the loop because the conversion is part of this test
            outfile.write(aLine.encode())
def write_byBlocks_textMode(filename):     # less common in 3.0?
    """Write the decoded aBlock to filename once per item of repsList,
    in text mode.
    """
    # 'with' closes (and so flushes) the file even if a write fails; the
    # local name also no longer shadows the 2.x builtin 'file'.
    with open(filename, 'w') as outfile:
        for i in repsList:
            # decode() makes str in 3.0, unicode in 2.6; it is kept
            # inside the loop because the conversion is part of this test
            outfile.write(aBlock.decode())
def write_byBlocks_binaryMode(filename):   # writing by lines in binary mode is similar
    """Write aBlock to filename once per item of repsList, in binary mode."""
    # 'with' closes (and so flushes) the file even if a write fails; the
    # local name also no longer shadows the 2.x builtin 'file'.
    with open(filename, 'wb') as outfile:
        for i in repsList:
            outfile.write(aBlock)
 
 
def write_allAtOnce_textMode(filename):    # not for very large files
    """Write all of aFileStr to filename with one write() in text mode."""
    # Close inside the timed call: an unclosed temporary file is flushed
    # only when it is garbage-collected, which is not guaranteed to
    # happen before the timer stops on every Python implementation.
    with open(filename, 'w') as outfile:
        outfile.write(aFileStr)
def write_allAtOnce_binaryMode(filename):  # not for very large files
    """Write all of aFileBin to filename with one write() in binary mode."""
    # Close inside the timed call: an unclosed temporary file is flushed
    # only when it is garbage-collected, which is not guaranteed to
    # happen before the timer stops on every Python implementation.
    with open(filename, 'wb') as outfile:
        outfile.write(aFileBin)
######################################################################
# run, collect test data for Python running me
######################################################################
def timePython():
    """Run every read and write test under the running Python; print times.

    Command line: <script> textfile binaryfile.  The two input files are
    read by the read tests; the write tests create 'timeIO.out' in the
    current directory.  For each file a header line is printed, followed
    by one line per test giving either its time from timer() or a
    '*fail*' marker with the exception class.

    Exits with a usage message if fewer than two file arguments are given.
    """
    import sys
    import os

    if len(sys.argv) < 3:
        # a clear message instead of an unpacking ValueError
        sys.exit('usage: %s textfile binaryfile' % sys.argv[0])

    outputfile = 'timeIO.out'                # hard-code: I create this
    textfile, binaryfile = sys.argv[1:3]     # input files vary, command line

    # A list of pairs rather than a dict keyed by filename: identical
    # paths on the command line would otherwise silently replace one
    # group of tests with another.
    tests = [
        (textfile,   (read_byLines_textMode,
                      read_byLines_binaryMode,        # less common in 3.0?
                      read_byBlocks_textMode,         # less common in 3.0?
                      read_byBlocks_binaryMode,
                      read_allAtOnce_textMode,        # not for very large files
                      read_allAtOnce_binaryMode)),    # not for very large files
        (binaryfile, (read_byBlocks_binaryMode,       # other read modes not valid,
                      read_allAtOnce_binaryMode)),    # for truly binary data files
        (outputfile, (write_byLines_textMode,
                      write_byLines_binaryMode,       # less common in 3.0?
                      write_byBlocks_textMode,        # less common in 3.0?
                      write_byBlocks_binaryMode,
                      write_allAtOnce_textMode,       # not for very large files
                      write_allAtOnce_binaryMode)),   # not for very large files
    ]

    version = sys.version.split()[0]
    for filename, funcs in tests:
        filesize = os.path.getsize(filename) if os.path.exists(filename) else 0
        print('\n[Python {0}: {1}, {2} bytes]'.format(version, filename, filesize))
        for func in funcs:
            try:
                testtime = timer(func, filename)
            except Exception:
                # report and carry on with the next test; Ctrl-C and
                # SystemExit are deliberately not caught here
                print('%-26s => %s, %s' % (func.__name__, '*fail*', sys.exc_info()[0]))
            else:
                # Measure again after the run: the output file does not
                # exist (or has a stale size) before the write tests.
                if os.path.exists(filename):
                    filesize = os.path.getsize(filename)
                # int/int=float+remainder in 3.0, but not 2.6
                filemegs = float(filesize) / oneMeg
                testid = '%-26s (%s=%.2fM)' % (func.__name__, filename, filemegs)
                print('%-46s => %f' % (testid, testtime))
# Run the tests only when executed as a script, not when imported
if __name__ == '__main__': 
 timePython() # the version running me