Finding empty columns. Is there a faster way?

nn pruebauno at latinmail.com
Thu Apr 21 12:40:36 EDT 2011


time head -1000000 myfile >/dev/null
real 0m4.57s
user 0m3.81s
sys 0m0.74s
time ./repnullsalt.py '|' myfile
0 1 Null columns:
11, 20, 21, 22, 23, 24, 25, 26, 27, 30, 31, 33, 45, 50, 68
real 1m28.94s
user 1m28.11s
sys 0m0.72s
import sys
def main():
    """Report the columns of a delimited file that contain an empty field.

    Command-line arguments (read from ``sys.argv``):
        1. delimiter -- field separator; encoded as latin-1 before use.
        2. filename  -- file to scan; opened in binary mode.
        3. limit     -- optional; scanning stops once this many millions of
                        rows (after the first line) have been read.
                        Defaults to 1.

    The first line seeds one flag per column. Every later row ORs in whether
    its field is empty, so a column is reported as soon as any scanned row
    has an empty value in it. Column numbers are printed 1-based, preceded
    by a progress counter that is printed once per million rows.

    NOTE(review): if the intent is "columns that are empty in every row",
    the accumulation below would need ``&=`` instead of ``|=`` -- confirm.

    Raises:
        ValueError: if the limit argument is not an integer.
        IndexError: if a row has more fields than the first line.
    """
    # argv values are strings; without int() the ``>= limit`` comparison
    # below raises TypeError on Python 3 whenever a limit is supplied.
    limit = int(sys.argv[3]) if len(sys.argv) > 3 else 1
    dlm = sys.argv[1].encode('latin1')

    with open(sys.argv[2], 'rb') as inf:
        first = next(inf, None)
        if first is None:
            # Empty file: nothing to report, rather than a bare StopIteration.
            nulls = []
        else:
            # Strip only the line terminator; slicing off the last byte
            # unconditionally would eat real data on an unterminated line.
            nulls = [x == b'' for x in first.rstrip(b'\n').split(dlm)]

        for j, r in enumerate(inf):
            if j % 1000000 == 0:
                # Progress marker: number of millions of rows read so far.
                print(j // 1000000, end=' ', flush=True)
                if j // 1000000 >= limit:
                    break
            for i, cur in enumerate(r.rstrip(b'\n').split(dlm)):
                nulls[i] |= cur == b''

    print('Null columns:')
    print(', '.join(str(i + 1) for i, val in enumerate(nulls) if val))
# Run only when executed as a script, so importing this module neither
# parses sys.argv nor exits the interpreter.
if __name__ == "__main__":
    # Delimiter and filename are mandatory; the limit is optional.
    if len(sys.argv) < 3:
        sys.exit("Usage: " + sys.argv[0] +
                 " <delimiter> <filename> <limit>")
    main()


More information about the Python-list mailing list

AltStyle によって変換されたページ (->オリジナル) /