- 29.4k
- 3
- 49
- 98
import os
import time
def create_title(message_string, titles):
    """Build the output file name for the section announced by a dummy message.

    Args:
        message_string: the data fields of the dummy message joined into one
            string. Characters 0-7 are parsed as the request number and
            characters 8-15 as the object number (both with ``int``).
        titles: mapping from ``"AT<req>_<obj>"`` keys to a title prefix.

    Returns:
        ``"<prefix>_AT<req>_<obj>.asc"``, or ``None`` when both numbers are
        zero.

    Raises:
        ValueError: if either 8-character slice is not a valid integer.
        KeyError: if ``titles`` has no entry for the computed key.
    """
    req_num = int(message_string[0:8])
    obj_num = int(message_string[8:16])
    if not req_num and not obj_num:
        # Both numbers zero: no title for this message; the caller treats
        # None as its signal to stop.
        return None
    at = "AT{}_{}".format(req_num, obj_num)
    title_prefix = titles[at]
    return "{}_{}.asc".format(title_prefix, at)
def split_asc_file(target_file, target_dir, titles):
    """Split a log file into one output file per section inside *target_dir*.

    A "dummy" message is a line that splits into exactly 14
    whitespace-separated fields and whose third field is ``"0"``. Each dummy
    message opens a section: its fields 7-14 are concatenated and passed to
    ``create_title`` to obtain the section file name. Every following
    14-field line is copied into that file until the next dummy message.
    Processing stops when ``create_title`` returns ``None`` or when the log
    is exhausted.

    Side effects: creates *target_dir* if it does not exist and makes it the
    current working directory *before* opening the log, so a relative
    *target_file* is resolved against *target_dir*.

    Args:
        target_file: path of the log file to split.
        target_dir: directory receiving the section files.
        titles: mapping forwarded to ``create_title``.
    """
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)
    os.chdir(target_dir)

    with open(target_file) as log:
        # Single-string form prints the same text under Python 2 and 3.
        print('Opening {}'.format(target_file))

        # Bootstrap: skip everything before the first dummy message.
        for message in log:
            fields = message.split()
            if len(fields) == 14 and fields[2] == "0":
                break
        else:
            # Empty log or no dummy message at all: nothing to split.
            return

        while True:
            # `message` is always the dummy message that opens this section.
            title = create_title(''.join(message.split()[6:]), titles)
            if title is None:
                break
            with open(title, 'w') as section:
                print('Created {}'.format(title))
                for message in log:
                    fields = message.split()
                    if len(fields) == 14:
                        if fields[2] == "0":
                            # Next dummy message: it names the next section.
                            break
                        section.write(message)
                else:
                    # Log exhausted without a closing dummy message. Stop
                    # here instead of re-processing the stale last line,
                    # which would loop forever or create a bogus file.
                    return
if __name__ == '__main__':
    # Script entry point: time one full split and report the elapsed seconds.
    start_time = time.time()
    # NOTE(review): `..,..,..` is a placeholder left by the author, not valid
    # Python; replace it with the real (target_file, target_dir, titles).
    split_asc_file(..,..,..) #Whatever
    # Python 2 print statement.
    print "Splitting completed in {} seconds".format(time.time() - start_time)
import os
import time
def create_title(message_string, titles=None):
    """Build the output file name for the section announced by a dummy message.

    Args:
        message_string: the data fields of the dummy message joined into one
            string. Characters 0-7 are parsed as the request number and
            characters 8-15 as the object number (both with ``int``).
        titles: mapping from ``"AT<req>_<obj>"`` keys to a title prefix.
            Optional only for backward compatibility: when omitted, a
            module-level ``titles`` mapping is looked up instead.

    Returns:
        ``"<prefix>_AT<req>_<obj>.asc"``, or ``None`` when both numbers are
        zero.

    Raises:
        ValueError: if either 8-character slice is not a valid integer.
        KeyError: if no entry exists for the computed key (or, for the
            one-argument form, if there is no module-level ``titles``).
    """
    req_num = int(message_string[0:8])
    obj_num = int(message_string[8:16])
    if not req_num and not obj_num:
        # Both numbers zero: no title; the caller treats None as "stop".
        return None
    if titles is None:
        # One-argument callers previously depended on a global `titles`.
        titles = globals()["titles"]
    at = "AT{}_{}".format(req_num, obj_num)
    title_prefix = titles[at]
    return "{}_{}.asc".format(title_prefix, at)


def split_asc_file(target_file, target_dir, titles):
    """Split a log file into one output file per section inside *target_dir*.

    A "dummy" message is a line that splits into exactly 14
    whitespace-separated fields and whose third field is ``"0"``. Each dummy
    message opens a section: its fields 7-14 are concatenated and passed to
    ``create_title`` (together with *titles*) to obtain the section file
    name. Every following 14-field line is copied into that file until the
    next dummy message. Processing stops when ``create_title`` returns
    ``None`` or when the log is exhausted.

    Side effects: creates *target_dir* if it does not exist and makes it the
    current working directory *before* opening the log, so a relative
    *target_file* is resolved against *target_dir*.

    Args:
        target_file: path of the log file to split.
        target_dir: directory receiving the section files.
        titles: mapping from ``"AT<req>_<obj>"`` keys to title prefixes.
    """
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)
    os.chdir(target_dir)

    with open(target_file) as log:
        # Single-string form prints the same text under Python 2 and 3.
        print('Opening {}'.format(target_file))

        # Bootstrap: skip everything before the first dummy message.
        for message in log:
            fields = message.split()
            if len(fields) == 14 and fields[2] == "0":
                break
        else:
            # Empty log or no dummy message at all: nothing to split.
            return

        while True:
            # `message` is always the dummy message that opens this section.
            # `titles` is passed explicitly; it is not a global.
            title = create_title(''.join(message.split()[6:]), titles)
            if title is None:
                break
            with open(title, 'w') as section:
                print('Created {}'.format(title))
                for message in log:
                    fields = message.split()
                    if len(fields) == 14:
                        if fields[2] == "0":
                            # Next dummy message: it names the next section.
                            break
                        section.write(message)
                else:
                    # Log exhausted without a closing dummy message. Stop
                    # here instead of re-processing the stale last line,
                    # which would loop forever or create a bogus file.
                    return
if __name__ == '__main__':
    # Script entry point: time one full split and report the elapsed seconds.
    start_time = time.time()
    # NOTE(review): `..,..,..` is a placeholder left by the author, not valid
    # Python; replace it with the real (target_file, target_dir, titles).
    split_asc_file(..,..,..) #Whatever
    # Python 2 print statement.
    print "Splitting completed in {} seconds".format(time.time() - start_time)
import os
import time
def create_title(message_string, titles):
    """Return the section file name derived from a dummy message's data.

    Args:
        message_string: concatenated data fields of the dummy message; the
            first eight characters are read as the request number and the
            next eight as the object number (both via ``int``).
        titles: mapping keyed by ``"AT<req>_<obj>"`` giving the title prefix.

    Returns:
        ``"<prefix>_AT<req>_<obj>.asc"``, or ``None`` if both numbers are 0.

    Raises:
        ValueError: if either 8-character slice is not a valid integer.
        KeyError: if ``titles`` lacks the computed key.
    """
    req_num = int(message_string[0:8])
    obj_num = int(message_string[8:16])
    if not req_num and not obj_num:
        # Both numbers zero: no title; the caller uses None as "stop".
        return None
    at = "AT{}_{}".format(req_num, obj_num)
    title_prefix = titles[at]
    return "{}_{}.asc".format(title_prefix, at)
def split_asc_file(target_file, target_dir, titles):
    """Split a log file into one output file per section inside *target_dir*.

    A "dummy" message is a line that splits into exactly 14
    whitespace-separated fields and whose third field is ``"0"``. Each dummy
    message opens a section: its fields 7-14 are concatenated and passed to
    ``create_title`` to obtain the section file name. Every following
    14-field line is copied into that file until the next dummy message.
    Processing stops when ``create_title`` returns ``None`` or when the log
    is exhausted.

    Side effects: creates *target_dir* if it does not exist and makes it the
    current working directory *before* opening the log, so a relative
    *target_file* is resolved against *target_dir*.

    Args:
        target_file: path of the log file to split.
        target_dir: directory receiving the section files.
        titles: mapping forwarded to ``create_title``.
    """
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)
    os.chdir(target_dir)

    with open(target_file) as log:
        # Single-string form prints the same text under Python 2 and 3.
        print('Opening {}'.format(target_file))

        # Bootstrap: skip everything before the first dummy message.
        for message in log:
            fields = message.split()
            if len(fields) == 14 and fields[2] == "0":
                break
        else:
            # Empty log or no dummy message at all: nothing to split.
            return

        while True:
            # `message` is always the dummy message that opens this section.
            title = create_title(''.join(message.split()[6:]), titles)
            if title is None:
                break
            with open(title, 'w') as section:
                print('Created {}'.format(title))
                for message in log:
                    fields = message.split()
                    if len(fields) == 14:
                        if fields[2] == "0":
                            # Next dummy message: it names the next section.
                            break
                        section.write(message)
                else:
                    # Log exhausted without a closing dummy message. Stop
                    # here instead of re-processing the stale last line,
                    # which would loop forever or create a bogus file.
                    return
if __name__ == '__main__':
    # Script entry point: time one full split and report the elapsed seconds.
    start_time = time.time()
    # NOTE(review): `..,..,..` is a placeholder left by the author, not valid
    # Python; replace it with the real (target_file, target_dir, titles).
    split_asc_file(..,..,..) #Whatever
    # Python 2 print statement.
    print "Splitting completed in {} seconds".format(time.time() - start_time)
What I get from your code is that you skip messages until the first dummy one, which indicates the first section, and then you have the following cycle:
- Extract title information out of the dummy message;
- Open a file to extract out messages of this section into it;
- Write relevant messages until the next dummy one.
Reorganizing your code to follow this layout more closely lets you remove your `if section` tests, which are executed at each line and may be slowing things down a bit.
You can also remove your `if title` test, since `create_title` will never return anything other than a string of more than 5 characters. But I guess that it was used before to check for the end of the tests, and I'll reuse that.
By combining that with proposals by @ferada, you can end up with:
import os
import time
def create_title(message_string, titles=None):
    """Return the section file name derived from a dummy message's data.

    Args:
        message_string: concatenated data fields of the dummy message; the
            first eight characters are read as the request number and the
            next eight as the object number (both via ``int``).
        titles: mapping keyed by ``"AT<req>_<obj>"`` giving the title prefix.
            Optional only for backward compatibility: when omitted, a
            module-level ``titles`` mapping is looked up instead.

    Returns:
        ``"<prefix>_AT<req>_<obj>.asc"``, or ``None`` if both numbers are 0.

    Raises:
        ValueError: if either 8-character slice is not a valid integer.
        KeyError: if no entry exists for the computed key (or, for the
            one-argument form, if there is no module-level ``titles``).
    """
    req_num = int(message_string[0:8])
    obj_num = int(message_string[8:16])
    if not req_num and not obj_num:
        # Both numbers zero: no title; the caller uses None as "stop".
        return None
    if titles is None:
        # One-argument callers previously depended on a global `titles`.
        titles = globals()["titles"]
    at = "AT{}_{}".format(req_num, obj_num)
    title_prefix = titles[at]
    return "{}_{}.asc".format(title_prefix, at)


def split_asc_file(target_file, target_dir, titles):
    """Write each section of the log *target_file* to its own file.

    The log is read line by line. A line with exactly 14
    whitespace-separated fields whose third field is ``"0"`` is a "dummy"
    message that opens a section; its fields 7-14, concatenated, are given
    to ``create_title`` together with *titles* to name the section file.
    Subsequent 14-field lines are copied to that file until the next dummy
    message. The split ends when ``create_title`` returns ``None`` or the
    log runs out.

    Side effects: creates *target_dir* if it does not exist and makes it the
    current working directory *before* opening the log, so a relative
    *target_file* is resolved against *target_dir*.

    Args:
        target_file: path of the log file to split.
        target_dir: directory receiving the section files.
        titles: mapping from ``"AT<req>_<obj>"`` keys to title prefixes.
    """
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)
    os.chdir(target_dir)

    with open(target_file) as log:
        # Single-string form prints the same text under Python 2 and 3.
        print('Opening {}'.format(target_file))

        # Bootstrap: skip everything before the first dummy message.
        for message in log:
            fields = message.split()
            if len(fields) == 14 and fields[2] == "0":
                break
        else:
            # Empty log or no dummy message at all: nothing to split.
            return

        while True:
            # `message` is always the dummy message that opens this section.
            # `titles` is passed explicitly; it is not a global.
            title = create_title(''.join(message.split()[6:]), titles)
            if title is None:
                break
            with open(title, 'w') as section:
                print('Created {}'.format(title))
                for message in log:
                    fields = message.split()
                    if len(fields) == 14:
                        if fields[2] == "0":
                            # Next dummy message: it names the next section.
                            break
                        section.write(message)
                else:
                    # Log exhausted without a closing dummy message. Stop
                    # here instead of re-processing the stale last line,
                    # which would loop forever or create a bogus file.
                    return
if __name__ == '__main__':
    # Script entry point: time one full split and report the elapsed seconds.
    start_time = time.time()
    # NOTE(review): `..,..,..` is a placeholder left by the author, not valid
    # Python; replace it with the real (target_file, target_dir, titles).
    split_asc_file(..,..,..) #Whatever
    # Python 2 print statement.
    print "Splitting completed in {} seconds".format(time.time() - start_time)
The workflow I proposed also lets you open the `section` file using a `with` statement, which is preferred in Python. I also changed `mkdir` to `makedirs`, just in case.