Skip to main content
Code Review

Return to Answer

replaced http://codereview.stackexchange.com/ with https://codereview.stackexchange.com/
Source Link

By combining that with proposals by @ferada, you can end up with:

By combining that with proposals by @ferada, you can end up with:

By combining that with proposals by @ferada, you can end up with:

Added `titles` as a parameter to `create_title` since I moved this function out of `split_asc_file`
Source Link
import os
import time
def create_title(message_string, titles):
    """Build the output file name for the section announced by a dummy message.

    message_string -- payload of the dummy message; characters 0-7 hold the
                      request number and characters 8-15 the object number,
                      both parsed as base-10 integers.
    titles         -- lookup indexed by "AT<req>_<obj>" that yields the file
                      name prefix for that section.

    Returns "<prefix>_AT<req>_<obj>.asc", or None when both numbers are zero
    (the caller uses that as its end-of-sections marker).

    Raises ValueError if either field is not an integer; a missing entry in
    ``titles`` propagates whatever error the lookup raises (KeyError for a
    dict).
    """
    req_num = int(message_string[0:8])
    obj_num = int(message_string[8:16])
    if not req_num and not obj_num:
        return None
    at = "AT{}_{}".format(req_num, obj_num)
    title_prefix = titles[at]
    return "{}_{}.asc".format(title_prefix, at)
def split_asc_file(target_file, target_dir, titles):
    """Split a log file into one .asc file per section.

    A "dummy" message is a line with exactly 14 whitespace-separated fields
    whose third field is "0". Lines before the first dummy message are
    skipped. Each dummy message names a section (see create_title); the lines
    that follow it, up to the next dummy message, are copied into that
    section's file. Processing stops when create_title returns None or when
    the log is exhausted.

    target_file -- path of the log to split.
    target_dir  -- directory receiving the section files; created if missing.
    titles      -- lookup passed through to create_title.

    Side effect: the process working directory is changed to target_dir.
    NOTE: target_file is opened after that chdir, so a relative path is
    resolved against target_dir.
    """
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)
    os.chdir(target_dir)
    with open(target_file) as log:
        print('Opening {}'.format(target_file))

        # Bootstrap: skip everything before the first dummy message.
        for message in log:
            data = message.split()
            if len(data) == 14 and data[2] == "0":
                break
        else:
            # Empty log or no dummy message at all: nothing to split.
            return

        while True:
            # Using message rather than reusing data here; see next comment
            data = message.split()[6:]
            title = create_title(''.join(data), titles)
            if title is None:
                break
            with open(title, 'w') as section:
                print('Created {}'.format(title))
                for message in log:
                    # Knowing the input format, you should be able to extract
                    # the same information as the test below by analyzing
                    # message rather than splitting it, as ferada suggested
                    data = message.split()
                    if len(data) == 14 and data[2] == "0":
                        break
                    section.write(message)
                else:
                    # Log ended without a terminating dummy message; without
                    # this exit the outer loop would re-process the last
                    # line forever.
                    return
# Script entry point: run one split and report how long it took.
if __name__ == '__main__':
 start_time = time.time()
 # The three arguments are placeholders; supply the log path, the output
 # directory and the titles lookup.
 split_asc_file(..,..,..) #Whatever
 print "Splitting completed in {} seconds".format(time.time() - start_time)
import os
import time
def create_title(message_string, titles):
    """Build the output file name for the section announced by a dummy message.

    message_string -- payload of the dummy message; characters 0-7 hold the
                      request number and characters 8-15 the object number,
                      both parsed as base-10 integers.
    titles         -- lookup indexed by "AT<req>_<obj>" that yields the file
                      name prefix for that section. It is an explicit
                      parameter because no module-level ``titles`` exists.

    Returns "<prefix>_AT<req>_<obj>.asc", or None when both numbers are zero
    (the caller uses that as its end-of-sections marker).

    Raises ValueError if either field is not an integer; a missing entry in
    ``titles`` propagates whatever error the lookup raises (KeyError for a
    dict).
    """
    req_num = int(message_string[0:8])
    obj_num = int(message_string[8:16])
    if not req_num and not obj_num:
        return None
    at = "AT{}_{}".format(req_num, obj_num)
    title_prefix = titles[at]
    return "{}_{}.asc".format(title_prefix, at)


def split_asc_file(target_file, target_dir, titles):
    """Split a log file into one .asc file per section.

    A "dummy" message is a line with exactly 14 whitespace-separated fields
    whose third field is "0". Lines before the first dummy message are
    skipped. Each dummy message names a section (see create_title); the lines
    that follow it, up to the next dummy message, are copied into that
    section's file. Processing stops when create_title returns None or when
    the log is exhausted.

    target_file -- path of the log to split.
    target_dir  -- directory receiving the section files; created if missing.
    titles      -- lookup passed through to create_title.

    Side effect: the process working directory is changed to target_dir.
    NOTE: target_file is opened after that chdir, so a relative path is
    resolved against target_dir.
    """
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)
    os.chdir(target_dir)
    with open(target_file) as log:
        print('Opening {}'.format(target_file))

        # Bootstrap: skip everything before the first dummy message.
        for message in log:
            data = message.split()
            if len(data) == 14 and data[2] == "0":
                break
        else:
            # Empty log or no dummy message at all: nothing to split.
            return

        while True:
            # Using message rather than reusing data here; see next comment
            data = message.split()[6:]
            title = create_title(''.join(data), titles)
            if title is None:
                break
            with open(title, 'w') as section:
                print('Created {}'.format(title))
                for message in log:
                    # Knowing the input format, you should be able to extract
                    # the same information as the test below by analyzing
                    # message rather than splitting it, as ferada suggested
                    data = message.split()
                    if len(data) == 14 and data[2] == "0":
                        break
                    section.write(message)
                else:
                    # Log ended without a terminating dummy message; without
                    # this exit the outer loop would re-process the last
                    # line forever.
                    return
# Script entry point: run one split and report how long it took.
if __name__ == '__main__':
 start_time = time.time()
 # The three arguments are placeholders; supply the log path, the output
 # directory and the titles lookup.
 split_asc_file(..,..,..) #Whatever
 print "Splitting completed in {} seconds".format(time.time() - start_time)
import os
import time
def create_title(message_string, titles):
    """Build the output file name for the section announced by a dummy message.

    message_string -- payload of the dummy message; characters 0-7 hold the
                      request number and characters 8-15 the object number,
                      both parsed as base-10 integers.
    titles         -- lookup indexed by "AT<req>_<obj>" that yields the file
                      name prefix for that section.

    Returns "<prefix>_AT<req>_<obj>.asc", or None when both numbers are zero
    (the caller uses that as its end-of-sections marker).

    Raises ValueError if either field is not an integer; a missing entry in
    ``titles`` propagates whatever error the lookup raises (KeyError for a
    dict).
    """
    req_num = int(message_string[0:8])
    obj_num = int(message_string[8:16])
    if not req_num and not obj_num:
        return None
    at = "AT{}_{}".format(req_num, obj_num)
    title_prefix = titles[at]
    return "{}_{}.asc".format(title_prefix, at)
def split_asc_file(target_file, target_dir, titles):
    """Split a log file into one .asc file per section.

    A "dummy" message is a line with exactly 14 whitespace-separated fields
    whose third field is "0". Lines before the first dummy message are
    skipped. Each dummy message names a section (see create_title); the lines
    that follow it, up to the next dummy message, are copied into that
    section's file. Processing stops when create_title returns None or when
    the log is exhausted.

    target_file -- path of the log to split.
    target_dir  -- directory receiving the section files; created if missing.
    titles      -- lookup passed through to create_title.

    Side effect: the process working directory is changed to target_dir.
    NOTE: target_file is opened after that chdir, so a relative path is
    resolved against target_dir.
    """
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)
    os.chdir(target_dir)
    with open(target_file) as log:
        print('Opening {}'.format(target_file))

        # Bootstrap: skip everything before the first dummy message.
        for message in log:
            data = message.split()
            if len(data) == 14 and data[2] == "0":
                break
        else:
            # Empty log or no dummy message at all: nothing to split.
            return

        while True:
            # Using message rather than reusing data here; see next comment
            data = message.split()[6:]
            title = create_title(''.join(data), titles)
            if title is None:
                break
            with open(title, 'w') as section:
                print('Created {}'.format(title))
                for message in log:
                    # Knowing the input format, you should be able to extract
                    # the same information as the test below by analyzing
                    # message rather than splitting it, as ferada suggested
                    data = message.split()
                    if len(data) == 14 and data[2] == "0":
                        break
                    section.write(message)
                else:
                    # Log ended without a terminating dummy message; without
                    # this exit the outer loop would re-process the last
                    # line forever.
                    return
# Script entry point: run one split and report how long it took.
if __name__ == '__main__':
 start_time = time.time()
 # The three arguments are placeholders; supply the log path, the output
 # directory and the titles lookup.
 split_asc_file(..,..,..) #Whatever
 print "Splitting completed in {} seconds".format(time.time() - start_time)
Source Link

What I get from your code is that you skip messages until the first dummy one, which indicates the first section, and then you have the following cycle:

  • Extract title information out of the dummy message;
  • Open a file to extract out messages of this section into it;
  • Write relevant messages until the next dummy one.

Reorganizing your code to follow this layout more closely lets you remove your `if section` tests, which are executed on every line and may be slowing things down a bit.

You can also remove your `if title` test, since `create_title` will never return anything other than a string of more than 5 characters. But I guess that it was used before to check for the end of the tests, and I'll reuse that.

By combining that with proposals by @ferada, you can end up with:

import os
import time
def create_title(message_string, titles):
    """Build the output file name for the section announced by a dummy message.

    message_string -- payload of the dummy message; characters 0-7 hold the
                      request number and characters 8-15 the object number,
                      both parsed as base-10 integers.
    titles         -- lookup indexed by "AT<req>_<obj>" that yields the file
                      name prefix for that section. It is an explicit
                      parameter because no module-level ``titles`` exists.

    Returns "<prefix>_AT<req>_<obj>.asc", or None when both numbers are zero
    (the caller uses that as its end-of-sections marker).

    Raises ValueError if either field is not an integer; a missing entry in
    ``titles`` propagates whatever error the lookup raises (KeyError for a
    dict).
    """
    req_num = int(message_string[0:8])
    obj_num = int(message_string[8:16])
    if not req_num and not obj_num:
        return None
    at = "AT{}_{}".format(req_num, obj_num)
    title_prefix = titles[at]
    return "{}_{}.asc".format(title_prefix, at)


def split_asc_file(target_file, target_dir, titles):
    """Split a log file into one .asc file per section.

    A "dummy" message is a line with exactly 14 whitespace-separated fields
    whose third field is "0". Lines before the first dummy message are
    skipped. Each dummy message names a section (see create_title); the lines
    that follow it, up to the next dummy message, are copied into that
    section's file. Processing stops when create_title returns None or when
    the log is exhausted.

    target_file -- path of the log to split.
    target_dir  -- directory receiving the section files; created if missing.
    titles      -- lookup passed through to create_title.

    Side effect: the process working directory is changed to target_dir.
    NOTE: target_file is opened after that chdir, so a relative path is
    resolved against target_dir.
    """
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)
    os.chdir(target_dir)
    with open(target_file) as log:
        print('Opening {}'.format(target_file))

        # Bootstrap: skip everything before the first dummy message.
        for message in log:
            data = message.split()
            if len(data) == 14 and data[2] == "0":
                break
        else:
            # Empty log or no dummy message at all: nothing to split.
            return

        while True:
            # Using message rather than reusing data here; see next comment
            data = message.split()[6:]
            title = create_title(''.join(data), titles)
            if title is None:
                break
            with open(title, 'w') as section:
                print('Created {}'.format(title))
                for message in log:
                    # Knowing the input format, you should be able to extract
                    # the same information as the test below by analyzing
                    # message rather than splitting it, as ferada suggested
                    data = message.split()
                    if len(data) == 14 and data[2] == "0":
                        break
                    section.write(message)
                else:
                    # Log ended without a terminating dummy message; without
                    # this exit the outer loop would re-process the last
                    # line forever.
                    return
# Script entry point: run one split and report how long it took.
if __name__ == '__main__':
 start_time = time.time()
 # The three arguments are placeholders; supply the log path, the output
 # directory and the titles lookup.
 split_asc_file(..,..,..) #Whatever
 print "Splitting completed in {} seconds".format(time.time() - start_time)

The workflow I proposed also lets you open the section file using a `with` statement, which is preferred in Python. I also changed `mkdir` to `makedirs`, just in case.

lang-py

AltStyle によって変換されたページ (->オリジナル) /