Script to replace contents inside the files

Mon Jul 24 00:57:25 EDT 2017

I have thousands of HTML files inside a folder. I want to rewrite each file
name wherever it is referenced inside the other files. For example, if the
file name 'abcd1234.html' is found inside another file, say file2.html, then
I want to remove the last 4 digits of the file name and shorten the
extension, i.e. 'abcd1234.html' => 'abcd.htm'.
I have tried a script (attached), but your suggestions on it are welcome.
Regards,
Kunal
-------------- next part --------------
import os
import re
def script_to_create_folder(path=None):
    """Collect candidate files under *path* and rewrite file-name references.

    Despite its name, this function creates no folder.  It walks *path*,
    gathers every file that is not excluded by the directory or file-name
    filters below, and then calls ``find_filename_inside_files`` for every
    (collected file name, collected file path) pair.

    Args:
        path: Directory to walk.  When omitted, the original hard-coded
            location is used.

    Returns:
        None.
    """
    if path is None:
        path = r'D:\macrocodesrequired\Testing_Script\Real_testing_\New folder\brpt'

    # Substrings that exclude a directory (and everything below it).
    skipped_dir_parts = (
        'About', 'Community', 'support', 'home', 'Products',
        'service', 'solutions', 'training', 'wheretobuy',
    )
    # Substrings that exclude an individual file name.
    skipped_name_parts = (
        'default', 'index', 'category', 'Category',
        'Default', 'Index', 'home', 'support',
    )

    path_list = []
    filename_list = []
    for root, _dirs, names in os.walk(path):
        # Test only the part of the path below the start directory, so a
        # base path that happens to contain e.g. "home" does not exclude
        # the whole tree.
        relative_root = os.path.relpath(root, path)
        # Previously this check was an ``if ...: pass`` followed by a second,
        # independent ``if``, so excluded directories were still processed.
        if any(part in relative_root for part in skipped_dir_parts):
            continue
        for nm in names:
            if any(part in nm for part in skipped_name_parts):
                continue
            filename_list.append(nm)
            path_list.append(os.path.join(root, nm))

    # Every collected file is scanned for every collected file name.
    for file_path in path_list:
        for name in filename_list:
            find_filename_inside_files(name, file_path)
def find_filename_inside_files(file_name, dir_path):
 pattern_list = ['\d+$', '\d+\w$', '\d+-\d$', '\w\d+$', '\d\w\d\w', '\w\d+$', '\w\d\w\d']
 data = []
 replace_str = ''
 read_cnt = 0
 digits_to_replace = 0
 with open(dir_path, 'r', encoding='utf-8') as file_handle:
 data = file_handle.read()
 #print(data)
 if file_name in data:
 #print(file_name)
 for search_pattern in pattern_list:
 read_cnt = 0
 if '-' in file_name:
 #print("===>",search_pattern)
 if re.search(search_pattern, file_name.split('.')[0]):
 digits_to_replace = filename_with_hypen(file_name, search_pattern)
 read_cnt = 1
 position = file_handle.tell()
 replace_str = replace_oldstring_newstring(
 data,
 file_name,
 digits_to_replace
 )
 # file_handle.seek(0, 0)
 # file_handle.write(replace_str)
 elif re.search(search_pattern, file_name.split('.')[0]):
 digits_to_replace = filename_without_hypen(file_name, search_pattern)
 read_cnt = 1
 replace_str = replace_oldstring_newstring(data, file_name, digits_to_replace)
 if read_cnt == 1:
 #print("write to")
 print(file_name)
 print(dir_path)
 with open(dir_path, 'w', encoding='utf-8') as file_out:
 file_out.write(replace_str)
 exit()
def filename_without_hypen(file_name, pattern):
    """Return the negative length of the text *pattern* matches in the stem.

    The stem is the part of *file_name* before its first ``.``.  The result
    can be used as a slice index addressing the matched tail of the stem,
    e.g. ``-4`` for 'abcd1234.html' with a trailing-digits pattern.

    Args:
        file_name: File name such as 'abcd1234.html'.
        pattern: Regular expression searched for in the stem.

    Returns:
        ``-len(matched_text)`` when *pattern* matches a non-empty string,
        otherwise ``None``.
    """
    match = re.search(pattern, file_name.split('.')[0])
    # The former "> 2" and "1..3" length branches returned the same value;
    # together they covered every non-empty match.
    if match is None or not match.group():
        return None
    return -len(match.group())
def filename_with_hypen(file_name, pattern):
 value = re.search(pattern, file_name.split('.')[0])
 if bool(value):
 last_digits = value.group()
 if '-2' in last_digits or '-3' in last_digits:
 return -(len(last_digits))
 else:
 return -(len(last_digits))
def replace_oldstring_newstring(data, filename, last_digits_to_replace):
    """Return *data* with each occurrence of *filename* replaced by its short form.

    The short form is built from *filename* by cutting the stem (the text
    before the first ``.``) at slice index *last_digits_to_replace* and by
    turning ``.html`` into ``.htm``; for example 'abcd1234.html' with ``-4``
    becomes 'abcd.htm'.

    Only occurrences of the complete file name are touched.  Removing the
    digit suffix or rewriting ``.html`` throughout *data* would also alter
    unrelated text and references to other files.

    Args:
        data: Text to search.
        filename: File name to replace; must occur in *data*.
        last_digits_to_replace: Slice index (normally negative) marking where
            the part of the stem to drop begins.  ``None`` or ``0`` leaves
            the stem unchanged.

    Returns:
        The updated text.

    Raises:
        ValueError: If *filename* does not occur in *data*.
    """
    if filename not in data:
        raise ValueError('%r not found in data' % (filename,))

    stem, dot, rest = filename.partition('.')
    if last_digits_to_replace:
        stem = stem[:last_digits_to_replace]
    suffix = (dot + rest).replace('.html', '.htm')
    return data.replace(filename, stem + suffix)
def main():
    """Script entry point: scan the folder and rewrite file-name references."""
    script_to_create_folder()


# Run only when executed directly, not when imported as a module.
if __name__ == '__main__':
    main()