Organize files based on inputted keyword

Question 1

My code basically organizes the files in a given directory based on the keyword inputted by the user. For example, if there is a directory with three files: sample 1.txt, sample 2.txt and random.txt and if the user inputted sample as the keyword, then the program will make a folder named sample and move all files containing sample into that folder. The function works, but I feel like it could be divided into other functions or can be made shorter:

def specific_name_category(keyword, folder_to_track, verbose):
 start_time = time.monotonic()
 moved = {}
 movedList = []
 no_of_files_moved = 0
 size = 0
 movedFiles = False
 count = 0
 file_mappings = collections.defaultdict()
 file_names = []
 keyword = keyword.lower()
 delimiters = ['.', ',', '!', ' ', '-', ';', '?', '*', '!', '@', '#', '$', '%', '^', '&', '(', ')', '_', '/', '|','<', '>']
 regexPattern = '|'.join(map(re.escape, delimiters))
 if check_files(folder_to_track):
 for file in os.listdir(folder_to_track):
 if not os.path.isdir(os.path.join(folder_to_track, file)):
 try:
 for filename in os.listdir(folder_to_track):
 filename = filename.lower()
 file_names.append(filename)
 for filename in os.listdir(folder_to_track):
 filename = filename.lower()
 splittedstring = re.split(regexPattern, filename, 0)
 if not os.path.isdir(os.path.join(folder_to_track, filename)) and keyword in splittedstring:
 file_mappings.setdefault(keyword, []).append(filename)
 for folder_name, folder_items in file_mappings.items():
 folder_path = os.path.join(folder_to_track, folder_name)
 folder_exists = os.path.exists(folder_path)
 if not folder_exists:
 os.mkdir(folder_path)
 for filename in file_names:
 filename = filename.lower()
 splittedstring = re.split(regexPattern, filename, 0)
 if folder_name in splittedstring:
 count = count + 1
 source = os.path.join(folder_to_track, filename)
 size = size + os.path.getsize(source)
 destination = os.path.join(folder_path, filename)
 moved[filename] = folder_name + '/' + filename
 moveIncrementing(source, destination)
 movedFiles = True
 if folder_exists:
 for filename in file_names:
 filename = filename.lower()
 splittedstring = re.split(regexPattern, filename, 0)
 if folder_name in splittedstring and not os.path.isdir(
 os.path.join(folder_to_track, filename)):
 count = count + 1
 source = os.path.join(folder_to_track, filename)
 size = size + os.path.getsize(source)
 destination = os.path.join(folder_path, filename)
 moved[filename] = folder_name + '/' + filename
 moveIncrementing(source, destination)
 movedFiles = True
 end_time = time.monotonic()
 for key, value in moved.items():
 no_of_files_moved = no_of_files_moved + 1
 movedList.append(f"{no_of_files_moved}) {key} {Fore.GREEN} --> {Fore.WHITE} {value}")
 output = '\n'.join(map(str, movedList))
 if movedFiles:
 displayProgressbar(count)
 if count == 1 and verbose:
 return f"Successfully moved {count} file{os.linesep}Time taken: {timedelta(seconds=end_time - start_time)}{os.linesep}Total size of files moved: {calc_size(size)}{os.linesep}{os.linesep}Files Moved:{os.linesep}{output}{os.linesep}"
 elif count != 1 and verbose:
 return f"Successfully moved {count} files{os.linesep}Time taken: {timedelta(seconds=end_time - start_time)}{os.linesep}Total size of files moved: {calc_size(size)}{os.linesep}{os.linesep}Files Moved:{os.linesep}{output}{os.linesep}"
 elif count == 1 and not verbose:
 return f"Successfully moved {count} file{os.linesep}Time taken: {timedelta(seconds=end_time - start_time)}{os.linesep}Total size of files moved: {calc_size(size)}{os.linesep}"
 elif count != 1 and not verbose:
 return f"Successfully moved {count} files{os.linesep}Time taken: {timedelta(seconds=end_time - start_time)}{os.linesep}Total size of files moved: {calc_size(size)}{os.linesep}"
 else:
 return f"Successfully moved {count} files{os.linesep}Time taken: {timedelta(seconds=end_time - start_time)}{os.linesep}Total size of files moved: {calc_size(size)}{os.linesep}"
 else:
 return os.linesep + Fore.RED + f'Error: Could not organize {folder_to_track}' + os.linesep
 except Exception as e:
 return os.linesep + Fore.RED + f'Error: Could not organize {folder_to_track}' + os.linesep
 elif os.listdir(folder_to_track).__len__() == 0:
 return os.linesep + Fore.RED + f'Error: {folder_to_track} is empty' + os.linesep
 elif all(os.path.isdir(os.path.join(folder_to_track, file + '//')) for file in os.listdir(folder_to_track)):
 return os.linesep + Fore.RED + f'Error: {folder_to_track} only contains folders' + os.linesep
 else: 
 return os.linesep + Fore.RED + f'Error: Could not organize {folder_to_track}' + os.linesep

check_files():

def check_files(folder_to_track):
 files = os.listdir(folder_to_track)
 isdir = any(os.path.isdir(os.path.join(folder_to_track, file + '//')) for file in files)
 allisdir = all(os.path.isdir(os.path.join(folder_to_track, file + '//')) for file in files)
 
 if allisdir or files.__len__() == 0:
 return False
 else: 
 return True

calc_size():

UNITS_MAPPING = [
 (1<<50, ' PB'),
 (1<<40, ' TB'),
 (1<<30, ' GB'),
 (1<<20, ' MB'),
 (1<<10, ' KB'),
 (1, (' byte', ' bytes')),
]
def calc_size(bytes, units=UNITS_MAPPING):
 for factor, suffix in units:
 if bytes >= factor:
 break
 amount = int(bytes / factor)
 if isinstance(suffix, tuple):
 singular, multiple = suffix
 if amount == 1:
 suffix = singular
 else:
 suffix = multiple
 return str(amount) + suffix

moveIncrementing():

def moveIncrementing(source, destination):
 try:
 i = 0
 destination_name = os.path.splitext(destination)[0]
 destination_extension = os.path.splitext(destination)[-1]
 while True:
 try:
 if i == 0:
 destination = destination_name + destination_extension
 else:
 destination = destination_name + " " + "(" + str(i) + ")" + destination_extension
 return os.rename(source, destination if i else destination)
 
 except OSError as ex:
 i = i + 1
 pass
 except Exception as e:
 pass

displayProgressbar():


def displayProgressbar(item_count):
 print(os.linesep)
 if (item_count > 0):
 bar = IncrementalBar('Organizing...', max=item_count)
 for _ in range(item_count):
 bar.next()
 time.sleep(0.01)
 bar.finish()

Question 2

Use import pathlib already! It's much more convenient than the ancient os.path API.

Question 3

Naming

The PEP 8 style guide recommends snake_case for function names. You already do this for some functions, like calc_size. For consistency, change:

moveIncrementing

to:

move_incrementing

There are other functions as well.

The variable named bytes is the same name as a Python built-in function. This can be confusing. To eliminate the confusion, rename the variable as something like num_bytes. The first clue is that "bytes" has special coloring (syntax highlighting) in the question, as it does when I copy the code into my editor.

Documentation

The PEP 8 style guide recommends adding a docstring for the functions to describe they do, what their input types are and what they return.

Simpler

Lines like this:

i = i + 1

are simpler using the special assignment operators:

i += 1

These 2 lines:

filename = filename.lower()
file_names.append(filename)

can be combined, eliminating a variable:

file_names.append(filename.lower())

Efficiency

In this line in the moveIncrementing function:

return os.rename(source, destination if i else destination)

I'm pretty sure the ternary operator is not needed because you always return the same value, namely destination. This is simpler:

return os.rename(source, destination)

In the specific_name_category function, these separate if statements:

if not folder_exists:
if folder_exists:

should be combined into a single if/else statement:

if not folder_exists:
else:

The folder_exists can only have one value at a time, so the checks are mutually exclusive. This makes the code more efficient since you don't have to perform the 2nd check if the first is true. Also, this more clearly shows the intent of the code.

This can also be simplified by eliminating the folder_exists variable:

if os.path.exists(folder_path):
else:

Note the change in polarity, so the statements under each clause should be swapped.

DRY

You have several very long lines with a lot of common code, such as:

f"Successfully moved {count} files{os.linesep}Time taken: {timedelta(seconds=end_time - start_time)}{os.linesep}Total size of files moved: {calc_size(size)}{os.linesep}"

Long lines are hard to read and maintain. You should try to shorten them.

One way to do so would be to factor out common code. This is repeated verbatim and can be assigned to a variable:

{os.linesep}Time taken: {timedelta(seconds=end_time - start_time)}{os.linesep}Total size of files moved: {calc_size(size)}

toolic toolic 14.6k5 gold badges29 silver badges203 bronze badges · Accepted Answer · 2025-02-06 18:43:03Z

Naming

The PEP 8 style guide recommends snake_case for function names. You already do this for some functions, like calc_size. For consistency, change:

moveIncrementing

to:

move_incrementing

There are other functions as well.

The variable named bytes is the same name as a Python built-in function. This can be confusing. To eliminate the confusion, rename the variable as something like num_bytes. The first clue is that "bytes" has special coloring (syntax highlighting) in the question, as it does when I copy the code into my editor.

Documentation

The PEP 8 style guide recommends adding a docstring for the functions to describe they do, what their input types are and what they return.

Simpler

Lines like this:

i = i + 1

are simpler using the special assignment operators:

i += 1

These 2 lines:

filename = filename.lower()
file_names.append(filename)

can be combined, eliminating a variable:

file_names.append(filename.lower())

Efficiency

In this line in the moveIncrementing function:

return os.rename(source, destination if i else destination)

I'm pretty sure the ternary operator is not needed because you always return the same value, namely destination. This is simpler:

return os.rename(source, destination)

In the specific_name_category function, these separate if statements:

if not folder_exists:
if folder_exists:

should be combined into a single if/else statement:

if not folder_exists:
else:

The folder_exists can only have one value at a time, so the checks are mutually exclusive. This makes the code more efficient since you don't have to perform the 2nd check if the first is true. Also, this more clearly shows the intent of the code.

This can also be simplified by eliminating the folder_exists variable:

if os.path.exists(folder_path):
else:

Note the change in polarity, so the statements under each clause should be swapped.

DRY

You have several very long lines with a lot of common code, such as:

f"Successfully moved {count} files{os.linesep}Time taken: {timedelta(seconds=end_time - start_time)}{os.linesep}Total size of files moved: {calc_size(size)}{os.linesep}"

Long lines are hard to read and maintain. You should try to shorten them.

One way to do so would be to factor out common code. This is repeated verbatim and can be assigned to a variable:

{os.linesep}Time taken: {timedelta(seconds=end_time - start_time)}{os.linesep}Total size of files moved: {calc_size(size)}

Stack Exchange Network

Organize files based on inputted keyword

1 Answer 1

Naming

Documentation

Simpler

Efficiency

DRY

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

1 Answer 1

Naming

Documentation

Simpler

Efficiency

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related