My code basically organizes the files in a given directory based on the keyword inputted by the user. For example, if there is a directory with three files: sample 1.txt
, sample 2.txt
and random.txt
and if the user inputted sample
as the keyword, then the program will make a folder named sample
and move all files containing sample
into that folder.
The function works, but I feel like it could be divided into other functions or can be made shorter:
def specific_name_category(keyword, folder_to_track, verbose):
start_time = time.monotonic()
moved = {}
movedList = []
no_of_files_moved = 0
size = 0
movedFiles = False
count = 0
file_mappings = collections.defaultdict()
file_names = []
keyword = keyword.lower()
delimiters = ['.', ',', '!', ' ', '-', ';', '?', '*', '!', '@', '#', '$', '%', '^', '&', '(', ')', '_', '/', '|','<', '>']
regexPattern = '|'.join(map(re.escape, delimiters))
if check_files(folder_to_track):
for file in os.listdir(folder_to_track):
if not os.path.isdir(os.path.join(folder_to_track, file)):
try:
for filename in os.listdir(folder_to_track):
filename = filename.lower()
file_names.append(filename)
for filename in os.listdir(folder_to_track):
filename = filename.lower()
splittedstring = re.split(regexPattern, filename, 0)
if not os.path.isdir(os.path.join(folder_to_track, filename)) and keyword in splittedstring:
file_mappings.setdefault(keyword, []).append(filename)
for folder_name, folder_items in file_mappings.items():
folder_path = os.path.join(folder_to_track, folder_name)
folder_exists = os.path.exists(folder_path)
if not folder_exists:
os.mkdir(folder_path)
for filename in file_names:
filename = filename.lower()
splittedstring = re.split(regexPattern, filename, 0)
if folder_name in splittedstring:
count = count + 1
source = os.path.join(folder_to_track, filename)
size = size + os.path.getsize(source)
destination = os.path.join(folder_path, filename)
moved[filename] = folder_name + '/' + filename
moveIncrementing(source, destination)
movedFiles = True
if folder_exists:
for filename in file_names:
filename = filename.lower()
splittedstring = re.split(regexPattern, filename, 0)
if folder_name in splittedstring and not os.path.isdir(
os.path.join(folder_to_track, filename)):
count = count + 1
source = os.path.join(folder_to_track, filename)
size = size + os.path.getsize(source)
destination = os.path.join(folder_path, filename)
moved[filename] = folder_name + '/' + filename
moveIncrementing(source, destination)
movedFiles = True
end_time = time.monotonic()
for key, value in moved.items():
no_of_files_moved = no_of_files_moved + 1
movedList.append(f"{no_of_files_moved}) {key} {Fore.GREEN} --> {Fore.WHITE} {value}")
output = '\n'.join(map(str, movedList))
if movedFiles:
displayProgressbar(count)
if count == 1 and verbose:
return f"Successfully moved {count} file{os.linesep}Time taken: {timedelta(seconds=end_time - start_time)}{os.linesep}Total size of files moved: {calc_size(size)}{os.linesep}{os.linesep}Files Moved:{os.linesep}{output}{os.linesep}"
elif count != 1 and verbose:
return f"Successfully moved {count} files{os.linesep}Time taken: {timedelta(seconds=end_time - start_time)}{os.linesep}Total size of files moved: {calc_size(size)}{os.linesep}{os.linesep}Files Moved:{os.linesep}{output}{os.linesep}"
elif count == 1 and not verbose:
return f"Successfully moved {count} file{os.linesep}Time taken: {timedelta(seconds=end_time - start_time)}{os.linesep}Total size of files moved: {calc_size(size)}{os.linesep}"
elif count != 1 and not verbose:
return f"Successfully moved {count} files{os.linesep}Time taken: {timedelta(seconds=end_time - start_time)}{os.linesep}Total size of files moved: {calc_size(size)}{os.linesep}"
else:
return f"Successfully moved {count} files{os.linesep}Time taken: {timedelta(seconds=end_time - start_time)}{os.linesep}Total size of files moved: {calc_size(size)}{os.linesep}"
else:
return os.linesep + Fore.RED + f'Error: Could not organize {folder_to_track}' + os.linesep
except Exception as e:
return os.linesep + Fore.RED + f'Error: Could not organize {folder_to_track}' + os.linesep
elif os.listdir(folder_to_track).__len__() == 0:
return os.linesep + Fore.RED + f'Error: {folder_to_track} is empty' + os.linesep
elif all(os.path.isdir(os.path.join(folder_to_track, file + '//')) for file in os.listdir(folder_to_track)):
return os.linesep + Fore.RED + f'Error: {folder_to_track} only contains folders' + os.linesep
else:
return os.linesep + Fore.RED + f'Error: Could not organize {folder_to_track}' + os.linesep
check_files():
def check_files(folder_to_track):
files = os.listdir(folder_to_track)
isdir = any(os.path.isdir(os.path.join(folder_to_track, file + '//')) for file in files)
allisdir = all(os.path.isdir(os.path.join(folder_to_track, file + '//')) for file in files)
if allisdir or files.__len__() == 0:
return False
else:
return True
calc_size():
UNITS_MAPPING = [
(1<<50, ' PB'),
(1<<40, ' TB'),
(1<<30, ' GB'),
(1<<20, ' MB'),
(1<<10, ' KB'),
(1, (' byte', ' bytes')),
]
def calc_size(bytes, units=UNITS_MAPPING):
for factor, suffix in units:
if bytes >= factor:
break
amount = int(bytes / factor)
if isinstance(suffix, tuple):
singular, multiple = suffix
if amount == 1:
suffix = singular
else:
suffix = multiple
return str(amount) + suffix
moveIncrementing():
def moveIncrementing(source, destination):
try:
i = 0
destination_name = os.path.splitext(destination)[0]
destination_extension = os.path.splitext(destination)[-1]
while True:
try:
if i == 0:
destination = destination_name + destination_extension
else:
destination = destination_name + " " + "(" + str(i) + ")" + destination_extension
return os.rename(source, destination if i else destination)
except OSError as ex:
i = i + 1
pass
except Exception as e:
pass
displayProgressbar():
def displayProgressbar(item_count):
print(os.linesep)
if (item_count > 0):
bar = IncrementalBar('Organizing...', max=item_count)
for _ in range(item_count):
bar.next()
time.sleep(0.01)
bar.finish()
-
\$\begingroup\$ Use import pathlib already! It's much more convenient than the ancient os.path API. \$\endgroup\$J_H– J_H2025年02月07日 16:41:53 +00:00Commented Feb 7 at 16:41
1 Answer 1
Naming
The PEP 8 style guide recommends
snake_case for function names. You already do this for some functions,
like calc_size
. For consistency, change:
moveIncrementing
to:
move_incrementing
There are other functions as well.
The variable named bytes
is the same name as a Python built-in function.
This can be confusing. To eliminate the confusion, rename the variable
as something like num_bytes
. The first clue is that "bytes" has special
coloring (syntax highlighting) in the question, as it does when I copy the code
into my editor.
Documentation
The PEP 8 style guide recommends adding a docstring for the functions to describe they do, what their input types are and what they return.
Simpler
Lines like this:
i = i + 1
are simpler using the special assignment operators:
i += 1
These 2 lines:
filename = filename.lower()
file_names.append(filename)
can be combined, eliminating a variable:
file_names.append(filename.lower())
Efficiency
In this line in the moveIncrementing
function:
return os.rename(source, destination if i else destination)
I'm pretty sure the ternary operator is not needed because you
always return the same value, namely destination
. This is simpler:
return os.rename(source, destination)
In the specific_name_category
function, these separate if
statements:
if not folder_exists:
if folder_exists:
should be combined into a single if/else
statement:
if not folder_exists:
else:
The folder_exists
can only have one value at a time, so the checks are
mutually exclusive. This makes the code more efficient since you don't
have to perform the 2nd check if the first is true. Also, this more
clearly shows the intent of the code.
This can also be simplified by eliminating the folder_exists
variable:
if os.path.exists(folder_path):
else:
Note the change in polarity, so the statements under each clause should be swapped.
DRY
You have several very long lines with a lot of common code, such as:
f"Successfully moved {count} files{os.linesep}Time taken: {timedelta(seconds=end_time - start_time)}{os.linesep}Total size of files moved: {calc_size(size)}{os.linesep}"
Long lines are hard to read and maintain. You should try to shorten them.
One way to do so would be to factor out common code. This is repeated verbatim and can be assigned to a variable:
{os.linesep}Time taken: {timedelta(seconds=end_time - start_time)}{os.linesep}Total size of files moved: {calc_size(size)}