The following is a general purpose file compression function with the following features:
- It can take either a single file or directory as the source input
- It defaults to the source directory when to destination is input
- It will compress all files of a type associated with the extension parameter
- It removes the original files if desired
This allows for a simple minimum working input of providing only the source directory (for which it defaults to compressing all containing files) While allowing specific destinations, individual files, or specific file extensions to be compressed.
import glob
import zipfile
import os
def _zip_and_delete_prot(src, dst = None, arcname = None, remove_uncompressed = True, extension = ""):
"""Takes either an individual or list of filenames from the source (src)
folder and compresses them into the destination (dst) with the option to
delete the original.
Parameters:
src: the path to the source directory
sdt: the path to the destination directory. If left empty
it will default to the directory of the source files
arcname: the name of the compressed file (and file path) must correspond
to the number of files grabbed from the source
remove_uncompressed: if True deletes the original input files
extension: The extension of the batch of files you want compressed
in the source directory, examples are ".txt", ".csv", ".xls"
"""
# Checks if a destination other than the source is provided, if not defaults to src
if dst is None:
if os.path.isdir(src) is True:
dst = src
else:
dst = os.path.dirname(src)
# Checks if the src is a file or a directory, if a directory we then group all files
# with the extension given by the extension parameter.
if os.path.isdir(src) is True:
fnames = sorted(glob.glob(src + f'\*{extension}'))
else:
fnames = [src]
# Iterating over all files and compressing them
for _file in fnames:
# Creating a variable with the .zip filename extension
file_bn_zip = os.path.basename(_file)[:-3]+"zip"
zip_dst = dst + "\\" + file_bn_zip
# Create a Zip file in write mode
with zipfile.ZipFile(zip_dst, 'w', zipfile.ZIP_DEFLATED) as zipf:
# Write to archive without directory structure included in the compression
if arcname is None:
zipf.write(_file, arcname = os.path.basename(_file))
print(f"File {os.path.basename(_file)} written to {file_bn_zip}")
# If arcname is provided try to use the provided arcname
else:
try:
zipf.write(_file, arcname)
print(f"Arcname {arcname} supplied and compressed")
except:
print(f"Arcnane {arcname} failed, check that structure
corresponds to the src files structure")
# Removing uncompressed files if desired
if remove_uncompressed is True:
os.remove(_file)
print(f"Input file {os.path.basename(_file)} removed")
1 Answer 1
Replace your os
module import with pathlib
and use more Path
.
Add PEP484 types to your function signatures.
sdt
is spelled dst
in your parameters.
Raw concatenation of an extension string to a glob pattern is asking for trouble; call escape()
on it.
Re. # Create a Zip file in write mode
- never
# Do thing
do_thing()
Obvious comments are more harmful than having no comment at all.
Don't is True
; use boolean variables as predicates directly.
Suggested
Only lightly tested -
import glob
import zipfile
from pathlib import Path
def _zip_and_delete_prot(
src: Path,
dst: Path | None = None,
arcname: str | None = None,
remove_uncompressed: bool = True,
extension: str = '',
) -> None:
"""Takes either an individual or list of filenames from the source (src)
folder and compresses them into the destination (dst) with the option to
delete the original.
Parameters:
src: the path to the source directory
dst: the path to the destination directory. If left empty
it will default to the directory of the source files
arcname: the name of the compressed file (and file path) must correspond
to the number of files grabbed from the source
remove_uncompressed: if True deletes the original input files
extension: The extension of the batch of files you want compressed
in the source directory, examples are '.txt', '.csv', '.xls'
"""
# Checks if a destination other than the source is provided, if not defaults to src
if dst is None:
if src.is_dir():
dst = src
else:
dst = src.parent
# Checks if the src is a file or a directory, if a directory we then group all files
# with the extension given by the extension parameter.
if src.is_dir():
fnames = sorted(src.glob('*' + glob.escape(extension)))
else:
fnames = src,
# Iterating over all files and compressing them
for file in fnames:
file_bn_zip = file.with_suffix('.zip')
zip_dst = dst / file_bn_zip.name
# Write to archive without directory structure included in the compression
# If arcname is provided try to use the provided arcname
if arcname is None:
arcname_inner = file.name
else:
arcname_inner = arcname
with zipfile.ZipFile(file=zip_dst, mode='w', compression=zipfile.ZIP_DEFLATED) as zipf:
try:
zipf.write(filename=file, arcname=arcname_inner)
print(f'File {arcname_inner} written to {zip_dst}')
except IOError:
print(f'File {arcname_inner} failed; check that structure '
'corresponds to the src files structure')
if remove_uncompressed:
file.unlink()
print(f'Input file {file.name} removed')
-
1\$\begingroup\$ Thank you so much for giving an example. I only discovered the PEP484 signatures a couple days ago and seeing how you formatted the parameters was incredibly informative. \$\endgroup\$Christian Fitzner– Christian Fitzner2024年02月08日 16:19:57 +00:00Commented Feb 8, 2024 at 16:19
-
\$\begingroup\$ Also the general formatting and use of methods was very helpful \$\endgroup\$Christian Fitzner– Christian Fitzner2024年02月08日 16:26:41 +00:00Commented Feb 8, 2024 at 16:26
Explore related questions
See similar questions with these tags.
import
statements were added in revision 3. \$\endgroup\$