I wrote the following program for encrypting PDF files in order to try out the PyPDF2 module.
It takes a path and a password via the command line or user input. Then all PDF files in the provided folder and subfolders get encrypted.
Edit: the counterpart for decrypting can be found in Decrypting PDF-Files
pdf_encrypter.py
"""
Takes a path and a password provided by the command line or if not
available by the user input.
Each .pdf file found in a folder and sub folder which is not encrypted
gets encrypted.
"""
import argparse
import os
import sys
from getpass import getpass
from pathlib import Path
from typing import Tuple

import PyPDF2
def get_password_from_user() -> str:
    """
    Asks the user to enter a password

    The prompt is repeated until a non-empty password has been entered.
    On an interactive terminal the password is read with ``getpass`` so it
    is not echoed on screen; when standard input is redirected (pipe, file)
    it is read with ``input`` instead.

    Returns:
        The non-empty password that was entered.

    Raises:
        EOFError: If the input ends before a password was entered.
    """
    # getpass can only switch off echoing on a real terminal; without one it
    # would merely warn and echo anyway, so fall back to input() explicitly.
    read_password = getpass if sys.stdin.isatty() else input
    while True:
        password: str = read_password("Enter Password:")
        if password:
            return password
def get_path_from_user() -> str:
    """Asks for a path from the User

    The prompt is repeated until the entered path exists. Surrounding
    whitespace is ignored and the path is normalised with
    ``os.path.normpath``, so either kind of slash may be typed on Windows.

    Returns:
        The normalised, existing path as a string.

    Raises:
        EOFError: If the input ends before a valid path was entered.
    """
    while True:
        entered: str = input("Enter absolute Path:").strip()
        # Do not split on "\\" and re-join: os.path.join("C:", "Users")
        # yields the drive-relative "C:Users" on Windows.
        # normpath("") is ".", which always exists, so empty input has to be
        # rejected before normalising.
        if entered:
            path = os.path.normpath(entered)
            if os.path.exists(path):
                return path
        print("Path doesn't exist\n")
def get_path_and_password() -> Tuple[str, str]:
    """
    Returns the folder path and the password to work with.

    When at least two command line arguments were passed, the first one
    is taken as the path and the second one as the password. Otherwise
    both values are requested from the user.
    """
    cli_arguments = sys.argv[1:]
    if len(cli_arguments) < 2:
        return get_path_from_user(), get_password_from_user()
    path, password = cli_arguments[:2]
    return path, password
def is_encrypted(filename: str) -> bool:
    """Tells whether the PDF file stored at ``filename`` is encrypted"""
    with open(filename, 'rb') as stream:
        return PyPDF2.PdfFileReader(stream).isEncrypted
def encrypt(filename: str, password: str) -> str:
    """
    Encrypts a file and returns the filename of the encrypted file.

    All pages of ``filename`` are copied into a new document which is
    encrypted with ``password`` and written next to the original as
    ``<name>_encrypted.pdf``. The original file is left untouched.

    Precondition: File is not encrypted

    Args:
        filename: Path of the PDF file to encrypt.
        password: Password the copy is encrypted with.

    Returns:
        Path of the newly written encrypted copy.
    """
    # splitext removes exactly the extension. str.rstrip('.pdf') would strip
    # every trailing '.', 'p', 'd' or 'f' character instead and mangle names
    # such as "app.pdf" (-> "a").
    base_name, _ = os.path.splitext(filename)
    filename_encrypted = base_name + "_encrypted.pdf"

    with open(filename, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfFileReader(pdf_file)
        pdf_writer = PyPDF2.PdfFileWriter()

        for page_number in range(pdf_reader.numPages):
            pdf_writer.addPage(pdf_reader.getPage(page_number))
        pdf_writer.encrypt(password)

        # Kept inside the outer `with` so the source file stays open while
        # the pages taken from its reader are written out.
        with open(filename_encrypted, 'wb') as pdf_file_encrypted:
            pdf_writer.write(pdf_file_encrypted)

    return filename_encrypted
def decrypt(filename: str, password: str) -> bool:
    """
    Try to decrypt a file. If not successful a false is returned.
    If the file passed is not encrypted also a false is passed

    Nothing is written to disk; the file is only opened and the password
    is checked against it.

    Args:
        filename: Path of the PDF file to test.
        password: Password to try.

    Returns:
        True if the file is encrypted and ``password`` opens it,
        otherwise False.
    """
    with open(filename, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfFileReader(pdf_file)
        if not pdf_reader.isEncrypted:
            return False
        # PdfFileReader.decrypt reports an integer code (0 = wrong password,
        # non-zero = matched); convert it so the annotated bool is honoured.
        return bool(pdf_reader.decrypt(password))
def pdf_encrypter():
    """
    Main routine
    Walks the chosen folder and all of its sub folders. For every file
    ending in '.pdf' which is not encrypted a new encrypted copy is
    created.
    Then the encrypted copy is decrypted temporary to test that the
    encryption works.
    If successful the original file is reported on the console; it is
    only printed here, not actually deleted.
    """
    path, password = get_path_and_password()
    for folder_name, _, filenames in os.walk(path):
        for filename in filenames:
            if not filename.endswith('.pdf'):
                continue
            source_path = os.path.join(folder_name, filename)
            if is_encrypted(source_path):
                continue
            # encrypt() is given the full path and returns the full path of
            # the copy. Joining folder_name onto it again would duplicate
            # the folder for relative roots ("docs/docs/a_encrypted.pdf").
            encrypted_path: str = encrypt(source_path, password)
            if decrypt(encrypted_path, password):
                print("remove" + filename)
# Start the main routine only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    pdf_encrypter()
I would like to hear suggestions on how to improve the program / make it easier to read etc...
Is it easy to understand or too complicated?
What can be solved more simply?
Are there any bad habits?
I'm not sure if the input for the path is handled well like this.
I tested it under Windows.
2 Answers 2
This is pretty good for a beginner! I can see a few improvements but overall your code is
- Well structured into functions
- Has decent docstrings
- Makes use of the latest Python features (Typing)
But there are still some improvements to be made,
A common expression is that Python comes with "batteries included". I've spotted a few standard library modules you missed that will make your life easier in the future.
- Use `argparse` over `sys.argv`
  `argparse` is a very cool Python module making it easy to get input from the command line.
- Getting the password via `input()` is a security risk
  There is another Python module made for this situation: `getpass`
- The way you get the path is a bit ugly
Luckily we can make use of yet another Python module
pathlib
You can enter a path (with any kind of slashes) and it will automatically convert it to use the correct slashes for the operating system.
-
Comment (score 1): Thanks for mentioning these modules. It looks like, unlike C++, Python already has many good utility libraries available by default. – Sandro4912, Jan 10, 2019 at 18:47
Putting the suggestions from Ludisposed together, we can improve the code with `argparse`, `getpass` and `pathlib` like this:
def get_password_from_user() -> str:
    """
    Asks the user to enter a password

    Keeps asking until something non-empty was entered. The password is
    read with ``getpass`` (no echo) when standard input is a terminal and
    with ``input`` when standard input is redirected.

    Returns:
        The non-empty password that was entered.

    Raises:
        EOFError: If the input ends before a password was entered.
    """
    # getpass needs a real terminal to hide the typed characters; for piped
    # input it would only warn and echo, so input() is used directly there.
    ask = getpass if sys.stdin.isatty() else input
    while True:
        # The keyword is `prompt`; the misspelt `promt` raised a TypeError.
        password: str = ask("Enter password: ")
        if password:
            # Return the value just read (`input_string` does not exist in
            # this version of the function).
            return password
def get_path_from_user() -> Path:
    """Asks for a path from the User

    The prompt is repeated until the entered path exists. Surrounding
    whitespace is ignored.

    Returns:
        The existing path as a ``pathlib.Path``.

    Raises:
        EOFError: If the input ends before a valid path was entered.
    """
    while True:
        entered = input("Enter absolute Path:").strip()
        # Path("") equals Path("."), which always exists; without this guard
        # just pressing Enter would silently select the current directory.
        if entered:
            path = Path(entered)
            if path.exists():
                return path
        print("Path doesn't exist\n")
def get_path_and_password() -> Tuple[Path, str]:
    """
    Gets path and password from command line or
    if not available from the user

    Each value is handled on its own: whatever was passed with ``--path``
    or ``--password`` is used, and only the missing value is asked for.

    Returns:
        A tuple of the root folder (as ``pathlib.Path``) and the password.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--path", help="provide root folder to look for PDFs to encrypt",
        type=str)
    parser.add_argument(
        "--password", help="password to encrypt PDFs with", type=str)
    args = parser.parse_args()

    # Prompt only for what is missing, so a lone --path or --password is
    # not thrown away. The path is resolved first to keep the prompt order.
    path = Path(args.path) if args.path else get_path_from_user()
    password = args.password or get_password_from_user()
    return path, password