I am writing a python password manager, and I know there's a lot of scrutiny that goes into storing passwords (don't worry, mine aren't plaintext). I was hoping that this community could help me improve style, use of libraries, or anything else. Any and all pointers are gladly accepted.
There were a few ideas that I implemented here:
- encrypting each password with a unique salt, even in memory
- encrypting each database with a unique salt when they are stored long-term
- be able to save to a database file (custom format)
- be able to read from a database file (custom format)
I know that there are a lot of services that do this kind of thing already, but I thought I'd give it a spin, to learn and have fun. Some samples of how to use the library are provided by the runner file.
As this got a lot of attention, my most recent code will be kept on this GitHub repo.
runner:
import sys, os
from .passdb import PassDB
if __name__ == "__main__":
a = PassDB()
# print(a)
a.password = "password"
a.set_entry("user", "localhost", "sample_password")
# print(a.enc_str())
a_copy = PassDB.open_db(a.enc_str(), "password")
# print(a_copy.password)
if a_copy is not None:
print(a_copy.get_entry("user@localhost"))
print(a_copy.get_password("user@localhost"))
a_copy.save_as("tmp.passdb", "sample Password")
passdb.py
:
import base64
import hashlib
import pandas
from Crypto import Random
from Crypto.Cipher import AES
import json
import re
from io import StringIO
import datetime
class PassDB(object):
_valid_init_fields = ["data", "path", "password", "settings"]
version = "Version 0.0.1"
settings: dict
data: pandas.DataFrame
_defaults = {
"salt_size": 64,
"block_size": 32, # Using AES256
"enc_sample_content": "The provided password is correct",
"salt": None,
"path": None,
"hash_depth": 9
}
_format = """### PYPASSMAN {version} ###
{settings}
### SAMPLE ###
{enc_sample}
### DATA ###
{data}
"""
def __init__(self, *args, **kwargs):
if len(args) > 3:
raise TypeError("Too Many Arguments")
if len(args) > 2:
self.data = args[2]
else:
self.data = None
if len(args) > 1:
self.password = args[1]
else:
self.password = None
if len(args) > 0:
self.path = args[0]
else:
self.path = None
for key, arg in kwargs.items():
if key in self._valid_init_fields:
setattr(self, key, arg)
if self.data is None:
self.data = pandas.DataFrame(
columns=[
"account",
"hostname",
"salt",
"password",
"hash_depth",
"dateModified",
"dateCreated"
]
)
if getattr(self, "settings", None) is None:
self.settings = self._defaults.copy()
if self.settings.get("salt", None) is None:
self.settings["salt"] = base64.b64encode(Random.new().read(
self.settings["salt_size"]
)).decode("utf-8")
for key in self._defaults.keys():
if key not in self.settings:
self.settings[key] = self._defaults[key]
@classmethod
def open_db(cls, raw, password):
settings, sample, data = (*map(
lambda string: string.strip(),
re.split(r"###.*###\n", raw)[1:]
),)
settings = json.loads(settings)
sample = cls._decrypt(sample, password, settings["salt"], settings["hash_depth"])
if not sample == settings["enc_sample_content"]:
raise ValueError(
"Cannot open PassDB: incorrect password provided")
data = cls._decrypt(data, password, settings["salt"], settings["hash_depth"])
data = pandas.read_csv(StringIO(data))
output = cls(
settings=settings,
data=data,
password=password
)
return output
def save_as(self, path, password):
settings_cp = self.settings.copy()
settings_cp["path"] = path
new_dict = self.__class__(
data = self.data,
path = path,
password = password,
settings = settings_cp
)
new_dict.save()
return True
def save(self):
with open(self.path, "w+") as dest:
enc_data = self._encrypt(
self.data.to_csv(index_label="index"),
self.password, self.settings["salt"],
self.settings["hash_depth"]
)
enc_sample = self._encrypt(
self.settings["enc_sample_content"],
self.password, self.settings["salt"],
self.settings["hash_depth"])
dest.write(self._format.format(
version=str(self.version),
settings=json.dumps(self.settings),
data=enc_data,
enc_sample=enc_sample
))
@classmethod
def _encrypt(cls, raw, password, salt, hash_depth):
raw = cls._pad(raw)
iv = Random.new().read(AES.block_size)
salt = base64.b64decode(salt)
key = hashlib.sha256(
str(password).encode() + salt
).digest()
for i in range(hash_depth):
key = hashlib.sha256(key + salt).digest()
cipher = AES.new(key, AES.MODE_CBC, iv)
return base64.b64encode(iv + cipher.encrypt(raw)).decode("utf-8")
@classmethod
def _decrypt(cls, enc, password, salt, hash_depth):
enc = base64.b64decode(enc)
iv = enc[:AES.block_size]
salt = base64.b64decode(salt)
key = hashlib.sha256(
password.encode() + salt
).digest()
for i in range(hash_depth):
key = hashlib.sha256(key + salt).digest()
cipher = AES.new(key, AES.MODE_CBC, iv)
try:
return cls._unpad(
cipher.decrypt(
enc[AES.block_size:]
)
).decode('utf-8')
except UnicodeDecodeError:
raise ValueError("Incorrect Password")
@classmethod
def _pad(cls, s):
bs = cls._defaults["block_size"]
return (
s + (bs - len(s) % bs) *
chr(bs - len(s) % bs)
)
@staticmethod
def _unpad(s):
return s[:-ord(s[len(s)-1:])]
def enc_str(self):
enc_data = self._encrypt(
self.data.to_csv(index_label="index"),
self.password, self.settings["salt"],
self.settings["hash_depth"]
)
enc_sample = self._encrypt(
self.settings["enc_sample_content"],
self.password, self.settings["salt"],
self.settings["hash_depth"]
)
return (self._format.format(
version=str(self.version),
enc_sample=enc_sample,
settings=json.dumps(self.settings),
data=enc_data
))
def __str__(self):
path = self.settings["path"]
return "PassDB <{} entries{}>".format(
len(self.data),
" at '{}'".format(path) if path is not None else ""
)
def set_entry(self, *args):
account, hostname, password = None, None, None
if len(args) == 1:
account, hostname_password = args[0].split("@")
hostname, password, other = hostname_password.split(":")
elif len(args) == 2:
account_hostname, password = args
account, hostname = account_hostname.split("@")
elif len(args) == 3:
account, hostname, password = args
else:
raise ValueError("""
PassDB.set_entry :: Too many arguments
usage(1): get_password(account, hostname, password)
usage(2): get_password("{account}@{hostname}", password)
usage(3): get_password("{account}@{hostname}:{password}") """
)
for char in (":", "@"):
for item in account, hostname, password:
if char in item:
raise ValueError("""
account, hostname, and password cannot contain colon (:) or at symbol (@)""")
if len(self.data) > 0:
for index, entry in self.data.iterrows():
if entry["account"] == account and entry["hostname"] == hostname:
salt = base64.b64encode(Random.new().read(
self.settings["salt_size"]
)).decode("utf-8")
password = self._encrypt(
password,
self.settings["salt"],
salt,
self.settings["hash_depth"]
)
self.data.loc[index] = (
account, hostname,
salt, password,
self.settings["hash_depth"],
str(datetime.datetime.utcnow().isoformat()),
str(datetime.datetime.utcnow().isoformat())
)
else:
salt = base64.b64encode(Random.new().read(
self.settings["salt_size"]
)).decode("utf-8")
password = self._encrypt(
password,
self.settings["salt"],
salt,
self.settings["hash_depth"]
)
self.data.loc[0] = (
account,
hostname,
salt,
password,
self.settings["hash_depth"],
str(datetime.datetime.utcnow().isoformat()),
str(datetime.datetime.utcnow().isoformat())
)
def get_entry(self, *args):
if len(args) == 1:
account, hostname = args[0].split("@")
elif len(args) == 2:
account, hostname = args
else:
raise ValueError("""
PassDB.get_entry :: Too many arguments
usage(1): get_entry(account, hostname)
usage(2): get_entry("{account}@{hostname}")""")
if(getattr(self, "password") is None):
raise ValueError("Cannot get entry when PassDB instance password is None")
if(len(self.data)) == 0:
return None
for index, entry in self.data.iterrows():
if entry["account"] == account and entry["hostname"] == hostname:
return entry
return None
def get_password(self, *args):
if len(args) == 1:
account, hostname = args[0].split("@")
elif len(args) == 2:
account, hostname = args
else:
raise ValueError("""
PassDB.get_password :: Too many arguments
usage(1): get_password(account, hostname)
usage(2): get_password("{account}@{hostname}")""")
entry = self.get_entry(account, hostname)
if isinstance(entry["password"], str):
return self._decrypt(entry["password"], self.settings["salt"], entry["salt"], entry["hash_depth"])
raise ValueError("Password for {account}@{hostname} in unexpected format".format(**entry))
```
-
\$\begingroup\$ I rolled back your last edit. After getting an answer you are not allowed to change your code anymore. This is to ensure that answers do not get invalidated and have to hit a moving target. If you have changed your code you can either post it as an answer (if it would constitute a code review) or ask a new question with your changed code (linking back to this one as reference). Refer to this meta post for more information \$\endgroup\$Sᴀᴍ Onᴇᴌᴀ– Sᴀᴍ Onᴇᴌᴀ ♦2019年04月30日 19:11:37 +00:00Commented Apr 30, 2019 at 19:11
-
\$\begingroup\$ @SᴀᴍOnᴇᴌᴀ, sorry. I clearly need to take a closer look at the community guidelines. \$\endgroup\$David Culbreth– David Culbreth2019年04月30日 23:12:14 +00:00Commented Apr 30, 2019 at 23:12
-
\$\begingroup\$ Its okay - it happens to many users here... \$\endgroup\$Sᴀᴍ Onᴇᴌᴀ– Sᴀᴍ Onᴇᴌᴀ ♦2019年05月01日 00:03:04 +00:00Commented May 1, 2019 at 0:03
3 Answers 3
Some general tips:
- The runner should use argparse to parse arguments. It most definitely should not hardcode passwords.
(object)
is redundant in Python 3 class definitions.I'd recommend running any Python code through Black, flake8 and mypy with a strict configuration like this one:
[flake8] doctests = true exclude = .git max-complexity = 5 max-line-length = 120 ignore = W503,E203 [mypy] check_untyped_defs = true disallow_untyped_defs = true ignore_missing_imports = true no_implicit_optional = true warn_redundant_casts = true warn_return_any = true warn_unused_ignores = true
- You reuse variable names with completely different semantics. This is a really bad idea for understanding what the code is doing and following along even otherwise trivial logic. For example,
settings = json.loads(settings)
means that settings is originally astr
, effectively a serialized JSON object, and afterwards adict
. These have completely different semantics and interaction patterns. The easiest way to deal with this is to treat almost every variable as immutable, and naming the variables according to what they really are. For example,settings = json.loads(serialized_settings)
. - Names should be descriptive, for example
password_database = PasswordDatabase()
. - Don't use
*args
and**kwargs
unless you need dynamic parameter lists. Rather than indexing*args
you should use named parameters. If they have default values those should go in the method signature. .get(foo, None)
can be simplified to.get(foo)
-get()
returnsNone
by default.if foo is None
can in the vast majority of cases be changed to the more idiomaticif foo
.- I would highly recommend using a well-known open format such as the KeePass one for storing this data.
This should not be in there:
if not sample == settings["enc_sample_content"]: raise ValueError( "Cannot open PassDB: incorrect password provided")
- There is a lot of encoding and decoding happening, which greatly obfuscates the state and looks unnecessary in several places.
- I would not trust this sort of code without a comprehensive test suite.
With the caveat that I'm not a cryptographer:
- Salting does not make sense unless you're hashing the password (which you don't want to do in this case). I'll refrain from any other comments on how the salting is done unless someone corrects this.
-
\$\begingroup\$ I support your note on salting symmetrically encrypted passwords. The random IV in AES CBC should be functionaly equivalent. (Disclaimer: also not a cryptographer here) \$\endgroup\$AlexV– AlexV2019年04月30日 06:46:43 +00:00Commented Apr 30, 2019 at 6:46
-
\$\begingroup\$ +1 for #4 implying the validity of variable reuse in cases of tight semantic relevance \$\endgroup\$Samy Bencherif– Samy Bencherif2019年04月30日 08:47:53 +00:00Commented Apr 30, 2019 at 8:47
-
\$\begingroup\$ Could you elaborate on #4? Maybe provide an example? I'm not sure that I understand what you mean. \$\endgroup\$David Culbreth– David Culbreth2019年04月30日 16:25:28 +00:00Commented Apr 30, 2019 at 16:25
-
\$\begingroup\$ Regarding point 2 - what are your thoughts on the Zen of Python, specifically "explicit is better than implicit"? \$\endgroup\$Adam Barnes– Adam Barnes2019年05月01日 11:56:18 +00:00Commented May 1, 2019 at 11:56
-
\$\begingroup\$ @AdamBarnes Yes, but simple is better than complex :) Seriously though, a lot of code would be much more verbose if we always specified the default. I expect
(object)
was removed from Python 3 precisely because it doesn't really add any useful information to the reader, while making the code just that little bit less verbose. \$\endgroup\$l0b0– l0b02019年05月01日 21:12:59 +00:00Commented May 1, 2019 at 21:12
Something about the cryptography:
- Do you still use the unmaintained PyCrypto library or the new PyCryptodome (a maintained mostly compatible drop-in-replacement)?
- You are using the CBC mode correctly (random IV for encryption), which is good.
- Data is not authenticated - even encrypted data can be changed without the possibility to detect. You can use HMAC (hash based message authentication code) or an AEAD (authenticated encryption with additional data) encryption mode.
- Your password derivation function has good ideas (Rounds + Salt), but is still a bit weak: Only 9 rounds by default are too less for todays standards. As the derivation functions apply the same ideas as for password storage, consider looking at those: E.g. PBKDF2 (which is included in Python) or Argon2 (one of the most modern).
concerning is None
In case of self.data
(pandas DataFrame) your usage of if foo is None
is the only valid option. In the other cases I disagree with the opinion, that if not foo
is better than if foo is None
, as it is in no way generally correct to assume that an empty object should be handled by the if-clause. An if foo is None
explicitley tells me, that there is only a single case, that needs special treatment. However, you have some rather strange constructs: I don't see the reason for using getattr
in if(getattr(self, "password") is None)
(also: redundant parentheses). This should be just if self.password is None
- or if not self.password
in case you also want to refuse empty passwords. There are other ones, but they mostly originate from you rather complicated __init__
mechanisms.
concerning __init__
Your constructor is too complicated. It either takes keyword arguments, that it maps, or maps arguments that may come via command line. I highly recommend to split the two cases: Create an alternative constructor as classmethod from_cli
that parses the command line arguments (argparse
or similar), and uses them as named arguments for the real constructor, that needs a clear signature like __init__(self, data=None, path=None, password=None, settings=None)
and sets the member variables explicitely. That way it's much easier to grasp, what state an instance of PassDB
is in after creation.
-
\$\begingroup\$ Thanks for your feedback! Due to the community guidelines, I'm not allowed to update the code once I've received my inital feedback. If you look on the repo I posted in the question, you'll see that I dramatically simplified my init method. \$\endgroup\$David Culbreth– David Culbreth2019年05月01日 12:05:26 +00:00Commented May 1, 2019 at 12:05