First the PasswordStore
, which is pretty straight-forward. It stores title-password association, but it is important that a title can have multiple passwords.
The __or__
method is very important since that's how later password stores are joined.
"""Password store module for managing title-password associations."""
from __future__ import annotations
import copy
from collections import defaultdict
class PasswordStore:
"""A store that associates titles with multiple passwords using sets."""
def __init__(self) -> None:
self._store: dict[str, set[str]] = defaultdict(set)
def __getitem__(self, title: str) -> set[str]:
return self._store[title].copy()
def __contains__(self, title: str) -> bool:
return title in self._store
def __len__(self) -> int:
return len(self._store)
def add_password(self, title: str, password: str) -> None:
"""Add a password to the specified title."""
if title == "":
raise ValueError("Empty title")
if password == "":
raise ValueError("Empty password")
self._store[title].add(password)
def remove_password(self, title: str, password: str) -> bool:
"""Remove a password from the specified title.
Returns True if removed, False if not found."""
if title in self._store and password in self._store[title]:
self._store[title].remove(password)
if not self._store[title]:
del self._store[title]
return True
return False
def __iter__(self):
for title, passwords in self._store.items():
yield title, passwords
def __or__(self, p: PasswordStore) -> PasswordStore:
self_copy = copy.deepcopy(self)
for title, passwords in p:
for password in passwords:
self_copy.add_password(title, password)
return self_copy
def clear_passwords(self, title: str) -> None:
"""Clear all passwords for the specified title."""
if title in self._store:
del self._store[title]
def pretty_print(self) -> str:
"""Return a pretty-formatted string representation of the password store."""
if not self._store:
return "PasswordStore (empty)"
MAX_COL_WIDTH: int = 83
# Calculate column widths
max_title_length = min(
MAX_COL_WIDTH, max(len(title) for title in self._store.keys())
)
max_password_length = max(
max(len(password) for password in passwords)
for passwords in self._store.values()
)
title_width = max(max_title_length, len("Title"))
password_width = max(max_password_length, len("Password"))
top_line = f"┏━{'━' * title_width}━┳━{'━' * password_width}━┓"
header_line = (
f"┃ {'Title'.ljust(title_width)} ┃ {'Password'.ljust(password_width)} ┃"
)
header_separator_line = f"┣━{'━' * title_width}━╇━{'━' * password_width}━┫"
separator_line = f"┠─{'─' * title_width}─┼─{'─' * password_width}─┨"
bottom_line = f"┗━{'━' * title_width}━┷━{'━' * password_width}━┛"
lines = [top_line, header_line, header_separator_line]
first_entry = True
for title in sorted(self._store.keys()):
if len(title) > MAX_COL_WIDTH:
display_title = title[0 : (MAX_COL_WIDTH - 3)] + "..."
else:
display_title = title
passwords = sorted(self._store[title])
if not first_entry:
# no need to add a separator for the first line
lines.append(separator_line)
for i, password in enumerate(passwords):
if i == 0:
# First password for this title:
line = f"┃ {display_title.ljust(title_width)} │ {password.ljust(password_width)} ┃"
else:
# Additional passwords for same title:
line = f"┃ {' ' * title_width} │ {password.ljust(password_width)} ┃"
lines.append(line)
first_entry = False
lines.append(bottom_line)
return "\n".join(lines)
Here are some tests for the password store.
Since the passwords have no real security relevance, we save them in plain text.
Then we need "plugins" to actually gather the passwords, for which we defined an ABC (though maybe this would've been a good use case for protocols?):
"""Abstract base class for password extraction plugins."""
import abc
import typing
import hoarder.password_store
class PasswordPlugin(abc.ABC):
"""Abstract base class for password extraction plugins."""
@abc.abstractmethod
def __init__(self, config: dict[str, typing.Any]):
pass
@abc.abstractmethod
def extract_passwords(self) -> hoarder.password_store.PasswordStore:
"""Extract passwords from the file, returning a mapping of title -> passwords."""
pass
Now finally the first implementation of the PasswordPlugin
, the NZB password extractor. This plugin gets configured to search in certain directories for NZB files.
NZB is an XML-based file format for retrieving posts from Usenet, and the passwords can be either in the filename like filename{{password}}.nzb
or they are in a meta-tag of the header-section.
The "title" will then be the filename without the file extension and the password (if present).
There is still one complication: sometimes NZB files are compressed as RAR.
But, we already have a RarArchive class (basically a wrapper for 7z) that and implement a read method for it.
And finally we are ready for the NZB plugin:
"""NZB password extraction plugin."""
import logging
import os
import pathlib
import re
import traceback
import xml.etree.ElementTree as ET
import hoarder
from hoarder.password_plugin import PasswordPlugin
try:
from typing import override # type: ignore [attr-defined]
except ImportError:
from typing_extensions import override
logger = logging.getLogger("hoarder.nzb_password_plugin")
class NzbPasswordPlugin(PasswordPlugin):
"""Plugin to extract passwords from NZB filenames with {{password}} format."""
_nzb_paths: list[pathlib.Path]
@override
def __init__(self, config: dict[str, list[str]]):
if "nzb_paths" in config:
paths = [pathlib.Path(p) for p in config["nzb_paths"]]
invalid_paths = [p for p in paths if not p.is_dir()]
if len(invalid_paths) > 0:
raise FileNotFoundError(
f"No directory at {invalid_paths[0]}"
+ (
f" and {len(invalid_paths) - 1} other invalid paths"
if len(invalid_paths) > 1
else ""
)
)
else:
self._nzb_paths = paths
@staticmethod
def _extract_pw_from_nzb_filename(
file_path: pathlib.PurePath,
) -> tuple[str, str | None]:
filename = file_path.stem
# Extract the password from title{{password}}.nzb pattern
filename_passwords = re.findall(r"\{\{(.+?)\}\}", filename)
title = re.sub(r"\{\{.+?\}\}", "", filename).strip()
if len(filename_passwords) >= 2:
logger.error(f"Error when extracting password from {file_path}")
raise ValueError("Ambiguous passwords")
if len(filename_passwords) == 0:
return (title, None)
return (title, filename_passwords[0])
@staticmethod
def _extract_pw_from_nzb_file_content(content: bytes | str) -> str | None:
password: str | None = None
try:
logger.debug("Extracting password from file content")
root = ET.fromstring(content)
ns = {"nzb": "http://www.newzbin.com/DTD/2003/nzb"}
for meta in root.findall('.//nzb:meta[@type="password"]', ns):
if meta.text:
password = meta.text.strip()
break
except (ET.ParseError, OSError, UnicodeDecodeError):
logger.debug("Failure extracting password from content")
print(traceback.format_exc())
pass
return password
@staticmethod
def _process_directory(
nzb_directory: pathlib.Path,
) -> hoarder.password_store.PasswordStore:
dir_store = hoarder.password_store.PasswordStore()
content: str | bytes
for root, _, files in os.walk(nzb_directory):
for file in files:
title = password = None
full_path: pathlib.Path = nzb_directory / root / file
if full_path.suffix == ".nzb":
logger.debug(f"Processing NZB {full_path}")
title, password = NzbPasswordPlugin._extract_pw_from_nzb_filename(
full_path
)
if not password:
logger.debug("No password in filename, opening NZB file...")
with open(full_path) as f:
content = f.read()
password = (
NzbPasswordPlugin._extract_pw_from_nzb_file_content(
content
)
)
if password:
dir_store.add_password(title, password)
elif full_path.suffix == ".rar":
logger.debug(f"Processing RARed NZB(s) {full_path}")
rar_file: hoarder.RarArchive = hoarder.RarArchive.from_path(
full_path
)
for file_entry in rar_file.files:
logger.debug(f"Read {file_entry.path}... extracting passwords")
if file_entry.path.suffix == ".nzb":
(
title,
password,
) = NzbPasswordPlugin._extract_pw_from_nzb_filename(
file_entry.path
)
if not password:
content = rar_file.read_file(file_entry.path)
password = (
NzbPasswordPlugin._extract_pw_from_nzb_file_content(
content
)
)
if password:
dir_store.add_password(title, password)
return dir_store
@override
def extract_passwords(self) -> hoarder.password_store.PasswordStore:
password_store = hoarder.password_store.PasswordStore()
for p in self._nzb_paths:
password_store = password_store | NzbPasswordPlugin._process_directory(p)
return password_store
There are also some tests for it which can be found here.
The strategy is to first look at the filename and try the extraction on it. If that fails, open the file and try to extract it from the parsed XML.
5 Answers 5
Here's my review of your code:
class PasswordStore
:
add_password()
:- Consider if you should be validating the arguments:
- Are they strings?
- Are they entirely whitespace?
- Do they start/end with whitespace?
- Consider if you should be validating the arguments:
__iter__()
:- Inconsistent behavior between this method and
__getitem__()
: Copy the set rather than returning the original.
- Inconsistent behavior between this method and
You might want to consider implementing an optimized
__ior__()
method so that your other code can use thea |= b
augmented assignment instead ofa = a | b
. You can then use this to implement the non-augmented__or__()
method.
class NzbPasswordPlugin
:
Generally, even your private functions should still have docstrings for future-you :)
__init__()
:- You need to raise an appropriate exception if
nzb_paths
is not in the config; otherwise, the call toextract_passwords()
will result in anAttributeError
. - Use
NotADirectoryError
instead ofFileNotFoundError
- You can just do
if invalid_paths:
to check if it's not empty. - No
else
needed since theif
block raises an exception and will not fall through.
- You need to raise an appropriate exception if
_extract_pw_from_nzb_filename()
:- For future-you documentation, the first comment about extracting the password from the pattern should indicate that the
.nzb
suffix has already been stripped due to the.stem
access. - Since the regexs are used frequently, compile them at the class level and reference them as attributes:
...class NzbPasswordPlugin(PasswordPlugin): # ... _nzb_filename_password_re = re.compile(r"\{\{(.+?)\}\}") # ...
filename_passwords = _nzb_filename_password_re.findall(filename) title = _nzb_filename_password_re.sub("", filename)
- The end logic can be optimized somewhat: (check for non-empty is faster than calculating length and comparing)
# find the filename password; raise an error if more than one. if filename_passwords: filename_password = filename_passwords.pop() else: filename_password = None if filename_passwords: raise ValueError() # return the stripped title and filename password (if any). return (title, filename_password)
- For future-you documentation, the first comment about extracting the password from the pattern should indicate that the
_extract_pw_from_nzb_file_content()
:- No real comments except to add more comments for future-you. Expect that when you come back to this code, you won't remember why you did anything, so explain it to yourself :)
_process_directory()
:- No type annotation given for
title
andpassword
. - Same comment as above - add more comments for future you.
- This method is a bit long. You might want to extract the file processing into a separate method. You can abstract that method so that it can work with either a straight file or RAR-file-entry by passing in a
read_file_content
lambda, resulting in something like this:title = password = None if full_path.suffix == ".nzb": title, password = _process_file( full_path, read_file_content = lambda: open(full_path).read() ) elif full_path.suffix == ".rar": rar_file = ... for file_entry in rar_file.files: if file_entry.path.suffix == ".nzb": title, password = _process_file( file_entry.path, read_file_content = ( lambda: rar_file.read_file(file_entry.path) ) ) if title and password: dir_store.add_password(title, password)
- No type annotation given for
Logging
Minor point:
Instead of:
logger.debug("Failure extracting password from content")
print(traceback.format_exc())
You can do this to include the traceback automatically:
logger.exception("Failure extracting password from content")
Then everything goes to the same logging destination, in the same format.
Factory Method Pattern
Function _process_directory
is fairly long, and could get bigger if you decide to handle more file types later on. And indentation in Python is tricky. Here is an interesting article that explains how you could refactor this function: The Factory Method Pattern and Its Implementation in Python.
Better tuple
Function _extract_pw_from_nzb_filename
returns a tuple, which I don't like very much because the order matters. You can use a namedtuple which remains compatible. Again from the aforementioned website: Write Pythonic and Clean Code With namedtuple
A few suggestions:
def __iter__(self):
for title, passwords in self._store.items():
yield title, passwords
This can be simplified to:
def __iter__(self):
yield from self._store.items()
Also here:
max_title_length = min(
MAX_COL_WIDTH, max(len(title) for title in self._store.keys())
)
Can be shortened by using the key
argument to len
:
max_title_length = min(
MAX_COL_WIDTH, len(max(self._store.keys(), key=len))
)
-
\$\begingroup\$ The first simplication probably seems only possible because there was a mistake in the original code. We actually want a copy of
passwords
. \$\endgroup\$viuser– viuser2025年09月05日 13:39:31 +00:00Commented Sep 5 at 13:39
This method is problematic:
def __iter__(self):
for title, passwords in self._store.items():
yield title, passwords
First, unlike other methods, it doesn't have a return type annotation. There's no reason for this inconsistency, so let's just add it: Iterator[tuple[str, set[str]]]
.
Second, it seems to violate the contract of PasswordStore
. You have made it quite clear that a storage is not meant to be mutated directly, by defining public *_password()
methods and by making __getitem__()
return copies of the sets. However, the sets yielded by __iter__()
are the original ones, not copies.
On that same note, __getitem__()
is normally expected to return a reference to the value, not a copy. To signify external-immutability, you should instead return frozenset(self._store[title])
. Same for __iter__()
.
In addition to the excellent points in the previous answer, here are some other considerations.
In the PasswordStore
class, the pretty_print
function seems like it
would serve the same purpose as a standard
__str__
function of the class. Or, if you want to retain the pretty_print
name,
it could then call __str__
.
If the pretty_print
function creates a general table of data, consider
other table Python packages listed in
this Stack Overflow answer,
such as tabulate
, prettytable
, etc.
-
\$\begingroup\$ Yes,
pretty_print
outputs a table using UTF-8 box drawing characters. The question is if__str__
is meant for outputting such complex formatting or more for simpler representations. Andtabulate
adds a third-party lib which was avoided for now (except for the tests, that usepytest
). \$\endgroup\$viuser– viuser2025年08月02日 09:02:52 +00:00Commented Aug 2 at 9:02 -
\$\begingroup\$ The python answer would be to create a separate class or module for outputting data in a table. This would allow it to be re-used, and removes unrelated functionality (output formatting) from the PasswordStore class. \$\endgroup\$J Earls– J Earls2025年08月04日 02:53:54 +00:00Commented Aug 4 at 2:53
-
\$\begingroup\$ @JEarls I wonder what the interface for this should be then. A dictionary? \$\endgroup\$viuser– viuser2025年09月05日 13:19:05 +00:00Commented Sep 5 at 13:19
-
\$\begingroup\$ @viuser that would be up to the implementation, but i'd probably end up with the tabular-output classes accepting a list of dictionaries (e.g.
[{'title': 'infomercial central', 'password': 'foo1'},{'title': 'spatula city', 'password': 'weird'}]
(etc.)). the abstract base class would have methods to extract the field names and maximum field widths from all the dictionaries, and possibly some abstract method definitions (e.g.header(self) -> str
,row(self, row:dict[str,str]) -> str
,row_sep(self) -> str
, andfooter(self) -> str
) that would be used by a method to return the full table. \$\endgroup\$J Earls– J Earls2025年09月05日 14:36:47 +00:00Commented Sep 5 at 14:36