4
\$\begingroup\$

First the PasswordStore, which is pretty straight-forward. It stores title-password association, but it is important that a title can have multiple passwords.

The __or__ method is very important since that's how later password stores are joined.

"""Password store module for managing title-password associations."""
from __future__ import annotations
import copy
from collections import defaultdict
class PasswordStore:
 """A store that associates titles with multiple passwords using sets."""
 def __init__(self) -> None:
 self._store: dict[str, set[str]] = defaultdict(set)
 def __getitem__(self, title: str) -> set[str]:
 return self._store[title].copy()
 def __contains__(self, title: str) -> bool:
 return title in self._store
 def __len__(self) -> int:
 return len(self._store)
 def add_password(self, title: str, password: str) -> None:
 """Add a password to the specified title."""
 if title == "":
 raise ValueError("Empty title")
 if password == "":
 raise ValueError("Empty password")
 self._store[title].add(password)
 def remove_password(self, title: str, password: str) -> bool:
 """Remove a password from the specified title.
 Returns True if removed, False if not found."""
 if title in self._store and password in self._store[title]:
 self._store[title].remove(password)
 if not self._store[title]:
 del self._store[title]
 return True
 return False
 def __iter__(self):
 for title, passwords in self._store.items():
 yield title, passwords
 def __or__(self, p: PasswordStore) -> PasswordStore:
 self_copy = copy.deepcopy(self)
 for title, passwords in p:
 for password in passwords:
 self_copy.add_password(title, password)
 return self_copy
 def clear_passwords(self, title: str) -> None:
 """Clear all passwords for the specified title."""
 if title in self._store:
 del self._store[title]
 def pretty_print(self) -> str:
 """Return a pretty-formatted string representation of the password store."""
 if not self._store:
 return "PasswordStore (empty)"
 MAX_COL_WIDTH: int = 83
 # Calculate column widths
 max_title_length = min(
 MAX_COL_WIDTH, max(len(title) for title in self._store.keys())
 )
 max_password_length = max(
 max(len(password) for password in passwords)
 for passwords in self._store.values()
 )
 title_width = max(max_title_length, len("Title"))
 password_width = max(max_password_length, len("Password"))
 top_line = f"┏━{'━' * title_width}━┳━{'━' * password_width}━┓"
 header_line = (
 f"┃ {'Title'.ljust(title_width)} ┃ {'Password'.ljust(password_width)} ┃"
 )
 header_separator_line = f"┣━{'━' * title_width}━╇━{'━' * password_width}━┫"
 separator_line = f"┠─{'─' * title_width}─┼─{'─' * password_width}─┨"
 bottom_line = f"┗━{'━' * title_width}━┷━{'━' * password_width}━┛"
 lines = [top_line, header_line, header_separator_line]
 first_entry = True
 for title in sorted(self._store.keys()):
 if len(title) > MAX_COL_WIDTH:
 display_title = title[0 : (MAX_COL_WIDTH - 3)] + "..."
 else:
 display_title = title
 passwords = sorted(self._store[title])
 if not first_entry:
 # no need to add a separator for the first line
 lines.append(separator_line)
 for i, password in enumerate(passwords):
 if i == 0:
 # First password for this title:
 line = f"┃ {display_title.ljust(title_width)} │ {password.ljust(password_width)} ┃"
 else:
 # Additional passwords for same title:
 line = f"┃ {' ' * title_width} │ {password.ljust(password_width)} ┃"
 lines.append(line)
 first_entry = False
 lines.append(bottom_line)
 return "\n".join(lines)

Here are some tests for the password store.

Since the passwords have no real security relevance, we save them in plain text.


Then we need "plugins" to actually gather the passwords, for which we defined an ABC (though maybe this would've been a good use case for protocols?):

"""Abstract base class for password extraction plugins."""
import abc
import typing
import hoarder.password_store
class PasswordPlugin(abc.ABC):
 """Abstract base class for password extraction plugins."""
 @abc.abstractmethod
 def __init__(self, config: dict[str, typing.Any]):
 pass
 @abc.abstractmethod
 def extract_passwords(self) -> hoarder.password_store.PasswordStore:
 """Extract passwords from the file, returning a mapping of title -> passwords."""
 pass

Now finally the first implementation of the PasswordPlugin, the NZB password extractor. This plugin gets configured to search in certain directories for NZB files.

NZB is an XML-based file format for retrieving posts from Usenet, and the passwords can be either in the filename like filename{{password}}.nzb or they are in a meta-tag of the header-section.

The "title" will then be the filename without the file extension and the password (if present).

There is still one complication: sometimes NZB files are compressed as RAR.

But, we already have a RarArchive class (basically a wrapper for 7z) that and implement a read method for it.

And finally we are ready for the NZB plugin:

"""NZB password extraction plugin."""
import logging
import os
import pathlib
import re
import traceback
import xml.etree.ElementTree as ET
import hoarder
from hoarder.password_plugin import PasswordPlugin
try:
 from typing import override # type: ignore [attr-defined]
except ImportError:
 from typing_extensions import override
logger = logging.getLogger("hoarder.nzb_password_plugin")
class NzbPasswordPlugin(PasswordPlugin):
 """Plugin to extract passwords from NZB filenames with {{password}} format."""
 _nzb_paths: list[pathlib.Path]
 @override
 def __init__(self, config: dict[str, list[str]]):
 if "nzb_paths" in config:
 paths = [pathlib.Path(p) for p in config["nzb_paths"]]
 invalid_paths = [p for p in paths if not p.is_dir()]
 if len(invalid_paths) > 0:
 raise FileNotFoundError(
 f"No directory at {invalid_paths[0]}"
 + (
 f" and {len(invalid_paths) - 1} other invalid paths"
 if len(invalid_paths) > 1
 else ""
 )
 )
 else:
 self._nzb_paths = paths
 @staticmethod
 def _extract_pw_from_nzb_filename(
 file_path: pathlib.PurePath,
 ) -> tuple[str, str | None]:
 filename = file_path.stem
 # Extract the password from title{{password}}.nzb pattern
 filename_passwords = re.findall(r"\{\{(.+?)\}\}", filename)
 title = re.sub(r"\{\{.+?\}\}", "", filename).strip()
 if len(filename_passwords) >= 2:
 logger.error(f"Error when extracting password from {file_path}")
 raise ValueError("Ambiguous passwords")
 if len(filename_passwords) == 0:
 return (title, None)
 return (title, filename_passwords[0])
 @staticmethod
 def _extract_pw_from_nzb_file_content(content: bytes | str) -> str | None:
 password: str | None = None
 try:
 logger.debug("Extracting password from file content")
 root = ET.fromstring(content)
 ns = {"nzb": "http://www.newzbin.com/DTD/2003/nzb"}
 for meta in root.findall('.//nzb:meta[@type="password"]', ns):
 if meta.text:
 password = meta.text.strip()
 break
 except (ET.ParseError, OSError, UnicodeDecodeError):
 logger.debug("Failure extracting password from content")
 print(traceback.format_exc())
 pass
 return password
 @staticmethod
 def _process_directory(
 nzb_directory: pathlib.Path,
 ) -> hoarder.password_store.PasswordStore:
 dir_store = hoarder.password_store.PasswordStore()
 content: str | bytes
 for root, _, files in os.walk(nzb_directory):
 for file in files:
 title = password = None
 full_path: pathlib.Path = nzb_directory / root / file
 if full_path.suffix == ".nzb":
 logger.debug(f"Processing NZB {full_path}")
 title, password = NzbPasswordPlugin._extract_pw_from_nzb_filename(
 full_path
 )
 if not password:
 logger.debug("No password in filename, opening NZB file...")
 with open(full_path) as f:
 content = f.read()
 password = (
 NzbPasswordPlugin._extract_pw_from_nzb_file_content(
 content
 )
 )
 if password:
 dir_store.add_password(title, password)
 elif full_path.suffix == ".rar":
 logger.debug(f"Processing RARed NZB(s) {full_path}")
 rar_file: hoarder.RarArchive = hoarder.RarArchive.from_path(
 full_path
 )
 for file_entry in rar_file.files:
 logger.debug(f"Read {file_entry.path}... extracting passwords")
 if file_entry.path.suffix == ".nzb":
 (
 title,
 password,
 ) = NzbPasswordPlugin._extract_pw_from_nzb_filename(
 file_entry.path
 )
 if not password:
 content = rar_file.read_file(file_entry.path)
 password = (
 NzbPasswordPlugin._extract_pw_from_nzb_file_content(
 content
 )
 )
 if password:
 dir_store.add_password(title, password)
 return dir_store
 @override
 def extract_passwords(self) -> hoarder.password_store.PasswordStore:
 password_store = hoarder.password_store.PasswordStore()
 for p in self._nzb_paths:
 password_store = password_store | NzbPasswordPlugin._process_directory(p)
 return password_store

There are also some tests for it which can be found here.

The strategy is to first look at the filename and try the extraction on it. If that fails, open the file and try to extract it from the parsed XML.

toolic
14.4k5 gold badges29 silver badges201 bronze badges
asked Jul 31 at 7:44
\$\endgroup\$

5 Answers 5

6
\$\begingroup\$

Here's my review of your code:


class PasswordStore:

  • add_password():

    • Consider if you should be validating the arguments:
      • Are they strings?
      • Are they entirely whitespace?
      • Do they start/end with whitespace?
  • __iter__():

    • Inconsistent behavior between this method and __getitem__(): Copy the set rather than returning the original.
  • You might want to consider implementing an optimized __ior__() method so that your other code can use the a |= b augmented assignment instead of a = a | b. You can then use this to implement the non-augmented __or__() method.

class NzbPasswordPlugin:

Generally, even your private functions should still have docstrings for future-you :)

  • __init__():

    • You need to raise an appropriate exception if nzb_paths is not in the config; otherwise, the call to extract_passwords() will result in an AttributeError.
    • Use NotADirectoryError instead of FileNotFoundError
    • You can just do if invalid_paths: to check if it's not empty.
    • No else needed since the if block raises an exception and will not fall through.
  • _extract_pw_from_nzb_filename():

    • For future-you documentation, the first comment about extracting the password from the pattern should indicate that the .nzb suffix has already been stripped due to the .stem access.
    • Since the regexs are used frequently, compile them at the class level and reference them as attributes:
      class NzbPasswordPlugin(PasswordPlugin):
       # ...
       _nzb_filename_password_re = re.compile(r"\{\{(.+?)\}\}")
       # ...
      
      ...
       filename_passwords = _nzb_filename_password_re.findall(filename)
       title = _nzb_filename_password_re.sub("", filename)
      
    • The end logic can be optimized somewhat: (check for non-empty is faster than calculating length and comparing)
       # find the filename password; raise an error if more than one.
       if filename_passwords:
       filename_password = filename_passwords.pop()
       else:
       filename_password = None
       if filename_passwords:
       raise ValueError()
       # return the stripped title and filename password (if any).
       return (title, filename_password)
      
  • _extract_pw_from_nzb_file_content():

    • No real comments except to add more comments for future-you. Expect that when you come back to this code, you won't remember why you did anything, so explain it to yourself :)
  • _process_directory():

    • No type annotation given for title and password.
    • Same comment as above - add more comments for future you.
    • This method is a bit long. You might want to extract the file processing into a separate method. You can abstract that method so that it can work with either a straight file or RAR-file-entry by passing in a read_file_content lambda, resulting in something like this:
      title = password = None
      if full_path.suffix == ".nzb":
       title, password = _process_file(
       full_path,
       read_file_content = lambda: open(full_path).read()
       )
      elif full_path.suffix == ".rar":
       rar_file = ...
       for file_entry in rar_file.files:
       if file_entry.path.suffix == ".nzb":
       title, password = _process_file(
       file_entry.path,
       read_file_content = (
       lambda: rar_file.read_file(file_entry.path)
       )
       )
      if title and password:
       dir_store.add_password(title, password)
      
toolic
14.4k5 gold badges29 silver badges201 bronze badges
answered Jul 31 at 14:42
\$\endgroup\$
4
\$\begingroup\$

Logging

Minor point:

Instead of:

logger.debug("Failure extracting password from content")
print(traceback.format_exc())

You can do this to include the traceback automatically:

logger.exception("Failure extracting password from content")

Then everything goes to the same logging destination, in the same format.

Factory Method Pattern

Function _process_directory is fairly long, and could get bigger if you decide to handle more file types later on. And indentation in Python is tricky. Here is an interesting article that explains how you could refactor this function: The Factory Method Pattern and Its Implementation in Python.

Better tuple

Function _extract_pw_from_nzb_filename returns a tuple, which I don't like very much because the order matters. You can use a namedtuple which remains compatible. Again from the aforementioned website: Write Pythonic and Clean Code With namedtuple

answered Aug 1 at 0:04
\$\endgroup\$
4
\$\begingroup\$

A few suggestions:

 def __iter__(self):
 for title, passwords in self._store.items():
 yield title, passwords

This can be simplified to:

 def __iter__(self):
 yield from self._store.items()

Also here:

 max_title_length = min(
 MAX_COL_WIDTH, max(len(title) for title in self._store.keys())
 )

Can be shortened by using the key argument to len:

 max_title_length = min(
 MAX_COL_WIDTH, len(max(self._store.keys(), key=len))
 )
answered Jul 31 at 23:14
\$\endgroup\$
1
  • \$\begingroup\$ The first simplication probably seems only possible because there was a mistake in the original code. We actually want a copy of passwords. \$\endgroup\$ Commented Sep 5 at 13:39
3
\$\begingroup\$

This method is problematic:

def __iter__(self):
 for title, passwords in self._store.items():
 yield title, passwords

First, unlike other methods, it doesn't have a return type annotation. There's no reason for this inconsistency, so let's just add it: Iterator[tuple[str, set[str]]].

Second, it seems to violate the contract of PasswordStore. You have made it quite clear that a storage is not meant to be mutated directly, by defining public *_password() methods and by making __getitem__() return copies of the sets. However, the sets yielded by __iter__() are the original ones, not copies.

On that same note, __getitem__() is normally expected to return a reference to the value, not a copy. To signify external-immutability, you should instead return frozenset(self._store[title]). Same for __iter__().

answered Aug 1 at 2:22
\$\endgroup\$
2
\$\begingroup\$

In addition to the excellent points in the previous answer, here are some other considerations.

In the PasswordStore class, the pretty_print function seems like it would serve the same purpose as a standard __str__ function of the class. Or, if you want to retain the pretty_print name, it could then call __str__.

If the pretty_print function creates a general table of data, consider other table Python packages listed in this Stack Overflow answer, such as tabulate, prettytable, etc.

answered Jul 31 at 21:29
\$\endgroup\$
4
  • \$\begingroup\$ Yes, pretty_print outputs a table using UTF-8 box drawing characters. The question is if __str__ is meant for outputting such complex formatting or more for simpler representations. And tabulate adds a third-party lib which was avoided for now (except for the tests, that use pytest). \$\endgroup\$ Commented Aug 2 at 9:02
  • \$\begingroup\$ The python answer would be to create a separate class or module for outputting data in a table. This would allow it to be re-used, and removes unrelated functionality (output formatting) from the PasswordStore class. \$\endgroup\$ Commented Aug 4 at 2:53
  • \$\begingroup\$ @JEarls I wonder what the interface for this should be then. A dictionary? \$\endgroup\$ Commented Sep 5 at 13:19
  • \$\begingroup\$ @viuser that would be up to the implementation, but i'd probably end up with the tabular-output classes accepting a list of dictionaries (e.g. [{'title': 'infomercial central', 'password': 'foo1'},{'title': 'spatula city', 'password': 'weird'}] (etc.)). the abstract base class would have methods to extract the field names and maximum field widths from all the dictionaries, and possibly some abstract method definitions (e.g. header(self) -> str, row(self, row:dict[str,str]) -> str, row_sep(self) -> str, and footer(self) -> str) that would be used by a method to return the full table. \$\endgroup\$ Commented Sep 5 at 14:36

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.