I'm working on a replacement for make written in Python. Instead of a Makefile, you create a Wormfile.py. This file gets imported dynamically and a Context object is passed to which you can add your goals (akin to targets in make). Contexts can hold variables and even inherit variables from parent contexts.
Goal is an abstract base class. At a minimum, a subclass needs to implement the exists method. It can also implement compute_last_built which returns the time when the goal was last built (None if the goal doesn't exist or if a build time doesn't make sense for this goal).
What follows is just the definition of Goal and Context. The code assumes Python 3.10+.
What I'm looking for primarily is the identification of any edge cases. Is there something I didn't consider that's going to bite me later?
It may seem superfluous to implement __hash__ and __eq__ in ways that mimic what object already does. However, other parts of my project require these particular implementations and I want typing.final to warn the user not to override them.
from __future__ import annotations
import abc
import collections.abc
import importlib.util
import logging
import os
import pathlib
import typing
_logger = logging.getLogger("sandworm.core")
T = typing.TypeVar("T", bound="Goal")
Builder = collections.abc.Callable[[T], bool]
class Goal(abc.ABC):
"""Goal to be built.
Attributes:
name (str): Name of the goal. Used in logging.
context (Context): Associated context.
"""
def __init__(self: T, name: str, builder: Builder | None = None) -> None:
"""
Arguments:
name (str): Name of the goal.
builder (Callable[[Goal], bool] | None): Function to build the goal.
"""
self._name = name
self._builder = builder
self._dirty = False
self._ctx: Context | None = None
self._dependencies: list[Goal] = []
self._build_time: float | int | None = None
self._build_time_calculated = False
def __repr__(self) -> str:
return self._name
@typing.final
def __eq__(self, other: object) -> bool:
return self is other
@typing.final
def __hash__(self) -> int:
return id(self)
@property
def name(self) -> str:
return self._name
@typing.final
@property
def context(self) -> Context:
if self._ctx is None:
raise Exception("Context has not been set.")
return self._ctx
@typing.final
def dependencies(self) -> collections.abc.Iterator[Goal]:
"""Iterate over the dependencies.
Returns:
Iterator[Goal]
"""
yield from self._dependencies
@typing.final
def add_dependency(self, dependency: Goal) -> None:
"""Add a dependency.
Arguments:
goal (Goal): Dependency.
"""
if self._ctx is not None:
dependency.set_context(self._ctx)
self._dependencies.append(dependency)
@typing.final
def set_context(self, ctx: Context) -> None:
"""Set the context (if unset) on this goal and all of its dependencies (recursively).
Arguments:
ctx (Caontext)
"""
if self._ctx is None:
self._ctx = ctx
for dependency in self._dependencies:
dependency.set_context(ctx)
@abc.abstractmethod
def exists(self) -> bool:
"""Does the goal already exist?
Returns:
bool
"""
raise NotImplementedError
def compute_last_built(self) -> float | int | None:
"""Determine the last time the goal was built.
Returns:
float | int | None: Unix epoch timestamp or `None` if the goal doesn't exist or if a build time
doesn't make sense.
"""
return None
@typing.final
def needs_building(self) -> bool:
"""Does the goal need to be built?
Returns:
bool
"""
last_built = self._last_built()
for dependency in self._dependencies:
if dependency._dirty:
_logger.debug(f"{self} needs to be built because dependency {dependency} is dirty")
return True
if (
last_built is not None
and (dep_last_built := dependency._last_built()) is not None
and dep_last_built > last_built
):
_logger.debug(f"{self} needs to be built because dependency {dependency} is newer")
return True
if missing := not self.exists():
_logger.debug(f"{self} needs to be built because it does not exist")
return missing
@typing.final
def build(self) -> bool:
"""Build the goal.
This method should not be called by the user directly. Instead, run `sandworm.build(goal)`.
Returns:
bool: Was the build successful?
Raises:
Exception: The context has not been set.
"""
if self._ctx is None:
raise Exception("Context is not set.")
if not self._builder:
if not self.exists():
_logger.error(f"No logic exists to build {self}")
return False
self._finish()
return True
_logger.info(f"Building {self}")
if success := self._builder(self):
_logger.debug(f"{self} built successfully")
else:
_logger.error(f"Failed to build {self}")
self._finish()
return success
def _last_built(self) -> float | int | None:
if self._build_time_calculated:
return self._build_time
return self._recompute_last_built()
def _recompute_last_built(self) -> float | int | None:
self._build_time = self.compute_last_built()
self._build_time_calculated = True
return self._build_time
def _finish(self) -> None:
self._dirty = True
self._recompute_last_built()
class FileGoal(Goal):
"""Represents a file to be created.
Attributes:
path (pathlib.Path): Path to the file.
"""
def __init__(self: T, path: str | pathlib.Path, builder: Builder | None = None) -> None:
if isinstance(path, str):
name = path
path = pathlib.Path(path)
else:
name = str(path)
super().__init__(name, builder)
self._path = path
@property
def path(self) -> pathlib.Path:
return self._path
def exists(self) -> bool:
return self._path.exists()
def compute_last_built(self) -> float | None:
return self._path.stat().st_mtime if self._path.exists() else None
class ThinGoal(Goal):
"""Goal that always registers as existing.
This is meant merely to be a wrapper around its dependencies as it is only built if one or more of its
dependencies are dirty.
"""
def exists(self) -> bool:
return True
@typing.final
class Context:
"""Build context.
Attributes:
basedir (pathlib.Path): Base directory for its goals.
main_goal (Goal | None): If not `None`, then the goal that will be run when no goal is
specified for `sandworm.build`.
"""
def __init__(self, directory: str | pathlib.Path, *, parent: Context | None = None) -> None:
if isinstance(directory, str):
directory = pathlib.Path(directory)
if not directory.is_dir():
raise NotADirectoryError(directory)
self._basedir = directory.resolve()
self._parent = parent
self._children: list[Context] = []
self._main_goal: Goal | None = None
self._variables: dict[str, str] = {}
self._goals: dict[str, Goal] = {}
self._cleaners: list[collections.abc.Callable[[Context], None]] = []
if parent is not None:
parent._children.insert(0, self)
@classmethod
def from_directory(cls, directory: str | pathlib.Path, parent: Context | None = None) -> Context:
"""Create a context by loading a Wormfile.
Arguments:
directory (str | pathlib.Path): Directory containing Wormfile.py.
parent (sandworm.Context | None): Optional parent from which the new context will inherit.
Returns:
sandworm.Context
Raises:
FileNotFoundError: The Wormfile could not be found.
ImportError: The Wormfile couldn't be loaded.
"""
if isinstance(directory, str):
directory = pathlib.Path(directory)
wormfile = directory / "Wormfile.py"
if not wormfile.is_file():
raise FileNotFoundError(wormfile)
_logger.debug(f"Loading {wormfile}")
spec = importlib.util.spec_from_file_location("Wormfile", wormfile)
if spec is None:
raise ImportError(str(wormfile))
module = importlib.util.module_from_spec(spec)
if spec.loader is None:
raise ImportError(str(wormfile))
spec.loader.exec_module(module)
ctx = Context(directory, parent=parent)
module.setup_context(ctx)
return ctx
@property
def basedir(self) -> pathlib.Path:
return self._basedir
@property
def main_goal(self) -> Goal | None:
return self._main_goal
def create_child(self) -> Context:
"""Create a child context with the same base directory.
Returns:
Context
"""
return Context(self._basedir, parent=self)
def variables(self) -> dict[str, str]:
"""Iterate over the exposed variables.
This includes any ancestor context's variables but not environment variables.
Returns:
Iterator[tuple[str, str]]: Iterator of key/value pairs.
"""
variables = self._parent.variables() if self._parent is not None else {}
variables.update(self._variables)
return variables
def add_goal(self, goal: Goal, *, name: str | None = None, main: bool = False) -> None:
"""Add a goal to be managed by this context.
Arguments:
goal (Goal): Goal to be added.
name (str | None): Name to reference the goal by. If `None`, then the goal's own name will be
used.
main (bool): Is this to be the context's main goal?
Raises:
ValueError: A duplicate goal name was specified.
"""
if name is None:
name = goal.name
if name in self._goals:
raise ValueError(f"Duplicate goal name: {name}")
goal.set_context(self)
self._goals[name] = goal
if main:
self._main_goal = goal
def lookup_goal(self, name: str) -> Goal | None:
"""Look up a registered goal by name.
Arguments:
name (str)
Returns:
Goal | None
"""
return self._goals.get(name)
def goals(self) -> collections.abc.Iterator[tuple[str, Goal]]:
"""Iterate over the registered goals.
Returns:
Iterator[tuple[str, Goal]]: Iterator of names and goals.
"""
yield from self._goals.items()
def add_cleaner(self, cleaner: collections.abc.Callable[[Context], None]) -> None:
"""Register a cleaner function to be called by `self.clean()`.
Arguments:
Callable[[Context], None]
"""
self._cleaners.insert(0, cleaner)
def clean(self) -> None:
"""Runs all of the registered cleaners.
All child contexts will be cleaned first (in the reverse order that they were added). For each
context, the cleaners will be called in the reverse order that they were added.
"""
for child in self._children:
child.clean()
for cleaner in self._cleaners:
cleaner(self)
def get(self, key: str, default: str | None = None) -> str | None:
"""Look up a variable safely.
First, this context is searched. If the variable is not found, then the parent is searched (if there
is one) and so on. At the end, if the variable is still not found, the environment is searched.
Arguments:
key (str): Name of the variable.
default (str | None): Value to return if the key isn't found.
Returns:
str | None: The value of the variable if it was found and the default otherwise.
"""
if (value := self._variables.get(key)) is not None:
return value
if self._parent is not None:
return self._parent.get(key, default)
return os.environ.get(key, default)
def __contains__(self, key: str) -> bool:
"""Is the variable set?
Like `get`, the ancestry and environment are included in the search.
Arguments:
key (str): Name of the variable.
Returns:
bool
"""
return self.get(key) is not None
def __getitem__(self, key: str) -> str:
"""Look up a variable.
Like `get`, the ancestry and environment are included in the search.
Arguments:
key (str): Name of the variable.
Returns:
str: Vale of the variable.
Raises:
KeyError: The variable was not found.
"""
if (value := self.get(key)) is None:
raise KeyError(key)
return value
def __setitem__(self, key: str, value: str) -> None:
"""Set a variable.
Arguments:
key (str): Name of the variable.
value (str): Value of the variable.
"""
self._variables[key] = value
def set_if_unset(self, key: str, value: str) -> None:
"""Set a variable only if it hasn't already been set.
Like `get`, the ancestry and environment are included in the search.
Arguments:
key (str): Name of the variable.
value (str): Value of the variable.
"""
if key not in self:
self[key] = value
-
4\$\begingroup\$ Have you read about sconstruct? \$\endgroup\$Reinderien– Reinderien2025年11月02日 07:22:44 +00:00Commented 2 days ago
-
6\$\begingroup\$ I like the premise of your question, but as the answer indicates the question would be much improved by adding example usage. Even if just the happy path, it would greatly help the reception. Please keep this in mind if you ever consider posting a follow-up question. \$\endgroup\$Mast– Mast ♦2025年11月02日 15:23:17 +00:00Commented 2 days ago
3 Answers 3
narrative vs. code
Docstrings and comments sometimes are not closely in sync with the code they describe. I don't know how to parse this:
class Goal(abc.ABC):
"""Goal to be built.
Attributes:
name (str): Name of the goal. Used in logging.
context (Context): Associated context.
"""
The class does not offer public attributes such
as Goal.name or Goal.context.
Please delete this docstring,
as it does not describe the code that executes.
In contrast, the instance attributes self._name
and self._ctx are well defined.
will set...() actually set the context?
def set_context(self, ctx: Context) -> None: ...
if self._ctx is None:
self._ctx = ctx
I find this behavior surprising.
There is a Domain concept here that either you have failed to teach, or I have failed to learn. I do not understand why setting to A and then setting to B would not leave us with B.
Also, I have not yet learned what a context is all about.
How could it be optional?
My understanding of the Goal class is that we
create an instance in order to call .build() on it.
But we can't do that if we lack a _ctx.
The OP does not include any automated
unit tests,
nor indeed, any code which exercises the Goal library code.
When revising the automated tests, please be sure to include
code paths which accomplish useful work
while both setting and ignoring the self._ctx attribute.
Or consider insisting that the contructor shall accept
a mandatory Context argument, so it shall always be the case that
self._ctx is not None.
unhelpful docstring
def __init__(self: T, name: str, builder: Builder | None = None) -> None:
"""
Arguments:
name (str): Name of the goal.
builder (Callable[[Goal], bool] | None): Function to build the goal.
"""
In the signature and in the docstring we see that builder
is more or less the same thing.
But the docstring really isn't helpful, here.
Better to just stick with the signature, which type checks with pyright.
Also, I don't understand why it's undesirable to let the app author pass a context in to the constructor. Maybe there are use cases where having a null context is useful? I'm just not seeing them. In the sense of, there are literally no automated unit tests, no call sites, which illustrate that Use Case.
numeric return
def compute_last_built(self) -> float | int | None:
Please don't do that.
Is an int an instance of a float? No.
Python can represent vastly more integers
than a mere 64-bit float could ever hope to do.
Nonetheless, to be readable and concise, please tag them as float.
PEP 484 explains that
when an argument is annotated as having type float, an argument of type int is acceptable
Similarly for _last_rebuilt and _recompute_last_built.
redundant docstring
def needs_building(self) -> bool:
"""Does the goal need to be built?
Returns:
bool
"""
This is a four-line docstring, with the second line being blank.
I don't understand why the last two lines are helpful, given that the signature already told us what will be returned. Please delete the final lines.
pathlib
class FileGoal(Goal):
...
def __init__(self: T, path: str | pathlib.Path, ...
I am skeptical that accepting str pathnames is
a boon to a calling app developer.
Insist on Path and be done with it.
This applies to several public methods.
vague docstring
class Context:
"""Build context.
Yeah. That's just not helpful.
We're modeling somthing here. Tell us what we're modeling. What are the class invariants? What meaning do we assign to the mandatory instance attributes?
Attributes:
basedir (pathlib.Path): Base directory for its goals.
main_goal (Goal | None): If not `None`, then the goal that will be run when no goal is
specified for `sandworm.build`.
"""
def __init__(self, directory: str | pathlib.Path, *, parent: Context | None = None) -> None:
Ok, I get the basedir thing, even though you don't call it that,
you choose to put directory in the signature,
I'm just going to move past that.
But what is going on with this main_goal attribute?!?
Why do you list it in the docstring,
as an essential concept for calling code to understand?
The type calculus makes it look like such a goal shall not
be a parent Context.
Maybe this bit of documentation was accurate in some
ancient git commit. But it does not appear to be
accurate now. Please revise before merging down to main.
It is much better to not write something in
a # comment or """docstring""",
than to write something which turns out not to be accurate.
automated tests
The OP includes no tests, and no examples of how to call into the proposed library code along the "happy path". It would benefit from some unit or integration tests. The documentation value would be far greater than what we see in the current OP source code.
-
\$\begingroup\$ Re:
float | int | None, this pattern might come from numpy usage, where mypy will reject a call tofoo(x: np.floating)whenxis annp.int_(for example). The PEP you quoted applies to function arguments, not return values, and it says only that functions that take floats should also accept ints, not that it's wrong to be more explicit. In some codebases, returning a float and not an int is something that does matter, e.g. because pandas will coerce these to different column datatypes with different missing-data representations. \$\endgroup\$Max– Max2025年11月04日 22:06:16 +00:00Commented 3 hours ago -
\$\begingroup\$ Hullo, @Max. I find this fascinating (and not just because of the whole thing with the PEP observing an int is not a float, yet encouraging shorthand signatures that mypy will respect). In many cases an explicit
return float(result)orreturn np.float32(result)would suffice for correctness and to make it lint cleanly. As far as NaN for an integer column goes, it was long a sore spot. But modern pandas offers pd.NA, which eases that quite a lot, at the expense of having more object pointers. I'm interested and would love to see blog post URLs, especially about setting one int value to NA. \$\endgroup\$J_H– J_H2025年11月04日 22:41:57 +00:00Commented 3 hours ago
As I'm writing this, you've already accepted J_H's answer. This is meant to complement that excellent answer with some additional things you might want to take into account.
str and repr
For Goal, you implement a __repr__ that to me feels like more like a __str__; if it needs a method at all. Is there a reason a subclass may want to override that? If not, you may want to consider replacing things like f'{self}' with f'{self.name}' and make a more standard __repr__, like:
return f'{type(self).__name__}({self.name!r}, {self._builder!r})'
You may also want to consider somehow including a goal's dependencies and its other state in the repr.
Imagine someone using your software, trying to figure out why their Wormfile doesn't work. I know I would appreciate it if I could do:
>>> from Wormfile import *
>>> my_goal
... and get more information than just the name.
Context as a MutableMapping
Context almost implements MutableMapping[str, str]. Your users might appreciate it if they can use it as one.
Because you want some non-standard behaviour related to os.environ and chaining, so I personally would either leave the connection implicit or add collections.abc.MutableMapping.register(Context), rather than inheriting from MutableMapping.
Here are some example implementations for the missing methods:
def __len__(self) -> int:
return len(self.variables())
def __iter__(self) -> collections.abc.Iterator[str]:
return iter(self.variables())
def __delitem__(self, key: str) -> None:
del self._variables[key]
setdefault = set_if_unset
def pop(self, key: str, default: str | None = None) -> None:
if default is None: # dict.pop(key, None) is different from dict.pop(key)
return self._variables.pop(key)
return self._variables.pop(key, default)
def popitem(self) -> tuple[str, str]:
return self._variables.popitem()
def clear(self) -> None:
return self._variables.clear()
def update(self, other: collections.abc.Iterable[str] | collections.abc.Mapping[str, str]=(), /, **kwds: str) -> None:
self._variables.update(other, **kwds)
I'll only address a few things that look particularly wrong here.
Code/docs balance
You code has a lot of documentation, yet I won't call it well-documented. Docstrings repeating args that you already annotate in the signature? Including args section only makes sense when you have some deeper insights to share about the role or values of the parameter. Normally you just don't need that.
At the same time the docstrings fail to capture the business meaning of the documented objects.
@typing.final
class Context:
"""Build context.
Attributes:
basedir (pathlib.Path): Base directory for its goals.
main_goal (Goal | None): If not `None`, then the goal that will be run when no goal is
specified for `sandworm.build`.
"""
...thanks? And what does "build context" mean in your application? I know the concept of "build context" in Docker, for example, but I don't see any directory tarballs passed around here. It has some specific meaning in your domain and seems rather important, so a few sentences detailing that would be really helpful.
@typing.final
def add_dependency(self, dependency: Goal) -> None:
"""Add a dependency.
Arguments:
goal (Goal): Dependency.
"""
...or you could have just omitted this docstring entirely.
If you still insist on such verbosity, use the available sections wisely: for example, this Returns is unusual:
def goals(self) -> collections.abc.Iterator[tuple[str, Goal]]:
"""Iterate over the registered goals.
Returns:
Iterator[tuple[str, Goal]]: Iterator of names and goals.
"""
yield from self._goals.items()
There is a Yields: section for this case:
def goals(self) -> collections.abc.Iterator[tuple[str, Goal]]:
"""Iterate over the registered goals.
Yields:
tuple[str, Goal]: Iterator of names and goals.
"""
yield from self._goals.items()
(but again, your type hint already says that - better remove the type altogether)
On the other hand, this is almost perfect (modulo raising Exception instead of some reasonably specific exc class- I'd probably use a specific subclass like ValueError or RuntimeError, so that nobody may need to say except Exception). The docstring explains the boolean, details the possible exception and gives a helpful usage notice:
@typing.final
def build(self) -> bool:
"""Build the goal.
This method should not be called by the user directly. Instead, run `sandworm.build(goal)`.
Returns:
bool: Was the build successful?
Raises:
Exception: The context has not been set.
"""
Typing
T = typing.TypeVar("T", bound="Goal")
Builder = collections.abc.Callable[[T], bool]
... # snip
def __init__(self: T, path: str | pathlib.Path, builder: Builder | None = None) -> None:
Please run mypy --strict or pyright on your code. Omitted generics become Any, aliases do not magically carry type variables along - you still need to say builder: Builder[T] | None = None.
Respect others' muscle memory
class Context:
def get(self, key: str, default: str | None = None) -> str | None:
if (value := self._variables.get(key)) is not None:
return value
if self._parent is not None:
return self._parent.get(key, default)
return os.environ.get(key, default)
def __contains__(self, key: str) -> bool:
return self.get(key) is not None
def __getitem__(self, key: str) -> str:
if (value := self.get(key)) is None:
raise KeyError(key)
return value
def __setitem__(self, key: str, value: str) -> None:
self._variables[key] = value
def set_if_unset(self, key: str, value: str) -> None:
if key not in self:
self[key] = value
This quacks like a dict or a MutableMapping. You do not support item deletion - is that intentional? Even if yes, please call set_if_unset the name we're all used to - setdefault. Your class still won't implement a dict interface, but at least the common methods will be consistent.
Infinite recursion potential
If you have two goals that end up having a circular dependency, calling set_context on one of them will die in infinite recursion. You may declare that cyclic dependencies are not supported, and that might be a sane decision, but then it has to be enforced - it is very easy to build a large graph of goals and accidentally introduce a cycle there. Your program should either detect and reject that early or be able to handle cyclic definitions.
Validation
It's fairly difficult to tell from the code alone, but... is Wormfile.py something created by your users? If yes, then this block lacks validation:
@classmethod
def from_directory(cls, directory: str | pathlib.Path, parent: Context | None = None) -> Context:
"""Create a context by loading a Wormfile.
Arguments:
directory (str | pathlib.Path): Directory containing Wormfile.py.
parent (sandworm.Context | None): Optional parent from which the new context will inherit.
Returns:
sandworm.Context
Raises:
FileNotFoundError: The Wormfile could not be found.
ImportError: The Wormfile couldn't be loaded.
"""
if isinstance(directory, str):
directory = pathlib.Path(directory)
wormfile = directory / "Wormfile.py"
if not wormfile.is_file():
raise FileNotFoundError(wormfile)
_logger.debug(f"Loading {wormfile}")
spec = importlib.util.spec_from_file_location("Wormfile", wormfile)
if spec is None:
raise ImportError(str(wormfile))
module = importlib.util.module_from_spec(spec)
if spec.loader is None:
raise ImportError(str(wormfile))
spec.loader.exec_module(module)
ctx = Context(directory, parent=parent)
module.setup_context(ctx)
return ctx
setup_context may be undefined, or defined with a wrong signature, or screwed up in some other way. Please add a try/except around it to report a helpful error explaining what setup_context should look like.