This Python 3 program outputs a list of all pathnames in the filesystem that match a list of rules taken from a file. You can add and remove sets of pathnames.
The original purpose was to generate lists of pathnames to feed to
cpio
for backups. A simplified version of one of my backup rulefiles
should clarify how it works:
# Entire home directory
+ /home/tom
# Remove hidden files and other junk
- /home/tom/.*
- /home/tom/java
# Then add back stuff that I do want
+ /home/tom/.bash*
# Remove stuff that goes in another backup
- /home/tom/inactive
# Add stuff from another location
+ /large/bkups/patterns
# Remove junk
- *~
- *.o
This would produce output such as:
/home/tom
/home/tom/.bashrc
/home/tom/bin
/home/tom/bin/fpmr
/large/bkups/patterns
/large/bkups/patterns/current
/large/bkups/patterns/inactive
fpmr -h
gives a short help text. fpmr -?
would normally give a long
help text, but since it's quite long and the contents don't affect the
code, I removed it from the code posted below. I'll keep it available
here for a while:
(削除) http://tomzy.ch/code/fpmr_detailed_help.txt (削除ここまで) (this is now a dead link)
Aside from general code review, I'm particularly interested in the following:
- I use a couple of global flags. It seemed the best way. Thoughts?
- Can anyone think of cases I've overlooked?
- Does any code seem to need more comments? I find most Python code clear enough to be self-documenting.
- It was developed on Linux. Is there anything that wouldn't work right on other Unices?
- It lists regular files, directories, and symlinks. It ignores named pipes, sockets, and device files. A Solaris filesystem can contain something called a "door". Can any Solaris users verify that it will ignore doors too?
#!/usr/bin/env python3
import sys
try:
import argparse
except ImportError:
print('The "argparse" module is not available. '
'Use Python 3.2 or better.', file=sys.stderr)
sys.exit(1)
import fnmatch
import glob
import os
import os.path as osp
import re
detailed_help = """
Omitted for brevity. Will be available here for a while:
http://tomzy.ch/code/fpmr_detailed_help.txt
""".strip()
# Global flags. Usually bad practice, but I think justified in this
# instance. Values are set once in main(), based on args, and are
# constant thereafter; they are read deep in call trees and would be too
# inconvenient to pass around.
GLOBAL_verbose = None
GLOBAL_null = None
def main():
args = process_args()
global GLOBAL_verbose
GLOBAL_verbose = args.verbose
global GLOBAL_null
GLOBAL_null = args.null
rule_list = read_rules(args.rulefile)
if not rule_list:
error_quit('No valid rules')
vmsg('{} valid rules'.format(len(rule_list)))
dirs = set()
nondirs = set()
for action, pattern in rule_list:
vmsg('=====\nApplying rule {} {}'
''.format(action, pattern))
if action == '+':
add_matches(dirs, nondirs, pattern)
else:
remove_matches(dirs, nondirs, pattern)
report_descend_failures(dirs)
paths = dirs | nondirs
if not paths:
error_quit('No matches were found')
if len(paths) != len(dirs) + len(nondirs):
error_quit(
'Something weird happened. The following pathnames\n'
'are listed as both directories and non-directories:\n'
' {}'.format('\n '.join(sorted(dirs & nondirs))))
line_end = '0円' if args.null else '\n'
plist = [p + line_end for p in paths]
if args.relative:
plist = [p.lstrip(os.sep) for p in plist]
plist.sort()
args.outfile.writelines(plist)
def process_args():
parser = argparse.ArgumentParser(
description='fpmr: Find Pathnames that Match Rules',
epilog='For more information, run "fpmr -?".')
parser.add_argument(
'rulefile', type=argparse.FileType('r'),
help='rule file (use "-" to read from stdin)')
parser.add_argument(
'-?', '--details', action=detail_action,
nargs=0, default=argparse.SUPPRESS,
help='show more detailed help')
parser.add_argument(
'-r', '--relative', action='store_true',
help='output relative pathnames (default is absolute)')
parser.add_argument(
'-0', '--null', action='store_true',
help='end pathnames with null character (default is newline)')
parser.add_argument(
'-v', '--verbose', action='store_true',
help='print extended debug info to stderr')
parser.add_argument(
'-o', action='store', dest='outfile', default='-',
type=argparse.FileType('w'),
help='write output to outfile (default is stdout)',
metavar='outfile')
return parser.parse_args()
class detail_action(argparse.Action):
def __call__(self, p, n, v, o=None):
print(detailed_help)
sys.exit(0)
def read_rules(rulefile):
# Return list of valid rules.
rlist = []
for lineno, line in enumerate(rulefile, 1):
line = line.strip()
if not line or line[:1] == '#':
continue
rule = parse_rule(line, lineno)
if rule:
rlist.append(rule)
vmsg('Rule added from line {}: {} {}'
''.format(lineno, rule[0], rule[1]))
return rlist
def parse_rule(line, lineno):
# Return a valid rule as a tuple. Issue a warning for an invalid
# rule and return None.
try:
mobj = re.match(r'([+-])\s*(.*)', line)
if not mobj:
raise ValueError
action, pattern = mobj.groups()
if not pattern:
raise ValueError
except ValueError:
warn('Malformed rule ignored at line {}'.format(lineno))
return None
if action == '+' and not osp.isabs(pattern):
warn('Invalid rule ignored at line {}\n'
' (additive rule with relative pathname)'
''.format(lineno))
return None
if pattern[-1:] == os.sep:
pattern = pattern.rstrip(os.sep)
warn('Trailing slashes removed from rule at line {}'
''.format(lineno))
return action, pattern
def add_matches(dirs, nondirs, pattern):
for path in glob.iglob(pattern):
if is_true_dir(path):
add_path(dirs, 'dirs', path)
add_tree(dirs, nondirs, path)
else:
add_path(nondirs, 'nondirs', path)
def add_tree(dirs, nondirs, top):
vmsg('<<< {} paths before walking {}'
''.format(len(dirs) + len(nondirs), top))
for wpath, wdirs, wfiles in os.walk(top, onerror=walk_error):
vmsg('<< {} paths before adding contents of {}'
''.format(len(dirs) + len(nondirs), wpath))
# wdirs includes links to true dirs.
for d in wdirs:
fullpath = osp.join(wpath, d)
if is_true_dir(fullpath):
add_path(dirs, 'dirs', fullpath)
else:
add_path(nondirs, 'nondirs', fullpath)
for f in wfiles:
add_path(nondirs, 'nondirs', osp.join(wpath, f))
vmsg('>> {} paths after adding contents of {}'
''.format(len(dirs) + len(nondirs), wpath))
vmsg('>>> {} paths after walking {}'
''.format(len(dirs) + len(nondirs), top))
def is_true_dir(path):
return osp.isdir(path) and not osp.islink(path)
def add_path(pset, setname, path):
# Add only files, directories, and links.
# Must check islink() to include broken links.
if osp.isfile(path) or osp.isdir(path) or osp.islink(path):
size = len(pset)
pset.add(path)
if '\n' in path and not GLOBAL_null:
warn('Pathname contains a newline; '
'consider using -0\n {!r}'.format(path))
if GLOBAL_verbose:
if len(pset) > size:
vmsg('Added to {}: {}'.format(setname, path))
else:
vmsg('Not added to {} because already there: {}'
''.format(setname, path))
else:
vmsg('Not added due to file type: {}'.format(path))
def remove_matches(dirs, nondirs, pattern):
vmsg('<<< {} paths before removing pattern {}'
''.format(len(dirs) + len(nondirs), pattern))
# Special rule: if pattern is a directory, remove it and change
# pattern to dir/* so subsequent code will remove all contents.
if pattern in dirs:
dirs.remove(pattern)
pattern += os.sep + '*'
for setname, pset in ('dirs', dirs), ('nondirs', nondirs):
to_remove = set(fnmatch.filter(pset, pattern))
if GLOBAL_verbose:
for p in sorted(to_remove):
vmsg('Removing from {}: {}'.format(setname, p))
pset -= to_remove
vmsg('>>> {} paths after removing pattern {}'
''.format(len(dirs) + len(nondirs), pattern))
def report_descend_failures(dirs):
# Try to descend into all directories, and report failures.
# os.walk() will have reported non-readable ones, but not those that
# are readable and not searchable.
saved_cwd = os.getcwd()
for d in dirs:
try:
os.chdir(d)
except:
warn('{} could not be entered; mode {}'
''.format(d, oct(os.stat(d).st_mode)[-3:]))
os.chdir(saved_cwd)
def walk_error(err):
warn('{}: {}'.format(err.filename, err.strerror))
def vmsg(msg):
if GLOBAL_verbose:
print(msg, file=sys.stderr)
def warn(msg):
errwarn(msg, 'Warning')
def error_quit(msg):
errwarn(msg, 'Error')
sys.exit(1)
def errwarn(msg, prefix):
progname = osp.basename(sys.argv[0])
print('{}: {}: {}'.format(progname, prefix, msg), file=sys.stderr)
main()
1 Answer 1
Some comments:
- Please use docstrings.
- Note that
argparse
can be installed from pypi for older python versions. - For the computation of relative paths, please have a look at
os.path.relpath
. - I don't like using global flags either. What about using a class to store that internal state?
- Instead of
vmsg
,errwarn
, etc., please use thelogging
module (logging.error
,logging.warning
, etc.). GLOBAL_verbose
could be replaced also usinglogging.debug
and setting log level based on the command line argument.Instead of calling
main
at the bottom of the script directly, please use the usual convention:if __name__ == '__main__': main()
Try to parse arguments before calling main and pass them to main. This way, it will be testable if you want to write test cases in the future:
if __name__ == '__main__': args = process_args() main(args)
line[:1] == '#'
could be reduced toline[0] == '#'
, but I find more readable this:line.startswith('#')
pattern[-1:] == os.sep
could be reduced topattern[-1] == os.sep
, but I find more readable this:pattern.endswith(os.sep)
I hope this helps.
Edit: Regarding using a class
instead of globals, what I meant more or less is following this pattern:
class Script(object):
"""Docstring."""
def __init__(self, verbose, null):
"""Docstring."""
self.verbose = verbose
self.null = null
...
def run(self):
"""Docstring."""
<put here the code in the original main function>
...
def add_path(self, pset, setname, path):
"""Docstring."""
...
if '\n' in path and not self.null:
...
if self.verbose:
...
if __name__ == '__main__':
args = process_args()
script = Script(args.verbose, args.null)
script.run()
Of course this is just a starting point. After some work in the script you might want to create a Rule
class or a RuleProcessor
class.
-
\$\begingroup\$ Lots of good suggestions here, thank you! I hadn't noticed
logging
, will check it out. For the class replacing the globals, do you mean I should pass an object around, or do you mean I should make a global object that is created inmain()
? Or something else? \$\endgroup\$Tom Zych– Tom Zych2014年08月13日 15:36:19 +00:00Commented Aug 13, 2014 at 15:36 -
\$\begingroup\$ Added an example to the response about how I'd start using a class to store internal state and get rid of globals. \$\endgroup\$jcollado– jcollado2014年08月13日 16:02:27 +00:00Commented Aug 13, 2014 at 16:02
-
\$\begingroup\$ Um. Not sure about that idea. If we only put the parts that use the flags into the class, it looks complicated. If we put everything into the class, then aren't we just replacing module globals with class globals? I don't see what we gain. \$\endgroup\$Tom Zych– Tom Zych2014年08月13日 16:25:55 +00:00Commented Aug 13, 2014 at 16:25
-
\$\begingroup\$ As I said, that's the starting point to refactor the code into a more object oriented approach. Anyway, if you are happy with the way it is right now, there's no reason to change it. \$\endgroup\$jcollado– jcollado2014年08月13日 16:29:35 +00:00Commented Aug 13, 2014 at 16:29
Explore related questions
See similar questions with these tags.