This is a function I wrote yesterday and have tested with lots of input. It's been committed and is in use, but no review was done, so it seems like a good candidate for Code Review: it could be more readable and maybe refactored into simpler functions.
Its purpose is to give the longest common path and a string listing filenames for lists of dictionaries with full file paths. It's used to process Bitbucket API results.
import os.path
import py # Used for other functions, listed for possible use in refactoring
def getpaths(files, listfiles=False):
"""For a list of files, a common path prefix and optionally list filenames
Returns a tuple (common_prefix, filenames):
common_prefix: Longest common path to all files in the input. If input
is a single file, contains full file path.
Slash-terminated if present and directory, empty string
otherwise.
filenames: String containing the names of modified files, in the format
" M(file1, file2)" if listfiles=True, empty string
if either listfiles=False or no file was modified.
"""
# Handle empty input
if not files:
return '', ''
files = [f['file'] for f in files]
if not any(files):
return '', ''
dirname = os.path.dirname
basename = os.path.basename
common_prefix = [dirname(f) for f in files]
# Single file, show its full path
if len(files) == 1:
common_prefix = files[0]
listfiles = False
else:
common_prefix = [path.split(os.sep) for path in common_prefix]
common_prefix = os.sep.join(os.path.commonprefix(common_prefix))
if common_prefix and not common_prefix.endswith('/'):
common_prefix += '/'
if listfiles:
filenames = [basename(f) for f in files if f and basename(f)]
filenames = ' M(%s)' % ', '.join(filenames)
else:
filenames = ''
return common_prefix, filenames
# Test suite
# Don't worry too much about test style, this is informative enough with py.test.
# Adequate coverage and readibility are the main concern.
# However, if you think a doctest or classic unittest is better, I'm open to change.
def test_getpaths():
d = dict
barefile = [d(file='file')]
distinct = [d(file='path1/file1'), d(file='path2/file2'),
d(file='path3/file')]
shared = [d(file='path/file1'), d(file='path/file2'),
d(file='path/file')]
deepfile = [d(file='a/long/path/to/deepfile.py')]
slashesfile = [d(file='/slashesfile/')]
slashleft = [d(file='/slashleft')]
slashright = [d(file='slashright/')]
nocommon = distinct + [d(file='path4/file')]
nocommonplusroot = distinct + barefile
common = [d(file='some/path/to/file'), d(file='some/path/to/deeper/file'),
d(file='some/path/to/anotherfile'), d(file='some/path/to/afile')]
commonplusroot = shared + barefile
empty = d(file='')
nocommonplusempty = distinct + [empty]
commonplusempty = shared + [empty]
nocommonplusslash = distinct + [d(file='path4/dir/')]
commonplusslash = shared + [d(file='path/dir/')]
pypydoubleslash = [d(file='pypy/jit/metainterp/opt/u.py'),
d(file='pypy/jit/metainterp/test/test_c.py'),
d(file='pypy/jit/metainterp/test/test_o.py')]
nothing = ('', '')
# (input, expected output) for listfiles=False
files_expected = [([], nothing),
([empty], nothing),
([empty, empty], nothing),
(barefile, ('file', '')),
(deepfile, ('a/long/path/to/deepfile.py', '')),
(slashesfile, ('/slashesfile/', '')),
(slashleft, ('/slashleft', '')),
(slashright, ('slashright/', '')),
(nocommon, nothing),
(nocommonplusroot, nothing),
(nocommonplusempty, nothing),
(common, ('some/path/to/', '')),
(commonplusroot, nothing),
(commonplusempty, nothing),
(nocommonplusslash, nothing),
(commonplusslash, ('path/', '')),
(pypydoubleslash, ('pypy/jit/metainterp/', '')),
]
for f, wanted in files_expected:
assert getpaths(f) == wanted
# (input, expected output) for listfiles=True
files_expected = [([], nothing),
([empty], nothing),
([empty, empty], nothing),
(barefile, ('file', '')),
(deepfile, ('a/long/path/to/deepfile.py', '')),
(slashesfile, ('/slashesfile/', '')),
(slashleft, ('/slashleft', '')),
(slashright, ('slashright/', '')),
(nocommon, ('', ' M(file1, file2, file, file)')),
(nocommonplusroot, ('', ' M(file1, file2, file, file)')),
(nocommonplusempty, ('',' M(file1, file2, file)')),
(common, ('some/path/to/',
' M(file, file, anotherfile, afile)')),
(commonplusroot, ('', ' M(file1, file2, file, file)')),
(commonplusempty, ('',' M(file1, file2, file)')),
(nocommonplusslash, ('',' M(file1, file2, file)')),
(commonplusslash, ('path/',' M(file1, file2, file)')),
(pypydoubleslash, ('pypy/jit/metainterp/',
' M(u.py, test_c.py, test_o.py)')),
]
for f, wanted in files_expected:
assert getpaths(f, listfiles=True) == wanted
Improvements for both the function and its tests are welcome. For some context, this is how it's used:
# Here's the kind of data we want to process. The return for listfiles=False
# should be ('bit/', ''). For True,
# ('bit/', ' M(hook.py, __init__.py, __init__.py, test_hook.py)')
commit = {
u'files': [{u'file': u'bit/hook.py', u'type':
u'modified'},
{u'file': u'bit/__init__.py',
u'type': u'added'},
{u'file': u'bit/test/__init__.py',
u'type': u'added'},
{u'file': u'bit/test/test_hook.py',
u'type': u'added'}]
}
files = commit.get('files', [])
common_prefix, filenames = getpaths(files, True)
common_prefix = '/' + common_prefix
print '%s%s: ' % (common_prefix, filenames)
1 Answer 1
dirname = os.path.dirname
basename = os.path.basename
This can be written as:
from os.path import dirname, basename
The from..import will check that os.path is imported (and import it if not), but is otherwise identical. I find it more clear than repeating the names – especially when you get to three or more.
filenames = [basename(f) for f in files if f and basename(f)]
This can be simplified, as basename on an empty string gives an empty string:
filenames = filter(None, (basename(f) for f in files))
# or
filenames = [x for x in (basename(f) for f in files) if x]
# or
filenames = [x for x in map(basename, files) if x]
commonpath
provided in Python'sos.path
: docs.python.org/3.5/library/os.path.html#os.path.commonpath \$\endgroup\$