15
\$\begingroup\$

This is a function I wrote yesterday and have tested with lots of input. It's been committed and is in use, but no review was done, so it seems like a good candidate for Code Review: it could be more readable and maybe refactored into simpler functions.

Its purpose is to give the longest common path and a string listing filenames for lists of dictionaries with full file paths. It's used to process Bitbucket API results.

import os.path
import py # Used for other functions, listed for possible use in refactoring
def getpaths(files, listfiles=False):
 """For a list of files, a common path prefix and optionally list filenames
 Returns a tuple (common_prefix, filenames):
 common_prefix: Longest common path to all files in the input. If input
 is a single file, contains full file path.
 Slash-terminated if present and directory, empty string
 otherwise.
 filenames: String containing the names of modified files, in the format
 " M(file1, file2)" if listfiles=True, empty string
 if either listfiles=False or no file was modified.
 """
 # Handle empty input
 if not files:
 return '', ''
 files = [f['file'] for f in files]
 if not any(files):
 return '', ''
 dirname = os.path.dirname
 basename = os.path.basename
 common_prefix = [dirname(f) for f in files]
 # Single file, show its full path
 if len(files) == 1:
 common_prefix = files[0]
 listfiles = False
 else:
 common_prefix = [path.split(os.sep) for path in common_prefix]
 common_prefix = os.sep.join(os.path.commonprefix(common_prefix))
 if common_prefix and not common_prefix.endswith('/'):
 common_prefix += '/'
 if listfiles:
 filenames = [basename(f) for f in files if f and basename(f)]
 filenames = ' M(%s)' % ', '.join(filenames)
 else:
 filenames = ''
 return common_prefix, filenames
# Test suite 
# Don't worry too much about test style, this is informative enough with py.test.
# Adequate coverage and readibility are the main concern.
# However, if you think a doctest or classic unittest is better, I'm open to change.
def test_getpaths():
 d = dict
 barefile = [d(file='file')]
 distinct = [d(file='path1/file1'), d(file='path2/file2'),
 d(file='path3/file')]
 shared = [d(file='path/file1'), d(file='path/file2'),
 d(file='path/file')]
 deepfile = [d(file='a/long/path/to/deepfile.py')]
 slashesfile = [d(file='/slashesfile/')]
 slashleft = [d(file='/slashleft')]
 slashright = [d(file='slashright/')]
 nocommon = distinct + [d(file='path4/file')]
 nocommonplusroot = distinct + barefile
 common = [d(file='some/path/to/file'), d(file='some/path/to/deeper/file'),
 d(file='some/path/to/anotherfile'), d(file='some/path/to/afile')]
 commonplusroot = shared + barefile
 empty = d(file='')
 nocommonplusempty = distinct + [empty]
 commonplusempty = shared + [empty]
 nocommonplusslash = distinct + [d(file='path4/dir/')]
 commonplusslash = shared + [d(file='path/dir/')]
 pypydoubleslash = [d(file='pypy/jit/metainterp/opt/u.py'),
 d(file='pypy/jit/metainterp/test/test_c.py'),
 d(file='pypy/jit/metainterp/test/test_o.py')]
 nothing = ('', '')
 # (input, expected output) for listfiles=False
 files_expected = [([], nothing),
 ([empty], nothing),
 ([empty, empty], nothing),
 (barefile, ('file', '')),
 (deepfile, ('a/long/path/to/deepfile.py', '')),
 (slashesfile, ('/slashesfile/', '')),
 (slashleft, ('/slashleft', '')),
 (slashright, ('slashright/', '')),
 (nocommon, nothing),
 (nocommonplusroot, nothing),
 (nocommonplusempty, nothing),
 (common, ('some/path/to/', '')),
 (commonplusroot, nothing),
 (commonplusempty, nothing),
 (nocommonplusslash, nothing),
 (commonplusslash, ('path/', '')),
 (pypydoubleslash, ('pypy/jit/metainterp/', '')),
 ]
 for f, wanted in files_expected:
 assert getpaths(f) == wanted
 # (input, expected output) for listfiles=True
 files_expected = [([], nothing),
 ([empty], nothing),
 ([empty, empty], nothing),
 (barefile, ('file', '')),
 (deepfile, ('a/long/path/to/deepfile.py', '')),
 (slashesfile, ('/slashesfile/', '')),
 (slashleft, ('/slashleft', '')),
 (slashright, ('slashright/', '')),
 (nocommon, ('', ' M(file1, file2, file, file)')),
 (nocommonplusroot, ('', ' M(file1, file2, file, file)')),
 (nocommonplusempty, ('',' M(file1, file2, file)')),
 (common, ('some/path/to/',
 ' M(file, file, anotherfile, afile)')),
 (commonplusroot, ('', ' M(file1, file2, file, file)')),
 (commonplusempty, ('',' M(file1, file2, file)')),
 (nocommonplusslash, ('',' M(file1, file2, file)')),
 (commonplusslash, ('path/',' M(file1, file2, file)')),
 (pypydoubleslash, ('pypy/jit/metainterp/',
 ' M(u.py, test_c.py, test_o.py)')),
 ]
 for f, wanted in files_expected:
 assert getpaths(f, listfiles=True) == wanted

Improvements for both the function and its tests are welcome. For some context, this is how it's used:

# Here's the kind of data we want to process. The return for listfiles=False
# should be ('bit/', ''). For True,
# ('bit/', ' M(hook.py, __init__.py, __init__.py, test_hook.py)')
commit = {
 u'files': [{u'file': u'bit/hook.py', u'type':
 u'modified'},
 {u'file': u'bit/__init__.py',
 u'type': u'added'},
 {u'file': u'bit/test/__init__.py',
 u'type': u'added'},
 {u'file': u'bit/test/test_hook.py',
 u'type': u'added'}]
 }
files = commit.get('files', [])
common_prefix, filenames = getpaths(files, True)
common_prefix = '/' + common_prefix
print '%s%s: ' % (common_prefix, filenames)
asked Jan 21, 2011 at 20:09
\$\endgroup\$
2
  • \$\begingroup\$ nice bit of code \$\endgroup\$ Commented Sep 5, 2013 at 5:12
  • \$\begingroup\$ Starting with Python 3.5 one should be compelled to use the function commonpath provided in Python's os.path: docs.python.org/3.5/library/os.path.html#os.path.commonpath \$\endgroup\$ Commented Dec 15, 2017 at 9:31

1 Answer 1

9
\$\begingroup\$
dirname = os.path.dirname
basename = os.path.basename

This can be written as:

from os.path import dirname, basename

The from..import will check that os.path is imported (and import it if not), but is otherwise identical. I find it more clear than repeating the names – especially when you get to three or more.


filenames = [basename(f) for f in files if f and basename(f)]

This can be simplified, as basename on an empty string gives an empty string:

filenames = filter(None, (basename(f) for f in files))
# or
filenames = [x for x in (basename(f) for f in files) if x]
# or
filenames = [x for x in map(basename, files) if x]
answered Jan 27, 2011 at 9:41
\$\endgroup\$

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.