I frequently write Python scripts that require three command line arguments:
- config_file — A CSV configuration file
- input_dir — The directory containing the input files to be processed
- output_dir — The directory where the output files should be stored
I found myself copying/pasting two functions all the time: is_valid_file and is_valid_directory.
Questions
- How can I consolidate the is_valid_file and is_valid_directory functions to eliminate duplicate code?
- Is there a better way to check that files/directories provided in CLI arguments actually exist?
Code
Here is the code for process_the_data.py
:
if __name__ == "__main__":
    # Parse the three positional command line arguments, validating each
    # path through the arghelper checks while argparse converts it.
    import argparse
    import arghelper

    parser = argparse.ArgumentParser(description='Process the data.')

    def existing_file(path):
        """Hand a file argument to arghelper for validation."""
        return arghelper.is_valid_file(parser, path)

    def existing_directory(path):
        """Hand a directory argument to arghelper for validation."""
        return arghelper.is_valid_directory(parser, path)

    parser.add_argument(
        'config_file',
        metavar='FILE',
        type=existing_file,
        help='CSV configuration file.')
    parser.add_argument(
        'input_dir',
        metavar='DIR',
        type=existing_directory,
        help='Directory containing the input files.')
    parser.add_argument(
        'output_dir',
        metavar='DIR',
        type=existing_directory,
        help='Directory where the output files should be saved.')

    args = parser.parse_args()
Here is the code for arghelper.py
:
import os
def is_valid_file(parser, arg):
    """Return ``arg`` unchanged when it names an existing file.

    Intended for use as an argparse ``type`` callback. If ``arg`` is not
    an existing file, the problem is reported via ``parser.error`` (which,
    for an ``argparse.ArgumentParser``, prints usage and exits).
    """
    if os.path.isfile(arg):
        # The path checks out, so hand the filename back to the caller.
        return arg
    parser.error('The file {} does not exist!'.format(arg))
def is_valid_directory(parser, arg):
    """Return ``arg`` unchanged when it names an existing directory.

    Intended for use as an argparse ``type`` callback. If ``arg`` is not
    an existing directory, the problem is reported via ``parser.error``
    (which, for an ``argparse.ArgumentParser``, prints usage and exits).
    """
    if os.path.isdir(arg):
        # The directory checks out, so hand its path back to the caller.
        return arg
    parser.error('The directory {} does not exist!'.format(arg))
1 Answer 1
You can address some of this by subclassing ArgumentParser. This will allow you to hide the lambda calls, which may put some readers off and leave you scratching your head if you come back to that file later.
Your Script
if __name__ == "__main__":
    # Process the arguments. FileArgumentParser chooses the existence
    # check for each argument from its metavar ('FILE' or 'DIR').
    import arghelper

    parser = arghelper.FileArgumentParser(
        description='Process the data.')
    parser.add_argument_with_check(
        'config_file',
        help='CSV configuration file.',
        metavar='FILE')
    parser.add_argument_with_check(
        'input_dir',
        help='Directory containing the input files.',
        metavar='DIR')
    parser.add_argument_with_check(
        'output_dir',
        help='Directory where the output files should be saved.',
        metavar='DIR')
    args = parser.parse_args()
    # Call form of print: identical output on Python 2 for a single
    # argument, and the only valid syntax on Python 3.
    print(args)
New ArgHelper
import argparse
import os
class FileArgumentParser(argparse.ArgumentParser):
    """ArgumentParser that can verify path arguments exist on disk.

    Arguments registered through ``add_argument_with_check`` with a
    metavar of ``'FILE'`` or ``'DIR'`` are checked for existence while
    the command line is parsed.
    """

    def __is_valid_file(self, parser, arg):
        """Return ``arg`` if it is an existing file.

        Otherwise report the problem through ``parser.error``, which for
        an ArgumentParser prints the usage message and exits.
        """
        if os.path.isfile(arg):
            return arg
        parser.error('The file {} does not exist!'.format(arg))

    def __is_valid_directory(self, parser, arg):
        """Return ``arg`` if it is an existing directory.

        Otherwise report the problem through ``parser.error``, which for
        an ArgumentParser prints the usage message and exits.
        """
        if os.path.isdir(arg):
            return arg
        parser.error('The directory {} does not exist!'.format(arg))

    def add_argument_with_check(self, *args, **kwargs):
        """Add an argument, attaching an existence check based on metavar.

        Accepts the same arguments as ``add_argument``. When no explicit
        ``type`` is given, a metavar of ``'FILE'`` installs a file
        existence check and a metavar of ``'DIR'`` installs a directory
        existence check. Any other combination is passed through
        untouched.

        Returns the value returned by ``add_argument``.
        """
        # An explicit ``type`` from the caller always takes precedence.
        if 'type' not in kwargs:
            # Compare by value: identity (``is``) against a string literal
            # only works when both strings happen to be interned.
            metavar = kwargs.get('metavar')
            if metavar == 'FILE':
                kwargs['type'] = lambda path: self.__is_valid_file(self, path)
            elif metavar == 'DIR':
                kwargs['type'] = (
                    lambda path: self.__is_valid_directory(self, path))
        return self.add_argument(*args, **kwargs)
-
Comment: At line 8, self.error('The file {} does not exist!'.format(arg)) is enough. The "else" at line 9 is not needed, and "parser" is the same as self. – Denis Trofimov, commented Jan 31, 2019 at 15:08 (2019-01-31 15:08:17 +00:00)