My MapReduce tester is clearly ported from shell. Short of giving the mapper an `args=None` parameter and looping with `for line in args or read_input()`, what's a better way of importing and testing the function without going through `subprocess`?
Or does it not matter, i.e. is my "hack" fine?
##test_mapreduce.py
test_mapreduce.py
from unittest import TestCase, main as unittest_main
from subprocess import check_output as run
from os import path
class TestMapReduce(TestCase):
    """Run the mapper and reducer scripts end to end as child processes.

    The pipeline is the Python equivalent of the shell command
    ``mapper.py DATAFILE | sort | reducer.py``: each stage's standard
    output is fed to the next stage's standard input.
    """

    # Directory holding setup.py, relative to the current working directory.
    # '' means the tests were started from that directory (see setUpClass).
    top_path = ''

    @classmethod
    def setUpClass(cls):
        """Locate the directory containing setup.py and record it in top_path.

        Raises:
            AssertionError: if setup.py is neither in the current directory
                nor two levels above it.
        """
        if path.isfile('setup.py'):
            return
        cls.top_path = path.join('..', '..')
        if not path.isfile(path.join(cls.top_path, 'setup.py')):
            raise AssertionError("Haven't found right directory to `cd` into")

    @staticmethod
    def _pipe(argv, stdin_data=None):
        """Run argv, optionally feeding stdin_data, and return its stdout.

        Raises:
            CalledProcessError: if the child exits with a non-zero status.
        """
        from subprocess import PIPE, CalledProcessError, Popen

        proc = Popen(argv,
                     stdin=PIPE if stdin_data is not None else None,
                     stdout=PIPE)
        output, _ = proc.communicate(stdin_data)
        if proc.returncode != 0:
            raise CalledProcessError(proc.returncode, argv, output=output)
        return output

    def map_reduce(self, mapper_name, reducer_name, datafile_name):
        """Return the reducer's stdout for the given data file.

        Args:
            mapper_name: mapper script path, relative to top_path.
            reducer_name: reducer script path, relative to top_path.
            datafile_name: input file name inside ``top_path/data``.

        Raises:
            CalledProcessError: if the mapper or the reducer fails.
        """
        import sys

        # Map: the mapper receives the data file as a command-line argument.
        mapped = self._pipe([sys.executable,
                             path.join(self.top_path, mapper_name),
                             path.join(self.top_path, 'data', datafile_name)])

        # Shuffle: sort the records in-process instead of spawning `sort`,
        # so equal keys end up adjacent without an external dependency.
        records = sorted(mapped.splitlines())
        shuffled = (b'\n'.join(records) + b'\n') if records else b''

        # Reduce: the sorted records must arrive on the reducer's stdin;
        # passing them as an argv element would be read as a file name.
        return self._pipe([sys.executable,
                           path.join(self.top_path, reducer_name)],
                          shuffled)

    def test_with_student_test_posts(self):
        """Smoke test: the pipeline runs to completion on the sample data."""
        # TODO: assert on the expected reducer output instead of printing it.
        print(self.map_reduce('mapper.py', 'reducer.py', 'student_test_posts.csv'))


if __name__ == '__main__':
    unittest_main()
##mapper.py
mapper.py
#!/usr/bin/env python
from fileinput import input as read_input
def mapper(lines=None):
    """Print ``store<TAB>cost`` for every well-formed purchase record.

    Args:
        lines: optional iterable of input lines. When None (the default)
            the lines come from ``fileinput.input()``, i.e. the files named
            on the command line, or stdin when there are none. Passing an
            iterable lets tests call the function directly, without a
            subprocess.

    Lines that do not split into exactly six tab-separated fields are
    skipped silently.
    """
    # Compare against None rather than using `lines or read_input()`:
    # an empty list must yield no output, not fall back to reading stdin.
    if lines is None:
        lines = read_input()
    for line in lines:
        # strip() also removes leading/trailing tabs, so a record whose
        # first or last field is empty fails the length check below.
        data = line.strip().split('\t')
        if len(data) != 6:
            continue
        date, time, store, item, cost, payment = data
        print("{0}\t{1}".format(store, cost))


if __name__ == '__main__':
    mapper()
PS: Should I refactor to use the built-in `map` and `reduce` functions?
My MapReduce tester is clearly ported from shell. Short of giving the mapper an `args=None` parameter and looping with `for line in args or read_input()`, what's a better way of importing and testing the function without going through `subprocess`?
Or does it not matter, i.e. is my "hack" fine?
##test_mapreduce.py
from unittest import TestCase, main as unittest_main
from subprocess import check_output as run
from os import path
class TestMapReduce(TestCase):
    """Run the mapper and reducer scripts end to end as child processes.

    The pipeline is the Python equivalent of the shell command
    ``mapper.py DATAFILE | sort | reducer.py``: each stage's standard
    output is fed to the next stage's standard input.
    """

    # Directory holding setup.py, relative to the current working directory.
    # '' means the tests were started from that directory (see setUpClass).
    top_path = ''

    @classmethod
    def setUpClass(cls):
        """Locate the directory containing setup.py and record it in top_path.

        Raises:
            AssertionError: if setup.py is neither in the current directory
                nor two levels above it.
        """
        if path.isfile('setup.py'):
            return
        cls.top_path = path.join('..', '..')
        if not path.isfile(path.join(cls.top_path, 'setup.py')):
            raise AssertionError("Haven't found right directory to `cd` into")

    @staticmethod
    def _pipe(argv, stdin_data=None):
        """Run argv, optionally feeding stdin_data, and return its stdout.

        Raises:
            CalledProcessError: if the child exits with a non-zero status.
        """
        from subprocess import PIPE, CalledProcessError, Popen

        proc = Popen(argv,
                     stdin=PIPE if stdin_data is not None else None,
                     stdout=PIPE)
        output, _ = proc.communicate(stdin_data)
        if proc.returncode != 0:
            raise CalledProcessError(proc.returncode, argv, output=output)
        return output

    def map_reduce(self, mapper_name, reducer_name, datafile_name):
        """Return the reducer's stdout for the given data file.

        Args:
            mapper_name: mapper script path, relative to top_path.
            reducer_name: reducer script path, relative to top_path.
            datafile_name: input file name inside ``top_path/data``.

        Raises:
            CalledProcessError: if the mapper or the reducer fails.
        """
        import sys

        # Map: the mapper receives the data file as a command-line argument.
        mapped = self._pipe([sys.executable,
                             path.join(self.top_path, mapper_name),
                             path.join(self.top_path, 'data', datafile_name)])

        # Shuffle: sort the records in-process instead of spawning `sort`,
        # so equal keys end up adjacent without an external dependency.
        records = sorted(mapped.splitlines())
        shuffled = (b'\n'.join(records) + b'\n') if records else b''

        # Reduce: the sorted records must arrive on the reducer's stdin;
        # passing them as an argv element would be read as a file name.
        return self._pipe([sys.executable,
                           path.join(self.top_path, reducer_name)],
                          shuffled)

    def test_with_student_test_posts(self):
        """Smoke test: the pipeline runs to completion on the sample data."""
        # TODO: assert on the expected reducer output instead of printing it.
        print(self.map_reduce('mapper.py', 'reducer.py', 'student_test_posts.csv'))


if __name__ == '__main__':
    unittest_main()
##mapper.py
#!/usr/bin/env python
from fileinput import input as read_input
def mapper(lines=None):
    """Print ``store<TAB>cost`` for every well-formed purchase record.

    Args:
        lines: optional iterable of input lines. When None (the default)
            the lines come from ``fileinput.input()``, i.e. the files named
            on the command line, or stdin when there are none. Passing an
            iterable lets tests call the function directly, without a
            subprocess.

    Lines that do not split into exactly six tab-separated fields are
    skipped silently.
    """
    # Compare against None rather than using `lines or read_input()`:
    # an empty list must yield no output, not fall back to reading stdin.
    if lines is None:
        lines = read_input()
    for line in lines:
        # strip() also removes leading/trailing tabs, so a record whose
        # first or last field is empty fails the length check below.
        data = line.strip().split('\t')
        if len(data) != 6:
            continue
        date, time, store, item, cost, payment = data
        print("{0}\t{1}".format(store, cost))


if __name__ == '__main__':
    mapper()
PS: Should I refactor to use the built-in `map` and `reduce` functions?
My MapReduce tester is clearly ported from shell. Short of giving the mapper an `args=None` parameter and looping with `for line in args or read_input()`, what's a better way of importing and testing the function without going through `subprocess`?
Or does it not matter, i.e. is my "hack" fine?
test_mapreduce.py
from unittest import TestCase, main as unittest_main
from subprocess import check_output as run
from os import path
class TestMapReduce(TestCase):
    """Run the mapper and reducer scripts end to end as child processes.

    The pipeline is the Python equivalent of the shell command
    ``mapper.py DATAFILE | sort | reducer.py``: each stage's standard
    output is fed to the next stage's standard input.
    """

    # Directory holding setup.py, relative to the current working directory.
    # '' means the tests were started from that directory (see setUpClass).
    top_path = ''

    @classmethod
    def setUpClass(cls):
        """Locate the directory containing setup.py and record it in top_path.

        Raises:
            AssertionError: if setup.py is neither in the current directory
                nor two levels above it.
        """
        if path.isfile('setup.py'):
            return
        cls.top_path = path.join('..', '..')
        if not path.isfile(path.join(cls.top_path, 'setup.py')):
            raise AssertionError("Haven't found right directory to `cd` into")

    @staticmethod
    def _pipe(argv, stdin_data=None):
        """Run argv, optionally feeding stdin_data, and return its stdout.

        Raises:
            CalledProcessError: if the child exits with a non-zero status.
        """
        from subprocess import PIPE, CalledProcessError, Popen

        proc = Popen(argv,
                     stdin=PIPE if stdin_data is not None else None,
                     stdout=PIPE)
        output, _ = proc.communicate(stdin_data)
        if proc.returncode != 0:
            raise CalledProcessError(proc.returncode, argv, output=output)
        return output

    def map_reduce(self, mapper_name, reducer_name, datafile_name):
        """Return the reducer's stdout for the given data file.

        Args:
            mapper_name: mapper script path, relative to top_path.
            reducer_name: reducer script path, relative to top_path.
            datafile_name: input file name inside ``top_path/data``.

        Raises:
            CalledProcessError: if the mapper or the reducer fails.
        """
        import sys

        # Map: the mapper receives the data file as a command-line argument.
        mapped = self._pipe([sys.executable,
                             path.join(self.top_path, mapper_name),
                             path.join(self.top_path, 'data', datafile_name)])

        # Shuffle: sort the records in-process instead of spawning `sort`,
        # so equal keys end up adjacent without an external dependency.
        records = sorted(mapped.splitlines())
        shuffled = (b'\n'.join(records) + b'\n') if records else b''

        # Reduce: the sorted records must arrive on the reducer's stdin;
        # passing them as an argv element would be read as a file name.
        return self._pipe([sys.executable,
                           path.join(self.top_path, reducer_name)],
                          shuffled)

    def test_with_student_test_posts(self):
        """Smoke test: the pipeline runs to completion on the sample data."""
        # TODO: assert on the expected reducer output instead of printing it.
        print(self.map_reduce('mapper.py', 'reducer.py', 'student_test_posts.csv'))


if __name__ == '__main__':
    unittest_main()
mapper.py
#!/usr/bin/env python
from fileinput import input as read_input
def mapper(lines=None):
    """Print ``store<TAB>cost`` for every well-formed purchase record.

    Args:
        lines: optional iterable of input lines. When None (the default)
            the lines come from ``fileinput.input()``, i.e. the files named
            on the command line, or stdin when there are none. Passing an
            iterable lets tests call the function directly, without a
            subprocess.

    Lines that do not split into exactly six tab-separated fields are
    skipped silently.
    """
    # Compare against None rather than using `lines or read_input()`:
    # an empty list must yield no output, not fall back to reading stdin.
    if lines is None:
        lines = read_input()
    for line in lines:
        # strip() also removes leading/trailing tabs, so a record whose
        # first or last field is empty fails the length check below.
        data = line.strip().split('\t')
        if len(data) != 6:
            continue
        date, time, store, item, cost, payment = data
        print("{0}\t{1}".format(store, cost))


if __name__ == '__main__':
    mapper()
PS: Should I refactor to use the built-in `map` and `reduce` functions?
from unittest import TestCase, main as unittest_main
from ossubprocess import path
from map_reduce_udacity.mappercheck_output importas mapperrun
from map_reduce_udacity.reduceros import reducerpath
class TestMapReduce(TestCase):
top_path = ''
@classmethod
map_reduce = lambda self, defmapper_name, setUpClass(cls)reducer_name, datafile_name: run(
if not['python', path.isfilejoin('setupself.py'top_path, reducer_name):
, # Reduce
cls.top_pathrun(['sort', = path.join('..'# Shuffle, '..')could be replaced with python `sorted`
if not path.isfilerun(['python', path.join(clsself.top_path, 'setup.py')mapper_name):
, # Map
raise AssertionError("Haven't found right directory to `cd`path.join(self.top_path, into"'data', datafile_name)])])])
@classmethod
def map_reducesetUpClass(self, datafile_namecls):
dif =not Nonepath.isfile('setup.py'):
with open( cls.top_path = path.join(self'.top_path, 'data'.', datafile_name)'..') as f:
dif =not tuplepath.isfile(mapperpath.join(line) for line incls.top_path, f'setup.readline(py')):
reducer(sorted raise AssertionError(d))
"Haven't found right directory to `cd` into")
def test_with_student_test_posts(self):
print self.map_reduce('mapper.py', 'reducer.py', 'student_test_posts.csv')
if __name__ == '__main__':
unittest_main()
from unittest import TestCase, main as unittest_main
from os import path
from map_reduce_udacity.mapper import mapper
from map_reduce_udacity.reducer import reducer
class TestMapReduce(TestCase):
    """Exercise the imported mapper and reducer in-process on a data file."""

    # Directory holding setup.py, relative to the current working directory.
    # '' means the tests were started from that directory (see setUpClass).
    top_path = ''

    @classmethod
    def setUpClass(cls):
        """Locate the directory containing setup.py and record it in top_path.

        Raises:
            AssertionError: if setup.py is neither in the current directory
                nor two levels above it.
        """
        if path.isfile('setup.py'):
            return
        cls.top_path = path.join('..', '..')
        if not path.isfile(path.join(cls.top_path, 'setup.py')):
            raise AssertionError("Haven't found right directory to `cd` into")

    def map_reduce(self, datafile_name):
        """Map every line of the data file, sort the results, and reduce them.

        Args:
            datafile_name: input file name inside ``top_path/data``.

        Returns:
            Whatever ``reducer`` returns for the sorted mapper results.

        NOTE(review): assumes ``mapper(line)`` returns a sortable record and
        ``reducer`` accepts the sorted list -- confirm against
        map_reduce_udacity.
        """
        with open(path.join(self.top_path, 'data', datafile_name)) as f:
            # Iterate the file object to get one line at a time;
            # f.readline() returns a single string, whose iteration yields
            # individual characters of the first line only.
            mapped = [mapper(line) for line in f]
        # Return the result so the caller has something to inspect.
        return reducer(sorted(mapped))

    def test_with_student_test_posts(self):
        """Smoke test: the map/sort/reduce chain runs on the sample data."""
        # TODO: assert on the expected reducer output instead of printing it.
        print(self.map_reduce('student_test_posts.csv'))


if __name__ == '__main__':
    unittest_main()
from unittest import TestCase, main as unittest_main
from subprocess import check_output as run
from os import path
class TestMapReduce(TestCase):
    """Run the mapper and reducer scripts end to end as child processes.

    The pipeline is the Python equivalent of the shell command
    ``mapper.py DATAFILE | sort | reducer.py``: each stage's standard
    output is fed to the next stage's standard input.
    """

    # Directory holding setup.py, relative to the current working directory.
    # '' means the tests were started from that directory (see setUpClass).
    top_path = ''

    @classmethod
    def setUpClass(cls):
        """Locate the directory containing setup.py and record it in top_path.

        Raises:
            AssertionError: if setup.py is neither in the current directory
                nor two levels above it.
        """
        if path.isfile('setup.py'):
            return
        cls.top_path = path.join('..', '..')
        if not path.isfile(path.join(cls.top_path, 'setup.py')):
            raise AssertionError("Haven't found right directory to `cd` into")

    @staticmethod
    def _pipe(argv, stdin_data=None):
        """Run argv, optionally feeding stdin_data, and return its stdout.

        Raises:
            CalledProcessError: if the child exits with a non-zero status.
        """
        from subprocess import PIPE, CalledProcessError, Popen

        proc = Popen(argv,
                     stdin=PIPE if stdin_data is not None else None,
                     stdout=PIPE)
        output, _ = proc.communicate(stdin_data)
        if proc.returncode != 0:
            raise CalledProcessError(proc.returncode, argv, output=output)
        return output

    def map_reduce(self, mapper_name, reducer_name, datafile_name):
        """Return the reducer's stdout for the given data file.

        Args:
            mapper_name: mapper script path, relative to top_path.
            reducer_name: reducer script path, relative to top_path.
            datafile_name: input file name inside ``top_path/data``.

        Raises:
            CalledProcessError: if the mapper or the reducer fails.
        """
        import sys

        # Map: the mapper receives the data file as a command-line argument.
        mapped = self._pipe([sys.executable,
                             path.join(self.top_path, mapper_name),
                             path.join(self.top_path, 'data', datafile_name)])

        # Shuffle: sort the records in-process instead of spawning `sort`,
        # so equal keys end up adjacent without an external dependency.
        records = sorted(mapped.splitlines())
        shuffled = (b'\n'.join(records) + b'\n') if records else b''

        # Reduce: the sorted records must arrive on the reducer's stdin;
        # passing them as an argv element would be read as a file name.
        return self._pipe([sys.executable,
                           path.join(self.top_path, reducer_name)],
                          shuffled)

    def test_with_student_test_posts(self):
        """Smoke test: the pipeline runs to completion on the sample data."""
        # TODO: assert on the expected reducer output instead of printing it.
        print(self.map_reduce('mapper.py', 'reducer.py', 'student_test_posts.csv'))


if __name__ == '__main__':
    unittest_main()
from unittest import TestCase, main as unittest_main
from subprocessos import check_outputpath
from asmap_reduce_udacity.mapper runimport mapper
from osmap_reduce_udacity.reducer import pathreducer
class TestMapReduce(TestCase):
top_path = ''
map_reduce = lambda@classmethod
self, mapper_name, reducer_name, datafile_name:def runsetUpClass(cls):
['python',if not path.joinisfile(self'setup.top_path, reducer_namepy'), # Reduce:
run(['sort', # Shuffle, could be replacedcls.top_path with= pythonpath.join('..', `sorted`'..')
if not runpath.isfile(['python', path.join(selfcls.top_path, mapper_name'setup.py'), # Map):
raise path.joinAssertionError(self.top_path,"Haven't 'data',found datafile_name)])])])
right directory to `cd` @classmethodinto")
def setUpClassmap_reduce(clsself, datafile_name):
ifd not= path.isfile('setup.py'):None
cls.top_path =with open(path.join('.self.'top_path, '..''data', datafile_name)) as f:
ifd not= path.isfiletuple(path.joinmapper(cls.top_path, 'setup.py')line):
for line in f.readline())
raise AssertionErrorreducer("Haven't found rightsorted(d))
directory to `cd` into")
def test_with_student_test_posts(self):
print self.map_reduce('mapper.py', 'reducer.py', 'student_test_posts.csv')
if __name__ == '__main__':
unittest_main()
from unittest import TestCase, main as unittest_main
from subprocess import check_output as run
from os import path
class TestMapReduce(TestCase):
    """Run the mapper and reducer scripts end to end as child processes.

    The pipeline is the Python equivalent of the shell command
    ``mapper.py DATAFILE | sort | reducer.py``: each stage's standard
    output is fed to the next stage's standard input.
    """

    # Directory holding setup.py, relative to the current working directory.
    # '' means the tests were started from that directory (see setUpClass).
    top_path = ''

    @classmethod
    def setUpClass(cls):
        """Locate the directory containing setup.py and record it in top_path.

        Raises:
            AssertionError: if setup.py is neither in the current directory
                nor two levels above it.
        """
        if path.isfile('setup.py'):
            return
        cls.top_path = path.join('..', '..')
        if not path.isfile(path.join(cls.top_path, 'setup.py')):
            raise AssertionError("Haven't found right directory to `cd` into")

    @staticmethod
    def _pipe(argv, stdin_data=None):
        """Run argv, optionally feeding stdin_data, and return its stdout.

        Raises:
            CalledProcessError: if the child exits with a non-zero status.
        """
        from subprocess import PIPE, CalledProcessError, Popen

        proc = Popen(argv,
                     stdin=PIPE if stdin_data is not None else None,
                     stdout=PIPE)
        output, _ = proc.communicate(stdin_data)
        if proc.returncode != 0:
            raise CalledProcessError(proc.returncode, argv, output=output)
        return output

    def map_reduce(self, mapper_name, reducer_name, datafile_name):
        """Return the reducer's stdout for the given data file.

        Args:
            mapper_name: mapper script path, relative to top_path.
            reducer_name: reducer script path, relative to top_path.
            datafile_name: input file name inside ``top_path/data``.

        Raises:
            CalledProcessError: if the mapper or the reducer fails.
        """
        import sys

        # Map: the mapper receives the data file as a command-line argument.
        mapped = self._pipe([sys.executable,
                             path.join(self.top_path, mapper_name),
                             path.join(self.top_path, 'data', datafile_name)])

        # Shuffle: sort the records in-process instead of spawning `sort`,
        # so equal keys end up adjacent without an external dependency.
        records = sorted(mapped.splitlines())
        shuffled = (b'\n'.join(records) + b'\n') if records else b''

        # Reduce: the sorted records must arrive on the reducer's stdin;
        # passing them as an argv element would be read as a file name.
        return self._pipe([sys.executable,
                           path.join(self.top_path, reducer_name)],
                          shuffled)

    def test_with_student_test_posts(self):
        """Smoke test: the pipeline runs to completion on the sample data."""
        # TODO: assert on the expected reducer output instead of printing it.
        print(self.map_reduce('mapper.py', 'reducer.py', 'student_test_posts.csv'))


if __name__ == '__main__':
    unittest_main()
from unittest import TestCase, main as unittest_main
from os import path
from map_reduce_udacity.mapper import mapper
from map_reduce_udacity.reducer import reducer
class TestMapReduce(TestCase):
    """Exercise the imported mapper and reducer in-process on a data file."""

    # Directory holding setup.py, relative to the current working directory.
    # '' means the tests were started from that directory (see setUpClass).
    top_path = ''

    @classmethod
    def setUpClass(cls):
        """Locate the directory containing setup.py and record it in top_path.

        Raises:
            AssertionError: if setup.py is neither in the current directory
                nor two levels above it.
        """
        if path.isfile('setup.py'):
            return
        cls.top_path = path.join('..', '..')
        if not path.isfile(path.join(cls.top_path, 'setup.py')):
            raise AssertionError("Haven't found right directory to `cd` into")

    def map_reduce(self, datafile_name):
        """Map every line of the data file, sort the results, and reduce them.

        Args:
            datafile_name: input file name inside ``top_path/data``.

        Returns:
            Whatever ``reducer`` returns for the sorted mapper results.

        NOTE(review): assumes ``mapper(line)`` returns a sortable record and
        ``reducer`` accepts the sorted list -- confirm against
        map_reduce_udacity.
        """
        with open(path.join(self.top_path, 'data', datafile_name)) as f:
            # Iterate the file object to get one line at a time;
            # f.readline() returns a single string, whose iteration yields
            # individual characters of the first line only.
            mapped = [mapper(line) for line in f]
        # Return the result so the caller has something to inspect.
        return reducer(sorted(mapped))

    def test_with_student_test_posts(self):
        """Smoke test: the map/sort/reduce chain runs on the sample data."""
        # TODO: assert on the expected reducer output instead of printing it.
        print(self.map_reduce('student_test_posts.csv'))


if __name__ == '__main__':
    unittest_main()