Return to Question

Commonmark migration

edited Jun 10, 2020 at 13:24

My MapReduce tester is clearly ported from Shell, short of args=None for line in args or read_input(), what's a better way of importing->testing the function outside of subprocess?

Or does it not matter, i.e.: my "hack" is fine?

##test_mapreduce.py

test_mapreduce.py

from unittest import TestCase, main as unittest_main
from subprocess import check_output as run
from os import path
class TestMapReduce(TestCase):
 top_path = ''
 map_reduce = lambda self, mapper_name, reducer_name, datafile_name: run(
 ['python', path.join(self.top_path, reducer_name), # Reduce
 run(['sort', # Shuffle, could be replaced with python `sorted`
 run(['python', path.join(self.top_path, mapper_name), # Map
 path.join(self.top_path, 'data', datafile_name)])])])
 @classmethod
 def setUpClass(cls):
 if not path.isfile('setup.py'):
 cls.top_path = path.join('..', '..')
 if not path.isfile(path.join(cls.top_path, 'setup.py')):
 raise AssertionError("Haven't found right directory to `cd` into")
 def test_with_student_test_posts(self):
 print self.map_reduce('mapper.py', 'reducer.py', 'student_test_posts.csv')
if __name__ == '__main__':
 unittest_main()

##mapper.py

mapper.py

#!/usr/bin/env python
from fileinput import input as read_input
def mapper():
 for line in read_input():
 data = line.strip().split('\t')
 if len(data) != 6:
 continue
 date, time, store, item, cost, payment = data
 print "{0}\t{1}".format(store, cost)
if __name__ == '__main__':
 mapper()

PS: Should I refactor to use the map and reduce inbuilt functions?

My MapReduce tester is clearly ported from Shell, short of args=None for line in args or read_input(), what's a better way of importing->testing the function outside of subprocess?

Or does it not matter, i.e.: my "hack" is fine?

##test_mapreduce.py

from unittest import TestCase, main as unittest_main
from subprocess import check_output as run
from os import path
class TestMapReduce(TestCase):
 top_path = ''
 map_reduce = lambda self, mapper_name, reducer_name, datafile_name: run(
 ['python', path.join(self.top_path, reducer_name), # Reduce
 run(['sort', # Shuffle, could be replaced with python `sorted`
 run(['python', path.join(self.top_path, mapper_name), # Map
 path.join(self.top_path, 'data', datafile_name)])])])
 @classmethod
 def setUpClass(cls):
 if not path.isfile('setup.py'):
 cls.top_path = path.join('..', '..')
 if not path.isfile(path.join(cls.top_path, 'setup.py')):
 raise AssertionError("Haven't found right directory to `cd` into")
 def test_with_student_test_posts(self):
 print self.map_reduce('mapper.py', 'reducer.py', 'student_test_posts.csv')
if __name__ == '__main__':
 unittest_main()

##mapper.py

#!/usr/bin/env python
from fileinput import input as read_input
def mapper():
 for line in read_input():
 data = line.strip().split('\t')
 if len(data) != 6:
 continue
 date, time, store, item, cost, payment = data
 print "{0}\t{1}".format(store, cost)
if __name__ == '__main__':
 mapper()

PS: Should I refactor to use the map and reduce inbuilt functions?

My MapReduce tester is clearly ported from Shell, short of args=None for line in args or read_input(), what's a better way of importing->testing the function outside of subprocess?

Or does it not matter, i.e.: my "hack" is fine?

test_mapreduce.py

from unittest import TestCase, main as unittest_main
from subprocess import check_output as run
from os import path
class TestMapReduce(TestCase):
 top_path = ''
 map_reduce = lambda self, mapper_name, reducer_name, datafile_name: run(
 ['python', path.join(self.top_path, reducer_name), # Reduce
 run(['sort', # Shuffle, could be replaced with python `sorted`
 run(['python', path.join(self.top_path, mapper_name), # Map
 path.join(self.top_path, 'data', datafile_name)])])])
 @classmethod
 def setUpClass(cls):
 if not path.isfile('setup.py'):
 cls.top_path = path.join('..', '..')
 if not path.isfile(path.join(cls.top_path, 'setup.py')):
 raise AssertionError("Haven't found right directory to `cd` into")
 def test_with_student_test_posts(self):
 print self.map_reduce('mapper.py', 'reducer.py', 'student_test_posts.csv')
if __name__ == '__main__':
 unittest_main()

mapper.py

#!/usr/bin/env python
from fileinput import input as read_input
def mapper():
 for line in read_input():
 data = line.strip().split('\t')
 if len(data) != 6:
 continue
 date, time, store, item, cost, payment = data
 print "{0}\t{1}".format(store, cost)
if __name__ == '__main__':
 mapper()

PS: Should I refactor to use the map and reduce inbuilt functions?

Rollback to Revision 1

Source Link

edited Aug 17, 2014 at 11:55

A T

edited Aug 17, 2014 at 11:55

A T

from unittest import TestCase, main as unittest_main
from ossubprocess import path
from map_reduce_udacity.mappercheck_output importas mapperrun
from map_reduce_udacity.reduceros import reducerpath
class TestMapReduce(TestCase):
 top_path = ''
 @classmethod
map_reduce = lambda self, defmapper_name, setUpClass(cls)reducer_name, datafile_name: run(
 if not['python', path.isfilejoin('setupself.py'top_path, reducer_name):
, # Reduce
 cls.top_pathrun(['sort', = path.join('..'# Shuffle, '..')could be replaced with python `sorted`
 if not path.isfilerun(['python', path.join(clsself.top_path, 'setup.py')mapper_name):
, # Map
 raise AssertionError("Haven't found right directory to `cd`path.join(self.top_path, into"'data', datafile_name)])])])
 @classmethod
  def map_reducesetUpClass(self, datafile_namecls):
 dif =not Nonepath.isfile('setup.py'):
 with open( cls.top_path = path.join(self'.top_path, 'data'.', datafile_name)'..') as f:
 dif =not tuplepath.isfile(mapperpath.join(line) for line incls.top_path, f'setup.readline(py')):
 reducer(sorted raise AssertionError(d))
"Haven't found right directory to `cd` into")

 def test_with_student_test_posts(self):
 print self.map_reduce('mapper.py', 'reducer.py', 'student_test_posts.csv')
if __name__ == '__main__':
 unittest_main()

from unittest import TestCase, main as unittest_main
from os import path
from map_reduce_udacity.mapper import mapper
from map_reduce_udacity.reducer import reducer
class TestMapReduce(TestCase):
 top_path = ''
 @classmethod
 def setUpClass(cls):
 if not path.isfile('setup.py'):
 cls.top_path = path.join('..', '..')
 if not path.isfile(path.join(cls.top_path, 'setup.py')):
 raise AssertionError("Haven't found right directory to `cd` into")
 def map_reduce(self, datafile_name):
 d = None
 with open(path.join(self.top_path, 'data', datafile_name)) as f:
 d = tuple(mapper(line) for line in f.readline())
 reducer(sorted(d))
 
 def test_with_student_test_posts(self):
 print self.map_reduce('student_test_posts.csv')
if __name__ == '__main__':
 unittest_main()

from unittest import TestCase, main as unittest_main
from subprocess import check_output as run
from os import path
class TestMapReduce(TestCase):
 top_path = ''
 map_reduce = lambda self, mapper_name, reducer_name, datafile_name: run(
 ['python', path.join(self.top_path, reducer_name), # Reduce
 run(['sort', # Shuffle, could be replaced with python `sorted`
 run(['python', path.join(self.top_path, mapper_name), # Map
 path.join(self.top_path, 'data', datafile_name)])])])
 @classmethod
  def setUpClass(cls):
 if not path.isfile('setup.py'):
  cls.top_path = path.join('..', '..')
 if not path.isfile(path.join(cls.top_path, 'setup.py')):
  raise AssertionError("Haven't found right directory to `cd` into")

 def test_with_student_test_posts(self):
 print self.map_reduce('mapper.py', 'reducer.py', 'student_test_posts.csv')
if __name__ == '__main__':
 unittest_main()

deleted 103 characters in body

Source Link

edited Aug 17, 2014 at 9:20

A T

edited Aug 17, 2014 at 9:20

A T

from unittest import TestCase, main as unittest_main
from subprocessos import check_outputpath
from asmap_reduce_udacity.mapper runimport mapper
from osmap_reduce_udacity.reducer import pathreducer
class TestMapReduce(TestCase):
 top_path = ''
 map_reduce = lambda@classmethod
 self, mapper_name, reducer_name, datafile_name:def runsetUpClass(cls):
 ['python',if not path.joinisfile(self'setup.top_path, reducer_namepy'), # Reduce:
 run(['sort', # Shuffle, could be replacedcls.top_path with= pythonpath.join('..', `sorted`'..')
 if not runpath.isfile(['python', path.join(selfcls.top_path, mapper_name'setup.py'), # Map):
 raise path.joinAssertionError(self.top_path,"Haven't 'data',found datafile_name)])])])
right directory to `cd` @classmethodinto")

 def setUpClassmap_reduce(clsself, datafile_name):
 ifd not= path.isfile('setup.py'):None
 cls.top_path =with open(path.join('.self.'top_path, '..''data', datafile_name)) as f:
 ifd not= path.isfiletuple(path.joinmapper(cls.top_path, 'setup.py')line):
  for line in f.readline())
 raise AssertionErrorreducer("Haven't found rightsorted(d))
 directory to `cd` into")

 def test_with_student_test_posts(self):
 print self.map_reduce('mapper.py', 'reducer.py', 'student_test_posts.csv')
if __name__ == '__main__':
 unittest_main()

from unittest import TestCase, main as unittest_main
from subprocess import check_output as run
from os import path
class TestMapReduce(TestCase):
 top_path = ''
 map_reduce = lambda self, mapper_name, reducer_name, datafile_name: run(
 ['python', path.join(self.top_path, reducer_name), # Reduce
 run(['sort', # Shuffle, could be replaced with python `sorted`
 run(['python', path.join(self.top_path, mapper_name), # Map
 path.join(self.top_path, 'data', datafile_name)])])])
 @classmethod
 def setUpClass(cls):
 if not path.isfile('setup.py'):
 cls.top_path = path.join('..', '..')
 if not path.isfile(path.join(cls.top_path, 'setup.py')):
  raise AssertionError("Haven't found right directory to `cd` into")

 def test_with_student_test_posts(self):
 print self.map_reduce('mapper.py', 'reducer.py', 'student_test_posts.csv')
if __name__ == '__main__':
 unittest_main()

from unittest import TestCase, main as unittest_main
from os import path
from map_reduce_udacity.mapper import mapper
from map_reduce_udacity.reducer import reducer
class TestMapReduce(TestCase):
 top_path = ''
 @classmethod
 def setUpClass(cls):
 if not path.isfile('setup.py'):
 cls.top_path = path.join('..', '..')
 if not path.isfile(path.join(cls.top_path, 'setup.py')):
 raise AssertionError("Haven't found right directory to `cd` into")

 def map_reduce(self, datafile_name):
 d = None
 with open(path.join(self.top_path, 'data', datafile_name)) as f:
 d = tuple(mapper(line) for line in f.readline())
 reducer(sorted(d))
 
 def test_with_student_test_posts(self):
 print self.map_reduce('student_test_posts.csv')
if __name__ == '__main__':
 unittest_main()

Source Link

asked Aug 17, 2014 at 7:56

A T

asked Aug 17, 2014 at 7:56

A T

lang-py