
Commit 4e43c0b

Improved Documentation via Additional Comments

1 parent 78c2c23 commit 4e43c0b

7 files changed: +88 additions, -64 deletions

hadoop/webFinder/src/main/java/webFinder/WebFinderDriver.java
Lines changed: 10 additions & 11 deletions

@@ -38,9 +38,9 @@ public int run(final String[] args) throws Exception {
     final Job job;
 
     conf = new Configuration();
-    job = Job.getInstance(conf, "Your job name");
+    job = Job.getInstance(conf, "WebFinder MapReduce");
 
-    job.setJarByClass(WebFinderDriver.class);
+    job.setJarByClass(WebFinderDriver.class);// use current jar
 
     if (args.length < 2) {
       return 1;
@@ -49,17 +49,16 @@ public int run(final String[] args) throws Exception {
       conf.setInt("maxDepth", Integer.parseInt(args[2]));
     }
 
-    job.setMapperClass(WebFinderMapper.class);
-    job.setMapOutputKeyClass(Text.class);
-    job.setMapOutputValueClass(Text.class);
+    job.setMapperClass(WebFinderMapper.class);// set mapper
+    job.setMapOutputKeyClass(Text.class);// set mapper output key type
+    job.setMapOutputValueClass(Text.class);// set mapper output value type
 
-    job.setReducerClass(WebFinderReducer.class);
+    job.setReducerClass(WebFinderReducer.class);// set reducer
+    job.setOutputKeyClass(Text.class);// set reducer output key type
+    job.setOutputValueClass(List.class);// set reducer output value
 
-    job.setOutputKeyClass(Text.class);
-    job.setOutputValueClass(List.class);
-
-    job.setInputFormatClass(TextInputFormat.class);
-    job.setOutputFormatClass(TextOutputFormat.class);
+    job.setInputFormatClass(TextInputFormat.class);// set input format
+    job.setOutputFormatClass(TextOutputFormat.class);// set output format
 
     FileInputFormat.setInputPaths(job, new Path(args[0]));
     FileOutputFormat.setOutputPath(job, new Path(args[1]));
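
The run(String[]) signature above suggests that WebFinderDriver implements Hadoop's Tool interface. Under that assumption, a minimal launcher could hand the command-line arguments to it via ToolRunner; this is only a sketch, and the Launcher class name is made up for illustration:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public class Launcher {
  public static void main(final String[] args) throws Exception {
    // ToolRunner parses the generic Hadoop options, then calls run(args)
    System.exit(ToolRunner.run(new Configuration(), new WebFinderDriver(), args));
  }
}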

hadoop/webFinder/src/main/java/webFinder/WebFinderMapper.java
Lines changed: 23 additions & 22 deletions

@@ -19,8 +19,8 @@
  * resources that are loaded by a given website URL and emits tuples of
  * kind {@code <resource URL, website URL>}.
  */
-public class WebFinderMapper extends
-    Mapper<LongWritable, Text, Text, Text> {
+public class WebFinderMapper
+    extends Mapper<LongWritable, Text, Text, Text> {
 
   /** the logger we use */
   private static Logger LOGGER = Logger.getLogger(WebFinderMapper.class);
@@ -39,30 +39,31 @@ protected void map(final LongWritable offset, final Text line,
     final HashSet<URL> done;
     String str;
 
-    maxDepth = context.getConfiguration().getInt("maxDepth", 1);
-
     str = WebFinderMapper.__prepare(line.toString(), true);
-    if (str == null) {
+    if (str == null) {// prepare base url
       return;
     }
+    // set maximum depth of spider
+    maxDepth = context.getConfiguration().getInt("maxDepth", 1);
 
     baseUri = URI.create(str).normalize();
     baseUrl = baseUri.toURL();
-    done = new HashSet<>();
+    done = new HashSet<>();// URLs that have been processed
     done.add(baseUrl);
     try {
      done.add(new URL(baseUrl.toString() + '/'));
     } catch (@SuppressWarnings("unused") final Throwable error) {
       // ignore
     }
     baseUrlText = new Text(baseUrl.toString());
-    context.write(baseUrlText, baseUrlText);
+    context.write(baseUrlText, baseUrlText);// url itself is done
+    // now recursively spider resources
     WebFinderMapper.__load(maxDepth, baseUrl, baseUrlText, baseUrl,
         baseUri, new StringBuilder(), new char[16384], done, context);
   }
 
   /**
-   * load a given URL
+   * load a given URL of a HTML document
    *
   * @param remainingDepth
   *          how deep we can still go
@@ -105,7 +106,7 @@ private static final void __load(final int remainingDepth,
      int read;
 
      stringBuilder.setLength(0);
-      uconn = loadUrl.openConnection();
+      uconn = loadUrl.openConnection();// setup the connection
      uconn.setConnectTimeout(10_000);
      uconn.setReadTimeout(10_000);
      uconn.setDoInput(true);
@@ -114,26 +115,26 @@ private static final void __load(final int remainingDepth,
      uconn.setDefaultUseCaches(true);
      try (final InputStream inputStream = loadUrl.openStream()) {
        try (final InputStreamReader inputReader = new InputStreamReader(
-            inputStream)) {
+            inputStream)) {// load all the data of the text resource
          while ((read = inputReader.read(buffer)) > 0) {
            stringBuilder.append(buffer, 0, read);
          }
        }
      }
 
-      text = stringBuilder.toString().replace('\n', ' ')//
+      text = stringBuilder.toString().replace('\n', ' ')// delete newlines
          .replace('\r', ' ').replace('\t', ' ').replaceAll("  ", " ");
-      lower = text.toLowerCase();
+      lower = text.toLowerCase();// create a lower case version
 
      nextDesc: for (final __LinkDesc desc : WebFinderMapper.DESCS) {
 
-        last = 0;// find and load scripts
-        findDesc: for (;;) {
+        last = 0;// find and load other resources
+        findDesc: for (;;) {// find begin tag
          index1 = lower.indexOf(desc.m_begin, last);
          if (index1 <= last) {
            continue nextDesc;
          }
-          last = index1 + desc.m_begin.length();
+          last = index1 + desc.m_begin.length();// find URL attribute
          index1 = lower.indexOf(desc.m_urlIndicatorQuote, last);
          index2 = lower.indexOf(desc.m_urlIndicatorPrime, last);
          sep = '"';
@@ -144,7 +145,7 @@ private static final void __load(final int remainingDepth,
            }
          }
          index2 = lower.indexOf('>', last);
-          if (index1 <= last) {
+          if (index1 <= last) {// check for problem with tag end
            continue nextDesc;
          }
          if ((index2 < index1) && (index2 >= last)) {
@@ -157,7 +158,7 @@ private static final void __load(final int remainingDepth,
            continue nextDesc;
          }
 
-          test = text.substring(last, index1);
+          test = text.substring(last, index1);// take URL
          last = index1;
          test = WebFinderMapper.__prepare(test, desc.m_loadRecursive);
          if (test == null) {
@@ -181,8 +182,8 @@ private static final void __load(final int remainingDepth,
            error.addSuppressed(error2);
            error.addSuppressed(error3);
            if (WebFinderMapper.LOGGER != null) {
-              WebFinderMapper.LOGGER.warn(
-                  "Error while trying to build URL with string '"
+              WebFinderMapper.LOGGER
+                  .warn("Error while trying to build URL with string '"
                      + test + "' under load URL '"
                      + loadUrl.toString() + "' for base URL '"
                      + baseUrl.toString() + "'.", error2);
@@ -225,8 +226,8 @@ private static final void __load(final int remainingDepth,
      }
    } catch (final Throwable error) {
      if (WebFinderMapper.LOGGER != null) {
-        WebFinderMapper.LOGGER.warn("Error while trying to load URL '"
-            + loadUrl + "'.", error);
+        WebFinderMapper.LOGGER.warn(
+            "Error while trying to load URL '" + loadUrl + "'.", error);
      }
    }
 
@@ -324,7 +325,7 @@ public static final void main(final String[] args) throws Throwable {
 
   /** the link descriptions */
   static final __LinkDesc[] DESCS = { //
-      new __LinkDesc(false, "<link rel=\"stylesheet\"", "href="), //
+      new __LinkDesc(false, "<link rel=\"stylesheet\"", "href="), //
       new __LinkDesc(false, "<link rel='stylesheet'", "href="), //
       new __LinkDesc(false, "<img", "src="), //
       new __LinkDesc(false, "<script", "src="), //

hadoop/webFinder/src/main/java/webFinder/WebFinderReducer.java
Lines changed: 11 additions & 10 deletions

@@ -19,14 +19,15 @@
  * multiple websites. This reducer emits tuples of the form
  * {@code <resource URL, list of website urls>}.
  */
-public class WebFinderReducer extends
-    Reducer<Text, Text, Text, List<Text>> {
+public class WebFinderReducer
+    extends Reducer<Text, Text, Text, List<Text>> {
 
   /**
    * The actual reduction step: From the tuples of form
    * {@code <resource URL, iterable of referencing website URLs>}, select
    * all resources referenced by more than one unique website. For these,
-   * output tuples of the form {@code <resource URL, list of website URLs>}.
+   * output tuples of the form {@code <resource URL, list of website URLs>}
+   * .
   */
  @Override
  protected void reduce(final Text key, final Iterable<Text> values,
@@ -40,7 +41,7 @@ protected void reduce(final Text key, final Iterable<Text> values,
 
     set = new HashSet<>();
     looper: for (final Text url : values) {
-      string = url.toString();
+      string = url.toString();// convert value to a URL
       try {
        add = new URI(string).normalize().toURL();
       } catch (@SuppressWarnings("unused") final Throwable error) {
@@ -54,19 +55,19 @@ protected void reduce(final Text key, final Iterable<Text> values,
          }
        }
      }
-      set.add(add);
+      set.add(add);// store value in set of URLs pointing to this resource
    }
 
-    if ((size = set.size()) > 1) {
-      list = new ArrayList(size);
+    if ((size = set.size()) > 1) {// multiple URLs point to key
+      list = new ArrayList(size);// let's make a list of them
      for (final URL found : set) {
        list.add(found.toString());
      }
-      Collections.sort(list);
-      for (index = list.size(); (--index) >= 0;) {
+      Collections.sort(list);// and sort them
+      for (index = list.size(); (--index) >= 0;) {// now convert to Text
        list.set(index, new Text((String) (list.get(index))));
      }
-      context.write(key, list);
+      context.write(key, list);// write <key, list of referers> tuple
    }
  }
}
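
The normalize() call that the new comments surround is what lets the HashSet treat differently-written URLs of the same site as one referer. A small standalone illustration of that behavior (example.com is a placeholder host):

import java.net.URI;

public class NormalizeSketch {
  public static void main(final String[] args) throws Exception {
    // both spellings collapse to http://example.com/index.html,
    // so a HashSet would keep only one of them
    final URI a = new URI("http://example.com/a/../index.html").normalize();
    final URI b = new URI("http://example.com/index.html").normalize();
    System.out.println(a.equals(b)); // prints true
  }
}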

hadoop/wordCount/src/main/java/wordCount/WordCountDriver.java
Lines changed: 9 additions & 7 deletions

@@ -34,15 +34,17 @@ public int run(final String[] args) throws Exception {
       return 1;
     }
 
-    job.setMapperClass(WordCountMapper.class);
-    job.setReducerClass(WordCountReducer.class);
-    job.setCombinerClass(WordCountReducer.class);
+    job.setMapperClass(WordCountMapper.class);// set mapper
+    job.setReducerClass(WordCountReducer.class);// set reducer
+    // a combiner performs something like a reduction step right after
+    // mapping, on the mapper's computer, before sending on the data
+    job.setCombinerClass(WordCountReducer.class);// set combiner
 
-    job.setOutputKeyClass(Text.class);
-    job.setOutputValueClass(IntWritable.class);
+    job.setOutputKeyClass(Text.class);// set output key class
+    job.setOutputValueClass(IntWritable.class);// set output value class
 
-    job.setInputFormatClass(TextInputFormat.class);
-    job.setOutputFormatClass(TextOutputFormat.class);
+    job.setInputFormatClass(TextInputFormat.class);// set input format
+    job.setOutputFormatClass(TextOutputFormat.class);// set output format
 
     FileInputFormat.setInputPaths(job, new Path(args[0]));
     FileOutputFormat.setOutputPath(job, new Path(args[1]));
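
The new combiner comment is the key point of this change: reusing WordCountReducer as combiner is safe because integer addition is associative, so summing partial sums gives the same total as summing all the ones. A toy illustration in plain Java, with no Hadoop involved:

import java.util.Arrays;
import java.util.List;

public class CombinerSketch {
  public static void main(final String[] args) {
    // two mapper nodes each emitted <word, 1> several times
    final List<Integer> node1 = Arrays.asList(1, 1, 1);
    final List<Integer> node2 = Arrays.asList(1, 1);
    // the combiner pre-sums locally, so only <word, 3> and <word, 2>
    // cross the network instead of five separate tuples
    final int partial1 = node1.stream().mapToInt(Integer::intValue).sum();
    final int partial2 = node2.stream().mapToInt(Integer::intValue).sum();
    // the reducer adds the partial sums: same result, less traffic
    System.out.println(partial1 + partial2); // prints 5
  }
}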

hadoop/wordCount/src/main/java/wordCount/WordCountMapper.java
Lines changed: 13 additions & 7 deletions

@@ -7,6 +7,12 @@
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Mapper;
 
+/**
+ * This is the mapper part of the word count example. The mapper receives
+ * lines of text. It first replaces all punctuation marks with spaces. Then
+ * it breaks the line at the spaces into multiple words. For each word, we
+ * emit a tuple of form {@code <WORD, 1>}.
+ */
 public class WordCountMapper
     extends Mapper<LongWritable, Text, Text, IntWritable> {
 
@@ -15,16 +21,16 @@ public class WordCountMapper
   @Override
   protected void map(final LongWritable offset, final Text line,
       final Context context) throws IOException, InterruptedException {
-    for (String word : line.toString()//
-        .replace('.', ' ').replace(',', ' ').replace('/', ' ')//
-        .replace(']', ' ').replace('[', ' ').replace('_', ' ')//
-        .replace(')', ' ').replace('(', ' ').replace('#', ' ')//
-        .replace('!', ' ').replace('?', ' ').replace("-", "")//
+    for (String word : line.toString()// replace punctuation and other
+        .replace('.', ' ').replace(',', ' ').replace('/', ' ')// strange
+        .replace(']', ' ').replace('[', ' ').replace('_', ' ')// chars
+        .replace(')', ' ').replace('(', ' ').replace('#', ' ')// with
+        .replace('!', ' ').replace('?', ' ').replace("-", "")// spaces
         .replace("\"", "").replace("\'", "").replaceAll("[0-9]+", " ")//
         .replace(':', ' ').replace('\t', ' ').replace('\f', ' ')//
-        .split("\\s+")) {
+        .split("\\s+")) {// iterate over all space-separated words
       word = word.trim();
-      if (word.length() > 0) {
+      if (word.length() > 0) {// emit one tuple <WORD, 1> for each WORD
         context.write(new Text(word.toLowerCase()), WordCountMapper.ONE);
       }
     }
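
The replace chain commented above can be tried outside Hadoop. This sketch applies a subset of the same replacements to one sample line and prints the resulting lower-case words:

public class TokenizeSketch {
  public static void main(final String[] args) {
    final String line = "Hello, world! (MapReduce) -- 42 words?";
    for (String word : line.replace('.', ' ').replace(',', ' ')
        .replace(')', ' ').replace('(', ' ').replace('!', ' ')
        .replace('?', ' ').replace("-", "").replaceAll("[0-9]+", " ")
        .split("\\s+")) { // split at runs of whitespace
      word = word.trim();
      if (word.length() > 0) { // skip empty tokens
        System.out.println(word.toLowerCase()); // hello, world, mapreduce, words
      }
    }
  }
}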

hadoop/wordCount/src/main/java/wordCount/WordCountReducer.java
Lines changed: 19 additions & 4 deletions

@@ -6,18 +6,33 @@
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Reducer;
 
+/**
+ * This is the reducer class of the MapReduce Word Count example. It also
+ * acts as combiner, i.e., it is applied two times:
+ * <ol>
+ * <li>As combiner, i.e., directly after the mapping process on each mapper
+ * node. This way, the data is "reduced" before being sent on. During this
+ * application, all input tuples of the reducer are of the form
+ * {@code <WORD, List<1, 1, 1, ... 1>>}. The reducer here creates output
+ * tuples of the form {@code <WORD, Sum(List...)>}. These tuples have the
+ * same format as the mapper's output tuples, just with integers which
+ * might be larger than 1. They are sent on to the real reducer steps.</li>
+ * <li>As actual reducer: Here all the tuples produced by the combiners
+ * arrive. There may be multiple entries in the lists of these tuples,
+ * since the same word may have been found on different computers.</li>
+ * </ol>
+ */
 public class WordCountReducer
     extends Reducer<Text, IntWritable, Text, IntWritable> {
 
   @Override
   protected void reduce(final Text key, final Iterable<IntWritable> values,
       final Context context) throws IOException, InterruptedException {
-
+    // we receive tuples of the type <WORD, IntWritable> for each WORD
     int count = 0;
-    for (final IntWritable current : values) {
+    for (final IntWritable current : values) {// we add up all the ints
       count += current.get();
     }
-    context.write(key, new IntWritable(count));
+    context.write(key, new IntWritable(count));// and emit the final count
   }
-
 }
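
The summation loop documented above can also be exercised offline. This sketch feeds the same loop hand-made partial counts for one word, as the combiners described in the new Javadoc might have produced them (it needs hadoop-common on the classpath for IntWritable):

import java.util.Arrays;
import org.apache.hadoop.io.IntWritable;

public class ReduceSumSketch {
  public static void main(final String[] args) {
    // partial counts for one word, e.g. from three combiner runs
    final Iterable<IntWritable> values = Arrays.asList(
        new IntWritable(3), new IntWritable(5), new IntWritable(1));
    int count = 0;
    for (final IntWritable current : values) { // add up all the ints
      count += current.get();
    }
    System.out.println(count); // prints 9
  }
}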

mpi/structScatter.c
Lines changed: 3 additions & 3 deletions

@@ -52,17 +52,17 @@ int main(int argc, char *argv[]) {
   MPI_Comm_size(MPI_COMM_WORLD, &size); // get number of processes
 
     send = (myStruct*)malloc(sizeof(myStruct) * size); // allocate memory
-    for(i = size; (--i) >= 0; ) {
+    for(i = size; (--i) >= 0; ) {// store some dummy data
       send[i].myIntA = rank;
       send[i].myIntB = i;
       send[i].myShort = (rank * rank) % size;
-      send[i].myDouble = *((double*)("Hi you!"));
+      send[i].myDouble = *((double*)("Hi you!"));// haha
       send[i].myChar = 'V';
       send[i].myFloat = (i / (float)size);
     }
   }
 
-  MPI_Scatter(send, 1, myStructType, &data, 1, myStructType, 0, MPI_COMM_WORLD);
+  MPI_Scatter(send, 1, myStructType, &data, 1, myStructType, 0, MPI_COMM_WORLD);//scatter the structs
 
   printf("%d: received: myIntA=%d, myIntB=%d, myShort=%d, myDouble=\"%s\", myChar='%c', myFloat=%f.\n",
       rank, data.myIntA, data.myIntB, data.myShort, (char*)&data.myDouble, data.myChar, data.myFloat);
