
Commit 4e43c0b

Improved Documentation via Additional Comments

1 parent 78c2c23 commit 4e43c0b

7 files changed: +88 additions, -64 deletions

hadoop/webFinder/src/main/java/webFinder/WebFinderDriver.java
Lines changed: 10 additions & 11 deletions

@@ -38,9 +38,9 @@ public int run(final String[] args) throws Exception {
     final Job job;
 
     conf = new Configuration();
-    job = Job.getInstance(conf, "Your job name");
+    job = Job.getInstance(conf, "WebFinder MapReduce");
 
-    job.setJarByClass(WebFinderDriver.class);
+    job.setJarByClass(WebFinderDriver.class);// use current jar
 
     if (args.length < 2) {
       return 1;
@@ -49,17 +49,16 @@ public int run(final String[] args) throws Exception {
       conf.setInt("maxDepth", Integer.parseInt(args[2]));
     }
 
-    job.setMapperClass(WebFinderMapper.class);
-    job.setMapOutputKeyClass(Text.class);
-    job.setMapOutputValueClass(Text.class);
+    job.setMapperClass(WebFinderMapper.class);// set mapper
+    job.setMapOutputKeyClass(Text.class);// set mapper output key type
+    job.setMapOutputValueClass(Text.class);// set mapper output value type
 
-    job.setReducerClass(WebFinderReducer.class);
+    job.setReducerClass(WebFinderReducer.class);// set reducer
+    job.setOutputKeyClass(Text.class);// set reducer output key type
+    job.setOutputValueClass(List.class);// set reducer output value
 
-    job.setOutputKeyClass(Text.class);
-    job.setOutputValueClass(List.class);
-
-    job.setInputFormatClass(TextInputFormat.class);
-    job.setOutputFormatClass(TextOutputFormat.class);
+    job.setInputFormatClass(TextInputFormat.class);// set input format
+    job.setOutputFormatClass(TextOutputFormat.class);// set output format
 
     FileInputFormat.setInputPaths(job, new Path(args[0]));
     FileOutputFormat.setOutputPath(job, new Path(args[1]));
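
The run(String[]) signature above suggests that WebFinderDriver implements Hadoop's Tool interface. Under that assumption, a minimal launcher could hand the command-line arguments to it via ToolRunner; this is only a sketch, and the Launcher class name is made up for illustration:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public class Launcher {
  public static void main(final String[] args) throws Exception {
    // ToolRunner parses the generic Hadoop options, then calls run(args)
    System.exit(ToolRunner.run(new Configuration(), new WebFinderDriver(), args));
  }
}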

hadoop/webFinder/src/main/java/webFinder/WebFinderMapper.java
Lines changed: 23 additions & 22 deletions

@@ -19,8 +19,8 @@
  * resources that are loaded by a given website URL and emits tuples of
  * kind {@code <resource URL, website URL>}.
  */
-public class WebFinderMapper extends
-    Mapper<LongWritable, Text, Text, Text> {
+public class WebFinderMapper
+    extends Mapper<LongWritable, Text, Text, Text> {
 
   /** the logger we use */
   private static Logger LOGGER = Logger.getLogger(WebFinderMapper.class);
@@ -39,30 +39,31 @@ protected void map(final LongWritable offset, final Text line,
     final HashSet<URL> done;
     String str;
 
-    maxDepth = context.getConfiguration().getInt("maxDepth", 1);
-
     str = WebFinderMapper.__prepare(line.toString(), true);
-    if (str == null) {
+    if (str == null) {// prepare base url
       return;
     }
+    // set maximum depth of spider
+    maxDepth = context.getConfiguration().getInt("maxDepth", 1);
 
     baseUri = URI.create(str).normalize();
     baseUrl = baseUri.toURL();
-    done = new HashSet<>();
+    done = new HashSet<>();// URLs that have been processed
     done.add(baseUrl);
     try {
      done.add(new URL(baseUrl.toString() + '/'));
     } catch (@SuppressWarnings("unused") final Throwable error) {
       // ignore
     }
     baseUrlText = new Text(baseUrl.toString());
-    context.write(baseUrlText, baseUrlText);
+    context.write(baseUrlText, baseUrlText);// url itself is done
+    // now recursively spider resources
     WebFinderMapper.__load(maxDepth, baseUrl, baseUrlText, baseUrl,
         baseUri, new StringBuilder(), new char[16384], done, context);
   }
 
   /**
-   * load a given URL
+   * load a given URL of a HTML document
    *
   * @param remainingDepth
   *          how deep we can still go
@@ -105,7 +106,7 @@ private static final void __load(final int remainingDepth,
      int read;
 
      stringBuilder.setLength(0);
-      uconn = loadUrl.openConnection();
+      uconn = loadUrl.openConnection();// setup the connection
      uconn.setConnectTimeout(10_000);
      uconn.setReadTimeout(10_000);
      uconn.setDoInput(true);
@@ -114,26 +115,26 @@ private static final void __load(final int remainingDepth,
      uconn.setDefaultUseCaches(true);
      try (final InputStream inputStream = loadUrl.openStream()) {
        try (final InputStreamReader inputReader = new InputStreamReader(
-            inputStream)) {
+            inputStream)) {// load all the data of the text resource
          while ((read = inputReader.read(buffer)) > 0) {
            stringBuilder.append(buffer, 0, read);
          }
        }
      }
 
-      text = stringBuilder.toString().replace('\n', ' ')//
+      text = stringBuilder.toString().replace('\n', ' ')// delete newlines
          .replace('\r', ' ').replace('\t', ' ').replaceAll("  ", " ");
-      lower = text.toLowerCase();
+      lower = text.toLowerCase();// create a lower case version
 
      nextDesc: for (final __LinkDesc desc : WebFinderMapper.DESCS) {
 
-        last = 0;// find and load scripts
-        findDesc: for (;;) {
+        last = 0;// find and load other resources
+        findDesc: for (;;) {// find begin tag
          index1 = lower.indexOf(desc.m_begin, last);
          if (index1 <= last) {
            continue nextDesc;
          }
-          last = index1 + desc.m_begin.length();
+          last = index1 + desc.m_begin.length();// find URL attribute
          index1 = lower.indexOf(desc.m_urlIndicatorQuote, last);
          index2 = lower.indexOf(desc.m_urlIndicatorPrime, last);
          sep = '"';
@@ -144,7 +145,7 @@ private static final void __load(final int remainingDepth,
            }
          }
          index2 = lower.indexOf('>', last);
-          if (index1 <= last) {
+          if (index1 <= last) {// check for problem with tag end
            continue nextDesc;
          }
          if ((index2 < index1) && (index2 >= last)) {
@@ -157,7 +158,7 @@ private static final void __load(final int remainingDepth,
            continue nextDesc;
          }
 
-          test = text.substring(last, index1);
+          test = text.substring(last, index1);// take URL
          last = index1;
          test = WebFinderMapper.__prepare(test, desc.m_loadRecursive);
          if (test == null) {
@@ -181,8 +182,8 @@ private static final void __load(final int remainingDepth,
            error.addSuppressed(error2);
            error.addSuppressed(error3);
            if (WebFinderMapper.LOGGER != null) {
-              WebFinderMapper.LOGGER.warn(
-                  "Error while trying to build URL with string '"
+              WebFinderMapper.LOGGER
+                  .warn("Error while trying to build URL with string '"
                      + test + "' under load URL '"
                      + loadUrl.toString() + "' for base URL '"
                      + baseUrl.toString() + "'.", error2);
@@ -225,8 +226,8 @@ private static final void __load(final int remainingDepth,
      }
    } catch (final Throwable error) {
      if (WebFinderMapper.LOGGER != null) {
-        WebFinderMapper.LOGGER.warn("Error while trying to load URL '"
-            + loadUrl + "'.", error);
+        WebFinderMapper.LOGGER.warn(
+            "Error while trying to load URL '" + loadUrl + "'.", error);
      }
    }
 
@@ -324,7 +325,7 @@ public static final void main(final String[] args) throws Throwable {
 
   /** the link descriptions */
   static final __LinkDesc[] DESCS = { //
-      new __LinkDesc(false, "<link rel=\"stylesheet\"", "href="), //
+      new __LinkDesc(false, "<link rel=\"stylesheet\"", "href="), //
       new __LinkDesc(false, "<link rel='stylesheet'", "href="), //
       new __LinkDesc(false, "<img", "src="), //
       new __LinkDesc(false, "<script", "src="), //

hadoop/webFinder/src/main/java/webFinder/WebFinderReducer.java
Lines changed: 11 additions & 10 deletions

@@ -19,14 +19,15 @@
  * multiple websites. This reducer emits tuples of the form
  * {@code <resource URL, list of website urls>}.
  */
-public class WebFinderReducer extends
-    Reducer<Text, Text, Text, List<Text>> {
+public class WebFinderReducer
+    extends Reducer<Text, Text, Text, List<Text>> {
 
   /**
    * The actual reduction step: From the tuples of form
    * {@code <resource URL, iterable of referencing website URLs>}, select
    * all resources referenced by more than one unique website. For these,
-   * output tuples of the form {@code <resource URL, list of website URLs>}.
+   * output tuples of the form {@code <resource URL, list of website URLs>}
+   * .
   */
  @Override
  protected void reduce(final Text key, final Iterable<Text> values,
@@ -40,7 +41,7 @@ protected void reduce(final Text key, final Iterable<Text> values,
 
     set = new HashSet<>();
     looper: for (final Text url : values) {
-      string = url.toString();
+      string = url.toString();// convert value to a URL
       try {
        add = new URI(string).normalize().toURL();
       } catch (@SuppressWarnings("unused") final Throwable error) {
@@ -54,19 +55,19 @@ protected void reduce(final Text key, final Iterable<Text> values,
          }
        }
      }
-      set.add(add);
+      set.add(add);// store value in set of URLs pointing to this resource
    }
 
-    if ((size = set.size()) > 1) {
-      list = new ArrayList(size);
+    if ((size = set.size()) > 1) {// multiple URLs point to key
+      list = new ArrayList(size);// let's make a list of them
      for (final URL found : set) {
        list.add(found.toString());
      }
-      Collections.sort(list);
-      for (index = list.size(); (--index) >= 0;) {
+      Collections.sort(list);// and sort them
+      for (index = list.size(); (--index) >= 0;) {// now convert to Text
        list.set(index, new Text((String) (list.get(index))));
      }
-      context.write(key, list);
+      context.write(key, list);// write <key, list of referers> tuple
    }
  }
}
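
The normalize() call that the new comments surround is what lets the HashSet treat differently-written URLs of the same site as one referer. A small standalone illustration of that behavior (example.com is a placeholder host):

import java.net.URI;

public class NormalizeSketch {
  public static void main(final String[] args) throws Exception {
    // both spellings collapse to http://example.com/index.html,
    // so a HashSet would keep only one of them
    final URI a = new URI("http://example.com/a/../index.html").normalize();
    final URI b = new URI("http://example.com/index.html").normalize();
    System.out.println(a.equals(b)); // prints true
  }
}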

hadoop/wordCount/src/main/java/wordCount/WordCountDriver.java
Lines changed: 9 additions & 7 deletions

@@ -34,15 +34,17 @@ public int run(final String[] args) throws Exception {
       return 1;
     }
 
-    job.setMapperClass(WordCountMapper.class);
-    job.setReducerClass(WordCountReducer.class);
-    job.setCombinerClass(WordCountReducer.class);
+    job.setMapperClass(WordCountMapper.class);// set mapper
+    job.setReducerClass(WordCountReducer.class);// set reducer
+    // a combiner performs something like a reduction step right after
+    // mapping, on the mapper's computer, before sending on the data
+    job.setCombinerClass(WordCountReducer.class);// set combiner
 
-    job.setOutputKeyClass(Text.class);
-    job.setOutputValueClass(IntWritable.class);
+    job.setOutputKeyClass(Text.class);// set output key class
+    job.setOutputValueClass(IntWritable.class);// set output value class
 
-    job.setInputFormatClass(TextInputFormat.class);
-    job.setOutputFormatClass(TextOutputFormat.class);
+    job.setInputFormatClass(TextInputFormat.class);// set input format
+    job.setOutputFormatClass(TextOutputFormat.class);// set output format
 
     FileInputFormat.setInputPaths(job, new Path(args[0]));
     FileOutputFormat.setOutputPath(job, new Path(args[1]));
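
The new combiner comment is the key point of this change: reusing WordCountReducer as combiner is safe because integer addition is associative, so summing partial sums gives the same total as summing all the ones. A toy illustration in plain Java, with no Hadoop involved:

import java.util.Arrays;
import java.util.List;

public class CombinerSketch {
  public static void main(final String[] args) {
    // two mapper nodes each emitted <word, 1> several times
    final List<Integer> node1 = Arrays.asList(1, 1, 1);
    final List<Integer> node2 = Arrays.asList(1, 1);
    // the combiner pre-sums locally, so only <word, 3> and <word, 2>
    // cross the network instead of five separate tuples
    final int partial1 = node1.stream().mapToInt(Integer::intValue).sum();
    final int partial2 = node2.stream().mapToInt(Integer::intValue).sum();
    // the reducer adds the partial sums: same result, less traffic
    System.out.println(partial1 + partial2); // prints 5
  }
}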

hadoop/wordCount/src/main/java/wordCount/WordCountMapper.java
Lines changed: 13 additions & 7 deletions

@@ -7,6 +7,12 @@
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Mapper;
 
+/**
+ * This is the mapper part of the word count example. The mapper receives
+ * lines of text. It first replaces all punctuation marks with spaces. Then
+ * it breaks the line at the spaces into multiple words. For each word, we
+ * emit a tuple of form {@code <WORD, 1>}.
+ */
 public class WordCountMapper
     extends Mapper<LongWritable, Text, Text, IntWritable> {
 
@@ -15,16 +21,16 @@ public class WordCountMapper
   @Override
   protected void map(final LongWritable offset, final Text line,
       final Context context) throws IOException, InterruptedException {
-    for (String word : line.toString()//
-        .replace('.', ' ').replace(',', ' ').replace('/', ' ')//
-        .replace(']', ' ').replace('[', ' ').replace('_', ' ')//
-        .replace(')', ' ').replace('(', ' ').replace('#', ' ')//
-        .replace('!', ' ').replace('?', ' ').replace("-", "")//
+    for (String word : line.toString()// replace punctuation and other
+        .replace('.', ' ').replace(',', ' ').replace('/', ' ')// strange
+        .replace(']', ' ').replace('[', ' ').replace('_', ' ')// chars
+        .replace(')', ' ').replace('(', ' ').replace('#', ' ')// with
+        .replace('!', ' ').replace('?', ' ').replace("-", "")// spaces
         .replace("\"", "").replace("\'", "").replaceAll("[0-9]+", " ")//
         .replace(':', ' ').replace('\t', ' ').replace('\f', ' ')//
-        .split("\\s+")) {
+        .split("\\s+")) {// iterate over all space-separated words
       word = word.trim();
-      if (word.length() > 0) {
+      if (word.length() > 0) {// emit one tuple <WORD, 1> for each WORD
         context.write(new Text(word.toLowerCase()), WordCountMapper.ONE);
       }
     }
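
The replace chain commented above can be tried outside Hadoop. This sketch applies a subset of the same replacements to one sample line and prints the resulting lower-case words:

public class TokenizeSketch {
  public static void main(final String[] args) {
    final String line = "Hello, world! (MapReduce) -- 42 words?";
    for (String word : line.replace('.', ' ').replace(',', ' ')
        .replace(')', ' ').replace('(', ' ').replace('!', ' ')
        .replace('?', ' ').replace("-", "").replaceAll("[0-9]+", " ")
        .split("\\s+")) { // split at runs of whitespace
      word = word.trim();
      if (word.length() > 0) { // skip empty tokens
        System.out.println(word.toLowerCase()); // hello, world, mapreduce, words
      }
    }
  }
}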

hadoop/wordCount/src/main/java/wordCount/WordCountReducer.java
Lines changed: 19 additions & 4 deletions

@@ -6,18 +6,33 @@
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Reducer;
 
+/**
+ * This is the reducer class of the MapReduce Word Count example. It also
+ * acts as combiner, i.e., it is applied two times:
+ * <ol>
+ * <li>As combiner, i.e., directly after the mapping process on each mapper
+ * node. This way, the data is "reduced" before being sent on. During this
+ * application, all input tuples of the reducer are of the form
+ * {@code <WORD, List<1, 1, 1, ... 1>>}. The reducer here creates output
+ * tuples of the form {@code <WORD, Sum(List...)>}. These tuples have the
+ * same format as the mapper's output tuples, just with integers which
+ * might be larger than 1. They are sent on to the real reducer steps.</li>
+ * <li>As actual reducer: Here all the tuples produced by the combiners
+ * arrive. There may be multiple entries in the lists of these tuples,
+ * since the same word may have been found on different computers.</li>
+ * </ol>
+ */
 public class WordCountReducer
     extends Reducer<Text, IntWritable, Text, IntWritable> {
 
   @Override
   protected void reduce(final Text key, final Iterable<IntWritable> values,
       final Context context) throws IOException, InterruptedException {
-
+    // we receive tuples of the type <WORD, IntWritable> for each WORD
     int count = 0;
-    for (final IntWritable current : values) {
+    for (final IntWritable current : values) {// we add up all the ints
       count += current.get();
     }
-    context.write(key, new IntWritable(count));
+    context.write(key, new IntWritable(count));// and emit the final count
   }
-
 }
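
The summation loop documented above can also be exercised offline. This sketch feeds the same loop hand-made partial counts for one word, as the combiners described in the new Javadoc might have produced them (it needs hadoop-common on the classpath for IntWritable):

import java.util.Arrays;
import org.apache.hadoop.io.IntWritable;

public class ReduceSumSketch {
  public static void main(final String[] args) {
    // partial counts for one word, e.g. from three combiner runs
    final Iterable<IntWritable> values = Arrays.asList(
        new IntWritable(3), new IntWritable(5), new IntWritable(1));
    int count = 0;
    for (final IntWritable current : values) { // add up all the ints
      count += current.get();
    }
    System.out.println(count); // prints 9
  }
}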

mpi/structScatter.c
Lines changed: 3 additions & 3 deletions

@@ -52,17 +52,17 @@ int main(int argc, char *argv[]) {
   MPI_Comm_size(MPI_COMM_WORLD, &size); // get number of processes
 
     send = (myStruct*)malloc(sizeof(myStruct) * size); // allocate memory
-    for(i = size; (--i) >= 0; ) {
+    for(i = size; (--i) >= 0; ) {// store some dummy data
       send[i].myIntA = rank;
       send[i].myIntB = i;
       send[i].myShort = (rank * rank) % size;
-      send[i].myDouble = *((double*)("Hi you!"));
+      send[i].myDouble = *((double*)("Hi you!"));// haha
       send[i].myChar = 'V';
       send[i].myFloat = (i / (float)size);
     }
   }
 
-  MPI_Scatter(send, 1, myStructType, &data, 1, myStructType, 0, MPI_COMM_WORLD);
+  MPI_Scatter(send, 1, myStructType, &data, 1, myStructType, 0, MPI_COMM_WORLD);//scatter the structs
 
   printf("%d: received: myIntA=%d, myIntB=%d, myShort=%d, myDouble=\"%s\", myChar='%c', myFloat=%f.\n",
       rank, data.myIntA, data.myIntB, data.myShort, (char*)&data.myDouble, data.myChar, data.myFloat);
