In MapReduce applications, the input data is loaded into HDFS. An input file from HDFS is fed to the Mapper class for processing. The output of the map phase is then sorted and merged before being passed to the Reducer class. The Reducer class aggregates its input and writes the output back to HDFS. In MapReduce programming, each job has its own main class and typically contains static nested classes for the Mapper and Reducer that override the map() and reduce() methods.
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class MainClass {

    // Mapper generic parameters: <map input key, map input value,
    // map output key, map output value>. LongWritable/Text/IntWritable are
    // example Writable types; substitute the types that match your data.
    public static class SampleMapper
            extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // map logic goes here; emit pairs with context.write(outKey, outValue)
        }
    }

    // Reducer generic parameters: <map output key, map output value,
    // reduce output key, reduce output value>.
    public static class SampleReducer
            extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // reduce logic goes here; aggregate values and call context.write()
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "Sample");
        job.setJarByClass(MainClass.class);

        Path inputPath = new Path("hdfs://localhost:54310/sampleinput");
        Path outputDir = new Path("hdfs://localhost:54310/sampleoutput");

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setMapperClass(SampleMapper.class);
        job.setReducerClass(SampleReducer.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputDir);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
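To make the skeleton concrete, the empty map() and reduce() bodies can be filled with application logic. The sketch below shows a word-count style pair (an illustrative assumption, not part of the skeleton itself); the classes drop into MainClass above and reuse its imports, assuming line-oriented text input where TextInputFormat supplies a byte offset as the key and a line of text as the value.

    // Illustrative word-count style bodies for the nested classes above.
    public static class SampleMapper
            extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split each input line into tokens and emit (token, 1) pairs.
            for (String token : value.toString().split("\\s+")) {
                if (!token.isEmpty()) {
                    word.set(token);
                    context.write(word, ONE);
                }
            }
        }
    }

    public static class SampleReducer
            extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // Sum the counts for each token and emit (token, total).
            int sum = 0;
            for (IntWritable count : values) {
                sum += count.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

Once compiled and packaged into a JAR (file and class names here are assumptions), the job is submitted to the cluster with the hadoop jar command, for example: hadoop jar sample.jar MainClass.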