This source code is used when a large dataset must be split into subsets for efficient processing on a single machine. Data is written to different files using the MultipleOutputs object from the hadoop.mapreduce package, which routes each record to a file based on the reducer key. Before any output is written, the MultipleOutputs object is initialized in the setup method.
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
/**
 * Reducer that fans records out to per-key output files via {@link MultipleOutputs}
 * instead of the single default reducer output: each distinct reducer key gets its
 * own file (base name derived from the key in {@code reduce}).
 *
 * NOTE(review): {@code MultipleOutputs} must be closed in a {@code cleanup()}
 * override or the per-key files may not be flushed — no such override is visible
 * in this chunk; confirm it exists elsewhere in the file.
 */
public class ReducerClass extends Reducer<Text, Text, NullWritable, Text> {
// Per-key file writer; created in setup() from the task context, used in reduce().
private MultipleOutputs<NullWritable,Text> multioutput;
/**
 * Prepares the {@link MultipleOutputs} writer from the task context before the
 * framework invokes any {@code reduce} calls.
 *
 * @param context the reducer task context supplied by the framework
 * @throws IOException if parent setup fails
 * @throws InterruptedException if the task is interrupted during setup
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    this.multioutput = new MultipleOutputs<NullWritable, Text>(context);
}
/**
 * Writes every value for {@code key} to a key-specific output file rather than
 * the default reducer output.
 *
 * NOTE(review): the method's closing brace (and the class's) are outside this
 * chunk; also, the data written here is only guaranteed on disk once
 * {@code multioutput.close()} runs in {@code cleanup()} — verify that exists.
 *
 * @param key     reducer key; its text forms the base of the output file name
 * @param value   all values grouped under this key
 * @param context task context (not used directly; MultipleOutputs wraps it)
 * @throws IOException if the write fails
 * @throws InterruptedException if the task is interrupted
 */
@Override
protected void reduce(Text key,Iterable<Text> value,Context context) throws IOException, InterruptedException {
// Base output name = key text + Constants.FILE_NAME_PREFIX. Despite the
// constant's name, it is appended as a SUFFIX here — TODO confirm intent.
String fileName = key.toString()+Constants.FILE_NAME_PREFIX;
for (Text val : value) {
// NullWritable key: only the value text appears in the per-key file.
multioutput.write(NullWritable.get(), val, fileName);
}