countTheGivenWords.searchAndCountJob.java Source code

Java tutorial

Introduction

Here is the source code for countTheGivenWords.searchAndCountJob.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package countTheGivenWords;

import hdfsIO.fileInteractions;
import invertedIndex.lineIndexMapper;
import invertedIndex.lineIndexReducer;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.List;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Hdfs;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.WordCount;

/**
 * Driver for the "search and count" MapReduce job (old {@code org.apache.hadoop.mapred} API).
 *
 * <p>The job uses {@code searchAndCountMapper} as its mapper and
 * {@code searchAndCountReducer} as both combiner and reducer, reads its input
 * with {@code TextInputFormat} and writes {@code Text}/{@code IntWritable}
 * pairs with {@code TextOutputFormat}. After the job finishes, the first
 * output partition is printed to standard output.
 *
 * @author madhatter
 */
public class searchAndCountJob {

    /**
     * Phrase the job searches for; defaults to {@code "world"}.
     *
     * <p>NOTE(review): this class never reads the field and does not copy it
     * into the {@code JobConf}; presumably the mapper reads it directly.
     * Confirm that the value reaches tasks running in other JVMs.
     */
    public static String SearchPhrase = "world";

    /** Name of the first output partition file written into the output directory. */
    private static final String FIRST_PART_FILE = "part-00000";

    /**
     * Configures and runs the job, then prints the contents of the first
     * output partition.
     *
     * <p>Any existing output directory is deleted recursively before the job
     * is submitted. Failures are reported on standard error (stack trace) and
     * are not propagated to the caller.
     *
     * @param args command-line arguments; {@code args[1]} is the input path
     *             and {@code args[2]} is the output directory
     *             ({@code args[0]} is not used by this method)
     */
    public static void start(String[] args) {
        if (args == null || args.length < 3) {
            System.err.println(
                    "searchAndCountJob: expected at least 3 arguments "
                    + "(args[1] = input path, args[2] = output directory)");
            return;
        }

        try {
            // Pass this job's own class so that Hadoop selects the jar that
            // actually contains the mapper and reducer as the job jar.
            JobConf conf = new JobConf(searchAndCountJob.class);
            conf.setJobName("wordcount");

            conf.setOutputKeyClass(Text.class);
            conf.setOutputValueClass(IntWritable.class);

            conf.setMapperClass(searchAndCountMapper.class);
            // The reducer doubles as the combiner.
            conf.setCombinerClass(searchAndCountReducer.class);
            conf.setReducerClass(searchAndCountReducer.class);

            conf.setInputFormat(TextInputFormat.class);
            conf.setOutputFormat(TextOutputFormat.class);

            FileInputFormat.setInputPaths(conf, new Path(args[1]));

            Path outputDir = new Path(args[2]);

            // Resolve the filesystem from the path itself so that a fully
            // qualified output URI is handled on the correct filesystem.
            FileSystem outputFs = outputDir.getFileSystem(conf);

            // Clear any previous output so a stale directory does not get in
            // the way of the new run.
            outputFs.delete(outputDir, true);

            FileOutputFormat.setOutputPath(conf, outputDir);

            JobClient.runJob(conf);

            // Read the results from the directory the job was told to write
            // to, rather than from a fixed "output" folder.
            Path resultFile = outputFs.makeQualified(new Path(outputDir, FIRST_PART_FILE));

            if (outputFs.exists(resultFile)) {
                System.out.println("\t\t------ Results ------ ");

                List<String> resultLines = (new fileInteractions()).readLines(resultFile, conf);
                for (String resultLine : resultLines) {
                    System.out.println(resultLine);
                }
            } else {
                System.err.println("searchAndCountJob: no result file found at " + resultFile);
            }

        } catch (Exception e) {
            // Top-level boundary of the job driver: report and return, as the
            // method signature declares no checked exceptions.
            e.printStackTrace();
        }
    }
}