Example usage for org.apache.hadoop.mapred FileInputFormat setInputPaths

List of usage examples for org.apache.hadoop.mapred FileInputFormat setInputPaths

Introduction

On this page you can find example usage for org.apache.hadoop.mapred FileInputFormat setInputPaths.

Prototype

public static void setInputPaths(JobConf conf, Path... inputPaths) 

Document

Set the array of Paths as the list of inputs for the map-reduce job.
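
The following minimal driver shows the call in context. It is a sketch only: the SetInputPathsDemo class and the /user/demo/* paths are hypothetical placeholders, and the identity mapper and reducer simply pass each input record through unchanged.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class SetInputPathsDemo {
    public static void main(String[] argv) throws Exception {
        JobConf conf = new JobConf(SetInputPathsDemo.class);
        conf.setJobName("setinputpaths_demo");

        conf.setMapperClass(IdentityMapper.class);
        conf.setReducerClass(IdentityReducer.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        // TextInputFormat produces LongWritable offsets and Text lines,
        // and the identity map/reduce passes them through unchanged.
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        // The varargs overload replaces any previously configured
        // input paths with exactly the paths given here.
        FileInputFormat.setInputPaths(conf, new Path("/user/demo/input1"), new Path("/user/demo/input2"));
        FileOutputFormat.setOutputPath(conf, new Path("/user/demo/output"));

        JobClient.runJob(conf);
    }
}

Note that setInputPaths replaces whatever input paths were configured before, whereas FileInputFormat.addInputPath appends one more path to the existing list.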

Usage

From source file: edu.ldzm.analysis.AnalysisSummary.java

License: Apache License

/**
 * The main driver for the analysis summary map/reduce program. Invoke this
 * method to submit the map/reduce job.
 * 
 * @throws IOException
 *             When there are communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), AnalysisSummary.class);
    conf.setJobName("analysis_summery");

    // both the keys and the values are Text
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(Combine.class);
    conf.setReducerClass(Reduce.class);

    boolean param = false;
    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-l".equals(args[i])) {
                param = true;
                String[] fields = args[++i].split(SEPARATOR);
                conf.setInt("NAME_LIST_LENGTH", fields.length);
                for (int j = 0; j < fields.length; j++) {
                    if ("timeStamp".equals(fields[j])) {
                        conf.setInt("REQUEST_TIME_INDEX", j);
                    } else if ("elapsed".equals(fields[j])) {
                        conf.setInt("REQUEST_ELAPSE_TIME_INDEX", j);
                    } else if ("label".equals(fields[j])) {
                        conf.setInt("REQUEST_LABEL_INDEX", j);
                    } else if ("success".equals(fields[j])) {
                        conf.setInt("REQUEST_SUCCESSFUL_INDEX", j);
                    } else if ("bytes".equals(fields[j])) {
                        conf.setInt("REQUEST_BYTE_INDEX", j);
                    }
                }
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }

    if (!param) {
        System.out.println("-l namelist.txt");
        return printUsage();
    }
    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
}
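
Note that this example passes a String (other_args.get(0)) rather than a Path: besides the varargs form above, FileInputFormat also provides setInputPaths(JobConf conf, String commaSeparatedPaths), which splits its argument on commas, so a single command-line argument can name several input directories.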

From source file: edu.ldzm.average.AverageResponseTime.java

License: Apache License

/**
 * The main driver for the average response time map/reduce program. Invoke
 * this method to submit the map/reduce job.
 * 
 * @throws IOException
 *             When there are communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), AverageResponseTime.class);
    conf.setJobName("average_response_time");

    // both the keys and the values are Text
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(Combine.class);
    conf.setReducerClass(Reduce.class);

    int param = 0;
    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-l".equals(args[i])) {
                param++;
                String[] fields = args[++i].split(SEPARATOR);
                conf.setInt("NAME_LIST_LENGTH", fields.length);
                for (int j = 0; j < fields.length; j++) {
                    if ("timeStamp".equals(fields[j])) {
                        conf.setInt("REQUEST_TIME_INDEX", j);
                    } else if ("elapsed".equals(fields[j])) {
                        conf.setInt("REQUEST_ELAPSE_TIME_INDEX", j);
                    } else if ("label".equals(fields[j])) {
                        conf.setInt("REQUEST_LABEL_INDEX", j);
                    } else if ("success".equals(fields[j])) {
                        conf.setInt("REQUEST_SUCCESSFUL_INDEX", j);
                    } else if ("bytes".equals(fields[j])) {
                        conf.setInt("REQUEST_BYTE_INDEX", j);
                    }
                }
            } else if ("-i".equals(args[i])) {
                param++;
                conf.setInt("INTERVAL_TIME", Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }

    if (param != 2) {
        System.out.println("-l  -i?");
        return printUsage();
    }

    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
}

From source file: edu.ncku.ikdd.ArtistAnalysis.java

public static void main(String[] argv) throws Exception {
    JobConf conf = new JobConf(ArtistAnalysis.class);
    conf.setJobName("artistanalysis");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Combine.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(argv[0]));
    FileOutputFormat.setOutputPath(conf, new Path(argv[1]));

    JobClient.runJob(conf);
}

From source file: edu.ncku.ikdd.DataMining.java

public static void main(String[] argv) throws Exception {
    int candidateLength = 1;
    FileSystem dfs = FileSystem.get(new Configuration());
    do {
        JobConf countConf = new JobConf(DataMining.class);

        countConf.setOutputKeyClass(Text.class);
        countConf.setOutputValueClass(IntWritable.class);

        countConf.setMapperClass(CountMap.class);
        countConf.setCombinerClass(CountCombine.class);
        countConf.setReducerClass(CountReduce.class);

        countConf.setInputFormat(TextInputFormat.class);
        countConf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(countConf, new Path(argv[0]));
        FileOutputFormat.setOutputPath(countConf, new Path(count_path + String.valueOf(candidateLength)));
        countConf.setInt("minSupport", Integer.valueOf(argv[2]));
        countConf.setInt("candidateLength", candidateLength);
        JobClient.runJob(countConf);

        ++candidateLength;

        JobConf candidateConf = new JobConf(DataMining.class);

        candidateConf.setOutputKeyClass(Text.class);
        candidateConf.setOutputValueClass(Text.class);

        candidateConf.setMapperClass(CandidateMap.class);
        candidateConf.setReducerClass(CandidateReduce.class);

        candidateConf.setInputFormat(TextInputFormat.class);
        candidateConf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(candidateConf,
                new Path(count_path + String.valueOf(candidateLength - 1) + "/part-00000"));
        FileOutputFormat.setOutputPath(candidateConf,
                new Path(candidate_path + String.valueOf(candidateLength)));
        candidateConf.setInt("candidateLength", candidateLength);

        JobClient.runJob(candidateConf);

    } while (dfs.getFileStatus(new Path(candidate_path + String.valueOf(candidateLength) + "/part-00000"))
            .getLen() > 0);

    BufferedReader br;
    BufferedWriter bw = new BufferedWriter(
            new OutputStreamWriter(dfs.create(new Path(argv[1] + "/part-00000"))));
    String line;
    for (int i = 1; i < candidateLength; ++i) {
        br = new BufferedReader(
                new InputStreamReader(dfs.open(new Path(count_path + String.valueOf(i) + "/part-00000"))));
        while ((line = br.readLine()) != null) {
            bw.write(line + "\n");
        }
        br.close();
    }
    bw.close();
}

From source file: edu.ncku.ikdd.TempRecord.java

public static void main(String[] argv) throws Exception {
    JobConf conf = new JobConf(TempRecord.class);
    conf.setJobName("temprecord");

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(argv[0]));
    FileOutputFormat.setOutputPath(conf, new Path(argv[1]));

    JobClient.runJob(conf);
}

From source file: edu.ncku.ikdd.TitleParser.java

public static void main(String[] argv) throws Exception {
    JobConf conf = new JobConf(TitleParser.class);
    conf.setJobName("titleparser");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(argv[0]));
    FileOutputFormat.setOutputPath(conf, new Path(argv[1]));

    JobClient.runJob(conf);
}

From source file: edu.ncku.ikdd.WordCount.java

public static void main(String[] argv) throws Exception {
    JobConf conf = new JobConf(WordCount.class);
    conf.setJobName("wordcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(argv[0]));
    FileOutputFormat.setOutputPath(conf, new Path(argv[1]));

    JobClient.runJob(conf);
}

From source file: edu.ub.ahstfg.indexer.Indexer.java

License: Open Source License

@Override
public int run(String[] arg0) throws Exception {
    LOG.info("Creating Hadoop job for Indexer.");
    JobConf job = new JobConf(getConf());
    job.setJarByClass(Indexer.class);

    LOG.info("Setting input path to '" + INPUT_PATH + "'");
    FileInputFormat.setInputPaths(job, new Path(INPUT_PATH));
    // Set filters here if necessary.

    LOG.info("Clearing the output path at '" + OUTPUT_PATH + "'");
    // Change URI to Path if necessary.
    FileSystem fs = FileSystem.get(new URI(OUTPUT_PATH), job);

    if (fs.exists(new Path(OUTPUT_PATH))) {
        fs.delete(new Path(OUTPUT_PATH), true);
    }

    LOG.info("Setting output path to '" + OUTPUT_PATH + "'");
    FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));
    FileOutputFormat.setCompressOutput(job, false);

    LOG.info("Setting input format.");
    job.setInputFormat(ArcInputFormat.class);
    LOG.info("Setting output format.");
    job.setOutputFormat(IndexOutputFormat.class);

    LOG.info("Setting output data types.");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IndexRecord.class);

    LOG.info("Setting mapper and reducer.");
    job.setMapperClass(IndexerMapper.class);
    job.setMapOutputValueClass(ParsedDocument.class);
    job.setReducerClass(IndexerReducer.class);

    if (JobClient.runJob(job).isSuccessful()) {
        return 0;
    } else {
        return 1;
    }
}

From source file: edu.ub.ahstfg.indexer.wordcount.WordCount.java

License: Open Source License

@Override
public int run(String[] args) throws Exception {

    LOG.info("Creating Hadoop job for ARC input files word count.");
    JobConf job = new JobConf(getConf());
    job.setJarByClass(WordCount.class);

    LOG.info("Setting input path to '" + inputPath + "'");
    FileInputFormat.setInputPaths(job, new Path(inputPath));
    // Set filters here if necessary.

    LOG.info("Clearing the output path at '" + outputPath + "'");
    // Change URI to Path if necessary.
    FileSystem fs = FileSystem.get(new URI(outputPath), job);

    if (fs.exists(new Path(outputPath))) {
        fs.delete(new Path(outputPath), true);
    }

    LOG.info("Setting output path to '" + outputPath + "'");
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    FileOutputFormat.setCompressOutput(job, false);

    LOG.info("Setting input format.");
    // job.setInputFormat(TextInputFormat.class);
    job.setInputFormat(ArcInputFormat.class);
    LOG.info("Setting output format.");
    job.setOutputFormat(TextOutputFormat.class);

    LOG.info("Setting output data types.");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    LOG.info("Setting mapper and reducer.");
    // job.setMapperClass(WordCountTextInputMapper.class);
    job.setMapperClass(WordCountArcInputMapper.class);
    job.setReducerClass(LongSumReducer.class);

    if (JobClient.runJob(job).isSuccessful()) {
        return 0;
    } else {
        return 1;
    }
}

From source file: edu.ub.ahstfg.kmeans.KmeansIteration.java

License: Open Source License

@Override
public int run(String[] args) throws IOException, URISyntaxException {
    job = new JobConf(getConf());
    params.toJobConf(job);
    job.setInt(ParamSet.N_ITERATION, nIter);

    LOG.info("Iteration " + nIter + " > Iniciating Kmeans iteration " + nIter);
    job.setJarByClass(KmeansIteration.class); // TODO may change for Clusterizer.class

    LOG.info("Iteration " + nIter + " > Setting input path to '" + inputPath + "'");
    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileInputFormat.setInputPathFilter(job, SampleFilter.class);

    LOG.info("Iteration " + nIter + " > Clearing the output path at '" + outputPath + "'");
    FileSystem fs = FileSystem.get(new URI(outputPath), job);

    if (fs.exists(new Path(outputPath))) {
        fs.delete(new Path(outputPath), true);
    }

    LOG.info("Iteration " + nIter + " > Setting output path to '" + outputPath + "'");
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    FileOutputFormat.setCompressOutput(job, false);

    LOG.info("Iteration " + nIter + " > Setting input format.");
    job.setInputFormat(IndexInputFormat.class);
    LOG.info("Iteration " + nIter + " > Setting output format.");
    job.setOutputFormat(TextOutputFormat.class);

    LOG.info("Iteration " + nIter + " > Setting output data types.");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    LOG.info("Iteration " + nIter + " > Setting mapper and reducer.");
    job.setMapperClass(KmeansMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DocumentDistance.class);
    job.setReducerClass(KmeansReducer.class);

    LOG.info("Iteration " + nIter + " > Running job...");
    boolean done = JobClient.runJob(job).isSuccessful();
    LOG.info("Iteration " + nIter + " > Job done.");
    if (done) {
        return 0;
    } else {
        return 1;
    }
}
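
This last example also pairs setInputPaths with FileInputFormat.setInputPathFilter, which registers a PathFilter class (here SampleFilter) so that only files accepted by the filter under the configured input paths become job input.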