Example usage for org.apache.hadoop.mapred FileInputFormat setInputPaths

List of usage examples for org.apache.hadoop.mapred FileInputFormat setInputPaths

Introduction

On this page you can find example usages of org.apache.hadoop.mapred FileInputFormat setInputPaths.

Prototype

public static void setInputPaths(JobConf conf, Path... inputPaths) 

Source Link

Document

Sets the given array of Paths as the list of inputs for the map-reduce job.

Usage

From source file:de.tudarmstadt.ukp.dkpro.bigdata.hadoop.FeatureCountHadoopDriver.java

License:Apache License

/**
 * Configures the feature-count job with its defaults and submits it.
 *
 * <p>If fewer than two arguments are given, a usage line is printed to standard
 * output and the JVM exits with status 1.
 *
 * @param args command-line arguments; {@code args[0]} is used as the input path
 *             and {@code args[1]} as the output path
 * @return 0 after {@code JobClient.runJob} has returned
 * @throws Exception propagated from job configuration or submission
 */
@Override
public int run(String[] args) throws Exception {
    final boolean pathsMissing = args.length < 2;
    if (pathsMissing) {
        final String usage = "Usage: " + this.getClass().getSimpleName()
                + " [hadoop-params] input output [job-params]";
        System.out.println(usage);
        System.exit(1);
    }

    this.job = new JobConf(getConf(), DkproHadoopDriver.class);

    // Publish the factory class name through the job configuration.
    final String extractorClassName = getCountableFeatureExtractorClass().getName();
    this.job.set("dkpro.uima.countablefeatureextractor", extractorClassName);

    final Path source = new Path(args[0]);
    final Path target = new Path(args[1]);

    // Described by the original author as a sensible default for the UKP cluster.
    final int mapTaskCount = 76;
    final int reduceTaskCount = 76;

    FileInputFormat.setInputPaths(this.job, source);
    FileOutputFormat.setOutputPath(this.job, target);

    // Defaults; configure(...) is invoked afterwards and receives the job as set up here.
    this.job.setJobName(this.getClass().getSimpleName());

    this.job.setInputFormat(SequenceFileInputFormat.class);
    this.job.setOutputFormat(TextOutputFormat.class);

    this.job.setMapperClass(CountableFeatureMapper.class);
    this.job.setCombinerClass(CountableFeatureReducer.class);
    this.job.setReducerClass(CountableFeatureReducer.class);

    this.job.setMapOutputKeyClass(Text.class);
    this.job.setMapOutputValueClass(LongWritable.class);
    this.job.setOutputKeyClass(Text.class);
    this.job.setOutputValueClass(LongWritable.class);

    this.job.setInt("mapred.job.map.memory.mb", 1280);
    this.job.setInt("mapred.job.reduce.memory.mb", 1280);
    this.job.setNumMapTasks(mapTaskCount);
    this.job.setNumReduceTasks(reduceTaskCount);

    configure(this.job);

    // Create symlinks for distributed resources.
    DistributedCache.createSymlink(this.job);

    JobClient.runJob(this.job);
    return 0;
}

From source file:dinocode.SpeciesGraphBuilder.java

/**
 * Runs the species page-rank pipeline: one graph-builder job, a fixed number of
 * iteration jobs in which each iteration reads the previous iteration's output,
 * and a final viewer job.
 *
 * <p>All input and output locations are derived from the location of the
 * compiled {@code SpeciesDriver} class (with {@code /build/classes} stripped);
 * the command-line arguments are not read.
 *
 * <p>A failing job is reported via its stack trace and the pipeline continues
 * with the next job.
 *
 * @param args ignored
 * @throws Exception if resolving the code-source location fails
 */
public static void main(String[] args) throws Exception {
    // Number of "Species Iter" jobs; the viewer reads the output of the last one.
    final int iterations = 500;

    final File f = new File(SpeciesDriver.class.getProtectionDomain().getCodeSource().getLocation().getPath());
    final String projectRoot = f.getAbsolutePath().replace("/build/classes", "");
    final String resultBase = projectRoot + "/src/outputFiles/Result";

    // --- Graph builder ---
    JobClient client = new JobClient();
    JobConf conf = new JobConf(SpeciesDriver.class);
    conf.setJobName("Page-rank Species Graph Builder");

    conf.setMapperClass(SpeciesGraphBuilderMapperd.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setReducerClass(SpeciesGraphBuilderReducerd.class);

    FileInputFormat.setInputPaths(conf, new Path(projectRoot + "/src/InputFiles/species_medium.txt"));
    FileOutputFormat.setOutputPath(conf, new Path(resultBase));

    runAndReport(client, conf);

    // --- Iterations ---
    // The first iteration reads the builder's part-00000 file; later iterations
    // read the whole output directory of the preceding iteration.
    String inFiles = resultBase + "/part-00000";
    for (int count = 1; count <= iterations; count++) {
        client = new JobClient();
        conf = new JobConf(SpeciesDriver.class);
        conf.setJobName("Species Iter");

        final String outFiles = resultBase + count;
        conf.setNumReduceTasks(5);

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(conf, new Path(inFiles));
        FileOutputFormat.setOutputPath(conf, new Path(outFiles));

        conf.setMapperClass(SpeciesIterMapper2d.class);
        conf.setReducerClass(SpeciesIterReducer2d.class);
        conf.setCombinerClass(SpeciesIterReducer2d.class);

        runAndReport(client, conf);
        inFiles = outFiles;
    }

    // --- Viewer ---
    client = new JobClient();
    conf = new JobConf(SpeciesDriver.class);
    conf.setJobName("Species Viewer");

    conf.setOutputKeyClass(FloatWritable.class);
    conf.setOutputValueClass(Text.class);

    // Derived from the iteration count so the two cannot drift apart.
    FileInputFormat.setInputPaths(conf, new Path(resultBase + iterations + "/part-00000"));
    FileOutputFormat.setOutputPath(conf, new Path(projectRoot + "/src/outputFiles/ResultFinal"));

    conf.setMapperClass(SpeciesViewerMapperd.class);
    conf.setReducerClass(org.apache.hadoop.mapred.lib.IdentityReducer.class);

    runAndReport(client, conf);
}

/**
 * Attaches {@code conf} to {@code client}, runs the job, and prints the stack
 * trace of any exception instead of propagating it.
 */
private static void runAndReport(JobClient client, JobConf conf) {
    client.setConf(conf);
    try {
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:drivers.CalculatePageRank.java

/**
 * Configures and runs the "Fiqie|Calculate" job.
 *
 * @param args {@code args[0]} is used as the input path, {@code args[1]} as the output path
 * @return 0 after {@code JobClient.runJob} has returned
 * @throws Exception propagated from job configuration or execution
 */
@Override
public int run(String[] args) throws Exception {
    final JobConf jobConf = new JobConf(getConf(), this.getClass());
    final Path source = new Path(args[0]);
    final Path target = new Path(args[1]);

    jobConf.setJobName("Fiqie|Calculate");

    // Where the data comes from and goes to.
    FileInputFormat.setInputPaths(jobConf, source);
    FileOutputFormat.setOutputPath(jobConf, target);

    // Record formats and output types.
    jobConf.setInputFormat(KeyValueTextInputFormat.class);
    jobConf.setOutputFormat(TextOutputFormat.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);

    // Processing classes.
    jobConf.setMapperClass(CalculatePageRank1Mapper.class);
    jobConf.setReducerClass(CalculatePageRank1Reducer.class);

    JobClient.runJob(jobConf);
    return 0;
}

From source file:drivers.FinishPageRank.java

/**
 * Configures and runs the "Fiqie|Finish" job, which uses
 * {@code DecreasingComparator} as its output key comparator.
 *
 * @param args {@code args[0]} is used as the input path, {@code args[1]} as the output path
 * @return 0 after {@code JobClient.runJob} has returned
 * @throws Exception propagated from job configuration or execution
 */
@Override
public int run(String[] args) throws Exception {
    final JobConf jobConf = new JobConf(getConf(), this.getClass());
    final Path source = new Path(args[0]);
    final Path target = new Path(args[1]);

    jobConf.setJobName("Fiqie|Finish");

    // Where the data comes from and goes to.
    FileInputFormat.setInputPaths(jobConf, source);
    FileOutputFormat.setOutputPath(jobConf, target);

    // Record formats and output types.
    jobConf.setInputFormat(KeyValueTextInputFormat.class);
    jobConf.setOutputFormat(TextOutputFormat.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);

    // Key ordering and processing classes.
    jobConf.setOutputKeyComparatorClass(DecreasingComparator.class);
    jobConf.setMapperClass(SortingPageRankMapper.class);
    jobConf.setReducerClass(SortingPageRankReducer.class);

    JobClient.runJob(jobConf);
    return 0;
}

From source file:drivers.InitPageRank.java

/**
 * Configures and runs the "Fiqie|Init" job.
 *
 * @param args {@code args[0]} is used as the input path, {@code args[1]} as the output path
 * @return 0 after {@code JobClient.runJob} has returned
 * @throws Exception propagated from job configuration or execution
 */
@Override
public int run(String[] args) throws Exception {
    final JobConf jobConf = new JobConf(getConf(), this.getClass());
    final Path source = new Path(args[0]);
    final Path target = new Path(args[1]);

    jobConf.setJobName("Fiqie|Init");

    // Where the data comes from and goes to.
    FileInputFormat.setInputPaths(jobConf, source);
    FileOutputFormat.setOutputPath(jobConf, target);

    // Record formats and output types.
    jobConf.setInputFormat(KeyValueTextInputFormat.class);
    jobConf.setOutputFormat(TextOutputFormat.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);

    // Processing classes.
    jobConf.setMapperClass(InitPageRankMapper.class);
    jobConf.setReducerClass(InitPageRankReducer.class);

    JobClient.runJob(jobConf);
    return 0;
}

From source file:edu.berkeley.amplab.adam.modules.CountReads.java

License:Apache License

/**
 * Configures and runs the read-counting Avro job over {@code inputPath},
 * writing to {@code outputPath}.
 *
 * @return 0 after {@code JobClient.runJob} has returned
 * @throws Exception propagated from job configuration or execution
 */
@Override
public int moduleRun() throws Exception {
    final JobConf jobConf = new JobConf(getConf(), CountReads.class);

    final Path source = new Path(inputPath);
    FileInputFormat.setInputPaths(jobConf, source);
    final Path target = new Path(outputPath);
    FileOutputFormat.setOutputPath(jobConf, target);

    // The reducer class doubles as the combiner.
    AvroJob.setMapperClass(jobConf, CountReadsMapper.class);
    AvroJob.setCombinerClass(jobConf, CountReadsReducer.class);
    AvroJob.setReducerClass(jobConf, CountReadsReducer.class);

    // Per the original author's note: AvroJob.setInputSchema and
    // AvroJob.setOutputSchema also set the related config options (input/output
    // format, map output classes, output key class).
    AvroJob.setInputSchema(jobConf, ADAMRecord.SCHEMA$);

    // Output records are (string, int) pairs.
    final Schema keySchema = Schema.create(Schema.Type.STRING);
    final Schema valueSchema = Schema.create(Schema.Type.INT);
    AvroJob.setOutputSchema(jobConf, Pair.getPairSchema(keySchema, valueSchema));

    JobClient.runJob(jobConf);
    return 0;
}

From source file:edu.brown.cs.mapreduce.BenchmarkBase.java

License:Open Source License

/**
 * Runs an identity map/reduce job with a single reducer over the output of the
 * last job, writing the result to a {@code combine} subdirectory of that output
 * path.
 *
 * <p>{@code last_job} is restored to the value it had on entry once the combine
 * job has been run, including when running it throws.
 *
 * @throws NullPointerException if no last job has been recorded
 * @throws Exception propagated from {@code runJob}
 */
public void runCombine() throws Exception {
    if (this.last_job == null) {
        throw new NullPointerException("ERROR: Last job is Null");
    }
    final JobConf real_last_job = this.last_job;
    final Path lastOutput = FileOutputFormat.getOutputPath(real_last_job);

    JobConf job = new JobConf(this.conf, this.benchmarkClass);
    job.setJobName((this.job_name != null ? this.job_name : this.benchmarkClass.getSimpleName()) + ".combine");
    job.setMapperClass(IdentityMapper.class);
    job.setNumMapTasks(0);
    job.setReducerClass(IdentityReducer.class);
    job.setNumReduceTasks(1); // this is needed to get a single output file

    // Input: whatever the last job produced
    FileInputFormat.setInputPaths(job, lastOutput);
    job.setInputFormat(KeyValueTextInputFormat.class);

    // Output: <last output>/combine
    FileOutputFormat.setOutputPath(job, new Path(lastOutput.toString() + "/combine"));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    try {
        this.runJob(job);
    } finally {
        // Put the saved last_job back even if runJob fails part-way through.
        this.last_job = real_last_job;
    }
}

From source file:edu.brown.cs.mapreduce.benchmarks.Benchmark3.java

License:Open Source License

/**
 * Builds the three Benchmark3 phase jobs and runs them one after another; the
 * output directory of each phase is the input of the next.
 *
 * <p>The JVM exits with status 1 if {@code BenchmarkBase.PROPERTY_START_DATE} or
 * {@code BenchmarkBase.PROPERTY_STOP_DATE} is missing from the base options.
 *
 * @param args command-line arguments, handed to {@code BenchmarkBase}
 * @return 0 after all three phases have been run
 * @throws Exception propagated from job configuration or execution
 */
public int run(String[] args) throws Exception {
    final BenchmarkBase base = new BenchmarkBase(this.getConf(), this.getClass(), args);

    final Date startedAt = new Date();
    System.out.println("Job started: " + startedAt);

    // ---------------- Phase 1: configuration ----------------
    final JobConf phase1Job = base.getJobConf();
    phase1Job.setJobName(phase1Job.getJobName() + ".Phase1");
    final Path phase1Output = new Path(base.getOutputPath().toString() + "/phase1");
    FileOutputFormat.setOutputPath(phase1Job, phase1Output);

    // Both date properties must be present before continuing.
    final String[] requiredProperties = { BenchmarkBase.PROPERTY_START_DATE, BenchmarkBase.PROPERTY_STOP_DATE };
    for (int i = 0; i < requiredProperties.length; i++) {
        final String property = requiredProperties[i];
        if (base.getOptions().containsKey(property)) {
            continue;
        }
        System.err.println("ERROR: The property '" + property + "' is not set");
        System.exit(1);
    }

    phase1Job.setInputFormat(
            base.getSequenceFile() ? SequenceFileInputFormat.class : KeyValueTextInputFormat.class);
    if (base.getSequenceFile()) {
        phase1Job.setOutputFormat(SequenceFileOutputFormat.class);
    }
    phase1Job.setOutputKeyClass(Text.class);
    phase1Job.setOutputValueClass(Text.class);
    if (base.getTupleData()) {
        phase1Job.setMapperClass(edu.brown.cs.mapreduce.benchmarks.benchmark3.phase1.TupleWritableMap.class);
    } else {
        phase1Job.setMapperClass(edu.brown.cs.mapreduce.benchmarks.benchmark3.phase1.TextMap.class);
    }
    if (base.getTupleData()) {
        phase1Job.setReducerClass(edu.brown.cs.mapreduce.benchmarks.benchmark3.phase1.TupleWritableReduce.class);
    } else {
        phase1Job.setReducerClass(edu.brown.cs.mapreduce.benchmarks.benchmark3.phase1.TextReduce.class);
    }
    phase1Job.setCompressMapOutput(base.getCompress());

    // ---------------- Phase 2: configuration ----------------
    final JobConf phase2Job = base.getJobConf();
    phase2Job.setJobName(phase2Job.getJobName() + ".Phase2");
    phase2Job.setInputFormat(
            base.getSequenceFile() ? SequenceFileInputFormat.class : KeyValueTextInputFormat.class);
    if (base.getSequenceFile()) {
        phase2Job.setOutputFormat(SequenceFileOutputFormat.class);
    }
    phase2Job.setOutputKeyClass(Text.class);
    phase2Job.setOutputValueClass(Text.class);
    phase2Job.setMapperClass(IdentityMapper.class);
    if (base.getTupleData()) {
        phase2Job.setReducerClass(edu.brown.cs.mapreduce.benchmarks.benchmark3.phase2.TupleWritableReduce.class);
    } else {
        phase2Job.setReducerClass(edu.brown.cs.mapreduce.benchmarks.benchmark3.phase2.TextReduce.class);
    }
    phase2Job.setCompressMapOutput(base.getCompress());
    phase2Job.setNumMapTasks(60);

    // ---------------- Phase 3: configuration ----------------
    final JobConf phase3Job = base.getJobConf();
    phase3Job.setJobName(phase3Job.getJobName() + ".Phase3");
    phase3Job.setNumReduceTasks(1);
    phase3Job.setInputFormat(
            base.getSequenceFile() ? SequenceFileInputFormat.class : KeyValueTextInputFormat.class);
    phase3Job.setOutputKeyClass(Text.class);
    phase3Job.setOutputValueClass(Text.class);
    phase3Job.setMapperClass(IdentityMapper.class);
    if (base.getTupleData()) {
        phase3Job.setReducerClass(edu.brown.cs.mapreduce.benchmarks.benchmark3.phase3.TupleWritableReduce.class);
    } else {
        phase3Job.setReducerClass(edu.brown.cs.mapreduce.benchmarks.benchmark3.phase3.TextReduce.class);
    }

    // ---------------- Execution ----------------
    // Phase 1
    base.runJob(phase1Job);

    // Phase 2 consumes the phase 1 output.
    final Path phase2Output = new Path(base.getOutputPath().toString() + "/phase2");
    FileOutputFormat.setOutputPath(phase2Job, phase2Output);
    FileInputFormat.setInputPaths(phase2Job, phase1Output);
    base.runJob(phase2Job);

    // Phase 3 consumes the phase 2 output.
    final Path phase3Output = new Path(base.getOutputPath().toString() + "/phase3");
    FileOutputFormat.setOutputPath(phase3Job, phase3Output);
    FileInputFormat.setInputPaths(phase3Job, phase2Output);
    base.runJob(phase3Job);

    // No combine step is run here; the original left
    // "if (base.getCombine()) base.runCombine();" disabled.

    return 0;
}

From source file:edu.brown.cs.mapreduce.demo.OrderSum.java

License:Open Source License

/**
 * Configures and submits the OrderSum map/reduce job.
 *
 * <p>The job reads key/value text from {@code /demo/input/}, writes to
 * {@code /demo/output/}, and uses a single reduce task.
 *
 * @param args command-line arguments; not read by this method
 * @return 0 after {@code JobClient.runJob} has returned
 * @throws Exception propagated from job configuration or execution
 */
public int run(String[] args) throws Exception {
    final JobConf jobConf = new JobConf(this.getConf(), OrderSum.class);
    jobConf.setJobName(OrderSum.class.getSimpleName());

    // Input/output paths (HDFS)
    FileInputFormat.setInputPaths(jobConf, "/demo/input/");
    FileOutputFormat.setOutputPath(jobConf, new Path("/demo/output/"));

    // Input file format
    jobConf.setInputFormat(KeyValueTextInputFormat.class);

    // Map/reduce classes
    jobConf.setMapperClass(OrderSum.OrderSumMapper.class);
    jobConf.setReducerClass(OrderSum.OrderSumReducer.class);

    // Output key/value types
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(DoubleWritable.class);

    // One reduce task, so a single output file is produced.
    jobConf.setNumReduceTasks(1);

    // Optional features left disabled by the original author:
    //   compression:  jobConf.setCompressMapOutput(true);
    //   combiner:     jobConf.setCombinerClass(OrderSum.OrderSumReducer.class);
    //   search date:  if (args.length == 1) {
    //                     jobConf.set("edu.brown.cs.pavlo.search_date", args[0]);
    //                 }

    JobClient.runJob(jobConf);
    return 0;
}

From source file:edu.iit.marketbasket.MarketBasket.java

/**
 * Configures and runs the MarketBasket job.
 *
 * <p>If fewer than two arguments are given, a usage line is printed to standard
 * error and the JVM exits with status 1.
 *
 * @param args {@code args[0]} is used as the input path, {@code args[1]} as the output path
 * @throws IOException propagated from job submission or execution
 */
public static void main(String[] args) throws IOException {
    if (args.length < 2) {
        // Fail with a readable message rather than an ArrayIndexOutOfBoundsException.
        System.err.println("Usage: " + MarketBasket.class.getSimpleName() + " <input path> <output path>");
        System.exit(1);
    }

    JobConf conf = new JobConf(MarketBasket.class);
    conf.setJobName("MarketBasket");
    conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    // JobClient.runJob blocks until the job has finished, so no separate
    // waitForCompletion() call is needed on the returned RunningJob.
    JobClient.runJob(conf);
}