Example usage for org.apache.hadoop.mapred FileInputFormat setInputPaths

Introduction

This page collects usage examples for org.apache.hadoop.mapred.FileInputFormat.setInputPaths.

Prototype

public static void setInputPaths(JobConf conf, Path... inputPaths) 

Document

Set the array of Paths as the list of inputs for the map-reduce job.
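
Before the collected examples, here is a minimal sketch of a typical call. It is not taken from any of the examples below; the class name and input directories are placeholders.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;

public class SetInputPathsSketch {
    public static void main(String[] args) {
        JobConf job = new JobConf(SetInputPathsSketch.class);
        job.setInputFormat(TextInputFormat.class);

        // The varargs overload replaces any previously configured inputs
        // with exactly this list; each Path may be a file or a directory.
        FileInputFormat.setInputPaths(job, new Path("/data/in1"), new Path("/data/in2"));
    }
}

FileInputFormat also offers addInputPath(JobConf, Path), which appends one path instead of replacing the list, and a setInputPaths(JobConf, String) overload that takes a comma-separated list of paths.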

Usage

From source file: adept.mapreduce.MapReduce.java

License: Apache License

public JobConf getConfiguration(String inputPath, String outputPath, String mapClass) throws Exception {
    //Configuration conf = getConf();
    Class<?> thisclass = getClass();
    JobConf job = new JobConf(new Configuration(), thisclass);

    try {
        Path in = new Path(inputPath);

        Path out = new Path(outputPath);
        FileInputFormat.setInputPaths(job, in);
        FileOutputFormat.setOutputPath(job, out);

        job.setJobName("Algorithm Map-Reduce");
        job.setMapperClass((Class<? extends Mapper>) Class.forName(mapClass));

    } catch (Exception e) {
        throw new RuntimeException("Exception occurred: " + e.getMessage(), e);
    }

    job.setReducerClass(AdeptReducer.class);
    job.setInputFormat(KeyValueTextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.set("key.value.separator.in.input.line", "\t");

    return job;
}
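
The tab separator set above matches the default of KeyValueTextInputFormat, which splits each input line at the first separator into a key/value pair. If a job like this one had to read several input directories, one option is to append paths rather than replace them; a short sketch, with a hypothetical extra directory:

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    // addInputPath appends to the input list instead of replacing it.
    FileInputFormat.addInputPath(job, new Path("/data/extra-batch"));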

From source file: adept.mapreduce.MapReduceExample.java

License: Apache License

public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    //Configuration conf = new Configuration();

    JobConf job = new JobConf(conf, MapReduceExample.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("MapReduceExample");
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormat(KeyValueTextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.set("key.value.separator.in.input.line", ",");

    JobClient.runJob(job);

    return 0;

}

From source file: alluxio.client.hadoop.DFSIOIntegrationTest.java

License: Apache License

private void runIOTest(Class<? extends Mapper<Text, LongWritable, Text, Text>> mapperClass, Path outputDir)
        throws IOException {
    JobConf job = new JobConf(mConfig, DFSIOIntegrationTest.class);

    FileInputFormat.setInputPaths(job, getControlDir(mConfig));
    job.setInputFormat(SequenceFileInputFormat.class);

    job.setMapperClass(mapperClass);
    job.setReducerClass(AccumulatingReducer.class);

    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);
    JobClient.runJob(job);
}

From source file: arrestsbyyear.ArrestsByYear.java

public int run(String[] args) throws Exception {
    Configuration conf = getConf();

    JobConf job = new JobConf(conf, ArrestsByYear.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("ArrestsByYear");
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormat(KeyValueTextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    //   job.set("key.value.separator.in.input.line", "");

    JobClient.runJob(job);

    return 0;
}

From source file: average.AverageDriver.java

public static void main(String[] args) {
    JobClient client = new JobClient();
    // Configurations for Job set in this variable
    JobConf conf = new JobConf(average.AverageDriver.class);

    // Name of the Job
    conf.setJobName("BookCrossing1.0");

    // Data type of Output Key and Value
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    // Setting the Mapper and Reducer Class
    conf.setMapperClass(average.AverageMapper.class);
    conf.setReducerClass(average.AverageReducer.class);

    // Formats of the Data Type of Input and output
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    // Specify input and output DIRECTORIES (not files)
    FileInputFormat.setInputPaths(conf, new Path(args[1]));
    FileOutputFormat.setOutputPath(conf, new Path(args[2]));

    client.setConf(conf);
    try {
        // Running the job with Configurations set in the conf.
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
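
Unlike the input paths, the output path must not exist when the job is submitted, or JobClient.runJob fails. A common guard, sketched here under the assumption that stale output is disposable ("conf" and "args" as in the driver above; requires org.apache.hadoop.fs.FileSystem):

    Path out = new Path(args[2]);
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(out)) {
        fs.delete(out, true); // recursive delete of the old output directory
    }
    FileOutputFormat.setOutputPath(conf, out);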

From source file: averageprocessingtimesbytype.AverageProcessingTimesByType.java

public int run(String[] args) throws Exception {
    Configuration conf = getConf();

    JobConf job = new JobConf(conf, AverageProcessingTimesByType.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("AverageProcessingTimesByType");
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormat(KeyValueTextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    //   job.set("key.value.separator.in.input.line", "");

    JobClient.runJob(job);

    return 0;
}

From source file: boa.datagen.SeqSort.java

License: Apache License

/**
 * The main driver for the sort program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there are communication problems with the
 *                     job tracker.
 */
@Override
public int run(String[] args) throws Exception {
    System.out.println(inPath);

    JobConf jobConf = new JobConf(getConf(), SeqSort.class);
    jobConf.setJobName("sorter");

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(IdentityReducer.class);

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = jobConf.get("test.sort.reduces_per_host");
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }

    // Set user-supplied (possibly default) job configs
    jobConf.setNumReduceTasks(num_reduces);

    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(BytesWritable.class);

    SequenceFileOutputFormat.setCompressOutput(jobConf, true);
    SequenceFileOutputFormat.setOutputCompressorClass(jobConf, SnappyCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(jobConf, CompressionType.BLOCK);

    // Set the input and output paths.
    FileInputFormat.setInputPaths(jobConf, inPath);
    FileOutputFormat.setOutputPath(jobConf, new Path(outPath));

    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
            + FileInputFormat.getInputPaths(jobConf)[0] + " into " + FileOutputFormat.getOutputPath(jobConf)
            + " with " + num_reduces + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    jobResult = JobClient.runJob(jobConf);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return 0;
}

From source file: br.eti.kinoshita.hadoop.WordCount.java

License: Open Source License

public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(WordCount.class);
    conf.setJarByClass(WordCount.class);
    conf.setJobName("wordcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    //FileInputFormat.setInputPaths(conf, new Path("hdfs://chuva:9000/test/leiseca."));
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
}

From source file: br.ufrj.nce.recureco.distributedindex.indexer.IndexerMain.java

License: Open Source License

public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(IndexerMain.class);
    conf.setJobName("indexer");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(IndexerMap.class);
    conf.setCombinerClass(IndexerReduce.class);
    conf.setReducerClass(IndexerReduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
}

From source file: BU.MET.CS755.SpeciesIterDriver2.java

static boolean MRGraphBuilder(String args[], int iterCnt) {
    Job theJob = null;

    conf = new JobConf(SpeciesIterDriver2.class);
    conf.setJobName("Species Graph Builder");
    conf.setNumReduceTasks(5);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(SpeciesGraphBuilderMapper.class);
    conf.setReducerClass(SpeciesGraphBuilderReducer.class);

    // Reading in XML.
    conf.setInputFormat(StreamInputFormat.class);
    conf.set("stream.recordreader.class", "org.apache.hadoop.streaming.StreamXmlRecordReader");

    // Look for the <page> record in the XML.
    conf.set("stream.recordreader.begin", "<page>");
    conf.set("stream.recordreader.end", "</page>");

    inputpath = args[0];
    outputpath = args[1] + iterCnt;

    FileInputFormat.setInputPaths(conf, new Path(inputpath));
    FileOutputFormat.setOutputPath(conf, new Path(outputpath));

    try {
        theJob = new Job(conf, "SpeciesIter");
        theJob.submit();
    } catch (Exception e) {
        e.printStackTrace();
    }

    try {
        if (theJob != null) {
            theJob.waitForCompletion(true);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }

    return true;
}