Example usage for org.apache.hadoop.mapreduce Job setOutputFormatClass

List of usage examples for org.apache.hadoop.mapreduce Job setOutputFormatClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setOutputFormatClass.

Prototype

public void setOutputFormatClass(Class<? extends OutputFormat> cls) throws IllegalStateException 

Source Link

Document

Set the OutputFormat for the job.

Usage

From source file:de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.statistics.TopDomainCounter.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    Job job = Job.getInstance();
    job.setJarByClass(TopDomainCounter.class);

    job.setJobName(TopDomainCounter.class.getName());

    // mapper/*w w  w  .jav  a 2  s .c om*/
    job.setMapperClass(DomainMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // combiner + reducer
    job.setCombinerClass(TextLongCountingReducer.class);
    job.setReducerClass(TextLongCountingReducer.class);

    job.setInputFormatClass(WARCInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // paths
    String commaSeparatedInputFiles = otherArgs[0];
    String outputPath = otherArgs[1];

    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.statistics.vocabulary.WordDistributionStatisticsCollector.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());

    job.setJarByClass(WordDistributionStatisticsCollector.class);
    job.setJobName(WordDistributionStatisticsCollector.class.getName());

    // mapper/*from   ww  w . ja  v a  2  s  .  co m*/
    job.setMapperClass(getMapperClass());
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // reducer
    job.setReducerClass(SumReducer.class);
    job.setInputFormatClass(getInputFormatClass());
    job.setOutputFormatClass(TextOutputFormat.class);

    // paths
    String commaSeparatedInputFiles = args[0];
    String outputPath = args[1];

    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.statistics.WARCRecordCounter.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    System.out.println("Other args: " + Arrays.toString(otherArgs));

    Job job = Job.getInstance(conf);
    job.setJarByClass(WARCRecordCounter.class);

    job.setJobName(WARCRecordCounter.class.getName());

    // mapper/*from   w  w  w  .j  a  v  a2 s. c o m*/
    job.setMapperClass(ResponseMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // combiner + reducer
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);

    job.setInputFormatClass(WARCInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // paths
    String commaSeparatedInputFiles = otherArgs[0];
    String outputPath = otherArgs[1];

    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.utils.PagesByURLExtractor.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());

    for (Map.Entry<String, String> next : job.getConfiguration()) {
        System.out.println(next.getKey() + ": " + next.getValue());
    }//  w  w w  .  ja  v  a 2s . c  o m

    job.setJarByClass(PagesByURLExtractor.class);
    job.setJobName(PagesByURLExtractor.class.getName());

    // mapper
    job.setMapperClass(MapperClass.class);

    // input
    job.setInputFormatClass(WARCInputFormat.class);

    // output
    job.setOutputFormatClass(WARCOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(WARCWritable.class);
    FileOutputFormat.setCompressOutput(job, true);

    // paths
    String commaSeparatedInputFiles = args[0];
    String outputPath = args[1];

    // load IDs to be searched for
    job.getConfiguration().set(MAPREDUCE_MAPPER_URLS, loadURLs(args[2]));

    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.utils.TextToSentencesSplitter.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    Job job = Job.getInstance(conf);
    job.setJarByClass(TextToSentencesSplitter.class);

    job.setJobName(TextToSentencesSplitter.class.getName());

    // mapper/*w ww . j  a v  a  2s . c  om*/
    job.setMapperClass(TextToSentencesSplitter.MapperClass.class);
    job.setInputFormatClass(WARCInputFormat.class);

    // reducer
    job.setReducerClass(ReducerClass.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // paths
    String commaSeparatedInputFiles = otherArgs[0];
    String outputPath = otherArgs[1];

    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:de.tudarmstadt.ukp.experiments.dip.hadoop.ClueWebTRECIdFileExtractor.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());

    for (Map.Entry<String, String> next : job.getConfiguration()) {
        System.out.println(next.getKey() + ": " + next.getValue());
    }// w  w  w.  j  a v a2 s .co  m

    job.setJarByClass(ClueWebTRECIdFileExtractor.class);
    job.setJobName(ClueWebTRECIdFileExtractor.class.getName());

    // mapper
    job.setMapperClass(MapperClass.class);

    // input
    job.setInputFormatClass(WARCInputFormat.class);

    // output
    job.setOutputFormatClass(WARCOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(WARCWritable.class);
    FileOutputFormat.setCompressOutput(job, true);

    // paths
    String commaSeparatedInputFiles = args[0];
    String outputPath = args[1];

    // load IDs to be searched for
    job.getConfiguration().set(MAPREDUCE_MAPPER_TREC_IDS, loadTrecIds(args[2]));

    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:de.tudarmstadt.ukp.experiments.dip.hadoop.OriginalURLGrep.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    org.apache.hadoop.conf.Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    System.out.println("Other args: " + Arrays.toString(otherArgs));

    Job job = Job.getInstance();
    job.setJarByClass(OriginalURLGrep.class);

    job.setJobName(OriginalURLGrep.class.getName());
    job.setMapperClass(OrigURLGrepMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    // cache file - IDs for index
    String idFile = args[2];/*w  ww  .  j ava 2 s. com*/
    System.err.println("idFile: " + idFile);
    job.addCacheFile(new URI(idFile + "#" + NODE_IDS));

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    String commaSeparatedInputFiles = otherArgs[0];
    String outputPath = otherArgs[1];
    System.err.println("commaSeparatedInputFiles: " + commaSeparatedInputFiles);
    System.err.println("outputPath: " + outputPath);

    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:demo.SsJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, "secondary sort");

    job.setJarByClass(SsJob.class);
    job.setPartitionerClass(NaturalKeyPartitioner.class);
    job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class);
    job.setSortComparatorClass(CompositeKeyComparator.class);

    job.setMapOutputKeyClass(StockKey.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(SsMapper.class);
    job.setReducerClass(SsReducer.class);

    job.waitForCompletion(true);/*from   w  ww . j  a  v  a2  s  . c  o  m*/

    return 0;
}

From source file:diamondmapreduce.DiamondMapReduce.java

License:Apache License

int launchHamond(String[] arguments) throws Exception {

    //extract diamond, query, reference and output from array
    String diamond = arguments[0];
    String query = arguments[1];/*w w  w.j a va2 s. c o m*/
    String dataBase = arguments[2];
    String outPut = arguments[3];

    //set Hadoop configuration
    Job job = Job.getInstance(getConf(), "DIAMOND");
    Configuration conf = job.getConfiguration();
    SetConf.setHadoopConf(conf);

    //get user name
    userName = HadoopUser.getHadoopUser();

    //delete all existing DIAMOND files under current Hadoop user
    DeleteHDFSFiles.deleteAllFiles(userName);

    //make Hamond directory on HDFS
    MakeHamondHDFSdir.makedir(conf, userName);

    //make DIAMOND database on local then copy to HDFS with query and delete local database
    MakeDB.makeDB(diamond, dataBase);

    //copy DIAMOND bin, query and local database file to HDFS
    CopyFromLocal.copyFromLocal(conf, diamond, query, dataBase, userName);

    //pass query name and database name to mappers
    conf.set(QUERY, query);
    conf.set(DATABASE, dataBase + ".dmnd");
    String[] subArgs = Arrays.copyOfRange(arguments, 4, arguments.length);
    conf.setStrings("DIAMOND-arguments", subArgs);
    conf.setStrings(OUTPUT, outPut);

    //add DIAMOND bin and database into distributed cache
    job.addCacheFile(new URI("/user/" + userName + "/Hamond/diamond"));
    job.addCacheFile(new URI("/user/" + userName + "/Hamond/" + new Path(dataBase).getName() + ".dmnd"));

    //set job input and output paths
    FileInputFormat.addInputPath(job, new Path("/user/" + userName + "/Hamond/" + new Path(query).getName()));
    FileOutputFormat.setOutputPath(job, new Path("/user/" + userName + "/Hamond/out"));

    //set job driver and mapper
    job.setJarByClass(DiamondMapReduce.class);
    job.setMapperClass(DiamondMapper.class);

    //set job input format into customized multilines format
    job.setInputFormatClass(CustomNLineFileInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(0);

    return job.waitForCompletion(true) ? 0 : 1;

}