Example usage for the org.apache.hadoop.mapreduce.Job constructor

List of usage examples for the org.apache.hadoop.mapreduce.Job constructor

Introduction

This page collects example usages of the org.apache.hadoop.mapreduce.Job constructor from open-source projects.

Prototype

Job(Configuration conf, String jobName) throws IOException 
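
All of the examples below use this two-argument form. Since Hadoop 2.x the Job constructors are deprecated in favor of the Job.getInstance factory; a minimal sketch of both forms (the configuration and job name are placeholders, not taken from any project below):

Configuration conf = new Configuration();

// Constructor form exercised by every example on this page
Job job = new Job(conf, "example-job");

// Preferred factory form since Hadoop 2.x, where the constructors are deprecated
Job job2 = Job.getInstance(conf, "example-job");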

Usage

From source file: com.alectenharmsel.research.MoabLogSearch.java

License: Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: MoabLogSearch <input> <output>");
        System.exit(-1);
    }

    Job job = new Job(getConf(), "MoabLogSearch");
    job.setJarByClass(MoabLogSearch.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(MoabLogSearchMapper.class);
    job.setReducerClass(MoabLogSearchReducer.class);

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    Configuration check = job.getConfiguration();
    boolean success = job.waitForCompletion(true);

    return success ? 0 : 1;
}
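
The run(String[]) drivers in these examples follow the org.apache.hadoop.util.Tool pattern (note the getConf() call above). A minimal sketch of a main method that launches such a driver through ToolRunner, assuming MoabLogSearch extends Configured and implements Tool (the main method itself is an assumption, not part of the original source):

public static void main(String[] args) throws Exception {
    // ToolRunner parses generic Hadoop options (-D, -files, ...) and then
    // invokes run() with the remaining arguments
    int exitCode = ToolRunner.run(new Configuration(), new MoabLogSearch(), args);
    System.exit(exitCode);
}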

From source file: com.alectenharmsel.research.SrcTok.java

License: Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: SrcTok <input> <output>");
        System.exit(-1);
    }

    Configuration conf = getConf();
    Job job = new Job(conf, "SrcTok");
    job.setJarByClass(SrcTok.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(SrcTokMapper.class);
    job.setReducerClass(SrcTokReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    boolean success = job.waitForCompletion(true);

    return success ? 0 : 1;
}

From source file: com.antbrains.crf.hadoop.CalcFeatureWeights.java

License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    if (otherArgs.length != 3 && otherArgs.length != 4) {
        System.err.println("CalcFeatureWeights <inDir> <tmpDir> <outDir> [startStep]");
        System.exit(-1);
    }
    int startStep = 1;
    if (otherArgs.length == 4) {
        startStep = Integer.valueOf(otherArgs[otherArgs.length - 1]);
    }
    FileSystem fs = FileSystem.get(conf);
    if (startStep <= 1) {
        System.out.println("calc");
        fs.delete(new Path(otherArgs[1]), true);
        Job job = new Job(conf, CalcFeatureWeights.class.getSimpleName());
        job.setNumReduceTasks(1);
        job.setJarByClass(CalcFeatureWeights.class);
        job.setMapperClass(CalcFeatureMapper.class);
        job.setReducerClass(CalcFeatureReducer.class);

        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        job.setInputFormatClass(SequenceFileInputFormat.class);

        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(MyKey.class);

        job.setOutputKeyClass(MyKey.class);
        job.setOutputValueClass(MyValue.class);
        FileInputFormat.setInputPaths(job, new Path(otherArgs[0]));

        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        boolean res = job.waitForCompletion(true);
        if (!res) {
            System.err.println("step1 failed");
            return;
        }
    }

    if (startStep <= 2) {
        // sort
        fs.delete(new Path(otherArgs[2]), true);
        System.out.println("sort");
        Job job = new Job(conf, CalcFeatureWeights.class.getSimpleName());

        job.setNumReduceTasks(1);
        job.setJarByClass(CalcFeatureWeights.class);
        job.setMapperClass(IdentityMapper.class);
        job.setReducerClass(IdentityReducer.class);

        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        job.setInputFormatClass(SequenceFileInputFormat.class);

        job.setMapOutputKeyClass(MyKey.class);
        job.setMapOutputValueClass(MyValue.class);
        job.setOutputKeyClass(MyKey.class);
        job.setOutputValueClass(MyValue.class);

        FileInputFormat.setInputPaths(job, new Path(otherArgs[1]));

        FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

        boolean res = job.waitForCompletion(true);
        if (!res) {
            System.err.println("step2 failed");
            return;
        }
    }

}

From source file: com.antbrains.crf.hadoop.FeatureCounter.java

License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: FeatureCounter <in> <out> <templatefile>");
        System.exit(2);
    }

    String[] templates = SgdCrf.readTemplates(otherArgs[2]).toArray(new String[0]);
    conf.set("templates", strArr2Str(templates));

    Job job = new Job(conf, FeatureCounter.class.getSimpleName());

    job.setJarByClass(FeatureCounter.class);
    job.setMapperClass(CounterMapper.class);
    job.setCombinerClass(SumReducer.class);
    job.setReducerClass(SumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file: com.antbrains.crf.hadoop.FeatureFilter.java

License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 4) {
        System.err.println("Usage: FeatureFilter <in> <out> <filterRuleFile> <statOnly>");
        System.exit(-1);
    }

    boolean statOnly = true;
    if (otherArgs[3].equalsIgnoreCase("false")) {
        statOnly = false;
    }
    conf.set("statOnly", statOnly + "");

    String rules = FileTools.readFile(otherArgs[2], "UTF8");
    conf.set("rules", rules);
    conf.set("mapred.reduce.tasks", "0");
    Job job = new Job(conf, FeatureFilter.class.getSimpleName());

    job.setJarByClass(FeatureFilter.class);
    job.setMapperClass(CounterMapper.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
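
FeatureFilter runs as a map-only job by forcing the legacy mapred.reduce.tasks property to 0. With the new API the same effect can be requested on the Job object itself; a one-line sketch, assuming the same driver as above:

// Equivalent to conf.set("mapred.reduce.tasks", "0"): skip the reduce phase
// and write mapper output directly to the output path
job.setNumReduceTasks(0);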

From source file: com.antbrains.crf.hadoop.FeatureStat.java

License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: FeatureStat <in> <out>");
        System.exit(2);
    }

    Job job = new Job(conf, FeatureStat.class.getSimpleName());

    job.setJarByClass(FeatureStat.class);
    job.setMapperClass(CounterMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file: com.antbrains.crf.hadoop.InstanceGenerator.java

License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 4) {
        System.err.println("InstanceGenerator <in> <out> <featuredict> <template>");
        System.exit(-1);
    }
    Template template = new Template(otherArgs[3], "UTF8");
    conf.set("template", object2String(template));
    // conf.set("tc", object2String(tc));

    DistributedCache.addCacheFile(new URI(otherArgs[2]), conf);
    conf.set("dict", otherArgs[2]);
    conf.set("mapred.reduce.tasks", "0");
    Job job = new Job(conf, InstanceGenerator.class.getSimpleName());

    job.setJarByClass(InstanceGenerator.class);
    job.setMapperClass(CounterMapper.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
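
InstanceGenerator ships the feature dictionary to the tasks through the older DistributedCache API. In Hadoop 2.x the cache is usually managed through the Job instead; a short sketch of the newer calls, assuming the same dictionary URI (the second line belongs in the Mapper's setup() method):

// Driver side: register the dictionary after the Job has been created
job.addCacheFile(new URI(otherArgs[2]));

// Mapper side: retrieve the cached file URIs from the task context
URI[] cacheFiles = context.getCacheFiles();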

From source file: com.antbrains.crf.hadoop.ParallelTraining.java

License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 4) {
        System.err.println("ParallelTraining <instanceDir> <outDir> <featurecount> <training-params>");
        System.exit(-1);
    }
    int featureCount = Integer.valueOf(otherArgs[2]);
    // conf.set("tc", object2String(tc));

    conf.set("pt.iterate", "1");
    conf.set("pt.featureCount", featureCount + "");

    TrainingParams params = SgdCrf.loadParams(otherArgs[3]);
    System.out.println(new Gson().toJson(params));
    conf.set("pt.params", object2String(params));

    Job job = new Job(conf, ParallelTraining.class.getSimpleName());

    job.setJarByClass(ParallelTraining.class);
    job.setMapperClass(TrainingMapper.class);
    job.setReducerClass(TrainingReducer.class);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(TrainingWeights.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file: com.antbrains.crf.hadoop.ParallelTraining2.java

License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    FileSystem fs = FileSystem.get(conf);
    if (otherArgs.length != 5) {
        System.err.println(
                "ParallelTraining2 <instanceDir> <outDir> <featurecount> <training-params> <out-iter>");
        System.exit(-1);
    }
    // Load the training parameters only after the argument count has been validated
    TrainingParams params = SgdCrf.loadParams(otherArgs[3]);
    System.out.println(new Gson().toJson(params));
    int featureCount = Integer.valueOf(otherArgs[2]);
    // conf.set("tc", object2String(tc));
    int outIter = Integer.valueOf(otherArgs[4]);

    String prevOutDir = "";
    for (int i = 1; i <= outIter; i++) {
        System.out.println("iterator: " + i);
        conf.set("pt.iterate", i + "");
        conf.set("pt.featureCount", featureCount + "");

        conf.set("pt.params", object2String(params));
        String outDir = otherArgs[1] + "/result" + i;

        if (i > 1) {
            conf.set("paramDir", prevOutDir);
        }
        prevOutDir = outDir;
        fs.delete(new Path(outDir), true);

        Job job = new Job(conf, ParallelTraining2.class.getSimpleName());

        job.setJarByClass(ParallelTraining2.class);
        job.setMapperClass(TrainingMapper.class);
        job.setReducerClass(TrainingReducer.class);

        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        System.out.println("outDir: " + outDir);
        FileOutputFormat.setOutputPath(job, new Path(outDir));

        boolean res = job.waitForCompletion(true);
        if (!res) {
            System.err.println("iter " + i + " failed");
            break;
        }
    }
}

From source file: com.antbrains.crf.hadoop.WordCount.java

License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
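
The TokenizerMapper and IntSumReducer classes referenced above are not reproduced on this page. A minimal sketch of a tokenizing mapper that matches the Text/LongWritable output types configured here (the class body is illustrative, not the project's actual source):

public static class TokenizerMapper extends Mapper<Object, Text, Text, LongWritable> {
    private final static LongWritable ONE = new LongWritable(1);
    private final Text word = new Text();

    @Override
    public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        // Emit (token, 1) for every whitespace-separated token in the input line
        StringTokenizer itr = new StringTokenizer(value.toString());
        while (itr.hasMoreTokens()) {
            word.set(itr.nextToken());
            context.write(word, ONE);
        }
    }
}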