Example usage for org.apache.hadoop.mapreduce Job getInstance

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce Job getInstance.

Prototype

@Deprecated
public static Job getInstance(Cluster ignored, Configuration conf) throws IOException 

Document

Creates a new Job with no particular Cluster and a given Configuration.
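
For quick reference, the following is a minimal, self-contained driver sketch using the two-argument overload Job.getInstance(Configuration, String) that the examples below call; the class name, identity mapper/reducer and input/output paths are placeholders, not taken from any of the projects quoted here.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MinimalDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // getInstance copies the Configuration; later tweaks must go through job.getConfiguration()
        Job job = Job.getInstance(conf, "minimal-example");
        job.setJarByClass(MinimalDriver.class);
        job.setMapperClass(Mapper.class);   // identity mapper, passes (offset, line) through unchanged
        job.setReducerClass(Reducer.class); // identity reducer
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}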

Usage

From source file:diamondmapreduce.DiamondMapReduce.java

License:Apache License

int launchHamond(String[] arguments) throws Exception {

    //extract diamond, query, reference and output from array
    String diamond = arguments[0];
    String query = arguments[1];
    String dataBase = arguments[2];
    String outPut = arguments[3];

    //set Hadoop configuration
    Job job = Job.getInstance(getConf(), "DIAMOND");
    Configuration conf = job.getConfiguration();
    SetConf.setHadoopConf(conf);

    //get user name
    userName = HadoopUser.getHadoopUser();

    //delete all existing DIAMOND files under current Hadoop user
    DeleteHDFSFiles.deleteAllFiles(userName);

    //make Hamond directory on HDFS
    MakeHamondHDFSdir.makedir(conf, userName);

    //make DIAMOND database on local then copy to HDFS with query and delete local database
    MakeDB.makeDB(diamond, dataBase);

    //copy DIAMOND bin, query and local database file to HDFS
    CopyFromLocal.copyFromLocal(conf, diamond, query, dataBase, userName);

    //pass query name and database name to mappers
    conf.set(QUERY, query);
    conf.set(DATABASE, dataBase + ".dmnd");
    String[] subArgs = Arrays.copyOfRange(arguments, 4, arguments.length);
    conf.setStrings("DIAMOND-arguments", subArgs);
    conf.setStrings(OUTPUT, outPut);

    //add DIAMOND bin and database into distributed cache
    job.addCacheFile(new URI("/user/" + userName + "/Hamond/diamond"));
    job.addCacheFile(new URI("/user/" + userName + "/Hamond/" + new Path(dataBase).getName() + ".dmnd"));

    //set job input and output paths
    FileInputFormat.addInputPath(job, new Path("/user/" + userName + "/Hamond/" + new Path(query).getName()));
    FileOutputFormat.setOutputPath(job, new Path("/user/" + userName + "/Hamond/out"));

    //set job driver and mapper
    job.setJarByClass(DiamondMapReduce.class);
    job.setMapperClass(DiamondMapper.class);

    //set job input format into customized multilines format
    job.setInputFormatClass(CustomNLineFileInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(0);

    return job.waitForCompletion(true) ? 0 : 1;

}
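
The driver above ships the DIAMOND binary and database to the tasks with job.addCacheFile, but the mapper side is not shown. The following sketch shows how a mapper could locate those cached files in setup(); the class and field names are illustrative and not taken from DiamondMapper.

import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class CacheAwareMapper extends Mapper<LongWritable, Text, Text, Text> {

    private Path diamondBin;
    private Path database;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // getCacheFiles() returns the URIs registered with job.addCacheFile(...)
        URI[] cached = context.getCacheFiles();
        if (cached == null) {
            return;
        }
        for (URI uri : cached) {
            Path p = new Path(uri.getPath());
            if (p.getName().equals("diamond")) {
                diamondBin = p;          // the DIAMOND executable staged by the driver
            } else if (p.getName().endsWith(".dmnd")) {
                database = p;            // the DIAMOND database file
            }
        }
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // the real mapper would invoke DIAMOND here using diamondBin and database
    }
}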

From source file:diamondmapreduce.DiamondMapReduce.java

License:Apache License

int launchHamondAWS(String[] arguments) throws Exception {

    //extract diamond, query, reference and output from array
    String diamond = arguments[0];
    String query = arguments[1];
    String dataBase = arguments[2];
    String outPut = arguments[3];

    //set Hadoop configuration
    Job job = Job.getInstance(getConf(), "DIAMOND");
    Configuration conf = job.getConfiguration();
    SetConf.setHadoopConf(conf);

    //get user name
    userName = HadoopUser.getHadoopUser();

    //delete all existing DIAMOND files under current Hadoop user
    DeleteHDFSFiles.deleteAllFiles(userName);

    //make local Hamond dir
    awshamondsidefunctions.MakeHamondDir.make();

    //copy DIAMOND, query, reference from S3 to master local
    awshamondsidefunctions.CopyFromS3.copyFromS3(diamond, query, dataBase);

    //make Hamond directory on HDFS
    MakeHamondHDFSdir.makedir(conf, userName);

    //make DIAMOND database on local then copy to HDFS with query and delete local database
    MakeDB.makeDB("/mnt/Hamond/diamond", "/mnt/Hamond/" + new Path(dataBase).getName());

    //copy DIAMOND bin, query and local database file to HDFS
    CopyFromLocal.copyFromLocal(conf, "/mnt/Hamond/diamond", "/mnt/Hamond/" + new Path(query).getName(),
            "/mnt/Hamond/" + new Path(dataBase).getName(), userName);

    //pass query name and database name to mappers
    conf.set(QUERY, query);
    conf.set(DATABASE, dataBase);
    conf.set(OUTPUT, outPut);
    String[] subArgs = Arrays.copyOfRange(arguments, 4, arguments.length);
    conf.setStrings("DIAMOND-arguments", subArgs);
    conf.setStrings(OUTPUT, outPut);

    //add DIAMOND bin and database into distributed cache
    job.addCacheFile(new URI("/user/" + userName + "/Hamond/diamond"));
    job.addCacheFile(new URI("/user/" + userName + "/Hamond/" + new Path(dataBase).getName() + ".dmnd"));

    //set job input and output paths
    FileInputFormat.addInputPath(job, new Path("/user/" + userName + "/Hamond/" + new Path(query).getName()));
    FileOutputFormat.setOutputPath(job, new Path("/user/" + userName + "/Hamond/out"));

    //set job driver and mapper
    job.setJarByClass(DiamondMapReduce.class);
    job.setMapperClass(DiamondMapper.class);
    job.setReducerClass(AWSDiamondReducer.class);

    //set job input format into customized multilines format
    job.setInputFormatClass(CustomNLineFileInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(1);

    return job.waitForCompletion(true) ? 0 : 1;

}

From source file:drdoobs.Dictionary.java

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "dictionary");
    job.setJarByClass(Dictionary.class);
    job.setMapperClass(ProjectionMapper.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    //job.setMapOutputKeyClass(Text.class);
    //job.setMapOutputValueClass(Text.class);

    //job.setInputFormatClass(KeyValueTextInputFormat.class);
    //job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    boolean result = job.waitForCompletion(true);
    System.exit(result ? 0 : 1);
}

From source file:dz.lab.mapred.counter.StartsWithCountJob_PrintCounters.java

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    // the following property will enable mapreduce to use its packaged local job runner
    //conf.set("mapreduce.framework.name", "local");

    Job job = Job.getInstance(conf, "StartsWithCountJob");
    job.setJarByClass(getClass());

    // configure output and input source
    TextInputFormat.addInputPath(job, new Path(args[0]));
    job.setInputFormatClass(TextInputFormat.class);

    // configure mapper and reducer
    job.setMapperClass(StartsWithCountMapper.class);
    job.setCombinerClass(StartsWithCountReducer.class);
    job.setReducerClass(StartsWithCountReducer.class);

    // configure output
    TextOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    int resultCode = job.waitForCompletion(true) ? 0 : 1;
    System.out.println("Job is complete! Printing Counters:");
    Counters counters = job.getCounters();

    for (String groupName : counters.getGroupNames()) {
        CounterGroup group = counters.getGroup(groupName);
        System.out.println(group.getDisplayName());

        for (Counter counter : group.getUnderlyingGroup()) {
            System.out.println(" " + counter.getDisplayName() + "=" + counter.getValue());
        }
    }
    return resultCode;
}

From source file:dz.lab.mapred.exclude.StartsWithCountJob_DistCacheAPI.java

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    // the following property will enable mapreduce to use its packaged local job runner
    //conf.set("mapreduce.framework.name", "local");

    Job job = Job.getInstance(conf, "StartsWithCountJob");
    job.setJarByClass(getClass());

    // configure output and input source
    TextInputFormat.addInputPath(job, new Path(args[0]));
    job.setInputFormatClass(TextInputFormat.class);

    // configure mapper and reducer
    job.setMapperClass(StartsWithCountMapper.class);
    job.setCombinerClass(StartsWithCountReducer.class);
    job.setReducerClass(StartsWithCountReducer.class);

    // configure output
    TextOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    Path toCache = new Path("/training/data/startWithExcludeFile.txt");
    // add file to cache
    job.addCacheFile(toCache.toUri());
    // create symbolic links for all files in DistributedCache; without the links you would have to use fully qualified path
    job.createSymlink();

    return job.waitForCompletion(true) ? 0 : 1;
}
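
Note that Job.createSymlink() is deprecated in current Hadoop releases: symlinks for cached files are created automatically, and a specific link name can be requested with a URI fragment. The lines below sketch an equivalent cache setup for the same file; the link name exclude.txt is an arbitrary choice for illustration.

    // equivalent setup without createSymlink(): the "#exclude.txt" fragment names the
    // symlink that appears in each task's working directory
    job.addCacheFile(new URI("/training/data/startWithExcludeFile.txt#exclude.txt"));
    // tasks can then open the file simply as new File("exclude.txt")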

From source file:dz.lab.mapred.hbase.custom_input.StartsWithCountJob_HBaseInput.java

@Override
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf(), "StartsWithCount-FromHBase");
    job.setJarByClass(getClass());

    // set HBase InputFormat
    job.setInputFormatClass(TableInputFormat.class);
    // new mapper to handle data from HBase
    job.setMapperClass(StartsWithCountMapper_HBase.class);

    // add hbase configuration
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    TableMapReduceUtil.addDependencyJars(job);

    // specify table and column to read from
    conf.set(TableInputFormat.INPUT_TABLE, TABLE_NAME);
    conf.set(TableInputFormat.SCAN_COLUMNS, "count:word");

    // configure mapper and reducer
    job.setCombinerClass(StartsWithCountReducer.class);
    job.setReducerClass(StartsWithCountReducer.class);

    // configure output
    TextOutputFormat.setOutputPath(job, new Path(args[0]));
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:dz.lab.mapred.hbase.custom_output.StartsWithCountJob_HBase.java

@Override
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf(), "StartsWithCount-HBase");
    job.setJarByClass(getClass());

    Scan scan = new Scan();
    scan.addColumn(toBytes(FAMILY), toBytes(INPUT_COLUMN));
    // set up job with hbase utils
    TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, StartsWithCountMapper_HBase.class, Text.class,
            IntWritable.class, job);
    TableMapReduceUtil.initTableReducerJob(TABLE_NAME, StartsWithCountReducer_HBase.class, job);

    job.setNumReduceTasks(1);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:edu.bigdata.training.core.mapreduce.WordCount.java

public static void main(String args[]) throws IOException, ClassNotFoundException, InterruptedException {
    {

        System.out.println("arg[0]-->" + args[0]);
        System.out.println("arg[1]-->" + args[1]);

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(SimpleMapper.class);
        job.setCombinerClass(Reduce.class);
        job.setReducerClass(Reducer.class); // base Reducer class, i.e. an identity reducer; only the combiner sums
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // run the job, read the custom counter, then exit with the job status
        boolean success = job.waitForCompletion(true);
        System.out.println("Total Words:" + job.getCounters().findCounter(METRICS.TOTAL_WORDS).getValue());
        System.exit(success ? 0 : 1);
    }
}
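
The driver reads a custom counter, METRICS.TOTAL_WORDS, after the job finishes, but the enum and the mapper-side increment are not shown. The sketch below shows how such a counter is typically wired up; the enum and mapper here are illustrative assumptions, not the project's actual METRICS enum or SimpleMapper.

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// counters are declared as an enum; the driver looks one up with findCounter(METRICS.TOTAL_WORDS)
enum METRICS { TOTAL_WORDS }

public class CountingMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        for (String token : value.toString().split("\\s+")) {
            if (token.isEmpty()) {
                continue;
            }
            word.set(token);
            context.write(word, ONE);
            // increment the custom counter that the driver prints after completion
            context.getCounter(METRICS.TOTAL_WORDS).increment(1);
        }
    }
}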

From source file:edu.bigdata.training.mrcassandra.MapReduceExample.java

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "MR Keying");
    job.setJarByClass(MapReduceExample.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path("/user/root/input/all-shakespeare.txt"));
    FileOutputFormat.setOutputPath(job, new Path("/user/root/output/"));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:edu.columbia.hs2807.Sentiment.java

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "sentiment");

    job.setJarByClass(Sentiment.class);
    job.setMapperClass(Map.class);
    job.setCombinerClass(Combine.class);
    job.setReducerClass(Reduce.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(LongArrayWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(DoubleWritable.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
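
LongArrayWritable, used above as the map output value class, is not a built-in Hadoop type. A typical way to define such a wrapper is to extend ArrayWritable; the following is a sketch of one plausible definition, and the project's actual class may differ.

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.LongWritable;

public class LongArrayWritable extends ArrayWritable {

    // the no-argument constructor is required so Hadoop can instantiate the class during deserialization
    public LongArrayWritable() {
        super(LongWritable.class);
    }

    public LongArrayWritable(LongWritable[] values) {
        super(LongWritable.class, values);
    }
}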