List of usage examples for org.apache.hadoop.mapreduce Job getInstance
@Deprecated public static Job getInstance(Cluster ignored, Configuration conf) throws IOException
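The Cluster-taking overload above is deprecated; the examples on this page obtain a Job from a Configuration instead, via Job.getInstance(Configuration) or Job.getInstance(Configuration, String jobName). Below is a minimal, self-contained sketch of that pattern; the class name GetInstanceExample and the map-only identity job are illustrative assumptions, not taken from any source file listed here.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class GetInstanceExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // preferred, non-deprecated factory method: pass a Configuration and a job name
        Job job = Job.getInstance(conf, "identity copy");
        job.setJarByClass(GetInstanceExample.class);
        // map-only job: the default identity Mapper passes input records straight through
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(LongWritable.class);   // default TextInputFormat key type (byte offset)
        job.setOutputValueClass(Text.class);         // default TextInputFormat value type (the line)
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}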
From source file:diamondmapreduce.DiamondMapReduce.java
License:Apache License
int launchHamond(String[] arguments) throws Exception {
    // extract diamond, query, reference and output from array
    String diamond = arguments[0];
    String query = arguments[1];
    String dataBase = arguments[2];
    String outPut = arguments[3];

    // set Hadoop configuration
    Job job = Job.getInstance(getConf(), "DIAMOND");
    Configuration conf = job.getConfiguration();
    SetConf.setHadoopConf(conf);

    // get user name
    userName = HadoopUser.getHadoopUser();

    // delete all existing DIAMOND files under current Hadoop user
    DeleteHDFSFiles.deleteAllFiles(userName);

    // make Hamond directory on HDFS
    MakeHamondHDFSdir.makedir(conf, userName);

    // make DIAMOND database locally, then copy to HDFS with query and delete local database
    MakeDB.makeDB(diamond, dataBase);

    // copy DIAMOND bin, query and local database file to HDFS
    CopyFromLocal.copyFromLocal(conf, diamond, query, dataBase, userName);

    // pass query name and database name to mappers
    conf.set(QUERY, query);
    conf.set(DATABASE, dataBase + ".dmnd");
    String[] subArgs = Arrays.copyOfRange(arguments, 4, arguments.length);
    conf.setStrings("DIAMOND-arguments", subArgs);
    conf.setStrings(OUTPUT, outPut);

    // add DIAMOND bin and database into distributed cache
    job.addCacheFile(new URI("/user/" + userName + "/Hamond/diamond"));
    job.addCacheFile(new URI("/user/" + userName + "/Hamond/" + new Path(dataBase).getName() + ".dmnd"));

    // set job input and output paths
    FileInputFormat.addInputPath(job, new Path("/user/" + userName + "/Hamond/" + new Path(query).getName()));
    FileOutputFormat.setOutputPath(job, new Path("/user/" + userName + "/Hamond/out"));

    // set job driver and mapper
    job.setJarByClass(DiamondMapReduce.class);
    job.setMapperClass(DiamondMapper.class);

    // set job input format to the customized multi-line format
    job.setInputFormatClass(CustomNLineFileInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(0);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:diamondmapreduce.DiamondMapReduce.java
License:Apache License
int launchHamondAWS(String[] arguments) throws Exception {
    // extract diamond, query, reference and output from array
    String diamond = arguments[0];
    String query = arguments[1];
    String dataBase = arguments[2];
    String outPut = arguments[3];

    // set Hadoop configuration
    Job job = Job.getInstance(getConf(), "DIAMOND");
    Configuration conf = job.getConfiguration();
    SetConf.setHadoopConf(conf);

    // get user name
    userName = HadoopUser.getHadoopUser();

    // delete all existing DIAMOND files under current Hadoop user
    DeleteHDFSFiles.deleteAllFiles(userName);

    // make local Hamond dir
    awshamondsidefunctions.MakeHamondDir.make();

    // copy DIAMOND, query, reference from S3 to master local
    awshamondsidefunctions.CopyFromS3.copyFromS3(diamond, query, dataBase);

    // make Hamond directory on HDFS
    MakeHamondHDFSdir.makedir(conf, userName);

    // make DIAMOND database locally, then copy to HDFS with query and delete local database
    MakeDB.makeDB("/mnt/Hamond/diamond", "/mnt/Hamond/" + new Path(dataBase).getName());

    // copy DIAMOND bin, query and local database file to HDFS
    CopyFromLocal.copyFromLocal(conf, "/mnt/Hamond/diamond", "/mnt/Hamond/" + new Path(query).getName(),
            "/mnt/Hamond/" + new Path(dataBase).getName(), userName);

    // pass query name and database name to mappers
    conf.set(QUERY, query);
    conf.set(DATABASE, dataBase);
    conf.set(OUTPUT, outPut);
    String[] subArgs = Arrays.copyOfRange(arguments, 4, arguments.length);
    conf.setStrings("DIAMOND-arguments", subArgs);
    conf.setStrings(OUTPUT, outPut);

    // add DIAMOND bin and database into distributed cache
    job.addCacheFile(new URI("/user/" + userName + "/Hamond/diamond"));
    job.addCacheFile(new URI("/user/" + userName + "/Hamond/" + new Path(dataBase).getName() + ".dmnd"));

    // set job input and output paths
    FileInputFormat.addInputPath(job, new Path("/user/" + userName + "/Hamond/" + new Path(query).getName()));
    FileOutputFormat.setOutputPath(job, new Path("/user/" + userName + "/Hamond/out"));

    // set job driver, mapper and reducer
    job.setJarByClass(DiamondMapReduce.class);
    job.setMapperClass(DiamondMapper.class);
    job.setReducerClass(AWSDiamondReducer.class);

    // set job input format to the customized multi-line format
    job.setInputFormatClass(CustomNLineFileInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(1);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:drdoobs.Dictionary.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "dictionary");
    job.setJarByClass(Dictionary.class);
    job.setMapperClass(ProjectionMapper.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputKeyClass(Text.class);
    // LongSumReducer consumes and emits LongWritable values, so the value class must match
    job.setOutputValueClass(LongWritable.class);
    //job.setMapOutputKeyClass(Text.class);
    //job.setMapOutputValueClass(Text.class);
    //job.setInputFormatClass(KeyValueTextInputFormat.class);
    //job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    boolean result = job.waitForCompletion(true);
    System.exit(result ? 0 : 1);
}
From source file:dz.lab.mapred.counter.StartsWithCountJob_PrintCounters.java
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    // the following property will enable mapreduce to use its packaged local job runner
    //conf.set("mapreduce.framework.name", "local");

    Job job = Job.getInstance(conf, "StartsWithCountJob");
    job.setJarByClass(getClass());

    // configure input source
    TextInputFormat.addInputPath(job, new Path(args[0]));
    job.setInputFormatClass(TextInputFormat.class);

    // configure mapper and reducer
    job.setMapperClass(StartsWithCountMapper.class);
    job.setCombinerClass(StartsWithCountReducer.class);
    job.setReducerClass(StartsWithCountReducer.class);

    // configure output
    TextOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    int resultCode = job.waitForCompletion(true) ? 0 : 1;

    System.out.println("Job is complete! Printing Counters:");
    Counters counters = job.getCounters();
    for (String groupName : counters.getGroupNames()) {
        CounterGroup group = counters.getGroup(groupName);
        System.out.println(group.getDisplayName());
        for (Counter counter : group.getUnderlyingGroup()) {
            System.out.println(" " + counter.getDisplayName() + "=" + counter.getValue());
        }
    }
    return resultCode;
}
From source file:dz.lab.mapred.exclude.StartsWithCountJob_DistCacheAPI.java
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    // the following property will enable mapreduce to use its packaged local job runner
    //conf.set("mapreduce.framework.name", "local");

    Job job = Job.getInstance(conf, "StartsWithCountJob");
    job.setJarByClass(getClass());

    // configure input source
    TextInputFormat.addInputPath(job, new Path(args[0]));
    job.setInputFormatClass(TextInputFormat.class);

    // configure mapper and reducer
    job.setMapperClass(StartsWithCountMapper.class);
    job.setCombinerClass(StartsWithCountReducer.class);
    job.setReducerClass(StartsWithCountReducer.class);

    // configure output
    TextOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // add file to the distributed cache
    Path toCache = new Path("/training/data/startWithExcludeFile.txt");
    job.addCacheFile(toCache.toUri());
    // create symbolic links for all files in DistributedCache; without the links you would have to use the fully qualified path
    job.createSymlink();

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:dz.lab.mapred.hbase.custom_input.StartsWithCountJob_HBaseInput.java
@Override
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf(), "StartsWithCount-FromHBase");
    job.setJarByClass(getClass());

    // set HBase InputFormat
    job.setInputFormatClass(TableInputFormat.class);
    // new mapper to handle data from HBase
    job.setMapperClass(StartsWithCountMapper_HBase.class);

    // add hbase configuration
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    TableMapReduceUtil.addDependencyJars(job);

    // specify table and column to read from
    conf.set(TableInputFormat.INPUT_TABLE, TABLE_NAME);
    conf.set(TableInputFormat.SCAN_COLUMNS, "count:word");

    // configure combiner and reducer
    job.setCombinerClass(StartsWithCountReducer.class);
    job.setReducerClass(StartsWithCountReducer.class);

    // configure output
    TextOutputFormat.setOutputPath(job, new Path(args[0]));
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:dz.lab.mapred.hbase.custom_output.StartsWithCountJob_HBase.java
@Override
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf(), "StartsWithCount-HBase");
    job.setJarByClass(getClass());

    Scan scan = new Scan();
    scan.addColumn(toBytes(FAMILY), toBytes(INPUT_COLUMN));

    // set up job with hbase utils
    TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, StartsWithCountMapper_HBase.class, Text.class,
            IntWritable.class, job);
    TableMapReduceUtil.initTableReducerJob(TABLE_NAME, StartsWithCountReducer_HBase.class, job);
    job.setNumReduceTasks(1);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:edu.bigdata.training.core.mapreduce.WordCount.java
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    System.out.println("arg[0]-->" + args[0]);
    System.out.println("arg[1]-->" + args[1]);

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(SimpleMapper.class);
    job.setCombinerClass(Reduce.class);
    // use the Reduce implementation as the reducer (the original passed the identity Reducer base class)
    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    boolean success = job.waitForCompletion(true);
    // print the counter before exiting; the original printed it after System.exit, so it never ran
    System.out.println("Total Words:" + job.getCounters().findCounter(METRICS.TOTAL_WORDS).getValue());
    System.exit(success ? 0 : 1);
}
From source file:edu.bigdata.training.mrcassandra.MapReduceExample.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "MR Keying");
    job.setJarByClass(MapReduceExample.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path("/user/root/input/all-shakespeare.txt"));
    FileOutputFormat.setOutputPath(job, new Path("/user/root/output/"));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:edu.columbia.hs2807.Sentiment.java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "sentiment");
    job.setJarByClass(Sentiment.class);
    job.setMapperClass(Map.class);
    job.setCombinerClass(Combine.class);
    job.setReducerClass(Reduce.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(LongArrayWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(DoubleWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}