List of usage examples for org.apache.hadoop.mapreduce Job getInstance
public static Job getInstance(Configuration conf, String jobName) throws IOException
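Before the project-specific examples below, here is a minimal, self-contained driver sketch showing the typical pattern: create a Configuration, obtain a Job with Job.getInstance(conf, jobName), configure the mapper, reducer, and I/O paths, then wait for completion. It assumes a classic word-count workload; the class names (WordCountDriver, TokenMapper, SumReducer) and the job name are placeholders introduced here for illustration only.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver { // hypothetical driver class, not from the examples below

    // Emits (word, 1) for every whitespace-separated token in the input line.
    public static class TokenMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer tokens = new StringTokenizer(value.toString());
            while (tokens.hasMoreTokens()) {
                word.set(tokens.nextToken());
                context.write(word, ONE);
            }
        }
    }

    // Sums the counts emitted for each word.
    public static class SumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Job.getInstance(Configuration, String) returns a new Job bound to conf with the given name.
        Job job = Job.getInstance(conf, "word count example");
        job.setJarByClass(WordCountDriver.class);
        job.setMapperClass(TokenMapper.class);
        job.setReducerClass(SumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Usage: pass an input directory and a (non-existing) output directory as the two program arguments when submitting the jar with "hadoop jar". The examples that follow use the same Job.getInstance(conf, jobName) call in their own drivers.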
From source file:be.ugent.intec.halvade.MapReduceRunner.java
License:Open Source License
protected int runCombineJob(String halvadeOutDir, String mergeOutDir, boolean featureCount)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    Configuration combineConf = getConf();
    if (!halvadeOpts.out.endsWith("/"))
        halvadeOpts.out += "/";
    HalvadeConf.setInputDir(combineConf, halvadeOutDir);
    HalvadeConf.setOutDir(combineConf, mergeOutDir);
    FileSystem outFs = FileSystem.get(new URI(mergeOutDir), combineConf);
    if (outFs.exists(new Path(mergeOutDir))) {
        Logger.INFO("The output directory \'" + mergeOutDir + "\' already exists.");
        Logger.INFO("ERROR: Please remove this directory before trying again.");
        System.exit(-2);
    }
    HalvadeConf.setReportAllVariant(combineConf, halvadeOpts.reportAll);
    HalvadeResourceManager.setJobResources(halvadeOpts, combineConf, HalvadeResourceManager.COMBINE, false,
            halvadeOpts.useBamInput);
    Job combineJob = Job.getInstance(combineConf, "HalvadeCombineVCF");
    combineJob.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.VCFCombineMapper.class);
    addInputFiles(halvadeOutDir, combineConf, combineJob, featureCount ? ".count" : ".vcf");
    FileOutputFormat.setOutputPath(combineJob, new Path(mergeOutDir));
    combineJob.setMapperClass(featureCount ? be.ugent.intec.halvade.hadoop.mapreduce.HTSeqCombineMapper.class
            : be.ugent.intec.halvade.hadoop.mapreduce.VCFCombineMapper.class);
    combineJob.setMapOutputKeyClass(featureCount ? Text.class : LongWritable.class);
    combineJob.setMapOutputValueClass(featureCount ? LongWritable.class : VariantContextWritable.class);
    combineJob.setInputFormatClass(featureCount ? TextInputFormat.class : VCFInputFormat.class);
    combineJob.setNumReduceTasks(1);
    combineJob.setReducerClass(featureCount ? be.ugent.intec.halvade.hadoop.mapreduce.HTSeqCombineReducer.class
            : be.ugent.intec.halvade.hadoop.mapreduce.VCFCombineReducer.class);
    combineJob.setOutputKeyClass(Text.class);
    combineJob.setOutputValueClass(featureCount ? LongWritable.class : VariantContextWritable.class);
    return runTimedJob(combineJob, (featureCount ? "featureCounts" : "VCF") + " Combine Job");
}
From source file:BinningByState.Driver.java
public static void main(String args[]) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "BinningByState");
    MultipleOutputs.addNamedOutput(job, "bins", TextOutputFormat.class, Text.class, NullWritable.class);
    MultipleOutputs.setCountersEnabled(job, true);
    job.setJarByClass(Driver.class);
    job.setMapperClass(BinningMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(0);
    // job.setOutputKeyClass(Text.class);
    // job.setOutputValueClass(NullWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:bulkload.ImportTsv.java
License:Apache License
/**
 * Sets up the actual job.
 *
 * @param conf
 *            The current configuration.
 * @param args
 *            The command line parameters.
 * @return The newly created job.
 * @throws IOException
 *             When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    Job job = null;
    try (Connection connection = ConnectionFactory.createConnection(conf)) {
        try (Admin admin = connection.getAdmin()) {
            // Support non-XML supported characters
            // by re-encoding the passed separator as a Base64 string.
            String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
            if (actualSeparator != null) {
                conf.set(SEPARATOR_CONF_KEY, Base64.encodeBytes(actualSeparator.getBytes()));
            }

            TableName tableName = TableName.valueOf(args[0]);
            if (!admin.tableExists(tableName)) {
                String errorMsg = format("Table '%s' does not exist.", tableName);
                LOG.error(errorMsg);
                throw new TableNotFoundException(errorMsg);
            }

            Path inputDir = new Path(args[1]);
            String jobName = conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName.getNameAsString());
            job = Job.getInstance(conf, jobName);
            job.setJarByClass(TsvImporter.class);
            FileInputFormat.setInputPaths(job, inputDir);
            job.setInputFormatClass(TextInputFormat.class);
            job.setMapperClass(TsvImporter.class);

            String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
            if (hfileOutPath != null) {
                try (HTable table = (HTable) connection.getTable(tableName)) {
                    Path outputDir = new Path(hfileOutPath);
                    FileSystem fs = FileSystem.get(conf);
                    if (fs.exists(outputDir)) {
                        if (!fs.delete(outputDir, true)) {
                            throw new IllegalStateException("delete path:" + outputDir + " failed");
                        }
                    }
                    FileOutputFormat.setOutputPath(job, outputDir);
                    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
                    job.setMapOutputValueClass(Put.class);
                    job.setReducerClass(PutSortReducer.class);
                    HFileOutputFormat2.configureIncrementalLoad(job, table, table);
                }
            } else {
                // No reducers. Just write straight to table. Call
                // initTableReducerJob to set up the TableOutputFormat.
                TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
                job.setNumReduceTasks(0);
                // TableMapReduceUtil.addDependencyJars(job);
                // TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
                //     com.google.common.base.Function.class /* Guava used by TsvParser */);
            }

            // Workaround to remove unnecessary hadoop dependencies
            String[] jars = job.getConfiguration().get("tmpjars").split(",", -1);
            StringBuilder filteredJars = new StringBuilder();
            for (String j : jars) {
                String[] parts = j.split("/", -1);
                String fileName = parts[parts.length - 1];
                if (fileName.indexOf("hadoop-") != 0) {
                    filteredJars.append(j);
                    filteredJars.append(",");
                }
            }
            job.getConfiguration().set("tmpjars", filteredJars.toString());
        }
    }
    return job;
}
From source file:cityhub.CityHub.java
@Override
public int run(String[] strings) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "ReduceJoin");
    job.setJarByClass(CityHub.class);
    MultipleInputs.addInputPath(job, new Path(strings[0]), TextInputFormat.class, JoinMapper1.class);
    MultipleInputs.addInputPath(job, new Path(strings[1]), TextInputFormat.class, JoinMapper2.class);
    job.getConfiguration().set("join.type", "innerjoin");
    job.setReducerClass(JoinReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(strings[2]));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    boolean complete = job.waitForCompletion(true);

    Configuration conf1 = new Configuration();
    Job job2 = Job.getInstance(conf1, "chaining");
    if (complete) {
        job2.setJarByClass(CityHub.class);
        MultipleInputs.addInputPath(job2, new Path(strings[2]), TextInputFormat.class, JoinMapper3.class);
        MultipleInputs.addInputPath(job2, new Path(strings[3]), TextInputFormat.class, JoinMapper4.class);
        job2.getConfiguration().set("join.type", "innerjoin");
        job2.setReducerClass(JoinReducer1.class);
        job2.setOutputFormatClass(TextOutputFormat.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);
        TextOutputFormat.setOutputPath(job2, new Path(strings[4]));
    }
    boolean success = job2.waitForCompletion(true);
    return success ? 0 : 4;
}
From source file:cityhubpartitioningcountry.CityHubPartitioning.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "partitioner");
    job.setJarByClass(CityHubPartitioning.class);
    job.setMapperClass(PartitionMonthMapper.class);
    job.setReducerClass(countryReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setPartitionerClass(PartitionCountryPartitioner.class);
    job.setNumReduceTasks(27);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:cityhubtopten.CityHubTopTen.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) {
    try {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "Top10");
        job.setJarByClass(CityHubTopTen.class);
        job.setMapperClass(Top10Mapper.class);
        job.setReducerClass(Top10Reducer.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    } catch (IOException | InterruptedException | ClassNotFoundException ex) {
        System.out.println("Error message: " + ex.getMessage());
    }
}
From source file:clustering.init.Driver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s input_dir output_dir [column_splitter] [dict_path]\n",
                this.getClass().getSimpleName());
        System.exit(1);
    }

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);

    if (args.length > 2) {
        conf.set("column.splitter", args[2]);
    } else {
        conf.set("column.splitter", ",");
    }

    if (args.length > 3) {
        conf.set("dict.path", args[3]);
    } else {
        conf.set("dict.path", "./dicts");
    }

    Job job = Job.getInstance(conf, "Initialization job");
    job.setJarByClass(Driver.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    job.setMapperClass(WordSepMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    long starttime = System.currentTimeMillis();
    boolean complete = job.waitForCompletion(true);
    long endtime = System.currentTimeMillis();
    System.out.println("Initialization job finished in: " + (endtime - starttime) / 1000 + " seconds");

    return complete ? 0 : 1;
}
From source file:clustering.inverted_index.Driver.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s tf_idf_result_dir output_dir " + "[decimal_number] [pruning_threshold]\n",
                getClass().getSimpleName());
        System.exit(1);
    }

    Path normDir = new Path(args[1] + "/normed");
    Path resultDir = new Path(args[1] + "/result");

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);

    if (args.length > 2) {
        conf.setInt("deci.number", Integer.valueOf(args[2]));
    } else {
        conf.setInt("deci.number", 4);
    }

    if (args.length > 3) {
        conf.setBoolean("pruning", true);
        conf.setDouble("pruning.threshold", Double.valueOf(args[3]));
    } else {
        conf.setBoolean("pruning", false);
    }

    JobControl jobControl = new JobControl("inverted-index jobs");

    /* step 1, normalize the vector length of each document */
    Job job1 = Job.getInstance(conf, "tf idf normalizer job");
    job1.setJarByClass(Driver.class);
    FileInputFormat.addInputPath(job1, new Path(args[0]));
    job1.setInputFormatClass(KeyValueTextInputFormat.class);
    job1.setMapperClass(Mapper.class);
    job1.setReducerClass(NormalizerReducer.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(job1, normDir);

    ControlledJob controlledJob1 = new ControlledJob(conf);
    controlledJob1.setJob(job1);
    jobControl.addJob(controlledJob1);

    /* step 2, calculate inverted index */
    Job job2 = Job.getInstance(conf, "inverted index job");
    job2.setJarByClass(Driver.class);
    FileInputFormat.addInputPath(job2, normDir);
    job2.setInputFormatClass(KeyValueTextInputFormat.class);
    job2.setMapperClass(Mapper.class);
    job2.setReducerClass(InvertedIndexReducer.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(job2, resultDir);

    ControlledJob controlledJob2 = new ControlledJob(conf);
    controlledJob2.setJob(job2);
    controlledJob2.addDependingJob(controlledJob1);
    jobControl.addJob(controlledJob2);

    MapReduceUtils.runJobs(jobControl);

    return job2.waitForCompletion(true) ? 0 : 1;
}
From source file:clustering.link_back.pre.Driver.java
License:Apache License
public Job configJob(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.printf("usage: %s input_dir output_dir\n", getClass().getSimpleName());
        System.exit(1);
    }

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);

    Job job = Job.getInstance(conf, "linkback pre step");
    job.setJarByClass(Driver.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setMapperClass(AttachMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job;
}
From source file:clustering.link_back.step1.Driver.java
License:Apache License
public Job configJob(String[] args) throws Exception {
    if (args.length < 3) {
        System.err.printf("usage: %s mst_result_dir simhash_result_file output_dir\n",
                getClass().getSimpleName());
        System.exit(1);
    }

    Configuration conf = getConf();
    conf = MapReduceUtils.initConf(conf);

    Job job = Job.getInstance(conf, "link back step 1 job");
    job.setJarByClass(Driver.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileInputFormat.addInputPath(job, new Path(args[1]));
    job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setMapperClass(SetKeyMapper.class);
    job.setMapOutputKeyClass(Step1KeyWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setPartitionerClass(JoinPartitioner.class);
    job.setGroupingComparatorClass(Step1GroupComparator.class);
    job.setReducerClass(JoinReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(args[2]));

    return job;
}