List of usage examples for org.apache.hadoop.mapred FileInputFormat addInputPath
public static void addInputPath(JobConf conf, Path path)
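Before the collected real-world examples below, a minimal self-contained sketch of the call in isolation. The class name AddInputPathSketch and the /data/... paths are placeholders, not taken from any source on this page. The key behavior: addInputPath appends a path to the job's input list, so repeated calls accumulate inputs (setInputPaths, by contrast, replaces the whole list).

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;

public class AddInputPathSketch {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(AddInputPathSketch.class);
        conf.setJobName("addInputPath-sketch");

        // addInputPath appends to the job's input list; calling it twice
        // makes the job read both directories.
        FileInputFormat.addInputPath(conf, new Path("/data/in1"));
        FileInputFormat.addInputPath(conf, new Path("/data/in2"));
        FileOutputFormat.setOutputPath(conf, new Path("/data/out"));

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        // Identity map-only job, just to make the sketch complete.
        conf.setMapperClass(IdentityMapper.class);
        conf.setNumReduceTasks(0);
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        JobClient.runJob(conf);
    }
}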
From source file:contrail.stages.GraphToFasta.java
License:Open Source License
@Override
public RunningJob runJob() throws Exception {
    String inputPath = (String) stage_options.get("inputpath");
    String outputPath = (String) stage_options.get("outputpath");

    sLogger.info(" - inputpath: " + inputPath);
    sLogger.info(" - outputpath: " + outputPath);

    JobConf conf = new JobConf(GraphToFasta.class);
    AvroJob.setInputSchema(conf, GraphNodeData.SCHEMA$);

    initializeJobConfiguration(conf);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    AvroInputFormat<GraphNodeData> input_format = new AvroInputFormat<GraphNodeData>();
    conf.setInputFormat(input_format.getClass());
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    // Make it mapper only.
    conf.setNumReduceTasks(0);
    conf.setMapperClass(GraphToFastqMapper.class);

    if (stage_options.containsKey("writeconfig")) {
        writeJobConfig(conf);
    } else {
        // Delete the output directory if it exists already
        Path out_path = new Path(outputPath);
        if (FileSystem.get(conf).exists(out_path)) {
            // TODO(jlewi): We should only delete an existing directory
            // if explicitly told to do so.
            sLogger.info("Deleting output path: " + out_path.toString() + " "
                    + "because it already exists.");
            FileSystem.get(conf).delete(out_path, true);
        }

        long starttime = System.currentTimeMillis();
        RunningJob result = JobClient.runJob(conf);
        long endtime = System.currentTimeMillis();

        float diff = (float) ((endtime - starttime) / 1000.0);
        System.out.println("Runtime: " + diff + " s");
        return result;
    }
    return null;
}
From source file:Corrector.FindError.java
License:Apache License
public RunningJob run(String inputPath, String outputPath, int idx, String hkmerlist) throws Exception {
    sLogger.info("Tool name: FindError");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    JobConf conf = new JobConf(FindError.class);
    conf.setJobName("FindError " + inputPath + " " + Config.K);
    conf.setLong("IDX", idx);

    //\\
    DistributedCache.addCacheFile(new URI(hkmerlist), conf);
    //\\

    Config.initializeConfiguration(conf);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(FindErrorMapper.class);
    conf.setReducerClass(FindErrorReducer.class);

    // delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(outputPath), true);

    return JobClient.runJob(conf);
}
From source file:Corrector.Graph2Fasta.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception {
    sLogger.info("Tool name: Graph2Fasta [7/7]");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    JobConf conf = new JobConf(Graph2Fasta.class);
    conf.setJobName("Graph2Fasta " + inputPath);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(Graph2FastaMapper.class);

    Config.initializeConfiguration(conf);
    conf.setNumReduceTasks(0);

    // delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(outputPath), true);

    return JobClient.runJob(conf);
}
From source file:Corrector.Graph2Sfa.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception {
    sLogger.info("Tool name: Graph2Sfa [0/7]");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    JobConf conf = new JobConf(Graph2Sfa.class);
    conf.setJobName("Graph2Sfa " + inputPath);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(Graph2SfaMapper.class);

    Config.initializeConfiguration(conf);
    conf.setNumReduceTasks(0);

    // delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(outputPath), true);

    return JobClient.runJob(conf);
}
From source file:Corrector.IdentifyTrustedReads.java
License:Apache License
public RunningJob run(String inputPath, String outputPath, long kmer_threshold) throws Exception {
    sLogger.info("Tool name: IdentifyTrustedReads");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    JobConf conf = new JobConf(IdentifyTrustedReads.class);
    conf.setJobName("IdentifyTrustedReads " + inputPath + " " + Config.K);
    conf.setLong("KmerThreshold", kmer_threshold);
    // conf.setLong("AllKmer", allkmer);

    Config.initializeConfiguration(conf);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(IdentifyTrustedReads.IdentifyTrustedReadsMapper.class);
    conf.setReducerClass(IdentifyTrustedReads.IdentifyTrustedReadsReducer.class);

    // delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(outputPath), true);

    return JobClient.runJob(conf);
}
From source file:Corrector.KmerFrequencyOfReads.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception {
    sLogger.info("Tool name: KmerFrequencyOfReads");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    JobConf conf = new JobConf(KmerFrequencyOfReads.class);
    conf.setJobName("KmerFrequencyOfReads " + inputPath + " " + Config.K);

    Config.initializeConfiguration(conf);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    //conf.setMapOutputValueClass(IntWritable.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(KmerFrequencyOfReads.KmerFrequencyOfReadsMapper.class);
    conf.setReducerClass(KmerFrequencyOfReads.KmerFrequencyOfReadsReducer.class);

    // delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(outputPath), true);

    return JobClient.runJob(conf);
}
From source file:Corrector.PreCorrect.java
License:Apache License
public RunningJob run(String inputPath, String outputPath, int idx, String hkmerlist) throws Exception {
    sLogger.info("Tool name: PreCorrect");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    JobConf conf = new JobConf(PreCorrect.class);
    conf.setJobName("PreCorrect " + inputPath + " " + Config.K);
    conf.setLong("IDX", idx);

    //\\
    DistributedCache.addCacheFile(new URI(hkmerlist), conf);
    //\\

    Config.initializeConfiguration(conf);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(PreCorrectMapper.class);
    conf.setReducerClass(PreCorrectReducer.class);

    // delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(outputPath), true);

    return JobClient.runJob(conf);
}
From source file:Corrector.PreProcessReads.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception {
    sLogger.info("Tool name: PreProcessReads");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    JobConf conf = new JobConf(PreProcessReads.class);
    conf.setJobName("PreProcessReads " + inputPath + " " + Config.K);

    Config.initializeConfiguration(conf);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(PreProcessReadsMapper.class);
    //conf.setReducerClass(PreProcessReadsReducer.class);
    conf.setNumReduceTasks(0);

    // delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(outputPath), true);

    return JobClient.runJob(conf);
}
From source file:de.tudarmstadt.ukp.dkpro.bigdata.hadoop.DkproHadoopDriver.java
License:Apache License
/**
 * Runs the UIMA pipeline.
 *
 * @return 0 if Hadoop job succeeded, 1 if job failed, 2 if it was killed, otherwise 3
 *
 * @see org.apache.hadoop.util.Tool#run(java.lang.String[])
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.out.println(
                "Usage: " + this.getClass().getSimpleName() + " [hadoop-params] input output [job-params]");
        System.exit(1);
    }
    this.job = new JobConf(getConf(), DkproHadoopDriver.class);
    final FileSystem fs = FileSystem.get(this.job);
    // set the factory class name
    this.job.set("dkpro.uima.factory", this.getClass().getName());

    Path inputPath;
    if (args[0].contains(",")) {
        String[] inputPaths = args[0].split(",");
        inputPath = new Path(inputPaths[0]);
        for (String path : inputPaths)
            FileInputFormat.addInputPath(job, new Path(path));
    }
    else {
        inputPath = new Path(args[0]); // input
        FileInputFormat.setInputPaths(this.job, inputPath);
    }
    final Path outputPath = new Path(args[1]); // output
    final CollectionReader reader = buildCollectionReader();
    // if a collection reader was defined, import data into hdfs
    // try {
    //     final Class<?> c = Class.forName("org.apache.hadoop.io.compress.SnappyCodec");
    //     FileOutputFormat.setOutputCompressorClass(this.job,
    //             (Class<? extends CompressionCodec>) c);
    // }
    // catch (final Exception e) {
    // }
    if (reader != null) {
        final AnalysisEngine xcasWriter = AnalysisEngineFactory.createEngine(
                CASWritableSequenceFileWriter.class, // createTypeSystemDescription(),
                CASWritableSequenceFileWriter.PARAM_PATH, inputPath.toString(),
                CASWritableSequenceFileWriter.PARAM_COMPRESS, true,
                CASWritableSequenceFileWriter.PARAM_FS, job.get(("fs.default.name"), "file:/"));
        runPipeline(reader, xcasWriter);
    }
    // cleanup previous output
    fs.delete(outputPath, true);

    // this is a sensible default for the UKP cluster
    int numMappers = 256;
    // if (args.length > 2) {
    //     numMappers = Integer.parseInt(args[2]);
    // }

    FileOutputFormat.setOutputPath(this.job, outputPath);
    // SequenceFileOutputFormat.setCompressOutput(this.job, true);

    if (this.job.get("mapred.output.compress") == null) {
        this.job.setBoolean("mapred.output.compress", true);
    }
    // Just in case compression is on
    this.job.set("mapred.output.compression.type", "BLOCK");

    if (this.job.getBoolean("dkpro.output.plaintext", false)) {
        this.job.setOutputFormat(TextOutputFormat.class);
    }
    else {
        this.job.setOutputFormat(SequenceFileOutputFormat.class);
    }
    // this.job.set("mapred.output.compression.codec",
    //         "org.apache.hadoop.io.compress.GzipCodec");

    // setup some sensible defaults
    this.job.setMapperClass(this.mapperClass);
    this.job.setReducerClass(this.reducerClass);
    if (getInputFormatClass() != null) {
        this.job.setInputFormat(getInputFormatClass());
    }
    else {
        this.job.setInputFormat(SequenceFileInputFormat.class);
    }
    // this.job.setOutputFormat(TextOutputFormat.class);
    this.job.setMapOutputKeyClass(Text.class);
    this.job.setMapOutputValueClass(BinCasWithTypeSystemWritable.class);
    this.job.setOutputKeyClass(Text.class);
    this.job.setOutputValueClass(BinCasWithTypeSystemWritable.class);
    this.job.setJobName(this.getClass().getSimpleName());
    // this.job.set("mapred.child.java.opts", "-Xmx1g");
    this.job.setInt("mapred.job.map.memory.mb", 1280);
    this.job.setInt("mapred.job.reduce.memory.mb", 1280);
    this.job.setNumMapTasks(numMappers);
    this.job.setNumReduceTasks(0);
    configure(this.job);

    // create symlinks for distributed resources
    DistributedCache.createSymlink(this.job);
    // sLogger.info("Running job " + job.getJobName());

    RunningJob runningJob = JobClient.runJob(this.job);
    runningJob.waitForCompletion();
    int status = runningJob.getJobState();
    if (status == JobStatus.SUCCEEDED) {
        return 0;
    }
    else if (status == JobStatus.FAILED) {
        return 1;
    }
    else if (status == JobStatus.KILLED) {
        return 2;
    }
    else {
        return 3;
    }
}
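A side note on the comma-splitting branch above: the old mapred API also provides FileInputFormat.addInputPaths(JobConf conf, String commaSeparatedPaths) and setInputPaths(JobConf conf, String commaSeparatedPaths), which split on commas internally. Assuming job is the JobConf from the example, the manual loop could be replaced by a single call (a sketch, not part of the original source):

// Hypothetical one-line alternative; splits args[0] on commas and appends each path.
FileInputFormat.addInputPaths(job, args[0]);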
From source file:drdoobs.AggregateJob.java
Note: this last example builds the job with the new-API Job class rather than JobConf, so the addInputPath overload it calls is the one from org.apache.hadoop.mapreduce.lib.input.FileInputFormat, not the org.apache.hadoop.mapred variant documented above; the usage pattern is otherwise the same.

public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJarByClass(getClass());
    job.setJobName(getClass().getSimpleName());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(ProjectionMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    return job.waitForCompletion(true) ? 0 : 1;
}