List of usage examples for org.apache.hadoop.fs.FileSystem.get
public static FileSystem get(URI uri, Configuration conf) throws IOException
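Before the project-specific examples below, a minimal sketch of the call itself may help. It is illustrative only: the class name, the hdfs:// URI, and the output path are assumptions, not taken from any example on this page.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemGetSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // FileSystem.get(URI, Configuration) selects the implementation (hdfs://, s3a://, file://, ...)
        // from the URI scheme and the supplied configuration.
        FileSystem fs = FileSystem.get(new URI("hdfs://namenode:8020/"), conf);
        Path out = new Path("/user/data/output");
        // Typical pattern in the examples below: probe a path before using it as an output directory.
        if (fs.exists(out)) {
            System.out.println(out + " already exists");
        }
    }
}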
From source file:be.ugent.intec.halvade.MapReduceRunner.java
License:Open Source License
protected int runHalvadeJob(Configuration halvadeConf, String tmpOutDir, int jobType)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    String pipeline = "";
    if (jobType == HalvadeResourceManager.RNA_SHMEM_PASS2) {
        HalvadeConf.setIsPass2(halvadeConf, true);
        HalvadeResourceManager.setJobResources(halvadeOpts, halvadeConf, jobType, false, halvadeOpts.useBamInput);
        pipeline = RNA_PASS2;
    } else if (jobType == HalvadeResourceManager.DNA) {
        HalvadeResourceManager.setJobResources(halvadeOpts, halvadeConf, jobType, false, halvadeOpts.useBamInput);
        pipeline = DNA;
    }
    HalvadeConf.setOutDir(halvadeConf, tmpOutDir);
    FileSystem outFs = FileSystem.get(new URI(tmpOutDir), halvadeConf);
    if (outFs.exists(new Path(tmpOutDir))) {
        Logger.INFO("The output directory '" + tmpOutDir + "' already exists.");
        Logger.INFO("ERROR: Please remove this directory before trying again.");
        System.exit(-2);
    }
    if (halvadeOpts.useBamInput)
        setHeaderFile(halvadeOpts.in, halvadeConf);

    Job halvadeJob = Job.getInstance(halvadeConf, "Halvade" + pipeline);
    halvadeJob.addCacheArchive(new URI(halvadeOpts.halvadeBinaries));
    halvadeJob.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.HalvadeMapper.class);
    addInputFiles(halvadeOpts.in, halvadeConf, halvadeJob);
    FileOutputFormat.setOutputPath(halvadeJob, new Path(tmpOutDir));

    if (jobType == HalvadeResourceManager.RNA_SHMEM_PASS2) {
        halvadeJob.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.StarAlignPassXMapper.class);
        halvadeJob.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.RnaGATKReducer.class);
    } else if (jobType == HalvadeResourceManager.DNA) {
        halvadeJob.setMapperClass(halvadeOpts.alignmentTools[halvadeOpts.aln]);
        halvadeJob.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.DnaGATKReducer.class);
    }

    halvadeJob.setMapOutputKeyClass(ChromosomeRegion.class);
    halvadeJob.setMapOutputValueClass(SAMRecordWritable.class);
    halvadeJob.setInputFormatClass(HalvadeTextInputFormat.class);
    halvadeJob.setOutputKeyClass(Text.class);
    if (halvadeOpts.mergeBam) {
        halvadeJob.setSortComparatorClass(SimpleChrRegionComparator.class);
        halvadeJob.setOutputValueClass(SAMRecordWritable.class);
    } else {
        halvadeJob.setPartitionerClass(ChrRgPartitioner.class);
        halvadeJob.setSortComparatorClass(ChrRgSortComparator.class);
        halvadeJob.setGroupingComparatorClass(ChrRgGroupingComparator.class);
        halvadeJob.setOutputValueClass(VariantContextWritable.class);
    }

    if (halvadeOpts.justAlign)
        halvadeJob.setNumReduceTasks(0);
    else if (halvadeOpts.mergeBam) {
        halvadeJob.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.BamMergeReducer.class);
        halvadeJob.setNumReduceTasks(1);
    } else
        halvadeJob.setNumReduceTasks(halvadeOpts.reduces);

    if (halvadeOpts.useBamInput) {
        halvadeJob.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.AlignedBamMapper.class);
        halvadeJob.setInputFormatClass(BAMInputFormat.class);
    }
    return runTimedJob(halvadeJob, "Halvade Job");
}
From source file:be.ugent.intec.halvade.MapReduceRunner.java
License:Open Source License
protected int runCombineJob(String halvadeOutDir, String mergeOutDir, boolean featureCount)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    Configuration combineConf = getConf();
    if (!halvadeOpts.out.endsWith("/"))
        halvadeOpts.out += "/";
    HalvadeConf.setInputDir(combineConf, halvadeOutDir);
    HalvadeConf.setOutDir(combineConf, mergeOutDir);
    FileSystem outFs = FileSystem.get(new URI(mergeOutDir), combineConf);
    if (outFs.exists(new Path(mergeOutDir))) {
        Logger.INFO("The output directory '" + mergeOutDir + "' already exists.");
        Logger.INFO("ERROR: Please remove this directory before trying again.");
        System.exit(-2);
    }
    HalvadeConf.setReportAllVariant(combineConf, halvadeOpts.reportAll);
    HalvadeResourceManager.setJobResources(halvadeOpts, combineConf, HalvadeResourceManager.COMBINE, false,
            halvadeOpts.useBamInput);
    Job combineJob = Job.getInstance(combineConf, "HalvadeCombineVCF");
    combineJob.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.VCFCombineMapper.class);
    addInputFiles(halvadeOutDir, combineConf, combineJob, featureCount ? ".count" : ".vcf");
    FileOutputFormat.setOutputPath(combineJob, new Path(mergeOutDir));
    combineJob.setMapperClass(featureCount ? be.ugent.intec.halvade.hadoop.mapreduce.HTSeqCombineMapper.class
            : be.ugent.intec.halvade.hadoop.mapreduce.VCFCombineMapper.class);
    combineJob.setMapOutputKeyClass(featureCount ? Text.class : LongWritable.class);
    combineJob.setMapOutputValueClass(featureCount ? LongWritable.class : VariantContextWritable.class);
    combineJob.setInputFormatClass(featureCount ? TextInputFormat.class : VCFInputFormat.class);
    combineJob.setNumReduceTasks(1);
    combineJob.setReducerClass(featureCount ? be.ugent.intec.halvade.hadoop.mapreduce.HTSeqCombineReducer.class
            : be.ugent.intec.halvade.hadoop.mapreduce.VCFCombineReducer.class);
    combineJob.setOutputKeyClass(Text.class);
    combineJob.setOutputValueClass(featureCount ? LongWritable.class : VariantContextWritable.class);
    return runTimedJob(combineJob, (featureCount ? "featureCounts" : "VCF") + " Combine Job");
}
From source file:be.ugent.intec.halvade.MapReduceRunner.java
License:Open Source License
protected void setHeaderFile(String input, Configuration conf) throws IOException, URISyntaxException {
    FileSystem fs = FileSystem.get(new URI(input), conf);
    String headerFile = null;
    if (fs.getFileStatus(new Path(input)).isDirectory()) {
        FileStatus[] files = fs.listStatus(new Path(input));
        if (files.length > 0)
            headerFile = files[0].getPath().toString();
    } else
        headerFile = input;
    if (headerFile != null)
        HalvadeConf.setHeaderFile(conf, headerFile);
}
From source file:be.ugent.intec.halvade.MapReduceRunner.java
License:Open Source License
protected void addInputFiles(String input, Configuration conf, Job job) throws URISyntaxException, IOException {
    FileSystem fs = FileSystem.get(new URI(input), conf);
    Logger.DEBUG("adding input files from " + input);
    if (fs.getFileStatus(new Path(input)).isDirectory()) {
        // add every file in directory
        FileStatus[] files = fs.listStatus(new Path(input));
        for (FileStatus file : files) {
            if (!file.isDirectory()) {
                FileInputFormat.addInputPath(job, file.getPath());
            }
        }
    } else
        FileInputFormat.addInputPath(job, new Path(input));
}
From source file:be.ugent.intec.halvade.MapReduceRunner.java
License:Open Source License
protected void addInputFiles(String input, Configuration conf, Job job, String filter)
        throws URISyntaxException, IOException {
    FileSystem fs = FileSystem.get(new URI(input), conf);
    if (fs.getFileStatus(new Path(input)).isDirectory()) {
        // add every file in directory
        FileStatus[] files = fs.listStatus(new Path(input));
        for (FileStatus file : files) {
            if (!file.isDirectory() && file.getPath().getName().endsWith(filter)) {
                FileInputFormat.addInputPath(job, file.getPath());
            }
        }
    } else {
        FileInputFormat.addInputPath(job, new Path(input));
    }
}
From source file:be.ugent.intec.halvade.uploader.HalvadeUploader.java
License:Open Source License
private int processFiles() throws IOException, InterruptedException, URISyntaxException, Throwable {
    Timer timer = new Timer();
    timer.start();
    AWSUploader upl = null;
    FileSystem fs = null;
    // write to s3?
    boolean useAWS = false;
    if (outputDir.startsWith("s3")) {
        useAWS = true;
        String existingBucketName = outputDir.replace("s3://", "").split("/")[0];
        outputDir = outputDir.replace("s3://" + existingBucketName + "/", "");
        upl = new AWSUploader(existingBucketName, SSE, profile);
    } else {
        Configuration conf = getConf();
        fs = FileSystem.get(new URI(outputDir), conf);
        Path outpath = new Path(outputDir);
        if (fs.exists(outpath) && !fs.getFileStatus(outpath).isDirectory()) {
            Logger.DEBUG("please provide an output directory");
            return 1;
        }
    }
    FileReaderFactory factory = FileReaderFactory.getInstance(mthreads);
    if (manifest != null) {
        Logger.DEBUG("reading input files from " + manifest);
        // read from file
        BufferedReader br = new BufferedReader(new FileReader(manifest));
        String line;
        while ((line = br.readLine()) != null) {
            String[] files = line.split("\t");
            if (files.length == 2) {
                factory.addReader(files[0], files[1], false);
            } else if (files.length == 1) {
                factory.addReader(files[0], null, isInterleaved);
            }
        }
    } else if (file1 != null && file2 != null) {
        Logger.DEBUG("Paired-end read input in 2 files.");
        factory.addReader(file1, file2, false);
    } else if (file1 != null) {
        if (isInterleaved)
            Logger.DEBUG("Single-end read input in 1 files.");
        else
            Logger.DEBUG("Paired-end read input in 1 files.");
        factory.addReader(file1, null, isInterleaved);
    } else {
        Logger.DEBUG("Incorrect input, use either a manifest file or give both file1 and file2 as input.");
    }
    // start reading
    (new Thread(factory)).start();
    int bestThreads = mthreads;
    long maxFileSize = getBestFileSize();
    if (useAWS) {
        AWSInterleaveFiles[] fileThreads = new AWSInterleaveFiles[bestThreads];
        // start interleaveFile threads
        for (int t = 0; t < bestThreads; t++) {
            fileThreads[t] = new AWSInterleaveFiles(outputDir + "halvade_" + t + "_", maxFileSize, upl, t, codec);
            fileThreads[t].start();
        }
        for (int t = 0; t < bestThreads; t++)
            fileThreads[t].join();
        if (upl != null)
            upl.shutDownNow();
    } else {
        HDFSInterleaveFiles[] fileThreads = new HDFSInterleaveFiles[bestThreads];
        // start interleaveFile threads
        for (int t = 0; t < bestThreads; t++) {
            fileThreads[t] = new HDFSInterleaveFiles(outputDir + "halvade_" + t + "_", maxFileSize, fs, t, codec);
            fileThreads[t].start();
        }
        for (int t = 0; t < bestThreads; t++)
            fileThreads[t].join();
    }
    factory.finalize();
    timer.stop();
    Logger.DEBUG("Time to process data: " + timer.getFormattedCurrentTime());
    return 0;
}
From source file:be.ugent.intec.halvade.uploader.input.BaseFileReader.java
protected static BufferedReader getReader(boolean readFromDistributedStorage, String file)
        throws FileNotFoundException, IOException {
    InputStream hdfsIn;
    if (readFromDistributedStorage) {
        Path pt = new Path(file);
        FileSystem fs = FileSystem.get(pt.toUri(), new Configuration());
        hdfsIn = fs.open(pt);
        // read the stream in the correct format!
        if (file.endsWith(".gz")) {
            GZIPInputStream gzip = new GZIPInputStream(hdfsIn, BUFFERSIZE);
            return new BufferedReader(new InputStreamReader(gzip));
        } else if (file.endsWith(".bz2")) {
            CBZip2InputStream bzip2 = new CBZip2InputStream(hdfsIn);
            return new BufferedReader(new InputStreamReader(bzip2));
        } else
            return new BufferedReader(new InputStreamReader(hdfsIn));
    } else {
        if (file.endsWith(".gz")) {
            GZIPInputStream gzip = new GZIPInputStream(new FileInputStream(file), BUFFERSIZE);
            return new BufferedReader(new InputStreamReader(gzip));
        } else if (file.endsWith(".bz2")) {
            CBZip2InputStream bzip2 = new CBZip2InputStream(new FileInputStream(file));
            return new BufferedReader(new InputStreamReader(bzip2));
        } else if (file.equals("-")) {
            return new BufferedReader(new InputStreamReader(System.in));
        } else
            return new BufferedReader(new FileReader(file));
    }
}
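The example above builds the FileSystem from the path's own URI via FileSystem.get(pt.toUri(), new Configuration()). An equivalent convenience, sketched below with illustrative class and method names, is Path.getFileSystem(Configuration), which resolves the same filesystem from the path's URI.

import java.io.IOException;
import java.io.InputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

class PathGetFileSystemSketch {
    // Open a file through Path.getFileSystem; resolves the same FileSystem as
    // FileSystem.get(pt.toUri(), conf). Class and method names here are illustrative.
    static InputStream open(String file, Configuration conf) throws IOException {
        Path pt = new Path(file);
        FileSystem fs = pt.getFileSystem(conf);
        return fs.open(pt);
    }
}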
From source file:be.ugent.intec.halvade.utils.ChromosomeSplitter.java
License:Open Source License
public void exportSplitter(String filename, Configuration conf) throws URISyntaxException, IOException {
    DataOutputStream dos = null;
    FileSystem hdfs = null;
    try {
        hdfs = FileSystem.get(new URI(filename), conf);
        Path file = new Path(filename);
        if (hdfs.exists(file)) {
            hdfs.delete(file, true);
        }
        OutputStream os = hdfs.create(file);
        dos = new DataOutputStream(os);
        dos.writeInt(regions.size());
        for (BedRegion region : regions) {
            dos.writeUTF(region.contig);
            dos.writeInt(region.start);
            dos.writeInt(region.end);
            dos.writeInt(region.key);
        }
    } finally {
        if (dos != null)
            dos.close();
    }
}
From source file:be.ugent.intec.halvade.utils.ChromosomeSplitter.java
License:Open Source License
private void importSplitter(String filename, Configuration conf) throws URISyntaxException, IOException {
    regions = new ArrayList();
    DataInputStream dis = null;
    FileSystem hdfs = null;
    try {
        hdfs = FileSystem.get(new URI(filename), conf);
        Path file = new Path(filename);
        InputStream is = hdfs.open(file);
        dis = new DataInputStream(is);
        int len = dis.readInt();
        for (int i = 0; i < len; i++) {
            String contig = dis.readUTF();
            int start = dis.readInt();
            int end = dis.readInt();
            int key = dis.readInt();
            regions.add(new BedRegion(contig, start, end, key));
        }
    } finally {
        if (dis != null)
            dis.close();
    }
}
From source file:be.ugent.intec.halvade.utils.HalvadeConf.java
License:Open Source License
public static void setKnownSitesOnHDFS(Configuration conf, String[] val) throws IOException, URISyntaxException {
    conf.setInt(numberOfSites, val.length);
    FileSystem fs;
    for (int i = 0; i < val.length; i++) {
        // check if dir add all files!
        fs = FileSystem.get(new URI(val[i]), conf);
        if (fs.isFile(new Path(val[i]))) {
            conf.set(sitesOnHDFSName + i, val[i]);
        } else {
            FileStatus[] files = fs.listStatus(new Path(val[i]));
            for (FileStatus file : files) {
                if (!file.isDir()) {
                    conf.set(sitesOnHDFSName + i, file.getPath().toString());
                }
            }
        }
    }
}
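A usage note on the examples above (general Hadoop behavior, not something these source files state explicitly): FileSystem.get normally returns a shared, cached instance per URI scheme and authority, so repeated calls such as the one inside the loop above are cheap, but closing the returned object also closes it for every other user of the cache. When an independent instance is needed, FileSystem.newInstance can be used instead; a minimal sketch with an assumed HDFS URI and class name follows.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

class UncachedFileSystemSketch {
    // Unlike FileSystem.get, newInstance bypasses the shared cache, so close() on the
    // returned object does not affect other callers. The URI below is a placeholder assumption.
    static FileSystem freshInstance(Configuration conf) throws Exception {
        return FileSystem.newInstance(new URI("hdfs://namenode:8020/"), conf);
    }
}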