List of usage examples for org.apache.hadoop.fs.FileSystem.get
public static FileSystem get(URI uri, Configuration conf) throws IOException
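Before the project-specific examples below, a minimal sketch of the call itself may help. It is illustrative only: the class name, the hdfs:// URI, and the output path are assumptions, not taken from any example on this page.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemGetSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // FileSystem.get(URI, Configuration) selects the implementation (hdfs://, s3a://, file://, ...)
        // from the URI scheme and the supplied configuration.
        FileSystem fs = FileSystem.get(new URI("hdfs://namenode:8020/"), conf);
        Path out = new Path("/user/data/output");
        // Typical pattern in the examples below: probe a path before using it as an output directory.
        if (fs.exists(out)) {
            System.out.println(out + " already exists");
        }
    }
}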
From source file:be.ugent.intec.halvade.MapReduceRunner.java
License:Open Source License
protected int runHalvadeJob(Configuration halvadeConf, String tmpOutDir, int jobType)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    String pipeline = "";
    if (jobType == HalvadeResourceManager.RNA_SHMEM_PASS2) {
        HalvadeConf.setIsPass2(halvadeConf, true);
        HalvadeResourceManager.setJobResources(halvadeOpts, halvadeConf, jobType, false, halvadeOpts.useBamInput);
        pipeline = RNA_PASS2;
    } else if (jobType == HalvadeResourceManager.DNA) {
        HalvadeResourceManager.setJobResources(halvadeOpts, halvadeConf, jobType, false, halvadeOpts.useBamInput);
        pipeline = DNA;
    }
    HalvadeConf.setOutDir(halvadeConf, tmpOutDir);
    FileSystem outFs = FileSystem.get(new URI(tmpOutDir), halvadeConf);
    if (outFs.exists(new Path(tmpOutDir))) {
        Logger.INFO("The output directory '" + tmpOutDir + "' already exists.");
        Logger.INFO("ERROR: Please remove this directory before trying again.");
        System.exit(-2);
    }
    if (halvadeOpts.useBamInput)
        setHeaderFile(halvadeOpts.in, halvadeConf);

    Job halvadeJob = Job.getInstance(halvadeConf, "Halvade" + pipeline);
    halvadeJob.addCacheArchive(new URI(halvadeOpts.halvadeBinaries));
    halvadeJob.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.HalvadeMapper.class);
    addInputFiles(halvadeOpts.in, halvadeConf, halvadeJob);
    FileOutputFormat.setOutputPath(halvadeJob, new Path(tmpOutDir));

    if (jobType == HalvadeResourceManager.RNA_SHMEM_PASS2) {
        halvadeJob.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.StarAlignPassXMapper.class);
        halvadeJob.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.RnaGATKReducer.class);
    } else if (jobType == HalvadeResourceManager.DNA) {
        halvadeJob.setMapperClass(halvadeOpts.alignmentTools[halvadeOpts.aln]);
        halvadeJob.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.DnaGATKReducer.class);
    }

    halvadeJob.setMapOutputKeyClass(ChromosomeRegion.class);
    halvadeJob.setMapOutputValueClass(SAMRecordWritable.class);
    halvadeJob.setInputFormatClass(HalvadeTextInputFormat.class);
    halvadeJob.setOutputKeyClass(Text.class);
    if (halvadeOpts.mergeBam) {
        halvadeJob.setSortComparatorClass(SimpleChrRegionComparator.class);
        halvadeJob.setOutputValueClass(SAMRecordWritable.class);
    } else {
        halvadeJob.setPartitionerClass(ChrRgPartitioner.class);
        halvadeJob.setSortComparatorClass(ChrRgSortComparator.class);
        halvadeJob.setGroupingComparatorClass(ChrRgGroupingComparator.class);
        halvadeJob.setOutputValueClass(VariantContextWritable.class);
    }

    if (halvadeOpts.justAlign)
        halvadeJob.setNumReduceTasks(0);
    else if (halvadeOpts.mergeBam) {
        halvadeJob.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.BamMergeReducer.class);
        halvadeJob.setNumReduceTasks(1);
    } else
        halvadeJob.setNumReduceTasks(halvadeOpts.reduces);

    if (halvadeOpts.useBamInput) {
        halvadeJob.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.AlignedBamMapper.class);
        halvadeJob.setInputFormatClass(BAMInputFormat.class);
    }
    return runTimedJob(halvadeJob, "Halvade Job");
}
From source file:be.ugent.intec.halvade.MapReduceRunner.java
License:Open Source License
protected int runCombineJob(String halvadeOutDir, String mergeOutDir, boolean featureCount)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    Configuration combineConf = getConf();
    if (!halvadeOpts.out.endsWith("/"))
        halvadeOpts.out += "/";
    HalvadeConf.setInputDir(combineConf, halvadeOutDir);
    HalvadeConf.setOutDir(combineConf, mergeOutDir);
    FileSystem outFs = FileSystem.get(new URI(mergeOutDir), combineConf);
    if (outFs.exists(new Path(mergeOutDir))) {
        Logger.INFO("The output directory '" + mergeOutDir + "' already exists.");
        Logger.INFO("ERROR: Please remove this directory before trying again.");
        System.exit(-2);
    }
    HalvadeConf.setReportAllVariant(combineConf, halvadeOpts.reportAll);
    HalvadeResourceManager.setJobResources(halvadeOpts, combineConf, HalvadeResourceManager.COMBINE, false,
            halvadeOpts.useBamInput);
    Job combineJob = Job.getInstance(combineConf, "HalvadeCombineVCF");
    combineJob.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.VCFCombineMapper.class);
    addInputFiles(halvadeOutDir, combineConf, combineJob, featureCount ? ".count" : ".vcf");
    FileOutputFormat.setOutputPath(combineJob, new Path(mergeOutDir));
    combineJob.setMapperClass(featureCount ? be.ugent.intec.halvade.hadoop.mapreduce.HTSeqCombineMapper.class
            : be.ugent.intec.halvade.hadoop.mapreduce.VCFCombineMapper.class);
    combineJob.setMapOutputKeyClass(featureCount ? Text.class : LongWritable.class);
    combineJob.setMapOutputValueClass(featureCount ? LongWritable.class : VariantContextWritable.class);
    combineJob.setInputFormatClass(featureCount ? TextInputFormat.class : VCFInputFormat.class);
    combineJob.setNumReduceTasks(1);
    combineJob.setReducerClass(featureCount ? be.ugent.intec.halvade.hadoop.mapreduce.HTSeqCombineReducer.class
            : be.ugent.intec.halvade.hadoop.mapreduce.VCFCombineReducer.class);
    combineJob.setOutputKeyClass(Text.class);
    combineJob.setOutputValueClass(featureCount ? LongWritable.class : VariantContextWritable.class);
    return runTimedJob(combineJob, (featureCount ? "featureCounts" : "VCF") + " Combine Job");
}
From source file:be.ugent.intec.halvade.MapReduceRunner.java
License:Open Source License
protected void setHeaderFile(String input, Configuration conf) throws IOException, URISyntaxException {
    FileSystem fs = FileSystem.get(new URI(input), conf);
    String headerFile = null;
    if (fs.getFileStatus(new Path(input)).isDirectory()) {
        FileStatus[] files = fs.listStatus(new Path(input));
        if (files.length > 0)
            headerFile = files[0].getPath().toString();
    } else
        headerFile = input;
    if (headerFile != null)
        HalvadeConf.setHeaderFile(conf, headerFile);
}
From source file:be.ugent.intec.halvade.MapReduceRunner.java
License:Open Source License
protected void addInputFiles(String input, Configuration conf, Job job) throws URISyntaxException, IOException {
    FileSystem fs = FileSystem.get(new URI(input), conf);
    Logger.DEBUG("adding input files from " + input);
    if (fs.getFileStatus(new Path(input)).isDirectory()) {
        // add every file in directory
        FileStatus[] files = fs.listStatus(new Path(input));
        for (FileStatus file : files) {
            if (!file.isDirectory()) {
                FileInputFormat.addInputPath(job, file.getPath());
            }
        }
    } else
        FileInputFormat.addInputPath(job, new Path(input));
}
From source file:be.ugent.intec.halvade.MapReduceRunner.java
License:Open Source License
protected void addInputFiles(String input, Configuration conf, Job job, String filter)
        throws URISyntaxException, IOException {
    FileSystem fs = FileSystem.get(new URI(input), conf);
    if (fs.getFileStatus(new Path(input)).isDirectory()) {
        // add every file in directory
        FileStatus[] files = fs.listStatus(new Path(input));
        for (FileStatus file : files) {
            if (!file.isDirectory() && file.getPath().getName().endsWith(filter)) {
                FileInputFormat.addInputPath(job, file.getPath());
            }
        }
    } else {
        FileInputFormat.addInputPath(job, new Path(input));
    }
}
From source file:be.ugent.intec.halvade.uploader.HalvadeUploader.java
License:Open Source License
private int processFiles() throws IOException, InterruptedException, URISyntaxException, Throwable {
    Timer timer = new Timer();
    timer.start();
    AWSUploader upl = null;
    FileSystem fs = null;
    // write to s3?
    boolean useAWS = false;
    if (outputDir.startsWith("s3")) {
        useAWS = true;
        String existingBucketName = outputDir.replace("s3://", "").split("/")[0];
        outputDir = outputDir.replace("s3://" + existingBucketName + "/", "");
        upl = new AWSUploader(existingBucketName, SSE, profile);
    } else {
        Configuration conf = getConf();
        fs = FileSystem.get(new URI(outputDir), conf);
        Path outpath = new Path(outputDir);
        if (fs.exists(outpath) && !fs.getFileStatus(outpath).isDirectory()) {
            Logger.DEBUG("please provide an output directory");
            return 1;
        }
    }
    FileReaderFactory factory = FileReaderFactory.getInstance(mthreads);
    if (manifest != null) {
        Logger.DEBUG("reading input files from " + manifest);
        // read from file
        BufferedReader br = new BufferedReader(new FileReader(manifest));
        String line;
        while ((line = br.readLine()) != null) {
            String[] files = line.split("\t");
            if (files.length == 2) {
                factory.addReader(files[0], files[1], false);
            } else if (files.length == 1) {
                factory.addReader(files[0], null, isInterleaved);
            }
        }
    } else if (file1 != null && file2 != null) {
        Logger.DEBUG("Paired-end read input in 2 files.");
        factory.addReader(file1, file2, false);
    } else if (file1 != null) {
        if (isInterleaved)
            Logger.DEBUG("Single-end read input in 1 files.");
        else
            Logger.DEBUG("Paired-end read input in 1 files.");
        factory.addReader(file1, null, isInterleaved);
    } else {
        Logger.DEBUG("Incorrect input, use either a manifest file or give both file1 and file2 as input.");
    }
    // start reading
    (new Thread(factory)).start();
    int bestThreads = mthreads;
    long maxFileSize = getBestFileSize();
    if (useAWS) {
        AWSInterleaveFiles[] fileThreads = new AWSInterleaveFiles[bestThreads];
        // start interleaveFile threads
        for (int t = 0; t < bestThreads; t++) {
            fileThreads[t] = new AWSInterleaveFiles(outputDir + "halvade_" + t + "_", maxFileSize, upl, t, codec);
            fileThreads[t].start();
        }
        for (int t = 0; t < bestThreads; t++)
            fileThreads[t].join();
        if (upl != null)
            upl.shutDownNow();
    } else {
        HDFSInterleaveFiles[] fileThreads = new HDFSInterleaveFiles[bestThreads];
        // start interleaveFile threads
        for (int t = 0; t < bestThreads; t++) {
            fileThreads[t] = new HDFSInterleaveFiles(outputDir + "halvade_" + t + "_", maxFileSize, fs, t, codec);
            fileThreads[t].start();
        }
        for (int t = 0; t < bestThreads; t++)
            fileThreads[t].join();
    }
    factory.finalize();
    timer.stop();
    Logger.DEBUG("Time to process data: " + timer.getFormattedCurrentTime());
    return 0;
}
From source file:be.ugent.intec.halvade.uploader.input.BaseFileReader.java
protected static BufferedReader getReader(boolean readFromDistributedStorage, String file)
        throws FileNotFoundException, IOException {
    InputStream hdfsIn;
    if (readFromDistributedStorage) {
        Path pt = new Path(file);
        FileSystem fs = FileSystem.get(pt.toUri(), new Configuration());
        hdfsIn = fs.open(pt);
        // read the stream in the correct format!
        if (file.endsWith(".gz")) {
            GZIPInputStream gzip = new GZIPInputStream(hdfsIn, BUFFERSIZE);
            return new BufferedReader(new InputStreamReader(gzip));
        } else if (file.endsWith(".bz2")) {
            CBZip2InputStream bzip2 = new CBZip2InputStream(hdfsIn);
            return new BufferedReader(new InputStreamReader(bzip2));
        } else
            return new BufferedReader(new InputStreamReader(hdfsIn));
    } else {
        if (file.endsWith(".gz")) {
            GZIPInputStream gzip = new GZIPInputStream(new FileInputStream(file), BUFFERSIZE);
            return new BufferedReader(new InputStreamReader(gzip));
        } else if (file.endsWith(".bz2")) {
            CBZip2InputStream bzip2 = new CBZip2InputStream(new FileInputStream(file));
            return new BufferedReader(new InputStreamReader(bzip2));
        } else if (file.equals("-")) {
            return new BufferedReader(new InputStreamReader(System.in));
        } else
            return new BufferedReader(new FileReader(file));
    }
}
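The example above builds the FileSystem from the path's own URI via FileSystem.get(pt.toUri(), new Configuration()). An equivalent convenience, sketched below with illustrative class and method names, is Path.getFileSystem(Configuration), which resolves the same filesystem from the path's URI.

import java.io.IOException;
import java.io.InputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

class PathGetFileSystemSketch {
    // Open a file through Path.getFileSystem; resolves the same FileSystem as
    // FileSystem.get(pt.toUri(), conf). Class and method names here are illustrative.
    static InputStream open(String file, Configuration conf) throws IOException {
        Path pt = new Path(file);
        FileSystem fs = pt.getFileSystem(conf);
        return fs.open(pt);
    }
}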
From source file:be.ugent.intec.halvade.utils.ChromosomeSplitter.java
License:Open Source License
public void exportSplitter(String filename, Configuration conf) throws URISyntaxException, IOException {
    DataOutputStream dos = null;
    FileSystem hdfs = null;
    try {
        hdfs = FileSystem.get(new URI(filename), conf);
        Path file = new Path(filename);
        if (hdfs.exists(file)) {
            hdfs.delete(file, true);
        }
        OutputStream os = hdfs.create(file);
        dos = new DataOutputStream(os);
        dos.writeInt(regions.size());
        for (BedRegion region : regions) {
            dos.writeUTF(region.contig);
            dos.writeInt(region.start);
            dos.writeInt(region.end);
            dos.writeInt(region.key);
        }
    } finally {
        if (dos != null)
            dos.close();
    }
}
From source file:be.ugent.intec.halvade.utils.ChromosomeSplitter.java
License:Open Source License
private void importSplitter(String filename, Configuration conf) throws URISyntaxException, IOException {
    regions = new ArrayList();
    DataInputStream dis = null;
    FileSystem hdfs = null;
    try {
        hdfs = FileSystem.get(new URI(filename), conf);
        Path file = new Path(filename);
        InputStream is = hdfs.open(file);
        dis = new DataInputStream(is);
        int len = dis.readInt();
        for (int i = 0; i < len; i++) {
            String contig = dis.readUTF();
            int start = dis.readInt();
            int end = dis.readInt();
            int key = dis.readInt();
            regions.add(new BedRegion(contig, start, end, key));
        }
    } finally {
        if (dis != null)
            dis.close();
    }
}
From source file:be.ugent.intec.halvade.utils.HalvadeConf.java
License:Open Source License
public static void setKnownSitesOnHDFS(Configuration conf, String[] val) throws IOException, URISyntaxException {
    conf.setInt(numberOfSites, val.length);
    FileSystem fs;
    for (int i = 0; i < val.length; i++) {
        // check if dir add all files!
        fs = FileSystem.get(new URI(val[i]), conf);
        if (fs.isFile(new Path(val[i]))) {
            conf.set(sitesOnHDFSName + i, val[i]);
        } else {
            FileStatus[] files = fs.listStatus(new Path(val[i]));
            for (FileStatus file : files) {
                if (!file.isDir()) {
                    conf.set(sitesOnHDFSName + i, file.getPath().toString());
                }
            }
        }
    }
}
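A usage note on the examples above (general Hadoop behavior, not something these source files state explicitly): FileSystem.get normally returns a shared, cached instance per URI scheme and authority, so repeated calls such as the one inside the loop above are cheap, but closing the returned object also closes it for every other user of the cache. When an independent instance is needed, FileSystem.newInstance can be used instead; a minimal sketch with an assumed HDFS URI and class name follows.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

class UncachedFileSystemSketch {
    // Unlike FileSystem.get, newInstance bypasses the shared cache, so close() on the
    // returned object does not affect other callers. The URI below is a placeholder assumption.
    static FileSystem freshInstance(Configuration conf) throws Exception {
        return FileSystem.newInstance(new URI("hdfs://namenode:8020/"), conf);
    }
}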