org.bgi.flexlab.gaea.data.mapreduce.input.header.SamHdfsFileHeader.java Source code


Introduction

Here is the source code for org.bgi.flexlab.gaea.data.mapreduce.input.header.SamHdfsFileHeader.java. The class merges the headers of SAM/BAM (or CRAM) files stored on HDFS, writes the merged header to a well-known file, and lets MapReduce tasks read it back through the Hadoop Configuration.

Source

/*******************************************************************************
 * Copyright (c) 2017, BGI-Shenzhen
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>
 *******************************************************************************/
package org.bgi.flexlab.gaea.data.mapreduce.input.header;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileReader;
import htsjdk.samtools.SamFileHeaderMerger;
import htsjdk.samtools.cram.build.CramIO;
import htsjdk.samtools.seekablestream.SeekableStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.bgi.flexlab.gaea.data.exception.FileNotExistException;
import org.bgi.flexlab.gaea.data.structure.header.SamFileHeader;
import org.seqdoop.hadoop_bam.util.WrapSeekable;

import java.io.IOException;
import java.util.ArrayList;

/**
 * SAM/BAM header I/O for HDFS.
 */
public class SamHdfsFileHeader extends SamFileHeader {
    public final static String BAM_HEADER_FILE_NAME = "SAMFileHeader";
    protected final static SAMFileHeader.SortOrder SORT_ORDER = SAMFileHeader.SortOrder.coordinate;
    protected static boolean MERGE_SEQUENCE_DICTIONARIES = true;
    protected static SAMFileHeader.SortOrder headerMergerSortOrder;
    protected final static boolean ASSUME_SORTED = false;

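    /** Skips Hadoop bookkeeping files and directories whose names start with "_" (e.g. _SUCCESS, _logs). */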
    public static class HeaderPathFilter implements PathFilter {
        @Override
        public boolean accept(Path path) {
            if (path.getName().startsWith("_"))
                return false;
            return true;
        }
    }

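    /**
     * Reads the header of a SAM/BAM file on HDFS via the (deprecated) SAMFileReader.
     *
     * @throws FileNotExistException.MissingHeaderException if the file has no header
     */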
    @SuppressWarnings("deprecation")
    public static SAMFileHeader getSAMHeader(FileSystem fs, Path file) throws IOException {
        SAMFileReader samr = new SAMFileReader(fs.open(file));
        SAMFileHeader header = samr.getFileHeader();
        samr.close();
        if (header == null) {
            throw new FileNotExistException.MissingHeaderException(file.getName());
        }

        return header;
    }

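    /** Reads the SAM header embedded in a CRAM file on HDFS through a seekable stream. */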
    public static SAMFileHeader getCramHeader(FileSystem fs, Path path) throws IOException {
        SeekableStream sin = WrapSeekable.openPath(fs, path);
        SAMFileHeader header = CramIO.readCramHeader(sin).getSamFileHeader();
        sin.close();
        return header;
    }

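    /**
     * Recursively walks {@code input}, collects the distinct headers of all SAM/BAM (or CRAM)
     * files found, and merges them. The merged header keeps coordinate sort order only when
     * every input is coordinate-sorted; otherwise it falls back to unsorted.
     */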
    public static SAMFileHeader traversal(Path input, FileSystem fs, Configuration conf, boolean cram) {
        ArrayList<SAMFileHeader> mergeHeaders = new ArrayList<SAMFileHeader>();
        SAMFileHeader mergedHeader = null;
        boolean matchedSortOrders = true;

        FileStatus status = null;
        try {
            status = fs.getFileStatus(input);
        } catch (IOException e2) {
            throw new FileNotExistException(input.getName());
        }

        if (status.isFile()) {
            SAMFileHeader header = null;
            if (!cram)
                try {
                    header = getSAMHeader(fs, input);
                } catch (IOException e) {
                    throw new RuntimeException(e.toString());
                }
            else
                try {
                    header = getCramHeader(fs, input);
                } catch (IOException e) {
                    throw new RuntimeException(e.toString());
                }
            matchedSortOrders = matchedSortOrders && header.getSortOrder() == SORT_ORDER;
            if (!contains(header, mergeHeaders))
                mergeHeaders.add(header);
        } else {
            FileStatus[] stats = null;
            try {
                stats = fs.listStatus(input, new HeaderPathFilter());
            } catch (IOException e) {
                throw new RuntimeException(e.toString());
            }

            for (FileStatus file : stats) {
                Path filePath = file.getPath();
                SAMFileHeader header = null;
                if (file.isFile()) {
                    if (!cram)
                        try {
                            header = getSAMHeader(fs, filePath);
                        } catch (IOException e) {
                            throw new RuntimeException(e.toString());
                        }
                    else
                        try {
                            header = getCramHeader(fs, filePath);
                        } catch (IOException e) {
                            throw new RuntimeException(e.toString());
                        }
                } else {
                    header = traversal(filePath, fs, conf, cram);
                }
                matchedSortOrders = matchedSortOrders && header.getSortOrder() == SORT_ORDER;
                if (!contains(header, mergeHeaders))
                    mergeHeaders.add(header);
            }
        }
        if (matchedSortOrders || SORT_ORDER == SAMFileHeader.SortOrder.unsorted || ASSUME_SORTED) {
            headerMergerSortOrder = SORT_ORDER;
        } else {
            headerMergerSortOrder = SAMFileHeader.SortOrder.unsorted;
        }
        mergedHeader = new SamFileHeaderMerger(headerMergerSortOrder, mergeHeaders, MERGE_SEQUENCE_DICTIONARIES)
                .getMergedHeader();
        return mergedHeader;
    }

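    /**
     * Merges the headers of all SAM/BAM files under {@code input} and writes the result
     * below {@code output}; equivalent to {@code loadHeader(input, conf, output, false)}.
     */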
    public static SAMFileHeader loadHeader(Path input, Configuration conf, Path output) throws IOException {
        return loadHeader(input, conf, output, false);
    }

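    /**
     * Merges the headers of all inputs (CRAM when {@code cram} is true), writes the merged
     * header below {@code output} and records its location in the Configuration.
     */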
    public static SAMFileHeader loadHeader(Path input, Configuration conf, Path output, boolean cram)
            throws IOException {
        FileSystem fs = input.getFileSystem(conf);
        SAMFileHeader mergeHeader = traversal(input, fs, conf, cram);
        if (mergeHeader == null) {
            throw new FileNotExistException.MissingHeaderException(input.getName());
        }
        writeHeader(conf, mergeHeader, output);

        return mergeHeader;
    }

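    /**
     * Serializes {@code header} to a file named {@link #BAM_HEADER_FILE_NAME} under {@code output},
     * creating the output directory with full (rwx) permissions if needed, and stores the file's
     * path in the Configuration under the same key.
     */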
    public static void writeHeader(Configuration conf, SAMFileHeader header, Path output) {
        Path rankSumTestObjPath = null;
        FsAction[] v = FsAction.values();
        StringBuilder uri = new StringBuilder();
        uri.append(output);
        // use the HDFS path separator rather than the platform-dependent file separator
        if (!uri.toString().endsWith(Path.SEPARATOR)) {
            uri.append(Path.SEPARATOR);
        }
        uri.append(BAM_HEADER_FILE_NAME);
        conf.set(BAM_HEADER_FILE_NAME, uri.toString());
        rankSumTestObjPath = new Path(uri.toString());
        FileSystem fs = null;
        try {
            fs = rankSumTestObjPath.getFileSystem(conf);
            FsPermission permission = new FsPermission(v[7], v[7], v[7]);
            if (!fs.exists(output)) {
                fs.mkdirs(output, permission);
            } else {
                fs.setPermission(output, permission);
            }

            SamFileHeaderCodec.writeHeader(header, fs.create(rankSumTestObjPath));
        } catch (IOException e) {
            throw new RuntimeException(e.toString());
        } finally {
            try {
                fs.close();
            } catch (IOException ioe) {
                throw new RuntimeException(ioe.toString());
            }
        }
    }

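    /**
     * Reads the merged header back from the path recorded in the Configuration,
     * or returns null when no header path has been set.
     */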
    public static SAMFileHeader getHeader(Configuration conf) {
        if (conf.get(BAM_HEADER_FILE_NAME) == null)
            return null;

        SAMFileHeader header = null;
        try {
            Path headerPath = new Path(conf.get(BAM_HEADER_FILE_NAME));
            HdfsHeaderLineReader reader = new HdfsHeaderLineReader(headerPath, conf);
            header = SamFileHeaderCodec.readHeader(reader);
        } catch (IOException e) {
            throw new RuntimeException(e.getMessage());
        }

        return header;
    }
}
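
Usage example

For orientation, here is a minimal, hypothetical sketch (not part of the Gaea sources; the class name and arguments are assumed) of how a Hadoop driver could publish the merged header with loadHeader and how a task could read it back with getHeader:

import htsjdk.samtools.SAMFileHeader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.bgi.flexlab.gaea.data.mapreduce.input.header.SamHdfsFileHeader;

public class HeaderUsageSketch { // hypothetical driver class, for illustration only
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path input = new Path(args[0]);   // directory (or single file) of BAM inputs on HDFS
        Path output = new Path(args[1]);  // directory where the merged header file is written

        // Driver side: merge every header found under `input`, write the merged header
        // to output/SAMFileHeader and record its path in the Configuration.
        SAMFileHeader merged = SamHdfsFileHeader.loadHeader(input, conf, output);
        System.out.println("Merged read groups: " + merged.getReadGroups().size());

        // Task side (e.g. in Mapper.setup, using the job's Configuration):
        // read the merged header back from the recorded path.
        SAMFileHeader header = SamHdfsFileHeader.getHeader(conf);
        if (header == null) {
            throw new IllegalStateException("Merged header not found in Configuration");
        }
    }
}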