com.google.cloud.genomics.dataflow.readers.bam.ReadConverter.java Source code

Java tutorial

Introduction

Here is the source code for com.google.cloud.genomics.dataflow.readers.bam.ReadConverter.java

Source

/*
 * Copyright (C) 2015 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.google.cloud.genomics.dataflow.readers.bam;

import com.google.api.client.util.Maps;
import com.google.api.services.genomics.model.CigarUnit;
import com.google.api.services.genomics.model.LinearAlignment;
import com.google.api.services.genomics.model.Position;
import com.google.api.services.genomics.model.Read;
import com.google.common.base.Function;
import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;
import com.google.common.collect.Lists;
import htsjdk.samtools.*;
import htsjdk.samtools.util.SequenceUtil;

import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * Converts htsjdk {@link SAMRecord}s to Genomics API {@link Read}s.
 *
 * <p>All members are static; the class keeps no per-instance state.
 */
public class ReadConverter {
    /** Genomics API CIGAR operation name to single-character SAM CIGAR code. */
    static final HashBiMap<String, String> CIGAR_OPERATIONS;

    /** Inverse view of {@link #CIGAR_OPERATIONS}: SAM CIGAR code to API operation name. */
    static final BiMap<String, String> CIGAR_OPERATIONS_INV;

    static {
        CIGAR_OPERATIONS = HashBiMap.create();
        CIGAR_OPERATIONS.put("ALIGNMENT_MATCH", "M");
        CIGAR_OPERATIONS.put("CLIP_HARD", "H");
        CIGAR_OPERATIONS.put("CLIP_SOFT", "S");
        CIGAR_OPERATIONS.put("DELETE", "D");
        CIGAR_OPERATIONS.put("INSERT", "I");
        CIGAR_OPERATIONS.put("PAD", "P");
        CIGAR_OPERATIONS.put("SEQUENCE_MATCH", "=");
        CIGAR_OPERATIONS.put("SEQUENCE_MISMATCH", "X");
        CIGAR_OPERATIONS.put("SKIP", "N");
        CIGAR_OPERATIONS_INV = CIGAR_OPERATIONS.inverse();
    }

    /**
     * Generates a Read from a SAMRecord.
     *
     * <p>The read name is used for both the id and the fragment name. An alignment is
     * attached only when the record is not flagged unmapped and has a positive alignment
     * start. Mate information is filled in only for paired records. Every SAM attribute
     * is copied into the read's info map as a single-element list holding the value's
     * string form.
     *
     * @param record the SAM/BAM record to convert
     * @return a newly created Read populated from {@code record}
     */
    public static final Read makeRead(final SAMRecord record) {
        Read read = new Read();
        read.setId(record.getReadName()); // TODO: make more unique
        read.setFragmentName(record.getReadName());
        read.setReadGroupId(getAttr(record, "RG"));

        final boolean paired = record.getReadPairedFlag();
        read.setNumberReads(paired ? 2 : 1);
        // The proper-pair flag is only consulted for paired records.
        read.setProperPlacement(paired && record.getProperPairFlag());

        if (!record.getReadUnmappedFlag() && record.getAlignmentStart() > 0) {
            read.setAlignment(makeAlignment(record));
        }

        read.setDuplicateFragment(record.getDuplicateReadFlag());
        read.setFragmentLength(record.getInferredInsertSize());

        if (paired) {
            if (record.getFirstOfPairFlag()) {
                read.setReadNumber(0);
            } else if (record.getSecondOfPairFlag()) {
                read.setReadNumber(1);
            }

            if (!record.getMateUnmappedFlag()) {
                // NOTE(review): unlike the read's own alignment above, the mate start is not
                // checked for > 0, so a mate start of 0 produces position -1. Confirm intended.
                Position matePosition = new Position();
                matePosition.setPosition((long) record.getMateAlignmentStart() - 1);
                matePosition.setReferenceName(record.getMateReferenceName());
                matePosition.setReverseStrand(record.getMateNegativeStrandFlag());
                read.setNextMatePosition(matePosition);
            }
        }

        read.setFailedVendorQualityChecks(record.getReadFailsVendorQualityCheckFlag());
        read.setSecondaryAlignment(record.getNotPrimaryAlignmentFlag());
        read.setSupplementaryAlignment(record.getSupplementaryAlignmentFlag());
        read.setAlignedSequence(record.getReadString());

        // Qualities are left unset (rather than set to an empty list) when the record has none.
        byte[] baseQualities = record.getBaseQualities();
        if (baseQualities.length > 0) {
            List<Integer> readBaseQualities = new ArrayList<Integer>(baseQualities.length);
            for (byte quality : baseQualities) {
                readBaseQualities.add(Integer.valueOf(quality));
            }
            read.setAlignedQuality(readBaseQualities);
        }

        Map<String, List<String>> attributes = Maps.newHashMap();
        for (SAMRecord.SAMTagAndValue tagAndValue : record.getAttributes()) {
            attributes.put(tagAndValue.tag,
                    Lists.newArrayList(attributeToString(tagAndValue.value)));
        }
        read.setInfo(attributes);

        return read;
    }

    /**
     * Builds the alignment (position, mapping quality and CIGAR) for a mapped record.
     * The position is the record's alignment start minus one.
     */
    private static LinearAlignment makeAlignment(final SAMRecord record) {
        Position position = new Position();
        position.setPosition((long) record.getAlignmentStart() - 1);
        position.setReferenceName(record.getReferenceName());
        position.setReverseStrand(record.getReadNegativeStrandFlag());

        LinearAlignment alignment = new LinearAlignment();
        alignment.setPosition(position);
        alignment.setMappingQuality(record.getMappingQuality());
        alignment.setCigar(makeCigar(record));
        return alignment;
    }

    /**
     * Converts the record's CIGAR elements into a concrete list of CigarUnits.
     *
     * <p>The list is built eagerly so that callers receive stable, independently mutable
     * units instead of a lazy view that would re-create each unit on every access.
     */
    private static List<CigarUnit> makeCigar(final SAMRecord record) {
        // The reference string can only be reconstructed when the record carries an MD tag.
        final String referenceSequence = (record.getAttribute("MD") != null)
                ? new String(SequenceUtil.makeReferenceFromAlignment(record, true))
                : null;

        List<CigarElement> elements = record.getCigar().getCigarElements();
        List<CigarUnit> cigar = new ArrayList<CigarUnit>(elements.size());
        for (CigarElement element : elements) {
            String operation = CIGAR_OPERATIONS_INV.get(element.getOperator().toString());
            CigarUnit unit = new CigarUnit();
            unit.setOperation(operation);
            unit.setOperationLength((long) element.getLength());
            // NOTE(review): the whole reconstructed reference string is attached to every
            // mismatch/delete unit, not only the bases that unit spans. Confirm intended.
            // Constant-first equals keeps this null-safe if an operator has no mapping.
            if (referenceSequence != null
                    && ("SEQUENCE_MISMATCH".equals(operation) || "DELETE".equals(operation))) {
                unit.setReferenceSequence(referenceSequence);
            }
            cigar.add(unit);
        }
        return cigar;
    }

    /**
     * Renders a SAM attribute value as text.
     *
     * <p>Array values are written as their elements separated by commas, because calling
     * {@code toString()} on a Java array yields only a type-and-identity string such as
     * {@code [I@1b6d3586}. Every other value uses its own {@code toString()}.
     */
    private static String attributeToString(Object value) {
        if (!value.getClass().isArray()) {
            return value.toString();
        }
        int length = Array.getLength(value);
        StringBuilder text = new StringBuilder();
        for (int i = 0; i < length; i++) {
            if (i > 0) {
                text.append(',');
            }
            text.append(Array.get(value, i));
        }
        return text.toString();
    }

    /**
     * Looks up a string-valued attribute on a record.
     *
     * @param record the record to read from
     * @param attributeName the SAM tag to look up, e.g. {@code "RG"}
     * @return the result of {@code record.getStringAttribute(attributeName)}, or an empty
     *     string if that call throws a {@link SAMException}
     */
    public static String getAttr(SAMRecord record, String attributeName) {
        try {
            return record.getStringAttribute(attributeName);
        } catch (SAMException ignored) {
            // Deliberate best effort: a tag that cannot be read as a string is reported as "".
            return "";
        }
    }
}