Source of the method `bytesToCompressedBases(final byte[] readBases)`.

| Parameter | Description |
|---|---|
| readBases | Bases as ASCII bytes. |

Signature: `static byte[] bytesToCompressedBases(final byte[] readBases)`
//package com.java2s; /*/*from w w w. j a v a2 s . com*/ * The MIT License * * Copyright (c) 2009 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
public class Main {
    // BAM nybble codes for each base symbol, positioned in the low-order four bits of a byte.
    private static final byte COMPRESSED_EQUAL_LOW = 0;
    private static final byte COMPRESSED_A_LOW = 1;
    private static final byte COMPRESSED_C_LOW = 2;
    private static final byte COMPRESSED_M_LOW = 3;
    private static final byte COMPRESSED_G_LOW = 4;
    private static final byte COMPRESSED_R_LOW = 5;
    private static final byte COMPRESSED_S_LOW = 6;
    private static final byte COMPRESSED_V_LOW = 7;
    private static final byte COMPRESSED_T_LOW = 8;
    private static final byte COMPRESSED_W_LOW = 9;
    private static final byte COMPRESSED_Y_LOW = 10;
    private static final byte COMPRESSED_H_LOW = 11;
    private static final byte COMPRESSED_K_LOW = 12;
    private static final byte COMPRESSED_D_LOW = 13;
    private static final byte COMPRESSED_B_LOW = 14;
    private static final byte COMPRESSED_N_LOW = 15;

    /** Number of bits a low-order code is shifted left to occupy the high-order nybble. */
    private static final int HIGH_NYBBLE_SHIFT = 4;

    /**
     * Convert from a byte array containing bases represented as ASCII, to a byte array half as long
     * (rounded up), with two bases packed into each byte.
     *
     * <p>The first base of every pair is stored in the high-order nybble and the second in the
     * low-order nybble. The core symbols =, A, C, G, T, N are converted to 0, 1, 2, 4, 8, 15;
     * '.' is treated as N, and the IUPAC ambiguity codes M, R, S, V, W, Y, H, K, D, B are converted
     * to 3, 5, 6, 7, 9, 10, 11, 12, 13, 14. Letters are accepted in either case.
     *
     * <p>If the number of bases is odd, the low-order nybble of the last byte is left as 0
     * (which is also the code for '=').
     *
     * @param readBases Bases as ASCII bytes.
     * @return New byte array with bases represented as nybbles, in BAM binary format.
     * @throws IllegalArgumentException if any byte is not one of the accepted base symbols.
     * @throws NullPointerException if {@code readBases} is null.
     */
    static byte[] bytesToCompressedBases(final byte[] readBases) {
        final byte[] compressedBases = new byte[(readBases.length + 1) / 2];
        int i;
        // i always points at the second base of a pair; i - 1 is the first.
        for (i = 1; i < readBases.length; i += 2) {
            compressedBases[i / 2] = (byte) (charToCompressedBaseHigh(readBases[i - 1])
                    | charToCompressedBaseLow(readBases[i]));
        }
        // Last nybble: an odd-length input leaves one unpaired base at index i - 1.
        if (i == readBases.length) {
            // The byte is passed through unchanged (no char cast) so that an invalid negative
            // byte is reported with the same value here as in the paired loop above.
            compressedBases[i / 2] = charToCompressedBaseHigh(readBases[i - 1]);
        }
        return compressedBases;
    }

    /**
     * Convert from ASCII byte to BAM nybble representation of a base in high-order nybble.
     *
     * @param base One of =AaCcGgTtNn, '.', or an IUPAC ambiguity code (MRSVWYHKDB, either case).
     * @return High-order nybble-encoded equivalent.
     * @throws IllegalArgumentException if {@code base} is not an accepted symbol.
     */
    private static byte charToCompressedBaseHigh(final int base) {
        // Same lookup as the low-order variant, moved into the upper four bits. The cast keeps
        // the intended bit pattern for codes >= 8 (e.g. T, N), whose byte value is negative.
        return (byte) (charToCompressedBaseLow(base) << HIGH_NYBBLE_SHIFT);
    }

    /**
     * Convert from ASCII byte to BAM nybble representation of a base in low-order nybble.
     *
     * @param base One of =AaCcGgTtNn, '.', or an IUPAC ambiguity code (MRSVWYHKDB, either case).
     * @return Low-order nybble-encoded equivalent.
     * @throws IllegalArgumentException if {@code base} is not an accepted symbol.
     */
    private static byte charToCompressedBaseLow(final int base) {
        switch (base) {
            case '=':
                return COMPRESSED_EQUAL_LOW;
            case 'a':
            case 'A':
                return COMPRESSED_A_LOW;
            case 'c':
            case 'C':
                return COMPRESSED_C_LOW;
            case 'g':
            case 'G':
                return COMPRESSED_G_LOW;
            case 't':
            case 'T':
                return COMPRESSED_T_LOW;
            case 'n':
            case 'N':
            case '.':
                return COMPRESSED_N_LOW;

            // IUPAC ambiguity codes
            case 'M':
            case 'm':
                return COMPRESSED_M_LOW;
            case 'R':
            case 'r':
                return COMPRESSED_R_LOW;
            case 'S':
            case 's':
                return COMPRESSED_S_LOW;
            case 'V':
            case 'v':
                return COMPRESSED_V_LOW;
            case 'W':
            case 'w':
                return COMPRESSED_W_LOW;
            case 'Y':
            case 'y':
                return COMPRESSED_Y_LOW;
            case 'H':
            case 'h':
                return COMPRESSED_H_LOW;
            case 'K':
            case 'k':
                return COMPRESSED_K_LOW;
            case 'D':
            case 'd':
                return COMPRESSED_D_LOW;
            case 'B':
            case 'b':
                return COMPRESSED_B_LOW;
            default:
                throw new IllegalArgumentException("Bad byte passed to charToCompressedBase: " + base);
        }
    }
}