Java Byte Array Convert To bytesToCompressedBases(final byte[] readBases)

Here you can find the source of bytesToCompressedBases(final byte[] readBases)

Description

Convert from a byte array containing =AaCcGgTtNn represented as ASCII, to a byte array half as long, with =, A, C, G, T, N converted to 0, 1, 2, 4, 8, 15.

License

Open Source License

Parameter

Parameter Description
readBases Bases as ASCII bytes.

Return

New byte array with bases represented as nybbles, in BAM binary format.

Declaration

static byte[] bytesToCompressedBases(final byte[] readBases) 

Method Source Code

//package com.java2s;
/*
 * The MIT License
 *
 * Copyright (c) 2009 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

public class Main {
    // 4-bit base codes as they appear in the low-order nybble of a packed byte.
    // The high-order form of each code is derived by shifting left four bits.
    private static final byte COMPRESSED_EQUAL_LOW = 0;
    private static final byte COMPRESSED_A_LOW = 1;
    private static final byte COMPRESSED_C_LOW = 2;
    private static final byte COMPRESSED_M_LOW = 3;
    private static final byte COMPRESSED_G_LOW = 4;
    private static final byte COMPRESSED_R_LOW = 5;
    private static final byte COMPRESSED_S_LOW = 6;
    private static final byte COMPRESSED_V_LOW = 7;
    private static final byte COMPRESSED_T_LOW = 8;
    private static final byte COMPRESSED_W_LOW = 9;
    private static final byte COMPRESSED_Y_LOW = 10;
    private static final byte COMPRESSED_H_LOW = 11;
    private static final byte COMPRESSED_K_LOW = 12;
    private static final byte COMPRESSED_D_LOW = 13;
    private static final byte COMPRESSED_B_LOW = 14;
    private static final byte COMPRESSED_N_LOW = 15;

    /**
     * Convert from a byte array of ASCII bases to a byte array half as long (rounded up), packing
     * two bases per byte: the first base of each pair goes in the high-order nybble and the second
     * in the low-order nybble.
     *
     * <p>Accepted symbols (letters in either case) and their codes: {@code =}=0, A=1, C=2, M=3,
     * G=4, R=5, S=6, V=7, T=8, W=9, Y=10, H=11, K=12, D=13, B=14, N=15. A {@code '.'} is encoded
     * like N.
     *
     * <p>If the input has an odd number of bases, the low-order nybble of the last output byte
     * is left as 0.
     *
     * @param readBases Bases as ASCII bytes.
     * @return New byte array with bases represented as nybbles, in BAM binary format.
     * @throws IllegalArgumentException if any byte is not one of the accepted symbols; the message
     *         contains the signed numeric value of the offending byte.
     * @throws NullPointerException if {@code readBases} is null.
     */
    static byte[] bytesToCompressedBases(final byte[] readBases) {
        final byte[] compressedBases = new byte[(readBases.length + 1) / 2];
        int i;
        // i indexes the second base of each pair, so i / 2 is the output position.
        for (i = 1; i < readBases.length; i += 2) {
            compressedBases[i / 2] = (byte) (charToCompressedBaseHigh(readBases[i - 1])
                    | charToCompressedBaseLow(readBases[i]));
        }
        // Last nybble: an odd-length input leaves one unpaired base at index i - 1.
        // The byte is passed as-is (no char cast) so that an invalid negative byte is reported
        // with the same signed value here as it would be inside the loop.
        if (i == readBases.length) {
            compressedBases[i / 2] = charToCompressedBaseHigh(readBases[i - 1]);
        }
        return compressedBases;
    }

    /**
     * Convert from ASCII byte to BAM nybble representation of a base in high-order nybble.
     * @param base One of the symbols accepted by {@link #charToCompressedBaseLow(int)}.
     * @return High-order nybble-encoded equivalent (low-order nybble is 0).
     * @throws IllegalArgumentException if {@code base} is not an accepted symbol.
     */
    private static byte charToCompressedBaseHigh(final int base) {
        // Single source of truth for the symbol table: reuse the low-nybble mapping and shift.
        return (byte) (charToCompressedBaseLow(base) << 4);
    }

    /**
     * Convert from ASCII byte to BAM nybble representation of a base in low-order nybble.
     * @param base One of =AaCcGgTtNn, '.', or an IUPAC ambiguity code (MRSVWYHKDB, either case).
     * @return Low-order nybble-encoded equivalent.
     * @throws IllegalArgumentException if {@code base} is not an accepted symbol.
     */
    private static byte charToCompressedBaseLow(final int base) {
        switch (base) {
        case '=':
            return COMPRESSED_EQUAL_LOW;
        case 'a':
        case 'A':
            return COMPRESSED_A_LOW;
        case 'c':
        case 'C':
            return COMPRESSED_C_LOW;
        case 'g':
        case 'G':
            return COMPRESSED_G_LOW;
        case 't':
        case 'T':
            return COMPRESSED_T_LOW;
        case 'n':
        case 'N':
        case '.':
            return COMPRESSED_N_LOW;

        // IUPAC ambiguity codes
        case 'M':
        case 'm':
            return COMPRESSED_M_LOW;
        case 'R':
        case 'r':
            return COMPRESSED_R_LOW;
        case 'S':
        case 's':
            return COMPRESSED_S_LOW;
        case 'V':
        case 'v':
            return COMPRESSED_V_LOW;
        case 'W':
        case 'w':
            return COMPRESSED_W_LOW;
        case 'Y':
        case 'y':
            return COMPRESSED_Y_LOW;
        case 'H':
        case 'h':
            return COMPRESSED_H_LOW;
        case 'K':
        case 'k':
            return COMPRESSED_K_LOW;
        case 'D':
        case 'd':
            return COMPRESSED_D_LOW;
        case 'B':
        case 'b':
            return COMPRESSED_B_LOW;
        default:
            throw new IllegalArgumentException("Bad  byte passed to charToCompressedBase: " + base);
        }
    }
}

Related

  1. BytesTobytes(Byte[] bytes)
  2. bytesToChars(byte[] bytes)
  3. bytesToChars(byte[] data, String enc)
  4. bytesToCharset(final byte[] bytes, final String charset)
  5. bytesToCompressedBases(final byte[] readBases)
  6. bytesToData(byte[][] fileData)
  7. bytesToDec(byte[] bytes)
  8. bytesToEntryCreditAddress(byte[] key)
  9. BytesToInt(byte abyte0[], int offset)