Translating a string of nucleotides into amino acids - Java Data Structure

Java examples for Data Structure:DNA

Description

Translates a string of nucleotides into the amino acid sequence it encodes.

Demo Code

/*
 **  DNAUtils
 **  (c) Copyright 1997, Neomorphic Software, Inc.
 **  All Rights Reserved
 **
 **  CONFIDENTIAL
 **  DO NOT DISTRIBUTE
 **
 **  File: DNAUtils.java
 **
 */
import java.util.List;
import java.util.ArrayList;

/**
 * Static helpers for translating a nucleotide string into an amino acid
 * string using a three-dimensional genetic-code lookup table.
 */
public class Main {
    /**
     * Maps a frame-type value (used as the array index) to a signed base
     * offset. Only the magnitude is used as a start position when translating;
     * the negative entries presumably mark reverse-strand frames
     * (NOTE(review): confirm against the frame-type constants used by callers).
     * {@code -0} is simply {@code 0}.
     */
    public static final int[] FRAME_MAPPING = { 0, 0, 0, 1, 2, -0, -1, -2 };
    /** Code type selecting the one-letter genetic code table. */
    public static final int ONE_LETTER_CODE = 100;
    /** Code type selecting the three-letter genetic code table. */
    public static final int THREE_LETTER_CODE = 101;

    // NOTE(review): aa1Default and aa3Default are not declared in this class
    // as shown here; they must be supplied for the class to compile.

    /** Genetic code table returned by {@link #getGeneticCodeOne()}. */
    protected static String[][][] aa1 = aa1Default;
    /** Genetic code table returned by {@link #getGeneticCodeThree()}. */
    protected static String[][][] aa3 = aa3Default;

    /**
     * Translates a string of nucleotides into amino acids with no decoration
     * (no initial string and nothing before or after each residue).
     *
     * @param s the nucleotide string.
     * @param frametype index into {@link #FRAME_MAPPING} selecting the start
     *                  offset. For reverse strand frames, pass the reverse
     *                  complement as {@code s} and reverse the result.
     * @param codetype {@link #ONE_LETTER_CODE} (or 1), or
     *                 {@link #THREE_LETTER_CODE} (or 3).
     * @return the translated amino acid sequence, or {@code null} when
     *         {@code codetype} is not one of the recognized values.
     */
    public static String translate(String s, int frametype, int codetype) {
        return translate(s, frametype, codetype, null, null, null);
    }

    /**
     * Translates a string of nucleotides into amino acids, choosing the
     * genetic code table from {@code codetype}.
     *
     * @param s the nucleotide string.
     * @param frametype index into {@link #FRAME_MAPPING} selecting the start
     *                  offset. For reverse strand frames, pass the reverse
     *                  complement as {@code s} and reverse the result.
     * @param codetype {@link #ONE_LETTER_CODE} (or 1), or
     *                 {@link #THREE_LETTER_CODE} (or 3).
     * @param initial_string what goes at the front of the entire translation;
     *                       may be {@code null}.
     * @param pre_string what goes before every amino acid; may be {@code null}.
     * @param post_string what goes after every amino acid; may be {@code null}.
     * @return the translated amino acid sequence, or {@code null} when
     *         {@code codetype} is not one of the recognized values.
     */
    public static String translate(String s, int frametype, int codetype,
            String initial_string, String pre_string, String post_string) {
        final String[][][] geneticCode;
        if (codetype == ONE_LETTER_CODE || codetype == 1) {
            geneticCode = getGeneticCodeOne();
        } else if (codetype == THREE_LETTER_CODE || codetype == 3) {
            geneticCode = getGeneticCodeThree();
        } else {
            // Unrecognized code type: keep the established contract of
            // returning null rather than throwing.
            return null;
        }
        return translate(s, frametype, geneticCode,
                initial_string, pre_string, post_string);
    }

    /**
     * Translates a string of nucleotides into amino acids using the given
     * genetic code table.
     *
     * <p>Translation starts at the offset given by the magnitude of
     * {@code FRAME_MAPPING[frametype]} and consumes three bases at a time.
     * Bases skipped by the offset produce no output; each base left over
     * after the last complete codon is rendered as a single space.
     * This method never reverse-complements its input: for reverse strand
     * frames, pass the reverse complement as {@code s} and reverse the result.
     *
     * @param s the nucleotide string. Upper- or lower-case A, C, G and T are
     *          recognized; every other character uses table index 4.
     * @param frametype index into {@link #FRAME_MAPPING}.
     * @param genetic_code a lookup table indexed by the three bases of a
     *                     codon, such as the result of one of the
     *                     getGeneticCode methods of this class.
     * @param initial_string what goes at the front of the entire translation;
     *                       may be {@code null}.
     * @param pre_string what goes before every amino acid; may be {@code null}.
     * @param post_string what goes after every amino acid; may be {@code null}.
     * @return the translated amino acid sequence.
     * @throws ArrayIndexOutOfBoundsException if {@code frametype} is not a
     *         valid index into {@link #FRAME_MAPPING}.
     * @see #getGeneticCodeOne
     * @see #getGeneticCodeThree
     */
    public static String translate(String s, int frametype,
            String[][][] genetic_code, String initial_string,
            String pre_string, String post_string) {
        // FRAME_MAPPING stores some offsets as negative numbers. Used directly
        // as a start index those would read before the beginning of the
        // sequence, so only the magnitude is used.
        int offset = Math.abs(FRAME_MAPPING[frametype]);
        int length = s.length();

        // Treat absent decorations as empty so a single loop serves all cases.
        String before = (pre_string == null) ? "" : pre_string;
        String after = (post_string == null) ? "" : post_string;

        StringBuilder amino_acids = new StringBuilder(length);
        if (initial_string != null) {
            amino_acids.append(initial_string);
        }

        // Stop at length - 2 so that i + 2 is always a valid position.
        for (int i = offset; i < length - 2; i += 3) {
            String residue = genetic_code[baseIndex(s.charAt(i))]
                    [baseIndex(s.charAt(i + 1))]
                    [baseIndex(s.charAt(i + 2))];
            amino_acids.append(before).append(residue).append(after);
        }

        // One space per trailing base that does not complete a codon.
        int extra_bases = (length - offset) % 3;
        for (int i = 0; i < extra_bases; i++) {
            amino_acids.append(" ");
        }
        return amino_acids.toString();
    }

    /**
     * Converts a nucleotide character to its genetic-code table index:
     * A=0, C=1, G=2, T=3 (case-insensitive), anything else=4.
     */
    private static int baseIndex(char base) {
        switch (base) {
        case 'A':
        case 'a':
            return 0;
        case 'C':
        case 'c':
            return 1;
        case 'G':
        case 'g':
            return 2;
        case 'T':
        case 't':
            return 3;
        default:
            return 4;
        }
    }

    /**
     * Gets the genetic code table used for {@link #ONE_LETTER_CODE}.
     * The three dimensions of the array correspond to the three nucleotides
     * of a codon. {@code translate} indexes each dimension with 0 to 4,
     * meaning A, C, G, T and "any other character" respectively.
     * The internal array itself is returned, not a copy.
     *
     * @return the genetic code expressed in one-character amino acid codes.
     */
    public static String[][][] getGeneticCodeOne() {
        return aa1;
    }

    /**
     * Gets the genetic code table used for {@link #THREE_LETTER_CODE}.
     * The three dimensions of the array correspond to the three nucleotides
     * of a codon. {@code translate} indexes each dimension with 0 to 4,
     * meaning A, C, G, T and "any other character" respectively.
     * The internal array itself is returned, not a copy.
     *
     * @return the genetic code expressed in three-character amino acid codes.
     */
    public static String[][][] getGeneticCodeThree() {
        return aa3;
    }
}

Related Tutorials