Java examples for Data Structure:DNA
Gets a translation into amino acids of a string of nucleotides.
/*/*from w w w . j a va2 s .co m*/ ** DNAUtils ** (c) Copyright 1997, Neomorphic Sofware, Inc. ** All Rights Reserved ** ** CONFIDENTIAL ** DO NOT DISTRIBUTE ** ** File: DNAUtils.java ** */ import java.util.List; import java.util.ArrayList; public class Main{ public static final int[] FRAME_MAPPING = { 0, 0, 0, 1, 2, -0, -1, -2 }; public static final int ONE_LETTER_CODE = 100; public static final int THREE_LETTER_CODE = 101; /** Genetic Code in 1-character amino acid codes. by default set to default genetic code 1 */ protected static String aa1[][][] = aa1Default; /** Genetic Code in 3-character amino acid codes, default set to gen code 1 */ protected static String aa3[][][] = aa3Default; /** * gets a translation into amino acids of a string of nucleotides. * * @param s represents the string of nucleotides. * @param frametype FRAME_ONE, FRAME_TWO, or FRAME_THREE. * For reverse strand frames, * translate the reverse complement. * Then reverse that result. * @param codetype ONE_LETTER_CODE, or THREE_LETTER_CODE * indicating how many letters should encode each amino acid. * @return a representation of the amino acid sequence * encoded by the given nucleotide sequence. */ public static String translate(String s, int frametype, int codetype) { return translate(s, frametype, codetype, null, null, null); } /** * gets a translation into amino acids of a string of nucleotides. * * @param s represents the string of nucleotides. * @param frametype FRAME_ONE, FRAME_TWO, or FRAME_THREE. * For reverse strand frames, * translate the reverse complement. * Then reverse that result. * @param codetype ONE_LETTER_CODE, or THREE_LETTER_CODE * indicating how many letters should encode each amino acid. * @param initial_string what goes at front of entire translation * @param pre_string what goes before every amino acid * @param post_string what goes after every amino acid * @return a representation of the amino acid sequence * encoded by the given nucleotide sequence. 
*/ public static String translate(String s, int frametype, int codetype, String initial_string, String pre_string, String post_string) { String result = null; if (codetype == ONE_LETTER_CODE || codetype == 1) { result = translate(s, frametype, getGeneticCodeOne(), initial_string, pre_string, post_string); } else if (codetype == THREE_LETTER_CODE || codetype == 3) { result = translate(s, frametype, getGeneticCodeThree(), initial_string, pre_string, post_string); } return result; } /** * gets a translation into amino acids of a string of nucleotides. * * @param s represents the string of nucleotides. * @param frametype FRAME_ONE, FRAME_TWO, or FRAME_THREE. * For reverse strand frames, * translate the reverse complement. * Then reverse that result. * @param genetic_code the result of one of the getGeneticCode methods * of this class. * @param initial_string what goes at front of entire translation * @param pre_string what goes before every amino acid * @param post_string what goes after every amino acid * @return a representation of the amino acid sequence * encoded by the given nucleotide sequence. 
* @see #getGeneticCodeOne * @see #getGeneticCodeThree */ // currently only translates in +1, +2, +3 // for -1, -2, -3: translate reverse complement, then reverse result // initial_string is what goes at front of entire translation // pre_string is what goes before every amino acid // post_string is what goes after every amino acid public static String translate(String s, int frametype, String[][][] genetic_code, String initial_string, String pre_string, String post_string) { int frame = FRAME_MAPPING[frametype]; int length = s.length(); byte[] basenums = new byte[length]; for (int i = 0; i < length; i++) { switch (s.charAt(i)) { case 'A': case 'a': basenums[i] = 0; break; case 'C': case 'c': basenums[i] = 1; break; case 'G': case 'g': basenums[i] = 2; break; case 'T': case 't': basenums[i] = 3; break; default: basenums[i] = 4; break; } } String residue; // int residue_charsize = 3; int residue_charsize = genetic_code[0][0][0].length(); if (pre_string != null) { residue_charsize += pre_string.length(); } if (post_string != null) { residue_charsize += post_string.length(); } StringBuffer amino_acids = new StringBuffer(length); // StringBuffer amino_acids = //new StringBuffer(((int)(length-(int)Math.abs(frame))/3)*residue_charsize; if (initial_string != null) amino_acids.append(initial_string); // checking for no spaces, can build non-spaced faster by avoiding // amino_acids.append("") calls int extra_bases = (length - (int) Math.abs(frame)) % 3; int k = 0; if (pre_string == null && post_string == null) { for (int i = frame; i < length - 2; i += 3, k = i) { residue = genetic_code[basenums[i]][basenums[i + 1]][basenums[i + 2]]; amino_acids.append(residue); } for (int i = 0; i < extra_bases; i++) { amino_acids.append(" "); } } else { if (pre_string == null) { pre_string = ""; } if (post_string == null) { post_string = ""; } for (int i = frame; i < length - 2; i += 3) { residue = genetic_code[basenums[i]][basenums[i + 1]][basenums[i + 2]]; amino_acids.append(pre_string); 
amino_acids.append(residue); amino_acids.append(post_string); } for (int i = 0; i < extra_bases; i++) { amino_acids.append(" "); } } return amino_acids.toString(); } /** * gets a representation of the genetic code. * The three dimensions of the array returned correspond * to the three nucleotides in a codon. * Each dimension ranges from 0 to 4 * representing bases A, C, G, T, and N respectively. * Prefer the constants A, C, G, T, and N to the integers * when subscripting the array. * * @return the genetic code * expressed in one-character amino acid codes. */ public static String[][][] getGeneticCodeOne() { return aa1; } /** * gets a representation of the genetic code. * The three dimensions of the array returned correspond * to the three nucleotides in a codon. * Each dimension ranges from 0 to 4 * representing bases A, C, G, T, and N respectively. * Prefer the constants A, C, G, T, and N to the integers * when subscripting the array. * * @return the genetic code * expressed in three-character amino acid codes. */ public static String[][][] getGeneticCodeThree() { return aa3; } }