Java tutorial
/* * The MIT License (MIT) * * Copyright (c) 2007-2015 Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ /* * To change this template, choose Tools | Templates * and open the template in the editor. */ package org.broad.igv.feature; import com.google.common.base.Objects; import com.google.common.collect.Table; import com.google.common.collect.TreeBasedTable; import com.google.gson.*; import org.apache.log4j.Logger; import org.broad.igv.track.SequenceTrack; import org.broad.igv.util.ParsingUtils; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.util.*; /** * @author jrobinso */ public class AminoAcidManager { private static final Logger log = Logger.getLogger(AminoAcidManager.class); /** * File which contains listing of amino acid names. * Format: Full Name \t 3 letter abbreviation \t Single letter abbrev. */ private static final String AANameFilePath = "resources/AANamesTable.txt"; /** * Table containing mapping from string forms (full, TLA, single-letter-abbrev) * to amino acid object. No codon information stored here */ private static final Map<String, AminoAcid> AANameMap = new HashMap<String, AminoAcid>(20); private static final String[] BASE_SEQUENCES = { "TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG", "TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG", "TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG" }; static final String DEFAULT_CODON_TABLE_PATH = "resources/geneticCode.json"; static final String DEFAULT_TRANS_TABLE_PATH = "resources/defaultTranslationTables.json"; //ID of the "standard" translation table public static final int STANDARD_TABLE_ID = 1; private static final String DEFAULT_CHROMO_KEY = "default"; private LinkedHashMap<CodonTableKey, CodonTable> allCodonTables = new LinkedHashMap<CodonTableKey, CodonTable>( 20); private CodonTable currentCodonTable; private static Table<String, String, CodonTableKey> genomeChromoTable = TreeBasedTable.create(); private static AminoAcidManager instance; private AminoAcidManager() { initAANameMap(); try { loadDefaultTranslationTables(); } catch (JsonParseException e) { log.error(e); } } public static AminoAcidManager getInstance() { if (instance == null) { try { AminoAcidManager newInstance = new AminoAcidManager(); newInstance.loadCodonTables(DEFAULT_CODON_TABLE_PATH); instance = newInstance; } catch (IOException e) { handleExceptionLoading(e); } catch (JsonParseException e) { handleExceptionLoading(e); } } return instance; } /** * Reset the codon table to the default file, * and the current codon table to the default contained * in that file * * @return Instance of AminoAcidManager, for chaining */ public static AminoAcidManager resetToDefaultCodonTables() { instance = null; return getInstance(); } private static void handleExceptionLoading(Exception e) { log.error(e); if (instance == null) { throw new IllegalStateException("No codon table present, and error loading " + DEFAULT_CODON_TABLE_PATH, e); } } /** * Removes all codon tables. * Mainly for testing */ synchronized void clear() { allCodonTables.clear(); currentCodonTable = null; } /** * Each codon translation table is identified by an integer id * These are specified in the file. We specify a table * by filename/id combination * * @param codonTablePath * @param id * @return Whether setting the table was successful */ public boolean setCodonTable(String codonTablePath, int id) { CodonTableKey key = new CodonTableKey(codonTablePath, id); return setCodonTable(key); } public boolean setCodonTable(CodonTableKey key) { if (allCodonTables.containsKey(key)) { currentCodonTable = allCodonTables.get(key); return true; } else { return false; } } /** * @param codon 3-letter nucleotide sequence * @return The amino acid represented by this codon, as * decoded from the current codon table */ public AminoAcid getAminoAcid(String codon) { return currentCodonTable.getAminoAcid(codon); } /** * Return a list of amino acids for the input sequence of nucleotides * * @param direction * @param sequence * @return */ List<AminoAcid> getAminoAcids(Strand direction, String sequence) { // Sequence must be divisible by 3. It is the responsibility of the // calling program to send a sequence properly aligned. int readLength = sequence.length() / 3; List<AminoAcid> acids = new ArrayList<AminoAcid>(readLength); if (direction == Strand.NEGATIVE) { sequence = SequenceTrack.getReverseComplement(sequence); } for (int i = 0; i <= sequence.length() - 3; i += 3) { String codon = sequence.substring(i, i + 3).toUpperCase(); AminoAcid aa = currentCodonTable.getAminoAcid(codon); acids.add(aa); } if (direction == Strand.NEGATIVE) { Collections.reverse(acids); } return acids; } /** * Get the amino acid sequence for an interval. * Assumptions and conventions * * The start and end positions are on the positive strand * irrespective of the read direction. * * Reading will begin from the startPosition if strand == POSITIVE, endPosition if NEGATIVE * * @return AminoAcidSequence, or null if seqBytes == null */ public AminoAcidSequence getAminoAcidSequence(Strand strand, int start, String nucSequence) { if (nucSequence == null) { return null; } else { int l = nucSequence.length(); int rem = l % 3; int aaStart = strand == Strand.POSITIVE ? 0 : 0 + rem; List<AminoAcid> acids = getAminoAcids(strand, nucSequence); return new AminoAcidSequence(strand, start + aaStart, acids, currentCodonTable.getKey()); } } /** * Given the 'name' of an amino acid, find a match. Lookups * can be by full name, short form, or single letter. Note that * in the case of multiple matches, the first is returned. * This matters most for the stop codon, whose full name * is ambiguous (ochre, amber, opal) if the the short form * or single letter is used. * * @param name * @return */ public static AminoAcid getAminoAcidByName(String name) { initAANameMap(); AminoAcid aa = AANameMap.get(name); if (aa == null) { aa = AminoAcid.NULL_AMINO_ACID; } return aa; } public Set<String> getMappingSNPs(String codon, AminoAcid mutAA) { Set<String> mapSNPs = new HashSet<String>(); Set<String> SNPs = getAllSNPs(codon); for (String modCodon : SNPs) { //We use short name because all 3 stop codon have different long names, //and we don't care about the difference here. if (currentCodonTable.getAminoAcid(modCodon).equalsByName(mutAA.getShortName())) { mapSNPs.add(modCodon); } } return mapSNPs; } /** * Gets all possible strings which are a SNP from * the provided sequence. Does not include original in * returned set. Assumes sequence is DNA sequence, consisting * of A,T,G,C, and uses that set to create SNPs. * * @param sequence * @return */ public static Set<String> getAllSNPs(String sequence) { Set<String> SNPs = new HashSet<String>(); char[] bps = "ATGC".toCharArray(); char[] orig = sequence.toCharArray(); char[] mod; for (int loc = 0; loc < orig.length; loc++) { mod = orig.clone(); for (char bp : bps) { if (bp == orig[loc]) { continue; } mod[loc] = bp; SNPs.add(new String(mod)); } } return SNPs; } /** * Load codon tables from the specified path. If any exceptions occur * while loading, no changes are made to this instance. * <p/> * Note that the new codon tables are ADDED to the existing tables * <p/> * The currentCodonTable is set to be the codonTable with id = defaultid if present * If not, the first one in the array is set as default * * @param codonTablesPath * @return */ synchronized void loadCodonTables(String codonTablesPath) throws IOException, JsonParseException { LinkedHashMap<CodonTableKey, CodonTable> newCodonTables = new LinkedHashMap<CodonTableKey, CodonTable>(20); CodonTable defaultCodonTable = null; InputStream is = AminoAcidManager.class.getResourceAsStream(codonTablesPath); if (is == null) { is = ParsingUtils.openInputStream(codonTablesPath); } if (codonTablesPath.endsWith(".json")) { JsonObject allData = readJSONFromStream(is); int defaultId = -1; defaultId = allData.get("defaultid").getAsInt(); JsonArray codonArray = allData.get("Genetic-code-table").getAsJsonArray(); if (codonArray.size() == 0) { throw new JsonParseException("JSON File has empty array for Genetic-code-table"); } for (int ca = 0; ca < codonArray.size(); ca++) { CodonTable curTable = CodonTable.createFromJSON(codonTablesPath, codonArray.get(ca).getAsJsonObject()); newCodonTables.put(curTable.getKey(), curTable); if (defaultCodonTable == null || curTable.getId() == defaultId) { defaultCodonTable = curTable; } } } else { throw new IllegalArgumentException("Unknown file type, must be .json"); } allCodonTables.putAll(newCodonTables); currentCodonTable = defaultCodonTable; } // private static JsonObject readJSONFromStream(InputStream is) throws JsonParseException { // BufferedReader reader = new BufferedReader(new InputStreamReader(is)); // JSONTokener tokener = new JSONTokener(reader); // return new JsonObject(tokener); // } private static JsonObject readJSONFromStream(InputStream is) { BufferedReader reader = new BufferedReader(new InputStreamReader(is)); JsonParser parser = new JsonParser(); return parser.parse(reader).getAsJsonObject(); } /** * Initialize table of amino acid names, for easy lookup of * AminoAcid by symbols. This method is idempotent, only called once * to read name file. */ private synchronized static void initAANameMap() { if (!AANameMap.isEmpty()) { return; } try { InputStream is = AminoAcidManager.class.getResourceAsStream(AANameFilePath); if (is == null) { return; } BufferedReader reader = new BufferedReader(new InputStreamReader(is)); String nextLine; while ((nextLine = reader.readLine()) != null) { if (nextLine.startsWith("#")) continue; String[] tokens = nextLine.split("\t"); if (tokens.length == 3) { String fullName = tokens[0].trim(); String shortName = tokens[1].trim(); String symbol = tokens[2].trim(); assert symbol.length() == 1; AminoAcid aa = new AminoAcid(fullName, shortName, symbol.charAt(0)); for (String sym : new String[] { fullName, shortName, symbol }) { if (!AANameMap.containsKey(sym)) { AANameMap.put(sym, aa); } } } } } catch (IOException ex) { log.error(ex); throw new RuntimeException(ex); } } public Collection<CodonTable> getAllCodonTables() { return Collections.unmodifiableCollection(allCodonTables.values()); } public CodonTable getCodonTable() { return currentCodonTable; } private static void loadDefaultTranslationTables() throws JsonParseException { InputStream is = AminoAcidManager.class.getResourceAsStream(DEFAULT_TRANS_TABLE_PATH); JsonObject allData = readJSONFromStream(is); JsonArray organisms = allData.get("organisms").getAsJsonArray(); for (int ind = 0; ind < organisms.size(); ind++) { JsonObject obj = organisms.get(ind).getAsJsonObject(); //Process each translation table setting String genomeId = obj.get("genomeId").getAsString(); String codonTablePath = DEFAULT_CODON_TABLE_PATH; try { Object tmpPath = obj.get("codonTablePath"); if (tmpPath != null && tmpPath != JsonNull.INSTANCE && tmpPath instanceof String) { codonTablePath = (String) tmpPath; } } catch (JsonParseException e) { log.error("No codon table path found in " + DEFAULT_TRANS_TABLE_PATH + ". Using default: " + codonTablePath); } JsonObject chromosomes = obj.get("chromosomes").getAsJsonObject(); Iterator<Map.Entry<String, JsonElement>> iterator = chromosomes.entrySet().iterator(); while (iterator.hasNext()) { Map.Entry<String, JsonElement> entry = iterator.next(); String chromoName = entry.getKey(); int id = entry.getValue().getAsInt(); CodonTableKey key = new CodonTableKey(codonTablePath, id); genomeChromoTable.put(genomeId, chromoName, key); } } } // /** // * Load the default codon table for the given genome and chromosome. // * We check the given name, alias, and finally use the default for the specified // * genome. // * // * @param genome // * @param chrName // */ // public void loadDefaultCodonTable(Genome genome, String chrName) { // Map<String, CodonTableKey> chrMap = genomeChromoTable.row(genome.getId()); // String[] tryChromos = new String[]{ // chrName, genome.getCanonicalChrName(chrName), DEFAULT_CHROMO_KEY // }; // for (String tryChromo : tryChromos) { // if (chrMap.containsKey(tryChromo)) { // setCodonTable(chrMap.get(tryChromo)); // return; // } // } // } public static class CodonTableKey { private final String sourcePath; private final int id; private CodonTableKey(String sourcePath, int id) { this.sourcePath = sourcePath; this.id = id; } @Override public boolean equals(Object object) { if (object instanceof CodonTableKey) { CodonTableKey other = (CodonTableKey) object; return this.id == other.id && Objects.equal(this.sourcePath, other.sourcePath); } return false; } @Override public int hashCode() { return Objects.hashCode(this.sourcePath, this.id); } public int getId() { return id; } } /** * Store information about current codon translation table. * Intended to be loaded from external resource, and then never modified. * To that end, collections contained here are set to be unmodifiable */ public static class CodonTable { private final CodonTableKey key; private final List<String> names; private final Set<AminoAcid> starts; private final Map<String, AminoAcid> codonMap; /** * Get the amino acid represented by this codon * * @param codon * @return */ public AminoAcid getAminoAcid(String codon) { if (codon.length() != 3) { throw new IllegalArgumentException("Codon must be length 3: " + codon); } AminoAcid aa = codonMap.get(codon); if (aa == null) { return AminoAcid.NULL_AMINO_ACID; } return aa; } private CodonTable(String path, int id, List<String> names, Set<AminoAcid> starts, Map<String, AminoAcid> codonMap) { this.key = new CodonTableKey(path, id); this.names = Collections.unmodifiableList(names); this.starts = Collections.unmodifiableSet(starts); this.codonMap = Collections.unmodifiableMap(codonMap); } private static CodonTable createFromJSON(String sourcePath, JsonObject jsonObject) throws JsonParseException { int id = jsonObject.get("id").getAsInt(); JsonArray jsonnames = jsonObject.get("name").getAsJsonArray(); List<String> names = new ArrayList<String>(jsonnames.size()); for (int nn = 0; nn < jsonnames.size(); nn++) { names.add(jsonnames.get(nn).getAsString()); } //Data is written as several long strings which line up String aas = jsonObject.get("ncbieaa").getAsString(); String startString = jsonObject.get("sncbieaa").getAsString(); return build(sourcePath, id, names, aas, startString); } private static CodonTable build(String sourcePath, int id, List<String> names, String aas, String startString) { String base1 = BASE_SEQUENCES[0]; String base2 = BASE_SEQUENCES[1]; String base3 = BASE_SEQUENCES[2]; checkLengths(base1, base2, base3, aas, startString); Map<String, AminoAcid> codonMap = new HashMap<String, AminoAcid>(aas.length()); Set<AminoAcid> starts = new HashSet<AminoAcid>(aas.length()); for (int cc = 0; cc < aas.length(); cc++) { String codon = base1.substring(cc, cc + 1) + base2.substring(cc, cc + 1) + base3.substring(cc, cc + 1); AminoAcid aa = AANameMap.get(aas.substring(cc, cc + 1)); codonMap.put(codon, aa); if (startString.charAt(cc) == 'M') { starts.add(aa); } } return new CodonTable(sourcePath, id, names, starts, codonMap); } private static void checkLengths(String... values) { int length = values[0].length(); assert length == 64; for (int v = 1; v < values.length; v++) { if (values[v].length() != length) { String msg = "Amino acid and codon strings must all be the same length."; msg += "Expected length " + length + ", found length " + values[v].length(); throw new InputMismatchException(msg); } } } public int getId() { return key.id; } public String getDisplayName() { return names.get(0); } public Set<AminoAcid> getStarts() { return starts; } Map<String, AminoAcid> getCodonMap() { return codonMap; } @Override public boolean equals(Object object) { if (object instanceof CodonTable) { CodonTable other = (CodonTable) object; return Objects.equal(this.key, other.key) && Objects.equal(this.names, other.names) && Objects.equal(this.starts, other.starts) && Objects.equal(this.codonMap, other.codonMap); } return false; } @Override public int hashCode() { return Objects.hashCode(this.key.id, this.key.sourcePath, this.names, this.starts, this.codonMap); } public CodonTableKey getKey() { return key; } } }