Java tutorial
package org.intermine.bio.io.gff3;

/*
 * Copyright (C) 2002-2013 FlyMine
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  See the LICENSE file for more
 * information or http://www.gnu.org/copyleft/lesser.html.
 *
 */

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;

import org.apache.commons.lang3.StringEscapeUtils;
import org.intermine.util.StringUtil;
import org.intermine.util.XmlUtil;

/**
 * A class that represents one line of a GFF3 file.  Some of this code is
 * derived from BioJava.
 *
 * <p>A record holds the eight fixed GFF3 columns plus the attribute column, which is kept as an
 * insertion-ordered map from attribute name to the list of its values.  Fields that are unset in
 * the file (empty or ".") are represented as {@code null}, or as {@code -1} for the start and end
 * coordinates.
 *
 * @author Kim Rutherford
 */
public class GFF3Record
{
    private String sequenceID;
    private String source;
    private String type;
    private int start;
    private int end;
    private Double score;
    private String strand;
    private String phase;
    private Map<String, List<String>> attributes = new LinkedHashMap<String, List<String>>();

    /**
     * Create a GFF3Record from a line of a GFF3 file.
     *
     * <p>The line is split on tab characters and must contain at least eight fields; a ninth
     * field, if present, is parsed as the attribute column.
     *
     * @param line the String to parse
     * @throws IOException if there is an error during parsing the line
     */
    public GFF3Record(String line) throws IOException {
        StringTokenizer st = new StringTokenizer(line, "\t", false);

        if (st.countTokens() < 8) {
            throw new IOException("GFF line too short (" + st.countTokens() + " fields): " + line);
        }

        sequenceID = XmlUtil.fixEntityNames(URLDecoder.decode(st.nextToken(), "UTF-8")).trim();
        source = nullIfUnset(st.nextToken().trim());
        type = st.nextToken().trim();
        start = parseCoordinate(st.nextToken().trim(), "start", line);
        end = parseCoordinate(st.nextToken().trim(), "end", line);

        String scoreString = st.nextToken().trim();
        if ("".equals(scoreString) || ".".equals(scoreString)) {
            score = null;
        } else {
            try {
                score = Double.valueOf(scoreString);
            } catch (NumberFormatException nfe) {
                throw new IOException("can not parse score: " + scoreString + " from line: "
                                      + line, nfe);
            }
        }

        strand = nullIfUnset(st.nextToken().trim());
        phase = nullIfUnset(st.nextToken().trim());

        if (st.hasMoreTokens()) {
            parseAttribute(st.nextToken(), line);
        }
    }

    /**
     * Create a new GFF3Record.
     *
     * @param sequenceID the sequence name, must not be null
     * @param source the source, or null if there is no source
     * @param type the feature type, must not be null
     * @param start the start coordinate on the sequence given by sequenceID
     * @param end the end coordinate on the sequence
     * @param score the feature score or null if there is no score
     * @param strand the feature strand or null
     * @param phase the phase or null
     * @param attributes a Map from attribute name to a List of attribute values; the map is used
     * directly (not copied).  If null, the record starts with an empty attribute map.
     */
    public GFF3Record(String sequenceID, String source, String type, int start, int end,
                      Double score, String strand, String phase,
                      Map<String, List<String>> attributes) {
        this.sequenceID = sequenceID.trim();
        // a null source means "unset", exactly as produced by the parsing constructor
        if (source != null) {
            this.source = source.trim();
        }
        this.type = type.trim();
        this.start = start;
        this.end = end;
        this.score = score;
        if (strand != null) {
            this.strand = strand.trim();
        }
        if (phase != null) {
            this.phase = phase.trim();
        }
        // keep the empty map created by the field initialiser rather than storing null
        if (attributes != null) {
            this.attributes = attributes;
        }
    }

    /**
     * Map the GFF3 "unset" markers (an empty field or ".") to null.
     *
     * @param field an already trimmed field value
     * @return null if the field is unset, otherwise the field itself
     */
    private static String nullIfUnset(String field) {
        if ("".equals(field) || ".".equals(field)) {
            return null;
        }
        return field;
    }

    /**
     * Parse a start or end coordinate.
     *
     * @param value the trimmed field value
     * @param fieldName "start" or "end", used in the error message
     * @param line the whole input line, used in the error message
     * @return -1 if the value is ".", otherwise the parsed integer
     * @throws IOException if the value is not "." and is not an integer
     */
    private static int parseCoordinate(String value, String fieldName, String line)
        throws IOException {
        if (".".equals(value)) {
            return -1;
        }
        try {
            return Integer.parseInt(value);
        } catch (NumberFormatException nfe) {
            throw new IOException("can not parse integer for " + fieldName + " position: "
                                  + value + " from line: " + line, nfe);
        }
    }

    /**
     * Parse the attribute column into the attributes map.
     *
     * <p>HTML entities are unescaped first, then the column is split into name=value pairs on
     * ';' characters that are not preceded by a backslash.  Values are URL-decoded (except for
     * the Target and Gap attributes) and passed through XmlUtil.fixEntityNames().  A later pair
     * with the same name replaces an earlier one.
     *
     * @param argAttributeString the raw attribute column
     * @param line the whole input line, used in error messages
     * @throws IOException if a pair has no '=' or a quoted value is not terminated
     */
    private void parseAttribute(String argAttributeString, String line) throws IOException {
        String unescaped = StringEscapeUtils.unescapeHtml4(argAttributeString);
        String[] pairs = unescaped.split("(?<!\\\\);");

        for (String rawPair : pairs) {
            String pair = rawPair.trim();
            if (pair.length() == 0) {
                continue;
            }

            int equalsIndex = pair.indexOf("=");
            if (equalsIndex == -1) {
                throw new IOException("the attributes section must contain name=value pairs, "
                                      + "while parsing: " + line);
            }

            String attName = pair.substring(0, equalsIndex);
            List<String> valList =
                splitAttributeValues(attName, pair.substring(equalsIndex + 1).trim(), line);

            // Decode values
            for (int i = 0; i < valList.size(); i++) {
                String value = valList.get(i);
                if (!"Target".equals(attName) && !"Gap".equals(attName)) {
                    value = URLDecoder.decode(value, "UTF-8");
                }
                value = XmlUtil.fixEntityNames(value);
                valList.set(i, value);
            }
            attributes.put(attName, valList);
        }
    }

    /**
     * Split the value part of one name=value pair into its comma separated values.  A value may
     * be wrapped in double quotes, in which case it may itself contain commas.
     *
     * @param attName the attribute name, used in error messages
     * @param rawValues the trimmed text after the '='
     * @param line the whole input line, used in error messages
     * @return the list of values in file order; empty if rawValues is empty or is exactly ""
     * @throws IOException if a quoted value is empty or has no closing quote
     */
    private static List<String> splitAttributeValues(String attName, String rawValues,
                                                     String line) throws IOException {
        List<String> values = new ArrayList<String>();
        if ("\"\"".equals(rawValues)) {
            return values;
        }

        String remaining = rawValues;
        while (remaining.length() > 0) {
            if (remaining.startsWith("\"")) {
                remaining = remaining.substring(1);
                int quoteIndex = remaining.indexOf("\"");
                // NOTE(review): an empty quoted value inside a list (quoteIndex == 0) is
                // rejected here as well as a missing closing quote - confirm this is intended
                if (quoteIndex <= 0) {
                    throw new IOException("unmatched quote in this line: " + line
                                          + " (reading attribute: " + attName + ", "
                                          + remaining + ")");
                }
                values.add(remaining.substring(0, quoteIndex));
                remaining = remaining.substring(quoteIndex + 1).trim();
                if (remaining.startsWith(",")) {
                    remaining = remaining.substring(1).trim();
                }
            } else {
                int commaIndex = remaining.indexOf(",");
                if (commaIndex == -1) {
                    values.add(remaining);
                    remaining = "";
                } else {
                    values.add(remaining.substring(0, commaIndex));
                    remaining = remaining.substring(commaIndex + 1).trim();
                }
            }
        }
        return values;
    }

    /**
     * Return the first value of the named attribute.
     *
     * @param name the attribute name
     * @return the first value, or null if the attribute is absent or has no values
     */
    private String getFirstAttributeValue(String name) {
        List<String> values = getAttributes().get(name);
        if (values == null || values.isEmpty()) {
            return null;
        }
        return values.get(0);
    }

    /**
     * Return the sequenceID field of this record.
     * @return the sequenceID field of this record
     */
    public String getSequenceID() {
        return sequenceID;
    }

    /**
     * Return the source field of this record.
     * @return the source field of this record, or null if the source is unset
     */
    public String getSource() {
        return source;
    }

    /**
     * Return the type field of this record.
     * @return the type field of this record
     */
    public String getType() {
        return type;
    }

    /**
     * Set the type of this record.
     * @param type the new type
     */
    public void setType(String type) {
        this.type = type;
    }

    /**
     * Return the start field of this record.
     * @return the start field of this record (-1 if the field was "." in the parsed line)
     */
    public int getStart() {
        return start;
    }

    /**
     * Return the end field of this record.
     * @return the end field of this record (-1 if the field was "." in the parsed line)
     */
    public int getEnd() {
        return end;
    }

    /**
     * Return the score field of this record.
     * @return the score field of this record, or null if there is no score
     */
    public Double getScore() {
        return score;
    }

    /**
     * Return the strand field of this record.
     * @return returns null if the strand is unset (ie. with an empty field or contained "." in the
     * original GFF3 file)
     */
    public String getStrand() {
        return strand;
    }

    /**
     * Return the phase field of this record.
     * @return returns null if the phase is unset (ie. with an empty field or contained "." in the
     * original GFF3 file)
     */
    public String getPhase() {
        return phase;
    }

    /**
     * Return the first value of the ID field from the attributes of this record.
     * @return the ID from the attributes of this record or null if there isn't a value
     */
    public String getId() {
        return getFirstAttributeValue("ID");
    }

    /**
     * Set the Id of this GFF3Record, replacing any existing ID values.
     * @param id the new id
     */
    public void setId(String id) {
        attributes.put("ID", Collections.singletonList(id));
    }

    /**
     * Return the list of values of the Name field from the attributes of this record.
     * @return the Names from the attributes of this record or null if there is no Name attribute
     */
    public List<String> getNames() {
        return getAttributes().get("Name");
    }

    /**
     * Return the first value of the Alias field from the attributes of this record.
     * @return the Alias from the attributes of this record or null if there isn't a value
     */
    public String getAlias() {
        return getFirstAttributeValue("Alias");
    }

    /**
     * Return the list of values of the Parent field from the attributes of this record.
     * @return the Parents from the attributes of this record or null if there is no Parent
     * attribute
     */
    public List<String> getParents() {
        return getAttributes().get("Parent");
    }

    /**
     * Return the first value of the Target field from the attributes of this record.
     * @return the Target from the attributes of this record or null if there isn't a value
     */
    public String getTarget() {
        return getFirstAttributeValue("Target");
    }

    /**
     * Return the first value of the Gap field from the attributes of this record.
     * @return the Gap from the attributes of this record or null if there isn't a value
     */
    public String getGap() {
        return getFirstAttributeValue("Gap");
    }

    /**
     * Return the first value of the Note field from the attributes of this record.
     * @return the Note from the attributes of this record or null if there isn't a value
     */
    public String getNote() {
        return getFirstAttributeValue("Note");
    }

    /**
     * Return the list of values of the Dbxref field from the attributes of this record.
     * @return the Dbxrefs from the attributes of this record or null if there is no Dbxref
     * attribute
     */
    public List<String> getDbxrefs() {
        return getAttributes().get("Dbxref");
    }

    /**
     * Return the first value of the Ontology_term field from the attributes of this record.
     * @return the Ontology_term from the attributes of this record or null if there isn't a value
     */
    public String getOntologyTerm() {
        return getFirstAttributeValue("Ontology_term");
    }

    /**
     * Return the attributes of this record as a Map from attribute key to Lists of attribute
     * values.  The returned map is the record's own map, not a copy.
     * @return the attributes of this record
     */
    public Map<String, List<String>> getAttributes() {
        return attributes;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public String toString() {
        return "<GFF3Record: sequenceID: " + sequenceID + " source: " + source + " type: "
            + type + " start: " + start + " end: " + end + " score: " + score + " strand: "
            + strand + " phase: " + phase + " attributes: " + attributes + ">";
    }

    /**
     * Return this record in GFF format.  The String is suitable for output to a GFF file.
     * Unset source, score, strand and phase fields are written as ".".
     * @return a GFF line
     */
    public String toGFF3() {
        try {
            return URLEncoder.encode(sequenceID, "UTF-8") + "\t"
                + ((source == null) ? "." : source) + "\t"
                + type + "\t" + start + "\t" + end + "\t"
                + ((score == null) ? "." : score.toString()) + "\t"
                + ((strand == null) ? "." : strand) + "\t"
                + ((phase == null) ? "." : phase) + "\t"
                + writeAttributes();
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException("error while encoding: " + sequenceID, e);
        }
    }

    /**
     * Format the attributes map as a GFF3 attribute column: name=value pairs separated by ';',
     * with multiple values of one attribute separated by ','.  Values are URL-encoded, except
     * that spaces and colons are written literally.
     *
     * @return the attribute column text, empty if there are no attributes
     */
    private String writeAttributes() {
        StringBuilder sb = new StringBuilder();
        boolean first = true;
        for (Map.Entry<String, List<String>> entry : attributes.entrySet()) {
            if (!first) {
                sb.append(";");
            }
            first = false;

            List<String> encodedList = new ArrayList<String>();
            // iterate as Object so that a null element is written as "null" rather than failing
            for (Object oldValue : entry.getValue()) {
                String newValue;
                try {
                    newValue = URLEncoder.encode(String.valueOf(oldValue), "UTF-8");
                    // URLEncoder writes a space as "+" (a literal plus becomes %2B), so every
                    // "+" in the encoded text stands for a space
                    newValue = newValue.replace("+", " ");
                    newValue = newValue.replace("%3A", ":");
                } catch (UnsupportedEncodingException e) {
                    throw new RuntimeException("error while encoding: " + oldValue, e);
                }
                encodedList.add(newValue);
            }

            sb.append(entry.getKey()).append("=").append(StringUtil.join(encodedList, ","));
        }
        return sb.toString();
    }
}