org.intermine.bio.dataconversion.GoAnnotationDbConverter.java Source code

Java tutorial

Introduction

Here is the source code for org.intermine.bio.dataconversion.GoAnnotationDbConverter.java

Source

package org.intermine.bio.dataconversion;

import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;

/*
 * Copyright (C) 2002-2016 FlyMine
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  See the LICENSE file for more
 * information or http://www.gnu.org/copyleft/lesser.html.
 *
 */

import org.intermine.dataconversion.ItemWriter;
import org.intermine.metadata.Model;
import org.intermine.metadata.StringUtil;
import org.intermine.metadata.TypeUtil;
import org.intermine.model.bio.BioEntity;
import org.intermine.objectstore.ObjectStoreException;
import org.intermine.sql.Database;
import org.intermine.util.PropertiesUtil;
import org.intermine.xml.full.Item;
import org.intermine.xml.full.ReferenceList;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.sql.Array;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

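/**
 * Converts GO annotations read from a database into InterMine items: GO terms, GO
 * annotations, evidence and evidence codes, publications, data sets, data sources and the
 * annotated gene products.
 *
 * @author
 */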
public class GoAnnotationDbConverter extends BioDBConverter {

    // configuration maps
    // 'with' identifier prefix -> name of the class to create; filled in the static initialiser
    private static final Map<String, String> WITH_TYPES = new LinkedHashMap<String, String>();

    // maps retained across all files
    // GO term identifier -> identifier of the stored term item
    protected Map<String, String> goTerms = new LinkedHashMap<String, String>();
    // combined evidence key (code:annotation type[:with text]) -> identifier of the stored code item
    private Map<String, String> evidenceCodes = new LinkedHashMap<String, String>();
    // data source code -> identifier of the stored DataSet item
    private Map<String, String> dataSets = new LinkedHashMap<String, String>();
    // PubMed id (or the cross reference id when there is no PubMed id) -> Publication item identifier
    private Map<String, String> publications = new LinkedHashMap<String, String>();
    // taxon id -> Organism item
    private Map<String, Item> organisms = new LinkedHashMap<String, Item>();
    // product key (see makeProductKey) -> identifier of the stored product item
    protected Map<String, String> productMap = new LinkedHashMap<String, String>();
    // database reference values that have already been stored
    private Set<String> dbRefs = new HashSet<String>();
    @SuppressWarnings("unused")
    private Map<String, String> databaseAbbreviations = new HashMap<String, String>();
    // data source name -> identifier of the stored DataSource item
    private Map<String, String> dataSources = new HashMap<String, String>();

    // maps renewed for each file
    // NOTE(review): nothing in the visible part of this class resets these maps - confirm
    // annotation key -> all evidence collected for that annotation
    private Map<GoTermToGene, Set<Evidence>> goTermGeneToEvidence = new LinkedHashMap<GoTermToGene, Set<Evidence>>();
    // stored product id -> identifiers of the annotation items belonging to that product
    private Map<Integer, List<String>> productCollectionsMap = new LinkedHashMap<Integer, List<String>>();
    // product item identifier -> id returned by store() for that product
    private Map<String, Integer> storedProductIds = new HashMap<String, Integer>();

    // These should be altered for different ontologies:
    protected String termClassName = "GOTerm";
    protected String termCollectionName = "goAnnotation";
    protected String annotationClassName = "GOAnnotation";
    private String gaff = "2.0";
    private static final String DEFAULT_ANNOTATION_TYPE = "gene";
    private static final String DEFAULT_IDENTIFIER_FIELD = "primaryIdentifier";

    private static final Logger LOG = Logger.getLogger(GoAnnotationDbConverter.class);

    // settings specific to this source
    private static final String DATASET_TITLE = "SGD curated GO data";
    private static final String DATA_SOURCE_NAME = "SGD";
    // supplies the result set of GO annotation rows
    private static final GoAnnotationDbProcessor PROCESSOR = new GoAnnotationDbProcessor();
    // typed explicitly; the original used a raw HashMap
    private Map<String, Item> genes = new HashMap<String, Item>();
    private static final Integer TAXON_ID = 4932;
    private static final Integer HUMAN_TAXON_ID = 9606;
    //private Item organism;

    /**
     * Constructor
     *
     * @param database the database handed on to the superclass; process() takes its
     *        connection from it
     * @param model the Model
     * @param writer the ItemWriter used to handle the resultant items
     * @throws ObjectStoreException declared because the superclass constructor is invoked;
     *         presumably raised when storing or finding the Model fails (confirm against
     *         the superclass)
     */
    public GoAnnotationDbConverter(Database database, Model model, ItemWriter writer) throws ObjectStoreException {
        super(database, model, writer);
    }

    /**
     * Sets the file format for the GAF.  2.0 is the default.
     * The value is only stored; nothing in the visible part of this class reads it.
     *
     * @param gaff GO annotation file format
     */
    public void setGaff(String gaff) {
        this.gaff = gaff;
    }

    // Registers the 'with' identifier prefixes that createWithObjects() recognises, each
    // mapped to the name of the class created for it.
    static {
        WITH_TYPES.put("FB", "Gene");
        WITH_TYPES.put("UniProt", "Protein");
    }

    /**
     * {@inheritDoc}
     *
     * Takes a connection from the configured database, converts every GO annotation read
     * through it and closes the connection again, also when the conversion fails.
     */
    public void process() throws Exception {
        Connection connection = getDatabase().getConnection();
        try {
            processGOAnnotations(connection);
        } finally {
            // the connection was obtained here, so it is released here
            connection.close();
        }
    }

    /**
     * Reads every GO annotation row supplied by the processor, turns each row into items and
     * finally stores the product collections and the collected evidence.
     *
     * @param connection connection handed to the processor to run its query
     * @throws SQLException if reading from the result set fails
     * @throws ObjectStoreException if an item cannot be stored
     * @throws IllegalArgumentException if a row has no evidence code or an invalid taxon id
     */
    public void processGOAnnotations(Connection connection) throws SQLException, ObjectStoreException {

        Item organism = newOrganism(String.valueOf(TAXON_ID));

        ResultSet res = PROCESSOR.getGOAnnotations(connection);
        try {
            while (res.next()) {
                processAnnotationRow(res, organism);
            }
        } finally {
            // release the cursor even when a row cannot be converted
            res.close();
        }
        storeProductCollections();
        storeEvidence();
    }

    /**
     * Converts the current row of the result set into a product, a GO term, an annotation
     * and its evidence, merging evidence into an existing annotation when the row's
     * annotation key is already known.
     */
    private void processAnnotationRow(ResultSet res, Item organism)
            throws SQLException, ObjectStoreException {
        String productId = res.getString("sgdid");
        String qualifier = res.getString("go_qualifier");
        String goId = res.getString("goid");
        String pub = res.getString("pmid");
        String pubxref = res.getString("sgdrefid");
        String strEvidence = res.getString("evidence_code");
        String[] with = toStringArray(res.getArray("withText"));
        String annotType = res.getString("annotation_type");
        // the parsed value is not used, but a malformed taxon column still rejects the row
        parseTaxonId(res.getString("taxid"));
        String dataSourceCode = res.getString("source");
        String[] extensionValues = toStringArray(res.getArray("annotext"));
        String[] extensionRelations = toStringArray(res.getArray("annotextension"));

        String annotationExtension = joinAnnotationExtension(extensionRelations, extensionValues);
        String withText = joinNonNull(with, ",");

        if (StringUtils.isEmpty(strEvidence)) {
            throw new IllegalArgumentException("Evidence is a required column but not " + "found for goterm "
                    + goId + " and productId " + productId);
        }
        storeEvidenceCode(strEvidence, annotType, withText);

        // create unique key for go annotation
        GoTermToGene key = new GoTermToGene(productId, goId, qualifier, withText, annotationExtension, pub,
                pubxref);

        String dataSource = DATA_SOURCE_NAME;
        String type = DEFAULT_ANNOTATION_TYPE;

        String productIdentifier = newProduct(productId, type, organism, dataSource, dataSourceCode, true,
                DEFAULT_IDENTIFIER_FIELD);
        if (productIdentifier == null) {
            // no product, nothing to annotate
            return;
        }

        String pubRefId = newPublication(pub, pubxref); // null if no pub found

        // must be built exactly like the key used by storeEvidenceCode()
        String newStrEvidence = strEvidence + ":" + annotType;
        if (!StringUtils.isEmpty(withText)) {
            newStrEvidence += ":" + withText;
        }

        // evidence already collected for this annotation key, if any
        Set<Evidence> allEvidenceForAnnotation = goTermGeneToEvidence.get(key);

        if (allEvidenceForAnnotation == null) {
            // first row for this key: create the term, the annotation and its first evidence
            String goTermIdentifier = newGoTerm(goId, dataSource, dataSourceCode);
            Evidence evidence = new Evidence(newStrEvidence, pubRefId, withText, organism, dataSource,
                    dataSourceCode);
            allEvidenceForAnnotation = new LinkedHashSet<Evidence>();
            allEvidenceForAnnotation.add(evidence);
            goTermGeneToEvidence.put(key, allEvidenceForAnnotation);
            Integer storedAnnotationId = createGoAnnotation(productIdentifier, type, goTermIdentifier,
                    organism, qualifier, dataSource, dataSourceCode, annotationExtension);
            evidence.setStoredAnnotationId(storedAnnotationId);
            return;
        }

        boolean seenEvidenceCode = false;
        Integer storedAnnotationId = null;
        for (Evidence evidence : allEvidenceForAnnotation) {
            storedAnnotationId = evidence.getStoredAnnotationId();
            // already have evidence code, just add pub
            if (evidence.getEvidenceCode().equals(newStrEvidence)) {
                evidence.addPublicationRefId(pubRefId);
                seenEvidenceCode = true;
            }
        }
        if (!seenEvidenceCode) {
            // new evidence code for a known annotation: attach it to the stored annotation
            Evidence evidence = new Evidence(newStrEvidence, pubRefId, withText, organism, dataSource,
                    dataSourceCode);
            evidence.setStoredAnnotationId(storedAnnotationId);
            allEvidenceForAnnotation.add(evidence);
        }
    }

    /**
     * Unwraps a SQL array of strings; a SQL NULL (reported by JDBC as a null reference)
     * becomes an empty array.
     */
    private static String[] toStringArray(Array sqlArray) throws SQLException {
        if (sqlArray == null) {
            return new String[0];
        }
        String[] values = (String[]) sqlArray.getArray();
        return (values == null) ? new String[0] : values;
    }

    /**
     * Renders the extension pairs as "relation (value)" entries separated by "; ". Entries
     * without a relation are skipped; a missing value is rendered as the text "null", as the
     * plain string concatenation used previously did.
     */
    private static String joinAnnotationExtension(String[] relations, String[] values) {
        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < relations.length; i++) {
            if (relations[i] == null) {
                continue;
            }
            // the two arrays are read from separate columns; do not assume equal lengths
            String value = (i < values.length) ? values[i] : null;
            if (joined.length() > 0) {
                joined.append("; ");
            }
            joined.append(relations[i]).append(" (").append(value).append(")");
        }
        return joined.toString();
    }

    /** Joins the non-null elements with the separator, without a leading or trailing one. */
    private static String joinNonNull(String[] values, String separator) {
        StringBuilder joined = new StringBuilder();
        for (String value : values) {
            if (value == null) {
                continue;
            }
            if (joined.length() > 0) {
                joined.append(separator);
            }
            joined.append(value);
        }
        return joined.toString();
    }

    /**
     * Stores, for every product recorded in productCollectionsMap, the collection of
     * annotation identifiers gathered for it, under the configured collection name.
     *
     * @throws ObjectStoreException if a collection cannot be stored
     */
    private void storeProductCollections() throws ObjectStoreException {
        for (Integer productObjectId : productCollectionsMap.keySet()) {
            List<String> collectedAnnotations = productCollectionsMap.get(productObjectId);
            store(new ReferenceList(termCollectionName, collectedAnnotations), productObjectId);
        }
    }

    /**
     * Creates and stores one GOEvidence item per collected evidence entry and then attaches
     * the stored evidence items to their annotation as its "evidence" collection. The 'with'
     * text of the evidence is not written to the evidence item.
     *
     * @throws ObjectStoreException if an item or collection cannot be stored
     */
    private void storeEvidence() throws ObjectStoreException {
        for (Set<Evidence> evidenceForAnnotation : goTermGeneToEvidence.values()) {
            List<String> storedEvidenceIds = new ArrayList<String>();
            // every entry of one set points at the same annotation; the last one read wins
            Integer annotationObjectId = null;

            for (Evidence current : evidenceForAnnotation) {
                Item evidenceItem = createItem("GOEvidence");
                evidenceItem.setReference("code", evidenceCodes.get(current.getEvidenceCode()));

                List<String> publicationIds = current.getPublications();
                if (!publicationIds.isEmpty()) {
                    evidenceItem.setCollection("publications", publicationIds);
                }

                store(evidenceItem);
                storedEvidenceIds.add(evidenceItem.getIdentifier());
                annotationObjectId = current.getStoredAnnotationId();
            }

            store(new ReferenceList("evidence", new ArrayList<String>(storedEvidenceIds)), annotationObjectId);
        }
    }

    /**
     * Builds and stores an annotation item linking a product to an ontology term.
     *
     * @param productIdentifier item identifier of the annotated product
     * @param productType type of the product; only "gene" products get the annotation added
     *        to their product collection
     * @param termIdentifier item identifier of the ontology term
     * @param organism not used by this method
     * @param qualifier qualifier attribute, skipped when empty
     * @param dataSource data source name used to look up the data set
     * @param dataSourceCode data source code used to look up the data set
     * @param annotationExtension annotation extension attribute, skipped when empty
     * @return the id returned by store() for the annotation
     * @throws ObjectStoreException if storing fails
     */
    private Integer createGoAnnotation(String productIdentifier, String productType, String termIdentifier,
            Item organism, String qualifier, String dataSource, String dataSourceCode, String annotationExtension)
            throws ObjectStoreException {
        Item annotation = createItem(annotationClassName);
        annotation.setReference("subject", productIdentifier);
        annotation.setReference("ontologyTerm", termIdentifier);

        if (StringUtils.isNotEmpty(qualifier)) {
            annotation.setAttribute("qualifier", qualifier);
        }
        if (StringUtils.isNotEmpty(annotationExtension)) {
            annotation.setAttribute("annotationExtension", annotationExtension);
        }

        String dataSetRefId = getDataset(dataSource, dataSourceCode);
        annotation.addToCollection("dataSets", dataSetRefId);

        boolean annotatesGene = "gene".equals(productType);
        if (annotatesGene) {
            addProductCollection(productIdentifier, annotation.getIdentifier());
        }
        return store(annotation);
    }

    /**
     * Remembers that the annotation belongs to the product, so the product's annotation
     * collection can be stored later. The list is kept under the product's stored id.
     *
     * @param productIdentifier item identifier of the product
     * @param goAnnotationIdentifier item identifier of the annotation
     */
    private void addProductCollection(String productIdentifier, String goAnnotationIdentifier) {
        Integer productObjectId = storedProductIds.get(productIdentifier);
        if (!productCollectionsMap.containsKey(productObjectId)) {
            productCollectionsMap.put(productObjectId, new ArrayList<String>());
        }
        productCollectionsMap.get(productObjectId).add(goAnnotationIdentifier);
    }

    /**
     * Parses the 'with' text of an annotation for entries of the form prefix:identifier and
     * creates a product item for every entry whose prefix is registered in WITH_TYPES.
     *
     * @param withText 'with' text; entries are separated by ';', ' ', '|' or ','
     * @param organism organism to reference
     * @param dataSource data source name passed on when creating products
     * @param dataSourceCode data source code passed on when creating products
     * @throws ObjectStoreException if problem when storing
     * @return the item identifiers of the products created or found
     */
    protected List<String> createWithObjects(String withText, Item organism, String dataSource,
            String dataSourceCode) throws ObjectStoreException {

        List<String> createdProducts = new ArrayList<String>();
        try {
            for (String rawElement : withText.split("[; |,]")) {
                String token = rawElement.trim();
                int colon = token.indexOf(':');
                // rely on the format being type:identifier
                if (colon <= 0) {
                    continue;
                }
                String prefix = token.substring(0, colon);
                String value = token.substring(colon + 1);

                if (!WITH_TYPES.containsKey(prefix) || StringUtils.isEmpty(value)) {
                    LOG.debug("createWithObjects skipping a withType prefix:" + prefix);
                    continue;
                }

                String className = WITH_TYPES.get(prefix);
                String productIdentifier = null;
                if ("UniProt".equals(prefix)) {
                    // a UniProt protein may be from a different organism, so none is set
                    productIdentifier = newProduct(value, className, organism, dataSource, dataSourceCode,
                            false, null);
                } else if (!"FB".equals(prefix)
                        || "7227".equals(organism.getAttribute("taxonId").getValue())) {
                    // FB entries are only created when the organism has taxon id 7227
                    // TODO could still be wrong as the FBgn could be a different species
                    productIdentifier = newProduct(value, className, organism, dataSource, dataSourceCode,
                            true, "primaryIdentifier");
                }

                if (productIdentifier != null) {
                    createdProducts.add(productIdentifier);
                }
            }
        } catch (RuntimeException e) {
            LOG.error("createWithObjects broke with: " + withText);
            throw e;
        }
        return createdProducts;
    }

    /**
     * Returns the item identifier of the product with the given accession and type, creating
     * and storing the product item the first time it is requested.
     *
     * @param identifier accession of the product
     * @param type "gene", "protein" (both case-insensitive) or the name of a model class
     *        that is a BioEntity
     * @param organism organism item; referenced from the product when createOrganism is set
     * @param dataSource data source name used to look up the data set
     * @param dataSourceCode data source code used to look up the data set
     * @param createOrganism whether the product should reference the organism
     * @param field attribute receiving the accession; proteins always use
     *        "primaryAccession", and the default identifier field is used when this is null
     * @return the item identifier of the product
     * @throws ObjectStoreException if the product cannot be stored
     * @throws IllegalArgumentException if the type or identifier is null or the type is not
     *         recognised
     */
    private String newProduct(String identifier, String type, Item organism, String dataSource,
            String dataSourceCode, boolean createOrganism, String field) throws ObjectStoreException {
        // the organism is deliberately not part of the key; building the key first also
        // rejects a null type or identifier before anything else is attempted
        String key = makeProductKey(identifier, type, organism, false);
        String knownProduct = productMap.get(key);
        if (knownProduct != null) {
            return knownProduct;
        }

        // without an explicit field the accession would otherwise be set on a null attribute
        String idField = (field == null) ? DEFAULT_IDENTIFIER_FIELD : field;
        String clsName = null;
        if ("gene".equalsIgnoreCase(type)) {
            clsName = "Gene";
        } else if ("protein".equalsIgnoreCase(type)) {
            // TODO use values in config
            clsName = "Protein";
            idField = "primaryAccession";
        } else {
            // any other type has to name a class of the model that is a BioEntity
            String typeCls = TypeUtil.javaiseClassName(type);
            if (getModel().getClassDescriptorByName(typeCls) != null) {
                Class<?> cls = getModel().getClassDescriptorByName(typeCls).getType();
                if (BioEntity.class.isAssignableFrom(cls)) {
                    clsName = typeCls;
                }
            }
            if (clsName == null) {
                throw new IllegalArgumentException("Unrecognised annotation type '" + type + "'");
            }
        }

        Item product = createItem(clsName);
        if (organism != null && createOrganism) {
            product.setReference("organism", organism.getIdentifier());
        }
        product.setAttribute(idField, identifier);
        product.addToCollection("dataSets", getDataset(dataSource, dataSourceCode));

        // the stored id is needed later to attach the annotation collection to the product
        Integer storedProductId = store(product);
        storedProductIds.put(product.getIdentifier(), storedProductId);
        productMap.put(key, product.getIdentifier());
        return product.getIdentifier();
    }

    /**
     * Builds the lookup key for a product: the identifier followed by the lower-cased type
     * and, when requested, the organism's item identifier.
     *
     * @param identifier product identifier, must not be null
     * @param type product type, must not be null
     * @param organism organism item, only read when createOrganism is true
     * @param createOrganism whether the organism identifier is appended to the key
     * @return the key
     * @throws IllegalArgumentException if type or identifier is null
     */
    private String makeProductKey(String identifier, String type, Item organism, boolean createOrganism) {
        if (type == null) {
            throw new IllegalArgumentException("No type provided when creating " + organism + ": " + identifier);
        }
        if (identifier == null) {
            throw new IllegalArgumentException("No identifier provided when creating " + organism + ": " + type);
        }

        StringBuilder key = new StringBuilder(identifier);
        key.append(type.toLowerCase());
        if (createOrganism) {
            key.append(organism.getIdentifier());
        }
        return key.toString();
    }

    /**
     * Returns the given term identifier unchanged.
     *
     * @param identifier term identifier
     * @return the same identifier
     */
    private String resolveTerm(String identifier) {
        return identifier;
    }

    /**
     * Returns the item identifier of the term with the given identifier, creating and
     * storing the term item on first use.
     *
     * @param identifier term identifier; null yields null
     * @param dataSource data source name used to look up the data set
     * @param dataSourceCode data source code used to look up the data set
     * @return the item identifier of the term, or null when no identifier was given
     * @throws ObjectStoreException if the term cannot be stored
     */
    private String newGoTerm(String identifier, String dataSource, String dataSourceCode)
            throws ObjectStoreException {
        if (identifier == null) {
            return null;
        }

        String knownTerm = goTerms.get(identifier);
        if (knownTerm != null) {
            return knownTerm;
        }

        Item term = createItem(termClassName);
        term.setAttribute("identifier", identifier);
        term.addToCollection("dataSets", getDataset(dataSource, dataSourceCode));
        store(term);

        String termRefId = term.getIdentifier();
        goTerms.put(identifier, termRefId);
        return termRefId;
    }

    /**
     * Stores a GOEvidenceCode item for the combination of evidence code, annotation type and
     * 'with' text, unless that combination has been stored before.
     *
     * @param code evidence code
     * @param annotType annotation type; only set on the item when not empty
     * @param withText 'with' text; only set on the item, and only part of the key, when not
     *        empty
     * @throws ObjectStoreException if the item cannot be stored
     */
    private void storeEvidenceCode(String code, String annotType, String withText) throws ObjectStoreException {
        boolean hasWithText = withText != null && !withText.isEmpty();

        // the key always contains the annotation type, even when it is null, because
        // processGOAnnotations() builds its lookup key the same way
        String combinationCode = code + ":" + annotType;
        if (hasWithText) {
            combinationCode += ":" + withText;
        }

        if (evidenceCodes.get(combinationCode) != null) {
            return;
        }

        Item item = createItem("GOEvidenceCode");
        item.setAttribute("code", code);
        // a row without annotation type must not fail here
        if (annotType != null && !annotType.isEmpty()) {
            item.setAttribute("annotType", annotType);
        }
        if (hasWithText) {
            item.setAttribute("withText", withText);
        }
        store(item);
        evidenceCodes.put(combinationCode, item.getIdentifier());
        LOG.debug("Stored evidence code: " + combinationCode);
    }

    /**
     * Translates a data source code into a data source name. Codes without a rewrite rule
     * are returned unchanged.
     *
     * @param sourceCode data source code, may be null
     * @return the data source name, or null when no code was given
     */
    private String getDataSourceCodeName(String sourceCode) {
        // getDataSource() accepts a null name, so a missing code is passed through here
        if (sourceCode == null) {
            return null;
        }

        String title = sourceCode;

        // re-write some codes to better data source names
        if ("UniProtKB".equalsIgnoreCase(sourceCode)) {
            title = "UniProt";
        } else if ("FB".equalsIgnoreCase(sourceCode)) {
            title = "FlyBase";
        } else if ("WB".equalsIgnoreCase(sourceCode)) {
            title = "WormBase";
        } else if ("SP".equalsIgnoreCase(sourceCode)) {
            title = "UniProt";
        } else if (sourceCode.startsWith("GeneDB")) {
            title = "GeneDB";
        } else if ("SANGER".equalsIgnoreCase(sourceCode)) {
            title = "GeneDB";
        } else if ("GOA".equalsIgnoreCase(sourceCode)) {
            title = "Gene Ontology";
        } else if ("PINC".equalsIgnoreCase(sourceCode)) {
            title = "Proteome Inc.";
        } else if ("Pfam".equalsIgnoreCase(sourceCode)) {
            title = "PFAM"; // to merge with interpro
        }
        return title;
    }

    /**
     * Returns the item identifier of the data set kept for the given code, creating and
     * storing the DataSet item the first time the code is seen.
     *
     * @param dataSource data source code or name the data set refers to
     * @param code data source code; used as the cache key and to build the data set name
     * @return the item identifier of the data set
     * @throws ObjectStoreException if the data set cannot be stored
     */
    private String getDataset(String dataSource, String code) throws ObjectStoreException {
        String knownDataSet = dataSets.get(code);
        if (knownDataSet != null) {
            return knownDataSet;
        }

        String title = "GO Annotation from " + getDataSourceCodeName(code);
        Item dataSet = createItem("DataSet");
        dataSet.setAttribute("name", title);
        String dataSourceRefId = getDataSource(getDataSourceCodeName(dataSource));
        dataSet.setReference("dataSource", dataSourceRefId);

        String dataSetRefId = dataSet.getIdentifier();
        dataSets.put(code, dataSetRefId);
        store(dataSet);
        return dataSetRefId;
    }

    /**
     * Returns the item identifier of the DataSource with the given name, creating and
     * storing the item the first time the name is seen.
     *
     * @param name the DataSource name
     * @return the item identifier of the DataSource, or null when name is null
     * @throws RuntimeException if the DataSource cannot be stored
     */
    public String getDataSource(String name) {
        if (name == null) {
            return null;
        }

        String knownSource = dataSources.get(name);
        if (knownSource != null) {
            return knownSource;
        }

        Item sourceItem = createItem("DataSource");
        sourceItem.setAttribute("name", name);
        try {
            store(sourceItem);
        } catch (ObjectStoreException e) {
            throw new RuntimeException("failed to store DataSource with name: " + name, e);
        }

        String sourceRefId = sourceItem.getIdentifier();
        dataSources.put(name, sourceRefId);
        return sourceRefId;
    }

    /**
     * Returns the item identifier of the publication described by the two ids, creating and
     * storing a Publication item on first use. Publications are remembered under their
     * PubMed id, or under the cross reference id when there is no PubMed id.
     *
     * @param pubMedId PubMed id, may be null
     * @param sgdId cross reference id, may be null
     * @return the item identifier of the publication, or null when both ids are null
     * @throws ObjectStoreException if the publication cannot be stored
     */
    private String newPublication(String pubMedId, String sgdId) throws ObjectStoreException {
        // it is assumed there will be one publication per line
        String cacheKey = (pubMedId != null) ? pubMedId : sgdId;
        if (cacheKey == null) {
            return null;
        }

        String pubRefId = publications.get(cacheKey);
        if (pubRefId == null) {
            Item publication = createItem("Publication");
            // only the ids that are present become attributes
            if (pubMedId != null) {
                publication.setAttribute("pubMedId", pubMedId);
            }
            if (sgdId != null) {
                publication.setAttribute("pubXrefId", sgdId);
            }
            pubRefId = publication.getIdentifier();
            publications.put(cacheKey, pubRefId);
            store(publication);
        }
        return pubRefId;
    }

    /**
     * Creates and stores a DatabaseReference item for a value such as "FB:FBrf0055969". When
     * the value consists of exactly two parts separated by ':', the first part is
     * translated into a data source and the second part becomes the identifier.
     *
     * @param value reference value, optionally prefixed with a data source code
     * @return the item identifier of the new reference, or null when the value is empty or
     *         a reference for the same value has been created before
     * @throws ObjectStoreException if the reference cannot be stored
     */
    private String createDbReference(String value) throws ObjectStoreException {
        if (StringUtils.isEmpty(value)) {
            return null;
        }
        if (dbRefs.contains(value)) {
            return null;
        }

        String identifier = value;
        String dataSource = null;
        // FB:FBrf0055969
        if (value.contains(":")) {
            String[] bits = value.split(":");
            if (bits.length == 2) {
                dataSource = getDataSourceCodeName(bits[0]);
                identifier = bits[1];
            }
        }

        Item item = createItem("DatabaseReference");
        item.setAttribute("identifier", identifier);
        if (StringUtils.isNotEmpty(dataSource)) {
            item.setReference("source", getDataSource(dataSource));
        }
        // remember the value exactly as it was passed in: the check above looks for that
        // form, so recording the stripped identifier would never match a prefixed value
        dbRefs.add(value);
        store(item);
        return item.getIdentifier();
    }

    /**
     * Returns the Organism item for the taxon id, creating and storing it on first use.
     *
     * @param taxonId taxon id of the organism
     * @return the Organism item
     * @throws ObjectStoreException if the organism cannot be stored
     */
    private Item newOrganism(String taxonId) throws ObjectStoreException {
        Item known = organisms.get(taxonId);
        if (known != null) {
            return known;
        }

        Item created = createItem("Organism");
        created.setAttribute("taxonId", taxonId);
        organisms.put(taxonId, created);
        store(created);
        return created;
    }

    /**
     * Extracts the taxon id from a value such as "taxon:4932" or "taxon:4932|taxon:559292".
     * The id is the text between the first and the second ':' of the input, cut off at the
     * first '|'.
     *
     * @param input raw taxon value
     * @return the taxon id
     * @throws IllegalArgumentException if the input is null, contains no ':' followed by
     *         text, or yields an empty id
     */
    private String parseTaxonId(String input) {
        if (input == null) {
            throw new IllegalArgumentException("Invalid taxon id read: " + input);
        }
        String[] parts = input.split(":");
        // covers both "taxon:" (split drops the trailing empty part) and input without ':'
        if (parts.length < 2) {
            throw new IllegalArgumentException("Invalid taxon id read: " + input);
        }

        String taxonId = parts[1];
        int pipe = taxonId.indexOf('|');
        if (pipe >= 0) {
            // several ids were listed; only the first one is used
            taxonId = taxonId.substring(0, pipe);
        }
        if (taxonId.isEmpty()) {
            throw new IllegalArgumentException("Invalid taxon id read: " + input);
        }
        return taxonId;
    }

    /**
     * One piece of evidence for an annotation: an evidence code together with the
     * publications supporting it and the id of the stored annotation it belongs to.
     */
    private class Evidence {
        private List<String> publicationRefIds = new ArrayList<String>();
        private String evidenceCode = null;
        // id returned by store() for the annotation; set once the annotation has been stored
        private Integer storedAnnotationId = null;
        private String withText = null;
        private Item organism = null;
        private String dataSourceCode = null;
        private String dataSource = null;

        /**
         * Constructor. The last two parameters are in the order used by every caller (data
         * source name first, code second); they used to be assigned the other way round.
         *
         * @param evidenceCode evidence code
         * @param publicationRefId item identifier of a supporting publication, may be null
         * @param withText 'with' text of the annotation
         * @param organism organism item
         * @param dataSource data source name
         * @param dataSourceCode data source code
         */
        protected Evidence(String evidenceCode, String publicationRefId, String withText, Item organism,
                String dataSource, String dataSourceCode) {
            this.evidenceCode = evidenceCode;
            this.withText = withText;
            this.organism = organism;
            this.dataSource = dataSource;
            this.dataSourceCode = dataSourceCode;
            addPublicationRefId(publicationRefId);
        }

        /**
         * Adds a supporting publication; null and already recorded identifiers are ignored.
         *
         * @param publicationRefId item identifier of the publication
         */
        protected void addPublicationRefId(String publicationRefId) {
            if (publicationRefId != null && !publicationRefIds.contains(publicationRefId)) {
                publicationRefIds.add(publicationRefId);
            }
        }

        /**
         * @return the item identifiers of the supporting publications (the internal list)
         */
        protected List<String> getPublications() {
            return publicationRefIds;
        }

        /**
         * @return the evidence code
         */
        protected String getEvidenceCode() {
            return evidenceCode;
        }

        /**
         * @return the 'with' text
         */
        @SuppressWarnings("unused")
        protected String getWithText() {
            return withText;
        }

        /**
         * @return the data source code
         */
        @SuppressWarnings("unused")
        protected String getDataset() {
            return dataSourceCode;
        }

        /**
         * @return the data source name
         */
        @SuppressWarnings("unused")
        protected String getDatasource() {
            return dataSource;
        }

        /**
         * @return the organism item
         */
        @SuppressWarnings("unused")
        protected Item getOrganism() {
            return organism;
        }

        /**
         * @return the storedAnnotationId
         */
        protected Integer getStoredAnnotationId() {
            return storedAnnotationId;
        }

        /**
         * @param storedAnnotationId the storedAnnotationId to set
         */
        protected void setStoredAnnotationId(Integer storedAnnotationId) {
            this.storedAnnotationId = storedAnnotationId;
        }
    }

    /**
     * Key identifying one annotation: product, GO term, qualifier, 'with' text, annotation
     * extension and publication ids. Two keys are equal when all seven values are equal;
     * null values are allowed and only equal null. Instances are used as hash map keys, so
     * equals() and hashCode() are defined over the same values.
     */
    private class GoTermToGene {
        private final String productId;
        private final String goId;
        private final String qualifier;
        private final String withText;
        private final String annotationExtension;
        private final String pub;
        private final String pubxref;

        /**
         * Constructor
         *
         * @param productId gene/protein identifier
         * @param goId      GO term id
         * @param qualifier qualifier
         * @param withText  'with' text of the annotation
         * @param annotationExtension annotation extension text
         * @param pub       PubMed id of the publication
         * @param pubxref   cross reference id of the publication
         */
        GoTermToGene(String productId, String goId, String qualifier, String withText, String annotationExtension,
                String pub, String pubxref) {
            this.productId = productId;
            this.goId = goId;
            this.qualifier = qualifier;
            this.withText = withText;
            this.annotationExtension = annotationExtension;
            this.pub = pub;
            this.pubxref = pubxref;
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (!(o instanceof GoTermToGene)) {
                return false;
            }
            GoTermToGene other = (GoTermToGene) o;
            return same(productId, other.productId)
                    && same(goId, other.goId)
                    && same(qualifier, other.qualifier)
                    && same(withText, other.withText)
                    && same(annotationExtension, other.annotationExtension)
                    && same(pub, other.pub)
                    && same(pubxref, other.pubxref);
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public int hashCode() {
            // combines exactly the values compared by equals()
            int result = 17;
            result = 31 * result + hashOf(productId);
            result = 31 * result + hashOf(goId);
            result = 31 * result + hashOf(qualifier);
            result = 31 * result + hashOf(withText);
            result = 31 * result + hashOf(annotationExtension);
            result = 31 * result + hashOf(pub);
            result = 31 * result + hashOf(pubxref);
            return result;
        }

        // null-safe string comparison
        private boolean same(String left, String right) {
            return (left == null) ? right == null : left.equals(right);
        }

        // null-safe string hash
        private int hashOf(String value) {
            return (value == null) ? 0 : value.hashCode();
        }
    }

    /**
     * {@inheritDoc}
     *
     * This converter returns the same fixed title whatever taxon id is passed.
     */
    @Override
    public String getDataSetTitle(int taxonId) {
        return DATASET_TITLE;
    }

}