ubic.gemma.core.loader.protein.StringProteinProteinInteractionConverter.java Source code

Java tutorial

Introduction

Here is the source code for ubic.gemma.core.loader.protein.StringProteinProteinInteractionConverter.java

Source

/*
 * The Gemma project
 *
 * Copyright (c) 2010 University of British Columbia
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package ubic.gemma.core.loader.protein;

import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import ubic.gemma.core.loader.protein.biomart.model.Ensembl2NcbiValueObject;
import ubic.gemma.core.loader.protein.string.model.StringProteinProteinInteraction;
import ubic.gemma.core.loader.util.converter.Converter;
import ubic.gemma.model.association.Gene2GeneProteinAssociation;
import ubic.gemma.model.common.description.DatabaseEntry;
import ubic.gemma.model.common.description.ExternalDatabase;
import ubic.gemma.model.genome.Gene;
import ubic.gemma.persistence.util.Settings;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.regex.Pattern;

/**
 * Class that is responsible for converting value objects generated from the parsing of STRING files
 * (StringProteinProteinInteraction) into Gemma Gene2GeneProteinAssociations. To do that it refers to a map ensembl2ncbi
 * ids
 *
 * @author ldonnison
 */
@SuppressWarnings({ "WeakerAccess", "unused" }) // Possible external use
@Deprecated
public class StringProteinProteinInteractionConverter implements Converter<Object, Object> {

    private static final Log log = LogFactory.getLog(StringProteinProteinInteractionConverter.class);

    /**
     * The joining string between two protein ids to create the url link in string for the interaction
     */
    private static final String PROTEIN_2_PROTEIN_LINK = "%0D";

    /**
     * String url
     **/
    private static String stringUrl;

    /**
     * Version of string being used
     */
    private static String stringVersion;

    private AtomicBoolean producerDone = new AtomicBoolean(false);

    /**
     * The key is the ensembl protein id.
     */
    private Map<String, Ensembl2NcbiValueObject> ensembl2ncbi;

    /**
     * Reference to external database as held in gemma system
     */
    private ExternalDatabase stringExternalDatabase;

    /**
     * @param ensembl2ncbi Map of ensembl peptide ids to entrez/ncbi id genes.
     */
    public StringProteinProteinInteractionConverter(Map<String, Ensembl2NcbiValueObject> ensembl2ncbi) {
        this.ensembl2ncbi = ensembl2ncbi;

        StringProteinProteinInteractionConverter.stringVersion = Settings.getString("protein.string.version");
        StringProteinProteinInteractionConverter.stringUrl = Settings.getString("protein.string.linksurl");
        if (StringProteinProteinInteractionConverter.stringUrl == null
                || StringProteinProteinInteractionConverter.stringUrl.length() == 0)
            throw new RuntimeException(new ConfigurationException("stringUrl was null or empty"));
        if (StringProteinProteinInteractionConverter.stringVersion == null
                || StringProteinProteinInteractionConverter.stringVersion.length() == 0)
            throw new RuntimeException(new ConfigurationException("stringVersion was null or empty"));
    }

    /**
     * Threaded conversion of domain objects to Gemma objects.
     *
     * @param gene2GeneProteinAssociationQueue queue
     * @param stringProteinProteinInteractions interactions
     */
    public void convert(final BlockingQueue<Gene2GeneProteinAssociation> gene2GeneProteinAssociationQueue,
            final Collection<StringProteinProteinInteraction> stringProteinProteinInteractions) {
        // start up thread to convert a member of geneInfoQueue to a gene/geneproduct/databaseentry
        // then push the gene onto the geneQueue for loading
        Thread convertThread = new Thread(new Runnable() {
            @Override
            public void run() {

                try {
                    for (StringProteinProteinInteraction stringProteinProteinInteraction : stringProteinProteinInteractions) {
                        if (stringProteinProteinInteraction == null) {
                            continue;
                        }
                        // converter
                        Collection<Gene2GeneProteinAssociation> dataColl = StringProteinProteinInteractionConverter.this
                                .convert(stringProteinProteinInteraction);
                        // this returns a collection so split out and put on queue
                        for (Gene2GeneProteinAssociation gene2GeneProteinAssociation : dataColl) {
                            gene2GeneProteinAssociationQueue.put(gene2GeneProteinAssociation);
                        }
                    }
                } catch (InterruptedException e) {
                    StringProteinProteinInteractionConverter.log.info("Interrupted.");
                }
                producerDone.set(true);
            }

        }, "Converter");

        convertThread.start();
    }

    @Override
    public Collection<Object> convert(Collection<?> sourceDomainObjects) {
        long startTime = System.currentTimeMillis();
        Collection<Object> results = new HashSet<>();
        for (Object object : sourceDomainObjects) {
            results.add(this.convert(object));
        }
        long EndTime = System.currentTimeMillis();
        long time = (EndTime - startTime) / 1000;
        StringProteinProteinInteractionConverter.log.info("Time taken for conversion call is  " + time);
        return results;
    }

    /**
     * Standard converter code
     *
     * @see ubic.gemma.core.loader.util.converter.Converter#convert(java.lang.Object)
     */
    @Override
    public Object convert(Object sourceDomainObject) {

        Object processedObject;
        if (sourceDomainObject instanceof Collection) {
            processedObject = this.convert((Collection<?>) sourceDomainObject);
        } else if (sourceDomainObject instanceof StringProteinProteinInteraction) {
            StringProteinProteinInteraction stringProteinProteinInteraction = (StringProteinProteinInteraction) sourceDomainObject;
            processedObject = this.convert(stringProteinProteinInteraction);
        } else {
            throw new RuntimeException("Incorrect domain object passed");
        }

        return processedObject;

    }

    /**
     * Given a StringProteinProteinInteraction value object create a gemma Gene2GeneProteinAssociation. One
     * StringProteinProteinInteraction can potentially create many Gene2GeneProteinAssociation objects If the call to
     * getNcbiGene returns more than 1 gene then each gene returned is turned into an interaction. Which means that the
     * same ensemble protein protein id interaction could be duplicated as many times as there is gene mappings. This is
     * done for both protein 1 and protein2 so a matrix is formed.
     *
     * @param sourceDomainObject the domain object to process
     * @return collection of Gene2GeneProteinAssociation representing this interaction
     */
    public Collection<Gene2GeneProteinAssociation> convert(StringProteinProteinInteraction sourceDomainObject) {

        Collection<Gene2GeneProteinAssociation> gene2GeneProteinAssociations = new ArrayList<>();

        // if(sourceDomainObject instanceof StringProteinProteinInteraction){

        // have to create a matrix of interactions take the ensemble id and see how many ncbi ids it maps to
        Collection<Gene> genesForProteinOne = this.getNcbiGene(sourceDomainObject.getProtein1());
        Collection<Gene> genesForProteinTwo = this.getNcbiGene(sourceDomainObject.getProtein2());

        // empty if no mapping found
        if (genesForProteinOne.isEmpty()) {
            StringProteinProteinInteractionConverter.log
                    .warn("No ncbi gene mapping for protein 1: " + sourceDomainObject.getProtein1());
        } else if (genesForProteinTwo.isEmpty()) {
            StringProteinProteinInteractionConverter.log
                    .warn("No ncbi gene mapping for protein 2: " + sourceDomainObject.getProtein2());
        } else {
            // create the one to many mapping from ensembl to ncbi/entrez
            for (Gene geneProtein1 : genesForProteinOne) {
                for (Gene geneProtein2 : genesForProteinTwo) {
                    Gene2GeneProteinAssociation gene2GeneProteinAssociation = Gene2GeneProteinAssociation.Factory
                            .newInstance(geneProtein1, geneProtein2, this.getDataBaseEntry(sourceDomainObject),
                                    sourceDomainObject.getEvidenceVector(), sourceDomainObject.getCombined_score());

                    gene2GeneProteinAssociations.add(gene2GeneProteinAssociation);
                }
            }
        }
        return gene2GeneProteinAssociations;
    }

    /**
     * Create a database entry which represents the external record as held in string
     *
     * @param stringProteinProteinInteractionId object which contains the two protein ids
     * @return DatabaseEntry representing the record as held in string
     */
    public DatabaseEntry getDataBaseEntry(StringProteinProteinInteraction stringProteinProteinInteractionId) {
        String proteinProteinInteraction = this.getProteinProteinInteractionId(stringProteinProteinInteractionId);
        return DatabaseEntry.Factory.newInstance(proteinProteinInteraction,
                StringProteinProteinInteractionConverter.stringVersion,
                StringProteinProteinInteractionConverter.stringUrl, stringExternalDatabase);
    }

    /**
     * One ensemblProteinID can map to multiple ncbi genes. This method takes the ensembl gene and creates a collection
     * of entrez ncbi genes. It first has to remove the taxon id from the beginning of the peptide id as given by
     * string.
     *
     * @param ensemblProteinId The ensembl protein id in this interaction
     * @return Collection of genes as represented in ncbi entrez gene
     */
    public Collection<Gene> getNcbiGene(String ensemblProteinId) {
        // log.debug("getting ncbi gene for ensembl id " + ensemblProteinId);
        Collection<Gene> genes = new ArrayList<>();

        // in case species id is still on there from STRING like 12334.ENSD....
        String eid = ensemblProteinId.replaceFirst("[0-9]+\\.", "");

        Ensembl2NcbiValueObject e2n = ensembl2ncbi.get(eid);
        if (e2n == null || e2n.getEntrezgenes().isEmpty()) {
            return genes;
        }

        String ensemblGeneId = e2n.getEnsemblGeneId();

        Collection<String> entrezGeneIds = (e2n.getEntrezgenes());
        for (String entrezGeneId : entrezGeneIds) {
            if (!entrezGeneId.isEmpty()) {
                Gene gene = Gene.Factory.newInstance();
                gene.setNcbiGeneId(Integer.parseInt(entrezGeneId));
                gene.setEnsemblId(ensemblGeneId);
                genes.add(gene);
                if (StringProteinProteinInteractionConverter.log.isDebugEnabled())
                    StringProteinProteinInteractionConverter.log
                            .debug("Entry found for entrezGeneId " + entrezGeneId);
            }
        }

        return genes;
    }

    /**
     * This is a made up value for the accessionId which is the protein peptide id 1 and the protein peptide 2 combined
     * and separated by a percentage This is so that it can be sent as a whole to string to retrieve the record in
     * string
     *
     * @param stringProteinProteinInteraction string protein interaction
     * @return Combined protein 1 and protein 2 ids representing an identifier for this protein interaction
     */
    public String getProteinProteinInteractionId(StringProteinProteinInteraction stringProteinProteinInteraction) {
        return stringProteinProteinInteraction.getProtein1()
                .concat(StringProteinProteinInteractionConverter.PROTEIN_2_PROTEIN_LINK)
                .concat(stringProteinProteinInteraction.getProtein2());
    }

    /**
     * @return the stringExternalDatabase
     */
    public ExternalDatabase getStringExternalDatabase() {
        return stringExternalDatabase;
    }

    public void setStringExternalDatabase(ExternalDatabase externalDatabase) {
        this.stringExternalDatabase = externalDatabase;
    }

    public boolean isProducerDone() {
        return this.producerDone.get();
    }

    /**
     * Set the map of ids
     *
     * @param bioMartStringEntrezGeneMapping the map
     */
    public void setEnsemblEntrezMap(Map<String, Ensembl2NcbiValueObject> bioMartStringEntrezGeneMapping) {
        this.ensembl2ncbi = bioMartStringEntrezGeneMapping;
    }

    public void setProducerDoneFlag(AtomicBoolean flag) {
        this.producerDone = flag;
    }

}