com.act.biointerpretation.metadata.ProteinMetadataComparator.java Source code

Java tutorial

Introduction

Here is the source code for com.act.biointerpretation.metadata.ProteinMetadataComparator.java

Source

/*************************************************************************
*                                                                        *
*  This file is part of the 20n/act project.                             *
*  20n/act enables DNA prediction for synthetic biology/bioengineering.  *
*  Copyright (C) 2017 20n Labs, Inc.                                     *
*                                                                        *
*  Please direct all queries to act@20n.com.                             *
*                                                                        *
*  This program is free software: you can redistribute it and/or modify  *
*  it under the terms of the GNU General Public License as published by  *
*  the Free Software Foundation, either version 3 of the License, or     *
*  (at your option) any later version.                                   *
*                                                                        *
*  This program is distributed in the hope that it will be useful,       *
*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
*  GNU General Public License for more details.                          *
*                                                                        *
*  You should have received a copy of the GNU General Public License     *
*  along with this program.  If not, see <http://www.gnu.org/licenses/>. *
*                                                                        *
*************************************************************************/

package com.act.biointerpretation.metadata;

import act.server.NoSQLAPI;
import act.shared.Reaction;
import org.apache.commons.lang3.tuple.Pair;
import org.json.JSONObject;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class ProteinMetadataComparator implements Comparator {

    //The ranking is contextualized on a host
    private Host host;
    //The ranking is contextualized on a location within that host
    private Localization localization;

    public ProteinMetadataComparator(Host host, Localization localization) {
        this.host = host;
        this.localization = localization;
    }

    public Host getHost() {
        return host;
    }

    public void setHost(Host host) {
        this.host = host;
    }

    public Localization getLocalization() {
        return localization;
    }

    public void setLocalization(Localization localization) {
        this.localization = localization;
    }

    @Override
    public int compare(Object o1, Object o2) {
        ProteinMetadata p1 = (ProteinMetadata) o1;
        ProteinMetadata p2 = (ProteinMetadata) o2;

        int score1 = score(p1);
        int score2 = score(p2);

        return score2 - score1;
    }

    public int score(ProteinMetadata pmd) {
        double out = 0.0;

        // Score enzyme efficiency, will result in picking the highest values as 
        // the dominant consideration, biased on kcatkm
        if (pmd.kcatkm != null) {
            out += (Math.log(pmd.kcatkm)) * 20;
        }

        if (pmd.specificActivity != null) {
            out += (Math.log(pmd.specificActivity)) * 20;
        }

        //Score modifications
        if (pmd.modifications == null) {
            //No prediction no change
        } else if (pmd.modifications == true) {
            out += -50;
        } else {
            out += 30;
        }

        //Score subunits
        if (pmd.heteroSubunits == null) {
            //No prediction no change
        } else if (pmd.heteroSubunits == true) {
            //If needs multiple subunits, this is potentially problematic
            out += -10;
        } else {
            //Great if there is a clear indication that there are no subunits
            out += 30;
        }

        //Score cloned
        if (pmd.cloned != null) {
            Integer cloned = pmd.cloned.get(host);
            if (cloned == null) {
                //No prediction no change
            } else {
                //Will be positive or negative, scales with organism similarity up to 140 (or -140)
                out += cloned * 20;
            }
        }

        //Score localization
        if (pmd.localization != null) {
            Localization prediction = pmd.localization.get(host);
            if (prediction == Localization.unknown) {
                //No prediction no change
            } else if (prediction != Localization.questionable) {
                out += -20; //Small penalty for ambiguity about the location
            } else if (prediction != localization) {
                out += -30; //larger penalty if the prediction is not where you want it
            } else if (prediction == localization) {
                out += 20; //Otherwise a small bonus if things match up
            } else {
                System.err.println("This should never happen - localization");
            }
        }

        double round = Math.round(out);
        return (int) round;
    }

    public static void main(String[] args) throws Exception {
        // THIS main appears to be a debugging call. Does not seem to be used anywhere
        String INDB = "SHOULD_COME_FROM_CMDLINE"; // "jarvis_2016-12-09";
        String OUTDB = "SHOULD_COME_FROM_CMDLINE"; // was collection reactions

        createProteinMetadataTable(INDB, OUTDB);
    }

    public static Map<Long, List<Pair<ProteinMetadata, Integer>>> createProteinMetadataTable(String sourceDB,
            String destDB) throws Exception {
        ProteinMetadataComparator comp = new ProteinMetadataComparator(Host.Ecoli, Localization.cytoplasm);

        NoSQLAPI api = new NoSQLAPI(sourceDB, destDB);
        Iterator<Reaction> iterator = api.readRxnsFromInKnowledgeGraph();

        //Create a single instance of the factory method to use for all json
        ProteinMetadataFactory factory = ProteinMetadataFactory.initiate();

        //Create a list to aggregate the results of the database scan
        List<ProteinMetadata> agg = new ArrayList<>();

        //Scan the database and store ProteinMetadata objects
        while (iterator.hasNext()) {
            Reaction rxn = iterator.next();

            Reaction.RxnDataSource source = rxn.getDataSource();
            if (!source.equals(Reaction.RxnDataSource.BRENDA)) {
                continue;
            }

            Set<JSONObject> jsons = rxn.getProteinData();

            for (JSONObject json : jsons) {
                ProteinMetadata meta = factory.create(json);

                Long rxnId;
                if (rxn.getUUID() < 0) {
                    rxnId = (long) Reaction.reverseID(rxn.getUUID());
                } else {
                    rxnId = (long) rxn.getUUID();
                }

                meta.setReactionId(rxnId);
                agg.add(meta);
            }
        }

        System.out.println("All Metadata's parsed: " + agg.size());

        //For each protein metadata, gather up ones that have a non-zero score into a new list
        List<ProteinMetadata> agg2 = new ArrayList<>();
        Map<Long, List<Pair<ProteinMetadata, Integer>>> reactionIdToScore = new HashMap<>();
        for (ProteinMetadata pmd : agg) {
            //Consider if it is invalid (meaning a really crappy enzyme) and if so ignore it
            if (!pmd.isValid(Host.Ecoli)) {
                continue;
            }

            //Score the protein
            int score = comp.score(pmd);
            if (!reactionIdToScore.containsKey(pmd.reactionId)) {
                reactionIdToScore.put(pmd.reactionId, new ArrayList<>());
            }

            reactionIdToScore.get(pmd.reactionId).add(Pair.of(pmd, score));
            if (score > 0) {
                agg2.add(pmd);
            }
        }
        return reactionIdToScore;
    }
}