ubic.pubmedgate.resolve.focusedAnalysis.PickNeuroLexRegions.java Source code

Java tutorial

Introduction

Here is the source code for ubic.pubmedgate.resolve.focusedAnalysis.PickNeuroLexRegions.java

Source

/*
 * The WhiteText project
 * 
 * Copyright (c) 2012 University of British Columbia
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package ubic.pubmedgate.resolve.focusedAnalysis;

import java.io.File;
import java.io.FileInputStream;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import ubic.BAMSandAllen.JenaUtil;
import ubic.BAMSandAllen.Util;
import ubic.BAMSandAllen.Vocabulary;
import ubic.basecode.dataStructure.params.ParamKeeper;
import ubic.basecode.util.FileTools;
import ubic.pubmedgate.Config;
import ubic.pubmedgate.resolve.EvaluationRDFModel;
import ubic.pubmedgate.resolve.SpeciesCounter;

import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.Resource;

/**
 * Command-line utility that selects brain region mentions from the lexicon resolution RDF and writes them to two
 * Excel files ("forNeuroLex.xls" and "forNeuroLexFinal.xls") in the configured results folder.
 * <p>
 * A mention is selected when it is tied to one of the common species, is among the model's matched mentions, has an
 * evaluated concept that is a NIFSTD concept with a specific-to-general relation to the mention, and occurs in more
 * than one abstract that mentions a common species.
 */
public class PickNeuroLexRegions {
    // runs on server
    protected static Log log = LogFactory.getLog(PickNeuroLexRegions.class);

    /** Configuration key of the resolution RDF file that is read when the {@code training} flag is set. */
    private static final String RESOLUTION_RDF_PROPERTY = "resolve.Lexicon.resolution.RDF";

    /** Configuration key of the "allComp" resolution RDF file. */
    private static final String RESOLUTION_RDF_ALL_COMP_PROPERTY = "resolve.Lexicon.resolution.RDF.allComp";

    /**
     * Reads the RDF file whose path is stored under the given configuration key into the given model.
     * <p>
     * The input stream is always closed, including when reading fails; previously each load left its
     * {@link FileInputStream} open.
     * 
     * @param target model that receives the statements
     * @param fileProperty configuration key that holds the path of the RDF file
     * @throws Exception if the file cannot be opened, read or closed
     */
    private static void readConfiguredRDF(Model target, String fileProperty) throws Exception {
        FileInputStream input = new FileInputStream(Config.config.getString(fileProperty));
        try {
            target.read(input, null);
        } finally {
            input.close();
        }
    }

    /**
     * Upper-cases the first character of the given text.
     * 
     * @param text text to capitalise, must not be null
     * @return the text with its first character upper-cased; an empty string is returned unchanged instead of
     *         failing in {@code substring}
     */
    private static String capitalizeFirst(String text) {
        if (text.length() == 0) {
            return text;
        }
        return text.substring(0, 1).toUpperCase() + text.substring(1);
    }

    /**
     * Logs the number of given mentions together with their summed mention and abstract frequencies, as computed by
     * an {@link EvaluationRDFModel} built from one of the two resolution RDF files.
     * 
     * @param mentions mentions to count; they are re-created by URI in the freshly loaded model before counting
     * @param training if true the "resolve.Lexicon.resolution.RDF" file is loaded, otherwise the
     *        "resolve.Lexicon.resolution.RDF.allComp" file
     * @throws Exception if the RDF file cannot be read
     */
    public static void printMentionCountForPaper(Set<Resource> mentions, boolean training) throws Exception {
        Model modelLoad = ModelFactory.createDefaultModel();
        readConfiguredRDF(modelLoad, training ? RESOLUTION_RDF_PROPERTY : RESOLUTION_RDF_ALL_COMP_PROPERTY);
        log.info(modelLoad.size());

        // The incoming resources belong to another model, so re-create them by URI in the model just loaded.
        Set<Resource> newMentions = new HashSet<Resource>();
        for (Resource mention : mentions) {
            newMentions.add(modelLoad.createResource(mention.getURI()));
        }
        mentions = newMentions;

        boolean reason = true;
        EvaluationRDFModel model = new EvaluationRDFModel(modelLoad, reason);
        log.info("mentions:" + mentions.size());
        log.info("Mention freq:" + model.sumMentionFrequencies(mentions));
        log.info("Mention abstract freq:" + model.sumAbstractFrequencies(mentions));
    }

    /**
     * Loads the "allComp" resolution RDF into an {@link EvaluationRDFModel} and calls its {@code getStats()} method.
     * 
     * @throws Exception if the RDF file cannot be read
     */
    public static void countSizes() throws Exception {
        Model modelLoad = ModelFactory.createDefaultModel();
        readConfiguredRDF(modelLoad, RESOLUTION_RDF_ALL_COMP_PROPERTY);
        boolean reason = true;
        EvaluationRDFModel model = new EvaluationRDFModel(modelLoad, reason);
        model.getStats();
    }

    /**
     * Loads both resolution RDF files into a single model, selects the region mentions and writes the two
     * spreadsheets.
     * 
     * @param args not used
     * @throws Exception if an RDF file cannot be read
     */
    public static void main(String[] args) throws Exception {
        // has human species
        // has spec to gen match to NIFSTD
        // no rejections
        // sort by occurrences
        // get PMID references

        // common.add( "species:ncbi:9606" ); // human

        // Both files are read into the same model; the size is logged after each read.
        Model modelLoad = ModelFactory.createDefaultModel();
        readConfiguredRDF(modelLoad, RESOLUTION_RDF_ALL_COMP_PROPERTY);
        log.info(modelLoad.size());
        readConfiguredRDF(modelLoad, RESOLUTION_RDF_PROPERTY);
        log.info(modelLoad.size());

        boolean reason = true;
        EvaluationRDFModel model = new EvaluationRDFModel(modelLoad, reason);
        Set<Resource> humanMentions = model.getMentionsForSpecies("9606");
        Set<Resource> commonSpeciesMentions = new HashSet<Resource>();
        Set<String> commonSpecies = SpeciesCounter.getCommonSpeciesLinStrings();
        Set<Resource> commonSpeciesPMIDs = new HashSet<Resource>();

        for (String common : commonSpecies) {
            // keep only the part after the last ':' of the species string
            String speciesId = common.substring(common.lastIndexOf(":") + 1);
            commonSpeciesMentions.addAll(model.getMentionsForSpecies(speciesId));
            commonSpeciesPMIDs.addAll(model.getPMIDsForSpecies(speciesId));
        }

        log.info("Common mentions:" + commonSpeciesMentions.size());
        commonSpeciesMentions.retainAll(model.getMatchedMentions());

        log.info("Common pmids:" + commonSpeciesPMIDs.size());

        log.info("Matched common mentions:" + commonSpeciesMentions.size());

        log.info("Human mentions:" + humanMentions.size());
        Set<Resource> BIRNConcepts = model.getNIFSTDConcepts();
        Set<Resource> NNConcepts = model.getNNConcepts();
        humanMentions.retainAll(model.getMatchedMentions());

        log.info("Matched Human mentions:" + humanMentions.size());

        Set<Resource> finalRegionMentions = new HashSet<Resource>();
        int count = 0;
        int nnLinks = 0;
        int birnConcepts = 0;
        // one row per selected mention/concept pair, with diagnostic columns
        ParamKeeper keeper = new ParamKeeper();
        // one row per selected mention/concept pair, with the "Label"/"Is part of"/"Has role"/"Id"/"PMID" columns
        ParamKeeper finalKeeper = new ParamKeeper();

        for (Resource commonMention : commonSpeciesMentions) {
            Set<Resource> concepts = model.getMentionEvaluations(commonMention);
            // The per-mention Vocabulary.number_of_occurances property is not used for selection; abstracts are
            // counted through the linked PMIDs instead.
            Set<Resource> pmids = model.getPMIDs(commonMention);
            // filter pmids for common species
            Set<Resource> commonPmids = (Set<Resource>) Util.intersect(commonSpeciesPMIDs, pmids);

            int abstractCount = pmids.size();
            int commonAbstractCount = commonPmids.size();

            for (Resource concept : concepts) {
                if (!BIRNConcepts.contains(concept)) {
                    continue;
                }
                birnConcepts++;

                // how do I know it's spec to gen? -need to create automatic evaluations!
                boolean selected = model.specToGen(commonMention, concept) && commonAbstractCount > 1;
                if (!selected) {
                    continue;
                }

                String mentionLabel = capitalizeFirst(JenaUtil.getLabel(commonMention));

                Map<String, String> params = new HashMap<String, String>();
                params.put("mention URI", commonMention.getURI());
                params.put("mention label", mentionLabel);
                params.put("parent concept URI", concept.getURI());
                params.put("parent concept label", "\"" + JenaUtil.getLabel(concept) + "\"");
                params.put("abstract count", abstractCount + "");
                params.put("common abstract count", commonAbstractCount + "");
                // spreadsheet HYPERLINK(...) expression pointing at the PubMed link built by the model
                params.put("PubMed link",
                        "HYPERLINK(\"" + model.getNCBIPMIDLink(commonPmids) + "\",\"PubMed Link\")");

                count++;
                finalRegionMentions.add(commonMention);

                log.info(abstractCount + " " + commonAbstractCount + " " + JenaUtil.getLabel(commonMention)
                        + " < " + JenaUtil.getLabel(concept));

                // TODO get birnlex IDs? - are they exact?
                if (Util.intersectSize(NNConcepts, concepts) > 0) {
                    nnLinks++;
                    params.put("hasNNlink", "true");
                } else {
                    params.put("hasNNlink", "false");
                }

                keeper.addParamInstance(params);

                Map<String, String> finalParams = new HashMap<String, String>();
                finalParams.put("Label", mentionLabel);
                finalParams.put("Is part of", JenaUtil.getLabel(concept));
                finalParams.put("Has role", "Brain Subdivisions based on automated term selection");
                finalParams.put("Id", "");
                String pmidComaString = "\"" + model.getComaSepPMIDs(commonPmids, 140) + "\"";
                finalParams.put("PMID", pmidComaString);
                finalKeeper.addParamInstance(finalParams);
            }
        }
        log.info("Number printed:" + count);
        log.info("Number in set:" + finalRegionMentions.size());
        log.info("Has NN links:" + nnLinks);
        log.info("Birn concepts:" + birnConcepts);

        String resultsFolder = Config.config.getString("whitetext.resolve.results.folder");
        keeper.writeExcel(resultsFolder + "forNeuroLex.xls");
        finalKeeper.writeExcel(resultsFolder + "forNeuroLexFinal.xls");

        // printMentionCountForPaper( finalRegionMentions, true );
        // printMentionCountForPaper( finalRegionMentions, false );
    }
}