ubic.pubmedgate.resolve.focusedAnalysis.CreateNIFBulkDump.java Source code

Java tutorial

Introduction

Here is the source code for ubic.pubmedgate.resolve.focusedAnalysis.CreateNIFBulkDump.java

Source

/*
 * The WhiteText project
 * 
 * Copyright (c) 2012 University of British Columbia
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package ubic.pubmedgate.resolve.focusedAnalysis;

import java.io.File;
import java.io.FileInputStream;
import java.util.Set;

import org.apache.commons.lang.time.StopWatch;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import ubic.BAMSandAllen.JenaUtil;
import ubic.BAMSandAllen.Util;
import ubic.basecode.util.FileTools;
import ubic.pubmedgate.Config;
import ubic.pubmedgate.resolve.EvaluationRDFModel;

import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.Resource;

/**
 * Command-line tool that writes a CSV of (pmid, mention, concept URI) rows to the file
 * <code>forNIFAll.txt</code> in the folder configured under <code>whitetext.resolve.results.folder</code>.
 * <p>
 * The rows are produced by walking PMID -> mention -> term -> concept in an {@link EvaluationRDFModel} built from
 * the RDF files named by the <code>resolve.Lexicon.resolution.RDF.allComp</code> and
 * <code>resolve.Lexicon.resolution.RDF</code> configuration properties.
 */
public class CreateNIFBulkDump {
    protected static Log log = LogFactory.getLog(CreateNIFBulkDump.class);

    /** Namespace prepended to each concept's local name to form the URI written to the dump. */
    private static final String NIF_GROSS_ANATOMY_NS = "http://ontology.neuinfo.org/NIF/BiomaterialEntities/NIF-GrossAnatomy.owl#";

    /**
     * Loads the resolution RDF, iterates over every PMID in the model and writes one CSV row per
     * mention/concept pair that the model does not report as rejected.
     * 
     * @param args not used
     * @throws Exception if an RDF input cannot be opened or read, or the output file cannot be written
     */
    public static void main(String[] args) throws Exception {

        // training or unsupervised
        boolean allComp = true;
        // if training then check for accepted
        Model modelLoad = ModelFactory.createDefaultModel();

        if (allComp) {
            readConfiguredRDF(modelLoad, "resolve.Lexicon.resolution.RDF.allComp");
        }
        readConfiguredRDF(modelLoad, "resolve.Lexicon.resolution.RDF");

        boolean reason = true;
        EvaluationRDFModel model = new EvaluationRDFModel(modelLoad, reason);

        int count = 0;
        int rejected = 0;
        // Accumulate in a builder: repeated String concatenation inside the nested loops copies the
        // whole dump on every appended row.
        StringBuilder result = new StringBuilder("pmid,mention,concept");

        Set<Resource> PMIDs = model.getPMIDs();
        log.info("total number of pmids:" + PMIDs.size());
        Set<Resource> NIFSTDConcepts = model.getNIFSTDConcepts();
        Set<Resource> allTerms = model.getTerms();

        int passedPMIDs = 0;
        StopWatch s = new StopWatch();
        s.start();
        for (Resource pmid : PMIDs) {
            passedPMIDs++;
            log.info("Time per PMID:" + (s.getTime() / passedPMIDs) + " processed:" + passedPMIDs + " of "
                    + PMIDs.size());

            // the identifier is whatever follows the last ':' of the PMID resource URI
            String pmidURI = pmid.getURI();
            String pmidID = pmidURI.substring(pmidURI.lastIndexOf(':') + 1);
            log.info(pmidID);

            // PMID -> Mention -> Concept
            for (Resource mention : model.getMentionsInPMID(pmid)) {
                Set<Resource> terms = model.getLinkedResources(mention);
                // it's all resources before it's filtered
                // NOTE(review): retainAll mutates the returned set in place - presumably the model hands
                // out a fresh set on every call; confirm against EvaluationRDFModel.
                terms.retainAll(allTerms);

                String mentionString = JenaUtil.getLabel(mention);
                for (Resource term : terms) {
                    Set<Resource> concepts = model.getConceptsFromTerms(term);
                    String termString = JenaUtil.getLabel(term);

                    // keep only the concepts that are also in the model's NIFSTD concept set
                    concepts.retainAll(NIFSTDConcepts);

                    for (Resource concept : concepts) {
                        if (model.rejected(mention, concept)) {
                            rejected++;
                            continue;
                        }
                        String conceptLabel = JenaUtil.getLabel(concept);
                        String conceptURI = NIF_GROSS_ANATOMY_NS + concept.getLocalName();
                        log.info("  " + mentionString + "->" + termString + "->" + conceptLabel + "  "
                                + conceptURI);
                        // one row per pair: pmid, quoted mention text, concept URI
                        result.append("\r\n").append(pmidID).append(',').append(csvQuote(mentionString))
                                .append(',').append(conceptURI);
                        count++;
                    }
                }
            }
        }
        log.info("Number printed:" + count);
        log.info("Number rejected:" + rejected);
        FileTools.stringToFile(result.toString(),
                new File(Config.config.getString("whitetext.resolve.results.folder") + "forNIFAll.txt"));

    }

    /**
     * Reads the RDF file whose path is stored under the given configuration key into the model, closing the
     * underlying file stream whether or not the read succeeds.
     * 
     * @param target model to read the statements into
     * @param fileProperty configuration key holding the path of the RDF file
     * @throws java.io.IOException if the file cannot be opened or the stream cannot be closed
     */
    private static void readConfiguredRDF(Model target, String fileProperty) throws java.io.IOException {
        FileInputStream in = new FileInputStream(Config.config.getString(fileProperty));
        try {
            target.read(in, null);
        } finally {
            in.close();
        }
    }

    /**
     * Wraps a value in double quotes for use as a CSV field, doubling any embedded double quote so the field
     * stays well-formed. A <code>null</code> value is written as the text <code>null</code>, which is what plain
     * string concatenation would produce.
     * 
     * @param value raw field text, may be null
     * @return the quoted field
     */
    private static String csvQuote(String value) {
        return "\"" + String.valueOf(value).replace("\"", "\"\"") + "\"";
    }
}