edu.cmu.lti.oaqa.bioasq.concept.retrieval.GoPubMedConceptRetrievalExecutor.java Source code

Java tutorial

Introduction

Here is the source code for edu.cmu.lti.oaqa.bioasq.concept.retrieval.GoPubMedConceptRetrievalExecutor.java

Source

/*
 * Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations
 * under the License.
 */

package edu.cmu.lti.oaqa.bioasq.concept.retrieval;

import edu.cmu.lti.oaqa.baseqa.providers.query.BagOfPhraseQueryStringConstructor;
import edu.cmu.lti.oaqa.baseqa.providers.query.QueryStringConstructor;
import edu.cmu.lti.oaqa.baseqa.util.UimaContextHelper;
import edu.cmu.lti.oaqa.bio.bioasq.services.GoPubMedService;
import edu.cmu.lti.oaqa.bioasq.util.BioASQUtil;
import edu.cmu.lti.oaqa.type.retrieval.AbstractQuery;
import edu.cmu.lti.oaqa.type.retrieval.ConceptSearchResult;
import edu.cmu.lti.oaqa.util.TypeUtil;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

import static java.util.stream.Collectors.groupingBy;

/**
 * A {@link JCasAnnotator_ImplBase} that performs search using a query string, transformed from a
 * {@link AbstractQuery} by a {@link QueryStringConstructor} to retrieve relevant
 * {@link ConceptSearchResult}s from {@link GoPubMedService}.
 *
 * @see edu.cmu.lti.oaqa.baseqa.concept.retrieval.LuceneConceptRetrievalExecutor
 * @see GoPubMedSeparateConceptRetrievalExecutor
 *
 * @author <a href="mailto:ziy@cs.cmu.edu">Zi Yang</a> created on 11/3/14
 */
public class GoPubMedConceptRetrievalExecutor extends JCasAnnotator_ImplBase {

    private GoPubMedService service;

    private int pages;

    private int hits;

    private QueryStringConstructor bopQueryStringConstructor;

    private long timeout;

    private int limit;

    private static final Logger LOG = LoggerFactory.getLogger(GoPubMedConceptRetrievalExecutor.class);

    @Override
    public void initialize(UimaContext context) throws ResourceInitializationException {
        super.initialize(context);
        String conf = UimaContextHelper.getConfigParameterStringValue(context, "conf");
        PropertiesConfiguration gopubmedProperties = new PropertiesConfiguration();
        try {
            gopubmedProperties.load(getClass().getResourceAsStream(conf));
        } catch (ConfigurationException e) {
            throw new ResourceInitializationException(e);
        }
        service = new GoPubMedService(gopubmedProperties);
        pages = UimaContextHelper.getConfigParameterIntValue(context, "pages", 1);
        hits = UimaContextHelper.getConfigParameterIntValue(context, "hits", 100);
        bopQueryStringConstructor = new BagOfPhraseQueryStringConstructor();
        timeout = UimaContextHelper.getConfigParameterIntValue(context, "timeout", 4);
        limit = UimaContextHelper.getConfigParameterIntValue(context, "limit", Integer.MAX_VALUE);
    }

    @Override
    public void process(JCas jcas) throws AnalysisEngineProcessException {
        AbstractQuery aquery = TypeUtil.getAbstractQueries(jcas).stream().findFirst().get();
        String queryString = bopQueryStringConstructor.construct(aquery).replaceAll("[^A-Za-z0-9_\\-\"]+", " ");
        LOG.info("Query string: {}", queryString);
        List<ConceptSearchResult> concepts = Collections.synchronizedList(new ArrayList<>());
        ExecutorService es = Executors.newCachedThreadPool();
        for (BioASQUtil.Ontology ontology : BioASQUtil.Ontology.values()) {
            es.execute(() -> {
                try {
                    concepts.addAll(BioASQUtil.searchOntology(service, jcas, queryString, pages, hits, ontology));
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            });
        }
        es.shutdown();
        try {
            if (!es.awaitTermination(timeout, TimeUnit.MINUTES)) {
                LOG.warn("Timeout occurs for one or some concept retrieval services.");
            }
        } catch (InterruptedException e) {
            throw new AnalysisEngineProcessException(e);
        }
        Map<String, List<ConceptSearchResult>> onto2concepts = concepts.stream()
                .collect(groupingBy(ConceptSearchResult::getSearchId));
        for (Map.Entry<String, List<ConceptSearchResult>> entry : onto2concepts.entrySet()) {
            List<ConceptSearchResult> results = entry.getValue();
            LOG.info("Retrieved {} concepts from {}", results.size(), entry.getKey());
            if (LOG.isDebugEnabled()) {
                results.stream().limit(3).forEach(c -> LOG.debug(" - {}", TypeUtil.toString(c)));
            }
        }
        TypeUtil.rankedSearchResultsByScore(concepts, limit).forEach(ConceptSearchResult::addToIndexes);
    }

}