edu.cmu.lti.oaqa.bioasq.concept.retrieval.GoPubMedSeparateConceptRetrievalExecutor.java Source code

Java tutorial

Introduction

Here is the source code for edu.cmu.lti.oaqa.bioasq.concept.retrieval.GoPubMedSeparateConceptRetrievalExecutor.java

Source

/*
 * Open Advancement Question Answering (OAQA) Project Copyright 2016 Carnegie Mellon University
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations
 * under the License.
 */

package edu.cmu.lti.oaqa.bioasq.concept.retrieval;

import com.google.common.base.CharMatcher;
import edu.cmu.lti.oaqa.baseqa.providers.query.BagOfPhraseQueryStringConstructor;
import edu.cmu.lti.oaqa.baseqa.providers.query.QueryStringConstructor;
import edu.cmu.lti.oaqa.baseqa.util.UimaContextHelper;
import edu.cmu.lti.oaqa.bio.bioasq.services.GoPubMedService;
import edu.cmu.lti.oaqa.bioasq.util.BioASQUtil;
import edu.cmu.lti.oaqa.type.retrieval.AbstractQuery;
import edu.cmu.lti.oaqa.type.retrieval.ConceptSearchResult;
import edu.cmu.lti.oaqa.type.retrieval.QueryConcept;
import edu.cmu.lti.oaqa.util.TypeUtil;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.InputStream;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;

import static java.util.stream.Collectors.groupingBy;

/**
 * <p>
 *   A {@link JCasAnnotator_ImplBase} that performs search using the query strings, each transformed
 *   from a {@link QueryConcept} of the {@link AbstractQuery} by a {@link QueryStringConstructor},
 *   to retrieve relevant {@link ConceptSearchResult}s from {@link GoPubMedService}.
 * </p>
 * <p>
 *   This implementation differs from {@link GoPubMedConceptRetrievalExecutor} in that it prevents
 *   the underlying retrieval model from treating all {@link QueryConcept}s as required or equally
 *   important.
 *   Instead, it lets a downstream reranker decide which {@link ConceptSearchResult}s should be
 *   kept.
 * </p>
 * <p>
 *   Configuration parameters read in {@link #initialize(UimaContext)}:
 * </p>
 * <ul>
 *   <li>{@code conf} - classpath resource holding the properties passed to
 *       {@link GoPubMedService};</li>
 *   <li>{@code pages} (default 1) and {@code hits} (default 1) - forwarded to every ontology
 *       search;</li>
 *   <li>{@code timeout} (default 4) - minutes to wait for all searches to finish;</li>
 *   <li>{@code limit} (default {@link Integer#MAX_VALUE}) - limit passed to the final ranking
 *       of the retrieved results.</li>
 * </ul>
 *
 * @see edu.cmu.lti.oaqa.baseqa.concept.retrieval.LuceneConceptRetrievalExecutor
 * @see GoPubMedConceptRetrievalExecutor
 *
 * @author <a href="mailto:ziy@cs.cmu.edu">Zi Yang</a> created on 4/25/16
 */
public class GoPubMedSeparateConceptRetrievalExecutor extends JCasAnnotator_ImplBase {

    private static final Logger LOG = LoggerFactory.getLogger(GoPubMedSeparateConceptRetrievalExecutor.class);

    /**
     * Matches every run of characters other than ASCII letters, digits, {@code _}, {@code -} and
     * {@code "}; each run is replaced by a single space before a query string is sent out.
     * Compiled once instead of on every query concept.
     */
    private static final Pattern NON_QUERY_CHARS = Pattern.compile("[^A-Za-z0-9_\\-\"]+");

    private GoPubMedService service;

    private int pages;

    private int hits;

    private QueryStringConstructor bopQueryStringConstructor;

    /** Maximum time, in minutes, that {@link #process(JCas)} waits for the searches. */
    private long timeout;

    private int limit;

    /**
     * Loads the GoPubMed properties from the classpath resource named by the {@code conf}
     * parameter, creates the {@link GoPubMedService}, and reads the remaining parameters.
     *
     * @param context the UIMA context supplying the configuration parameters
     * @throws ResourceInitializationException if the {@code conf} resource cannot be found,
     *         read or parsed
     */
    @Override
    public void initialize(UimaContext context) throws ResourceInitializationException {
        super.initialize(context);
        String conf = UimaContextHelper.getConfigParameterStringValue(context, "conf");
        PropertiesConfiguration gopubmedProperties = new PropertiesConfiguration();
        // try-with-resources tolerates a null resource, so the explicit check inside is safe.
        try (InputStream in = getClass().getResourceAsStream(conf)) {
            if (in == null) {
                // getResourceAsStream signals a missing resource with null; fail with a clear
                // message instead of a NullPointerException deep inside the loader.
                throw new ResourceInitializationException(
                        new IllegalArgumentException("Configuration resource not found: " + conf));
            }
            gopubmedProperties.load(in);
        } catch (ConfigurationException | IOException e) {
            throw new ResourceInitializationException(e);
        }
        service = new GoPubMedService(gopubmedProperties);
        pages = UimaContextHelper.getConfigParameterIntValue(context, "pages", 1);
        hits = UimaContextHelper.getConfigParameterIntValue(context, "hits", 1);
        bopQueryStringConstructor = new BagOfPhraseQueryStringConstructor();
        timeout = UimaContextHelper.getConfigParameterIntValue(context, "timeout", 4);
        limit = UimaContextHelper.getConfigParameterIntValue(context, "limit", Integer.MAX_VALUE);
    }

    /**
     * Issues one search per ({@link QueryConcept}, {@link BioASQUtil.Ontology}) pair in parallel,
     * waits up to {@code timeout} minutes, then ranks whatever was retrieved and adds it to the
     * CAS indexes. A failed or timed-out search is logged and skipped; it does not fail the CAS.
     *
     * @param jcas the CAS holding the {@link AbstractQuery} to search with
     * @throws AnalysisEngineProcessException if the CAS contains no {@link AbstractQuery}, or if
     *         the calling thread is interrupted while waiting for the searches
     */
    @Override
    public void process(JCas jcas) throws AnalysisEngineProcessException {
        AbstractQuery aquery = TypeUtil.getAbstractQueries(jcas).stream().findFirst()
                .orElseThrow(() -> new AnalysisEngineProcessException(
                        new NoSuchElementException("No AbstractQuery found in the CAS.")));
        Collection<QueryConcept> qconcepts = TypeUtil.getQueryConcepts(aquery);
        // Worker threads append concurrently, hence the synchronized wrapper.
        List<ConceptSearchResult> concepts = Collections.synchronizedList(new ArrayList<>());
        ExecutorService es = Executors.newCachedThreadPool();
        for (QueryConcept qconcept : qconcepts) {
            String queryString = NON_QUERY_CHARS
                    .matcher(bopQueryStringConstructor.formatQueryConcept(qconcept))
                    .replaceAll(" ");
            LOG.info("Query string: {}", queryString);
            for (BioASQUtil.Ontology ontology : BioASQUtil.Ontology.values()) {
                es.execute(() -> {
                    try {
                        concepts.addAll(
                                BioASQUtil.searchOntology(service, jcas, queryString, pages, hits, ontology));
                    } catch (IOException e) {
                        // Report through the logger with context; an exception thrown out of an
                        // execute() task would only reach the thread's uncaught-exception handler.
                        LOG.warn("Failed to search {} for query string: {}", ontology, queryString, e);
                    }
                });
            }
        }
        es.shutdown();
        boolean finished;
        try {
            finished = es.awaitTermination(timeout, TimeUnit.MINUTES);
        } catch (InterruptedException e) {
            es.shutdownNow();
            // Restore the interrupt flag so that callers further up can observe it.
            Thread.currentThread().interrupt();
            throw new AnalysisEngineProcessException(e);
        }
        if (!finished) {
            LOG.warn("Timeout occurs for one or some concept retrieval services.");
            // Best-effort cancellation of the searches that are still running.
            es.shutdownNow();
        }
        // Work on a snapshot: after a timeout, straggling workers may still append to the shared
        // list, and iterating a synchronized list without holding its lock is not safe.
        List<ConceptSearchResult> retrieved;
        synchronized (concepts) {
            retrieved = new ArrayList<>(concepts);
        }
        logRetrieved(retrieved);
        TypeUtil.rankedSearchResultsByScore(retrieved, limit).forEach(ConceptSearchResult::addToIndexes);
    }

    /** Logs, per search id, how many results were retrieved and (at debug level) the first ten. */
    private static void logRetrieved(List<ConceptSearchResult> retrieved) {
        Map<String, List<ConceptSearchResult>> onto2concepts = retrieved.stream()
                .collect(groupingBy(ConceptSearchResult::getSearchId));
        for (Map.Entry<String, List<ConceptSearchResult>> entry : onto2concepts.entrySet()) {
            List<ConceptSearchResult> results = entry.getValue();
            LOG.info("Retrieved {} concepts from {}", results.size(), entry.getKey());
            if (LOG.isDebugEnabled()) {
                results.stream().limit(10).forEach(c -> LOG.debug(" - {}", TypeUtil.toString(c)));
            }
        }
    }

}