net.di2e.ecdr.libs.result.relevance.RelevanceNormalizer.java Source code

Introduction

Here is the source code for net.di2e.ecdr.libs.result.relevance.RelevanceNormalizer.java
Source

/**
 * Copyright (C) 2014 Cohesive Integrations, LLC (info@cohesiveintegrations.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package net.di2e.ecdr.libs.result.relevance;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;

import net.di2e.ecdr.commons.constants.SearchConstants;
import net.di2e.ecdr.commons.filter.AbstractFilterDelegate.SupportedGeosOptions;
import net.di2e.ecdr.commons.filter.StrictFilterDelegate;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.time.StopWatch;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.opengis.filter.sort.SortBy;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import ddf.catalog.data.Result;
import ddf.catalog.data.impl.ResultImpl;
import ddf.catalog.filter.FilterAdapter;
import ddf.catalog.operation.Query;
import ddf.catalog.source.UnsupportedQueryException;

/**
 * Normalizes the Relevance of a result set by looking at the contextual criteria, then doing a local calculation of
 * relevance based on the localized result set
 */
public class RelevanceNormalizer {

    /** Prefix placed on debug log lines that report how long the relevance processing took. */
    public static final String RELEVANCE_TIMER = "RELEVANCE TIMER:";

    private static final Logger LOGGER = LoggerFactory.getLogger(RelevanceNormalizer.class);
    private static final String METADATA_FIELD = "metadata";
    private static final String ID_FIELD = "id";
    private static final String SKIP_MESSAGE =
            "Query is not sorted based on relevance with contextual criteria. Skipping relevance normalization.";

    private final FilterAdapter filterAdapter;

    /**
     * Creates a normalizer.
     *
     * @param filterAdapter
     *            adapter used to extract the filter parameters (keyword, fuzzy flag, ...) from a query
     */
    public RelevanceNormalizer(FilterAdapter filterAdapter) {
        this.filterAdapter = filterAdapter;
    }

    /**
     * Normalize the relevance score for the results in the query response based on the contextual query criteria.
     * <p>
     * The results are re-indexed into a temporary in-memory Lucene index and re-scored against the keyword phrase
     * of the original query. Results that match are returned first, in the order Lucene reports the hits, followed
     * by the results that did not match (these get a relevance score of 0).
     *
     * @param results
     *            results to re-score; when {@code null} or empty the argument is returned untouched
     * @param originalQuery
     *            query that produced the results
     * @return a new list of results carrying the normalized scores, or the {@code results} argument itself when
     *         normalization does not apply (non-relevance sort, no keyword phrase) or fails
     */
    public List<Result> normalize(List<Result> results, Query originalQuery) {
        // Nothing to re-score. Lucene also rejects a search asking for zero hits, which used to surface as a
        // misleading warning with a stack trace for every empty result set.
        if (results == null || results.isEmpty()) {
            LOGGER.debug("No results to normalize. Skipping relevance normalization.");
            return results;
        }

        // We want to do relevance sort if no sort order was specified or if Relevance sort was specified
        if (!isRelevanceSort(originalQuery.getSortBy())) {
            LOGGER.debug(SKIP_MESSAGE);
            return results;
        }

        Map<String, String> filterParameters = getFilterParameters(originalQuery);
        if (!canNormalizeQuery(filterParameters)) {
            LOGGER.debug(SKIP_MESSAGE);
            return results;
        }

        LOGGER.debug(
                "Query contained search phrase and will be sorted by relevance, performing re-indexing to normalize relevance.");
        Analyzer analyzer = null;
        Directory directory = null;
        DirectoryReader iReader = null;
        StopWatch stopWatch = new StopWatch();
        stopWatch.start();
        try {
            analyzer = new StandardAnalyzer();
            // create memory-stored index
            directory = new RAMDirectory();

            Map<String, Result> docMap = indexResults(results, directory, analyzer);
            LOGGER.debug("{} Document indexing finished in {} seconds.", RELEVANCE_TIMER,
                    (double) stopWatch.getTime() / 1000.0);

            // Now search the index with the keyword phrase of the original query
            iReader = DirectoryReader.open(directory);
            IndexSearcher iSearcher = new IndexSearcher(iReader);
            QueryParser parser = new QueryParser(METADATA_FIELD, analyzer);
            org.apache.lucene.search.Query query = getQuery(parser, filterParameters);
            ScoreDoc[] hits = iSearcher.search(query, null, docMap.size()).scoreDocs;
            LOGGER.debug("Got back {} results", hits.length);

            List<Result> updatedResults = new ArrayList<>(results.size());
            // loop through the indexed search results and update the scores in the original query results
            for (ScoreDoc curHit : hits) {
                Document doc = iSearcher.doc(curHit.doc);
                String uuid = doc.getField(ID_FIELD).stringValue();
                // remove the entry so that only the non-matching results remain in the map afterwards
                Result result = docMap.remove(uuid);
                updatedResults.add(updateResult(result, curHit.score));
                LOGGER.debug("Relevance for result {} was changed FROM {} TO {}",
                        result.getMetacard().getId(), result.getRelevanceScore(), curHit.score);
            }
            // results left over did not match the keyword query: add them with 0 relevance score
            for (Result unmatched : docMap.values()) {
                updatedResults.add(updateResult(unmatched, 0));
            }
            return updatedResults;

        } catch (ParseException | IOException | RuntimeException e) {
            // Best effort: normalization must never break the query, so fall back to the original results.
            LOGGER.warn(
                    "Received an exception while trying to perform re-indexing, sending original queryResponse on.",
                    e);
            return results;
        } finally {
            IOUtils.closeQuietly(iReader);
            IOUtils.closeQuietly(directory);
            IOUtils.closeQuietly(analyzer);
            stopWatch.stop();
            LOGGER.debug("{} Total relevance process took {} seconds.", RELEVANCE_TIMER,
                    (double) stopWatch.getTime() / 1000.0);
        }
    }

    /**
     * Tells whether the given sort order means "sort by relevance": either no sort property was specified at all
     * or the relevance property was specified explicitly.
     */
    private static boolean isRelevanceSort(SortBy sortBy) {
        if (sortBy == null || sortBy.getPropertyName() == null) {
            return true;
        }
        String propertyName = sortBy.getPropertyName().getPropertyName();
        return propertyName == null || Result.RELEVANCE.equals(propertyName);
    }

    /**
     * Writes one Lucene document per result into the directory and returns the generated document id to result
     * mapping. The writer is always closed, even when indexing fails, and a failure to close (commit) is
     * propagated instead of being swallowed.
     */
    private Map<String, Result> indexResults(List<Result> results, Directory directory, Analyzer analyzer)
            throws IOException {
        Map<String, Result> docMap = new HashMap<>();
        IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer);
        try (IndexWriter iWriter = new IndexWriter(directory, config)) {
            for (Result curResult : results) {
                // Lucene fields reject null values; index an empty text so that a single result without
                // parsable text (assumes TextParser may yield null - TODO confirm) does not abort the whole
                // normalization. Such a result simply ends up with a relevance of 0.
                String text = StringUtils.defaultString(
                        TextParser.parseTextFrom(curResult.getMetacard().getMetadata()));
                String uuid = UUID.randomUUID().toString();
                Document doc = new Document();
                doc.add(new Field(METADATA_FIELD, text, TextField.TYPE_STORED));
                doc.add(new Field(ID_FIELD, uuid, TextField.TYPE_STORED));
                iWriter.addDocument(doc);
                docMap.put(uuid, curResult);
            }
        }
        return docMap;
    }

    /**
     * Checks to see if this query can be normalized.
     *
     * @param filterParameters
     *            parameters from original ddf query
     * @return true if this query can be normalized (it carries a non-blank search phrase), false if not
     */
    protected boolean canNormalizeQuery(Map<String, String> filterParameters) {
        return StringUtils.isNotBlank(getSearchPhrase(filterParameters));
    }

    /**
     * Builds the Lucene query used to re-score the results.
     * <p>
     * The search phrase is parsed with the given parser. When the fuzzy parameter equals {@code "1"} and the parsed
     * query is a {@link BooleanQuery}, every top-level {@link TermQuery} clause is replaced by a {@link FuzzyQuery}
     * on the same term. Nested clauses and other query types are left as parsed.
     *
     * @param parser
     *            parser used to parse the search phrase
     * @param filterParameters
     *            parameters from original ddf query
     * @return the query to run against the temporary index
     * @throws ParseException
     *             if the search phrase is not a valid Lucene query string
     */
    protected org.apache.lucene.search.Query getQuery(QueryParser parser, Map<String, String> filterParameters)
            throws ParseException {
        org.apache.lucene.search.Query query = parser.parse(getSearchPhrase(filterParameters));
        if (!StringUtils.equals(filterParameters.get(SearchConstants.FUZZY_PARAMETER), "1")) {
            return query;
        }

        // should get a boolean query for keyword-based searches
        if (query instanceof BooleanQuery) {
            for (BooleanClause clause : ((BooleanQuery) query).getClauses()) {
                if (clause.getQuery() instanceof TermQuery) {
                    TermQuery termQuery = (TermQuery) clause.getQuery();
                    clause.setQuery(new FuzzyQuery(termQuery.getTerm()));
                }
            }
        } else {
            LOGGER.debug(
                    "Query was too complex for adding fuzzy. Expected BooleanQuery but ended up being of type {}",
                    query.getClass().getName());
        }
        return query;
    }

    /**
     * Pull out the string-based search phrase from a query.
     *
     * @param filterParameters
     *            filter parameters from the original query
     * @return Search phrase or null if no search phrase was found.
     */
    protected String getSearchPhrase(Map<String, String> filterParameters) {
        return filterParameters.get(SearchConstants.KEYWORD_PARAMETER);
    }

    /**
     * Extracts the filter parameters from the query by running it through the filter adapter.
     *
     * @param originalQuery
     *            query to extract the parameters from
     * @return a new mutable map with the extracted parameters; empty when the query is not supported by the
     *         delegate
     */
    protected Map<String, String> getFilterParameters(Query originalQuery) {
        Map<String, String> map = new HashMap<>();
        try {
            map.putAll(filterAdapter.adapt(originalQuery, new StrictFilterDelegate(false, SupportedGeosOptions.ALL,
                    Collections.<String, String>emptyMap(), Collections.<String, String>emptyMap())));
        } catch (UnsupportedQueryException uqe) {
            LOGGER.debug(
                    "Query did not contain any contextual criteria (search phrases), cannot perform re-relevance on this query.",
                    uqe);
        }
        return map;
    }

    /**
     * Creates a new result with an updated score.
     *
     * @param origResult
     *            Original result that contains an older score.
     * @param newScore
     *            New score to update the result with.
     * @return Result with updated score; metacard and distance are carried over from the original result.
     */
    protected Result updateResult(Result origResult, float newScore) {
        ResultImpl result = new ResultImpl(origResult.getMetacard());
        result.setRelevanceScore((double) newScore);
        result.setDistanceInMeters(origResult.getDistanceInMeters());
        return result;
    }

}