// Java tutorial
/** * Copyright (C) 2014 Cohesive Integrations, LLC (info@cohesiveintegrations.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package net.di2e.ecdr.libs.result.relevance; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.UUID; import net.di2e.ecdr.commons.constants.SearchConstants; import net.di2e.ecdr.commons.filter.AbstractFilterDelegate.SupportedGeosOptions; import net.di2e.ecdr.commons.filter.StrictFilterDelegate; import org.apache.commons.io.IOUtils; import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.time.StopWatch; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import 
org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.Version; import org.opengis.filter.sort.SortBy; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import ddf.catalog.data.Result; import ddf.catalog.data.impl.ResultImpl; import ddf.catalog.filter.FilterAdapter; import ddf.catalog.operation.Query; import ddf.catalog.source.UnsupportedQueryException; /** * Normalizes the Relevance of a result set by looking at the contextual criteria, then doing a local calculation of * relevance based on the localized result set */ public class RelevanceNormalizer { public static final String RELEVANCE_TIMER = "RELEVANCE TIMER:"; private static final Logger LOGGER = LoggerFactory.getLogger(RelevanceNormalizer.class); private static final String METADATA_FIELD = "metadata"; private static final String ID_FIELD = "id"; private FilterAdapter filterAdapter; public RelevanceNormalizer(FilterAdapter filterAdapter) { this.filterAdapter = filterAdapter; } /** * Normalize the relevance score for the results in the query response based on the contextual query criteria * * @param results * @param originalQuery * @return */ public List<Result> normalize(List<Result> results, Query originalQuery) { SortBy sortBy = originalQuery.getSortBy(); // We want to do relevance sort if no sort order was specfied or if Relevance sort was specified if (sortBy == null || sortBy.getPropertyName() == null || sortBy.getPropertyName().getPropertyName() == null || Result.RELEVANCE.equals(sortBy.getPropertyName().getPropertyName())) { Map<String, String> filterParameters = getFilterParameters(originalQuery); if (canNormalizeQuery(filterParameters)) { LOGGER.debug( "Query contained search phrase and will be sorted by relevance, performing re-indexing to normalize relevance."); Directory directory = null; DirectoryReader iReader = null; Map<String, Result> docMap = new HashMap<>(); List<Result> updatedResults = new ArrayList<>(); StopWatch stopWatch = new StopWatch(); stopWatch.start(); try { 
Analyzer analyzer = new StandardAnalyzer(); // create memory-stored index directory = new RAMDirectory(); IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer); IndexWriter iWriter = new IndexWriter(directory, config); // loop through all of the results and add them to the index for (Result curResult : results) { Document doc = new Document(); String text = TextParser.parseTextFrom(curResult.getMetacard().getMetadata()); String uuid = UUID.randomUUID().toString(); doc.add(new Field(METADATA_FIELD, text, TextField.TYPE_STORED)); doc.add(new Field(ID_FIELD, uuid, TextField.TYPE_STORED)); iWriter.addDocument(doc); docMap.put(uuid, curResult); } IOUtils.closeQuietly(iWriter); LOGGER.debug("{} Document indexing finished in {} seconds.", RELEVANCE_TIMER, (double) stopWatch.getTime() / 1000.0); // Now search the index: iReader = DirectoryReader.open(directory); IndexSearcher iSearcher = new IndexSearcher(iReader); // Parse a simple query that searches for "text": QueryParser parser = new QueryParser(METADATA_FIELD, analyzer); org.apache.lucene.search.Query query = getQuery(parser, filterParameters); ScoreDoc[] hits = iSearcher.search(query, null, docMap.size()).scoreDocs; LOGGER.debug("Got back {} results", hits.length); // loop through the indexed search results and update the scores in the original query results for (ScoreDoc curHit : hits) { Document doc = iSearcher.doc(curHit.doc); String uuid = doc.getField(ID_FIELD).stringValue(); Result result = docMap.get(uuid); docMap.remove(uuid); updatedResults.add(updateResult(result, curHit.score)); LOGGER.debug("Relevance for result {} was changed FROM {} TO {}", result.getMetacard().getId(), result.getRelevanceScore(), curHit.score); } // check if there are any results left that did not match the keyword query for (Map.Entry<String, Result> curEntry : docMap.entrySet()) { // add result in with 0 relevance score updatedResults.add(updateResult(curEntry.getValue(), 0)); } // create new query response 
return updatedResults; } catch (ParseException | IOException | RuntimeException e) { LOGGER.warn( "Received an exception while trying to perform re-indexing, sending original queryResponse on.", e); return results; } finally { IOUtils.closeQuietly(iReader); IOUtils.closeQuietly(directory); stopWatch.stop(); LOGGER.debug("{} Total relevance process took {} seconds.", RELEVANCE_TIMER, (double) stopWatch.getTime() / 1000.0); } } else { LOGGER.debug( "Query is not sorted based on relevance with contextual criteria. Skipping relevance normalization."); } } else { LOGGER.debug( "Query is not sorted based on relevance with contextual criteria. Skipping relevance normalization."); } return results; } /** * Checks to see if this query can be normalized. * * @param filterParameters * parameters from original ddf query * @return true if this query can be normalzed, false if not */ protected boolean canNormalizeQuery(Map<String, String> filterParameters) { return StringUtils.isNotBlank(getSearchPhrase(filterParameters)); } protected org.apache.lucene.search.Query getQuery(QueryParser parser, Map<String, String> filterParameters) throws ParseException { String searchPhrase = getSearchPhrase(filterParameters); org.apache.lucene.search.Query query = parser.parse(searchPhrase); if (filterParameters.containsKey(SearchConstants.FUZZY_PARAMETER) && StringUtils.equals(filterParameters.get(SearchConstants.FUZZY_PARAMETER), "1")) { // should get a boolean query for keyword-based searches if (query instanceof BooleanQuery) { BooleanQuery booleanQuery = (BooleanQuery) query; for (BooleanClause clause : booleanQuery.getClauses()) { if (clause.getQuery() instanceof TermQuery) { TermQuery oldQuery = (TermQuery) clause.getQuery(); FuzzyQuery newQuery = new FuzzyQuery(oldQuery.getTerm()); clause.setQuery(newQuery); } } } else { LOGGER.debug( "Query was too complex for adding fuzzy. 
Expected BooleanQuery but ended up being of type {}", query.getClass().getName()); } } return query; } /** * Pull out the string-based search phrase from a query. * * @param filterParameters * filterparameters from the original query * @return Search phrase or null if no search phrase was found. */ protected String getSearchPhrase(Map<String, String> filterParameters) { String searchPhrase = null; if (filterParameters.containsKey(SearchConstants.KEYWORD_PARAMETER)) { searchPhrase = filterParameters.get(SearchConstants.KEYWORD_PARAMETER); } return searchPhrase; } protected Map<String, String> getFilterParameters(Query originalQuery) { HashMap<String, String> map = new HashMap<>(); try { map.putAll(filterAdapter.adapt(originalQuery, new StrictFilterDelegate(false, SupportedGeosOptions.ALL, Collections.<String, String>emptyMap(), Collections.<String, String>emptyMap()))); } catch (UnsupportedQueryException uqe) { LOGGER.debug( "Query did not contain any contextual criteria (search phrases), cannot perform re-relevance on this query."); } return map; } /** * Creates a new result with an updated score. * * @param origResult * Original result that contains an older score. * @param newScore * New score to update the result with. * @return Result with updated score. */ protected Result updateResult(Result origResult, float newScore) { ResultImpl result = new ResultImpl(origResult.getMetacard()); result.setRelevanceScore((double) newScore); result.setDistanceInMeters(origResult.getDistanceInMeters()); return result; } }