Java tutorial
/******************************************************************************* * Copyright (c) 2014 R-Knowsys Technologies, http://www.rknowsys.com * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see `<http://www.gnu.org/licenses/>`. *******************************************************************************/ package com.rknowsys.portal.search.elastic; import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.lucene.util.Version; import org.elasticsearch.action.ActionFuture; import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.SearchRequestBuilder; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.Client; import org.elasticsearch.common.text.Text; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.SearchHits; import org.elasticsearch.search.aggregations.Aggregation; import org.elasticsearch.search.aggregations.AggregationBuilder; import org.elasticsearch.search.aggregations.Aggregations; import org.elasticsearch.search.highlight.HighlightField; import org.elasticsearch.search.sort.SortBuilder; import org.elasticsearch.search.sort.SortBuilders; import org.elasticsearch.search.sort.SortOrder; import com.liferay.portal.kernel.dao.orm.QueryUtil; import com.liferay.portal.kernel.log.Log; import com.liferay.portal.kernel.log.LogFactoryUtil; import com.liferay.portal.kernel.search.Document; import com.liferay.portal.kernel.search.DocumentImpl; import com.liferay.portal.kernel.search.Field; import com.liferay.portal.kernel.search.Hits; import com.liferay.portal.kernel.search.HitsImpl; import com.liferay.portal.kernel.search.IndexSearcher; import com.liferay.portal.kernel.search.Indexer; import com.liferay.portal.kernel.search.IndexerRegistryUtil; import com.liferay.portal.kernel.search.Query; import com.liferay.portal.kernel.search.QueryConfig; import com.liferay.portal.kernel.search.SearchContext; import com.liferay.portal.kernel.search.SearchEngineUtil; import com.liferay.portal.kernel.search.SearchException; import com.liferay.portal.kernel.search.SearchPermissionChecker; import com.liferay.portal.kernel.search.Sort; import com.liferay.portal.kernel.search.facet.Facet; import com.liferay.portal.kernel.search.facet.MultiValueFacet; import com.liferay.portal.kernel.search.facet.RangeFacet; import com.liferay.portal.kernel.search.facet.collector.FacetCollector; import com.liferay.portal.kernel.util.GetterUtil; import com.liferay.portal.kernel.util.LocaleUtil; import com.liferay.portal.kernel.util.PropsKeys; import com.liferay.portal.kernel.util.PropsUtil; import com.liferay.portal.kernel.util.StringPool; import com.liferay.portal.kernel.util.StringUtil; import com.liferay.portal.kernel.util.Validator; import com.rknowsys.portal.search.elastic.client.ClientFactory; import com.rknowsys.portal.search.elastic.facet.ElasticsearchFacetFieldCollector; import com.rknowsys.portal.search.elastic.facet.LiferayFacetParser; import com.rknowsys.portal.search.elastic.util.Utilities; public class ElasticsearchIndexSearcher implements IndexSearcher { private ClientFactory clientFactory; public static final int INDEX_FILTER_SEARCH_LIMIT = GetterUtil .getInteger(PropsUtil.get(PropsKeys.INDEX_FILTER_SEARCH_LIMIT)); @Override public Hits search(SearchContext searchContext, Query query) throws SearchException { try { int end = searchContext.getEnd(); int start = searchContext.getStart(); if (isFilterSearch(searchContext)) { if (end > INDEX_FILTER_SEARCH_LIMIT) { end = end - INDEX_FILTER_SEARCH_LIMIT + 5; } if ((start < 0) || (start > end) || end < 0) { return new HitsImpl(); } } query = getPermissionQuery(searchContext, query); return doSearch(searchContext, query, start, end); } catch (Exception e) { throw new SearchException(e); } } private Hits doSearch(SearchContext searchContext, Query query, int start, int end) { Client client = getClient(); SearchRequestBuilder searchRequestBuilder = prepareSearchBuilder(searchContext, query, client, start, end); _log.debug("Current lucene version: " + Version.LUCENE_CURRENT); _log.debug("Search query String " + searchRequestBuilder.toString()); SearchRequest searchRequest = searchRequestBuilder.request(); _log.debug("Time Before request to ES: " + System.currentTimeMillis()); ActionFuture<SearchResponse> future = client.search(searchRequest); SearchResponse searchResponse = future.actionGet(); _log.debug("Time After response from ES: " + System.currentTimeMillis()); updateFacetCollectors(searchContext, searchResponse); Hits hits = processSearchHits(searchResponse, query.getQueryConfig()); _log.debug("Total responseCount " + searchResponse.getHits().getTotalHits()); _log.debug("Time After processSearchHits: " + System.currentTimeMillis()); hits.setQuery(query); TimeValue timeValue = searchResponse.getTook(); hits.setSearchTime((float) timeValue.getSecondsFrac()); return hits; } private Query getPermissionQuery(SearchContext searchContext, Query query) { if (searchContext.getEntryClassNames() == null) { return query; } for (String className : searchContext.getEntryClassNames()) { Indexer indexer = IndexerRegistryUtil.getIndexer(className); if (indexer != null) { if (indexer.isFilterSearch() && indexer.isPermissionAware()) { SearchPermissionChecker searchPermissionChecker = SearchEngineUtil.getSearchPermissionChecker(); query = searchPermissionChecker.getPermissionQuery(searchContext.getCompanyId(), searchContext.getGroupIds(), searchContext.getUserId(), className, query, searchContext); } } } return query; } private SearchRequestBuilder prepareSearchBuilder(SearchContext searchContext, Query query, Client client, int start, int end) { SearchRequestBuilder searchRequestBuilder = client.prepareSearch(Utilities.getIndexName(searchContext)); addHighlights(query, searchRequestBuilder); //QueryBuilder queryBuilder = com.rknowsys.portal.search.elastic.liferay.QueryTranslatorUtil.translate(query); //if (queryBuilder == null) { String q = applyCustomESRules(query.toString()); QueryBuilder queryBuilder = QueryBuilders.queryStringQuery(q); //} searchRequestBuilder.setQuery(queryBuilder); _log.debug("Query String" + queryBuilder.toString()); searchRequestBuilder.setTypes("documents"); addFacetCollectorsToSearch(searchContext, searchRequestBuilder); addSortToSearch(searchContext.getSorts(), searchRequestBuilder); int size = end - start; _log.debug("Search Start: " + start + " Search Size: " + size); if ((start != QueryUtil.ALL_POS) && (end != QueryUtil.ALL_POS)) { searchRequestBuilder.setFrom(start).setSize(size); } return searchRequestBuilder; } private void addHighlights(Query query, SearchRequestBuilder searchRequestBuilder) { QueryConfig queryConfig = query.getQueryConfig(); if (queryConfig.isHighlightEnabled()) { String localizedContentName = DocumentImpl.getLocalizedName(queryConfig.getLocale(), Field.CONTENT); String localizedTitleName = DocumentImpl.getLocalizedName(queryConfig.getLocale(), Field.TITLE); int fragmentSize = queryConfig.getHighlightFragmentSize(); int numberOfFragments = queryConfig.getHighlightSnippetSize(); searchRequestBuilder.addHighlightedField(Field.CONTENT, fragmentSize, numberOfFragments); searchRequestBuilder.addHighlightedField(Field.TITLE, fragmentSize, numberOfFragments); searchRequestBuilder.addHighlightedField(localizedContentName, fragmentSize, numberOfFragments); searchRequestBuilder.addHighlightedField(localizedTitleName, fragmentSize, numberOfFragments); } } private boolean isFilterSearch(SearchContext searchContext) { if (searchContext.getEntryClassNames() == null) { return false; } for (String entryClassName : searchContext.getEntryClassNames()) { Indexer indexer = IndexerRegistryUtil.getIndexer(entryClassName); if (indexer == null) { continue; } if (indexer.isFilterSearch()) { return true; } } return false; } private String applyCustomESRules(String q) { //Place for any custom modifications //-1 causes double negatives and hence positive for orgId:1 and it fails for all wdAdmins q = q.replaceAll("-organizationId:1", "organizationId:\\\\-1"); //replace treepath term as literals q = q.replaceAll("treePath:\\*(.*?)\\*", "treePath:\"$1\""); //groupRoleId should not be split q = q.replaceAll("groupRoleId:(.*?)(\\s|\\)+)", "groupRoleId:\"$1\"$2"); //to take care of regex conversion by lucene 4.0 which ES uses; escaping all forward slashes //String q = query.toString().replaceAll("((?::|(?<!^)\\G)[^\\/\\s]*)(/)", "$1\\\\$2"); return q; } @Override public Hits search(String searchEngineId, long companyId, Query query, Sort[] sort, int start, int end) throws SearchException { try { Client client = getClient(); SearchRequestBuilder searchRequestBuilder = client.prepareSearch(Utilities.getIndexName(companyId)); String q = applyCustomESRules(query.toString()); QueryBuilder queryBuilder = QueryBuilders.queryStringQuery(q); searchRequestBuilder.setQuery(queryBuilder); searchRequestBuilder.setTypes("documents"); addSortToSearch(sort, searchRequestBuilder); _log.debug("Search Start: " + start + " Search End: " + end); if ((start != QueryUtil.ALL_POS) && (end != QueryUtil.ALL_POS)) { searchRequestBuilder.setFrom(start).setSize(end - start); } _log.debug("Query String" + searchRequestBuilder.toString()); SearchRequest searchRequest = searchRequestBuilder.request(); _log.debug("Search query String " + searchRequest.toString()); ActionFuture<SearchResponse> future = client.search(searchRequest); SearchResponse searchResponse = future.actionGet(); Hits hits = processSearchHits(searchResponse, query.getQueryConfig()); hits.setQuery(query); TimeValue timeValue = searchResponse.getTook(); hits.setSearchTime((float) timeValue.getSecondsFrac()); return hits; } catch (Exception e) { throw new SearchException(e); } } @Override public String spellCheckKeywords(SearchContext searchContext) { return StringPool.BLANK; } @Override public Map<String, List<String>> spellCheckKeywords(SearchContext searchContext, int max) { return Collections.emptyMap(); } @Override public String[] suggestKeywordQueries(SearchContext searchContext, int max) { return new String[0]; } protected Document processSearchHit(SearchHit hit) { Document document = new DocumentImpl(); Map<String, Object> source = hit.getSource(); for (String fieldName : source.keySet()) { Object val = source.get(fieldName); if (val == null) { Field field = new Field(fieldName, (String) null); document.add(field); } else if (val instanceof List) { String[] values = ((List<String>) val).toArray(new String[((List<String>) val).size()]); Field field = new Field(fieldName, values); document.add(field); } else { Field field = new Field(fieldName, new String[] { val.toString() }); document.add(field); } } return document; } protected Hits processSearchHits(SearchResponse searchResponse, QueryConfig queryConfig) { Hits hits = new HitsImpl(); List<Document> documents = new ArrayList<Document>(); Set<String> queryTerms = new HashSet<String>(); List<Float> scores = new ArrayList<Float>(); List<String> snippets = new ArrayList<String>(); SearchHits searchHits = searchResponse.getHits(); if (searchHits.totalHits() > 0) { SearchHit[] searchHitsArray = searchHits.getHits(); for (SearchHit searchHit : searchHitsArray) { Document document = processSearchHit(searchHit); documents.add(document); scores.add(searchHit.getScore()); String snippet = StringPool.BLANK; if (queryConfig.isHighlightEnabled()) { snippet = getSnippet(searchHit, queryConfig, queryTerms, searchHit.highlightFields(), Field.CONTENT); if (Validator.isNull(snippet)) { snippet = getSnippet(searchHit, queryConfig, queryTerms, searchHit.highlightFields(), Field.TITLE); } if (Validator.isNotNull(snippet)) { snippets.add(snippet); } } } } int totalHits = (int) searchHits.getTotalHits(); _log.debug("Total Hits: " + totalHits); _log.debug("Total Documents size: " + documents.size()); hits.setDocs(documents.toArray(new Document[documents.size()])); hits.setLength(totalHits); hits.setQueryTerms(queryTerms.toArray(new String[queryTerms.size()])); hits.setScores(scores.toArray(new Float[scores.size()])); hits.setSnippets(snippets.toArray(new String[snippets.size()])); return hits; } protected String getSnippet(SearchHit searchHit, QueryConfig queryConfig, Set<String> queryTerms, Map<String, HighlightField> highlights, String field) { if (highlights == null) { return StringPool.BLANK; } boolean localizedSearch = true; String defaultLanguageId = LocaleUtil.toLanguageId(LocaleUtil.getDefault()); String queryLanguageId = LocaleUtil.toLanguageId(queryConfig.getLocale()); if (defaultLanguageId.equals(queryLanguageId)) { localizedSearch = false; } if (localizedSearch) { String localizedName = DocumentImpl.getLocalizedName(queryConfig.getLocale(), field); if (searchHit.fields().containsKey(localizedName)) { field = localizedName; } } HighlightField hField = highlights.get(field); if (hField == null) { return StringPool.BLANK; } List<String> snippets = new ArrayList<String>(); Text[] txtArr = hField.getFragments(); if (txtArr == null) { return StringPool.BLANK; } for (Text txt : txtArr) { snippets.add(txt.string()); } String snippet = StringUtil.merge(snippets, "..."); if (Validator.isNotNull(snippet)) { snippet = snippet + "..."; } else { snippet = StringPool.BLANK; } Pattern pattern = Pattern.compile("<em>(.*?)</em>"); Matcher matcher = pattern.matcher(snippet); while (matcher.find()) { queryTerms.add(matcher.group(1)); } snippet = StringUtil.replace(snippet, "<em>", ""); snippet = StringUtil.replace(snippet, "</em>", ""); return snippet; } protected void updateFacetCollectors(SearchContext searchContext, SearchResponse searchResponse) { Map<String, Facet> facetsMap = searchContext.getFacets(); for (Facet facet : facetsMap.values()) { if (facet.isStatic()) { continue; } Aggregations facets = searchResponse.getAggregations(); Aggregation elasticsearchFacet = facets.get(facet.getFieldName()); FacetCollector facetCollector = new ElasticsearchFacetFieldCollector(elasticsearchFacet); facet.setFacetCollector(facetCollector); } } private void addFacetCollectorsToSearch(SearchContext searchContext, SearchRequestBuilder searchRequestBuilder) { Map<String, Facet> facets = searchContext.getFacets(); for (Facet facet : facets.values()) { AggregationBuilder facetBuilder = null; if (facet instanceof MultiValueFacet) { facetBuilder = LiferayFacetParser.getFacetBuilder((MultiValueFacet) facet); } else if (facet instanceof RangeFacet) { facetBuilder = LiferayFacetParser.getFacetBuilder((RangeFacet) facet); } if (facetBuilder != null) { searchRequestBuilder.addAggregation(facetBuilder); } } } private void addSortToSearch(Sort[] sorts, SearchRequestBuilder searchRequestBuilder) { String query = searchRequestBuilder.toString(); if (query.contains("assetTagNames")) //term search { //always adds score to the sort searchRequestBuilder.addSort(SortBuilders.scoreSort()); } else //empty search { //no score needed if (query.contains("com.liferay.portal.model.Organization")) { searchRequestBuilder .addSort(SortBuilders.fieldSort("name_sortable").ignoreUnmapped(true).order(SortOrder.ASC)); } } if (sorts == null) { //for alphabetic order on orgs return; } for (Sort sort : sorts) { if (sort == null) { continue; } String sortFieldName = sort.getFieldName(); SortBuilder sortBuilder = null; if (DocumentImpl.isSortableTextField(sortFieldName)) { sortFieldName = DocumentImpl.getSortableFieldName(sortFieldName); } if (Validator.isNull(sortFieldName) || !sortFieldName.endsWith("sortable")) { continue; } sortBuilder = SortBuilders.fieldSort(sortFieldName).ignoreUnmapped(true) .order(sort.isReverse() ? SortOrder.DESC : SortOrder.ASC); searchRequestBuilder.addSort(sortBuilder); } } public void setClientFactory(ClientFactory clientFactory) { this.clientFactory = clientFactory; } private Client getClient() { return clientFactory.getClient(); } private static final Log _log = LogFactoryUtil.getLog(ElasticsearchIndexSearcher.class); }