Java tutorial
/* * Copyright (C) 2007-2013 Crafter Software Corporation. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package org.craftercms.search.service.impl; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import javax.activation.FileTypeMap; import org.apache.commons.collections.MapUtils; import org.apache.commons.io.FilenameUtils; import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.SolrResponse; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.request.AbstractUpdateRequest; import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.handler.extraction.ExtractingParams; import org.craftercms.core.service.Content; import org.craftercms.search.exception.SearchException; import org.craftercms.search.exception.SolrDocumentBuildException; import org.craftercms.search.service.SearchService; import org.craftercms.search.service.SolrDocumentBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Required; import org.springframework.mail.javamail.ConfigurableMimeFileTypeMap; /** * Implementation of {@link SearchService} using Solr as the underlying search engine. * * @author Michael Chen * @author Alfonso Vasquez * @author Dejan Brkic */ public class SolrSearchService implements SearchService<SolrQuery> { public static final String DEFAULT_FILE_NAME_FIELD_NAME = "file-name"; public static final String SOLR_CONTENT_STREAM_UPDATE_URL = "/update/extract"; public static final String DOCUMENT_LIST_START_PROPERTY_NAME = "start"; public static final String DOCUMENT_LIST_NUM_FOUND_PROPERTY_NAME = "numFound"; public static final String DOCUMENT_LIST_MAX_SCORE_PROPERTY_NAME = "maxScore"; public static final String DOCUMENT_LIST_DOCUMENTS_PROPERTY_NAME = "documents"; private static final Logger logger = LoggerFactory.getLogger(SolrSearchService.class); /** * The ID of the default index. */ protected String defaultIndexId; /** * The file name field name (default is file-name). */ protected String fileNameFieldName; /** * The Solr client used to execute requests against a Solr server. */ protected SolrClient solrClient; /** * The Solr document builder, to build Solr documents from generic XML documents. */ protected SolrDocumentBuilder solrDocumentBuilder; /** * Multi value separator for additional fields of structured documents. */ protected String multiValueSeparator; /** * The regex pattern used to ignore those additional fields that shouldn't be parsed for multi value. */ protected String multiValueIgnorePattern; /** * ID regex/delete query mappings that can be used to specify special delete queries for certain files, * e.g. delete XML documents with their sub-documents. */ protected Map<String, String> deleteQueryMappings; /** * Set of additional filter queries that should be used on all search requests */ protected String[] additionalFilterQueries; /** * Mime type map used to retrieve the mime types of files when submitting binary/structured content for indexing. */ protected FileTypeMap mimeTypesMap; public SolrSearchService() { fileNameFieldName = DEFAULT_FILE_NAME_FIELD_NAME; mimeTypesMap = new ConfigurableMimeFileTypeMap(); } /** * Sets the ID of the default index. */ public void setDefaultIndexId(String defaultIndexId) { this.defaultIndexId = defaultIndexId; } /** * Sets the file name field (default is file-name). */ public void setFileNameFieldName(String fileNameFieldName) { this.fileNameFieldName = fileNameFieldName; } /** * Sets the Solr client used to execute requests against a Solr server. */ @Required public void setSolrClient(SolrClient solrClient) { this.solrClient = solrClient; } /** * Sets the Solr document builder, to build Solr documents from generic XML documents. */ @Required public void setSolrDocumentBuilder(SolrDocumentBuilder solrDocumentBuilder) { this.solrDocumentBuilder = solrDocumentBuilder; } /** * Sets the multi value separator for additional fields of structured documents. */ @Required public void setMultiValueSeparator(String multiValueSeparator) { this.multiValueSeparator = multiValueSeparator; } /** * Sets the regex pattern used to ignore those additional fields that shouldn't be parsed for multi value. */ @Required public void setMultiValueIgnorePattern(String multiValueIgnorePattern) { this.multiValueIgnorePattern = multiValueIgnorePattern; } /** * Sets ID regex/delete query mappings that can be used to specify special delete queries for certain * files, e.g. delete XML documents with their sub-documents. */ public void setDeleteQueryMappings(Map<String, String> deleteQueryMappings) { this.deleteQueryMappings = deleteQueryMappings; } /** * Sets the set of additional filter queries that should be used on all search requests */ public void setAdditionalFilterQueries(String[] additionalFilterQueries) { this.additionalFilterQueries = additionalFilterQueries; } @Override public SolrQuery createQuery() { return new SolrQuery(); } @Override public SolrQuery createQuery(Map<String, String[]> params) { return new SolrQuery(params); } /** * {@inheritDoc} */ public Map<String, Object> search(SolrQuery query) { return search(null, query); } @Override public Map<String, Object> search(String indexId, SolrQuery query) throws SearchException { if (StringUtils.isEmpty(indexId)) { indexId = defaultIndexId; } addAdditionalFilterQueries(indexId, query); if (logger.isDebugEnabled()) { logger.debug("{}Executing query {}", getIndexPrefix(indexId), query); } SolrResponse response; try { response = solrClient.query(indexId, toActualSolrQuery(query)); } catch (SolrServerException | IOException e) { throw new SearchException(indexId, "Search for query " + query + " failed", e); } // Solr search result is a List<Map.Entry<String,Object>>, where every entry is a (name,value) pair, // and there can be duplicate names in the list. NamedList<Object> list = response.getResponse(); // Convert this list into a map where values of the same name are grouped into a list. Map<String, Object> map = toMap(list); if (logger.isDebugEnabled()) { logger.debug("{}Response for query {}: {}", getIndexPrefix(indexId), query, map); } return map; } /** * {@inheritDoc} */ public void update(String site, String id, String xml, boolean ignoreRootInFieldNames) throws SearchException { update(null, site, id, xml, ignoreRootInFieldNames); } @Override public void update(String indexId, String site, String id, String xml, boolean ignoreRootInFieldNames) throws SearchException { if (StringUtils.isEmpty(indexId)) { indexId = defaultIndexId; } String finalId = site + ":" + id; // This is done because when a document is updated, and it had children before but not now, the children // would be orphaned (SOLR-6096) delete(indexId, site, id); try { SolrInputDocument solrDoc = solrDocumentBuilder.build(site, id, xml, ignoreRootInFieldNames); NamedList<Object> response = solrClient.add(indexId, solrDoc).getResponse(); if (logger.isDebugEnabled()) { logger.debug(getSuccessfulMessage(indexId, finalId, "Update", response)); } } catch (SolrDocumentBuildException e) { throw new SearchException(indexId, "Unable to build Solr document for " + finalId, e); } catch (IOException e) { throw new SearchException(indexId, "I/O error while executing update for " + finalId, e); } catch (Exception e) { throw new SearchException(indexId, "Error executing update for " + finalId, e); } } /** * {@inheritDoc} */ public void delete(String site, String id) throws SearchException { delete(null, site, id); } @Override public void delete(String indexId, String site, String id) throws SearchException { if (StringUtils.isEmpty(indexId)) { indexId = defaultIndexId; } String finalId = site + ":" + id; String query = getDeleteQuery(finalId); NamedList<Object> response; try { if (StringUtils.isNotEmpty(query)) { response = solrClient.deleteByQuery(indexId, query).getResponse(); if (logger.isDebugEnabled()) { logger.debug(getSuccessfulMessage(indexId, query, "Delete", response)); } } else { response = solrClient.deleteById(indexId, finalId).getResponse(); if (logger.isDebugEnabled()) { logger.debug(getSuccessfulMessage(indexId, finalId, "Delete", response)); } } } catch (IOException e) { if (StringUtils.isNotEmpty(query)) { throw new SearchException(indexId, "I/O error while executing delete for " + query, e); } else { throw new SearchException(indexId, "I/O error while executing delete for " + finalId, e); } } catch (Exception e) { if (StringUtils.isNotEmpty(query)) { throw new SearchException(indexId, "Error executing delete for " + query, e); } else { throw new SearchException(indexId, "Error executing delete for " + finalId, e); } } } @Override public void updateContent(String site, String id, File file) throws SearchException { updateContent(null, site, id, file, null); } @Override public void updateContent(String indexId, String site, String id, File file) throws SearchException { updateContent(indexId, site, id, file, null); } @Override public void updateContent(String site, String id, File file, Map<String, List<String>> additionalFields) throws SearchException { updateContent(null, site, id, file, additionalFields); } @Override public void updateContent(String indexId, String site, String id, File file, Map<String, List<String>> additionalFields) throws SearchException { if (StringUtils.isEmpty(indexId)) { indexId = defaultIndexId; } String finalId = site + ":" + id; String fileName = FilenameUtils.getName(id); String contentType = mimeTypesMap.getContentType(fileName); ContentStreamUpdateRequest request = new ContentStreamUpdateRequest(SOLR_CONTENT_STREAM_UPDATE_URL); NamedList<Object> response; try { ModifiableSolrParams params = solrDocumentBuilder.buildParams(site, id, ExtractingParams.LITERALS_PREFIX, null, additionalFields); params.set(ExtractingParams.LITERALS_PREFIX + fileNameFieldName, fileName); request.setParams(params); request.addFile(file, contentType); request.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true); response = solrClient.request(request, indexId); } catch (SolrServerException e) { logger.warn("{}Unable to update file through content stream request: {}. Attempting to perform just " + "the metadata update", getIndexPrefix(indexId), e.getMessage()); try { SolrInputDocument inputDocument = solrDocumentBuilder.build(site, id, additionalFields); inputDocument.setField(fileNameFieldName, fileName); response = solrClient.add(indexId, inputDocument).getResponse(); } catch (IOException e1) { throw new SearchException(indexId, "I/O error while executing update file for " + finalId, e1); } catch (SolrServerException e1) { throw new SearchException(indexId, e1.getMessage(), e1); } } catch (IOException e) { throw new SearchException(indexId, "I/O error while executing update file for " + finalId, e); } if (logger.isDebugEnabled()) { logger.debug(getSuccessfulMessage(indexId, finalId, "Update file", response)); } } @Override public void updateContent(String site, String id, Content content) throws SearchException { throw new UnsupportedOperationException("Only use updateContent methods that receive a file"); } @Override public void updateContent(String indexId, String site, String id, Content content) throws SearchException { throw new UnsupportedOperationException("Only use updateContent methods that receive a file"); } @Override public void updateContent(String site, String id, Content content, Map<String, List<String>> additionalFields) throws SearchException { throw new UnsupportedOperationException("Only use updateContent methods that receive a file"); } @Override public void updateContent(String indexId, String site, String id, Content content, Map<String, List<String>> additionalFields) throws SearchException { throw new UnsupportedOperationException("Only use updateContent methods that receive a file"); } public void commit() throws SearchException { commit(null); } @Override public void commit(String indexId) throws SearchException { if (StringUtils.isEmpty(indexId)) { indexId = defaultIndexId; } try { NamedList<Object> response = solrClient.commit(indexId).getResponse(); if (logger.isDebugEnabled()) { logger.debug(String.format("%sCommit successful: %s", getIndexPrefix(indexId), response)); } } catch (IOException e) { throw new SearchException(indexId, "I/O error while executing commit", e); } catch (Exception e) { throw new SearchException(indexId, "Error executing commit", e); } } protected SolrParams toActualSolrQuery(QueryParams queryParams) { return new ModifiableSolrParams(queryParams.getParams()); } @SuppressWarnings("unchecked") protected Map<String, Object> toMap(NamedList<Object> namedList) { Map<String, Object> map = new LinkedHashMap<String, Object>(); for (Map.Entry<String, Object> entry : namedList) { Object valueInMap = map.get(entry.getKey()); // If we have already a value in the map for the same key, that means the key has multiple values, // so group them in a list. if (valueInMap != null) { List<Object> group; if (valueInMap instanceof List) { group = (List<Object>) valueInMap; } else { group = new ArrayList<>(); group.add(valueInMap); map.put(entry.getKey(), group); } group.add(toSerializableValue(entry.getValue())); } else { map.put(entry.getKey(), toSerializableValue(entry.getValue())); } } return map; } @SuppressWarnings("unchecked") protected Object toSerializableValue(Object namedListValue) { if (namedListValue instanceof NamedList) { // The value can also be a NamedList, so convert it to map. return toMap((NamedList<Object>) namedListValue); } else if (namedListValue instanceof SolrDocumentList) { // If the value is a SolrDocumentList, copy the list attributes to a map SolrDocumentList docList = (SolrDocumentList) namedListValue; Map<String, Object> docListMap = new HashMap<String, Object>(4); docListMap.put(DOCUMENT_LIST_START_PROPERTY_NAME, docList.getStart()); docListMap.put(DOCUMENT_LIST_NUM_FOUND_PROPERTY_NAME, docList.getNumFound()); docListMap.put(DOCUMENT_LIST_MAX_SCORE_PROPERTY_NAME, docList.getMaxScore()); docListMap.put(DOCUMENT_LIST_DOCUMENTS_PROPERTY_NAME, extractDocs(docList)); return docListMap; } else { return namedListValue; } } protected Collection<Map<String, Object>> extractDocs(SolrDocumentList docList) { Collection<Map<String, Object>> docs = new ArrayList<>(docList.size()); for (SolrDocument doc : docList) { Map<String, Object> docMap = new LinkedHashMap<>(); for (Map.Entry<String, Object> field : doc) { String name = field.getKey(); Object value = field.getValue(); // If the value is an Iterable with a single value, return just that one value. This is done for backwards // compatibility since Solr 4 did this for us before if (value instanceof Iterable) { Iterator<?> iter = ((Iterable<?>) value).iterator(); if (iter.hasNext()) { Object first = iter.next(); if (!iter.hasNext()) { value = first; } } } docMap.put(name, value); } docs.add(docMap); } return docs; } protected String getSuccessfulMessage(String indexId, String idOrQuery, String operation, Object solrResponse) { return String.format("%s%s for %s successful: %s", getIndexPrefix(indexId), operation, idOrQuery, solrResponse); } protected String getIndexPrefix(String indexId) { return StringUtils.isNotEmpty(indexId) ? "[" + indexId + "] " : ""; } protected String getDeleteQuery(String id) { if (MapUtils.isNotEmpty(deleteQueryMappings)) { for (Map.Entry<String, String> mapping : deleteQueryMappings.entrySet()) { if (id.matches(mapping.getKey())) { return String.format(mapping.getValue(), id); } } } return null; } protected void addAdditionalFilterQueries(String indexId, SolrQuery solrQuery) { if (solrQuery.isDisableAdditionalFilters()) { if (logger.isDebugEnabled()) { logger.debug("{}Additional filters disabled for query {}", getIndexPrefix(indexId), solrQuery); } return; } String query = solrQuery.getQuery(); String[] filterQueries = solrQuery.getFilterQueries(); for (String additionalFilterQuery : additionalFilterQueries) { boolean add = true; if (StringUtils.isNotEmpty(query)) { if (query.contains(additionalFilterQuery)) { add = false; } } if (ArrayUtils.isNotEmpty(filterQueries)) { for (String filterQuery : filterQueries) { if (filterQuery.contains(additionalFilterQuery)) { add = false; break; } } } if (add) { solrQuery.addFilterQuery(additionalFilterQuery); } } } }