org.ambraproject.service.search.SolrSearchService.java Source code

Java tutorial

Introduction

Here is the source code for org.ambraproject.service.search.SolrSearchService.java

Source

/*
 * Copyright (c) 2006-2013 by Public Library of Science
 * http://plos.org
 * http://ambraproject.org
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.ambraproject.service.search;

import org.ambraproject.ApplicationException;
import org.ambraproject.service.cache.Cache;
import org.ambraproject.util.Pair;
import org.ambraproject.views.SearchHit;
import org.ambraproject.views.SearchResultSinglePage;
import org.apache.commons.configuration.Configuration;
import org.apache.commons.configuration.HierarchicalConfiguration;
import org.apache.commons.lang.StringUtils;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TimeZone;
import java.util.TreeMap;
import java.util.regex.Pattern;

/**
 * Service to provide search capabilities for the application.
 *
 * @author Scott Sterling
 * @author Dragisa Krsmanovic
 * @author Joe Osowski
 */
public class SolrSearchService implements SearchService {
    private static final Logger log = LoggerFactory.getLogger(SolrSearchService.class);

    // Time-to-live handed to the cache when the top-level subject list is stored (see getTopSubjects).
    // NOTE(review): 3600 * 24 reads as "seconds per day" - confirm the unit the Cache implementation expects.
    private static final int CACHE_TTL = 3600 * 24; // one day

    // Factory for the Solr server connection; injected through setServerFactory.
    private SolrServerFactory serverFactory;
    // Optional cache; when it is null getTopSubjects queries Solr directly on every call.
    private Cache cache;
    // Read from "ambra.services.search.timeout" in setConfiguration (default 60000).
    private int queryTimeout;

    // Facet limit applied by getFilterData.
    private static final int MAX_FACET_SIZE = 100;
    // Not referenced in the part of the class reviewed here; presumably the facet.mincount used when
    // queries are created - TODO confirm.
    private static final int MIN_FACET_COUNT = 1;

    // sort option possible values (sort direction is optional)
    // field desc|asc
    // sum(field1, field2) desc|asc
    // break up the option string on comma: ","
    // The negative lookahead skips commas that sit inside parentheses, so the arguments of a function
    // such as sum(field1, field2) are not split apart.
    private static final Pattern SORT_OPTION_PATTERN = Pattern.compile(",(?![^\\(\\)]*\\))");

    // Display name -> Solr field(s) for the keyword filter; filled in setConfiguration and used by
    // simpleSearch as the "qf" parameter.
    private Map validKeywords = null;
    // Configured page sizes ("ambra.services.search.pageSizes.size"); filled in setConfiguration.
    private List pageSizes = null;

    //We have two collections here, as list supports ordering
    //And we want to keep the sorts in the order in which they are defined
    // displaySorts holds the sort display names in configuration order; validSorts maps each display
    // name to the sort expression that setSort turns into Solr sort fields.
    private List displaySorts = null;
    private Map validSorts = null;

    /**
     * Runs a simple "all the words" search (across most article fields).
     * <p/>
     * It uses <a href="http://wiki.apache.org/solr/DisMaxRequestHandler">DisMax Query Parser</a>.
     * <p/>
     * Besides the main result query, separate facet queries are sent for the journal facet and the article type
     * facet. When no keyword filter was supplied, a further query populates the keyword facet.
     *
     * @param sParams The search parameters to use.
     * @return One "page" of articles matching the query held by <code>sParams</code>, with the journal and article
     *         type facets (and, when no keyword filter is active, the keyword facet) filled in
     * @throws ApplicationException Thrown by a failed query attempt or when the filter keyword is not configured
     */
    public SearchResultSinglePage simpleSearch(SearchParameters sParams) throws ApplicationException {
        final String journalFacetField = "cross_published_journal_key";
        final String articleTypeFacetField = "article_type_facet";

        sParams.setQuery(sParams.getQuery());
        final String terms = sParams.getQuery();

        log.debug("Simple Search performed on the String: " + terms);

        // One query for the result page plus one query per facet that must ignore its own filter.
        SolrQuery mainQuery = createQuery(terms, sParams.getStartPage(), sParams.getPageSize(), true);
        SolrQuery journalQuery = createFacetsQuery(terms, journalFacetField, true);
        SolrQuery articleTypeQuery = createFacetsQuery(terms, articleTypeFacetField, true);

        // The main query receives every filter; the journal facet query skips the journal filter and the
        // article type facet query skips the article type filter.
        setFilters(mainQuery, sParams, false, false);
        setFilters(journalQuery, sParams, true, false);
        setFilters(articleTypeQuery, sParams, false, true);

        // Only the result page needs an ordering.
        setSort(mainQuery, sParams);

        // A keyword filter narrows the fields (body, conclusions, materials and methods, ...) searched.
        final String keywordKey = sParams.getFilterKeyword();
        final boolean keywordGiven = keywordKey.length() > 0;
        if (keywordGiven) {
            if (!validKeywords.containsKey(keywordKey)) {
                throw new ApplicationException("Invalid filterKeyword value of " + keywordKey + " specified");
            }

            String queryFields = (String) validKeywords.get(keywordKey);
            for (SolrQuery each : new SolrQuery[] { mainQuery, journalQuery, articleTypeQuery }) {
                // "qf" tells dismax which fields to query
                each.set("qf", queryFields);
            }
        }

        SearchResultSinglePage page = search(mainQuery);

        QueryResponse journalResponse = getSOLRResponse(journalQuery);
        QueryResponse articleTypeResponse = getSOLRResponse(articleTypeQuery);
        page.setJournalFacet(facetCountsToHashMap(journalResponse.getFacetField(journalFacetField)));
        page.setArticleTypeFacet(facetCountsToHashMap(articleTypeResponse.getFacetField(articleTypeFacetField)));

        // The keyword facet is only computed when the search is not already restricted to a keyword.
        if (!keywordGiven) {
            SolrQuery keywordQuery = createKeywordFacetQuery(terms);
            setFilters(keywordQuery, sParams, false, false);
            page.setKeywordFacet(facetCountsToHashMap(facetSearch(keywordQuery, "doc_partial_type")));
        }

        return page;
    }

    /**
     * Executes a Solr search built from the <code>SearchParameters.unformattedQuery</code> property. The query is
     * filtered by the filter fields (journals, subjects, ...) also contained in <code>searchParameters</code>. The
     * unformatted query is passed to Solr as-is (trimmed), so any date range is expected to be part of it.
     * <p/>
     * The journal facet and article type facet are fetched with separate queries, each of which leaves out the
     * filter on its own field.
     *
     * @param searchParameters Contains all the parameters necessary to execute a search against the Solr query engine
     * @return A subset (determined by <code>SearchParameters.startPage</code> and <code>SearchParameters.pageSize</code>)
     *         of the results of the Solr query generated from the contents of the <code>searchParameters</code>
     *         parameter
     * @throws ApplicationException Thrown during failed interactions with the Solr Server
     */
    public SearchResultSinglePage advancedSearch(SearchParameters searchParameters) throws ApplicationException {
        final String journalFacetField = "cross_published_journal_key";
        final String articleTypeFacetField = "article_type_facet";

        // Cleaning does not touch the unformattedQuery field, which is read from the original object below.
        SearchParameters cleaned = cleanStrings(searchParameters);
        if (log.isDebugEnabled()) {
            log.debug("Solr Search performed on the unformattedSearch String: "
                    + searchParameters.getUnformattedQuery().trim());
        }

        SolrQuery mainQuery = createQuery(null, cleaned.getStartPage(), cleaned.getPageSize(), false);
        mainQuery.setQuery(searchParameters.getUnformattedQuery().trim());

        SolrQuery journalQuery = createFacetsQuery(mainQuery.getQuery(), journalFacetField, false);
        SolrQuery articleTypeQuery = createFacetsQuery(mainQuery.getQuery(), articleTypeFacetField, false);

        // Every filter for the result page; each facet query drops the filter on its own field.
        setFilters(mainQuery, cleaned, false, false);
        setFilters(journalQuery, cleaned, true, false);
        setFilters(articleTypeQuery, cleaned, false, true);

        setSort(mainQuery, cleaned);

        // Facet queries run first, then the main search.
        FacetField journalFacet = getSOLRResponse(journalQuery).getFacetField(journalFacetField);
        FacetField articleTypeFacet = getSOLRResponse(articleTypeQuery).getFacetField(articleTypeFacetField);

        mainQuery.setQuery(searchParameters.getUnformattedQuery().trim());
        SearchResultSinglePage page = search(mainQuery);

        page.setJournalFacet(facetCountsToHashMap(journalFacet));
        page.setArticleTypeFacet(facetCountsToHashMap(articleTypeFacet));

        return page;
    }

    /**
     * {@inheritDoc}
     * <p/>
     * Implemented as an advanced search limited to one journal and one subject, restricted to articles with at
     * least one Twitter or Facebook share and ordered by the sum of both counts, descending.
     *
     * @return the top hit, or <code>null</code> when nothing matches
     */
    @Override
    public SearchHit getMostSharedForJournalCategory(String journal, String subjectArea)
            throws ApplicationException {
        SearchParameters params = new SearchParameters();

        params.setFilterSubjects(new String[] { subjectArea });
        params.setFilterJournals(new String[] { journal });

        // A single record is all that is needed: first page, page size one
        params.setPageSize(1);
        params.setStartPage(0);

        // Restrict to articles that have been shared at least once.
        // (This could become a filter query for a small performance boost.)
        params.setUnformattedQuery("alm_twitterCount:[1 TO *] OR alm_facebookCount:[1 TO *]");
        params.setSortValue("sum(alm_twitterCount, alm_facebookCount) desc");

        SearchResultSinglePage page = advancedSearch(params);
        return page.getHits().size() > 0 ? page.getHits().get(0) : null;
    }

    /**
     * {@inheritDoc}
     * <p/>
     * Implemented as an advanced search limited to one journal and one subject, restricted to articles whose
     * <code>counter_total_month</code> is at least one and ordered by that field, descending.
     *
     * @return the top hit, or <code>null</code> when nothing matches
     */
    @Override
    public SearchHit getMostViewedForJournalCategory(String journal, String subjectArea)
            throws ApplicationException {
        SearchParameters params = new SearchParameters();

        params.setFilterSubjects(new String[] { subjectArea });
        params.setFilterJournals(new String[] { journal });

        // A single record is all that is needed: first page, page size one
        params.setPageSize(1);
        params.setStartPage(0);

        // Restrict to articles with a non-zero counter_total_month.
        // (This could become a filter query for a small performance boost.)
        params.setUnformattedQuery("counter_total_month:[1 TO *]");
        params.setSortValue("counter_total_month desc");

        SearchResultSinglePage page = advancedSearch(params);
        return page.getHits().size() > 0 ? page.getHits().get(0) : null;
    }

    /**
     * {@inheritDoc}
     * <p/>
     * Implemented as an advanced search limited to one journal and one subject, restricted to articles whose
     * <code>counter_total_all</code> is at least one and ordered by that field, descending.
     *
     * @return the top hit, or <code>null</code> when nothing matches
     */
    @Override
    public SearchHit getMostViewedAllTimeForJournalCategory(String journal, String subjectArea)
            throws ApplicationException {
        SearchParameters params = new SearchParameters();

        params.setFilterSubjects(new String[] { subjectArea });
        params.setFilterJournals(new String[] { journal });

        // A single record is all that is needed: first page, page size one
        params.setPageSize(1);
        params.setStartPage(0);

        // Restrict to articles with a non-zero counter_total_all.
        // (This could become a filter query for a small performance boost.)
        params.setUnformattedQuery("counter_total_all:[1 TO *]");
        params.setSortValue("counter_total_all desc");

        SearchResultSinglePage page = advancedSearch(params);
        return page.getHits().size() > 0 ? page.getHits().get(0) : null;
    }

    /**
     * Populate facets of the search object.
     * <p/>
     * A match-all query is run first to collect the complete facet lists. The user's query is then run with the
     * filters from <code>searchParameters</code>. If that query fails with a <code>SolrException</code> or yields no
     * results, every filter except the "full documents" filter is removed and the search is repeated; the returned
     * object is then flagged via <code>setFiltersReset(true)</code>.
     * <p/>
     * The journal and article type facets of the returned object always come from the initial match-all query.
     *
     * @param searchParameters The search parameters
     * @return a populated SearchResultSinglePage object
     * @throws ApplicationException Thrown during failed interactions with the Solr Server
     */
    public SearchResultSinglePage getFilterData(SearchParameters searchParameters) throws ApplicationException {
        //TODO: This function queries SOLR for the journal and article type list
        //We should migrate this away from config and into a database when it is
        //available

        //Does not impact unformattedQuery field.
        SearchParameters sp = cleanStrings(searchParameters);

        String q = searchParameters.getUnformattedQuery().trim();

        //In this use case, if the query string is empty, we want to get facets for everything
        if (q.length() == 0) {
            q = "*:*";
        }

        if (log.isDebugEnabled()) {
            log.debug("Solr Search performed to get facet data on the unformattedSearch String: " + q);
        }

        //We want a complete set of facet data.  So first, lets get it all
        SolrQuery query = createQuery("*:*", 0, 0, false);

        //Remove facets we don't use in this case
        query.removeFacetField("author_facet");
        query.removeFacetField("editor_facet");
        query.removeFacetField("affiliate_facet");
        //Add the one we do want in this case.
        query.addFacetField("cross_published_journal_key");
        query.addFacetField("article_type");
        query.setFacetLimit(MAX_FACET_SIZE);

        //Related to JO: http://joborder.plos.org/view.php?id=17480
        //(for now) we don't want to search on Issue Images
        query.addFilterQuery(createFilterNoIssueImageDocuments());

        SearchResultSinglePage preFilterResults = search(query);

        setFilters(query, sp, false, false);

        query.setQuery(q);

        SearchResultSinglePage results = null;
        try {
            results = search(query);
        } catch (SolrException e) {
            //Report the query that actually failed; it is replaced by the match-all fallback right after.
            if (log.isWarnEnabled()) {
                log.warn("Solr Search failed on the unformattedSearch String: { " + q
                        + " } so the query will be re-run using the String *:* to populate the Filters"
                        + " on the Advanced Search page.", e);
            }
            query.setQuery("*:*");
        }

        if (results == null || results.getTotalNoOfResults() == 0) {
            //If no results, remove optional filters and try again.
            //getFilterQueries() hands back an array, so removing filters while looping over it is safe.
            String fullDocumentsFilter = createFilterFullDocuments();
            String[] activeFilters = query.getFilterQueries();
            if (activeFilters != null) {
                for (String filter : activeFilters) {
                    if (!filter.contains(fullDocumentsFilter)) {
                        query.removeFilterQuery(filter);
                    }
                }
            }

            results = search(query);

            //If results are STILL empty.  We must return something for subjects and article type.
            //So let's use the global list
            if (results.getTotalNoOfResults() == 0) {
                results.setSubjectFacet(preFilterResults.getSubjectFacet());
                results.setArticleTypeFacet(preFilterResults.getArticleTypeFacet());
            }

            results.setFiltersReset(true);
        }

        //Always return ALL values for journals; that list does not depend on the user's other selections.
        //NOTE(review): the article type facet is also overwritten with the complete list here, although the
        //original comment said it should depend on the user's selections - confirm which is intended.
        results.setJournalFacet(preFilterResults.getJournalFacet());
        results.setArticleTypeFacet(preFilterResults.getArticleTypeFacet());

        return results;
    }

    /**
     * {@inheritDoc}
     * <p/>
     * The names are taken from the <code>subject_hierarchy</code> facet. When the response carries no such facet, or
     * the facet has no values, an empty list is returned instead of failing.
     *
     * @param journal journal to restrict the facet search to; <code>null</code> or empty means no restriction
     * @return the facet value names in the order Solr returned them, never <code>null</code>
     * @throws ApplicationException Thrown during failed interactions with the Solr Server
     */
    @Override
    public List<String> getAllSubjects(String journal) throws ApplicationException {
        QueryResponse queryResponse = executeSubjectFacetSearch("subject_hierarchy", journal);
        FacetField facet = queryResponse.getFacetField("subject_hierarchy");

        // Same guard as getTopSubjectsFromSOLR: a facet without values must not cause a NullPointerException.
        if (facet == null || facet.getValues() == null) {
            log.warn("No subject_hierarchy facet");
            return new ArrayList<String>();
        }

        List<String> results = new ArrayList<String>(facet.getValues().size());
        for (FacetField.Count count : facet.getValues()) {
            results.add(count.getName());
        }
        return results;
    }

    /**
     * {@inheritDoc}
     * <p/>
     * The per-subject counts are taken from the <code>subject_facet</code> facet and the article total from the
     * number of documents found. When the response carries no such facet, or the facet has no values, the counts
     * are left empty instead of failing.
     *
     * @param journal journal to restrict the facet search to; <code>null</code> or empty means no restriction
     * @return the subject counts together with the total number of matching articles
     * @throws ApplicationException Thrown during failed interactions with the Solr Server
     */
    @Override
    public SubjectCounts getAllSubjectCounts(String journal) throws ApplicationException {
        QueryResponse queryResponse = executeSubjectFacetSearch("subject_facet", journal);
        FacetField facet = queryResponse.getFacetField("subject_facet");
        SubjectCounts results = new SubjectCounts();

        // Same guard as getTopSubjectsFromSOLR: a facet without values must not cause a NullPointerException.
        if (facet == null || facet.getValues() == null) {
            log.warn("No subject_facet facet");
        } else {
            for (FacetField.Count count : facet.getValues()) {
                results.subjectCounts.put(count.getName(), count.getCount());
            }
        }

        results.totalArticles = queryResponse.getResults().getNumFound();
        return results;
    }

    /**
     * Runs a match-all query whose only purpose is to obtain the counts of one of the subject facets in the solr
     * schema. No documents are requested, and only full documents that are not issue images are counted.
     *
     * @param facetName the subject facet of interest.  Depending on the application, this should be
     *     either "subject_facet" or "subject_hierarchy".  The first does not include the entire taxonomy
     *     path, while the second does.
     * @param journal journal of interest; when <code>null</code> or empty no journal filter is added
     * @return solr server response
     * @throws ApplicationException Thrown during failed interactions with the Solr Server
     */
    private QueryResponse executeSubjectFacetSearch(String facetName, String journal) throws ApplicationException {
        // Facets that are dropped from the query before the requested one is added.
        final String[] unwantedFacets = {
            "author_facet", "editor_facet", "affiliate_facet", "subject_facet", "subject_hierarchy"
        };

        SolrQuery facetQuery = createQuery("*:*", 0, 0, false);

        // Facet counts are all we are after, so ask for no rows.
        facetQuery.setRows(0);

        // Count full documents only, and leave out issue images.
        facetQuery.addFilterQuery(createFilterFullDocuments());
        facetQuery.addFilterQuery(createFilterNoIssueImageDocuments());

        for (String unwanted : unwantedFacets) {
            facetQuery.removeFacetField(unwanted);
        }
        facetQuery.addFacetField(facetName);

        boolean journalGiven = journal != null && journal.length() > 0;
        if (journalGiven) {
            facetQuery.addFilterQuery("cross_published_journal_key:" + journal);
        }

        // -1 means no limit on the number of facet values
        facetQuery.setFacetLimit(-1);
        return getSOLRResponse(facetQuery);
    }

    /**
     * {@inheritDoc}
     * <p/>
     * When a cache is configured, the result is looked up under a fixed key with a time-to-live of
     * <code>CACHE_TTL</code> and only computed from Solr by the cache's lookup callback; without a cache Solr is
     * queried on every call.
     */
    public SortedMap<String, Long> getTopSubjects() throws ApplicationException {
        // No cache configured: go straight to Solr.
        if (cache == null) {
            return getTopSubjectsFromSOLR();
        }

        final String cacheKey = "topLevelCategoriesCacheKey".intern();

        Cache.SynchronizedLookup<SortedMap<String, Long>, ApplicationException> solrLookup =
                new Cache.SynchronizedLookup<SortedMap<String, Long>, ApplicationException>(cacheKey) {
                    @Override
                    public SortedMap<String, Long> lookup() throws ApplicationException {
                        return getTopSubjectsFromSOLR();
                    }
                };

        return cache.get(cacheKey, CACHE_TTL, solrLookup);
    }

    /**
     * Queries Solr for the values of the <code>subject_level_1</code> facet over all full documents.
     *
     * @return facet value name mapped to its count, sorted by name; empty when the facet has no values
     * @throws ApplicationException Thrown during failed interactions with the Solr Server
     */
    private SortedMap<String, Long> getTopSubjectsFromSOLR() throws ApplicationException {
        final String topLevelFacet = "subject_level_1";

        SolrQuery facetQuery = createQuery("*:*", 0, 0, false);

        // Facet counts are all we are after, so ask for no rows.
        facetQuery.setRows(0);

        // Count full documents only.
        facetQuery.addFilterQuery(createFilterFullDocuments());

        // Drop the facets that are not needed, then add the one that is.
        for (String unwanted : new String[] { "author_facet", "editor_facet", "affiliate_facet", "subject_facet" }) {
            facetQuery.removeFacetField(unwanted);
        }
        facetQuery.addFacetField(topLevelFacet);

        // -1 means no limit on the number of facet values
        facetQuery.setFacetLimit(-1);

        FacetField facet = getSOLRResponse(facetQuery).getFacetField(topLevelFacet);

        SortedMap<String, Long> subjectCounts = new TreeMap<String, Long>();

        if (facet.getValues() != null) {
            for (FacetField.Count value : facet.getValues()) {
                subjectCounts.put(value.getName(), value.getCount());
            }
        } else {
            // A facet without values should never happen outside a unit test
            log.warn("No subject_level_1 facet");
        }

        return subjectCounts;
    }

    /**
     * Adds the <i>sort</i> clause(s) to the <code>query</code> parameter.
     * <p/>
     * The sort expression is either the explicit <code>SearchParameters.sortValue</code>, when that is set, or the
     * expression configured for <code>SearchParameters.sortKey</code>. The expression is split on commas that are
     * not inside parentheses; each part is a field name (or a function such as <code>sum(a, b)</code>) optionally
     * followed by whitespace and a <i>sort direction</i>, one of <strong>desc</strong> or <strong>asc</strong>.
     * <p/>
     * If a part has no direction, or the direction is not (non-case-sensitive) <strong>asc</strong>, then the
     * <i>sort direction</i> defaults to <strong>desc</strong>.
     * <p/>
     * If the query has no sort field after that, it is ordered by score, then publication date, then id, all
     * descending.
     *
     * @param query The SolrQuery which will have a <i>sort</i> clause attached
     * @param sp    The SearchParameters DTO which contains the sort key and sort value used by this method
     * @throws ApplicationException when no explicit sort value is given and the sort key is not configured
     */
    private void setSort(SolrQuery query, SearchParameters sp) throws ApplicationException {
        if (log.isDebugEnabled()) {
            log.debug("SearchParameters.sort = " + sp.getSortKey());
        }

        boolean keyGiven = sp.getSortKey().length() > 0;
        boolean explicitValueGiven = sp.getSortValue() != null && sp.getSortValue().length() > 0;

        if (keyGiven || explicitValueGiven) {
            String configuredValue = (String) validSorts.get(sp.getSortKey());

            // An explicitly supplied sort value wins over the value configured for the key.
            String sortValue;
            if (explicitValueGiven) {
                sortValue = sp.getSortValue();
            } else if (configuredValue != null) {
                sortValue = configuredValue;
            } else {
                throw new ApplicationException("Invalid sort key of '" + sp.getSortKey() + "' specified.");
            }

            for (String rawOption : SORT_OPTION_PATTERN.split(sortValue)) {
                String option = rawOption.trim();

                // Everything after the last blank, if there is one, is the direction.
                int lastBlank = option.lastIndexOf(" ");
                String fieldName = lastBlank == -1 ? option : option.substring(0, lastBlank);
                String direction = lastBlank == -1 ? null : option.substring(lastBlank + 1).trim();

                boolean ascending = direction != null && direction.toLowerCase().equals("asc");
                query.addSortField(fieldName, ascending ? SolrQuery.ORDER.asc : SolrQuery.ORDER.desc);
            }
        }

        String currentSort = query.getSortField();
        if (currentSort == null || currentSort.length() == 0) {
            // Fall back to score, then the more recent publication date, then id as the final tie breaker
            query.addSortField("score", SolrQuery.ORDER.desc);
            query.addSortField("publication_date", SolrQuery.ORDER.desc);
            query.addSortField("id", SolrQuery.ORDER.desc);
        }
    }

    /**
     * Execute a Solr search composed from the contents of the <i>Find An Article</i> search block including the
     * properties: <code>volume</code>, <code>eNumber</code>, and/or <code>id</code> (DOI).
     * <p/>
     * When an <code>id</code> is supplied, the search is performed on the id alone and its result is returned,
     * whether or not anything was found; no journal filter is applied in that case. Otherwise the query is built
     * from <code>volume</code> and <code>eLocationId</code> and filtered by the single journal in
     * <code>SearchParameters.filterJournals</code>.
     * <p/>
     * No filter is created for date ranges or subject categories.
     *
     * @param searchParameters Contains all the parameters necessary to execute a search against the Solr query engine
     * @return A subset (determined by <code>SearchParameters.startPage</code> and <code>SearchParameters.pageSize</code>)
     *         of the results of the Solr query generated from the contents of the <code>searchParameters</code>
     *         parameter
     * @throws ApplicationException Thrown during failed interactions with the Solr Server, or when
     *         <code>filterJournals</code> does not hold exactly one journal
     */
    public SearchResultSinglePage findAnArticleSearch(SearchParameters searchParameters)
            throws ApplicationException {
        SearchParameters sp = cleanStrings(searchParameters); // Does not impact unformattedQuery field.
        if (log.isDebugEnabled()) {
            log.debug("Solr Search performed on the following selection of the SearchParameters properties: "
                    + "{ filterJournals="
                    + (sp.getFilterJournals() == null ? null : Arrays.asList(sp.getFilterJournals()))
                    + "\', volume = " + sp.getVolume() + "\', eLocationId = " + sp.getELocationId() + "\', id = "
                    + sp.getId() + "\' }");
        }

        // We should always have exactly one journal.  A missing journal list is reported the same way as a
        // list of the wrong size instead of surfacing as a NullPointerException.
        String[] journals = sp.getFilterJournals();
        if (journals == null || journals.length != 1) {
            throw new ApplicationException("Please select exactly one journal.");
        }

        SolrQuery query = createQuery(null, sp.getStartPage(), sp.getPageSize(), false);

        // If ID exists, then search on that alone, ignoring all the other fields.
        if (sp.getId().length() > 0) {
            query.setQuery("id:\"" + sp.getId() + "\"");
            return search(query);
        }

        // No ID: attempt a query based on the other submitted fields, if those fields exist

        int volume = 0;
        try {
            volume = Integer.parseInt(sp.getVolume());
        } catch (NumberFormatException e) {
            // A missing or non-numeric volume simply leaves the volume out of the query.
            if (log.isDebugEnabled()) {
                log.debug("Unable to create an integer from the String volume = " + sp.getVolume());
            }
        }

        StringBuilder q = new StringBuilder(); // The Query which will be submitted to Solr.

        if (volume > 0) {
            q.append(" volume:").append(volume);
        }
        if (sp.getELocationId().length() > 0) {
            if (q.length() > 0) {
                q.append(" AND ");
            }
            q.append(" elocation_id:").append(sp.getELocationId());
        }

        if (log.isDebugEnabled()) {
            log.debug("findAnArticleSearch: query = " + q.toString());
        }

        // NOTE(review): q is empty when neither a positive volume nor an eLocationId was supplied - confirm
        // how the Solr handler treats an empty query string.
        query.setQuery(q.toString());

        // Form field description: "Journals".  Query Filter.
        query.addFilterQuery(createFilterLimitForJournals(journals));

        return search(query);
    }

    /**
     * Reads the search settings from the application configuration: the query timeout, the available page sizes,
     * the sort options (display name mapped to sort expression, kept in configuration order) and the keyword
     * fields (display name mapped to Solr field).
     *
     * @param config the application configuration; it is cast to <code>HierarchicalConfiguration</code> to read
     *               the sort options and keyword fields
     * @throws ApplicationException when the page sizes, sort options or keyword fields are not defined
     */
    public void setConfiguration(Configuration config) throws ApplicationException {
        // default to 1 min
        queryTimeout = config.getInt("ambra.services.search.timeout", 60000);

        List configuredSizes = config.getList("ambra.services.search.pageSizes.size");
        if (configuredSizes == null) {
            throw new ApplicationException("ambra.services.search.pageSizes not defined in configuration.");
        }
        pageSizes = configuredSizes;

        // Sort options: the map is used for lookups, the list remembers the configured order.
        if (!config.containsKey("ambra.services.search.sortOptions.option")) {
            throw new ApplicationException(
                    "ambra.services.search.sortOptions.option not defined in configuration.");
        }
        validSorts = new HashMap();
        displaySorts = new ArrayList();

        HierarchicalConfiguration sortsRoot = (HierarchicalConfiguration) config;
        List<HierarchicalConfiguration> sortOptions =
                sortsRoot.configurationsAt("ambra.services.search.sortOptions.option");
        for (HierarchicalConfiguration option : sortOptions) {
            String displayName = option.getString("[@displayName]");
            validSorts.put(displayName, option.getString(""));
            displaySorts.add(displayName);
        }

        ((HierarchicalConfiguration) config).setExpressionEngine(null);

        // Keyword fields: display name mapped to the field(s) to query.
        if (!config.containsKey("ambra.services.search.keywordFields.field")) {
            throw new ApplicationException(
                    "ambra.services.search.keywordFields.field not defined in configuration.");
        }
        validKeywords = new HashMap();

        HierarchicalConfiguration keywordsRoot = (HierarchicalConfiguration) config;
        List<HierarchicalConfiguration> keywordFields =
                keywordsRoot.configurationsAt("ambra.services.search.keywordFields.field");
        for (HierarchicalConfiguration field : keywordFields) {
            validKeywords.put(field.getString("[@displayName]"), field.getString(""));
        }
    }

    /**
     * Injects the factory that supplies the Solr server used by this service.
     *
     * @param serverFactory the Solr server factory to use
     */
    public void setServerFactory(final SolrServerFactory serverFactory) {
        this.serverFactory = serverFactory;
    }

    /**
     * Adds the filter queries derived from the search parameters to <code>query</code>. The filter that excludes
     * issue image documents is always added; every other filter is only added when the corresponding parameter
     * holds at least one value (for the publish date: when both start and end date are set).
     *
     * @param query              the query that receives the filter queries
     * @param sp                 the search parameters the filters are read from
     * @param ignoreJournals     when <code>true</code> the journal filter is left out
     * @param ignoreArticleTypes when <code>true</code> the article type filter is left out
     */
    private void setFilters(SolrQuery query, SearchParameters sp, boolean ignoreJournals,
            boolean ignoreArticleTypes) {
        //Related to JO: http://joborder.plos.org/view.php?id=17480
        //(for now) we don't want to search on Issue Images
        query.addFilterQuery(createFilterNoIssueImageDocuments());

        // "Journals" form field
        if (!ignoreJournals) {
            String[] journals = sp.getFilterJournals();
            if (journals != null && journals.length > 0) {
                query.addFilterQuery(createFilterLimitForJournals(journals));
            }
        }

        // "Article Types" form field
        if (!ignoreArticleTypes) {
            String[] articleTypes = sp.getFilterArticleTypes();
            if (articleTypes != null && articleTypes.length > 0) {
                query.addFilterQuery(createFilterLimitForArticleTypes(articleTypes));
            }
        }

        // "Subject Categories" form field
        String[] subjects = sp.getFilterSubjects();
        if (subjects != null && subjects.length > 0) {
            query.addFilterQuery(createFilterLimitForSubject(subjects));
        }

        // Not a form field; used by savedSearch alerts
        String[] anySubjects = sp.getFilterSubjectsDisjunction();
        if (anySubjects != null && anySubjects.length > 0) {
            query.addFilterQuery(createFilterLimitForSubjectDisjunction(anySubjects));
        }

        // "Authors" form field
        String[] authors = sp.getFilterAuthors();
        if (authors != null && authors.length > 0) {
            query.addFilterQuery(createFilterLimitForAuthor(authors));
        }

        // The publish date range needs both ends
        boolean dateRangeGiven = sp.getFilterStartDate() != null && sp.getFilterEndDate() != null;
        if (dateRangeGiven) {
            query.addFilterQuery(createFilterLimitForPublishDate(sp.getFilterStartDate(), sp.getFilterEndDate()));
        }
    }

    /**
     * Builds the <code>publication_date</code> range filter. Both dates are rendered as their UTC calendar day
     * followed by the fixed time <code>T00:00:00Z</code>.
     *
     * @param startDate lower bound of the range
     * @param endDate   upper bound of the range
     * @return a filter of the form <code>publication_date:[start TO end]</code>
     */
    private String createFilterLimitForPublishDate(Date startDate, Date endDate) {
        // A new formatter per call: SimpleDateFormat must not be shared between threads.
        SimpleDateFormat utcDay = new SimpleDateFormat("yyyy-MM-dd");
        utcDay.setTimeZone(TimeZone.getTimeZone("UTC"));

        String lowerBound = utcDay.format(startDate) + "T00:00:00Z";
        String upperBound = utcDay.format(endDate) + "T00:00:00Z";

        return "publication_date:[" + lowerBound + " TO " + upperBound + "]";
    }

    /**
     * Builds the journal filter: one <code>cross_published_journal_key:</code> clause per journal, joined with
     * <code>OR</code>. The clauses are emitted in sorted order so that the same set of journals always produces
     * the same filter string (and is therefore only cached once).
     * <p/>
     * The array passed in is left untouched; sorting happens on a copy.
     *
     * @param journals the journal keys to filter on
     * @return the filter string, or an empty string when <code>journals</code> is <code>null</code> or empty
     */
    private String createFilterLimitForJournals(String[] journals) {
        if (journals == null || journals.length == 0) {
            return "";
        }

        // Sort a copy so the caller's array keeps its order.
        String[] sorted = journals.clone();
        Arrays.sort(sorted);

        StringBuilder fq = new StringBuilder();
        for (String journal : sorted) {
            if (fq.length() > 0) {
                fq.append(" OR ");
            }
            fq.append("cross_published_journal_key:").append(journal);
        }
        return fq.toString();
    }

    /**
     * Build the filter query that limits results to articles written by all of the given authors.
     * <p/>
     * Each author becomes a quoted <code>author</code> clause; the clauses are AND-ed together in sorted order, so
     * the same set of authors always produces the same filter string.
     *
     * @param authors the author names that must all be present; the array itself is left untouched
     * @return the filter query, or an empty string if <code>authors</code> is null or empty
     */
    private String createFilterLimitForAuthor(String[] authors) {
        if (authors == null || authors.length == 0) {
            return "";
        }

        // Sort a copy: sorting the argument in place would silently reorder the caller's array.
        String[] sortedAuthors = authors.clone();
        Arrays.sort(sortedAuthors); // Consistent order so that each filter will only be cached once.

        StringBuilder fq = new StringBuilder();
        for (String author : sortedAuthors) {
            if (fq.length() > 0) {
                fq.append(" AND ");
            }
            fq.append("author:\"").append(author).append("\"");
        }
        return fq.toString();
    }

    /**
     * Build the filter query that limits results to articles tagged with all of the given subjects.
     * <p/>
     * Each subject becomes a quoted <code>subject</code> clause; the clauses are AND-ed together in sorted order, so
     * the same set of subjects always produces the same filter string.
     *
     * @param subjects the subjects that must all be present; the array itself is left untouched
     * @return the filter query, or an empty string if <code>subjects</code> is null or empty
     */
    private String createFilterLimitForSubject(String[] subjects) {
        if (subjects == null || subjects.length == 0) {
            return "";
        }

        // Sort a copy: sorting the argument in place would silently reorder the caller's array.
        String[] sortedSubjects = subjects.clone();
        Arrays.sort(sortedSubjects); // Consistent order so that each filter will only be cached once.

        StringBuilder fq = new StringBuilder();
        for (String category : sortedSubjects) {
            if (fq.length() > 0) {
                fq.append(" AND ");
            }
            fq.append("subject:\"").append(category).append("\"");
        }
        return fq.toString();
    }

    /**
     * Build the filter query that limits results to articles tagged with at least one of the given subjects.
     * <p/>
     * Each subject becomes a quoted <code>subject</code> clause; the clauses are OR-ed together in sorted order, so
     * the same set of subjects always produces the same filter string.
     *
     * @param subjects the subjects of which any one may match; the array itself is left untouched
     * @return the filter query, or an empty string if <code>subjects</code> is null or empty
     */
    private String createFilterLimitForSubjectDisjunction(String[] subjects) {
        if (subjects == null || subjects.length == 0) {
            return "";
        }

        // Sort a copy: sorting the argument in place would silently reorder the caller's array.
        String[] sortedSubjects = subjects.clone();
        Arrays.sort(sortedSubjects); // Consistent order so that each filter will only be cached once.

        StringBuilder fq = new StringBuilder();
        for (String category : sortedSubjects) {
            if (fq.length() > 0) {
                fq.append(" OR ");
            }
            fq.append("subject:\"").append(category).append("\"");
        }
        return fq.toString();
    }

    /**
     * Build the filter query that limits results to articles of any of the given article types.
     * <p/>
     * Each type becomes a quoted <code>article_type</code> clause; the clauses are OR-ed together in sorted order,
     * so the same set of types always produces the same filter string.
     *
     * @param articleTypes the article types to allow; the array itself is left untouched
     * @return the filter query, or an empty string if <code>articleTypes</code> is null or empty
     */
    private String createFilterLimitForArticleTypes(String[] articleTypes) {
        if (articleTypes == null || articleTypes.length == 0) {
            return "";
        }

        // Sort a copy: sorting the argument in place would silently reorder the caller's array.
        String[] sortedTypes = articleTypes.clone();
        Arrays.sort(sortedTypes); // Consistent order so that each filter will only be cached once.

        StringBuilder fq = new StringBuilder();
        for (String articleType : sortedTypes) {
            if (fq.length() > 0) {
                fq.append(" OR ");
            }
            fq.append("article_type:\"").append(articleType).append("\"");
        }
        return fq.toString();
    }

    /**
     * Filter query that keeps only documents whose <code>doc_type</code> is <code>full</code>, which leaves out the
     * partial documents.
     *
     * @return the filter query string <code>doc_type:full</code>
     */
    private String createFilterFullDocuments() {
        final String fullDocumentsOnly = "doc_type:full";
        return fullDocumentsOnly;
    }

    /**
     * Filter query that keeps only documents whose <code>doc_type</code> is <code>partial</code>.
     *
     * @return the filter query string <code>doc_type:partial</code>
     */
    private String createFilterPartialDocuments() {
        final String partialDocumentsOnly = "doc_type:partial";
        return partialDocumentsOnly;
    }

    /**
     * Filter query that excludes documents whose <code>article_type_facet</code> is "Issue Image".
     *
     * @return the negated filter query string for the "Issue Image" article type
     */
    private String createFilterNoIssueImageDocuments() {
        final String excludeIssueImages = "!article_type_facet:\"Issue Image\"";
        return excludeIssueImages;
    }

    /**
     * Run the given query against the configured Solr server, logging the query and its elapsed time at INFO level.
     *
     * @param query the query to execute
     * @return the response returned by the Solr server
     * @throws ApplicationException if no search server is configured, or if the server reports a
     *                              {@link SolrServerException} while executing the query
     */
    private QueryResponse getSOLRResponse(SolrQuery query) throws ApplicationException {
        if (serverFactory.getServer() == null) {
            throw new ApplicationException("Search server is not configured");
        }

        try {
            log.info("SOLR Query: " + query.toString());
            QueryResponse response = serverFactory.getServer().query(query);
            log.info("SOLR Query response time(milliseconds): " + response.getElapsedTime());
            return response;
        } catch (SolrServerException e) {
            throw new ApplicationException("Unable to execute a query on the Solr Server.", e);
        }
    }

    /**
     * Execute the query and convert the Solr response into a single page of search results.
     *
     * @param query the query to execute
     * @return the hits and facets read from the response
     * @throws ApplicationException if the query could not be executed
     */
    private SearchResultSinglePage search(SolrQuery query) throws ApplicationException {
        return readQueryResults(getSOLRResponse(query), query);
    }

    /**
     * Execute the query and pick one facet out of the response.
     *
     * @param query the query to execute
     * @param name  the name of the facet field to return
     * @return the facet field with the given name
     * @throws ApplicationException if the query could not be executed or the response has no facet with that name
     */
    private FacetField facetSearch(SolrQuery query, String name) throws ApplicationException {
        FacetField requestedFacet = getSOLRResponse(query).getFacetField(name);

        if (requestedFacet != null) {
            return requestedFacet;
        }

        throw new ApplicationException("No facet found with name of:" + name);
    }

    /**
     * Convert the counts of a facet field into a list of maps, one per facet value, each holding the value under
     * the key <code>"name"</code> and its count under the key <code>"count"</code>.
     *
     * @param field the facet field to convert
     * @return one map per facet value, or <code>null</code> if the field has no values list
     */
    private List<Map> facetCountsToHashMap(FacetField field) {
        List<FacetField.Count> facetValues = field.getValues();
        if (facetValues == null) {
            // Callers distinguish "no values list" (null) from an empty list.
            return null;
        }

        ArrayList<Map> converted = new ArrayList<Map>();
        for (FacetField.Count facetValue : facetValues) {
            HashMap<String, Object> entry = new HashMap<String, Object>();
            entry.put("name", facetValue.getName());
            entry.put("count", facetValue.getCount());
            converted.add(entry);
        }
        return converted;
    }

    /**
     * Create the Solr query used for a page of search results: paging, the fields needed for display, the standard
     * facets and the filter that restricts results to full documents.
     *
     * @param queryString the query text
     * @param startPage   zero-based page number; multiplied by <code>pageSize</code> to get the first row
     * @param pageSize    the number of results to return
     * @param useDismax   whether to set <code>defType</code> to <code>dismax</code>
     * @return the configured query
     */
    private SolrQuery createQuery(String queryString, int startPage, int pageSize, boolean useDismax) {
        // Facets requested with every search, in the order they are added to the query.
        final String[] facetFields = {
            "subject_facet", "author_facet", "editor_facet", "article_type_facet", "affiliate_facet"
        };

        SolrQuery solrQuery = new SolrQuery(queryString);
        solrQuery.setTimeAllowed(queryTimeout);
        solrQuery.setIncludeScore(true); // The relevance (of each results element) to the search terms.
        solrQuery.setHighlight(false);

        if (useDismax) {
            solrQuery.set("defType", "dismax");
        }

        //TODO: Put The "options" from the "queryField" picklist into a config file.
        //This list matches the "options" from the "queryField" picklist on unformattedSearch.ftl,
        //without the "date" fields.

        int firstRow = startPage * pageSize; // Which results element to return first in this batch.
        solrQuery.setStart(firstRow);
        solrQuery.setRows(pageSize); // The number of results elements to return.

        // request only fields that we need to display
        solrQuery.setFields("id", "score", "title_display", "publication_date", "eissn", "journal", "article_type",
                "author_display", "abstract", "abstract_primary_display", "striking_image", "figure_table_caption",
                "subject", "expression_of_concern", "retraction");

        for (String facetField : facetFields) {
            solrQuery.addFacetField(facetField);
        }
        solrQuery.set("facet.method", "fc");
        solrQuery.setFacetLimit(MAX_FACET_SIZE);
        solrQuery.setFacetMinCount(MIN_FACET_COUNT);

        // Add a filter to ensure that Solr never returns partial documents
        solrQuery.addFilterQuery(createFilterFullDocuments());

        return solrQuery;
    }

    /**
     * Create a Solr query that returns no documents, only the counts of a single facet field, restricted to full
     * documents.
     *
     * @param queryString the query text
     * @param field       the facet field to count
     * @param useDismax   whether to set <code>defType</code> to <code>dismax</code>
     * @return the configured query
     */
    private SolrQuery createFacetsQuery(String queryString, String field, boolean useDismax) {
        SolrQuery facetQuery = new SolrQuery(queryString);

        facetQuery.setTimeAllowed(queryTimeout);
        facetQuery.setIncludeScore(false);
        facetQuery.setHighlight(false);
        facetQuery.setRows(0); // Only the facet counts are wanted, not the documents.
        facetQuery.setFacetLimit(MAX_FACET_SIZE);
        facetQuery.setFacetMinCount(MIN_FACET_COUNT);

        if (useDismax) {
            facetQuery.set("defType", "dismax");
        }

        facetQuery.addFacetField(field);

        // Add a filter to ensure that Solr never returns partial documents
        facetQuery.addFilterQuery(createFilterFullDocuments());

        return facetQuery;
    }

    /**
     * Create a dismax query against the <code>doc_partial_body</code> field that returns no documents, only the
     * counts of the <code>doc_partial_type</code> facet, restricted to partial documents.
     *
     * @param queryString the query text
     * @return the configured query
     */
    private SolrQuery createKeywordFacetQuery(String queryString) {
        SolrQuery keywordQuery = new SolrQuery();

        keywordQuery.setTimeAllowed(queryTimeout);
        keywordQuery.setIncludeScore(false);
        keywordQuery.setHighlight(false);
        keywordQuery.setRows(0); // Only the facet counts are wanted, not the documents.

        keywordQuery.set("defType", "dismax");
        keywordQuery.set("qf", "doc_partial_body");

        keywordQuery.addFacetField("doc_partial_type");
        keywordQuery.setFacetLimit(MAX_FACET_SIZE);
        keywordQuery.setFacetMinCount(MIN_FACET_COUNT);

        // Unlike the other queries, this one is limited to the partial documents.
        keywordQuery.addFilterQuery(createFilterPartialDocuments());
        keywordQuery.setQuery(queryString);

        return keywordQuery;
    }

    /**
     * Convert a Solr response into a {@link SearchResultSinglePage}: one {@link SearchHit} per returned document,
     * plus whichever of the subject, author, editor, article type, institution and journal facets are present in
     * the response.
     * <p/>
     * When INFO logging is enabled, a summary of the query and any spellcheck suggestions are logged as well.
     *
     * @param queryResponse the response Solr returned for <code>query</code>
     * @param query         the query that was executed; used for logging and to record the query text in the result
     * @return the hits and facets for this page of results
     */
    private SearchResultSinglePage readQueryResults(QueryResponse queryResponse, SolrQuery query) {
        SolrDocumentList documentList = queryResponse.getResults();

        if (log.isInfoEnabled()) {
            logQuerySummary(queryResponse, query, documentList);
            logSpellcheckSuggestions(queryResponse);
        }

        List<SearchHit> searchResults = new ArrayList<SearchHit>();
        for (SolrDocument document : documentList) {
            SearchHit hit = readSearchHit(document, query);

            if (log.isDebugEnabled()) {
                log.debug(hit.toString());
            }

            searchResults.add(hit);
        }

        //here we assume that number of hits is always going to be withing range of int
        SearchResultSinglePage results = new SearchResultSinglePage((int) documentList.getNumFound(), -1,
                searchResults, query.getQuery());

        if (queryResponse.getFacetField("subject_facet") != null) {
            List<Map> subjects = facetCountsToHashMap(queryResponse.getFacetField("subject_facet"));
            results.setSubjectFacet(removeTopLevelSubjects(subjects));
        }

        if (queryResponse.getFacetField("author_facet") != null) {
            results.setAuthorFacet(facetCountsToHashMap(queryResponse.getFacetField("author_facet")));
        }

        if (queryResponse.getFacetField("editor_facet") != null) {
            results.setEditorFacet(facetCountsToHashMap(queryResponse.getFacetField("editor_facet")));
        }

        if (queryResponse.getFacetField("article_type_facet") != null) {
            results.setArticleTypeFacet(facetCountsToHashMap(queryResponse.getFacetField("article_type_facet")));
        }

        if (queryResponse.getFacetField("affiliate_facet") != null) {
            results.setInstitutionFacet(facetCountsToHashMap(queryResponse.getFacetField("affiliate_facet")));
        }

        if (queryResponse.getFacetField("cross_published_journal_key") != null) {
            results.setJournalFacet(
                    facetCountsToHashMap(queryResponse.getFacetField("cross_published_journal_key")));
        }

        return results;
    }

    /**
     * Log the query text, sort fields, filter queries and result statistics at INFO level.
     */
    private void logQuerySummary(QueryResponse queryResponse, SolrQuery query, SolrDocumentList documentList) {
        String[] filterQueries = query.getFilterQueries();
        StringBuilder filterQueriesForLog = new StringBuilder();

        if (filterQueries != null && filterQueries.length > 0) {
            for (int i = 0; i < filterQueries.length; i++) {
                if (i > 0) {
                    filterQueriesForLog.append(" , ");
                }
                filterQueriesForLog.append(filterQueries[i]);
            }
        } else {
            // Previously this marker was attached to the wrong "if" and never appeared when there were no filters.
            filterQueriesForLog.append("No Filter Queries");
        }

        log.info("query.getQuery():{ " + query.getQuery() + " }" + ", query.getSortFields():{ "
                + (query.getSortFields() == null ? null : Arrays.asList(query.getSortFields())) + " }"
                + ", query.getFilterQueries():{ " + filterQueriesForLog.toString() + " }" + ", found:"
                + documentList.getNumFound() + ", start:" + documentList.getStart() + ", max_score:"
                + documentList.getMaxScore() + ", QTime:" + queryResponse.getQTime() + "ms");
    }

    /**
     * Log the spellcheck suggestions contained in the response, if any, at INFO level.
     */
    private void logSpellcheckSuggestions(QueryResponse queryResponse) {
        // TODO: implement spell-checking in a meaningful manner.  This loop exists only to generate log output.
        // TODO: Add "spellcheckAlternatives" or something like it to the SearchHits class so it can be displayed to the user like Google's "did you mean..."
        // TODO: Turn off spellchecking for the "author" field.
        if (queryResponse.getSpellCheckResponse() != null
                && queryResponse.getSpellCheckResponse().getSuggestionMap() != null
                && queryResponse.getSpellCheckResponse().getSuggestionMap().keySet().size() > 0) {
            StringBuilder sb = new StringBuilder("Spellcheck alternative suggestions:");
            for (String token : queryResponse.getSpellCheckResponse().getSuggestionMap().keySet()) {
                sb.append(" { ").append(token).append(" : ");
                List<String> alternatives = queryResponse.getSpellCheckResponse().getSuggestionMap().get(token)
                        .getAlternatives();
                if (alternatives.size() < 1) {
                    sb.append("NO ALTERNATIVES");
                } else {
                    for (String alternative : alternatives) {
                        sb.append(alternative).append(", ");
                    }
                    sb.replace(sb.length() - 2, sb.length(), ""); // Remove last comma and space.
                }
                sb.append(" } ,");
            }
            log.info(sb.replace(sb.length() - 2, sb.length(), "").toString()); // Remove the trailing " ,".
        } else {
            log.info("Solr thinks everything in the query is spelled correctly.");
        }
    }

    /**
     * Read the display fields of one Solr document into a {@link SearchHit}.
     */
    private SearchHit readSearchHit(SolrDocument document, SolrQuery query) {
        String id = SolrServiceUtil.getFieldValue(document, "id", String.class, query.toString());
        // Passed to every field lookup below: the document id when there is one, otherwise the whole query.
        String message = id == null ? query.toString() : id;
        Float score = SolrServiceUtil.getFieldValue(document, "score", Float.class, message);
        String title = SolrServiceUtil.getFieldValue(document, "title_display", String.class, message);
        Date publicationDate = SolrServiceUtil.getFieldValue(document, "publication_date", Date.class, message);
        String eissn = SolrServiceUtil.getFieldValue(document, "eissn", String.class, message);
        String journal = SolrServiceUtil.getFieldValue(document, "journal", String.class, message);
        String articleType = SolrServiceUtil.getFieldValue(document, "article_type", String.class, message);
        String strikingImage = SolrServiceUtil.getFieldValue(document, "striking_image", String.class, message);
        List<String> abstractText = SolrServiceUtil.getFieldMultiValue(document, "abstract", String.class,
                message);
        List<String> abstractPrimary = SolrServiceUtil.getFieldMultiValue(document, "abstract_primary_display",
                String.class, message);
        List<String> authorList = SolrServiceUtil.getFieldMultiValue(document, "author_display", String.class,
                message);
        // TODO create a dedicated field for checking the existence of assets for a given article.
        List<String> figureTableCaptions = SolrServiceUtil.getFieldMultiValue(document, "figure_table_caption",
                String.class, message);
        List<String> subjects = SolrServiceUtil.getFieldMultiValue(document, "subject", String.class, message);
        List<String> expressionOfconcern = SolrServiceUtil.getFieldMultiValue(document, "expression_of_concern",
                String.class, message);
        String retraction = SolrServiceUtil.getFieldValue(document, "retraction", String.class, message);
        String abstractResult = "";

        //Use the primary abstract if it exists
        if (abstractPrimary.size() > 0) {
            abstractResult = StringUtils.join(abstractPrimary, ", ");
        } else if (abstractText.size() > 0) {
            abstractResult = StringUtils.join(abstractText, ", ");
        }

        //Flatten the list of subjects to a unique set
        Set<String> flattenedSubjects = new HashSet<String>();
        for (String subject : subjects) {
            for (String temp : subject.split("/")) {
                if (temp.trim().length() > 0) {
                    flattenedSubjects.add(temp);
                }
            }
        }

        return SearchHit.builder().setHitScore(score).setUri(id).setTitle(title)
                .setListOfCreators(authorList).setDate(publicationDate).setIssn(eissn).setJournalTitle(journal)
                .setArticleTypeForDisplay(articleType).setAbstractText(abstractResult)
                .setStrikingImage(strikingImage).setHasAssets(figureTableCaptions.size() > 0)
                .setSubjects(flattenedSubjects).setSubjectsPolyhierarchy(subjects)
                .setExpressionOfConcern(expressionOfconcern).setRetraction(retraction).build();
    }

    /**
     * Copy the subject facet entries, leaving out every entry whose name is one of the top subjects (FEND-805).
     *
     * @param subjects the subject facet entries, possibly <code>null</code>
     * @return the remaining entries, or <code>null</code> if <code>subjects</code> is <code>null</code>
     */
    @SuppressWarnings("unchecked")
    private List<Map> removeTopLevelSubjects(List<Map> subjects) {
        if (subjects == null) {
            return null;
        }

        SortedMap<String, Long> topSubjects;
        try {
            topSubjects = getTopSubjects();
        } catch (ApplicationException ex) {
            throw new RuntimeException(ex.getMessage(), ex);
        }

        //Remove top level 1 subjects from list, FEND-805
        List<Map> subjectResult = new ArrayList<Map>();
        for (Map<String, Object> m : subjects) {
            if (!topSubjects.containsKey(m.get("name"))) {
                HashMap<String, Object> hm = new HashMap<String, Object>();
                hm.put("name", m.get("name"));
                hm.put("count", m.get("count"));
                subjectResult.add(hm);
            }
        }

        return subjectResult;
    }

    /**
     * {@inheritDoc}
     * <p/>
     * Runs the saved search as a simple search when the parameters carry no unformatted query, and as an advanced
     * search otherwise; in both cases the results are limited to the publication date range between the two given
     * search times.
     */
    public List savedSearchAlerts(SearchParameters sParams, Date lastSearchTime, Date currentSearchTime,
            int resultLimit) throws ApplicationException {
        final String unformattedQuery = sParams.getUnformattedQuery();
        final boolean advancedSearch = unformattedQuery != null && !unformattedQuery.equals("");
        final SolrQuery alertQuery;

        if (advancedSearch) {
            log.debug("Advanced Saved Search performed on the unformattedSearch String: {}",
                    unformattedQuery.trim());

            SearchParameters cleanedParams = cleanStrings(sParams);
            alertQuery = createQuery(null, 0, resultLimit, false);
            alertQuery.setQuery(sParams.getUnformattedQuery());
            setFilters(alertQuery, cleanedParams, false, false);
        } else {
            if (log.isDebugEnabled()) {
                log.debug("Simple Saved Search performed on the unformattedSearch String: "
                        + sParams.getQuery().trim());
            }

            alertQuery = createQuery(sParams.getQuery(), 0, resultLimit, false);
            alertQuery.setQuery(sParams.getQuery());

            //If the keywords parameter is specified, we need to change what field we're querying against
            //aka, body, conclusions, materials and methods ... etc ...
            if (sParams.getFilterKeyword().length() > 0) {
                String keywordKey = sParams.getFilterKeyword();

                if (!validKeywords.containsKey(keywordKey)) {
                    throw new ApplicationException("Invalid filterKeyword value of " + keywordKey + " specified");
                }

                //Set the field for dismax to use
                String keywordField = (String) validKeywords.get(keywordKey);
                alertQuery.set("qf", keywordField);
            }

            setFilters(alertQuery, sParams, false, false);
        }

        alertQuery.addFilterQuery(createFilterLimitForPublishDate(lastSearchTime, currentSearchTime));

        return search(alertQuery).getHits();
    }

    /**
     * Return a copy of the given search parameters in which the query string has been passed through
     * {@link #cleanString(String)}.
     * <p/>
     * Note that <code>SearchParameters.unformattedQuery</code> is deliberately left as it is, for the reason implied
     * by its name.
     *
     * @param searchParameters the SearchParameters object that needs to have some of its fields "cleaned"
     * @return a copy of <code>searchParameters</code> with its query "cleaned"
     */
    private SearchParameters cleanStrings(SearchParameters searchParameters) {
        String cleanedQuery = cleanString(searchParameters.getQuery());

        SearchParameters cleanedCopy = searchParameters.copy();
        cleanedCopy.setQuery(cleanedQuery);
        return cleanedCopy;
    }

    /**
     * Change all input to lower case and, in front of each character that Solr recognizes as an operator, place a
     * backslash (i.e., \) so that these characters are "escaped" such that they may be used as normal characters in
     * searches.
     * <p/>
     * Since Solr uses upper case to define the operators <code>AND</code>,  <code>OR</code>, <code>NOT</code>, and
     * <code>TO</code>, setting these values to lower case means that they are not seen as operators by Solr.
     * <p/>
     * Lower-casing uses the root locale, so the result does not depend on the default locale of the JVM (for
     * example, an upper case "I" is never turned into a dotless "i").
     *
     * @param toBeCleaned String that will have each Solr operator-character "escaped" with a backslash; may be
     *                    <code>null</code>
     * @return a lower-cased copy of <code>toBeCleaned</code> with each Solr operator-character "escaped" with a
     *         backslash, or <code>null</code> if <code>toBeCleaned</code> is <code>null</code>
     */
    private String cleanString(String toBeCleaned) {
        if (toBeCleaned == null) {
            // Nothing to escape; previously this case failed with a NullPointerException.
            return null;
        }

        String escaped = toBeCleaned.replaceAll("[:!&\"\'\\^\\+\\-\\|\\(\\)\\[\\]\\{\\}\\\\]", "\\\\$0");
        return escaped.toLowerCase(java.util.Locale.ROOT);
    }

    /**
     * The sorts that are valid for this provider.
     *
     * @return the <code>displaySorts</code> list of this service
     */
    public List getSorts() {
        return displaySorts;
    }

    /**
     * The page sizes that are valid for this provider.
     *
     * @return the <code>pageSizes</code> list of this service
     */
    public List getPageSizes() {
        return this.pageSizes;
    }

    /** Prefix that is stripped from an article DOI before it is used as a Solr document id. */
    private static final String DOI_SCHEME = "info:doi/";

    /**
     * Look up a single article by DOI and return the text of its abstract.
     * <p/>
     * A leading <code>info:doi/</code> prefix is removed before the lookup. Backslashes and double quotes in the
     * DOI are escaped so that the value cannot break out of the quoted phrase in the Solr query.
     *
     * @param articleDoi the DOI of the article, with or without the <code>info:doi/</code> prefix
     * @return the abstract text of the article
     * @throws ApplicationException if <code>articleDoi</code> is null, if the search does not return exactly one
     *                              hit, or if the hit has no abstract
     */
    @Override
    public String fetchAbstractText(String articleDoi) throws ApplicationException {
        if (articleDoi == null) {
            throw new ApplicationException("Article not found: " + articleDoi);
        }

        String doi = articleDoi.startsWith(DOI_SCHEME) ? articleDoi.substring(DOI_SCHEME.length()) : articleDoi;

        // Inside a quoted phrase only the backslash and the double quote need escaping.
        String escapedDoi = doi.replace("\\", "\\\\").replace("\"", "\\\"");

        SolrQuery query = new SolrQuery("id:\"" + escapedDoi + "\"");
        query.setFields("abstract", "abstract_primary_display");
        List<SearchHit> hits = search(query).getHits();

        if (hits.size() != 1) {
            String message = (hits.isEmpty()) ? "Article not found" : "Non-unique ID";
            throw new ApplicationException(message + ": " + doi);
        }

        String abstractText = hits.get(0).getAbstract();
        if (abstractText == null) {
            // Even an article with no abstract should have produced an empty (non-null) string
            throw new ApplicationException("Abstract not found for article: " + doi);
        }
        return abstractText;
    }

    /**
     * Store the given cache in this service's <code>cache</code> field.
     *
     * @param cache the cache instance to assign
     */
    public void setCache(Cache cache) {
        this.cache = cache;
    }
}