uk.co.flax.ukmp.search.solr.SolrSearchEngine.java Source code

Java tutorial

Introduction

Here is the source code for SolrSearchEngine.java (class uk.co.flax.ukmp.search.solr.SolrSearchEngine), the Solr implementation of the SearchEngine interface.

Source

/**
 * Copyright (c) 2013 Lemur Consulting Ltd.
 * <p/>
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * <p/>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p/>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package uk.co.flax.ukmp.search.solr;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrQuery.ORDER;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.FacetField.Count;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.SolrPingResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import uk.co.flax.ukmp.api.Facet;
import uk.co.flax.ukmp.api.FacetList;
import uk.co.flax.ukmp.api.FacetQuery;
import uk.co.flax.ukmp.api.SearchResults;
import uk.co.flax.ukmp.api.SearchState;
import uk.co.flax.ukmp.api.Sentiment;
import uk.co.flax.ukmp.api.Tweet;
import uk.co.flax.ukmp.config.SolrConfiguration;
import uk.co.flax.ukmp.config.TermsConfiguration;
import uk.co.flax.ukmp.search.Query;
import uk.co.flax.ukmp.search.SearchEngine;
import uk.co.flax.ukmp.search.SearchEngineException;

/**
 * Solr implementation of the search engine interface.
 */
public class SolrSearchEngine implements SearchEngine {

    private static final Logger LOGGER = LoggerFactory.getLogger(SolrSearchEngine.class);

    /** Separator between the field name and the value in a filter or facet query. */
    private static final char FIELD_SEPARATOR = ':';

    public static final String ID_FIELD = "id";
    public static final String TEXT_FIELD = "text";
    public static final String PARTY_FIELD = "party";
    public static final String CREATED_FIELD = "created_at";
    public static final String COUNTRY_FIELD = "place_country";
    public static final String PLACE_NAME = "place_full_name";
    public static final String USER_SCREEN_NAME = "user_screen_name";
    public static final String USER_FULL_NAME = "user_full_name";
    public static final String SENTIMENT_FIELD = "sentiment";
    public static final String RETWEET_COUNT_FIELD = "retweet_count";
    public static final String FAVOURITE_COUNT_FIELD = "favorite_count";

    /**
     * Static instance of the Solr server - should be only one of these per
     * running application. Note that both constructors overwrite this shared
     * reference, so the most recently constructed engine decides which client
     * every instance uses.
     */
    private static SolrClient server;
    /** The Solr configuration details */
    private final SolrConfiguration config;

    /**
     * Create a search engine talking to the Solr instance at the configured
     * base URL.
     * @param config the Solr configuration details.
     */
    public SolrSearchEngine(SolrConfiguration config) {
        initialiseServer(config);
        this.config = config;
    }

    /**
     * Unit testing constructor.
     * @param server mock server instance.
     * @param config the Solr configuration details.
     */
    SolrSearchEngine(SolrClient server, SolrConfiguration config) {
        SolrSearchEngine.server = server;
        this.config = config;
    }

    /**
     * Initialise the single server connection.
     * @param config the configuration supplying the Solr base URL.
     */
    private static void initialiseServer(SolrConfiguration config) {
        // Instantiate the server link
        server = new HttpSolrClient(config.getBaseUrl());
    }

    /**
     * Ping the Solr server to check whether it is available. Failures are
     * logged rather than propagated, so an unreachable server is reported
     * as "not ready".
     * @return <code>true</code> if the ping returned a status of 0.
     */
    @Override
    public boolean isServerReady() throws SearchEngineException {
        boolean ready = false;

        try {
            SolrPingResponse response = server.ping();
            ready = (response != null && response.getStatus() == 0);

            if (response == null) {
                LOGGER.error("Search engine returned null response from ping()");
            } else if (!ready) {
                // The placeholder is required - without it the response is never logged
                LOGGER.error("Search engine is not ready: {}", response.getResponse());
            }
        } catch (SolrServerException e) {
            // Trailing throwable argument keeps the stack trace in the log
            LOGGER.error("Server exception from ping(): {}", e.getMessage(), e);
        } catch (IOException e) {
            LOGGER.error("IO exception when calling server: {}", e.getMessage(), e);
        }

        return ready;
    }

    /**
     * Run a search, returning one page of tweets along with the search state
     * (available and applied filters) needed to render the results.
     * @param query the query details, including paging, sorting and filters.
     * @return the results for the requested page.
     * @throws SearchEngineException if Solr reports a problem running the query.
     */
    @Override
    public SearchResults search(Query query) throws SearchEngineException {
        SolrQuery sQuery = new SolrQuery(query.getQuery());
        sQuery.setRows(query.getPageSize());
        int start = query.getPageSize() * query.getPageNumber();
        sQuery.setStart(start);
        sQuery.setSort(query.getSortField(), query.isSortAscending() ? ORDER.asc : ORDER.desc);

        if (query.getFilters() != null) {
            for (String fq : query.getFilters()) {
                sQuery.addFilterQuery(fq);
            }
        }

        // Set the request handler
        sQuery.setRequestHandler(config.getQueryHandler());

        try {
            QueryResponse response = server.query(sQuery);
            SolrDocumentList docs = response.getResults();

            Map<String, FacetList> availableFilters = extractAvailableFilters(response);
            availableFilters.putAll(extractFacetQueries(response));
            Map<String, FacetList> appliedFilters = extractAppliedFilters(query);
            trimAvailableFilters(availableFilters, appliedFilters);

            SearchState search = new SearchState(query.getQuery(), query.getSortField(), query.isSortAscending(),
                    query.getPageNumber(), availableFilters, null, appliedFilters, query.isHighlightingEnabled());
            List<Tweet> tweets = extractTweets(docs, response.getHighlighting(), query.isHighlightingEnabled());

            // extractTweets() tolerates a missing result list, so the total must as well
            long numFound = (docs == null) ? 0L : docs.getNumFound();

            return new SearchResults(start, numFound, query.getPageSize(), tweets, search);
        } catch (SolrServerException e) {
            LOGGER.error("Server exception caught for query {}: {}", sQuery.toString(), e.getMessage());
            throw new SearchEngineException(e);
        }
    }

    /**
     * Convert the Solr documents into tweets, swapping in the highlighted
     * text where highlighting was requested and a highlight is available.
     * @param docs the documents returned from Solr - may be <code>null</code>.
     * @param highlightMap highlights keyed by document ID, then field name -
     * may be <code>null</code>.
     * @param highlighting <code>true</code> if highlighted text should be used.
     * @return a list of tweets, empty if there are no documents.
     */
    private List<Tweet> extractTweets(SolrDocumentList docs, Map<String, Map<String, List<String>>> highlightMap,
            boolean highlighting) {
        if (docs == null) {
            return new ArrayList<Tweet>();
        }

        // Highlights can only be used when they were requested AND returned
        boolean useHighlights = highlighting && highlightMap != null;

        List<Tweet> tweets = new ArrayList<Tweet>(docs.size());
        for (SolrDocument doc : docs) {
            Map<String, Object> fieldMap = doc.getFieldValueMap();

            Tweet tweet = new Tweet();
            String id = (String) fieldMap.get(ID_FIELD);
            tweet.setId(id);

            String text = (String) fieldMap.get(TEXT_FIELD);
            if (useHighlights) {
                text = getHighlight(text, TEXT_FIELD, highlightMap.get(id));
            }

            tweet.setText(text);
            tweet.setCreated((Date) fieldMap.get(CREATED_FIELD));
            tweet.setCountry((String) fieldMap.get(COUNTRY_FIELD));
            tweet.setPlaceName((String) fieldMap.get(PLACE_NAME));
            tweet.setUserScreenName((String) fieldMap.get(USER_SCREEN_NAME));
            tweet.setUserName((String) fieldMap.get(USER_FULL_NAME));
            tweet.setParty((String) fieldMap.get(PARTY_FIELD));
            if (fieldMap.containsKey(SENTIMENT_FIELD)) {
                tweet.setSentiment((Integer) fieldMap.get(SENTIMENT_FIELD));
            } else {
                // Default sentiment value to neutral
                tweet.setSentiment(Sentiment.SENTIMENT_NEUTRAL);
            }
            if (fieldMap.containsKey(RETWEET_COUNT_FIELD)) {
                tweet.setRetweetCount((Integer) fieldMap.get(RETWEET_COUNT_FIELD));
            }
            if (fieldMap.containsKey(FAVOURITE_COUNT_FIELD)) {
                tweet.setFavouriteCount((Integer) fieldMap.get(FAVOURITE_COUNT_FIELD));
            }
            tweets.add(tweet);
        }

        return tweets;
    }

    /**
     * Get the first highlight snippet for a field, falling back to the
     * original text when the item has no highlight for that field.
     * @param base the unhighlighted text.
     * @param fieldName the field whose highlight is wanted.
     * @param itemHighlights the highlights for a single item, keyed by field
     * name - may be <code>null</code>.
     * @return the first snippet for the field, or <code>base</code>.
     */
    private String getHighlight(String base, String fieldName, Map<String, List<String>> itemHighlights) {
        if (itemHighlights != null) {
            List<String> snippets = itemHighlights.get(fieldName);
            if (snippets != null && !snippets.isEmpty()) {
                return snippets.get(0);
            }
        }

        return base;
    }

    /**
     * Build the applied filter lists, grouped by field, from the filters on
     * the incoming query. Filters that are not in <code>field:value</code>
     * form cannot be shown as a facet and are skipped with a warning.
     * @param query the incoming query.
     * @return a map of field name to applied filters - never <code>null</code>.
     */
    private Map<String, FacetList> extractAppliedFilters(Query query) {
        Map<String, FacetList> applied = new HashMap<String, FacetList>();
        if (query.getFilters() == null) {
            return applied;
        }

        Map<String, List<Facet>> facetsByField = new HashMap<String, List<Facet>>();
        for (String fq : query.getFilters()) {
            String[] parts = splitFieldAndValue(fq);
            if (parts == null) {
                LOGGER.warn("Ignoring filter which is not in field:value form: {}", fq);
                continue;
            }

            String field = parts[0];
            String value = stripQuotes(parts[1]);

            String label = value;
            if (value.startsWith("[")) {
                // This is a facetquery - need to get display label, keeping
                // the raw value when no label has been configured for it
                Map<String, String> labels = getFacetQueryLabels(field);
                String configured = (labels == null) ? null : labels.get(value);
                if (configured != null) {
                    label = configured;
                }
            }

            List<Facet> facets = facetsByField.get(field);
            if (facets == null) {
                facets = new ArrayList<Facet>();
                facetsByField.put(field, facets);
            }
            facets.add(new FacetQuery(field, value, 0, label));
        }

        for (Map.Entry<String, List<Facet>> entry : facetsByField.entrySet()) {
            String field = entry.getKey();
            applied.put(field, new FacetList(field, getFacetLabel(field), entry.getValue()));
        }

        return applied;
    }

    /**
     * Build the available filter lists from the facet fields in the
     * response. Facet values with a zero count are dropped.
     * @param response the Solr response.
     * @return a map of field name to available filters - never <code>null</code>.
     */
    private Map<String, FacetList> extractAvailableFilters(QueryResponse response) {
        Map<String, FacetList> facets = new HashMap<String, FacetList>();

        List<FacetField> facetFields = response.getFacetFields();
        if (facetFields == null) {
            return facets;
        }

        for (FacetField ff : facetFields) {
            String field = ff.getName();
            List<Facet> facetList = new ArrayList<Facet>(ff.getValueCount());

            for (Count c : ff.getValues()) {
                if (c.getCount() > 0) {
                    facetList.add(new Facet(field, c.getName(), c.getCount()));
                }
            }

            facets.put(field, new FacetList(field, getFacetLabel(field), facetList));
        }

        return facets;
    }

    /**
     * Get the display label for a facet field.
     * @param field the field name.
     * @return the configured label, or the field name if none is configured.
     */
    private String getFacetLabel(String field) {
        String ret = config.getFacetLabels().get(field);
        return ret == null ? field : ret;
    }

    /**
     * Get the configured facet query labels for a field.
     * @param field the field name.
     * @return a map of facet query to display label, or <code>null</code> if
     * nothing is configured for the field.
     */
    private Map<String, String> getFacetQueryLabels(String field) {
        return config.getFacetQueryFields() == null ? null : config.getFacetQueryFields().get(field);
    }

    /**
     * Split a <code>field:value</code> filter or facet query into its two
     * parts. Only the first separator is significant, so values which
     * themselves contain a colon (timestamps in a range, for example) are
     * kept intact.
     * @param filter the filter or facet query text.
     * @return a two element array holding the field and the value, or
     * <code>null</code> if the text has no field or no value.
     */
    private static String[] splitFieldAndValue(String filter) {
        if (filter == null) {
            return null;
        }

        int sep = filter.indexOf(FIELD_SEPARATOR);
        if (sep <= 0 || sep == filter.length() - 1) {
            return null;
        }

        return new String[] { filter.substring(0, sep), filter.substring(sep + 1) };
    }

    /**
     * Strip a surrounding pair of double quotes from a filter value.
     * @param value the filter value.
     * @return the value without its enclosing quotes, or the value unchanged
     * if it is not fully quoted.
     */
    private static String stripQuotes(String value) {
        if (value.length() >= 2 && value.startsWith("\"") && value.endsWith("\"")) {
            return value.substring(1, value.length() - 1);
        }
        return value;
    }

    /**
     * Trim filters that have already been applied from the set of available
     * filters. This is done in place on the passed availableFilters map.
     * @param availableFilters the available filters.
     * @param appliedFilters the filters already applied.
     */
    private void trimAvailableFilters(Map<String, FacetList> availableFilters,
            Map<String, FacetList> appliedFilters) {
        for (Map.Entry<String, FacetList> available : availableFilters.entrySet()) {
            FacetList applied = appliedFilters.get(available.getKey());
            if (applied == null) {
                // Nothing applied on this field - leave its values alone
                continue;
            }

            // Remove through the iterator so the list can be modified while looping
            Iterator<Facet> valueIter = available.getValue().getFacets().iterator();
            while (valueIter.hasNext()) {
                if (applied.containsValue(valueIter.next().getValue())) {
                    valueIter.remove();
                }
            }
        }
    }

    /**
     * Build filter lists from the facet queries in the response. Facet
     * queries are grouped by field, and only those with a configured display
     * label are included. Fields without any facet query configuration, and
     * facet queries not in <code>field:query</code> form, are skipped.
     * @param response the Solr response.
     * @return a map of field name to facet query filters - never <code>null</code>.
     */
    private Map<String, FacetList> extractFacetQueries(QueryResponse response) {
        Map<String, FacetList> fQuery = new HashMap<String, FacetList>();

        Map<String, Integer> facetQuery = response.getFacetQuery();
        if (facetQuery == null) {
            return fQuery;
        }

        Map<String, List<Facet>> facetMap = new HashMap<String, List<Facet>>();
        for (Map.Entry<String, Integer> entry : facetQuery.entrySet()) {
            // Split into field, query
            String[] parts = splitFieldAndValue(entry.getKey());
            if (parts == null) {
                LOGGER.debug("Ignoring facet query which is not in field:query form: {}", entry.getKey());
                continue;
            }

            String field = parts[0];
            Map<String, String> facetLabels = getFacetQueryLabels(field);
            if (facetLabels == null) {
                LOGGER.debug("No facet query labels configured for field {}", field);
                continue;
            }

            List<Facet> facets = facetMap.get(field);
            if (facets == null) {
                facets = new ArrayList<Facet>();
                facetMap.put(field, facets);
            }

            String label = facetLabels.get(parts[1]);
            if (label != null) {
                facets.add(new FacetQuery(field, parts[1], entry.getValue(), label));
            }
        }

        for (Map.Entry<String, List<Facet>> entry : facetMap.entrySet()) {
            String field = entry.getKey();
            fQuery.put(field, new FacetList(field, getFacetLabel(field), entry.getValue()));
        }

        return fQuery;
    }

    /**
     * Fetch a batch of tweet text using the terms configuration. If the
     * configured filters match nothing, the query is re-run without them.
     * @param batchNum the zero-based number of the batch to fetch.
     * @return the batch of results; each tweet has only its text set.
     * @throws SearchEngineException if Solr reports a problem running the query.
     */
    @Override
    public SearchResults getTextBatch(int batchNum) throws SearchEngineException {
        SearchResults results;
        TermsConfiguration termsConfig = config.getTermsConfiguration();

        // Treat a missing filter list in the configuration as "no filters"
        List<String> filters = termsConfig.getFilters();
        if (filters == null) {
            filters = new ArrayList<String>();
        }

        SolrQuery query = new SolrQuery("*:*");
        query.setRequestHandler(termsConfig.getHandler());
        query.setFields(termsConfig.getField());
        query.setRows(termsConfig.getBatchSize());
        query.setStart(batchNum * termsConfig.getBatchSize());
        query.addFilterQuery(filters.toArray(new String[0]));

        try {
            QueryResponse response = server.query(query);
            SolrDocumentList docs = response.getResults();

            // Attempt to ensure we always return at least one batch of results
            if (docs.getNumFound() == 0) {
                LOGGER.debug("No tweets found in text batch - running again without filters");
                docs = runQueryWithoutFilters(query, filters);
            }

            List<Tweet> tweets = new ArrayList<Tweet>(docs.size());
            for (SolrDocument doc : docs) {
                Tweet tweet = new Tweet();
                tweet.setText((String) doc.getFieldValue(termsConfig.getField()));
                tweets.add(tweet);
            }

            results = new SearchResults(query.getStart(), docs.getNumFound(), query.getRows(), tweets, null);
        } catch (SolrServerException e) {
            LOGGER.error("Server exception caught getting text batch {}: {}", batchNum, e.getMessage());
            throw new SearchEngineException(e);
        }

        return results;
    }

    /**
     * Run the given query, removing the given list of filters from the query itself.
     * @param query the query to be run.
     * @param filters the list of filters to remove.
     * @return a list of documents returned from the query.
     * @throws SolrServerException if a problem occurs accessing Solr.
     */
    private SolrDocumentList runQueryWithoutFilters(SolrQuery query, List<String> filters)
            throws SolrServerException {
        for (String fq : filters) {
            // Re-query without the filter queries set
            query.removeFilterQuery(fq);
        }
        QueryResponse response = server.query(query);
        SolrDocumentList docs = response.getResults();

        // Set numFound to the results list size, avoid reading every tweet in
        // the search engine
        if (docs.getNumFound() > 0) {
            docs.setNumFound(docs.size());
        }

        return docs;
    }

    /**
     * Add the tweets to the index and commit.
     * @param tweets the tweets to index. A <code>null</code> or empty list is
     * a no-op.
     * @throws SearchEngineException if the update or the commit fails.
     */
    @Override
    public void indexTweets(List<Tweet> tweets) throws SearchEngineException {
        if (tweets == null || tweets.isEmpty()) {
            // Nothing to index - skip the update request and the commit
            return;
        }

        try {
            server.addBeans(tweets);
            server.commit();
        } catch (SolrServerException e) {
            throw new SearchEngineException(e);
        } catch (IOException e) {
            throw new SearchEngineException(e);
        }
    }

    /**
     * Delete the tweets with the given IDs from the index and commit.
     * @param deleteIds the IDs of the tweets to delete. A <code>null</code>
     * or empty list is a no-op.
     * @throws SearchEngineException if the delete or the commit fails.
     */
    @Override
    public void deleteTweets(List<String> deleteIds) throws SearchEngineException {
        if (deleteIds == null || deleteIds.isEmpty()) {
            // Nothing to delete - skip the update request and the commit
            return;
        }

        try {
            server.deleteById(deleteIds);
            server.commit();
        } catch (SolrServerException e) {
            throw new SearchEngineException(e);
        } catch (IOException e) {
            throw new SearchEngineException(e);
        }
    }

}