net.semanticmetadata.lire.solr.LireRequestHandler.java Source code

Java tutorial

Introduction

Here is the source code for net.semanticmetadata.lire.solr.LireRequestHandler.java

Source

/*
 * This file is part of the LIRE project: http://www.semanticmetadata.net/lire
 * LIRE is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * LIRE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with LIRE; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * We kindly ask you to cite any one of the following publications in
 * any publication mentioning or employing Lire:
 *
 * Lux Mathias, Savvas A. Chatzichristofis. Lire: Lucene Image Retrieval 
 * An Extensible Java CBIR Library. In proceedings of the 16th ACM International
 * Conference on Multimedia, pp. 1085-1088, Vancouver, Canada, 2008
 * URL: http://doi.acm.org/10.1145/1459359.1459577
 *
 * Lux Mathias. Content Based Image Retrieval with LIRE. In proceedings of the
 * 19th ACM International Conference on Multimedia, pp. 735-738, Scottsdale,
 * Arizona, USA, 2011
 * URL: http://dl.acm.org/citation.cfm?id=2072432
 *
 * Mathias Lux, Oge Marques. Visual Information Retrieval using Java and LIRE
 * Morgan & Claypool, 2013
 * URL: http://www.morganclaypool.com/doi/abs/10.2200/S00468ED1V01Y201301ICR025
 *
 * Copyright statement:
 * --------------------
 * (c) 2002-2013 by Mathias Lux (mathias@juggle.at)
 *     http://www.semanticmetadata.net/lire, http://www.lire-project.net
 */

package net.semanticmetadata.lire.solr;

import net.semanticmetadata.lire.imageanalysis.features.GlobalFeature;
import net.semanticmetadata.lire.imageanalysis.features.global.ColorLayout;
import net.semanticmetadata.lire.indexers.hashing.BitSampling;
import net.semanticmetadata.lire.indexers.hashing.MetricSpaces;
import net.semanticmetadata.lire.utils.ImageUtils;
import net.semanticmetadata.lire.utils.StatsUtils;
import org.apache.commons.codec.binary.Base64;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.handler.RequestHandlerBase;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.search.DocList;
import org.apache.solr.search.SolrIndexSearcher;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.*;

/**
 * This is the main LIRE RequestHandler for the Solr Plugin. It supports query by example using the indexed id,
 * an url or a feature vector. Furthermore, feature extraction and random selection of images are supported.
 *
 * @author Mathias Lux, mathias@juggle.at, 07.07.13
 */

public class LireRequestHandler extends RequestHandlerBase {
    //    private static HashMap<String, Class> fieldToClass = new HashMap<String, Class>(5);
    // Scratch variable used to time the individual search phases for the response stats.
    // NOTE(review): this and the fields below are mutated per request, so the handler
    // does not look thread safe -- verify how Solr instantiates and shares it.
    private long time = 0;
    // Number of requests served, reported via getStatistics().
    private int countRequests = 0;
    // Default for the "rows" parameter, i.e. the maximum number of results returned.
    private int defaultNumberOfResults = 60;
    /**
     * number of candidate results retrieved from the index. The higher this number, the slower,
     * but the more accurate the retrieval will be. 10k is a good value for starters.
     */
    private int numberOfCandidateResults = 10000;
    private static final int DEFAULT_NUMBER_OF_CANDIDATES = 10000;

    /**
     * The number of query terms that go along with the TermsFilter search. We need some to get a
     * score, the less the faster. I put down a minimum of three in the method, this value gives
     * the percentage of the overall number used (selected randomly).
     */
    private double numberOfQueryTerms = 0.33;
    private static final double DEFAULT_NUMBER_OF_QUERY_TERMS = 0.33;

    /**
     * If metric spaces should be used instead of BitSampling.
     */
    private boolean useMetricSpaces = true;
    private static final boolean DEFAULT_USE_METRIC_SPACES = true;

    static {
        HashingMetricSpacesManager.init(); // load reference points from disk.
    }

    /**
     * Standard Solr handler initialization; no handler-specific arguments are read here.
     */
    @Override
    public void init(NamedList args) {
        super.init(args);
    }

    /**
     * Dispatches the incoming request to the matching handler based on which
     * request parameter is present, checked in this order: hashes, url, id,
     * extract. Falls back to random results when none of them is given.
     *
     * @param req the Solr request carrying the parameters
     * @param rsp the Solr response the result is written to
     * @throws Exception if any of the delegated handlers fails
     */
    @Override
    public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
        SolrParams params = req.getParams();
        if (params.get("hashes") != null) {
            // legacy hash based search, kept for backwards compatibility.
            handleHashSearch(req, rsp);
        } else if (params.get("url") != null) {
            // query by example with an image fetched from the given URL.
            handleUrlSearch(req, rsp);
        } else if (params.get("id") != null) {
            // query by example with a document that is already indexed.
            handleIdSearch(req, rsp);
        } else if (params.get("extract") != null) {
            // plain feature extraction from an image URL, no search.
            handleExtract(req, rsp);
        } else {
            // no recognized parameter: return random documents.
            handleRandomSearch(req, rsp);
        }
    }

    /**
     * Handles a query by example based on a document already in the index, given by
     * the get parameters id, field and rows. The feature vector of the indexed
     * document is read from its DocValues and used as the query example.
     *
     * @param req the request providing id, field, rows, accuracy, candidates, ms and fq
     * @param rsp the response the results (or an "Error" entry) are written to
     * @throws IOException
     * @throws InstantiationException
     * @throws IllegalAccessException
     */
    private void handleIdSearch(SolrQueryRequest req, SolrQueryResponse rsp)
            throws IOException, InstantiationException, IllegalAccessException {
        SolrIndexSearcher searcher = req.getSearcher();
        try {
            int queryDocId = searcher.getFirstMatch(new Term("id", req.getParams().get("id")));
            // get the parameters
            String paramField = req.getParams().get("field", "cl_ha");
            if (!paramField.endsWith("_ha"))
                paramField += "_ha";
            numberOfQueryTerms = req.getParams().getDouble("accuracy", DEFAULT_NUMBER_OF_QUERY_TERMS);
            numberOfCandidateResults = req.getParams().getInt("candidates", DEFAULT_NUMBER_OF_CANDIDATES);
            useMetricSpaces = req.getParams().getBool("ms", DEFAULT_USE_METRIC_SPACES);
            int paramRows = req.getParams().getInt("rows", defaultNumberOfResults);

            GlobalFeature queryFeature = (GlobalFeature) FeatureRegistry.getClassForHashField(paramField)
                    .newInstance();
            rsp.add("QueryField", paramField);
            rsp.add("QueryFeature", queryFeature.getClass().getName());
            if (queryDocId > -1) {
                // Using DocValues to get the actual data from the index.
                BinaryDocValues binaryValues = MultiDocValues.getBinaryValues(searcher.getIndexReader(),
                        FeatureRegistry.getFeatureFieldName(paramField));
                if (binaryValues == null) {
                    rsp.add("Error",
                            "Could not find the DocValues of the query document. Are they in the index? Id: "
                                    + req.getParams().get("id"));
                    // Bug fix: bail out here; the code previously fell through and
                    // dereferenced binaryValues, causing a guaranteed NullPointerException.
                    return;
                }
                // fetch the bytes once instead of calling get() three times.
                BytesRef bytesRef = binaryValues.get(queryDocId);
                queryFeature.setByteArrayRepresentation(bytesRef.bytes, bytesRef.offset, bytesRef.length);

                Query query = null;
                if (!useMetricSpaces) {
                    // check singleton cache if the term stats can be cached.
                    HashTermStatistics.addToStatistics(searcher, paramField);
                    // Re-generating the hashes to save space (instead of storing them in the index)
                    int[] hashes = BitSampling.generateHashes(queryFeature.getFeatureVector());
                    query = createQuery(hashes, paramField, numberOfQueryTerms);
                } else if (MetricSpaces.supportsFeature(queryFeature)) {
                    // ----< Metric Spaces >-----
                    int queryLength = (int) StatsUtils.clamp(
                            numberOfQueryTerms * MetricSpaces.getPostingListLength(queryFeature), 3,
                            MetricSpaces.getPostingListLength(queryFeature));
                    String msQuery = MetricSpaces.generateBoostedQuery(queryFeature, queryLength);
                    QueryParser qp = new QueryParser(paramField.replace("_ha", "_ms"), new WhitespaceAnalyzer());
                    query = qp.parse(msQuery);
                } else {
                    query = new MatchAllDocsQuery();
                    rsp.add("Error",
                            "Feature not supported by MetricSpaces: " + queryFeature.getClass().getSimpleName());
                }
                doSearch(req, rsp, searcher, paramField, paramRows, getFilterQuery(req.getParams().get("fq")),
                        query, queryFeature);
            } else {
                rsp.add("Error", "Did not find an image with the given id " + req.getParams().get("id"));
            }
        } catch (Exception e) {
            rsp.add("Error", "There was an error with your search for the image with the id "
                    + req.getParams().get("id") + ": " + e.getMessage());
        }
    }

    /**
     * Parses the fq param and returns it as a filter query, or null if no fq param
     * is given or an Exception is thrown while parsing.
     *
     * @param fq the String attached to the query.
     * @return either a query from the QueryParser or null
     */
    private Query getFilterQuery(String fq) {
        if (fq == null)
            return null;
        QueryParser qp = new QueryParser("title", new WhitespaceAnalyzer());
        Query query = null;
        try {
            // Bug fix: the parse result was previously discarded, so this method
            // always returned null and the fq parameter silently never took effect.
            query = qp.parse(fq);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return query;
    }

    /**
     * Returns a random set of documents from the index. Mainly for testing purposes.
     * Distinct documents are guaranteed; at most maxDoc documents can be returned.
     *
     * @param req the request, only the rows parameter is used
     * @param rsp the response the random documents are written to
     * @throws IOException
     */
    private void handleRandomSearch(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException {
        SolrIndexSearcher searcher = req.getSearcher();
        DirectoryReader indexReader = searcher.getIndexReader();
        int maxDoc = indexReader.maxDoc();
        int paramRows = req.getParams().getInt("rows", defaultNumberOfResults);
        if (paramRows > maxDoc)
            paramRows = maxDoc;
        if (maxDoc < 1)
            rsp.add("Error", "No documents in index");
        else {
            // draw distinct random doc ids so the same document is not returned twice
            // (the previous implementation could add duplicates).
            Random rnd = new Random();
            Set<Integer> docIds = new HashSet<>(paramRows);
            while (docIds.size() < paramRows)
                docIds.add(rnd.nextInt(maxDoc));
            List<Document> list = new LinkedList<>();
            for (Integer docId : docIds)
                list.add(searcher.doc(docId));
            rsp.addResponse(list);
        }
    }

    /**
     * Searches for an image given by an URL. Note that (i) extracting image features takes time and
     * (ii) not every image is readable by Java.
     *
     * @param req the request with url, field, rows, accuracy, candidates, ms and fq parameters
     * @param rsp the response the results (or an "Error" entry) are written to
     * @throws IOException
     * @throws InstantiationException
     * @throws IllegalAccessException
     */
    private void handleUrlSearch(SolrQueryRequest req, SolrQueryResponse rsp)
            throws IOException, InstantiationException, IllegalAccessException {
        SolrParams params = req.getParams();
        String paramUrl = params.get("url");
        String paramField = req.getParams().get("field", "cl_ha");
        if (!paramField.endsWith("_ha"))
            paramField += "_ha";
        int paramRows = params.getInt("rows", defaultNumberOfResults);
        numberOfQueryTerms = req.getParams().getDouble("accuracy", DEFAULT_NUMBER_OF_QUERY_TERMS);
        numberOfCandidateResults = req.getParams().getInt("candidates", DEFAULT_NUMBER_OF_CANDIDATES);
        useMetricSpaces = req.getParams().getBool("ms", DEFAULT_USE_METRIC_SPACES);

        GlobalFeature feat = null;
        Query query = null;
        // wrapping the whole part in the try
        try {
            BufferedImage img;
            // Bug fix: close the stream explicitly. ImageIO.read(InputStream) does not
            // close it, so every request leaked an open connection before.
            try (InputStream in = new URL(paramUrl).openStream()) {
                img = ImageIO.read(in);
            }
            img = ImageUtils.trimWhiteSpace(img);
            // getting the right feature per field:
            if (FeatureRegistry.getClassForHashField(paramField) == null) // if the feature is not registered.
                feat = new ColorLayout();
            else {
                feat = (GlobalFeature) FeatureRegistry.getClassForHashField(paramField).newInstance();
            }
            feat.extract(img);

            if (!useMetricSpaces) {
                // Re-generating the hashes to save space (instead of storing them in the index)
                HashTermStatistics.addToStatistics(req.getSearcher(), paramField);
                int[] hashes = BitSampling.generateHashes(feat.getFeatureVector());
                query = createQuery(hashes, paramField, numberOfQueryTerms);
            } else if (MetricSpaces.supportsFeature(feat)) {
                // ----< Metric Spaces >-----
                int queryLength = (int) StatsUtils.clamp(
                        numberOfQueryTerms * MetricSpaces.getPostingListLength(feat), 3,
                        MetricSpaces.getPostingListLength(feat));
                String msQuery = MetricSpaces.generateBoostedQuery(feat, queryLength);
                QueryParser qp = new QueryParser(paramField.replace("_ha", "_ms"), new WhitespaceAnalyzer());
                query = qp.parse(msQuery);
            } else {
                rsp.add("Error", "Feature not supported by MetricSpaces: " + feat.getClass().getSimpleName());
                query = new MatchAllDocsQuery();
            }

        } catch (Exception e) {
            rsp.add("Error", "Error reading image from URL: " + paramUrl + ": " + e.getMessage());
            e.printStackTrace();
        }
        // search if the feature has been extracted and query is there.
        if (feat != null && query != null) {
            doSearch(req, rsp, req.getSearcher(), paramField, paramRows, getFilterQuery(req.getParams().get("fq")),
                    query, feat);
        }
    }

    /**
     * Extracts the global feature, the BitSampling hashes and -- if the feature is
     * supported -- the MetricSpaces hashes from an image given by URL, without
     * searching. Hashes are ordered by docFreq ascending, so the most distinctive
     * ones come first.
     *
     * @param req the request with extract, field, ms and accuracy parameters
     * @param rsp the response the histogram and hash lists are written to
     * @throws IOException
     * @throws InstantiationException
     * @throws IllegalAccessException
     */
    private void handleExtract(SolrQueryRequest req, SolrQueryResponse rsp)
            throws IOException, InstantiationException, IllegalAccessException {
        SolrParams params = req.getParams();
        String paramUrl = params.get("extract");
        String paramField = req.getParams().get("field", "cl_ha");
        if (!paramField.endsWith("_ha"))
            paramField += "_ha";
        useMetricSpaces = req.getParams().getBool("ms", DEFAULT_USE_METRIC_SPACES);
        double accuracy = req.getParams().getDouble("accuracy", DEFAULT_NUMBER_OF_QUERY_TERMS);
        GlobalFeature feat;
        // wrapping the whole part in the try
        try {
            BufferedImage img;
            // Bug fix: close the stream explicitly; ImageIO.read(InputStream) does not.
            try (InputStream in = new URL(paramUrl).openStream()) {
                img = ImageIO.read(in);
            }
            img = ImageUtils.trimWhiteSpace(img);
            // getting the right feature per field:
            if (FeatureRegistry.getClassForHashField(paramField) == null) // if the feature is not registered.
                feat = new ColorLayout();
            else {
                feat = (GlobalFeature) FeatureRegistry.getClassForHashField(paramField).newInstance();
            }
            feat.extract(img);
            rsp.add("histogram", Base64.encodeBase64String(feat.getByteArrayRepresentation()));
            // The BitSampling hashes are always returned; the previous condition
            // (!useMetricSpaces || true) was constantly true, so this is equivalent.
            HashTermStatistics.addToStatistics(req.getSearcher(), paramField);
            int[] hashes = BitSampling.generateHashes(feat.getFeatureVector());
            List<String> hashStrings = orderHashes(hashes, paramField, false);
            rsp.add("bs_list", hashStrings);
            List<String> hashQuery = orderHashes(hashes, paramField, true);
            int bsQueryLength = (int) StatsUtils.clamp(accuracy * hashes.length, 3, hashQuery.size());
            rsp.add("bs_query", String.join(" ", hashQuery.subList(0, bsQueryLength)));
            if (MetricSpaces.supportsFeature(feat)) {
                rsp.add("ms_list", MetricSpaces.generateHashList(feat));
                int msQueryLength = (int) StatsUtils.clamp(accuracy * MetricSpaces.getPostingListLength(feat), 3,
                        MetricSpaces.getPostingListLength(feat));
                rsp.add("ms_query", MetricSpaces.generateBoostedQuery(feat, msQueryLength));
            }
        } catch (Exception e) {
            rsp.add("Error", "Error reading image from URL: " + paramUrl + ": " + e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * Search based on the given image hashes (hashes=x y z ...) plus the base64
     * encoded feature vector (feature=...) used for re-ranking. If no hashes are
     * given, a query is derived from the feature itself (metric spaces only).
     *
     * @param req the request with hashes, feature, field, rows, accuracy, candidates, ms and fq
     * @param rsp the response the results (or an "Error" entry) are written to
     * @throws IOException
     * @throws IllegalAccessException
     * @throws InstantiationException
     */
    private void handleHashSearch(SolrQueryRequest req, SolrQueryResponse rsp)
            throws IOException, IllegalAccessException, InstantiationException {
        SolrParams params = req.getParams();
        SolrIndexSearcher searcher = req.getSearcher();
        // get the params needed:
        // hashes=x y z ...
        // feature=<base64>
        // field=<cl_ha|ph_ha|...>

        byte[] featureVector = Base64.decodeBase64(params.get("feature"));
        String paramField = req.getParams().get("field", "cl_ha");
        if (!paramField.endsWith("_ha"))
            paramField += "_ha";
        int paramRows = params.getInt("rows", defaultNumberOfResults);
        numberOfQueryTerms = req.getParams().getDouble("accuracy", DEFAULT_NUMBER_OF_QUERY_TERMS);
        numberOfCandidateResults = req.getParams().getInt("candidates", DEFAULT_NUMBER_OF_CANDIDATES);
        useMetricSpaces = req.getParams().getBool("ms", DEFAULT_USE_METRIC_SPACES);

        // query feature
        GlobalFeature queryFeature = (GlobalFeature) FeatureRegistry.getClassForHashField(paramField).newInstance();
        queryFeature.setByteArrayRepresentation(featureVector);

        if (!useMetricSpaces)
            HashTermStatistics.addToStatistics(req.getSearcher(), paramField); // caching the term statistics.

        Query query = null;
        if (params.get("hashes") == null) {
            // no hashes given: derive the query from the feature (metric spaces only).
            // Bug fix: this path previously left the QueryParser null and crashed with
            // a NullPointerException at parse time.
            if (useMetricSpaces && MetricSpaces.supportsFeature(queryFeature)) {
                int queryLength = (int) StatsUtils.clamp(
                        numberOfQueryTerms * MetricSpaces.getPostingListLength(queryFeature), 3,
                        MetricSpaces.getPostingListLength(queryFeature));
                String queryString = MetricSpaces.generateBoostedQuery(queryFeature, queryLength);
                QueryParser qp = new QueryParser(paramField.replace("_ha", "_ms"), new WhitespaceAnalyzer());
                try {
                    query = qp.parse(queryString);
                } catch (ParseException e) {
                    e.printStackTrace();
                }
            } else if (useMetricSpaces) {
                // feature not supported by MetricSpaces: fall back to matching everything.
                query = new MatchAllDocsQuery();
            }
            // BitSampling without given hashes was never supported here; query stays null.
        } else {
            String queryString = params.get("hashes").trim();
            String queryField = useMetricSpaces ? paramField.replace("_ha", "_ms") : paramField;
            QueryParser qp = new QueryParser(queryField, new WhitespaceAnalyzer());
            try {
                query = qp.parse(queryString);
            } catch (ParseException e) {
                e.printStackTrace();
            }
        }
        if (query == null) {
            // Bug fix: doSearch was previously called with a null query after a parse
            // failure, which failed further down the line.
            rsp.add("Error", "Could not create a query from the given parameters.");
            return;
        }
        // get results:
        doSearch(req, rsp, searcher, paramField, paramRows, getFilterQuery(req.getParams().get("fq")), query,
                queryFeature);
    }

    /**
     * Actual search implementation based on (i) hash based retrieval and (ii) feature based re-ranking.
     * Adds timing information ("DocValuesOpenTime", "RawDocsSearchTime", "ReRankSearchTime"),
     * the raw candidate count ("RawDocsCount") and the result list ("docs") to the response.
     * Which fields of each result document are returned is controlled by the "fl" parameter.
     *
     * @param req           the SolrQueryRequest
     * @param rsp           the response to write the data to
     * @param searcher      the actual index searcher object to search the index
     * @param hashFieldName the name of the field the hashes can be found
     * @param maximumHits   the maximum number of hits, the smaller the faster
     * @param filterQuery   can be null
     * @param query         the (Boolean) query for querying the candidates from the IndexSearcher
     * @param queryFeature  the image feature used for re-ranking the results
     * @throws IOException
     * @throws IllegalAccessException
     * @throws InstantiationException
     */
    private void doSearch(SolrQueryRequest req, SolrQueryResponse rsp, SolrIndexSearcher searcher,
            String hashFieldName, int maximumHits, Query filterQuery, Query query, GlobalFeature queryFeature)
            throws IOException, IllegalAccessException, InstantiationException {
        // temp feature instance, reused while deserializing each candidate's feature.
        GlobalFeature tmpFeature = queryFeature.getClass().newInstance();
        // Taking the time of search for statistical purposes.
        time = System.currentTimeMillis();

        String featureFieldName = FeatureRegistry.getFeatureFieldName(hashFieldName);
        BinaryDocValues binaryValues = MultiDocValues.getBinaryValues(searcher.getIndexReader(), featureFieldName);

        time = System.currentTimeMillis() - time;
        rsp.add("DocValuesOpenTime", time + "");

        Iterator<Integer> docIterator;
        int numberOfResults = 0;
        time = System.currentTimeMillis();
        // candidate retrieval: with a filter query Solr's DocList is used, otherwise a
        // plain Lucene TopDocs search over up to numberOfCandidateResults documents.
        if (filterQuery != null) {
            DocList docList = searcher.getDocList(query, filterQuery, Sort.RELEVANCE, 0, numberOfCandidateResults);
            numberOfResults = docList.size();
            docIterator = docList.iterator();
        } else {
            TopDocs docs = searcher.search(query, numberOfCandidateResults);
            numberOfResults = docs.totalHits;
            docIterator = new TopDocsIterator(docs);
        }
        time = System.currentTimeMillis() - time;
        rsp.add("RawDocsCount", numberOfResults + "");
        rsp.add("RawDocsSearchTime", time + "");
        time = System.currentTimeMillis();
        // re-rank the candidates by actual feature distance, keeping the best maximumHits.
        TreeSet<CachingSimpleResult> resultScoreDocs = getReRankedResults(docIterator, binaryValues, queryFeature,
                tmpFeature, maximumHits, searcher);

        // Creating response ...
        time = System.currentTimeMillis() - time;
        rsp.add("ReRankSearchTime", time + "");
        LinkedList list = new LinkedList();
        for (Iterator<CachingSimpleResult> it = resultScoreDocs.iterator(); it.hasNext();) {
            CachingSimpleResult result = it.next();
            HashMap m = new HashMap(2);
            m.put("d", result.getDistance());
            // add fields as requested:
            // no "fl" parameter: return id and (if present) title only.
            if (req.getParams().get("fl") == null) {
                m.put("id", result.getDocument().get("id"));
                if (result.getDocument().get("title") != null)
                    m.put("title", result.getDocument().get("title"));
            } else {
                String fieldsRequested = req.getParams().get("fl");
                // "score" maps to the feature distance, not a Lucene score.
                if (fieldsRequested.contains("score")) {
                    m.put("score", result.getDistance());
                }
                if (fieldsRequested.contains("*")) {
                    // all fields
                    for (IndexableField field : result.getDocument().getFields()) {
                        String tmpField = field.name();

                        // multi-valued fields are returned as arrays, single values as strings.
                        if (result.getDocument().getFields(tmpField).length > 1) {
                            m.put(result.getDocument().getFields(tmpField)[0].name(),
                                    result.getDocument().getValues(tmpField));
                        } else if (result.getDocument().getFields(tmpField).length > 0) {
                            m.put(result.getDocument().getFields(tmpField)[0].name(),
                                    result.getDocument().getFields(tmpField)[0].stringValue());
                        }
                    }
                } else {
                    // explicit field list, separated by comma or blank.
                    StringTokenizer st;
                    if (fieldsRequested.contains(","))
                        st = new StringTokenizer(fieldsRequested, ",");
                    else
                        st = new StringTokenizer(fieldsRequested, " ");
                    while (st.hasMoreElements()) {
                        String tmpField = st.nextToken();
                        if (result.getDocument().getFields(tmpField).length > 1) {
                            m.put(result.getDocument().getFields(tmpField)[0].name(),
                                    result.getDocument().getValues(tmpField));
                        } else if (result.getDocument().getFields(tmpField).length > 0) {
                            m.put(result.getDocument().getFields(tmpField)[0].name(),
                                    result.getDocument().getFields(tmpField)[0].stringValue());
                        }
                    }
                }
            }
            //            m.put(field, result.getDocument().get(field));
            //            m.put(field.replace("_ha", "_hi"), result.getDocument().getBinaryValue(field));
            list.add(m);
        }
        rsp.add("docs", list);
        // rsp.add("Test-name", "Test-val");
    }

    /**
     * Re-ranks the candidate documents by their actual feature distance to the query
     * feature, keeping only the maximumHits nearest ones in a bounded TreeSet.
     *
     * @param docIterator  iterator over the candidate Lucene doc ids
     * @param binaryValues DocValues holding the serialized feature vectors
     * @param queryFeature the query example feature
     * @param tmpFeature   scratch feature instance reused for deserialization
     * @param maximumHits  the maximum size of the result set
     * @param searcher     used to fetch the actual documents
     * @return the re-ranked results, ordered by distance
     * @throws IOException
     */
    private TreeSet<CachingSimpleResult> getReRankedResults(Iterator<Integer> docIterator,
            BinaryDocValues binaryValues, GlobalFeature queryFeature, GlobalFeature tmpFeature, int maximumHits,
            IndexSearcher searcher) throws IOException {
        TreeSet<CachingSimpleResult> results = new TreeSet<>();
        double worstDistance = -1f;
        while (docIterator.hasNext()) {
            int docId = docIterator.next();
            // deserialize the candidate's feature straight from the DocValues bytes,
            // avoiding the slow stored-field access.
            BytesRef ref = binaryValues.get(docId);
            tmpFeature.setByteArrayRepresentation(ref.bytes, ref.offset, ref.length);
            double distance = queryFeature.getDistance(tmpFeature);
            if (results.size() < maximumHits) {
                // result set not full yet: always add and track the current worst distance.
                results.add(new CachingSimpleResult(distance, searcher.doc(docId), docId));
                worstDistance = results.last().getDistance();
            } else if (distance < worstDistance) {
                // closer than the current worst: recycle the worst entry with the new values.
                CachingSimpleResult recycled = results.last();
                results.remove(recycled);
                recycled.set(distance, searcher.doc(docId), docId);
                results.add(recycled);
                worstDistance = results.last().getDistance();
            }
        }
        return results;
    }

    /**
     * Returns a short human readable description shown in the Solr admin UI.
     */
    @Override
    public String getDescription() {
        return "LIRE Request Handler to add images to an index and search them. Search images by id, by url and by extracted features.";
    }

    /**
     * Returns the project URL of this handler's source.
     */
    @Override
    public String getSource() {
        return "http://lire-project.net";
    }

    /**
     * Reports handler statistics for the admin console, adding the request counter
     * on top of the base class statistics.
     * NOTE(review): countRequests does not appear to be incremented anywhere in this
     * class -- verify the counter is actually maintained.
     */
    @Override
    public NamedList<Object> getStatistics() {
        // Change stats here to get an insight in the admin console.
        NamedList<Object> stats = super.getStatistics();
        stats.add("Number of Requests", countRequests);
        return stats;
    }

    /**
     * Makes a Boolean query out of a list of hashes by ordering them ascending using their docFreq and
     * then only using the most distinctive ones, defined by size in [0.1, 1], size=1 takes all.
     *
     * @param hashes     the hashes to build the query from
     * @param paramField the hash field in the index
     * @param size       in [0.1, 1], percentage of hashes used (clamped to that range)
     * @return a Boolean OR query over the selected hash terms
     */
    private BooleanQuery createQuery(int[] hashes, String paramField, double size) {
        size = Math.max(0.1, Math.min(size, 1d)); // clamp size.
        List<String> hList = orderHashes(hashes, paramField, true);
        int numHashes = (int) Math.min(hList.size(), Math.floor(hashes.length * size));
        // a minimum of 3 hashes, but never more than are actually available.
        // Bug fix: forcing numHashes to 3 unconditionally could exceed hList.size()
        // and cause an IndexOutOfBoundsException below.
        numHashes = Math.min(Math.max(numHashes, 3), hList.size());

        BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
        for (int i = 0; i < numHashes; i++) {
            // be aware that the hashFunctionsFileName of the field must match the one you put the hashes in before.
            queryBuilder.add(new BooleanClause(new TermQuery(new Term(paramField, hList.get(i))),
                    BooleanClause.Occur.SHOULD));
        }
        BooleanQuery query = queryBuilder.build();
        // this query is just for boosting the results with more matching hashes. We'd need to match it to all docs.
        //        query.add(new BooleanClause(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD));
        return query;
    }

    /**
     * Sorts the hashes to put those first, that do not show up in a large number of documents,
     * i.e. terms sorted by docFreq ascending. If removeZeroDocFreqTerms is set, terms with
     * docFreq == 0 are removed from the front, leaving at least three entries.
     *
     * @param hashes     the int[] of hashes
     * @param paramField the field in the index.
     * @param removeZeroDocFreqTerms whether terms not present in the index are dropped
     * @return the hex encoded hash terms, sorted by docFreq ascending
     */
    private List<String> orderHashes(int[] hashes, String paramField, boolean removeZeroDocFreqTerms) {
        List<String> hList = new ArrayList<>(hashes.length);
        // creates a list of terms.
        for (int hash : hashes) {
            hList.add(Integer.toHexString(hash));
        }
        // uses our predetermined hash term stats object to sort the list;
        // overflow-safe comparison instead of the previous int subtraction.
        hList.sort(Comparator.comparingInt(o -> HashTermStatistics.docFreq(paramField, o)));
        // Bug fix: the removeZeroDocFreqTerms flag was previously ignored and terms
        // were always removed; the size check now also comes first, guarding hList.get(0)
        // against an empty list.
        if (removeZeroDocFreqTerms) {
            // removing those with zero entries but leaving at least three.
            while (hList.size() > 3 && HashTermStatistics.docFreq(paramField, hList.get(0)) < 1)
                hList.remove(0);
        }
        return hList;
    }

    /**
     * This is used to create a TermsFilter ... should be used to select in the index based on many terms.
     * We just need to integrate a minimum query too, else we'd not get the appropriate results.
     * Terms are sorted by docFreq ascending and those with docFreq == 0 are removed
     * (leaving at least three), then the first numHashes terms are used.
     *
     * @param hashes     the hashes to build the term filter from
     * @param paramField the hash field in the index
     * @param size       percentage of hashes used
     * @return the list of the most distinctive hash terms
     */
    private List<Term> createTermFilter(int[] hashes, String paramField, double size) {
        List<String> hList = new ArrayList<>(hashes.length);
        // creates a list of terms.
        for (int hash : hashes) {
            hList.add(Integer.toHexString(hash));
        }
        // uses our predetermined hash term stats object to sort the list;
        // overflow-safe comparison instead of the previous int subtraction.
        hList.sort(Comparator.comparingInt(o -> HashTermStatistics.docFreq(paramField, o)));
        // removing those with zero entries but leaving at least three
        // (size check first, guarding against an empty list).
        while (hList.size() > 3 && HashTermStatistics.docFreq(paramField, hList.get(0)) < 1)
            hList.remove(0);
        int numHashes = (int) Math.min(hList.size(), Math.floor(hashes.length * size));
        // a minimum of 3 hashes, but never more than are actually available.
        numHashes = Math.min(Math.max(numHashes, 3), hList.size());
        List<Term> termFilter = new LinkedList<>();
        for (int i = 0; i < numHashes; i++) {
            // Bug fix: the original used the UNSORTED hashes[i] here, defeating the
            // docFreq ordering above; use the sorted list instead.
            termFilter.add(new Term(paramField, hList.get(i)));
        }
        return termFilter;
    }
}