org.eclipse.smila.search.FederatedQueryHandling.java Source code

Java tutorial

Introduction

Here is the source code for org.eclipse.smila.search.FederatedQueryHandling.java

Source

/*******************************************************************************
 * Copyright (c) 2008, 2011 Attensity Europe GmbH and brox IT Solutions GmbH. All rights reserved. This program and the
 * accompanying materials are made available under the terms of the Eclipse Public License v1.0 which accompanies this
 * distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
 * 
 * Contributors: August Georg Schmidt (brox IT Solutions GmbH) - initial API and implementation
 **********************************************************************************************************************/
package org.eclipse.smila.search;

import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.SortedMap;
import java.util.TreeMap;

import org.apache.commons.collections.comparators.ReverseComparator;
import org.eclipse.smila.search.utils.searchresult.DHit;
import org.eclipse.smila.search.utils.searchresult.DHitDistribution;

/**
 * Utility class for federated query handling.
 * <p>
 * Given the hit distributions (number of hits per score) of several indices, it derives the {@link QueryScope}s that
 * have to be executed against each index in order to serve one window ({@code startHits}, {@code maxHits}) of the
 * merged result, where the merged result is ordered by descending score.
 * 
 * @author August Georg Schmidt (BROX)
 */
public final class FederatedQueryHandling {

    /**
     * Hide constructor. Class is used in a static way.
     */
    private FederatedQueryHandling() {
    }

    /**
     * This method creates a list of queries to be executed during search process.
     * <p>
     * The result follows the order of {@code indexNames}. If the records selected from an index start inside one page
     * (a block of {@code maxHits} records) and run past its end, a second query scope covering the following page is
     * appended directly after the first one.
     * 
     * @param indexNames
     *          Index names from search query.
     * @param startHits
     *          Start of hits in logical result structure.
     * @param maxHits
     *          Maximum number of hits to be returned.
     * @param hitDistributions
     *          Hit Distributions of all indices.
     * @return Array of QueryScope objects to be processed.
     */
    public static QueryScope[] calculateQueries(String[] indexNames, int startHits, int maxHits,
            HashMap<String, DHitDistribution> hitDistributions) {

        // prepare data structure containing all hits grouped/sorted by score and index name.
        final SortedMap<Integer, List<HitsPerIndex>> indicesPerHitLevel = calculateIndicesPerHitLevel(indexNames,
                hitDistributions);

        final HashMap<String, QueryScope> queryScopes = calculateQueryScopes(startHits, maxHits,
                indicesPerHitLevel);

        final HashMap<String, Integer> resolvedRecords = calculateStartPositionForQueryScopes(startHits, maxHits,
                indicesPerHitLevel);

        // assign selection start
        for (QueryScope queryScope : queryScopes.values()) {
            final Integer startSelection = resolvedRecords.get(queryScope.getIndexName());
            if (startSelection != null) {
                queryScope.setStartSelection(startSelection);
            }
        }

        // prepare query scope return in correct order and do query scope extension
        final ArrayList<QueryScope> queryScopeOrderedResult = new ArrayList<QueryScope>();
        for (String indexName : indexNames) {
            final QueryScope queryScope = queryScopes.get(indexName);
            if (queryScope == null) {
                // nothing has to be selected from this index
                continue;
            }
            queryScopeOrderedResult.add(queryScope);

            // records of the page that lie in front of the first selected record
            final int alreadySpentRecords = queryScope.getStartSelection() - queryScope.getStart() - 1;
            final int spendAndSelectedRecords = alreadySpentRecords + queryScope.getRecordsToSelect();
            if (spendAndSelectedRecords > queryScope.getHits()) {
                // the selection runs past the end of the page: cut it at the page end ...
                final int oldRecordsToSelect = queryScope.getRecordsToSelect();
                queryScope.setRecordsToSelect(queryScope.getHits() - alreadySpentRecords);

                // ... and fetch the remainder from the beginning of the following page.
                final int remainingRecordsToSelect = oldRecordsToSelect - queryScope.getRecordsToSelect();
                final int nextPageStart = queryScope.getStart() + queryScope.getHits();
                final QueryScope overflowScope = new QueryScope(queryScope.getIndexName(), nextPageStart,
                        queryScope.getHits(), remainingRecordsToSelect, nextPageStart + 1);
                queryScopeOrderedResult.add(overflowScope);
            }
        }

        return queryScopeOrderedResult.toArray(new QueryScope[0]);
    }

    /**
     * This method calculates the start position for query scopes.
     * <p>
     * It walks through all hits in descending score order and records, for every index that contributes to the
     * requested window, the 1-based position inside that index of the first contributing hit.
     * 
     * @param startHits
     *          Start of hits in search result.
     * @param maxHits
     *          Maximum number of hits in search result.
     * @param indicesPerHitLevel
     *          Structure with hit distribution grouped by score.
     * @return Start positions of a query scope by index name.
     */
    private static HashMap<String, Integer> calculateStartPositionForQueryScopes(int startHits, int maxHits,
            final SortedMap<Integer, List<HitsPerIndex>> indicesPerHitLevel) {

        final HashMap<String, Integer> positionPerIndex = new HashMap<String, Integer>();
        final HashMap<String, Integer> startPositionPerIndex = new HashMap<String, Integer>();
        int hitsFetched = 0;
        int position = 0;
        for (List<HitsPerIndex> hitsPerIndexList : indicesPerHitLevel.values()) {
            for (final HitsPerIndex hitsPerIndex : hitsPerIndexList) {

                final String indexName = hitsPerIndex.getIndexName();
                final Integer knownPosition = positionPerIndex.get(indexName);
                int positionInIndex = (knownPosition == null) ? 0 : knownPosition.intValue();

                for (int i = 0; i < hitsPerIndex.getHits(); i++) {
                    position++;
                    positionInIndex++;

                    if (hitsFetched >= maxHits) {
                        // window is complete, remaining hits are irrelevant
                        return startPositionPerIndex;
                    }

                    if (position > startHits) {
                        hitsFetched++;
                        // only the first hit of an index inside the window defines its start position
                        if (!startPositionPerIndex.containsKey(indexName)) {
                            startPositionPerIndex.put(indexName, positionInIndex);
                        }
                    }
                }

                // remember how far this index has been consumed; written once per hit group instead of once per
                // hit, which is sufficient because the map is local and dropped on early return.
                positionPerIndex.put(indexName, positionInIndex);
            }
        }

        return startPositionPerIndex;
    }

    /**
     * This method calculates the query scopes for several results.
     * <p>
     * It walks through all hits in descending score order, skips the first {@code startHits} hits and assigns the
     * following hits (at most {@code maxHits}) to one query scope per index. A scope is created when the first hit of
     * its index is collected; its start is the beginning of the {@code maxHits}-sized page that contains this hit.
     * 
     * @param startHits
     *          Start of hits in search result.
     * @param maxHits
     *          Maximum number of hits in search result.
     * @param indicesPerHitLevel
     *          Structure with hit distribution grouped by score.
     * @return Query scopes.
     */
    private static HashMap<String, QueryScope> calculateQueryScopes(int startHits, int maxHits,
            final SortedMap<Integer, List<HitsPerIndex>> indicesPerHitLevel) {

        final HashMap<String, QueryScope> queryScopes = new HashMap<String, QueryScope>();
        final HashMap<String, Integer> positionPerIndex = new HashMap<String, Integer>();
        int recordsCollected = 0;
        int currentPosition = 0;
        for (List<HitsPerIndex> hitsPerIndexList : indicesPerHitLevel.values()) {

            for (final HitsPerIndex hitsPerIndex : hitsPerIndexList) {

                final String indexName = hitsPerIndex.getIndexName();
                final Integer knownPosition = positionPerIndex.get(indexName);
                int positionInIndex = (knownPosition == null) ? 0 : knownPosition.intValue();

                for (int i = 0; i < hitsPerIndex.getHits(); i++) {

                    currentPosition++;
                    positionInIndex++;
                    if (currentPosition <= startHits) {
                        // still in front of the requested window
                        continue;
                    }

                    if (recordsCollected >= maxHits) {
                        // window is complete, remaining hits are irrelevant
                        return queryScopes;
                    }

                    QueryScope queryScope = queryScopes.get(indexName);
                    if (queryScope == null) {
                        // Round the 0-based position down to a multiple of maxHits. Integer division is exact
                        // here (dividend >= 0, maxHits > 0 because recordsCollected < maxHits), whereas the
                        // former float based computation lost precision for positions above 2^24.
                        final int start = ((positionInIndex - 1) / maxHits) * maxHits;
                        queryScope = new QueryScope(indexName, start, maxHits);
                        queryScopes.put(indexName, queryScope);
                    }
                    recordsCollected++;
                    queryScope.setRecordsToSelect(queryScope.getRecordsToSelect() + 1);
                }

                // remember how far this index has been consumed; written once per hit group instead of once per
                // hit, which is sufficient because the map is local and dropped on early return.
                positionPerIndex.put(indexName, positionInIndex);
            }
        }
        return queryScopes;
    }

    /**
     * This method creates a sorted map containing all indices and hits grouped by score.
     * <p>
     * The map is ordered by descending score. Within one score the entries keep the order of {@code indexNames}.
     * Index names without an entry in {@code hitDistributions} (or mapped to {@code null}) are skipped.
     * 
     * @param indexNames
     *          Name of indices.
     * @param hitDistributions
     *          Hit distributions.
     * @return Sorted map containing all hits grouped by score and index.
     */
    private static SortedMap<Integer, List<HitsPerIndex>> calculateIndicesPerHitLevel(String[] indexNames,
            HashMap<String, DHitDistribution> hitDistributions) {
        // ReverseComparator is a raw (non-generic) comparator, hence the unchecked conversion on this declaration.
        @SuppressWarnings("unchecked")
        final SortedMap<Integer, List<HitsPerIndex>> indicesPerHitLevel = new TreeMap<Integer, List<HitsPerIndex>>(
                new ReverseComparator());

        for (String indexName : indexNames) {

            final DHitDistribution hitDistribution = hitDistributions.get(indexName);
            if (hitDistribution == null) {
                // no distribution known for this index
                continue;
            }

            for (final Enumeration<DHit> hits = hitDistribution.getHits(); hits.hasMoreElements();) {
                final DHit hit = hits.nextElement();
                final Integer score = hit.getScore();

                List<HitsPerIndex> hitsAtLevel = indicesPerHitLevel.get(score);
                if (hitsAtLevel == null) {
                    hitsAtLevel = new ArrayList<HitsPerIndex>();
                    indicesPerHitLevel.put(score, hitsAtLevel);
                }
                hitsAtLevel.add(new HitsPerIndex(indexName, hit.getScore(), hit.getHits()));
            }
        }
        return indicesPerHitLevel;
    }

}