org.apache.usergrid.persistence.geo.GeoIndexSearcher.java Source code

Introduction

Here is the source code for org.apache.usergrid.persistence.geo.GeoIndexSearcher.java
Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.usergrid.persistence.geo;

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import java.util.UUID;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.usergrid.persistence.EntityManager;
import org.apache.usergrid.persistence.IndexBucketLocator;
import org.apache.usergrid.persistence.IndexBucketLocator.IndexType;
import org.apache.usergrid.persistence.cassandra.CassandraService;
import org.apache.usergrid.persistence.cassandra.GeoIndexManager;
import org.apache.usergrid.persistence.cassandra.index.IndexMultiBucketSetLoader;
import org.apache.usergrid.persistence.geo.model.Point;
import org.apache.usergrid.persistence.geo.model.Tuple;

import org.apache.commons.lang.StringUtils;

import me.prettyprint.hector.api.beans.AbstractComposite.ComponentEquality;
import me.prettyprint.hector.api.beans.DynamicComposite;
import me.prettyprint.hector.api.beans.HColumn;

import static org.apache.usergrid.persistence.Schema.DICTIONARY_GEOCELL;
import static org.apache.usergrid.persistence.cassandra.ApplicationCF.ENTITY_INDEX;
import static org.apache.usergrid.persistence.cassandra.CassandraPersistenceUtils.key;
import static org.apache.usergrid.utils.CompositeUtils.setEqualityFlag;
import static org.apache.usergrid.persistence.cassandra.Serializers.*;

public abstract class GeoIndexSearcher {

    private static final Logger logger = LoggerFactory.getLogger(GeoIndexSearcher.class);

    /**
     * Comparator used by the proximity search both to keep the result list sorted (binary-search insertion) and to
     * decide whether a candidate sorts at or before the caller-supplied minimum match.
     */
    private static final EntityLocationRefDistanceComparator COMP = new EntityLocationRefDistanceComparator();

    // The maximum *practical* geocell resolution. A fresh search (no cells carried over from a previous call)
    // starts from the cell of this resolution that contains the search point.
    private static final int MAX_GEOCELL_RESOLUTION = GeoIndexManager.MAX_RESOLUTION;

    /** Max number of records to read+parse from cass per tile; used as the page size handed to doSearch. */
    private static final int MAX_FETCH_SIZE = 1000;

    /** Entity manager; read in this class to obtain the application UUID when building index row keys. */
    protected final EntityManager em;
    /** Supplies the index buckets for each geocell when building index row keys. */
    protected final IndexBucketLocator locator;
    /** Cassandra service handed to the multi-bucket index loader. */
    protected final CassandraService cass;

    /**
     * Creates a searcher that stores the supplied collaborators for later use by the search methods.
     *
     * @param entityManager the entity manager, kept as {@code em}
     * @param locator the index bucket locator
     * @param cass the Cassandra service
     */
    public GeoIndexSearcher(EntityManager entityManager, IndexBucketLocator locator, CassandraService cass) {
        this.cass = cass;
        this.locator = locator;
        this.em = entityManager;
    }

    /**
     * Performs a proximity search around {@code searchPoint}, starting from a small set of geocells and widening
     * it (one neighbour, then a 2x2 block, then the parent cells) until {@code maxResults} entities have been
     * collected, the distance bound has been passed, or the top-level tiles have been searched.
     * <p>
     * A candidate is kept when its distance from the search point is not less than {@code minDistance}, is not
     * greater than {@code maxDistance} (only checked when {@code maxDistance} is non-zero), and, when
     * {@code minMatch} is given, it sorts strictly after {@code minMatch} under the distance comparator.
     *
     * @param minMatch entity that every result must sort strictly after, or {@code null} for no lower bound
     *            (presumably the last entity of a previous page); its distance to {@code searchPoint} is
     *            recalculated as a side effect
     * @param geoCells cells to resume from, typically {@link SearchResults#lastSearchedGeoCells} of an earlier
     *            call; when {@code null} or empty the search starts at the maximum-resolution cell containing
     *            {@code searchPoint}. The first element is used as the containing cell.
     * @param searchPoint the centre of the search
     * @param propertyName passed through unchanged to {@code doSearch}
     * @param minDistance the minimum distance (inclusive)
     * @param maxDistance the maximum distance (inclusive); {@code 0} means no upper bound
     * @param maxResults the maximum number of results to return
     * @return the matches, sorted by the distance comparator, together with the cell list as it stood when the
     *         search stopped
     * @throws Exception if {@code doSearch} or a geocell computation fails
     */
    public final SearchResults proximitySearch(final EntityLocationRef minMatch, final List<String> geoCells,
            Point searchPoint, String propertyName, double minDistance, double maxDistance, final int maxResults)
            throws Exception {

        List<EntityLocationRef> entityLocations = new ArrayList<EntityLocationRef>(maxResults);

        /*
         * The currently-being-searched geocells. NOTES: Must always be of the same
         * resolution. Must always form a rectangular region. One of these must be
         * equal to curContainingGeocell.
         */
        List<String> curGeocells = new ArrayList<String>();
        String curContainingGeocell;

        if (geoCells != null && geoCells.size() > 0) {
            // we have some cells used from last time, re-use them
            curGeocells.addAll(geoCells);
            curContainingGeocell = geoCells.get(0);
        }
        else {
            // start at the bottom: the finest cell that contains the search point
            curContainingGeocell = GeocellUtils.compute(searchPoint, MAX_GEOCELL_RESOLUTION);
            curGeocells.add(curContainingGeocell);
        }

        if (minMatch != null) {
            // make sure the comparator sees a distance relative to this search point
            minMatch.calcDistance(searchPoint);
        }

        // Set of already searched cells
        Set<String> searchedCells = new HashSet<String>();

        // placeholder so the first pass sees a nearest-edge distance of 0
        int noDirection[] = { 0, 0 };
        List<Tuple<int[], Double>> sortedEdgesDistances = Arrays.asList(new Tuple<int[], Double>(noDirection, 0d));
        boolean done = false;

        while (!curGeocells.isEmpty() && entityLocations.size() < maxResults) {
            // first entry of the distance-sorted edge list for the cells searched so far
            double closestPossibleNextResultDist = sortedEdgesDistances.get(0).getSecond();
            if (maxDistance > 0 && closestPossibleNextResultDist > maxDistance) {
                break;
            }

            Set<String> curTempUnique = new HashSet<String>(curGeocells);
            curTempUnique.removeAll(searchedCells);
            List<String> curGeocellsUnique = new ArrayList<String>(curTempUnique);

            UUID lastReturned = null;
            // size of the last page fetched for this set of cells; stays 0 when there was nothing new to fetch
            int lastPageSize = 0;

            // we need to keep searching everything in our tiles until we don't get
            // any more results, then we'll have the closest points and can move on
            // to the next tiles. Nothing is fetched when every current cell was
            // already searched.
            if (!curGeocellsUnique.isEmpty()) {
                do {
                    Set<HColumn<ByteBuffer, ByteBuffer>> queryResults = doSearch(curGeocellsUnique, lastReturned,
                            searchPoint, propertyName, MAX_FETCH_SIZE);

                    if (logger.isDebugEnabled()) {
                        logger.debug("fetch complete for: {}", StringUtils.join(curGeocellsUnique, ", "));
                    }

                    // treat a missing page like an empty one
                    if (queryResults == null) {
                        lastPageSize = 0;
                        break;
                    }
                    lastPageSize = queryResults.size();

                    // Merge the new page into the sorted results
                    for (HColumn<ByteBuffer, ByteBuffer> column : queryResults) {

                        // column name: (entity uuid, entity type, timestamp uuid)
                        DynamicComposite composite = DynamicComposite.fromByteBuffer(column.getName());

                        UUID uuid = composite.get(0, ue);

                        // remembered so the next page starts from the last column read
                        lastReturned = uuid;

                        String type = composite.get(1, se);
                        UUID timestampUuid = composite.get(2, ue);

                        // column value: (latitude, longitude)
                        composite = DynamicComposite.fromByteBuffer(column.getValue());
                        Double latitude = composite.get(0, de);
                        Double longitude = composite.get(1, de);

                        EntityLocationRef entityLocation = new EntityLocationRef(type, uuid, timestampUuid,
                                latitude, longitude);

                        double distance = entityLocation.calcDistance(searchPoint);

                        // discard: it's too close, too far, or not after the minimum we
                        // should match
                        if (distance < minDistance || (maxDistance != 0 && distance > maxDistance)
                                || (minMatch != null && COMP.compare(entityLocation, minMatch) <= 0)) {
                            continue;
                        }

                        int index = Collections.binarySearch(entityLocations, entityLocation, COMP);

                        // already in the results
                        if (index > -1) {
                            continue;
                        }

                        // binarySearch returns -(insertion point) - 1 when not found
                        index = (index + 1) * -1;

                        // sorts after everything we could return, no point in adding it
                        if (index >= maxResults) {
                            continue;
                        }

                        entityLocations.add(index, entityLocation);

                        // Discard any additional entries as we iterate to avoid holding
                        // them all in ram
                        while (entityLocations.size() > maxResults) {
                            entityLocations.remove(entityLocations.size() - 1);
                        }
                    }
                } while (lastPageSize == MAX_FETCH_SIZE);
            }

            searchedCells.addAll(curGeocells);

            /*
             * We've searched everything and have a full set, we want to return the
             * "current" tiles to search next time for the cursor, since cass could
             * contain more results
             */
            if (done || entityLocations.size() == maxResults) {
                break;
            }

            sortedEdgesDistances = GeocellUtils.distanceSortedEdges(curGeocells, searchPoint);

            if (lastPageSize == 0 || curGeocells.size() == 4) {
                /*
                 * Either no results (in which case we optimize by not looking at
                 * adjacents, go straight to the parent) or we've searched 4 adjacent
                 * geocells, in which case we should now search the parents of those
                 * geocells.
                 */
                curContainingGeocell = curContainingGeocell.substring(0,
                        Math.max(curContainingGeocell.length() - 1, 0));
                if (curContainingGeocell.length() == 0) {
                    // final check - top level tiles
                    curGeocells.clear();
                    String[] items = "0123456789abcdef".split("(?!^)");
                    Collections.addAll(curGeocells, items);
                    done = true;
                } else {
                    List<String> oldCurGeocells = new ArrayList<String>(curGeocells);
                    curGeocells.clear();
                    for (String cell : oldCurGeocells) {
                        if (cell.length() > 0) {
                            String newCell = cell.substring(0, cell.length() - 1);
                            if (!curGeocells.contains(newCell)) {
                                curGeocells.add(newCell);
                            }
                        }
                    }
                }
            } else if (curGeocells.size() == 1) {
                // Get adjacent in one direction, nearest edge first.
                for (Tuple<int[], Double> sortedEdgesDistance : sortedEdgesDistances) {

                    int nearestEdge[] = sortedEdgesDistance.getFirst();
                    String edge = GeocellUtils.adjacent(curGeocells.get(0), nearestEdge);

                    // we're at the edge of the world, search in a different direction
                    if (edge == null) {
                        continue;
                    }

                    curGeocells.add(edge);
                    break;
                }
            } else if (curGeocells.size() == 2) {
                // Get adjacents in perpendicular direction.
                int nearestEdge[] = GeocellUtils
                        .distanceSortedEdges(Arrays.asList(curContainingGeocell), searchPoint).get(0).getFirst();

                // a zero first component means the nearest edge was vertical, so the
                // perpendicular direction is horizontal (non-zero first component)
                boolean wantHorizontal = nearestEdge[0] == 0;

                for (Tuple<int[], Double> edgeDistance : sortedEdgesDistances) {
                    int[] direction = edgeDistance.getFirst();
                    if ((direction[0] != 0) != wantHorizontal) {
                        continue;
                    }

                    List<String> neighbours = adjacentCells(curGeocells, direction);

                    // we're at the edge of the world on this side, try the next
                    // perpendicular direction instead of adding missing cells
                    if (neighbours.isEmpty()) {
                        continue;
                    }

                    curGeocells.addAll(neighbours);
                    break;
                }
            }

            logger.debug("{} results found.", entityLocations.size());
        }

        // now we have our final sets, construct the results

        return new SearchResults(entityLocations, curGeocells);
    }

    /**
     * Returns the neighbour of every cell in {@code cells} in the given direction, in the same order, or an empty
     * list when at least one of the cells has no neighbour in that direction.
     */
    private static List<String> adjacentCells(List<String> cells, int[] direction) {
        List<String> neighbours = new ArrayList<String>(cells.size());
        for (String cell : cells) {
            String neighbour = GeocellUtils.adjacent(cell, direction);
            if (neighbour == null) {
                return Collections.<String>emptyList();
            }
            neighbours.add(neighbour);
        }
        return neighbours;
    }

    /**
     * Loads index columns for the given geocells across all of their index buckets.
     * <p>
     * One row key is built per (geocell, bucket) pair from {@code key}, the geocell dictionary name, the cell and
     * the bucket; the rows are then read through the multi-bucket loader.
     *
     * @param key the leading component of every row key
     * @param curGeocellsUnique the geocells to read
     * @param searchPoint not used by this method
     * @param startId when non-null, columns are read from this UUID onwards (greater-than-or-equal); when
     *            {@code null} no start bound is passed
     * @param count passed through to the loader (presumably the maximum number of columns to return)
     * @return the columns produced by the loader
     * @throws Exception if the application reference, bucket lookup or load fails
     */
    protected TreeSet<HColumn<ByteBuffer, ByteBuffer>> query(Object key, List<String> curGeocellsUnique,
            Point searchPoint, UUID startId, int count) throws Exception {

        final List<Object> rowKeys = new ArrayList<Object>();
        final UUID applicationId = em.getApplicationRef().getUuid();

        // every cell is spread over several buckets; each pair is its own row
        for (String cell : curGeocellsUnique) {
            for (String bucket : locator.getBuckets(applicationId, IndexType.GEO, cell)) {
                rowKeys.add(key(key, DICTIONARY_GEOCELL, cell, bucket));
            }
        }

        // optional lower bound for the column range
        DynamicComposite rangeStart = null;
        if (startId != null) {
            rangeStart = new DynamicComposite(startId);
            setEqualityFlag(rangeStart, ComponentEquality.GREATER_THAN_EQUAL);
        }

        return IndexMultiBucketSetLoader.load(cass, ENTITY_INDEX, applicationId, rowKeys, rangeStart, null, count,
                false);
    }

    /**
     * Fetches one page of index columns for the given geocells. Called repeatedly by the proximity search.
     * <p>
     * The caller reads each column name as a composite of (entity UUID, entity type, timestamp UUID) and each
     * column value as a composite of (latitude, longitude). A returned set whose size equals {@code pageSize}
     * makes the caller request another page.
     *
     * @param geoCells the geocells to read; the caller passes only cells it has not searched yet
     * @param startId {@code null} for the first page, otherwise the entity UUID of the last column the caller
     *            read from the previous page
     * @param searchPoint the centre of the search
     * @param propertyName the property name given to the proximity search, passed through unchanged
     * @param pageSize the page size requested by the caller
     * @return the columns found for this page
     * @throws Exception if the lookup fails
     */
    protected abstract TreeSet<HColumn<ByteBuffer, ByteBuffer>> doSearch(List<String> geoCells, UUID startId,
            Point searchPoint, String propertyName, int pageSize) throws Exception;

    /**
     * Simple holder for the outcome of a proximity search. Both lists are stored by reference, not copied.
     */
    public static class SearchResults {

        /** The entity locations that matched the search. */
        public final List<EntityLocationRef> entityLocations;

        /** The geocell list the search ended with; can be handed back to resume a search. */
        public final List<String> lastSearchedGeoCells;

        /**
         * @param entityLocations the matching entity locations
         * @param lastSearchedGeoCells the geocell list the search ended with
         */
        public SearchResults(final List<EntityLocationRef> entityLocations,
                final List<String> lastSearchedGeoCells) {
            this.lastSearchedGeoCells = lastSearchedGeoCells;
            this.entityLocations = entityLocations;
        }
    }
}