org.apache.lucene.spatial.prefix.PrefixTreeStrategy.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.lucene.spatial.prefix.PrefixTreeStrategy.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.spatial.prefix;

import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.spatial.SpatialStrategy;
import org.apache.lucene.spatial.prefix.tree.Cell;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.util.ShapeFieldCacheDistanceValueSource;
import org.apache.lucene.util.Bits;
import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Shape;

/**
 * An abstract SpatialStrategy based on {@link SpatialPrefixTree}. The two
 * subclasses are {@link RecursivePrefixTreeStrategy} and {@link
 * TermQueryPrefixTreeStrategy}.  This strategy is most effective as a fast
 * approximate spatial search filter.
 * <p>
 * <b>Characteristics:</b>
 * <br>
 * <ul>
 * <li>Can index any shape; however only {@link RecursivePrefixTreeStrategy}
 * can effectively search non-point shapes.</li>
 * <li>Can index a variable number of shapes per field value. This strategy
 * can do it via multiple calls to {@link #createIndexableFields(org.locationtech.spatial4j.shape.Shape)}
 * for a document or by giving it some sort of Shape aggregate (e.g. JTS
 * WKT MultiPoint).  The shape's boundary is approximated to a grid precision.
 * </li>
 * <li>Can query with any shape.  The shape's boundary is approximated to a grid
 * precision.</li>
 * <li>Only {@link org.apache.lucene.spatial.query.SpatialOperation#Intersects}
 * is supported.  If only points are indexed then this is effectively equivalent
 * to IsWithin.</li>
 * <li>The strategy supports {@link #makeDistanceValueSource(org.locationtech.spatial4j.shape.Point,double)}
 * even for multi-valued data, so long as the indexed data is all points; the
 * behavior is undefined otherwise.  However, <em>it will likely be removed in
 * the future</em> in lieu of using another strategy with a more scalable
 * implementation.  Use of this call is the only
 * circumstance in which a cache is used.  The cache is simple but as such
 * it doesn't scale to large numbers of points nor is it real-time-search
 * friendly.</li>
 * </ul>
 * <p>
 * <b>Implementation:</b>
 * <p>
 * The {@link SpatialPrefixTree} does most of the work, for example returning
 * a list of terms representing grids of various sizes for a supplied shape.
 * An important
 * configuration item is {@link #setDistErrPct(double)} which balances
 * shape precision against scalability.  See those javadocs.
 *
 * @lucene.experimental
 */
public abstract class PrefixTreeStrategy extends SpatialStrategy {
    protected final SpatialPrefixTree grid;
    private final Map<String, PointPrefixTreeFieldCacheProvider> provider = new ConcurrentHashMap<>();
    protected int defaultFieldValuesArrayLen = 2;
    protected double distErrPct = SpatialArgs.DEFAULT_DISTERRPCT;// [ 0 TO 0.5 ]
    protected boolean pointsOnly = false;//if true, there are no leaves

    public PrefixTreeStrategy(SpatialPrefixTree grid, String fieldName) {
        super(grid.getSpatialContext(), fieldName);
        this.grid = grid;
    }

    public SpatialPrefixTree getGrid() {
        return grid;
    }

    /**
     * A memory hint used by {@link #makeDistanceValueSource(org.locationtech.spatial4j.shape.Point)}
     * for how big the initial size of each Document's array should be. The
     * default is 2.  Set this to slightly more than the default expected number
     * of points per document.
     */
    public void setDefaultFieldValuesArrayLen(int defaultFieldValuesArrayLen) {
        this.defaultFieldValuesArrayLen = defaultFieldValuesArrayLen;
    }

    public double getDistErrPct() {
        return distErrPct;
    }

    /**
     * The default measure of shape precision affecting shapes at index and query
     * times. Points don't use this as they are always indexed at the configured
     * maximum precision ({@link org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree#getMaxLevels()});
     * this applies to all other shapes. Specific shapes at index and query time
     * can use something different than this default value.  If you don't set a
     * default then the default is {@link SpatialArgs#DEFAULT_DISTERRPCT} --
     * 2.5%.
     *
     * @see org.apache.lucene.spatial.query.SpatialArgs#getDistErrPct()
     */
    public void setDistErrPct(double distErrPct) {
        this.distErrPct = distErrPct;
    }

    public boolean isPointsOnly() {
        return pointsOnly;
    }

    /** True if only indexed points shall be supported. There are no "leafs" in such a case, except those
     * at maximum precision. */
    public void setPointsOnly(boolean pointsOnly) {
        this.pointsOnly = pointsOnly;
    }

    @Override
    public Field[] createIndexableFields(Shape shape) {
        double distErr = SpatialArgs.calcDistanceFromErrPct(shape, distErrPct, ctx);
        return createIndexableFields(shape, distErr);
    }

    /**
     * Turns {@link SpatialPrefixTree#getTreeCellIterator(Shape, int)} into a
     * {@link org.apache.lucene.analysis.TokenStream}.
     */
    public Field[] createIndexableFields(Shape shape, double distErr) {
        int detailLevel = grid.getLevelForDistance(distErr);
        return createIndexableFields(shape, detailLevel);
    }

    public Field[] createIndexableFields(Shape shape, int detailLevel) {
        //TODO re-use TokenStream LUCENE-5776: Subclass Field, put cell iterator there, override tokenStream()
        Iterator<Cell> cells = createCellIteratorToIndex(shape, detailLevel, null);
        CellToBytesRefIterator cellToBytesRefIterator = newCellToBytesRefIterator();
        cellToBytesRefIterator.reset(cells);
        BytesRefIteratorTokenStream tokenStream = new BytesRefIteratorTokenStream();
        tokenStream.setBytesRefIterator(cellToBytesRefIterator);
        Field field = new Field(getFieldName(), tokenStream, FIELD_TYPE);
        return new Field[] { field };
    }

    public class ShapeTokenStream extends BytesRefIteratorTokenStream {

        public void setShape(Shape shape) {
            double distErr = SpatialArgs.calcDistanceFromErrPct(shape, distErrPct, ctx);
            int detailLevel = grid.getLevelForDistance(distErr);
            Iterator<Cell> cells = createCellIteratorToIndex(shape, detailLevel, null);
            CellToBytesRefIterator cellToBytesRefIterator = newCellToBytesRefIterator();
            cellToBytesRefIterator.reset(cells);
            setBytesRefIterator(cellToBytesRefIterator);
        }

    }

    public ShapeTokenStream tokenStream() {
        return new ShapeTokenStream();
    }

    protected CellToBytesRefIterator newCellToBytesRefIterator() {
        //subclasses could return one that never emits leaves, or does both, or who knows.
        return new CellToBytesRefIterator();
    }

    protected Iterator<Cell> createCellIteratorToIndex(Shape shape, int detailLevel, Iterator<Cell> reuse) {
        if (pointsOnly && !isPointShape(shape)) {
            throw new IllegalArgumentException(
                    "pointsOnly is true yet a " + shape.getClass() + " is given for indexing");
        }
        return grid.getTreeCellIterator(shape, detailLevel);//TODO should take a re-use iterator
    }

    /* Indexed, tokenized, not stored. */
    public static final FieldType FIELD_TYPE = new FieldType();

    static {
        FIELD_TYPE.setTokenized(true);
        FIELD_TYPE.setOmitNorms(true);
        FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
        FIELD_TYPE.freeze();
    }

    @Override
    public DoubleValuesSource makeDistanceValueSource(Point queryPoint, double multiplier) {
        PointPrefixTreeFieldCacheProvider p = provider.get(getFieldName());
        if (p == null) {
            synchronized (this) {//double checked locking idiom is okay since provider is threadsafe
                p = provider.get(getFieldName());
                if (p == null) {
                    p = new PointPrefixTreeFieldCacheProvider(grid, getFieldName(), defaultFieldValuesArrayLen);
                    provider.put(getFieldName(), p);
                }
            }
        }

        return new ShapeFieldCacheDistanceValueSource(ctx, p, queryPoint, multiplier);
    }

    /**
     * Computes spatial facets in two dimensions as a grid of numbers.  The data is often visualized as a so-called
     * "heatmap".
     *
     * @see HeatmapFacetCounter#calcFacets(PrefixTreeStrategy, IndexReaderContext, Bits, Shape, int, int)
     */
    public HeatmapFacetCounter.Heatmap calcFacets(IndexReaderContext context, Bits topAcceptDocs, Shape inputShape,
            final int facetLevel, int maxCells) throws IOException {
        return HeatmapFacetCounter.calcFacets(this, context, topAcceptDocs, inputShape, facetLevel, maxCells);
    }

    /**
     * Returns true if the {@code shape} is a {@link Point}.  For custom spatial contexts, it may make sense to
     * have certain other shapes return true.
     * @lucene.experimental
     */
    protected boolean isPointShape(Shape shape) {
        return shape instanceof Point;
    }
}