org.apache.lucene.search.PointRangeQuery.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.lucene.search.PointRangeQuery.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search;

import java.io.IOException;
import java.util.Arrays;
import java.util.Objects;

import org.apache.lucene.document.IntPoint;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.FutureArrays;

/** 
 * Abstract class for range queries against single or multidimensional points such as
 * {@link IntPoint}.
 * <p>
 * This is for subclasses and works on the underlying binary encoding: to
 * create range queries for lucene's standard {@code Point} types, refer to factory
 * methods on those classes, e.g. {@link IntPoint#newRangeQuery IntPoint.newRangeQuery()} for 
 * fields indexed with {@link IntPoint}.
 * <p>
 * For a single-dimensional field this query is a simple range query; in a multi-dimensional field it's a box shape.
 * @see PointValues
 * @lucene.experimental
 */
public abstract class PointRangeQuery extends Query {
    final String field;
    final int numDims;
    final int bytesPerDim;
    final byte[] lowerPoint;
    final byte[] upperPoint;

    /** 
     * Expert: create a multidimensional range query for point values.
     *
     * @param field field name. must not be {@code null}.
     * @param lowerPoint lower portion of the range (inclusive).
     * @param upperPoint upper portion of the range (inclusive).
     * @param numDims number of dimensions.
     * @throws IllegalArgumentException if {@code field} is null, or if {@code lowerValue.length != upperValue.length}
     */
    protected PointRangeQuery(String field, byte[] lowerPoint, byte[] upperPoint, int numDims) {
        checkArgs(field, lowerPoint, upperPoint);
        this.field = field;
        if (numDims <= 0) {
            throw new IllegalArgumentException("numDims must be positive, got " + numDims);
        }
        if (lowerPoint.length == 0) {
            throw new IllegalArgumentException("lowerPoint has length of zero");
        }
        if (lowerPoint.length % numDims != 0) {
            throw new IllegalArgumentException("lowerPoint is not a fixed multiple of numDims");
        }
        if (lowerPoint.length != upperPoint.length) {
            throw new IllegalArgumentException("lowerPoint has length=" + lowerPoint.length
                    + " but upperPoint has different length=" + upperPoint.length);
        }
        this.numDims = numDims;
        this.bytesPerDim = lowerPoint.length / numDims;

        this.lowerPoint = lowerPoint;
        this.upperPoint = upperPoint;
    }

    /** 
     * Check preconditions for all factory methods
     * @throws IllegalArgumentException if {@code field}, {@code lowerPoint} or {@code upperPoint} are null.
     */
    public static void checkArgs(String field, Object lowerPoint, Object upperPoint) {
        if (field == null) {
            throw new IllegalArgumentException("field must not be null");
        }
        if (lowerPoint == null) {
            throw new IllegalArgumentException("lowerPoint must not be null");
        }
        if (upperPoint == null) {
            throw new IllegalArgumentException("upperPoint must not be null");
        }
    }

    @Override
    public void visit(QueryVisitor visitor) {
        if (visitor.acceptField(field)) {
            visitor.visitLeaf(this);
        }
    }

    @Override
    public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {

        // We don't use RandomAccessWeight here: it's no good to approximate with "match all docs".
        // This is an inverted structure and should be used in the first pass:

        return new ConstantScoreWeight(this, boost) {

            private boolean matches(byte[] packedValue) {
                for (int dim = 0; dim < numDims; dim++) {
                    int offset = dim * bytesPerDim;
                    if (FutureArrays.compareUnsigned(packedValue, offset, offset + bytesPerDim, lowerPoint, offset,
                            offset + bytesPerDim) < 0) {
                        // Doc's value is too low, in this dimension
                        return false;
                    }
                    if (FutureArrays.compareUnsigned(packedValue, offset, offset + bytesPerDim, upperPoint, offset,
                            offset + bytesPerDim) > 0) {
                        // Doc's value is too high, in this dimension
                        return false;
                    }
                }
                return true;
            }

            private Relation relate(byte[] minPackedValue, byte[] maxPackedValue) {

                boolean crosses = false;

                for (int dim = 0; dim < numDims; dim++) {
                    int offset = dim * bytesPerDim;

                    if (FutureArrays.compareUnsigned(minPackedValue, offset, offset + bytesPerDim, upperPoint,
                            offset, offset + bytesPerDim) > 0
                            || FutureArrays.compareUnsigned(maxPackedValue, offset, offset + bytesPerDim,
                                    lowerPoint, offset, offset + bytesPerDim) < 0) {
                        return Relation.CELL_OUTSIDE_QUERY;
                    }

                    crosses |= FutureArrays.compareUnsigned(minPackedValue, offset, offset + bytesPerDim,
                            lowerPoint, offset, offset + bytesPerDim) < 0
                            || FutureArrays.compareUnsigned(maxPackedValue, offset, offset + bytesPerDim,
                                    upperPoint, offset, offset + bytesPerDim) > 0;
                }

                if (crosses) {
                    return Relation.CELL_CROSSES_QUERY;
                } else {
                    return Relation.CELL_INSIDE_QUERY;
                }
            }

            private IntersectVisitor getIntersectVisitor(DocIdSetBuilder result) {
                return new IntersectVisitor() {

                    DocIdSetBuilder.BulkAdder adder;

                    @Override
                    public void grow(int count) {
                        adder = result.grow(count);
                    }

                    @Override
                    public void visit(int docID) {
                        adder.add(docID);
                    }

                    @Override
                    public void visit(int docID, byte[] packedValue) {
                        if (matches(packedValue)) {
                            visit(docID);
                        }
                    }

                    @Override
                    public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException {
                        if (matches(packedValue)) {
                            int docID;
                            while ((docID = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                                visit(docID);
                            }
                        }
                    }

                    @Override
                    public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                        return relate(minPackedValue, maxPackedValue);
                    }
                };
            }

            /**
             * Create a visitor that clears documents that do NOT match the range.
             */
            private IntersectVisitor getInverseIntersectVisitor(FixedBitSet result, int[] cost) {
                return new IntersectVisitor() {

                    @Override
                    public void visit(int docID) {
                        result.clear(docID);
                        cost[0]--;
                    }

                    @Override
                    public void visit(int docID, byte[] packedValue) {
                        if (matches(packedValue) == false) {
                            visit(docID);
                        }
                    }

                    @Override
                    public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException {
                        if (matches(packedValue) == false) {
                            int docID;
                            while ((docID = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                                visit(docID);
                            }
                        }
                    }

                    @Override
                    public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                        Relation relation = relate(minPackedValue, maxPackedValue);
                        switch (relation) {
                        case CELL_INSIDE_QUERY:
                            // all points match, skip this subtree
                            return Relation.CELL_OUTSIDE_QUERY;
                        case CELL_OUTSIDE_QUERY:
                            // none of the points match, clear all documents
                            return Relation.CELL_INSIDE_QUERY;
                        default:
                            return relation;
                        }
                    }
                };
            }

            @Override
            public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
                LeafReader reader = context.reader();

                PointValues values = reader.getPointValues(field);
                if (values == null) {
                    // No docs in this segment/field indexed any points
                    return null;
                }

                if (values.getNumIndexDimensions() != numDims) {
                    throw new IllegalArgumentException(
                            "field=\"" + field + "\" was indexed with numIndexDimensions="
                                    + values.getNumIndexDimensions() + " but this query has numDims=" + numDims);
                }
                if (bytesPerDim != values.getBytesPerDimension()) {
                    throw new IllegalArgumentException("field=\"" + field + "\" was indexed with bytesPerDim="
                            + values.getBytesPerDimension() + " but this query has bytesPerDim=" + bytesPerDim);
                }

                boolean allDocsMatch;
                if (values.getDocCount() == reader.maxDoc()) {
                    final byte[] fieldPackedLower = values.getMinPackedValue();
                    final byte[] fieldPackedUpper = values.getMaxPackedValue();
                    allDocsMatch = true;
                    for (int i = 0; i < numDims; ++i) {
                        int offset = i * bytesPerDim;
                        if (FutureArrays.compareUnsigned(lowerPoint, offset, offset + bytesPerDim, fieldPackedLower,
                                offset, offset + bytesPerDim) > 0
                                || FutureArrays.compareUnsigned(upperPoint, offset, offset + bytesPerDim,
                                        fieldPackedUpper, offset, offset + bytesPerDim) < 0) {
                            allDocsMatch = false;
                            break;
                        }
                    }
                } else {
                    allDocsMatch = false;
                }

                final Weight weight = this;
                if (allDocsMatch) {
                    // all docs have a value and all points are within bounds, so everything matches
                    return new ScorerSupplier() {
                        @Override
                        public Scorer get(long leadCost) {
                            return new ConstantScoreScorer(weight, score(), scoreMode,
                                    DocIdSetIterator.all(reader.maxDoc()));
                        }

                        @Override
                        public long cost() {
                            return reader.maxDoc();
                        }
                    };
                } else {
                    return new ScorerSupplier() {

                        final DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
                        final IntersectVisitor visitor = getIntersectVisitor(result);
                        long cost = -1;

                        @Override
                        public Scorer get(long leadCost) throws IOException {
                            if (values.getDocCount() == reader.maxDoc() && values.getDocCount() == values.size()
                                    && cost() > reader.maxDoc() / 2) {
                                // If all docs have exactly one value and the cost is greater
                                // than half the leaf size then maybe we can make things faster
                                // by computing the set of documents that do NOT match the range
                                final FixedBitSet result = new FixedBitSet(reader.maxDoc());
                                result.set(0, reader.maxDoc());
                                int[] cost = new int[] { reader.maxDoc() };
                                values.intersect(getInverseIntersectVisitor(result, cost));
                                final DocIdSetIterator iterator = new BitSetIterator(result, cost[0]);
                                return new ConstantScoreScorer(weight, score(), scoreMode, iterator);
                            }

                            values.intersect(visitor);
                            DocIdSetIterator iterator = result.build().iterator();
                            return new ConstantScoreScorer(weight, score(), scoreMode, iterator);
                        }

                        @Override
                        public long cost() {
                            if (cost == -1) {
                                // Computing the cost may be expensive, so only do it if necessary
                                cost = values.estimateDocCount(visitor);
                                assert cost >= 0;
                            }
                            return cost;
                        }
                    };
                }
            }

            @Override
            public Scorer scorer(LeafReaderContext context) throws IOException {
                ScorerSupplier scorerSupplier = scorerSupplier(context);
                if (scorerSupplier == null) {
                    return null;
                }
                return scorerSupplier.get(Long.MAX_VALUE);
            }

            @Override
            public boolean isCacheable(LeafReaderContext ctx) {
                return true;
            }

        };
    }

    public String getField() {
        return field;
    }

    public int getNumDims() {
        return numDims;
    }

    public int getBytesPerDim() {
        return bytesPerDim;
    }

    public byte[] getLowerPoint() {
        return lowerPoint.clone();
    }

    public byte[] getUpperPoint() {
        return upperPoint.clone();
    }

    @Override
    public final int hashCode() {
        int hash = classHash();
        hash = 31 * hash + field.hashCode();
        hash = 31 * hash + Arrays.hashCode(lowerPoint);
        hash = 31 * hash + Arrays.hashCode(upperPoint);
        hash = 31 * hash + numDims;
        hash = 31 * hash + Objects.hashCode(bytesPerDim);
        return hash;
    }

    @Override
    public final boolean equals(Object o) {
        return sameClassAs(o) && equalsTo(getClass().cast(o));
    }

    private boolean equalsTo(PointRangeQuery other) {
        return Objects.equals(field, other.field) && numDims == other.numDims && bytesPerDim == other.bytesPerDim
                && Arrays.equals(lowerPoint, other.lowerPoint) && Arrays.equals(upperPoint, other.upperPoint);
    }

    @Override
    public final String toString(String field) {
        final StringBuilder sb = new StringBuilder();
        if (this.field.equals(field) == false) {
            sb.append(this.field);
            sb.append(':');
        }

        // print ourselves as "range per dimension"
        for (int i = 0; i < numDims; i++) {
            if (i > 0) {
                sb.append(',');
            }

            int startOffset = bytesPerDim * i;

            sb.append('[');
            sb.append(toString(i, ArrayUtil.copyOfSubArray(lowerPoint, startOffset, startOffset + bytesPerDim)));
            sb.append(" TO ");
            sb.append(toString(i, ArrayUtil.copyOfSubArray(upperPoint, startOffset, startOffset + bytesPerDim)));
            sb.append(']');
        }

        return sb.toString();
    }

    /**
     * Returns a string of a single value in a human-readable format for debugging.
     * This is used by {@link #toString()}.
     *
     * @param dimension dimension of the particular value
     * @param value single value, never null
     * @return human readable value for debugging
     */
    protected abstract String toString(int dimension, byte[] value);
}