org.apache.lucene.search.SortedSetSelector.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.lucene.search.SortedSetSelector.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search;

import java.io.IOException;

import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;

import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;

/** Selects a value from the document's set to use as the representative value */
public class SortedSetSelector {

    /** 
     * Type of selection to perform.
     * <p>
     * Limitations:
     * <ul>
     *   <li>Fields containing {@link Integer#MAX_VALUE} or more unique values
     *       are unsupported.
     *   <li>Selectors other than ({@link Type#MIN}) require 
     *       optional codec support. However several codecs provided by Lucene, 
     *       including the current default codec, support this.
     * </ul>
     */
    public enum Type {
        /** 
         * Selects the minimum value in the set 
         */
        MIN,
        /** 
         * Selects the maximum value in the set 
         */
        MAX,
        /** 
         * Selects the middle value in the set.
         * <p>
         * If the set has an even number of values, the lower of the middle two is chosen.
         */
        MIDDLE_MIN,
        /** 
         * Selects the middle value in the set.
         * <p>
         * If the set has an even number of values, the higher of the middle two is chosen
         */
        MIDDLE_MAX
    }

    /** Wraps a multi-valued SortedSetDocValues as a single-valued view, using the specified selector */
    public static SortedDocValues wrap(SortedSetDocValues sortedSet, Type selector) {
        if (sortedSet.getValueCount() >= Integer.MAX_VALUE) {
            throw new UnsupportedOperationException(
                    "fields containing more than " + (Integer.MAX_VALUE - 1) + " unique terms are unsupported");
        }

        SortedDocValues singleton = DocValues.unwrapSingleton(sortedSet);
        if (singleton != null) {
            // it's actually single-valued in practice, but indexed as multi-valued,
            // so just sort on the underlying single-valued dv directly.
            // regardless of selector type, this optimization is safe!
            return singleton;
        } else {
            switch (selector) {
            case MIN:
                return new MinValue(sortedSet);
            case MAX:
                return new MaxValue(sortedSet);
            case MIDDLE_MIN:
                return new MiddleMinValue(sortedSet);
            case MIDDLE_MAX:
                return new MiddleMaxValue(sortedSet);
            default:
                throw new AssertionError();
            }
        }
    }

    /** Wraps a SortedSetDocValues and returns the first ordinal (min) */
    static class MinValue extends SortedDocValues {
        final SortedSetDocValues in;
        private int ord;

        MinValue(SortedSetDocValues in) {
            this.in = in;
        }

        @Override
        public int docID() {
            return in.docID();
        }

        @Override
        public int nextDoc() throws IOException {
            in.nextDoc();
            setOrd();
            return docID();
        }

        @Override
        public int advance(int target) throws IOException {
            in.advance(target);
            setOrd();
            return docID();
        }

        @Override
        public boolean advanceExact(int target) throws IOException {
            if (in.advanceExact(target)) {
                setOrd();
                return true;
            }
            return false;
        }

        @Override
        public long cost() {
            return in.cost();
        }

        @Override
        public int ordValue() {
            return ord;
        }

        @Override
        public BytesRef lookupOrd(int ord) throws IOException {
            return in.lookupOrd(ord);
        }

        @Override
        public int getValueCount() {
            return (int) in.getValueCount();
        }

        @Override
        public int lookupTerm(BytesRef key) throws IOException {
            return (int) in.lookupTerm(key);
        }

        private void setOrd() throws IOException {
            if (docID() != NO_MORE_DOCS) {
                ord = (int) in.nextOrd();
            } else {
                ord = (int) NO_MORE_ORDS;
            }
        }
    }

    /** Wraps a SortedSetDocValues and returns the last ordinal (max) */
    static class MaxValue extends SortedDocValues {
        final SortedSetDocValues in;
        private int ord;

        MaxValue(SortedSetDocValues in) {
            this.in = in;
        }

        @Override
        public int docID() {
            return in.docID();
        }

        @Override
        public int nextDoc() throws IOException {
            in.nextDoc();
            setOrd();
            return docID();
        }

        @Override
        public int advance(int target) throws IOException {
            in.advance(target);
            setOrd();
            return docID();
        }

        @Override
        public boolean advanceExact(int target) throws IOException {
            if (in.advanceExact(target)) {
                setOrd();
                return true;
            }
            return false;
        }

        @Override
        public long cost() {
            return in.cost();
        }

        @Override
        public int ordValue() {
            return ord;
        }

        @Override
        public BytesRef lookupOrd(int ord) throws IOException {
            return in.lookupOrd(ord);
        }

        @Override
        public int getValueCount() {
            return (int) in.getValueCount();
        }

        @Override
        public int lookupTerm(BytesRef key) throws IOException {
            return (int) in.lookupTerm(key);
        }

        private void setOrd() throws IOException {
            if (docID() != NO_MORE_DOCS) {
                while (true) {
                    long nextOrd = in.nextOrd();
                    if (nextOrd == NO_MORE_ORDS) {
                        break;
                    }
                    ord = (int) nextOrd;
                }
            } else {
                ord = (int) NO_MORE_ORDS;
            }
        }
    }

    /** Wraps a SortedSetDocValues and returns the middle ordinal (or min of the two) */
    static class MiddleMinValue extends SortedDocValues {
        final SortedSetDocValues in;
        private int ord;
        private int[] ords = new int[8];

        MiddleMinValue(SortedSetDocValues in) {
            this.in = in;
        }

        @Override
        public int docID() {
            return in.docID();
        }

        @Override
        public int nextDoc() throws IOException {
            in.nextDoc();
            setOrd();
            return docID();
        }

        @Override
        public int advance(int target) throws IOException {
            in.advance(target);
            setOrd();
            return docID();
        }

        @Override
        public boolean advanceExact(int target) throws IOException {
            if (in.advanceExact(target)) {
                setOrd();
                return true;
            }
            return false;
        }

        @Override
        public long cost() {
            return in.cost();
        }

        @Override
        public int ordValue() {
            return ord;
        }

        @Override
        public BytesRef lookupOrd(int ord) throws IOException {
            return in.lookupOrd(ord);
        }

        @Override
        public int getValueCount() {
            return (int) in.getValueCount();
        }

        @Override
        public int lookupTerm(BytesRef key) throws IOException {
            return (int) in.lookupTerm(key);
        }

        private void setOrd() throws IOException {
            if (docID() != NO_MORE_DOCS) {
                int upto = 0;
                while (true) {
                    long nextOrd = in.nextOrd();
                    if (nextOrd == NO_MORE_ORDS) {
                        break;
                    }
                    if (upto == ords.length) {
                        ords = ArrayUtil.grow(ords);
                    }
                    ords[upto++] = (int) nextOrd;
                }

                if (upto == 0) {
                    // iterator should not have returned this docID if it has no ords:
                    assert false;
                    ord = (int) NO_MORE_ORDS;
                } else {
                    ord = ords[(upto - 1) >>> 1];
                }
            } else {
                ord = (int) NO_MORE_ORDS;
            }
        }
    }

    /** Wraps a SortedSetDocValues and returns the middle ordinal (or max of the two) */
    static class MiddleMaxValue extends SortedDocValues {
        final SortedSetDocValues in;
        private int ord;
        private int[] ords = new int[8];

        MiddleMaxValue(SortedSetDocValues in) {
            this.in = in;
        }

        @Override
        public int docID() {
            return in.docID();
        }

        @Override
        public int nextDoc() throws IOException {
            in.nextDoc();
            setOrd();
            return docID();
        }

        @Override
        public int advance(int target) throws IOException {
            in.advance(target);
            setOrd();
            return docID();
        }

        @Override
        public boolean advanceExact(int target) throws IOException {
            if (in.advanceExact(target)) {
                setOrd();
                return true;
            }
            return false;
        }

        @Override
        public long cost() {
            return in.cost();
        }

        @Override
        public int ordValue() {
            return ord;
        }

        @Override
        public BytesRef lookupOrd(int ord) throws IOException {
            return in.lookupOrd(ord);
        }

        @Override
        public int getValueCount() {
            return (int) in.getValueCount();
        }

        @Override
        public int lookupTerm(BytesRef key) throws IOException {
            return (int) in.lookupTerm(key);
        }

        private void setOrd() throws IOException {
            if (docID() != NO_MORE_DOCS) {
                int upto = 0;
                while (true) {
                    long nextOrd = in.nextOrd();
                    if (nextOrd == NO_MORE_ORDS) {
                        break;
                    }
                    if (upto == ords.length) {
                        ords = ArrayUtil.grow(ords);
                    }
                    ords[upto++] = (int) nextOrd;
                }

                if (upto == 0) {
                    // iterator should not have returned this docID if it has no ords:
                    assert false;
                    ord = (int) NO_MORE_ORDS;
                } else {
                    ord = ords[upto >>> 1];
                }
            } else {
                ord = (int) NO_MORE_ORDS;
            }
        }
    }
}