org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState.java Source code

Introduction

Here is the source code for org.apache.lucene.facet.sortedset.DefaultSortedSetDocValuesReaderState.java
Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.facet.sortedset;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState.OrdRange;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.BytesRef;

/**
 * Default implementation of {@link SortedSetDocValuesReaderState}. You must ensure the original
 * {@link IndexReader} passed to the constructor is not closed whenever you use this class!
 *
 * <p>The constructor walks every ordinal of the top-level {@link SortedSetDocValues} once and
 * records, per dimension, the contiguous range of ordinals that belong to it. Only a two-level
 * hierarchy ({@code dim/value}) is supported.
 *
 * <p>All accesses to the internal {@link OrdinalMap} cache are synchronized on the cache map.
 */
public class DefaultSortedSetDocValuesReaderState extends SortedSetDocValuesReaderState {

    /** Name of the indexed doc values field this state reads. */
    private final String field;

    /** Number of unique labels (ordinals) in the field, computed in the constructor. */
    private final int valueCount;

    /** {@link IndexReader} passed to the constructor. */
    public final IndexReader reader;

    /**
     * Cache of the top-level {@link OrdinalMap}, keyed by field name. Guarded by synchronizing
     * on this map itself.
     */
    private final Map<String, OrdinalMap> cachedOrdMaps = new HashMap<>();

    /** Dimension name to its inclusive ordinal range; populated in the constructor. */
    private final Map<String, OrdRange> prefixToOrdRange = new HashMap<>();

    /**
     * Creates this, pulling doc values from the default {@link
     * FacetsConfig#DEFAULT_INDEX_FIELD_NAME}.
     *
     * @param reader the reader to pull doc values from
     * @throws IOException if reading the doc values fails
     * @throws IllegalArgumentException see {@link
     *     #DefaultSortedSetDocValuesReaderState(IndexReader, String)}
     */
    public DefaultSortedSetDocValuesReaderState(IndexReader reader) throws IOException {
        this(reader, FacetsConfig.DEFAULT_INDEX_FIELD_NAME);
    }

    /**
     * Creates this, pulling doc values from the specified field.
     *
     * @param reader the reader to pull doc values from
     * @param field the indexed field holding the facet labels
     * @throws IOException if reading the doc values fails
     * @throws IllegalArgumentException if the field has no sorted-set doc values, if it has more
     *     than {@link Integer#MAX_VALUE} unique values, or if any label is not exactly two
     *     components deep (dim/value)
     */
    public DefaultSortedSetDocValuesReaderState(IndexReader reader, String field) throws IOException {
        this.field = field;
        this.reader = reader;

        // We need this to create thread-safe MultiSortedSetDV
        // per collector:
        SortedSetDocValues dv = getDocValues();
        if (dv == null) {
            throw new IllegalArgumentException("field \"" + field + "\" was not indexed with SortedSetDocValues");
        }
        final long totalValues = dv.getValueCount();
        if (totalValues > Integer.MAX_VALUE) {
            throw new IllegalArgumentException(
                    "can only handle valueCount < Integer.MAX_VALUE; got " + totalValues);
        }
        valueCount = (int) totalValues;

        // TODO: we can make this more efficient if eg we can be
        // "involved" when OrdinalMap is being created?  Ie see
        // each term/ord it's assigning as it goes...
        String lastDim = null;
        int startOrd = -1;

        // TODO: this approach can work for full hierarchy?;
        // TaxoReader can't do this since ords are not in
        // "sorted order" ... but we should generalize this to
        // support arbitrary hierarchy:
        for (int ord = 0; ord < valueCount; ord++) {
            final BytesRef term = dv.lookupOrd(ord);
            final String label = term.utf8ToString();
            final String[] components = FacetsConfig.stringToPath(label);
            if (components.length != 2) {
                throw new IllegalArgumentException("this class can only handle 2 level hierarchy (dim/value); got: "
                        + Arrays.toString(components) + " " + label);
            }
            final String dim = components[0];
            if (!dim.equals(lastDim)) {
                // The dimension changed at this ordinal: close the range of the previous
                // dimension (if any) and open a new one starting here.
                if (lastDim != null) {
                    prefixToOrdRange.put(lastDim, new OrdRange(startOrd, ord - 1));
                }
                startOrd = ord;
                lastDim = dim;
            }
        }

        // Close the range of the final dimension; lastDim stays null only when there were no values.
        if (lastDim != null) {
            prefixToOrdRange.put(lastDim, new OrdRange(startOrd, valueCount - 1));
        }
    }

    /**
     * Return the memory usage of this object in bytes. Negative values are illegal.
     *
     * <p>Only the cached {@link OrdinalMap} instances are counted.
     */
    @Override
    public long ramBytesUsed() {
        synchronized (cachedOrdMaps) {
            long bytes = 0;
            for (OrdinalMap cached : cachedOrdMaps.values()) {
                bytes += cached.ramBytesUsed();
            }
            return bytes;
        }
    }

    /**
     * Returns nested resources of this class.
     * The result should be a point-in-time snapshot (to avoid race conditions).
     * @see Accountables
     */
    @Override
    public Collection<Accountable> getChildResources() {
        synchronized (cachedOrdMaps) {
            return Accountables.namedAccountables("DefaultSortedSetDocValuesReaderState", cachedOrdMaps);
        }
    }

    /** Returns a description containing the field name and the reader. */
    @Override
    public String toString() {
        return "DefaultSortedSetDocValuesReaderState(field=" + field + " reader=" + reader + ")";
    }

    /**
     * Return top-level doc values.
     *
     * <p>When no {@link OrdinalMap} is cached for the field, the values are obtained from
     * {@link MultiDocValues#getSortedSetValues} and returned as-is; if they are multi-segment
     * values whose mapping is owned by this reader's cache key, the mapping is cached for later
     * calls. When a mapping is cached, a new {@link MultiSortedSetDocValues} is assembled from
     * per-leaf values on every call.
     *
     * @return the top-level values; may be {@code null} (for example when a leaf has the field
     *     with a doc values type other than {@link DocValuesType#SORTED_SET})
     * @throws IOException if reading the doc values fails
     */
    @Override
    public SortedSetDocValues getDocValues() throws IOException {
        // TODO: this is dup'd from slow composite reader wrapper ... can we factor it out to share?
        final OrdinalMap map;
        // TODO: why are we lazy about this?  It's better if ctor pays the cost, not first query?  Oh, but we
        // call this method from ctor, ok.  Also, we only ever store one entry in the map (for key=field) so
        // why are we using a map?
        synchronized (cachedOrdMaps) {
            map = cachedOrdMaps.get(field);
            if (map == null) {
                // uncached, or not a multi dv
                SortedSetDocValues dv = MultiDocValues.getSortedSetValues(reader, field);
                if (dv instanceof MultiSortedSetDocValues) {
                    OrdinalMap mapping = ((MultiSortedSetDocValues) dv).mapping;
                    IndexReader.CacheHelper cacheHelper = reader.getReaderCacheHelper();
                    // Only cache a mapping that is owned by this reader's cache key.
                    if (cacheHelper != null && mapping.owner == cacheHelper.getKey()) {
                        cachedOrdMaps.put(field, mapping);
                    }
                }
                return dv;
            }
        }

        assert map != null;
        final int size = reader.leaves().size();
        final SortedSetDocValues[] values = new SortedSetDocValues[size];
        final int[] starts = new int[size + 1];
        long cost = 0;
        int i = 0;
        for (LeafReaderContext context : reader.leaves()) {
            final LeafReader leaf = context.reader();
            final FieldInfo fieldInfo = leaf.getFieldInfos().fieldInfo(field);
            if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED_SET) {
                return null;
            }
            SortedSetDocValues leafValues = leaf.getSortedSetDocValues(field);
            if (leafValues == null) {
                // Leaf has no values for this field: substitute an empty instance.
                leafValues = DocValues.emptySortedSet();
            }
            values[i] = leafValues;
            starts[i] = context.docBase;
            cost += leafValues.cost();
            i++;
        }
        // Final entry is the total document count of the top-level reader.
        starts[size] = reader.maxDoc();
        return new MultiSortedSetDocValues(values, starts, map, cost);
    }

    /**
     * Returns mapping from prefix to {@link OrdRange}.
     *
     * <p>The returned map is the internal instance, not a copy.
     */
    @Override
    public Map<String, OrdRange> getPrefixToOrdRange() {
        return prefixToOrdRange;
    }

    /**
     * Returns the {@link OrdRange} for this dimension.
     *
     * @param dim the dimension name
     * @return the ordinal range, or {@code null} if the dimension was not seen in the field
     */
    @Override
    public OrdRange getOrdRange(String dim) {
        return prefixToOrdRange.get(dim);
    }

    /** Indexed field we are reading. */
    @Override
    public String getField() {
        return field;
    }

    /** Returns the {@link IndexReader} passed to the constructor. */
    @Override
    public IndexReader getReader() {
        return reader;
    }

    /** Number of unique labels. */
    @Override
    public int getSize() {
        return valueCount;
    }

}