dk.statsbiblioteket.netark.dvenabler.wrapper.DVAtomicReader.java Source code

Java tutorial

Introduction

Here is the source code for dk.statsbiblioteket.netark.dvenabler.wrapper.DVAtomicReader.java

Source

/*
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */
package dk.statsbiblioteket.netark.dvenabler.wrapper;

import dk.statsbiblioteket.netark.dvenabler.DVConfig;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.Log;
import org.apache.lucene.index.*;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.OpenBitSet;

import java.io.IOException;
import java.util.*;

/**
 * Wraps a given AtomicReader and exposes the stored values in the stated fields as DocValues.
 * <p>
 * Fields without a {@link DVConfig} are passed through to the wrapped reader untouched. For configured fields
 * the {@link FieldInfo} supplied by the configuration replaces the original one, and DocValues that are not
 * already present in the wrapped reader are provided by the wrapper classes of this package.
 * <p>
 * Only {@link #getDocsWithField(String)} is synchronized; it is the only method touching the per-field cache.
 */
public class DVAtomicReader extends FilterAtomicReader {
    private static final Log log = LogFactory.getLog(DVAtomicReader.class);

    /** Divisor used to turn {@link System#nanoTime()} differences into milliseconds for log output. */
    private static final long NANOS_PER_MS = 1000000L;

    /** Field name to adjustment; fields absent from this map are never altered. */
    private final Map<String, DVConfig> dvConfigs;
    /**
     * Cache of resolved docsWithField bitmaps. A key mapped to {@code null} means "resolved: no DocValues",
     * which is why a null-permitting HashMap is used and presence is tested with containsKey.
     */
    private final Map<String, Bits> dvContent = new HashMap<>();
    private final long constructionTime = System.nanoTime();

    /**
     * Creates an adjusting reader; removing or/and adding DocValues for the specified fields.
     * @param innerReader the reader to wrap.
     * @param dvConfigs a list of fields to adjust.
     *                 Fields in the innerReader not specified in dvConfigs are passed unmodified.
     * @throws NullPointerException if dvConfigs is null.
     */
    public DVAtomicReader(AtomicReader innerReader, Set<DVConfig> dvConfigs) {
        super(innerReader);
        Objects.requireNonNull(dvConfigs, "dvConfigs must not be null");
        this.dvConfigs = new HashMap<>(dvConfigs.size());
        for (DVConfig dvConfig : dvConfigs) {
            this.dvConfigs.put(dvConfig.getName(), dvConfig);
        }
        log.info("Wrapped AtomicReader with " + maxDoc() + " docs and " + dvConfigs.size() + " field adjustments");
    }

    /**
     * Returns the FieldInfos of the wrapped reader where the entry for every configured field is replaced by
     * the FieldInfo delivered by its {@link DVConfig}. The result is rebuilt on every call.
     * @return the merged FieldInfos, with the same number of entries as the wrapped reader's.
     */
    @Override
    public FieldInfos getFieldInfos() {
        log.info("Merging getFieldInfos called with " + maxDoc() + " docs");
        final long startTime = System.nanoTime();
        final FieldInfos original = super.getFieldInfos();
        final FieldInfo[] modified = new FieldInfo[original.size()];
        int index = 0;
        for (FieldInfo oInfo : original) {
            final DVConfig dvConfig = dvConfigs.get(oInfo.name);
            modified[index++] = dvConfig == null ? oInfo : dvConfig.getFieldInfo();
        }
        log.info("Merged " + original.size() + " original and " + dvConfigs.size() + " tweaked FieldInfos for "
                + maxDoc() + " docs in " + elapsedMs(startTime) + "ms");
        return new FieldInfos(modified);
    }

    /**
     * Creates a bitmap of the documents that has stored values and should have DocValues.
     * Should have been named docsWithDocValueEntriesForField.
     * <p>
     * For configured fields the bitmap is resolved on first request by loading the stored field of every
     * document, and is then cached for subsequent calls.
     * @param field the field to resolve documents for.
     * @return the result from the wrapped reader for unconfigured fields, {@code null} if the configuration
     *         states that the field has no DocValues, else a bitmap with a bit set for each document that
     *         has a stored value for the field.
     * @throws IOException if the wrapped reader fails.
     */
    @Override
    public synchronized Bits getDocsWithField(final String field) throws IOException {
        final DVConfig dvConfig = dvConfigs.get(field);
        if (dvConfig == null) {
            return super.getDocsWithField(field);
        }

        if (!dvContent.containsKey(field)) {
            log.info("Resolving docsWithField(" + field + ")");
            final long startTime = System.nanoTime();
            final int maxDoc = maxDoc();
            dvContent.put(field, dvConfig.hasDocValues() ? resolveDocsWithStored(field, maxDoc) : null);
            log.info("Resolved docsWithField(" + field + ") for " + maxDoc + " docs in "
                    + elapsedMs(startTime) + "ms");
        }
        return dvContent.get(field);
    }

    /** Marks every docID in [0, maxDoc) whose stored document contains the given field. */
    private Bits resolveDocsWithStored(String field, int maxDoc) throws IOException {
        final OpenBitSet hasContent = new OpenBitSet(maxDoc);
        // Restrict stored field loading to the single field of interest
        final Set<String> fieldsToLoad = Collections.singleton(field);
        for (int docID = 0; docID < maxDoc; docID++) {
            if (document(docID, fieldsToLoad).getField(field) != null) {
                hasContent.fastSet(docID);
            }
        }
        return hasContent;
    }

    /**
     * Delivers numeric DocValues for the field.
     * @param field the field to get DocValues for.
     * @return the result from the wrapped reader for unconfigured fields, {@code null} if the configuration
     *         states that the field has no DocValues, the DocValues of the wrapped reader if it has any,
     *         else a {@link NumericDocValuesWrapper} for the field.
     * @throws IOException if the wrapped reader fails.
     */
    @Override
    public NumericDocValues getNumericDocValues(String field) throws IOException {
        if (log.isDebugEnabled()) {
            log.debug("getNumericDocValues(" + field + ") called");
        }
        final DVConfig dvConfig = dvConfigs.get(field);
        if (dvConfig == null) {
            return super.getNumericDocValues(field);
        }
        if (!dvConfig.hasDocValues()) {
            return null;
        }
        final NumericDocValues existing = super.getNumericDocValues(field);
        if (existing != null) {
            log.info(
                    "getNumericDocValues called for field '" + field + "'. DV already present, returning directly");
            return existing;
        }
        log.info("getNumericDocValues called for field '" + field + "' with no DV. Constructing from stored");
        // TODO: Infer whether this is long, int, double or float
        final long startTime = System.nanoTime();
        final NumericDocValues dvs = new NumericDocValuesWrapper(this, dvConfig);
        log.info("getNumericDocValues(" + field + ") for " + maxDoc() + " docs prepared in "
                + elapsedMs(startTime) + "ms");
        return dvs;
    }

    /**
     * Delivers binary DocValues for the field. Construction from stored values is not implemented yet.
     * @param field the field to get DocValues for.
     * @return the result from the wrapped reader for unconfigured fields, the DocValues of the wrapped reader
     *         if the field is configured to have DocValues and the wrapped reader has them, else {@code null}.
     * @throws IOException if the wrapped reader fails.
     */
    @Override
    public BinaryDocValues getBinaryDocValues(String field) throws IOException {
        if (log.isDebugEnabled()) {
            log.debug("getBinaryDocValues(" + field + ") called");
        }
        final DVConfig dvConfig = dvConfigs.get(field);
        if (dvConfig == null) {
            return super.getBinaryDocValues(field);
        }
        if (!dvConfig.hasDocValues()) {
            return null;
        }
        final BinaryDocValues existing = super.getBinaryDocValues(field);
        if (existing != null) {
            log.info("getBinaryDocValues called for field '" + field + "'. DV already present, returning directly");
            return existing;
        }
        log.warn("getBinaryDocValues called for field '" + field + "' with no DV. Not implemented yet!");
        // TODO: Implement this
        return null;
    }

    /**
     * Delivers sorted DocValues for the field.
     * @param field the field to get DocValues for.
     * @return the result from the wrapped reader for unconfigured fields, {@code null} if the configuration
     *         states that the field has no DocValues, the DocValues of the wrapped reader if it has any,
     *         else a {@link SortedDocValuesWrapper} for the field.
     * @throws IOException if the wrapped reader fails.
     */
    @Override
    public SortedDocValues getSortedDocValues(String field) throws IOException {
        if (log.isDebugEnabled()) {
            log.debug("getSortedDocValues(" + field + ") called");
        }
        final DVConfig dvConfig = dvConfigs.get(field);
        if (dvConfig == null) {
            return super.getSortedDocValues(field);
        }
        if (!dvConfig.hasDocValues()) {
            return null;
        }
        final SortedDocValues existing = super.getSortedDocValues(field);
        if (existing != null) {
            log.info("getSortedDocValues called for field '" + field + "'. DV already present, returning directly");
            return existing;
        }
        log.info("getSortedDocValues called for field '" + field + "' with no DV. Constructing from stored");
        final long startTime = System.nanoTime();
        final SortedDocValues dvs = new SortedDocValuesWrapper(this, dvConfig);
        log.info("getSortedDocValues(" + field + ") for " + maxDoc() + " docs prepared in "
                + elapsedMs(startTime) + "ms");
        return dvs;
    }

    /**
     * Delivers sorted set DocValues for the field.
     * @param field the field to get DocValues for.
     * @return the result from the wrapped reader for unconfigured fields, {@code null} if the configuration
     *         states that the field has no DocValues, the DocValues of the wrapped reader if it has any,
     *         else a {@link SortedSetDocValuesWrapper} for the field.
     * @throws IOException if the wrapped reader fails.
     */
    @Override
    public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
        if (log.isDebugEnabled()) {
            log.debug("getSortedSetDocValues(" + field + ") called");
        }
        final DVConfig dvConfig = dvConfigs.get(field);
        if (dvConfig == null) {
            return super.getSortedSetDocValues(field);
        }
        if (!dvConfig.hasDocValues()) {
            return null;
        }
        final SortedSetDocValues existing = super.getSortedSetDocValues(field);
        if (existing != null) {
            log.info("getSortedSetDocValues called for field '" + field
                    + "'. DV already present, returning directly");
            return existing;
        }
        log.info("getSortedSetDocValues called for field '" + field + "' with no DV. Constructing from stored");
        final long startTime = System.nanoTime();
        final SortedSetDocValues dvs = new SortedSetDocValuesWrapper(this, dvConfig);
        log.info("getSortedSetDocValues(" + field + ") for " + maxDoc() + " docs prepared in "
                + elapsedMs(startTime) + "ms");
        return dvs;
    }

    /**
     * Logs the lifetime of this reader and delegates the close to the superclass.
     * @throws IOException if the superclass close fails.
     */
    @Override
    protected void doClose() throws IOException {
        log.info("Close called " + elapsedMs(constructionTime) + "ms after construction");
        super.doClose();
    }

    /** Whole milliseconds (truncated) elapsed since the given {@link System#nanoTime()} reading. */
    private static long elapsedMs(long startNanos) {
        return (System.nanoTime() - startNanos) / NANOS_PER_MS;
    }
}