org.apache.jackrabbit.core.query.lucene.TermDocsCache.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.jackrabbit.core.query.lucene.TermDocsCache.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.jackrabbit.core.query.lucene;

import java.io.IOException;
import java.util.Map;
import java.util.Collections;
import java.util.BitSet;
import java.util.Arrays;
import java.util.Iterator;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.commons.collections.map.LRUMap;
import org.apache.commons.collections.map.LinkedMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * <code>TermDocsCache</code> implements a cache for frequently read
 * {@link TermDocs}.
 * <p>
 * Only terms of a single field are considered. For every requested term text
 * an access counter is maintained; once a term has been requested often
 * enough, the matching document numbers are read once from the underlying
 * reader into a {@link BitSet} and subsequent requests are served from that
 * bit set. Terms that turned out to match no document are remembered in a
 * separate LRU map and answered with {@link EmptyTermDocs#INSTANCE}.
 * <p>
 * Access to the internal maps is synchronized: the entry cache is guarded by
 * a <code>synchronized</code> block on the map itself and the map of unknown
 * values is a synchronized map.
 */
public class TermDocsCache {

    /**
     * The logger instance for this class.
     */
    private static final Logger log = LoggerFactory.getLogger(TermDocsCache.class);

    /**
     * The default cache size.
     */
    private static final int CACHE_SIZE = 10;

    /**
     * Minimum number of times a term must have been requested before its
     * TermDocs are materialized into a bit set. Prevents caching of TermDocs
     * that are read only irregularly.
     */
    private static final int MIN_ACCESS_COUNT = 10;

    /**
     * The underlying index reader.
     */
    private final IndexReader reader;

    /**
     * Only TermDocs for the given <code>field</code> are cached.
     */
    private final String field;

    /**
     * Map of {@link Term#text()} that are unknown to the underlying index.
     */
    private final Map unknownValues = Collections.synchronizedMap(new LRUMap(100));

    /**
     * The cache of the {@link #CACHE_SIZE} most frequently requested TermDocs.
     * Maps term text <code>String</code> to {@link CacheEntry}.
     * All access must happen while holding the monitor of this map.
     */
    private final LinkedMap cache = new LinkedMap();

    /**
     * Creates a new cache for the given <code>reader</code> and
     * <code>field</code>.
     *
     * @param reader the index reader.
     * @param field the field name of the terms to potentially cache.
     */
    public TermDocsCache(IndexReader reader, String field) {
        this.reader = reader;
        this.field = field;
    }

    /**
     * Returns the {@link TermDocs} for the given term.
     * <p>
     * Terms of a field other than the one this cache was created for, as well
     * as a <code>null</code> term, are passed to the underlying reader
     * unchanged and are never cached.
     *
     * @param t the term.
     * @return the term docs for the given term.
     * @throws IOException if an error occurs while reading from the index.
     */
    public TermDocs termDocs(final Term t) throws IOException {
        // Compare by value: relying on reference identity silently disables
        // the cache whenever the configured field name is not the very same
        // String instance as the one held by the term.
        if (t == null || !t.field().equals(field)) {
            return reader.termDocs(t);
        }

        String text = t.text();
        if (unknownValues.get(text) != null) {
            log.debug("EmptyTermDocs({},{})", field, text);
            return EmptyTermDocs.INSTANCE;
        }

        // maintain cache
        CacheEntry entry;
        synchronized (cache) {
            entry = (CacheEntry) cache.get(text);
            if (entry == null) {
                // check space
                if (cache.size() >= CACHE_SIZE) {
                    pruneCache();
                }
                entry = new CacheEntry();
                cache.put(text, entry);
            } else {
                entry.numAccessed++;
            }
        }

        // this is a threshold to prevent caching of TermDocs
        // that are read only irregularly.
        if (entry.numAccessed < MIN_ACCESS_COUNT) {
            if (log.isDebugEnabled()) {
                log.debug("#{} TermDocs({},{})", new Object[] { new Integer(entry.numAccessed), field, text });
            }
            return reader.termDocs(t);
        }

        BitSet bits = entry.bits;
        if (bits == null) {
            // not yet materialized: read the documents from the index once
            bits = collectBits(t);
            if (bits != null) {
                entry.bits = bits;
            }
        }

        if (bits == null) {
            // none collected
            unknownValues.put(text, text);
            return EmptyTermDocs.INSTANCE;
        } else {
            if (log.isDebugEnabled()) {
                log.debug("CachedTermDocs({},{},{}/{})", new Object[] { field, text,
                        new Integer(bits.cardinality()), new Integer(reader.maxDoc()) });
            }
            return new CachedTermDocs(bits);
        }
    }

    /**
     * Makes room in the {@link #cache}: removes every entry whose access
     * count is less than or equal to the count of the entry at the middle of
     * the sorted entries and replaces the access count of each remaining
     * entry with its square root.
     * <p>
     * Must be called while holding the monitor of {@link #cache} and only
     * when the cache holds at least {@link #CACHE_SIZE} entries.
     */
    private void pruneCache() {
        CacheEntry[] entries = (CacheEntry[]) cache.values().toArray(new CacheEntry[cache.size()]);
        Arrays.sort(entries);
        int threshold = entries[CACHE_SIZE / 2].numAccessed;
        for (Iterator it = cache.entrySet().iterator(); it.hasNext();) {
            Map.Entry e = (Map.Entry) it.next();
            CacheEntry ce = (CacheEntry) e.getValue();
            if (ce.numAccessed <= threshold) {
                // prune
                it.remove();
            } else {
                // adjust
                ce.numAccessed = (int) Math.sqrt(ce.numAccessed);
            }
        }
    }

    /**
     * Reads all documents for the given term from the underlying reader into
     * a bit set.
     *
     * @param t the term to read the documents for.
     * @return the bit set with a bit set for each matching document, or
     *         <code>null</code> if the reader returned no document at all.
     * @throws IOException if an error occurs while reading from the index.
     */
    private BitSet collectBits(Term t) throws IOException {
        BitSet bits = null;
        TermDocs tDocs = reader.termDocs(t);
        try {
            while (tDocs.next()) {
                if (bits == null) {
                    // allocate lazily, terms without documents need no bit set
                    bits = new BitSet(reader.maxDoc());
                }
                bits.set(tDocs.doc());
            }
        } finally {
            tDocs.close();
        }
        return bits;
    }

    /**
     * Implements a {@link TermDocs} based on a {@link BitSet}. The bit set is
     * only read, never modified, by this class.
     */
    private static final class CachedTermDocs implements TermDocs {

        /**
         * The cached docs for this term.
         */
        private final BitSet docs;

        /**
         * The current position into the {@link #docs}. <code>-1</code> before
         * the first call to {@link #next()} and after the last document.
         */
        private int position = -1;

        /**
         * <code>true</code> if there are potentially more docs.
         */
        private boolean moreDocs = true;

        public CachedTermDocs(BitSet docs) {
            this.docs = docs;
        }

        /**
         * @throws UnsupportedOperationException always.
         */
        public void seek(Term term) throws IOException {
            throw new UnsupportedOperationException();
        }

        /**
         * @throws UnsupportedOperationException always.
         */
        public void seek(TermEnum termEnum) throws IOException {
            throw new UnsupportedOperationException();
        }

        /**
         * {@inheritDoc}
         */
        public int doc() {
            return position;
        }

        /**
         * Always returns <code>1</code>, term frequencies are not cached.
         */
        public int freq() {
            return 1;
        }

        /**
         * {@inheritDoc}
         */
        public boolean next() throws IOException {
            if (moreDocs) {
                position = docs.nextSetBit(position + 1);
                moreDocs = position != -1;
            }
            return moreDocs;
        }

        /**
         * {@inheritDoc}
         */
        public int read(int[] docs, int[] freqs) throws IOException {
            int count;
            for (count = 0; count < docs.length && next(); count++) {
                docs[count] = doc();
                freqs[count] = 1;
            }
            return count;
        }

        /**
         * Moves to the first document beyond the current one whose number is
         * greater than or equal to <code>target</code>.
         *
         * @param target the minimum document number to move to.
         * @return <code>true</code> if there is such a document.
         */
        public boolean skipTo(int target) throws IOException {
            if (moreDocs) {
                // Always advance past the current document: a target at or
                // before the current position must neither keep the position
                // nor move backwards. This also keeps a negative target from
                // reaching BitSet.nextSetBit(), which would throw.
                int from = Math.max(target, position + 1);
                position = docs.nextSetBit(from);
                moreDocs = position != -1;
            }
            return moreDocs;
        }

        /**
         * Does nothing, there are no resources to release.
         */
        public void close() throws IOException {
        }
    }

    /**
     * A cache entry: the access counter for a term text and, once collected,
     * the bit set of matching documents. Entries are ordered by ascending
     * access count.
     */
    private static final class CacheEntry implements Comparable {

        /**
         * Number of times the term was requested; starts at one for the
         * request that created the entry.
         */
        private volatile int numAccessed = 1;

        /**
         * The matching documents or <code>null</code> if not collected.
         */
        private volatile BitSet bits;

        public int compareTo(Object o) {
            CacheEntry other = (CacheEntry) o;
            return (numAccessed < other.numAccessed ? -1 : (numAccessed == other.numAccessed ? 0 : 1));
        }
    }
}