com.browseengine.bobo.api.BoboIndexReader.java Source code

Java tutorial

Introduction

Here is the source code for com.browseengine.bobo.api.BoboIndexReader.java

Source

/**
 * Bobo Browse Engine - High performance faceted/parametric search implementation 
 * that handles various types of semi-structured data.  Written in Java.
 * 
 * Copyright (C) 2005-2006  John Wang
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 * 
 * To contact the project administrators for the bobo-browse project, 
 * please go to https://sourceforge.net/projects/bobo-browse/, or 
 * send mail to owner@browseengine.com.
 */

package com.browseengine.bobo.api;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;

import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FilterIndexReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.ReaderUtil;
import org.springframework.context.support.FileSystemXmlApplicationContext;
import org.springframework.context.support.GenericApplicationContext;

import com.browseengine.bobo.facets.FacetHandler;
import com.browseengine.bobo.facets.RuntimeFacetHandlerFactory;

/**
 * bobo browse index reader
 * 
 */
public class BoboIndexReader extends FilterIndexReader {
    private static final String SPRING_CONFIG = "bobo.spring";
    private static Logger logger = Logger.getLogger(BoboIndexReader.class);

    protected Map<String, FacetHandler<?>> _facetHandlerMap;

    protected Collection<FacetHandler<?>> _facetHandlers;
    protected Collection<RuntimeFacetHandlerFactory<?, ?>> _runtimeFacetHandlerFactories;
    protected Map<String, RuntimeFacetHandlerFactory<?, ?>> _runtimeFacetHandlerFactoryMap;

    protected WorkArea _workArea;

    protected IndexReader _srcReader;
    protected BoboIndexReader[] _subReaders = null;
    protected int[] _starts = null;
    private Directory _dir = null;

    private final Map<String, Object> _facetDataMap = new HashMap<String, Object>();
    private final ThreadLocal<Map<String, Object>> _runtimeFacetDataMap = new ThreadLocal<Map<String, Object>>() {
        protected Map<String, Object> initialValue() {
            return new HashMap<String, Object>();
        }
    };

    /**
     * Builds a facet-initialized {@code BoboIndexReader} around the given reader.
     * No facet handlers and no runtime facet handler factories are supplied
     * (both are passed as {@code null}), and a new {@link WorkArea} is created
     * for the load.
     * 
     * @param reader
     *          index reader to wrap
     * @return the wrapping reader, after {@code facetInit()} has run
     * @throws IOException
     *           if construction or facet initialization fails
     */
    public static BoboIndexReader getInstance(IndexReader reader) throws IOException {
        WorkArea freshWorkArea = new WorkArea();
        return getInstance(reader, null, null, freshWorkArea);
    }

    /**
     * Builds a facet-initialized {@code BoboIndexReader} around the given reader
     * using the caller's work area. Facet handlers and runtime facet handler
     * factories are both passed as {@code null}.
     * 
     * @param reader
     *          index reader to wrap
     * @param workArea
     *          work area handed to the facet loading step
     * @return the wrapping reader, after {@code facetInit()} has run
     * @throws IOException
     *           if construction or facet initialization fails
     */
    public static BoboIndexReader getInstance(IndexReader reader, WorkArea workArea) throws IOException {
        return getInstance(reader, null, null, workArea);
    }

    /**
     * Builds a facet-initialized {@code BoboIndexReader} with explicit facet
     * handlers and runtime facet handler factories, using a new {@link WorkArea}.
     * 
     * @param reader
     *          index reader to wrap
     * @param facetHandlers
     *          facet handlers to install
     * @param facetHandlerFactories
     *          runtime facet handler factories to register
     * @return the wrapping reader, after {@code facetInit()} has run
     * @throws IOException
     *           if construction or facet initialization fails
     */
    public static BoboIndexReader getInstance(IndexReader reader, Collection<FacetHandler<?>> facetHandlers,
            Collection<RuntimeFacetHandlerFactory<?, ?>> facetHandlerFactories) throws IOException {
        WorkArea freshWorkArea = new WorkArea();
        return getInstance(reader, facetHandlers, facetHandlerFactories, freshWorkArea);
    }

    /**
     * Builds a facet-initialized {@code BoboIndexReader} with explicit facet
     * handlers, an empty collection of runtime facet handler factories and a
     * new {@link WorkArea}.
     * 
     * @param reader
     *          index reader to wrap
     * @param facetHandlers
     *          facet handlers to install
     * @return the wrapping reader, after {@code facetInit()} has run
     * @throws IOException
     *           if construction or facet initialization fails
     */
    public static BoboIndexReader getInstance(IndexReader reader, Collection<FacetHandler<?>> facetHandlers)
            throws IOException {
        // Typed emptyList() instead of the raw EMPTY_LIST constant: same immutable
        // empty list, but without the unchecked conversion.
        return BoboIndexReader.getInstance(reader, facetHandlers,
                Collections.<RuntimeFacetHandlerFactory<?, ?>>emptyList(), new WorkArea());
    }

    /**
     * Builds a {@code BoboIndexReader} through the four-argument constructor and
     * then runs {@code facetInit()} on it before returning it.
     * 
     * @param reader
     *          index reader to wrap
     * @param facetHandlers
     *          facet handlers to install
     * @param facetHandlerFactories
     *          runtime facet handler factories to register
     * @param workArea
     *          work area handed to the facet loading step
     * @return the initialized reader
     * @throws IOException
     *           if construction or facet initialization fails
     */
    public static BoboIndexReader getInstance(IndexReader reader, Collection<FacetHandler<?>> facetHandlers,
            Collection<RuntimeFacetHandlerFactory<?, ?>> facetHandlerFactories, WorkArea workArea)
            throws IOException {
        final BoboIndexReader wrapped = new BoboIndexReader(reader, facetHandlers, facetHandlerFactories,
                workArea);
        wrapped.facetInit();
        return wrapped;
    }

    /**
     * Wraps the given reader directly (no sub-reader decomposition) and runs
     * facet initialization. Facet handlers and runtime factories are passed as
     * {@code null}; a new {@link WorkArea} is used.
     * 
     * @param reader
     *          index reader to use as the inner reader
     * @return the initialized reader
     * @throws IOException
     *           if construction or facet initialization fails
     */
    public static BoboIndexReader getInstanceAsSubReader(IndexReader reader) throws IOException {
        WorkArea freshWorkArea = new WorkArea();
        return getInstanceAsSubReader(reader, null, null, freshWorkArea);
    }

    /**
     * Wraps the given reader directly (no sub-reader decomposition) with the
     * supplied facet handlers and runtime factories, using a new {@link WorkArea}.
     * 
     * @param reader
     *          index reader to use as the inner reader
     * @param facetHandlers
     *          facet handlers to install
     * @param facetHandlerFactories
     *          runtime facet handler factories to register
     * @return the initialized reader
     * @throws IOException
     *           if construction or facet initialization fails
     */
    public static BoboIndexReader getInstanceAsSubReader(IndexReader reader,
            Collection<FacetHandler<?>> facetHandlers,
            Collection<RuntimeFacetHandlerFactory<?, ?>> facetHandlerFactories) throws IOException {
        WorkArea freshWorkArea = new WorkArea();
        return getInstanceAsSubReader(reader, facetHandlers, facetHandlerFactories, freshWorkArea);
    }

    /**
     * Builds a {@code BoboIndexReader} whose inner reader is the given reader
     * itself (the constructor is called with {@code useSubReaders == false}) and
     * then runs {@code facetInit()} on it.
     * 
     * @param reader
     *          index reader to use as the inner reader
     * @param facetHandlers
     *          facet handlers to install
     * @param facetHandlerFactories
     *          runtime facet handler factories to register
     * @param workArea
     *          work area handed to the facet loading step
     * @return the initialized reader
     * @throws IOException
     *           if construction or facet initialization fails
     */
    public static BoboIndexReader getInstanceAsSubReader(IndexReader reader,
            Collection<FacetHandler<?>> facetHandlers,
            Collection<RuntimeFacetHandlerFactory<?, ?>> facetHandlerFactories, WorkArea workArea)
            throws IOException {
        final boolean useSubReaders = false;
        final BoboIndexReader wrapped = new BoboIndexReader(reader, facetHandlers, facetHandlerFactories,
                workArea, useSubReaders);
        wrapped.facetInit();
        return wrapped;
    }

    /**
     * Reads the segment infos from this reader's directory and returns their
     * version.
     * 
     * @return the version reported by the freshly read {@link SegmentInfos}, or
     *         {@code 0} if reading them fails for any reason
     */
    @Override
    public long getVersion() {
        long version = 0L;
        try {
            SegmentInfos current = new SegmentInfos();
            current.read(_dir);
            version = current.getVersion();
        } catch (Exception ignored) {
            // best effort: any failure leaves the default of 0
        }
        return version;
    }

    /**
     * @return the reader this filter reader delegates to
     */
    public IndexReader getInnerReader() {
        return this.in;
    }

    /**
     * Reopens this reader against the current segment infos of its directory.
     * <p>
     * Three cases are handled, depending on the type of the inner reader:
     * <ul>
     * <li>{@link MultiReader}: every current segment is matched by name against
     * the existing sub-readers. Matching sub-readers are reopened, new segments
     * get a new sub-reader. If nothing changed, {@code this} is returned;
     * otherwise a new reader over the resulting sub-readers is returned.</li>
     * <li>{@link SegmentReader}: the segment is looked up by name. If it is gone,
     * {@code null} is returned. If its deletion count is unchanged, {@code this}
     * is returned; otherwise a new reader for the segment is returned.</li>
     * <li>anything else: the inner reader is reopened and re-wrapped if it
     * changed.</li>
     * </ul>
     * 
     * @return {@code this} if nothing changed, a new reader otherwise, or
     *         {@code null} when this reader wraps a segment that no longer exists
     * @throws CorruptIndexException
     *           if the index is corrupt
     * @throws IOException
     *           on low-level I/O failure
     */
    @Override
    public synchronized IndexReader reopen() throws CorruptIndexException, IOException {
        SegmentInfos sinfos = new SegmentInfos();
        sinfos.read(_dir);
        int size = sinfos.size();

        if (in instanceof MultiReader) {
            // Index the current leaf readers by segment name so that readers of
            // surviving segments can be reused.
            List<IndexReader> boboReaderList = new LinkedList<IndexReader>();
            ReaderUtil.gatherSubReaders(boboReaderList, in);
            Map<String, BoboIndexReader> readerMap = new HashMap<String, BoboIndexReader>();
            for (IndexReader reader : boboReaderList) {
                BoboIndexReader boboReader = (BoboIndexReader) reader;
                SegmentReader sreader = (SegmentReader) (boboReader.in);
                readerMap.put(sreader.getSegmentName(), boboReader);
            }

            ArrayList<BoboIndexReader> currentReaders = new ArrayList<BoboIndexReader>(size);
            boolean isNewReader = false;
            for (int i = 0; i < size; ++i) {
                SegmentInfo sinfo = sinfos.info(i);
                BoboIndexReader breader = readerMap.remove(sinfo.name);
                if (breader != null) {
                    // should use SegmentReader.reopen
                    // TODO: see LUCENE-2559
                    BoboIndexReader newReader = (BoboIndexReader) breader.reopen(true);
                    if (newReader != breader) {
                        isNewReader = true;
                    }
                    // a null result means the sub-reader found its segment gone
                    if (newReader != null) {
                        currentReaders.add(newReader);
                    }
                } else {
                    // segment not seen before: open a new sub-reader for it
                    isNewReader = true;
                    SegmentReader newSreader = SegmentReader.get(true, sinfo, 1);
                    breader = BoboIndexReader.getInstanceAsSubReader(newSreader, this._facetHandlers,
                            this._runtimeFacetHandlerFactories);
                    breader._dir = _dir;
                    currentReaders.add(breader);
                }
            }
            // anything left in the map belongs to a segment that disappeared
            isNewReader = isNewReader || (readerMap.size() != 0);
            if (!isNewReader) {
                return this;
            } else {
                MultiReader newMreader = new MultiReader(
                        currentReaders.toArray(new BoboIndexReader[currentReaders.size()]), false);
                BoboIndexReader newReader = BoboIndexReader.getInstanceAsSubReader(newMreader, this._facetHandlers,
                        this._runtimeFacetHandlerFactories);
                newReader._dir = _dir;
                return newReader;
            }
        } else if (in instanceof SegmentReader) {
            // should use SegmentReader.reopen
            // TODO: see LUCENE-2559

            SegmentReader sreader = (SegmentReader) in;
            int numDels = sreader.numDeletedDocs();

            SegmentInfo sinfo = null;
            boolean sameSeg = false;
            // locate this reader's segment in the freshly read infos
            for (int i = 0; i < size; ++i) {
                SegmentInfo sinfoTmp = sinfos.info(i);
                if (sinfoTmp.name.equals(sreader.getSegmentName())) {
                    int numDels2 = sinfoTmp.getDelCount();
                    sameSeg = numDels == numDels2;
                    sinfo = sinfoTmp;
                    break;
                }
            }

            if (sinfo == null) {
                // segment no longer exists
                return null;
            }
            if (sameSeg) {
                return this;
            } else {
                SegmentReader newSreader = SegmentReader.get(true, sinfo, 1);
                BoboIndexReader newReader = BoboIndexReader.getInstanceAsSubReader(newSreader,
                        this._facetHandlers, this._runtimeFacetHandlerFactories);
                // Carry the directory over; without it the next reopen() on the
                // returned reader would read segment infos from a null directory.
                newReader._dir = _dir;
                return newReader;
            }
        } else {
            // should not reach here, a catch-all default case
            IndexReader reader = in.reopen(true);
            if (in != reader) {
                // wrap the reopened reader (previously an always-null local was passed here)
                return BoboIndexReader.getInstance(reader, _facetHandlers, _runtimeFacetHandlerFactories,
                        _workArea);
            } else {
                return this;
            }
        }
    }

    /**
     * Reopens this reader. The flag is ignored because bobo readers are always
     * read-only; the call is forwarded to {@link #reopen()}.
     * 
     * @param openReadOnly
     *          ignored
     * @return the result of {@link #reopen()}
     * @throws CorruptIndexException
     *           if the index is corrupt
     * @throws IOException
     *           on low-level I/O failure
     */
    @Override
    public synchronized IndexReader reopen(boolean openReadOnly) throws CorruptIndexException, IOException {
        final IndexReader reopened = reopen();
        return reopened;
    }

    /**
     * Looks up facet data stored on this reader.
     * 
     * @param name
     *          key the data was stored under
     * @return the stored data, or {@code null} if nothing is stored under that key
     */
    public Object getFacetData(String name) {
        final Object data = _facetDataMap.get(name);
        return data;
    }

    /**
     * Stores facet data on this reader.
     * 
     * @param name
     *          key to store the data under
     * @param data
     *          data to store
     * @return the value previously stored under that key, or {@code null}
     */
    public Object putFacetData(String name, Object data) {
        final Object previous = _facetDataMap.put(name, data);
        return previous;
    }

    /**
     * Looks up runtime facet data in the calling thread's map.
     * 
     * @param name
     *          key the data was stored under
     * @return the stored data, or {@code null} if the thread has no map or no
     *         entry for that key
     */
    public Object getRuntimeFacetData(String name) {
        final Map<String, Object> perThread = _runtimeFacetDataMap.get();
        return (perThread == null) ? null : perThread.get(name);
    }

    /**
     * Stores runtime facet data in the calling thread's map, creating and
     * installing a new map first if the thread currently has none.
     * 
     * @param name
     *          key to store the data under
     * @param data
     *          data to store
     * @return the value previously stored under that key, or {@code null}
     */
    public Object putRuntimeFacetData(String name, Object data) {
        Map<String, Object> perThread = _runtimeFacetDataMap.get();
        if (perThread != null) {
            return perThread.put(name, data);
        }
        perThread = new HashMap<String, Object>();
        _runtimeFacetDataMap.set(perThread);
        return perThread.put(name, data);
    }

    /**
     * Discards the calling thread's runtime facet data.
     * <p>
     * Uses {@link ThreadLocal#remove()} rather than setting the value to
     * {@code null}, so the thread no longer holds an entry for this reader. A
     * later read or write on the same thread starts from a new empty map.
     */
    public void clearRuntimeFacetData() {
        _runtimeFacetDataMap.remove();
    }

    /**
     * Clears the stored facet data, closes the source reader if one is set, and
     * then delegates to the superclass close logic.
     * 
     * @throws IOException
     *           if closing a reader fails
     */
    @Override
    protected void doClose() throws IOException {
        _facetDataMap.clear();
        final IndexReader source = _srcReader;
        if (source != null) {
            source.close();
        }
        super.doClose();
    }

    /**
     * Flushes the source reader with the given commit user data. Does nothing
     * when no source reader is set.
     * 
     * @param commitUserData
     *          user data passed through to the source reader's flush
     * @throws IOException
     *           if the flush fails
     */
    @Override
    protected void doCommit(Map commitUserData) throws IOException {
        if (_srcReader == null) {
            return;
        }
        _srcReader.flush(commitUserData);
    }

    /**
     * Deletes a document through the source reader. Does nothing when no source
     * reader is set.
     * 
     * @param n
     *          document number to delete
     * @throws CorruptIndexException
     *           if the index is corrupt
     * @throws IOException
     *           on low-level I/O failure
     */
    @Override
    protected void doDelete(int n) throws CorruptIndexException, IOException {
        if (_srcReader == null) {
            return;
        }
        _srcReader.deleteDocument(n);
    }

    /**
     * Loads the named facet handler after recursively loading every handler it
     * depends on. Unknown names and already loaded handlers are skipped.
     * 
     * @param name
     *          name of the facet handler to load
     * @param loaded
     *          names of handlers whose data has been loaded; updated on success
     * @param visited
     *          names of handlers whose loading has been started; used to detect
     *          dependency cycles
     * @param workArea
     *          work area passed to the handler's load call
     * @throws IOException
     *           if a dependency cycle is found, a dependency cannot be loaded, or
     *           the handler's own load fails
     */
    private void loadFacetHandler(String name, Set<String> loaded, Set<String> visited, WorkArea workArea)
            throws IOException {
        final FacetHandler<?> handler = _facetHandlerMap.get(name);
        if (handler == null || loaded.contains(name)) {
            return;
        }
        visited.add(name);

        for (String dependency : handler.getDependsOn()) {
            if (name.equals(dependency)) {
                // a handler listing itself is not treated as a dependency
                continue;
            }
            if (!loaded.contains(dependency)) {
                if (visited.contains(dependency)) {
                    // started but not finished: we came back around to it
                    throw new IOException("Facet handler dependency cycle detected, facet handler: " + name
                            + " not loaded");
                }
                loadFacetHandler(dependency, loaded, visited, workArea);
                if (!loaded.contains(dependency)) {
                    throw new IOException("unable to load facet handler: " + dependency);
                }
            }
            handler.putDependedFacetHandler(_facetHandlerMap.get(dependency));
        }

        final long startedAt = System.currentTimeMillis();
        handler.loadFacetData(this, workArea);
        final long elapsed = System.currentTimeMillis() - startedAt;
        if (logger.isDebugEnabled()) {
            logger.debug("facetHandler loaded: " + name + ", took: " + elapsed + " ms");
        }
        loaded.add(name);
    }

    /**
     * Loads every facet handler registered in the handler map, then drops the
     * handlers named in {@code toBeRemoved} from the map.
     * 
     * @param workArea
     *          work area passed to each handler's load call
     * @param toBeRemoved
     *          names of handlers to remove from the map after loading
     * @throws IOException
     *           if loading any handler fails
     */
    private void loadFacetHandlers(WorkArea workArea, Set<String> toBeRemoved) throws IOException {
        final Set<String> loadedNames = new HashSet<String>();
        final Set<String> visitedNames = new HashSet<String>();

        for (String handlerName : _facetHandlerMap.keySet()) {
            loadFacetHandler(handlerName, loadedNames, visitedNames, workArea);
        }

        // removing through the key-set view removes the map entries themselves
        _facetHandlerMap.keySet().removeAll(toBeRemoved);
    }

    /**
     * Collects the sub-readers of {@code reader} via
     * {@code ReaderUtil.gatherSubReaders} and wraps each one in a
     * {@code BoboIndexReader} that uses it directly as its inner reader. If
     * nothing is collected, {@code reader} itself is wrapped instead.
     * 
     * @param reader
     *          reader whose sub-readers are wrapped
     * @param workArea
     *          work area given to every wrapper
     * @return array of wrappers (its runtime type is {@code BoboIndexReader[]})
     * @throws IOException
     *           if constructing a wrapper fails
     */
    private static IndexReader[] createSubReaders(IndexReader reader, WorkArea workArea) throws IOException {
        final List<IndexReader> leaves = new ArrayList<IndexReader>();
        ReaderUtil.gatherSubReaders(leaves, reader);

        if (leaves.isEmpty()) {
            return new BoboIndexReader[] { new BoboIndexReader(reader, null, null, workArea, false) };
        }

        final BoboIndexReader[] wrapped = new BoboIndexReader[leaves.size()];
        int slot = 0;
        for (IndexReader leaf : leaves) {
            wrapped[slot++] = new BoboIndexReader(leaf, null, null, workArea, false);
        }
        return wrapped;
    }

    /**
     * @return the directory of the first sub-reader when sub-readers are
     *         present, otherwise whatever the superclass reports
     */
    @Override
    public Directory directory() {
        if (_subReaders == null) {
            return super.directory();
        }
        return _subReaders[0].directory();
    }

    /**
     * Loads the facet handler collection from the Spring configuration file
     * ({@code bobo.spring}) located in the index directory.
     * <p>
     * If the work area holds a {@link ClassLoader}, the first one found is set
     * on the application context. The last component of the index path is
     * URL-encoded before the {@code file:} config location is built.
     * 
     * @param file
     *          index directory
     * @param workArea
     *          work area searched for a class loader
     * @return the bean registered under the name {@code handlers}
     * @throws IOException
     *           declared by the signature (the encoding call may throw a subclass)
     */
    private static Collection<FacetHandler<?>> loadFromIndex(File file, WorkArea workArea) throws IOException {
        final Collection<Object> sharedObjects = workArea.map.values();
        final FileSystemXmlApplicationContext appCtx = new FileSystemXmlApplicationContext();
        for (Object candidate : sharedObjects) {
            if (candidate instanceof ClassLoader) {
                appCtx.setClassLoader((ClassLoader) candidate);
                break;
            }
        }

        // Split the path at the last separator and encode only the final component.
        final String indexPath = file.getAbsolutePath();
        final int lastSeparator = indexPath.lastIndexOf(File.separator);
        final String parentPart = indexPath.substring(0, lastSeparator);
        final String encodedLeaf = URLEncoder.encode(indexPath.substring(lastSeparator + 1), "UTF-8");
        final File encodedIndexDir = new File(parentPart + File.separator + encodedLeaf);

        final File springFile = new File(encodedIndexDir, SPRING_CONFIG);
        appCtx.setConfigLocation("file:" + springFile.getAbsolutePath());
        appCtx.refresh();

        return (Collection<FacetHandler<?>>) appCtx.getBean("handlers");
    }

    /**
     * Installs the facet handlers and rebuilds the name-to-handler map.
     * <p>
     * When {@code facetHandlers} is {@code null}, the handlers are loaded from
     * the Spring configuration file in the index directory, provided the
     * directory is an {@link FSDirectory} and that file exists; otherwise an
     * empty list is used.
     * 
     * @param facetHandlers
     *          handlers to install, or {@code null} to try loading them from the
     *          index directory
     * @throws IOException
     *           if loading the handlers from the index fails
     */
    protected void initialize(Collection<FacetHandler<?>> facetHandlers) throws IOException {
        Collection<FacetHandler<?>> resolved = facetHandlers;
        if (resolved == null) {
            // default when nothing can be loaded from the index
            resolved = new ArrayList<FacetHandler<?>>();
            final Directory idxDir = directory();
            if (idxDir instanceof FSDirectory) {
                final File indexDir = ((FSDirectory) idxDir).getFile();
                if (new File(indexDir, SPRING_CONFIG).exists()) {
                    resolved = loadFromIndex(indexDir, _workArea);
                }
            }
        }

        _facetHandlers = resolved;
        _facetHandlerMap = new HashMap<String, FacetHandler<?>>();
        for (FacetHandler<?> handler : resolved) {
            _facetHandlerMap.put(handler.getName(), handler);
        }
    }

    /**
     * Creates a reader whose inner reader is a {@link MultiReader} over the
     * wrapped sub-readers of {@code reader}, and remembers {@code reader} as the
     * source reader.
     * 
     * @param reader
     *          source index reader
     * @param facetHandlers
     *          facet handlers to install
     * @param facetHandlerFactories
     *          runtime facet handler factories to register
     * @param workArea
     *          work area used while loading
     * @throws IOException
     *           if wrapping the sub-readers fails
     */
    protected BoboIndexReader(IndexReader reader, Collection<FacetHandler<?>> facetHandlers,
            Collection<RuntimeFacetHandlerFactory<?, ?>> facetHandlerFactories, WorkArea workArea)
            throws IOException {
        this(reader, facetHandlers, facetHandlerFactories, workArea, true);
        this._srcReader = reader;
    }

    /**
     * Creates a reader, either over the wrapped sub-readers of {@code reader} or
     * over {@code reader} itself.
     * 
     * @param reader
     *          index reader to build on
     * @param facetHandlers
     *          facet handlers; when non-null they are also handed to every
     *          sub-reader
     * @param facetHandlerFactories
     *          runtime facet handler factories, indexed by name when non-null
     * @param workArea
     *          work area used while loading
     * @param useSubReaders
     *          {@code true}: the inner reader is a {@link MultiReader} over the
     *          wrapped sub-readers and the document offsets of the sub-readers
     *          are recorded; {@code false}: the given reader is used as the
     *          inner reader
     * @throws IOException
     *           if wrapping the sub-readers fails
     */
    protected BoboIndexReader(IndexReader reader, Collection<FacetHandler<?>> facetHandlers,
            Collection<RuntimeFacetHandlerFactory<?, ?>> facetHandlerFactories, WorkArea workArea,
            boolean useSubReaders) throws IOException {
        super(useSubReaders ? new MultiReader(createSubReaders(reader, workArea), false) : reader);
        if (useSubReaders) {
            _dir = reader.directory();
            final BoboIndexReader[] wrappedLeaves = (BoboIndexReader[]) in.getSequentialSubReaders();
            if (wrappedLeaves != null && wrappedLeaves.length > 0) {
                _subReaders = wrappedLeaves;
                _starts = new int[wrappedLeaves.length + 1];

                // _starts[i] is the first doc id of sub-reader i; the final slot
                // holds the total maxDoc.
                int docBase = 0;
                int slot = 0;
                for (BoboIndexReader leaf : wrappedLeaves) {
                    leaf._dir = _dir;
                    if (facetHandlers != null) {
                        leaf.setFacetHandlers(facetHandlers);
                    }
                    _starts[slot++] = docBase;
                    docBase += leaf.maxDoc();
                }
                _starts[slot] = docBase;
            }
        }

        _runtimeFacetHandlerFactories = facetHandlerFactories;
        _runtimeFacetHandlerFactoryMap = new HashMap<String, RuntimeFacetHandlerFactory<?, ?>>();
        if (facetHandlerFactories != null) {
            for (RuntimeFacetHandlerFactory<?, ?> factory : facetHandlerFactories) {
                _runtimeFacetHandlerFactoryMap.put(factory.getName(), factory);
            }
        }
        _facetHandlers = facetHandlers;
        _workArea = workArea;
    }

    /**
     * Runs facet initialization with an empty set of handler names to remove.
     * 
     * @throws IOException
     *           if initialization or facet loading fails
     */
    protected void facetInit() throws IOException {
        final Set<String> nothingToRemove = new HashSet<String>();
        facetInit(nothingToRemove);
    }

    /**
     * Initializes the facet handlers of this reader. Without sub-readers the
     * handlers are loaded here; with sub-readers each sub-reader is initialized
     * in turn and the names in {@code toBeRemoved} are then dropped from this
     * reader's handler map.
     * 
     * @param toBeRemoved
     *          names of handlers to remove from the handler map
     * @throws IOException
     *           if initialization or facet loading fails
     */
    protected void facetInit(Set<String> toBeRemoved) throws IOException {
        initialize(_facetHandlers);

        if (_subReaders == null) {
            loadFacetHandlers(_workArea, toBeRemoved);
            return;
        }

        for (BoboIndexReader subReader : _subReaders) {
            subReader.facetInit(toBeRemoved);
        }
        // removing through the key-set view removes the map entries themselves
        _facetHandlerMap.keySet().removeAll(toBeRemoved);
    }

    /**
     * Replaces the facet handler collection held by this reader. The handler
     * map is not rebuilt here.
     * 
     * @param facetHandlers
     *          new facet handler collection
     */
    protected void setFacetHandlers(Collection<FacetHandler<?>> facetHandlers) {
        this._facetHandlers = facetHandlers;
    }

    /**
     * Returns a new {@link org.apache.lucene.search.MatchAllDocsQuery}.
     * 
     * @deprecated use {@link org.apache.lucene.search.MatchAllDocsQuery} instead.
     * @return query that matches all docs in the index
     */
    @Deprecated
    public Query getFastMatchAllDocsQuery() {
        return new MatchAllDocsQuery();
    }

    /**
     * Utility method to dump out all facet fields (name and terms) of this index.
     * <p>
     * For each facet name the output contains the name followed by a colon, one
     * line per term of that field, and a blank separator. The file is written
     * with the platform default encoding.
     * 
     * @param outFile
     *          File to dump to.
     * @throws IOException
     *           if the file cannot be opened, the terms cannot be read, or
     *           writing the dump fails
     */
    public void dumpFields(File outFile) throws IOException {
        PrintWriter out = new PrintWriter(new FileWriter(outFile));
        try {
            for (String fieldName : getFacetNames()) {
                TermEnum te = terms(new Term(fieldName, ""));
                try {
                    out.write(fieldName + ":\n");
                    // terms(Term) is already positioned on the first matching term,
                    // so inspect the current term before advancing.
                    do {
                        Term term = te.term();
                        if (term == null || !fieldName.equals(term.field())) {
                            break;
                        }
                        out.write(term.text() + "\n");
                    } while (te.next());
                    out.write("\n\n");
                } finally {
                    te.close();
                }
            }
            // PrintWriter swallows write errors; surface them to the caller.
            if (out.checkError()) {
                throw new IOException("error writing field dump to " + outFile);
            }
        } finally {
            out.close();
        }
    }

    /**
     * Returns the names of all registered facet handlers. The result is the key
     * set of the handler map itself, not a copy.
     * 
     * @return set of facet field names
     */
    public Set<String> getFacetNames() {
        final Set<String> names = _facetHandlerMap.keySet();
        return names;
    }

    /**
     * Looks up a facet handler by name.
     * 
     * @param fieldname
     *          name the handler is registered under
     * @return the facet handler, or {@code null} if none is registered under
     *         that name
     */
    public FacetHandler<?> getFacetHandler(String fieldname) {
        final FacetHandler<?> handler = _facetHandlerMap.get(fieldname);
        return handler;
    }

    /**
     * @return the wrapped sub-readers, or {@code null} when this reader has none
     */
    @Override
    public IndexReader[] getSequentialSubReaders() {
        return this._subReaders;
    }

    /**
     * Returns the map from facet name to facet handler. The map itself is
     * returned, not a copy.
     * 
     * @return facet handler map
     */
    public Map<String, FacetHandler<?>> getFacetHandlerMap() {
        return this._facetHandlerMap;
    }

    /**
     * Returns the map from factory name to runtime facet handler factory. The
     * map itself is returned, not a copy.
     * 
     * @return the map of RuntimeFacetHandlerFactories
     */
    public Map<String, RuntimeFacetHandlerFactory<?, ?>> getRuntimeFacetHandlerFactoryMap() {
        return this._runtimeFacetHandlerFactoryMap;
    }

    /**
     * Returns the stored document for the given doc id, augmented with the
     * values reported by the facet handlers.
     * <p>
     * With sub-readers, the call is routed to the sub-reader that owns the doc
     * id. Otherwise, for every facet handler that reports values for the
     * document, the document's existing values for that field are merged with
     * the handler's values (duplicates removed) and the field is replaced by one
     * un-stored, not-analyzed field per merged value.
     * 
     * @param docid
     *          document id
     * @return the document
     * @throws IOException
     *           if reading the document fails
     */
    @Override
    public Document document(int docid) throws IOException {
        if (_subReaders != null) {
            int readerIndex = readerIndex(docid, _starts, _subReaders.length);
            BoboIndexReader subReader = _subReaders[readerIndex];
            return subReader.document(docid - _starts[readerIndex]);
        }

        Document doc = super.document(docid);
        for (FacetHandler<?> facetHandler : _facetHandlerMap.values()) {
            String[] vals = facetHandler.getFieldValues(this, docid);
            if (vals == null) {
                continue;
            }
            String fieldName = facetHandler.getName();
            Set<String> mergedVals = new HashSet<String>(Arrays.asList(doc.getValues(fieldName)));
            mergedVals.addAll(Arrays.asList(vals));

            // removeFields drops every field of this name; removeField would only
            // drop the first one and leave duplicates once the merged values are
            // re-added.
            doc.removeFields(fieldName);

            for (String val : mergedVals) {
                doc.add(new Field(fieldName, val, Field.Store.NO, Field.Index.NOT_ANALYZED));
            }
        }
        return doc;
    }

    /**
     * Binary-searches {@code starts} for the sub-reader that contains doc id
     * {@code n}.
     * 
     * @param n
     *          doc id to locate
     * @param starts
     *          first doc id of each sub-reader, in ascending order
     * @param numSubReaders
     *          number of leading entries of {@code starts} to search
     * @return on an exact hit, the last index among equal start values;
     *         otherwise the index of the greatest start value below {@code n}
     */
    private static int readerIndex(int n, int[] starts, int numSubReaders) {
        int low = 0;
        int high = numSubReaders - 1;

        while (low <= high) {
            final int middle = (low + high) >>> 1;
            final int startAtMiddle = starts[middle];
            if (n == startAtMiddle) {
                // skip over sub-readers sharing the same start (empty readers)
                int match = middle;
                while (match + 1 < numSubReaders && starts[match + 1] == startAtMiddle) {
                    match++;
                }
                return match;
            }
            if (n < startAtMiddle) {
                high = middle - 1;
            } else {
                low = middle + 1;
            }
        }
        return high;
    }

    /**
     * Scratch storage used while loading: holds at most one object per concrete
     * class, keyed by that class.
     */
    public static class WorkArea {
        HashMap<Class<?>, Object> map = new HashMap<Class<?>, Object>();

        /**
         * @param cls
         *          class to look up
         * @return the object stored under exactly that class, or {@code null}
         */
        @SuppressWarnings("unchecked")
        public <T> T get(Class<T> cls) {
            return (T) map.get(cls);
        }

        /**
         * Stores the object under its runtime class, replacing any object
         * previously stored under the same class.
         * 
         * @param obj
         *          object to store
         */
        public void put(Object obj) {
            final Class<?> key = obj.getClass();
            map.put(key, obj);
        }

        /**
         * Removes everything stored in this work area.
         */
        public void clear() {
            map.clear();
        }

        /**
         * @return the string form of the underlying map
         */
        @Override
        public String toString() {
            return map.toString();
        }
    }

    /**
     * Bare constructor used by {@link #copy(IndexReader)}: only sets the inner
     * reader and leaves every other field at its default.
     * 
     * @param in
     *          inner reader
     */
    private BoboIndexReader(IndexReader in) {
        super(in);
    }

    /**
     * Creates a new reader over {@code in} that shares this reader's facet
     * handlers, runtime factories and work area, and starts with a copy of this
     * reader's stored facet data.
     * 
     * @param in
     *          inner reader for the copy; also becomes its source reader
     * @return the new reader
     * @throws IllegalStateException
     *           if this reader has sub-readers
     */
    public BoboIndexReader copy(IndexReader in) {
        if (_subReaders != null) {
            throw new IllegalStateException("this BoboIndexReader has subreaders");
        }

        final BoboIndexReader duplicate = new BoboIndexReader(in);
        duplicate._srcReader = in;
        duplicate._workArea = this._workArea;
        duplicate._starts = this._starts;

        // handler and factory collections are shared by reference
        duplicate._facetHandlers = this._facetHandlers;
        duplicate._facetHandlerMap = this._facetHandlerMap;
        duplicate._runtimeFacetHandlerFactories = this._runtimeFacetHandlerFactories;
        duplicate._runtimeFacetHandlerFactoryMap = this._runtimeFacetHandlerFactoryMap;

        // facet data entries are copied into the duplicate's own map
        duplicate._facetDataMap.putAll(this._facetDataMap);
        return duplicate;
    }
}