org.bibsonomy.lucene.index.LuceneResourceIndex.java Source code

Java tutorial

Introduction

Here is the source code for org.bibsonomy.lucene.index.LuceneResourceIndex.java

Source

/**
 *
 *  BibSonomy-Lucene - A blue social bookmark and publication sharing system.
 *
 *  Copyright (C) 2006 - 2011 Knowledge & Data Engineering Group,
 *                            University of Kassel, Germany
 *                            http://www.kde.cs.uni-kassel.de/
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public License
 *  as published by the Free Software Foundation; either version 2
 *  of the License, or (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */

package org.bibsonomy.lucene.index;

import static org.bibsonomy.lucene.util.LuceneBase.CFG_INDEX_ID_DELIMITER;
import static org.bibsonomy.lucene.util.LuceneBase.CFG_LUCENE_INDEX_PREFIX;
import static org.bibsonomy.lucene.util.LuceneBase.FLD_CONTENT_ID;
import static org.bibsonomy.lucene.util.LuceneBase.FLD_LAST_LOG_DATE;
import static org.bibsonomy.lucene.util.LuceneBase.FLD_LAST_TAS_ID;
import static org.bibsonomy.lucene.util.LuceneBase.FLD_USER_NAME;
import static org.bibsonomy.lucene.util.LuceneBase.getIndexBasePath;

import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.StaleReaderException;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.bibsonomy.lucene.param.LuceneIndexStatistics;
import org.bibsonomy.lucene.param.comparator.DocumentCacheComparator;
import org.bibsonomy.lucene.util.LuceneBase;
import org.bibsonomy.model.Resource;

/**
 * abstract base class for managing lucene resource indices
 * 
 * @author fei
 * @version $Id: LuceneResourceIndex.java,v 1.40 2011-05-28 12:27:35 nosebrain Exp $
 *
 * @param <R> the resource of the index
 */
public abstract class LuceneResourceIndex<R extends Resource> {
    /** logger of this class; protected, so it is also visible to subclasses */
    protected static final Log log = LogFactory.getLog(LuceneResourceIndex.class);

    /** encodes whether the index is currently opened for reading or for writing */
    public enum AccessMode {
        /** neither the index reader nor the index writer is marked as opened */
        None,

        /** the index reader has been opened */
        ReadOnly,

        /** the index writer has been opened */
        WriteOnly
    }

    /** current access mode: tells whether the index is opened for writing or reading */
    private AccessMode accessMode;

    /** reader on the lucene index; also used for deleting documents from the index */
    protected IndexReader indexReader;

    /** gives write access to the lucene index */
    protected IndexWriter indexWriter;

    /** path to the lucene index (assembled during initialization) */
    private String luceneIndexPath;

    /** directory of the lucene index */
    private Directory indexDirectory;

    /** default field tokenizer, handed to the index writer when it is opened */
    private Analyzer analyzer;

    /** list containing content ids of cached delete operations */
    private final List<Integer> contentIdsToDelete;

    /** posts (as lucene documents) cached for insertion into the index */
    protected Set<Document> postsToInsert;

    /**
     * set of usernames which were flagged as spammers since the last update
     * and whose posts should be removed from the index during the next update
     * (blocking new posts to be inserted for the given users)
     */
    private final Set<String> usersToFlag;

    /** flag indicating whether the index should be optimized during next update */
    private boolean optimizeIndex;

    /** flag indicating whether the index was cleanly initialized */
    private boolean isReady = false;

    /** id for identifying redundant resource indices */
    private int indexId;

    /** keeps track of the newest log_date during last index update (null = not cached) */
    private Long lastLogDate;

    /** keeps track of the newest tas_id during last index update (null = not cached) */
    private Integer lastTasId;

    /**
     * Creates the index manager for the index with the given id: sets up the
     * (empty) caches for pending operations and tries to open the index.
     * If the initialization fails, the exception is logged and the index is
     * left disabled (see {@link #isIndexEnabled()}) instead of propagating
     * the error to the caller.
     *
     * @param indexId id for identifying redundant resource indices
     */
    protected LuceneResourceIndex(final int indexId) {
        // init data structures
        this.contentIdsToDelete = new LinkedList<Integer>();
        this.postsToInsert = new TreeSet<Document>(new DocumentCacheComparator());
        this.usersToFlag = new TreeSet<String>();
        this.optimizeIndex = false;

        this.indexId = indexId;

        try {
            init();
        } catch (final Exception e) {
            // keep the cause - a silently disabled index is very hard to diagnose
            log.error("Error initializing index with id " + indexId + " - index is disabled", e);
            disableIndex();
        }
    }

    /**
     * Get statistics for this index.
     *
     * The reader based values (document counts, version, currentness and
     * modification date) are only filled in if the index is enabled and an
     * index reader is available; otherwise they keep the defaults of a newly
     * created {@link LuceneIndexStatistics} object. The newest record date
     * is always taken from {@link #getLastLogDate()}.
     *
     * @return LuceneIndexStatistics for this index
     */
    public LuceneIndexStatistics getStatistics() {
        final LuceneIndexStatistics statistics = new LuceneIndexStatistics();
        synchronized (this) {
            if (this.isIndexEnabled()) {
                this.ensureReadAccess();

                // ensureReadAccess() only logs a failure to open the reader,
                // so the reader may still be missing here
                if (this.indexReader != null) {
                    statistics.setNumDocs(this.indexReader.numDocs());
                    statistics.setNumDeletedDocs(this.indexReader.numDeletedDocs());
                    statistics.setCurrentVersion(this.indexReader.getVersion());
                    try {
                        statistics.setCurrent(this.indexReader.isCurrent());
                        statistics.setLastModified(IndexReader.lastModified(this.indexReader.directory()));
                    } catch (final IOException e) {
                        log.error("Error reading statistics of index " + this.luceneIndexPath, e);
                    }
                }
            }
        }

        statistics.setNewestRecordDate(this.getLastLogDate());

        return statistics;
    }

    /**
     * Close index-writer and index-reader and disable this index.
     *
     * The reader is closed and the index is disabled even if closing the
     * writer fails; the first exception raised is still reported to the
     * caller unless a later close step fails as well.
     *
     * @throws CorruptIndexException
     * @throws IOException
     */
    public void close() throws CorruptIndexException, IOException {
        try {
            if (this.indexWriter != null) {
                this.indexWriter.close();
                // forget the closed writer, closeIndexWriter() must not use it again
                this.indexWriter = null;
            }
        } finally {
            try {
                if (this.indexReader != null) {
                    this.indexReader.close();
                }
            } finally {
                disableIndex();
            }
        }
    }

    /**
     * Initializes the internal data structures: assembles the index path,
     * opens the index directory, forcibly removes a left-over write lock and
     * opens the index reader. The index is enabled only if all steps succeed.
     *
     * @throws IOException if the directory can't be opened, the index can't
     *                     be unlocked or the index reader can't be opened
     */
    private void init() throws IOException {
        LuceneBase.initRuntimeConfiguration();
        this.luceneIndexPath = getIndexBasePath() + CFG_LUCENE_INDEX_PREFIX + getResourceName()
                + CFG_INDEX_ID_DELIMITER + getIndexId();

        indexDirectory = FSDirectory.open(new File(luceneIndexPath));
        try {
            if (IndexWriter.isLocked(indexDirectory)) {
                // a locked index is worth a warning, the successful unlock is not an error
                log.warn("WARNING: Index " + luceneIndexPath + " is locked - forcibly unlock the index.");
                IndexWriter.unlock(indexDirectory);
                log.info("OK. Index unlocked.");
            }
        } catch (final IOException e) {
            log.fatal("Failed to unlock the index - dying.", e);
            throw e;
        }

        try {
            openIndexReader();
        } catch (final IOException e) {
            log.error(
                    "Error opening IndexReader (" + e.getMessage() + ") - This is ok while creating a new index.");
            throw e;
        }

        // everything went fine - enable the index
        enableIndex();
    }

    /**
     * Returns the cached log_date if one is set. Otherwise the index is
     * searched for the document with the largest last_log_date.
     *
     * @return the latest log_date[ms] from index; {@link Long#MAX_VALUE} if
     *         the index is disabled, no document was found or the stored
     *         value can't be parsed
     */
    public long getLastLogDate() {
        // FIXME: this synchronisation is very inefficient
        synchronized (this) {
            if (!isIndexEnabled()) {
                return Long.MAX_VALUE;
            }
            if (this.lastLogDate != null) {
                return this.lastLogDate;
            }

            // match every document, order descending by last_log_date (LONG)
            // and fetch only the first hit, i.e. the newest one
            final Query everything = new MatchAllDocsQuery();
            final Sort newestFirst = new Sort(new SortField(FLD_LAST_LOG_DATE, SortField.LONG, true));
            final Document newest = searchIndex(everything, 1, newestFirst);
            if (newest == null) {
                return Long.MAX_VALUE;
            }

            final String storedDate = newest.get(FLD_LAST_LOG_DATE);
            try {
                return Long.parseLong(storedDate);
            } catch (final NumberFormatException e) {
                log.error("Error parsing last_log_date " + storedDate);
                return Long.MAX_VALUE;
            }
        }
    }

    /**
     * set newest log_date[ms]
     *
     * Synchronized on this index like {@link #getLastLogDate()}, which checks
     * the cached value for null and then unboxes it while holding the lock.
     *
     * @param lastLogDate the lastLogDate to set (null clears the cached value)
     */
    public void setLastLogDate(final Long lastLogDate) {
        synchronized (this) {
            this.lastLogDate = lastLogDate;
        }
    }

    /**
     * Returns the cached tas_id if one is set. Otherwise the index is
     * searched for the document with the largest last_tas_id.
     *
     * @return the newest tas_id from index; {@link Integer#MAX_VALUE} if the
     *         index is disabled, no document was found or the stored value
     *         can't be parsed
     */
    public Integer getLastTasId() {
        synchronized (this) {
            if (!isIndexEnabled()) {
                return Integer.MAX_VALUE;
            }
            if (this.lastTasId != null) {
                return this.lastTasId;
            }

            // match every document, order descending by last_tas_id (INT)
            // and fetch only the first hit, i.e. the newest one
            final Query everything = new MatchAllDocsQuery();
            final Sort newestFirst = new Sort(new SortField(FLD_LAST_TAS_ID, SortField.INT, true));
            final Document newest = searchIndex(everything, 1, newestFirst);
            if (newest == null) {
                return Integer.MAX_VALUE;
            }

            final String storedTasId = newest.get(FLD_LAST_TAS_ID);
            try {
                return Integer.parseInt(storedTasId);
            } catch (final NumberFormatException e) {
                log.error("Error parsing last_tas_id " + storedTasId);
                return Integer.MAX_VALUE;
            }
        }
    }

    /**
     * set newest tas_id
     *
     * Synchronized on this index like {@link #getLastTasId()}, which checks
     * the cached value for null and then returns it while holding the lock.
     *
     * @param lastTasId the lastTasId to set (null clears the cached value)
     */
    public void setLastTasId(final Integer lastTasId) {
        synchronized (this) {
            this.lastTasId = lastTasId;
        }
    }

    /**
     * Requests an index optimization; it is carried out the next time the
     * index writer is closed.
     */
    public synchronized void optimizeIndex() {
        this.optimizeIndex = true;
    }

    /**
     * Remembers the given user as spammer: during the next flush the user's
     * posts are removed from the index and cached posts of this user are not
     * inserted.
     *
     * @param username name of the user to flag
     */
    public synchronized void flagUser(final String username) {
        this.usersToFlag.add(username);
    }

    /**
     * Removes the given user from the set of users flagged as spammers, so
     * that posts of this user are inserted again.
     *
     * @param userName name of the user to unflag
     */
    public synchronized void unFlagUser(final String userName) {
        this.usersToFlag.remove(userName);
    }

    /**
     * Caches the given post for deletion; the post is removed from the index
     * during the next flush.
     *
     * @param contentId post's content id
     */
    public synchronized void deleteDocumentForContentId(final Integer contentId) {
        this.contentIdsToDelete.add(contentId);
    }

    /**
     * Caches the given posts for deletion; the posts are removed from the
     * index during the next flush.
     *
     * @param contentIdsToDelete list of content ids which should be removed from the index
     */
    public synchronized void deleteDocumentsInIndex(final List<Integer> contentIdsToDelete) {
        this.contentIdsToDelete.addAll(contentIdsToDelete);
    }

    /**
     * Caches the given post for insertion; the post is written to the index
     * during the next flush.
     *
     * @param doc post document to insert into the index
     */
    public synchronized void insertDocument(final Document doc) {
        this.postsToInsert.add(doc);
    }

    /**
     * Caches the given posts for insertion; the posts are written to the
     * index during the next flush.
     *
     * @param docs post documents to insert into the index
     */
    public synchronized void insertDocuments(final List<Document> docs) {
        this.postsToInsert.addAll(docs);
    }

    /**
     * Applies all cached operations to the index. Does nothing if the index
     * is disabled.
     *
     * First the cached deletions (single posts and all posts of flagged
     * users) are executed via the index reader, then the cached posts are
     * written via the index writer. Errors of single operations are only
     * logged; the caches are cleared afterwards in any case. Finally the
     * index is brought back into read mode.
     */
    public void flush() {
        synchronized (this) {
            if (!isIndexEnabled()) {
                return;
            }

            // --- step 1: deletions (posts by content id, posts of flagged users)
            log.debug("Performing " + contentIdsToDelete.size() + " delete operations");
            final boolean deletedViaReader = !this.contentIdsToDelete.isEmpty() || !this.usersToFlag.isEmpty();
            if (deletedViaReader) {
                this.ensureReadAccess();

                for (final Integer idToDelete : this.contentIdsToDelete) {
                    try {
                        this.purgeDocumentForContentId(idToDelete);
                        log.debug("deleted post " + idToDelete);
                    } catch (final IOException e) {
                        log.error("Error deleting post " + idToDelete + " from index", e);
                    }
                }

                for (final String spammer : this.usersToFlag) {
                    try {
                        final int purged = purgeDocumentsForUser(spammer);
                        log.debug("Purged " + purged + " posts for user " + spammer);
                    } catch (final IOException e) {
                        log.error("Error deleting spam posts for user " + spammer + " from index", e);
                    }
                }
            }

            // --- step 2: insertions
            log.debug("Performing " + postsToInsert.size() + " insert operations");
            final boolean insertedViaWriter = !this.postsToInsert.isEmpty();
            if (insertedViaWriter) {
                this.ensureWriteAccess();
                try {
                    this.insertRecordsIntoIndex(postsToInsert);
                } catch (final IOException e) {
                    log.error("Error adding posts to index.", e);
                }
            }

            // --- step 3: all cached operations are handled - drop them
            this.postsToInsert.clear();
            this.contentIdsToDelete.clear();
            this.usersToFlag.clear();

            // --- step 4: back to read mode
            // FIXME: this is a bit ugly...
            if (deletedViaReader && !insertedViaWriter) {
                // only the reader was used: close and reopen it
                try {
                    closeIndexReader();
                    openIndexReader();
                } catch (final IOException e) {
                    log.error("Error commiting index update.", e);
                }
            } else {
                ensureReadAccess();
            }
        }
    }

    /**
     * Closes the currently opened reader or writer and opens it again,
     * discards all cached operations and resets the cached newest log_date
     * and tas_id.
     *
     * If the index is disabled it is initialized first; when that fails, the
     * error is logged and the method returns without any further changes.
     */
    public void reset() {
        synchronized (this) {
            if (!isIndexEnabled()) {
                try {
                    init();
                } catch (final Exception e) {
                    log.error("Error initializing index " + this.luceneIndexPath + " - index stays disabled", e);
                    return;
                }
            }

            // the access mode is unset if the index was enabled without a
            // successful initialization - switching on null would fail
            final AccessMode currentMode = (this.accessMode != null) ? this.accessMode : AccessMode.None;
            switch (currentMode) {
            case ReadOnly:
                accessMode = AccessMode.None;
                try {
                    closeIndexReader();
                } catch (final IOException e) {
                    log.error("IOException while closing index reader", e);
                }
                try {
                    openIndexReader();
                } catch (final IOException e) {
                    log.error("Error opening index reader", e);
                }
                break;
            case WriteOnly:
                accessMode = AccessMode.None;
                try {
                    closeIndexWriter();
                } catch (final IOException e) {
                    log.error("IOException while closing index writer", e);
                }
                try {
                    openIndexWriter();
                } catch (final IOException e) {
                    log.error("Error opening index writer", e);
                }
                break;
            default:
                // nothing to do
            }

            // delete the lists
            this.postsToInsert.clear();
            this.contentIdsToDelete.clear();
            this.usersToFlag.clear();

            // reset the cached query parameters
            this.lastLogDate = null;
            this.lastTasId = null;
        }
    }

    //------------------------------------------------------------------------
    // private index access interface
    //------------------------------------------------------------------------
    /**
     * Writes the given post into the index, unless its user is currently
     * flagged as spammer.
     *
     * @param post the document to add
     * @throws CorruptIndexException
     * @throws IOException
     */
    private void insertRecordIntoIndex(final Document post) throws CorruptIndexException, IOException {
        final String postOwner = post.get(FLD_USER_NAME);
        if (this.usersToFlag.contains(postOwner)) {
            // skip users which were flagged as spammers
            return;
        }

        indexWriter.addDocument(post);
    }

    /**
     * Writes the given posts into the index, one after another; the first
     * failing document aborts the loop.
     *
     * @param posts the documents to add
     * @throws CorruptIndexException
     * @throws IOException
     */
    private void insertRecordsIntoIndex(final Collection<Document> posts)
            throws CorruptIndexException, IOException {
        for (final Document document : posts) {
            insertRecordIntoIndex(document);
        }
    }

    /**
     * Queries the index and returns the first hit only.
     *
     * @param searchQuery the search query
     * @param hitsPerPage maximal number of result items to retrieve
     * @param ordering sort ordering
     * @return the first matching document according to the given ordering or
     *         <code>null</code> if nothing matched or the search failed
     */
    private Document searchIndex(final Query searchQuery, final int hitsPerPage, final Sort ordering) {
        // the searcher works on the index reader
        this.ensureReadAccess();
        final IndexSearcher indexSearcher = new IndexSearcher(indexReader);

        Document firstHit = null;
        try {
            final TopDocs hits = indexSearcher.search(searchQuery, null, hitsPerPage, ordering);
            if (hits.totalHits > 0) {
                firstHit = indexSearcher.doc(hits.scoreDocs[0].doc);
            }
        } catch (final Exception e) {
            log.error("Error performing index search in file " + this.luceneIndexPath, e);
        } finally {
            try {
                indexSearcher.close();
            } catch (final IOException e) {
                log.error("Error closing index " + this.luceneIndexPath + " for searching", e);
            }
        }

        return firstHit;
    }

    /**
     * Removes the post with the given content id from the index.
     *
     * @param contentId post's content id
     * @return number of posts deleted from index
     *
     * @throws StaleReaderException
     * @throws CorruptIndexException
     * @throws LockObtainFailedException
     * @throws IOException
     */
    private int purgeDocumentForContentId(final Integer contentId)
            throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
        // the content id is matched by its string representation
        return purgeDocuments(new Term(FLD_CONTENT_ID, contentId.toString()));
    }

    /**
     * Deletes all documents of the given user from the index.
     *
     * @param username name of the user whose posts are removed
     * @return number of posts deleted from index
     * @throws CorruptIndexException
     * @throws IOException
     */
    private int purgeDocumentsForUser(final String username) throws CorruptIndexException, IOException {
        // every post owned by the user carries the user name in its user name field
        return purgeDocuments(new Term(FLD_USER_NAME, username));
    }

    /**
     * Removes all posts matching the given term from the index, using the
     * index reader.
     *
     * @param searchTerm term identifying the documents to delete
     * @return the value returned by the reader's deleteDocuments call
     * @throws CorruptIndexException
     * @throws IOException
     */
    private int purgeDocuments(final Term searchTerm) throws CorruptIndexException, IOException {
        final int deleted = this.indexReader.deleteDocuments(searchTerm);
        return deleted;
    }

    /**
     * Switches the index to read mode: unless the index is already in read
     * mode, the index writer is closed and the index reader is opened.
     * Errors are only logged; if the reader can't be opened the access mode
     * stays {@link AccessMode#None}.
     */
    protected void ensureReadAccess() {
        if (accessMode == AccessMode.ReadOnly) {
            // already opened for reading
            return;
        }

        // step 1: release the writer
        try {
            closeIndexWriter();
        } catch (final IOException e) {
            log.error("IOException while closing indexwriter", e);
        }
        accessMode = AccessMode.None;

        // step 2: open the reader (sets the access mode on success)
        try {
            openIndexReader();
        } catch (final IOException e) {
            log.error("Error opening index reader", e);
        }
    }

    /**
     * Opens the index writer on the index directory (without creating a new
     * index, with unlimited field length) and marks the index as opened for
     * writing.
     *
     * @throws CorruptIndexException
     * @throws LockObtainFailedException
     * @throws IOException
     */
    protected void openIndexWriter() throws CorruptIndexException, LockObtainFailedException, IOException {
        log.debug("Opening index " + luceneIndexPath + " for writing");
        final boolean createNewIndex = false;
        this.indexWriter = new IndexWriter(this.indexDirectory, getAnalyzer(), createNewIndex,
                IndexWriter.MaxFieldLength.UNLIMITED);
        this.accessMode = AccessMode.WriteOnly;
    }

    /**
     * Commits and closes the index writer; does nothing if no writer is
     * open. If an optimization was requested via {@link #optimizeIndex()} the
     * index is optimized before the writer is closed.
     *
     * After a successful close the writer reference is cleared. Otherwise a
     * later call (e.g. from {@link #ensureReadAccess()} after a failed open)
     * would try to commit on the already closed writer.
     *
     * @throws CorruptIndexException
     * @throws IOException
     */
    protected void closeIndexWriter() throws CorruptIndexException, IOException {
        if (this.indexWriter == null) {
            return;
        }

        log.debug("Closing index " + luceneIndexPath + " for writing");
        indexWriter.commit();
        // optimize index if requested
        if (this.optimizeIndex) {
            log.debug("optimizing index " + luceneIndexPath);
            indexWriter.optimize();
            log.debug("optimizing index " + luceneIndexPath + " DONE");
            this.optimizeIndex = false;
        }
        // close index for writing
        indexWriter.close();
        // only reached if close() succeeded - the closed writer must not be used again
        this.indexWriter = null;
    }

    /**
     * Opens the index reader on the index directory (not in lucene's
     * read-only mode, as the reader is also used for deletions) and marks
     * the index as opened for reading.
     *
     * @throws CorruptIndexException
     * @throws IOException
     */
    protected void openIndexReader() throws CorruptIndexException, IOException {
        log.debug("Opening index " + luceneIndexPath + " for reading");
        final boolean readOnly = false;
        this.indexReader = IndexReader.open(this.indexDirectory, readOnly);
        this.accessMode = AccessMode.ReadOnly;
    }

    /**
     * Closes the index reader; does nothing if no reader was opened so far
     * (same contract as {@link #closeIndexWriter()}).
     *
     * @throws IOException if closing the reader fails
     */
    protected void closeIndexReader() throws IOException {
        if (this.indexReader == null) {
            // e.g. the reader could not be opened before
            return;
        }

        log.debug("Closing index " + luceneIndexPath + " for reading");
        indexReader.close();
    }

    /**
     * Switches the index to write mode: unless the index is already in write
     * mode, the index reader is closed and the index writer is opened.
     * Errors are only logged; if the writer can't be opened the access mode
     * stays {@link AccessMode#None}.
     */
    protected void ensureWriteAccess() {
        if (this.accessMode == AccessMode.WriteOnly) {
            // already opened for writing
            return;
        }

        // step 1: release the reader
        try {
            closeIndexReader();
        } catch (final IOException e) {
            log.error("IOException while closing index reader", e);
        }
        this.accessMode = AccessMode.None;

        // step 2: open the writer (sets the access mode on success)
        try {
            openIndexWriter();
        } catch (final IOException e) {
            log.error("Error opening index writer", e);
        }
    }

    /**
     * Marks this index as not ready (e.g. when opening it failed or after
     * it was closed).
     */
    public void disableIndex() {
        isReady = false;
    }

    /**
     * Marks this index as ready.
     */
    public void enableIndex() {
        isReady = true;
    }

    /**
     * Tells whether this index is marked as ready.
     *
     * @return true, if index is ready - false, otherwise
     */
    public boolean isIndexEnabled() {
        return isReady;
    }

    /**
     * get managed resource type
     *
     * The name of the returned class is used by {@link #getResourceName()}.
     *
     * @return the class of the resource managed by this index
     */
    protected abstract Class<? extends Resource> getResourceType();

    /**
     * Derives the resource name from the canonical class name of the managed
     * resource type: everything after the last dot, or the whole name if it
     * contains no dot.
     *
     * @return the managed resource name
     */
    public String getResourceName() {
        final String canonicalName = getResourceType().getCanonicalName();
        final int lastDot = canonicalName.lastIndexOf('.');

        return (lastDot > 0) ? canonicalName.substring(lastDot + 1) : canonicalName;
    }

    /**
     * Gives access to the cache of posts waiting for insertion; the returned
     * set is the internal one, not a copy.
     *
     * @return the postsToInsert
     */
    public Set<Document> getPostsToInsert() {
        return postsToInsert;
    }

    /**
     * Gives access to the names of the users flagged as spammers; the
     * returned set is the internal one, not a copy.
     *
     * @return the usersToFlag
     */
    public Set<String> getUsersToFlag() {
        return this.usersToFlag;
    }

    /**
     * Returns the analyzer which is handed to the index writer when it is
     * opened.
     *
     * @return the analyzer
     */
    public Analyzer getAnalyzer() {
        return this.analyzer;
    }

    /**
     * Sets the analyzer which is handed to the index writer when it is
     * opened.
     *
     * @param analyzer the analyzer to set
     */
    public void setAnalyzer(final Analyzer analyzer) {
        this.analyzer = analyzer;
    }

    /**
     * Returns the id identifying this index among redundant resource indices.
     *
     * @return the indexId
     */
    public int getIndexId() {
        return this.indexId;
    }

    /**
     * Sets the id of this index. The index path is assembled during
     * initialization, so this does not change the path of an index that was
     * already initialized.
     *
     * @param indexId the indexId to set
     */
    public void setIndexId(final int indexId) {
        this.indexId = indexId;
    }
}