de.nava.informa.search.ChannelIndexer.java Source code

Introduction

Here is the source code for de.nava.informa.search.ChannelIndexer.java
Source

//
// Informa -- RSS Library for Java
// Copyright (c) 2002 by Niko Schmuck
//
// Niko Schmuck
// http://sourceforge.net/projects/informa
// mailto:niko_schmuck@users.sourceforge.net
//
// This library is free software.
//
// You may redistribute it and/or modify it under the terms of the GNU
// Lesser General Public License as published by the Free Software Foundation.
//
// Version 2.1 of the license should be included with this distribution in
// the file LICENSE. If the license is not included with this distribution,
// you may find a copy at the FSF web site at 'www.gnu.org' or 'www.fsf.org',
// or you may write to the Free Software Foundation, 675 Mass Ave, Cambridge, 
// MA 02139 USA.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied waranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//

// $Id: ChannelIndexer.java,v 1.8 2006/12/04 23:43:28 italobb Exp $

package de.nava.informa.search;

import de.nava.informa.core.ChannelIF;
import de.nava.informa.core.ItemIF;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;

import java.util.ArrayList;
import java.util.Collection;

/**
 * Utility class which allows to generate respective maintain a
 * fulltext index for news items.
 *
 * @author Niko Schmuck (niko@nava.de)
 */
public final class ChannelIndexer {

    private static final Log LOG = LogFactory.getLog(ChannelIndexer.class);

    private String indexDir;
    private int nrOfIndexedItems;
    private Analyzer analyzer;

    /**
     * Constructor which allows to specify the index directory.
     * For the full-text indexing process the lucene
     * {@link org.apache.lucene.analysis.standard.StandardAnalyzer}
     * is used.
     *
     * @param indexDir - The directory in which the index files are stored.
     */
    public ChannelIndexer(String indexDir) {
        this.indexDir = indexDir;
        this.nrOfIndexedItems = 0;
        this.analyzer = new StandardAnalyzer();
    }

    /**
     * Index all news items contained in the given channels.
     *
     * @param createNewIndex - wether a new index should be generated or
     *                       an existant one should be taken into account
     * @param channels       - a collection of ChannelIF objects
     */
    public void indexChannels(boolean createNewIndex, Collection channels) throws java.io.IOException {

        Collection<ItemIF> items = new ArrayList<ItemIF>();
        for (Object channel1 : channels) {
            ChannelIF channel = (ChannelIF) channel1;
            if (LOG.isDebugEnabled()) {
                LOG.debug("Searching channel " + channel + " for items.");
            }
            items.addAll(channel.getItems());
        }
        if (!items.isEmpty()) {
            indexItems(createNewIndex, items);
        } else if (LOG.isDebugEnabled()) {
            LOG.debug("No items found for indexing.");
        }
    }

    /**
     * Index all given news items.
     *
     * @param createNewIndex - Wether a new index should be generated or
     *                       an existant one should be taken into account.
     * @param items          - A collection of ItemIF objects.
     */
    public void indexItems(boolean createNewIndex, Collection<ItemIF> items) throws java.io.IOException {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Start writing index.");
        }
        IndexWriter writer = new IndexWriter(indexDir, analyzer, createNewIndex);
        try {
            for (ItemIF item1 : items) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Add item " + item1 + " to index.");
                }
                writer.addDocument(ItemDocument.makeDocument(item1));
            }
            writer.optimize();
            nrOfIndexedItems = writer.docCount();
        } finally {
            writer.close();
        }
        if (LOG.isDebugEnabled()) {
            LOG.info("Finished writing index.");
        }
    }

    /**
     * Returns the number of documents that were in the index last time
     * the index operation was performed.
     * <p/>
     * Note: Use only directly after the indexing process, otherwise the
     * return value may be wrong.
     */
    public int getNrOfIndexedItems() {
        return nrOfIndexedItems;
    }

    public void setIndexDir(String indexDir) {
        this.indexDir = indexDir;
    }

    public String getIndexDir() {
        return indexDir;
    }

}