org.fracturedatlas.athena.apa.indexing.IndexingApaAdapter.java Source code

Introduction

Here is the source code for org.fracturedatlas.athena.apa.indexing.IndexingApaAdapter.java
Source

/*
    
ATHENA Project: Management Tools for the Cultural Sector
Copyright (C) 2010, Fractured Atlas
    
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
    
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
    
You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
    
 */
package org.fracturedatlas.athena.apa.indexing;

import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.NoSuchDirectoryException;
import org.apache.lucene.util.Version;
import org.fracturedatlas.athena.apa.AbstractApaAdapter;
import org.fracturedatlas.athena.client.PTicket;
import org.fracturedatlas.athena.id.IdAdapter;
import org.fracturedatlas.athena.search.AthenaSearch;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;

/*
 * TODO: This should be refactored to use a decorator pattern
 */
public abstract class IndexingApaAdapter extends AbstractApaAdapter {

    // Lucene directory that holds the index; autowired, and also assignable through setDirectory().
    @Autowired
    Directory directory;

    // Analyzer used both when writing documents and when parsing queries; assigned in initializeIndex().
    Analyzer analyzer;

    // When true, the add/delete/update/search methods of this class return without touching the index.
    Boolean indexingDisabled = false;

    // Writer configuration; reassigned in initializeIndex(), addAllToIndex() and getWriter().
    IndexWriterConfig config;

    // Record types that are never added to the index.
    Set<String> excludedTypes = new HashSet<String>();

    //do not use this directly, call getWriter() instead
    // Declared static, so the same writer reference is shared by every instance of this class.
    static IndexWriter writer;

    Logger logger = LoggerFactory.getLogger(this.getClass().getName());

    // Number of hits searchIndex() asks for when the search carries no limit.
    public static final Integer DEFAULT_PAGE_SIZE = 10;
    // Merge factor given to the merge policy built in initializeIndex().
    public static final Integer MERGE_FACTOR = 20;

    // Name of the catch-all field that receives every property value of a record;
    // also the default field handed to the query parser in searchIndex().
    public final static String DOC_TEXT = "text";

    /*
     * Prepares the analyzer and the writer configuration used for indexing.
     * The configuration gets a document-count based log merge policy whose
     * merge factor is MERGE_FACTOR.
     *
     * Remember that this method is called BEFORE the directory is injected by
     * spring, so nothing in here may depend on the index being available.
     */
    public void initializeIndex() {
        LogMergePolicy docCountPolicy = new LogDocMergePolicy();
        docCountPolicy.setMergeFactor(MERGE_FACTOR);

        analyzer = new WhitespaceLowerCaseAnalyzer();
        config = new IndexWriterConfig(Version.LUCENE_32, analyzer);
        config.setMergePolicy(docCountPolicy);
    }

    /*
     * Reports whether the index has to be (re)built.
     *
     * Returns true when no index exists in the directory, or when checking
     * for one throws any exception; returns false only when an index exists.
     */
    public Boolean rebuildNeeded() {
        try {
            if (IndexReader.indexExists(directory)) {
                return false;
            }
            logger.debug("No index exists");
        } catch (Exception e) {
            // Any failure to inspect the index is treated the same as "no index".
            logger.debug("Exception while reading index {}", e.getMessage());
        }

        return true;
    }

    /*
     * Adds every record of the collection to the index in a single pass and
     * optimizes the index once at the end.
     *
     * Will not search for a record before inserting it into the index, i.e. no
     * existing document with the same id is removed first.
     * Uses the shared writer returned by getWriter(); the writer is left open.
     *
     * Skipped without error: null records, records without an id, records
     * whose type is listed in excludedTypes, and null property values.
     *
     * Nothing is thrown for IO problems; they are logged and the method returns.
     *
     * records: the records to index; null or empty is a no-op.
     */
    public void addAllToIndex(Collection<PTicket> records) {
        if (records == null || records.isEmpty() || indexingDisabled) {
            return;
        }

        try {
            IndexWriter indexWriter = getWriter();
            if (indexWriter == null) {
                // getWriter() already logged why the writer could not be created.
                logger.error("No index writer available; {} records were not indexed", records.size());
                return;
            }

            for (PTicket record : records) {
                // Same preconditions addToIndex() applies to a single record.
                if (record == null || record.getId() == null
                        || excludedTypes.contains(record.getType())) {
                    continue;
                }

                Document doc = new Document();
                StringBuilder documentText = new StringBuilder();
                doc.add(new Field("_id", record.getIdAsString(), Field.Store.YES, Field.Index.ANALYZED));
                doc.add(new Field("_type", record.getType(), Field.Store.YES, Field.Index.ANALYZED));

                for (String key : record.getProps().keySet()) {
                    List<String> vals = record.getProps().get(key);
                    if (vals == null) {
                        continue;
                    }

                    for (String val : vals) {
                        // A null value would make the Field constructor throw and abort the batch.
                        if (val == null) {
                            continue;
                        }
                        addToDocument(doc, key, val);
                        documentText.append(val).append(" ");
                    }
                }
                // Catch-all field so a query without a field name searches all values.
                addToDocument(doc, DOC_TEXT, documentText.toString());
                indexWriter.addDocument(doc);
            }

            indexWriter.optimize();
        } catch (NullPointerException npe) {
            logger.error("Null pointer exception coming.  Did you call initializeIndex() ?", npe);
        } catch (IOException e) {
            logger.error("IOException when writing to index", e);
        }
    }

    /*
     * Adds a single record to the index and optimizes the index afterwards.
     *
     * Will not search for the record before inserting it into the index, i.e.
     * no existing document with the same id is removed first (updateIndex()
     * does that). Uses the shared writer returned by getWriter(); the writer
     * is left open.
     *
     * Does nothing when the record is null, has no id, indexing is disabled,
     * or the record's type is listed in excludedTypes. Null property values
     * are skipped. IO problems are logged, not thrown.
     *
     * record: the record to index.
     */
    public void addToIndex(PTicket record) {
        if (record == null || record.getId() == null || indexingDisabled) {
            return;
        }

        logger.debug("Checking to see if [{}] is excluded from the index", record.getType());
        if (excludedTypes.contains(record.getType())) {
            logger.debug("It is");
            return;
        }
        logger.debug("It isn't");

        Document doc = new Document();
        StringBuilder documentText = new StringBuilder();
        doc.add(new Field("_id", record.getIdAsString(), Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("_type", record.getType(), Field.Store.YES, Field.Index.ANALYZED));

        for (String key : record.getProps().keySet()) {
            List<String> vals = record.getProps().get(key);
            if (vals == null) {
                continue;
            }

            for (String val : vals) {
                // A null value would make the Field constructor throw an uncaught NPE.
                if (val == null) {
                    continue;
                }
                addToDocument(doc, key, val);
                documentText.append(val).append(" ");
            }
        }
        // Catch-all field so a query without a field name searches all values.
        addToDocument(doc, DOC_TEXT, documentText.toString());

        try {
            IndexWriter indexWriter = getWriter();
            if (indexWriter == null) {
                // getWriter() already logged why the writer could not be created.
                logger.error("No index writer available; record [{}] was not indexed", record.getIdAsString());
                return;
            }
            indexWriter.addDocument(doc);
            indexWriter.optimize();
        } catch (NullPointerException npe) {
            logger.error("Null pointer exception coming.  Did you call initializeIndex() ?", npe);
        } catch (IOException e) {
            // Pass the throwable as the last argument so SLF4J logs the stack trace.
            logger.error("IOException when writing to index", e);
        }
    }

    /*
     * Removes the document belonging to the given record from the index by
     * delegating to deleteFromIndex(Object) with the record's id as a string.
     * Does nothing when the record is null or indexing is disabled.
     */
    public void deleteFromIndex(PTicket record) {
        boolean nothingToDo = record == null || indexingDisabled;
        if (!nothingToDo) {
            deleteFromIndex(record.getIdAsString());
        }
    }

    /*
     * Deletes every document whose "_id" term equals the string form of the
     * given id, then optimizes the index.
     *
     * Does nothing when the id is null or indexing is disabled. IO problems
     * are logged, not thrown.
     *
     * NOTE(review): "_id" is indexed as an ANALYZED field, while the Term used
     * here is the raw id string; if the analyzer changes the token (e.g. by
     * lower-casing) ids containing such characters would not match - confirm.
     *
     * oid: the id of the record to remove; converted with IdAdapter.toString().
     */
    public void deleteFromIndex(Object oid) {
        if (oid == null || indexingDisabled) {
            return;
        }
        String id = IdAdapter.toString(oid);

        try {
            IndexWriter indexWriter = getWriter();
            if (indexWriter == null) {
                // getWriter() already logged why the writer could not be created.
                logger.error("No index writer available; [{}] was not removed from the index", id);
                return;
            }
            indexWriter.deleteDocuments(new Term("_id", id));
            indexWriter.optimize();
        } catch (NullPointerException npe) {
            logger.error("Null pointer exception coming.  Did you call initializeIndex() ?", npe);
        } catch (IOException e) {
            logger.error("IOException when deleting [" + id + "] from index", e);
        }
    }

    /*
     * Closes the given searcher and reader, in that order. Either argument may
     * be null, in which case it is skipped. A failure to close one of them is
     * logged (with stack trace) and does not prevent closing the other.
     */
    private void cleanup(IndexSearcher searcher, IndexReader reader) {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException ioe) {
                logger.error("Error trying to close index searcher", ioe);
            }
        }

        if (reader != null) {
            try {
                reader.close();
            } catch (IOException ioe) {
                // The original message blamed the index writer; this is the reader.
                logger.error("Error trying to close index reader", ioe);
            }
        }
    }

    /*
     * Replaces the indexed document of a record: first deletes whatever is
     * stored under the record's id, then adds the record again via addToIndex.
     * Does nothing for a null record, a record without an id, or when
     * indexing is disabled.
     */
    public void updateIndex(PTicket record) {
        boolean indexable = record != null && record.getId() != null && !indexingDisabled;
        if (indexable) {
            deleteFromIndex(record);
            addToIndex(record);
        }
    }

    /*
     * Adds a stored, analyzed field with the given name and value to the
     * document. A null name or value is skipped, because the Field constructor
     * rejects both with a NullPointerException.
     */
    private void addToDocument(Document doc, String field, String value) {
        if (field == null || value == null) {
            logger.debug("Skipping field [{}] because its name or value is null", field);
            return;
        }
        doc.add(new Field(field, value, Field.Store.YES, Field.Index.ANALYZED));
    }

    /*
     * Searches the index and returns the "_id" values of the matching
     * documents.
     *
     * The query sent to Lucene is the search's query with " AND _type:<type>"
     * appended; when the search has no query (null or blank) only the type
     * clause is used. Unqualified terms are matched against the DOC_TEXT
     * field. Paging: hits [start, start + limit) are returned, where start
     * defaults to 0 (negative values are treated as 0) and limit defaults to
     * DEFAULT_PAGE_SIZE.
     *
     * The returned set iterates in hit order (best score first).
     *
     * TODO: Converting from these ids to Set<PTicket> further up may still
     * not preserve the order of the results.
     *
     * Returns an empty set - never null - when indexing is disabled, the page
     * is empty, no writer is available, or the search fails; failures are
     * logged. The reader and searcher opened for the search are always closed.
     */
    public Set<Object> searchIndex(AthenaSearch search) {
        // Insertion-ordered so callers see the ids in ranking order.
        Set<Object> ids = new LinkedHashSet<Object>();

        if (indexingDisabled) {
            return ids;
        }

        String typeClause = "_type:" + search.getType();
        String userQuery = search.getQuery();
        // Concatenating a null query would search for the literal text "null".
        String query = (userQuery == null || userQuery.trim().length() == 0)
                ? typeClause
                : userQuery + " AND " + typeClause;
        logger.debug("{}", query);

        int start = (search.getStart() == null) ? 0 : Math.max(0, search.getStart());
        int limit = (search.getLimit() == null) ? DEFAULT_PAGE_SIZE : search.getLimit();
        int numResults = start + limit;
        if (limit <= 0 || numResults <= 0) {
            // Lucene rejects a request for zero hits; an empty page is the answer anyway.
            return ids;
        }

        IndexReader reader = null;
        IndexSearcher indexSearcher = null;
        try {
            IndexWriter indexWriter = getWriter();
            if (indexWriter == null) {
                // getWriter() already logged why the writer could not be created.
                logger.error("No index writer available; returning no search results");
                return ids;
            }

            QueryParser queryParser = new QueryParser(Version.LUCENE_32, DOC_TEXT, analyzer);
            queryParser.setAllowLeadingWildcard(true);
            Query q = queryParser.parse(query);

            // Reader opened from the writer so that uncommitted changes are visible.
            reader = IndexReader.open(indexWriter, false);
            indexSearcher = new IndexSearcher(reader);
            TopDocs topDocs = indexSearcher.search(q, numResults);
            ScoreDoc[] hits = topDocs.scoreDocs;
            for (int i = start; i < hits.length; ++i) {
                Document d = indexSearcher.doc(hits[i].doc);
                ids.add(d.get("_id"));
            }
            return ids;
        } catch (Exception e) {
            logger.error("Search failed for query [" + query + "]", e);
            // Do not hand back a partially filled page.
            return new LinkedHashSet<Object>();
        } finally {
            cleanup(indexSearcher, reader);
        }
    }

    /*
     * Returns the Lucene directory currently used for the index.
     */
    public Directory getDirectory() {
        return this.directory;
    }

    /*
     * Returns the shared index writer, creating it on first use.
     *
     * Returns the existing writer when there is one. Otherwise, unless
     * indexing is disabled, a new writer is opened on the directory with a
     * fresh configuration built from the current analyzer. May return null:
     * when indexing is disabled, or when the writer could not be created.
     * Any other IOException than a missing directory disables indexing.
     *
     * NOTE(review): the configuration created here replaces the one prepared
     * in initializeIndex(), so the merge policy set there does not appear to
     * reach the writer - confirm whether that is intended.
     */
    private IndexWriter getWriter() {
        if (writer != null || indexingDisabled) {
            return writer;
        }

        try {
            config = new IndexWriterConfig(Version.LUCENE_32, analyzer);
            writer = new IndexWriter(directory, config);
        } catch (NoSuchDirectoryException nsde) {
            logger.error(
                    "Could not instantiate an index writer.  Index does not exist.  Add some items to the index and try again.",
                    nsde);
        } catch (IOException ioe) {
            logger.error("Could not instantiate an index writer.  Indexing will now be disabled", ioe);
            indexingDisabled = true;
        }

        return writer;
    }

    /*
     * Closes the shared index writer, if there is one, and clears the
     * reference so that the next getWriter() call opens a new writer.
     * An IOException while closing is logged; the reference is cleared anyway.
     */
    private void closeWriter() {
        if (writer != null) {
            try {
                writer.close();
            } catch (IOException ioe) {
                logger.error("Could not close index writer", ioe);
            }
        }

        writer = null;
    }

    /*
     * Switches to the given directory and, when that directory holds no
     * usable index, rebuilds the index from the stored records.
     *
     * The current writer is closed first. If indexing is disabled only an
     * informational message is logged. Otherwise, when rebuildNeeded()
     * reports true, every type returned by getTypes() that is not excluded
     * is loaded with findTickets() and indexed with addAllToIndex().
     */
    public void setDirectory(Directory directory) {
        this.directory = directory;
        closeWriter();

        // Evaluated before the disabled check, as the index probe always ran first.
        boolean indexMissing = rebuildNeeded();

        if (indexingDisabled) {
            logger.info(
                    "Indexing is disabled.  Re-enable indexing by setting athena.index.disabled=false in db.properties");
            return;
        }
        if (!indexMissing) {
            return;
        }

        logger.info("Rebuilding index");
        for (String type : getTypes()) {
            if (excludedTypes.contains(type)) {
                logger.info("Skipping [{}] because it is excluded", type);
                continue;
            }

            AthenaSearch everythingOfType = new AthenaSearch.Builder().type(type).build();
            Set<PTicket> records = findTickets(everythingOfType);
            logger.info("Indexing {} records of type {}", records.size(), type);

            long begun = System.currentTimeMillis();
            addAllToIndex(records);
            long finished = System.currentTimeMillis();
            logger.info("Done.  Took {} millis", (finished - begun));
        }
    }

    /*
     * Returns whether indexing is currently disabled.
     */
    public Boolean getIndexingDisabled() {
        return indexingDisabled;
    }

    /*
     * Enables or disables indexing.
     *
     * A null argument is stored as false: the flag is unboxed in conditions
     * throughout this class, so storing null would make each of them throw a
     * NullPointerException.
     */
    public void setIndexingDisabled(Boolean indexingDisabled) {
        this.indexingDisabled = Boolean.TRUE.equals(indexingDisabled);
    }

    /*
     * String flavoured variant of setIndexingDisabled().
     *
     * This is such a hacky way to do this, but if the implementor fat-fingers
     * the configured value so that it isn't a proper boolean (e.g. "yes" or
     * "truee"), Spring would choke on the auto-injection. Going through
     * Boolean.parseBoolean() instead maps anything that isn't "true" to false.
     */
    public void setIndexingDisabledString(String indexingDisabled) {
        boolean parsed = Boolean.parseBoolean(indexingDisabled);
        setIndexingDisabled(parsed);
    }

    /*
     * Returns the record types that are kept out of the index. The set held
     * by this adapter is returned directly, not a copy.
     */
    public Set<String> getExcludedTypes() {
        return this.excludedTypes;
    }

    /*
     * Replaces the set of record types that are kept out of the index.
     *
     * A null argument is stored as an empty set: excludedTypes.contains() is
     * called unconditionally when adding records and rebuilding the index, so
     * storing null would make those calls throw a NullPointerException.
     */
    public void setExcludedTypes(Set<String> excludedTypes) {
        logger.info("Setting excluded types to {}", excludedTypes);
        this.excludedTypes = (excludedTypes == null) ? new HashSet<String>() : excludedTypes;
    }
}