com.github.rnewson.couchdb.lucene.Index.java Source code

Java tutorial

Introduction

Here is the source code for com.github.rnewson.couchdb.lucene.Index.java

Source

package com.github.rnewson.couchdb.lucene;

/**
 * Copyright 2009 Robert Newson
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); 
 * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0 
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import static com.github.rnewson.couchdb.lucene.Utils.docQuery;
import static com.github.rnewson.couchdb.lucene.Utils.token;

import java.io.IOException;
import java.util.Arrays;
import java.util.Scanner;

import net.sf.json.JSONArray;
import net.sf.json.JSONObject;

import org.apache.commons.httpclient.HttpException;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Entry point of the indexer process.
 *
 * <p>
 * {@link #main(String[])} starts a daemon {@link Indexer} thread and then
 * reads standard input line by line; every JSON line carrying both a
 * {@code "type"} and a {@code "db"} key marks the indexer as stale, which
 * eventually triggers an index update.
 */
public final class Index {

    /** Client for the database server located at {@code Config.DB_URL}. */
    private static final Database DB = new Database(Config.DB_URL);

    /**
     * Background task that brings the Lucene index in {@code dir} up to date
     * whenever it has been flagged as stale.
     *
     * <p>
     * The stale flag is guarded by this object's monitor.
     * {@code updateIndex()} is synchronized on the same monitor, so a call to
     * {@link #setStale(boolean)} made while an update is running waits until
     * that update has finished.
     */
    static class Indexer implements Runnable {

        /** True when changes may be pending; starts true so the first pass indexes. */
        private boolean isStale = true;

        /** Directory holding the index that is read, written and unlocked. */
        private final Directory dir;

        /**
         * @param dir
         *            the Lucene directory this indexer maintains.
         */
        public Indexer(final Directory dir) {
            this.dir = dir;
        }

        /**
         * @return whether the index is currently flagged as stale.
         */
        public synchronized boolean isStale() {
            return isStale;
        }

        /**
         * Unconditionally sets the stale flag.
         *
         * @param isStale
         *            the new value of the flag.
         */
        public synchronized void setStale(final boolean isStale) {
            this.isStale = isStale;
        }

        /**
         * Compare-and-set on the stale flag.
         *
         * @param expected
         *            the value the flag must currently have.
         * @param update
         *            the value to store if the flag equals {@code expected}.
         * @return true if the flag matched {@code expected} and was updated,
         *         false if it was left untouched.
         */
        public synchronized boolean setStale(final boolean expected, final boolean update) {
            if (isStale == expected) {
                isStale = update;
                return true;
            }
            return false;
        }

        /**
         * Main loop: waits until the index is stale, lets updates settle, then
         * re-indexes. The loop ends when the thread is interrupted.
         */
        public void run() {
            final Thread self = Thread.currentThread();
            while (!self.isInterrupted()) {
                if (!isStale()) {
                    sleep();
                    continue;
                }

                // Wait for a quiet COMMIT_MIN interval, but never longer than
                // COMMIT_MAX (both in milliseconds) after staleness was seen.
                final long commitBy = System.currentTimeMillis() + Config.COMMIT_MAX;
                boolean quiet = false;
                while (!quiet && !self.isInterrupted() && System.currentTimeMillis() < commitBy) {
                    setStale(false);
                    sleep();
                    quiet = !isStale();
                }

                // sleep() re-asserts the interrupt flag; honour it instead of
                // starting another update on a thread that was asked to stop.
                if (self.isInterrupted()) {
                    break;
                }

                /*
                 * Either no update has occurred in the last COMMIT_MIN
                 * interval or continual updates have occurred for
                 * COMMIT_MAX interval. Either way, index all changes and
                 * commit.
                 */
                try {
                    updateIndex();
                } catch (final IOException e) {
                    Utils.LOG.warn("Exception while updating index.", e);
                }
            }
        }

        /**
         * Sleeps for {@code Config.COMMIT_MIN} milliseconds. If interrupted,
         * logs the event and restores the thread's interrupt status so that
         * {@link #run()} can terminate.
         */
        private void sleep() {
            try {
                Thread.sleep(Config.COMMIT_MIN);
            } catch (final InterruptedException e) {
                Utils.LOG.fatal("Interrupted while sleeping, indexer is exiting.", e);
                // Catching InterruptedException clears the flag; set it again
                // so the run loop actually exits as the message promises.
                Thread.currentThread().interrupt();
            }
        }

        /**
         * Opens a writer on this indexer's directory and applies the merge
         * policy and RAM buffer settings.
         *
         * @return a freshly configured writer; the caller must close it or
         *         roll it back.
         * @throws IOException
         *             if the writer cannot be opened.
         */
        private IndexWriter newWriter() throws IOException {
            // Write to the same directory that is locked/unlocked and read in
            // updateIndex(), rather than to a separately configured location.
            final IndexWriter result = new IndexWriter(dir, Config.ANALYZER, MaxFieldLength.UNLIMITED);

            // Customize merge policy.
            final LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
            mp.setMergeFactor(5);
            mp.setMaxMergeMB(1000);
            mp.setUseCompoundFile(false);
            result.setMergePolicy(mp);

            // Customize other settings.
            result.setRAMBufferSizeMB(Config.RAM_BUF);

            return result;
        }

        /**
         * Performs one full indexing pass: removes documents belonging to
         * databases that no longer exist, indexes every fulltext view of every
         * existing database, and then commits or rolls back.
         *
         * <p>
         * Any exception raised while indexing is logged and causes a rollback
         * instead of a commit.
         *
         * @throws IOException
         *             if preparing the writer or finishing (commit/rollback)
         *             fails.
         */
        private synchronized void updateIndex() throws IOException {
            if (IndexWriter.isLocked(dir)) {
                Utils.LOG.warn("Forcibly unlocking locked index at startup.");
                IndexWriter.unlock(dir);
            }

            // Sorted so that binarySearch can be used below.
            final String[] dbnames = DB.getAllDatabases();
            Arrays.sort(dbnames);

            boolean commit = false;
            boolean expunge = false;
            final IndexWriter writer = newWriter();
            final Progress progress = new Progress();
            try {
                final IndexReader reader = IndexReader.open(dir);
                try {
                    // Load status.
                    progress.load(reader);

                    // Remove documents from deleted databases.
                    final TermEnum terms = reader.terms(new Term(Config.DB, ""));
                    try {
                        do {
                            final Term term = terms.term();
                            // Stop once the enumeration leaves the DB field.
                            if (term == null || !Config.DB.equals(term.field())) {
                                break;
                            }
                            if (Arrays.binarySearch(dbnames, term.text()) < 0) {
                                Utils.LOG.info("Database '" + term.text()
                                        + "' has been deleted, removing all documents from index.");
                                deleteDatabase(term.text(), progress, writer);
                                commit = true;
                                expunge = true;
                            }
                        } while (terms.next());
                    } finally {
                        terms.close();
                    }
                } finally {
                    reader.close();
                }

                // Update all extant databases.
                for (final String dbname : dbnames) {
                    // Iterate through all views in all design documents.
                    final JSONObject designDocs = DB.getAllDocs(dbname, "_design", "_design0");

                    // Get rows.
                    final JSONArray arr = designDocs.getJSONArray("rows");

                    boolean delete_all = true;
                    // For each row, extract all fulltext view functions.
                    for (int i = 0; i < arr.size(); i++) {
                        final JSONObject doc = arr.getJSONObject(i).getJSONObject("doc");
                        // optJSONObject returns null when the design document
                        // has no "fulltext" member, which is what the check
                        // below relies on.
                        final JSONObject fulltext = doc.optJSONObject("fulltext");
                        if (fulltext == null) {
                            continue;
                        }
                        delete_all = false;
                        for (final Object key : fulltext.keySet()) {
                            final JSONObject view = fulltext.getJSONObject((String) key);
                            final String defaults = view.optString("defaults", "{}");

                            // Strip any run of double quotes wrapping the
                            // index function source.
                            String fun = view.getString("index");
                            fun = fun.replaceAll("^\"*", "");
                            fun = fun.replaceAll("\"*$", "");

                            final String viewname = String.format("%s/%s/%s", dbname,
                                    doc.getString(Config.ID).replaceFirst("_design/", ""), key);

                            final Rhino rhino = new Rhino(dbname, defaults, fun);
                            try {
                                commit |= updateDatabase(writer, dbname, viewname, progress, rhino);
                            } finally {
                                rhino.close();
                            }
                        }
                    }

                    /*
                     * If there are no fulltext attributes in any design
                     * document, ensure that nothing is indexed for this
                     * database.
                     */
                    if (delete_all) {
                        deleteDatabase(dbname, progress, writer);
                    }
                }
            } catch (final Exception e) {
                Utils.LOG.error("Error updating index.", e);
                commit = false;
            } finally {
                commitOrRollback(writer, progress, commit, expunge);
            }
        }

        /**
         * Finishes an indexing pass. When {@code commit} is false the writer
         * is rolled back. Otherwise progress is saved, deletes are optionally
         * expunged, the writer is closed and the resulting index size is
         * logged. If any step of the commit sequence fails, the writer is
         * rolled back so that it is not left open.
         *
         * @param writer
         *            the writer used for this pass.
         * @param progress
         *            the progress state to persist on commit.
         * @param commit
         *            whether the pass produced changes worth committing.
         * @param expunge
         *            whether deleted documents should be expunged on commit.
         * @throws IOException
         *             if rollback or any commit step fails.
         */
        private void commitOrRollback(final IndexWriter writer, final Progress progress, final boolean commit,
                final boolean expunge) throws IOException {
            if (!commit) {
                writer.rollback();
                return;
            }

            boolean closed = false;
            try {
                progress.save(writer);
                if (expunge) {
                    writer.expungeDeletes();
                }
                writer.close();
                closed = true;
            } finally {
                if (!closed) {
                    // Best effort: release the writer without masking the
                    // exception that is already propagating.
                    try {
                        writer.rollback();
                    } catch (final IOException e) {
                        Utils.LOG.warn("Failed to roll back index writer after a failed commit.", e);
                    }
                }
            }

            final IndexReader reader = IndexReader.open(dir);
            try {
                Utils.LOG.info("Committed changes to index (" + reader.numDocs() + " documents in index, "
                        + reader.numDeletedDocs() + " deletes).");
            } finally {
                reader.close();
            }
        }

        /**
         * Brings one fulltext view up to the database's current
         * {@code update_seq}. The view is re-indexed from scratch when its
         * recorded sequence is 0 or its signature differs from the one
         * recorded in {@code progress}.
         *
         * @param writer
         *            the writer receiving additions and deletions.
         * @param dbname
         *            the database being indexed.
         * @param viewname
         *            the view identifier ({@code db/designdoc/view}).
         * @param progress
         *            per-view sequence and signature bookkeeping.
         * @param rhino
         *            the evaluator for the view's index function; must not be
         *            null.
         * @return true if the writer or progress was changed and a commit is
         *         warranted; false otherwise, including when a response
         *         lacks a {@code "rows"} member.
         * @throws HttpException
         *             if a database request fails.
         * @throws IOException
         *             if a database request or index write fails.
         */
        private boolean updateDatabase(final IndexWriter writer, final String dbname, final String viewname,
                final Progress progress, final Rhino rhino) throws HttpException, IOException {
            assert rhino != null;

            final long target_seq = DB.getInfo(dbname).getLong("update_seq");

            final String cur_sig = progress.getSignature(viewname);
            final String new_sig = rhino.getSignature();

            boolean result = false;

            // Reindex the database if sequence is 0 or signature changed.
            if (progress.getSeq(viewname) == 0 || !cur_sig.equals(new_sig)) {
                Utils.LOG.info("Indexing " + viewname + " from scratch.");
                deleteView(viewname, progress, writer);
                progress.update(viewname, new_sig, 0);
                result = true;
            }

            long update_seq = progress.getSeq(viewname);
            while (update_seq < target_seq) {
                final long batch_start = update_seq;
                final JSONObject obj = DB.getAllDocsBySeq(dbname, update_seq, Config.BATCH_SIZE);

                if (!obj.has("rows")) {
                    Utils.LOG.warn("no rows found (" + obj + ").");
                    return false;
                }

                // Process all rows
                final JSONArray rows = obj.getJSONArray("rows");
                for (int i = 0, max = rows.size(); i < max; i++) {
                    final JSONObject row = rows.getJSONObject(i);
                    final JSONObject value = row.optJSONObject("value");
                    final JSONObject doc = row.optJSONObject("doc");
                    final String docid = row.getString("id");

                    // New or updated document.
                    if (doc != null && !docid.startsWith("_design")) {
                        // Replace whatever was indexed for this document.
                        writer.deleteDocuments(docQuery(viewname, docid));
                        final Document[] docs = rhino.map(docid, doc.toString());

                        for (final Document indexed : docs) {
                            indexed.add(token(Config.DB, dbname, false));
                            indexed.add(token(Config.VIEW, viewname, false));
                            indexed.add(token(Config.ID, docid, true));
                            writer.addDocument(indexed);
                        }

                        result = true;
                    }

                    // Deleted document.
                    if (value != null && value.optBoolean("deleted")) {
                        writer.deleteDocuments(docQuery(viewname, docid));
                        result = true;
                    }

                    update_seq = row.getLong("key");
                }

                // An empty batch, or one that does not move the sequence
                // forward, would otherwise repeat the same request forever.
                if (update_seq <= batch_start) {
                    Utils.LOG.warn(viewname + ": no progress past sequence " + batch_start + " (target "
                            + target_seq + "), stopping this pass.");
                    break;
                }
            }

            if (result) {
                progress.update(viewname, new_sig, update_seq);
                Utils.LOG.info(viewname + ": index caught up to " + update_seq + ".");
            }

            return result;
        }

        /**
         * Deletes every indexed document of a view and forgets its progress.
         *
         * @param viewname
         *            the view to remove.
         * @param progress
         *            progress bookkeeping to update.
         * @param writer
         *            the writer receiving the deletion.
         * @throws IOException
         *             if the deletion fails.
         */
        private void deleteView(final String viewname, final Progress progress, final IndexWriter writer)
                throws IOException {
            writer.deleteDocuments(new Term(Config.VIEW, viewname));
            progress.removeView(viewname);
        }

        /**
         * Deletes every indexed document of a database and forgets its
         * progress.
         *
         * @param dbname
         *            the database to remove.
         * @param progress
         *            progress bookkeeping to update.
         * @param writer
         *            the writer receiving the deletion.
         * @throws IOException
         *             if the deletion fails.
         */
        private void deleteDatabase(final String dbname, final Progress progress, final IndexWriter writer)
                throws IOException {
            writer.deleteDocuments(new Term(Config.DB, dbname));
            progress.removeDatabase(dbname);
        }

    }

    /**
     * Starts the indexer thread and feeds it staleness notifications read
     * from standard input until input ends.
     *
     * @param args
     *            ignored.
     * @throws Exception
     *             if the index directory cannot be opened or an input line
     *             cannot be parsed as JSON.
     */
    public static void main(String[] args) throws Exception {
        Utils.LOG.info("indexer started.");
        final Indexer indexer = new Indexer(FSDirectory.getDirectory(Config.INDEX_DIR));
        // Daemon thread: it must not keep the JVM alive once stdin is closed.
        final Thread thread = new Thread(indexer, "index");
        thread.setDaemon(true);
        thread.start();

        final Scanner scanner = new Scanner(System.in);
        while (scanner.hasNextLine()) {
            final String line = scanner.nextLine();
            final JSONObject obj = JSONObject.fromObject(line);
            // Only lines that name both a type and a database mark the index stale.
            if (obj.has("type") && obj.has("db")) {
                indexer.setStale(true);
            }
        }
        Utils.LOG.info("indexer stopped.");
    }

}