com.zimbra.cs.mailbox.MailboxIndex.java Source code

Java tutorial

Introduction

Here is the source code for com.zimbra.cs.mailbox.MailboxIndex.java

Source

/*
 * ***** BEGIN LICENSE BLOCK *****
 * Zimbra Collaboration Suite Server
 * Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Synacor, Inc.
 *
 * This program is free software: you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free Software Foundation,
 * version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License along with this program.
 * If not, see <https://www.gnu.org/licenses/>.
 * ***** END LICENSE BLOCK *****
 */
package com.zimbra.cs.mailbox;

import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.ListIterator;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.Semaphore;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;

import org.apache.lucene.analysis.Analyzer;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Objects;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Multimaps;
import com.google.common.collect.SetMultimap;
import com.google.common.io.Closeables;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.zimbra.common.localconfig.LC;
import com.zimbra.common.mime.InternetAddress;
import com.zimbra.common.service.ServiceException;
import com.zimbra.common.soap.SoapProtocol;
import com.zimbra.common.util.AccessBoundedRegex;
import com.zimbra.common.util.ZimbraLog;
import com.zimbra.cs.account.Provisioning;
import com.zimbra.cs.db.DbMailItem;
import com.zimbra.cs.db.DbPool;
import com.zimbra.cs.db.DbPool.DbConnection;
import com.zimbra.cs.db.DbSearch;
import com.zimbra.cs.db.DbTag;
import com.zimbra.cs.index.BrowseTerm;
import com.zimbra.cs.index.DbSearchConstraints;
import com.zimbra.cs.index.IndexDocument;
import com.zimbra.cs.index.IndexPendingDeleteException;
import com.zimbra.cs.index.IndexStore;
import com.zimbra.cs.index.Indexer;
import com.zimbra.cs.index.LuceneFields;
import com.zimbra.cs.index.LuceneIndex;
import com.zimbra.cs.index.ReSortingQueryResults;
import com.zimbra.cs.index.SearchParams;
import com.zimbra.cs.index.SortBy;
import com.zimbra.cs.index.ZimbraAnalyzer;
import com.zimbra.cs.index.ZimbraIndexReader.TermFieldEnumeration;
import com.zimbra.cs.index.ZimbraIndexSearcher;
import com.zimbra.cs.index.ZimbraQuery;
import com.zimbra.cs.index.ZimbraQueryResults;
import com.zimbra.cs.mailbox.MailItem.Type;
import com.zimbra.cs.mailbox.MailItem.UnderlyingData;
import com.zimbra.cs.mailbox.Mailbox.IndexItemEntry;
import com.zimbra.cs.util.Zimbra;

/**
 * Index related mailbox operations.
 *
 * @author tim
 * @author ysasaki
 */
public final class MailboxIndex {
    // Upper bounds for a single index transaction chunk, read once from local config.
    private static final long MAX_TX_BYTES = LC.zimbra_index_max_transaction_bytes.longValue();
    private static final int MAX_TX_ITEMS = LC.zimbra_index_max_transaction_items.intValue();
    // Delay (compared against System.currentTimeMillis() differences, i.e. milliseconds) before a failed
    // batch is retried. The multiplication is done in long arithmetic (1000L) so that a large configured
    // value cannot silently overflow int before being widened.
    // NOTE(review): the config value is presumably expressed in seconds -- confirm against LC.
    private static final long FAILURE_DELAY = LC.zimbra_index_deferred_items_failure_delay.intValue() * 1000L;

    // Fixed-size pool (core == max) with a SynchronousQueue: a task is rejected rather than queued
    // when every index thread is busy.
    private static final ThreadPoolExecutor INDEX_EXECUTOR = new ThreadPoolExecutor(
            LC.zimbra_index_threads.intValue(), LC.zimbra_index_threads.intValue(), Long.MAX_VALUE,
            TimeUnit.NANOSECONDS, new SynchronousQueue<Runnable>(),
            new ThreadFactoryBuilder().setNameFormat("Index-%d").setDaemon(true).build());
    // Re-index threads are created on demand basis. The number of threads are capped.
    private static final ExecutorService REINDEX_EXECUTOR = new ThreadPoolExecutor(0,
            LC.zimbra_reindex_threads.intValue(), 0L, TimeUnit.SECONDS, new SynchronousQueue<Runnable>(),
            new ThreadFactoryBuilder().setNameFormat("ReIndex-%d").setDaemon(true).build());

    // Time (ms since epoch) of the last temporary indexing failure, or -1 when there is none pending.
    private volatile long lastFailedTime = -1;
    // Only one thread may run index at a time.
    private final Semaphore indexLock = new Semaphore(1);
    private final Mailbox mailbox;
    private final Analyzer analyzer;
    // Assigned by open(); null until then.
    private IndexStore indexStore;
    // current re-indexing operation for this mailbox, or NULL if a re-index is not in progress.
    private volatile ReIndexTask reIndex;
    // current compact-indexing operation for this mailbox, or NULL if a compact-index is not in progress.
    private volatile CompactIndexTask compactIndex;
    private volatile SetMultimap<MailItem.Type, Integer> deferredIds; // guarded by IndexHelper
    // While true, maybeIndexDeferredItems() only counts its invocations and does no indexing.
    boolean indexingSuspended = false;
    // Number of maybeIndexDeferredItems() calls seen while suspended (reset periodically and on resume).
    int numMaybeIndexDeferredItemsCalls = 0;

    /**
     * Creates the index facade for a mailbox and resolves the text analyzer configured on its account.
     *
     * @param mbox mailbox this index belongs to
     */
    MailboxIndex(Mailbox mbox) {
        mailbox = mbox;
        String configuredAnalyzer = null;
        try {
            configuredAnalyzer = mbox.getAccount().getTextAnalyzer();
        } catch (ServiceException e) {
            // Account lookup failed: the analyzer is resolved with a null name instead.
        }
        analyzer = ZimbraAnalyzer.getAnalyzer(configuredAnalyzer);
    }

    /**
     * Starts all index threads by pre-starting every core thread of {@link #INDEX_EXECUTOR}.
     */
    public static void startup() {
        INDEX_EXECUTOR.prestartAllCoreThreads();
    }

    /**
     * Destroys the {@link IndexStore} factory. The index executors are not touched here.
     */
    public static void shutdown() {
        IndexStore.getFactory().destroy();
    }

    /**
     * Returns the analyzer that was resolved for this mailbox when this object was constructed.
     */
    public Analyzer getAnalyzer() {
        return analyzer;
    }

    /**
     * Obtains the index store for this mailbox from the {@link IndexStore} factory and keeps it for later use.
     *
     * @throws ServiceException if the factory fails to provide an index store
     */
    void open() throws ServiceException {
        indexStore = IndexStore.getFactory().getIndexStore(mailbox);
    }

    /**
     * Returns the index store assigned by {@link #open()}. With assertions enabled, fails if it has not been
     * assigned yet.
     */
    public final IndexStore getIndexStore() {
        assert (indexStore != null);
        return indexStore;
    }

    /**
     * Runs a search against this mailbox. This is the preferred form of the API call.
     *
     * To avoid deadlock, callers MUST NOT hold the Mailbox lock when calling this API.
     *
     * Callers MUST invoke {@link ZimbraQueryResults#doneWithSearchResults()} once they are finished with the
     * results, otherwise resources will be leaked.
     *
     * @param proto SOAP protocol passed on to the query
     * @param octx Operation Context, must not be null
     * @param params Search Parameters
     * @return search result
     * @throws ServiceException if the query cannot be built or executed
     */
    public ZimbraQueryResults search(SoapProtocol proto, OperationContext octx, SearchParams params)
            throws ServiceException {
        assert (mailbox.lock.isUnlocked());
        assert (octx != null);

        ZimbraQuery query = new ZimbraQuery(octx, proto, mailbox, params);
        Set<MailItem.Type> indexTypes = toIndexTypes(params.getTypes());
        // Catching up on deferred items is only worthwhile when the (non-quick) search actually hits the text index.
        boolean catchUpFirst = !params.isQuick() && query.hasTextOperation() && getDeferredCount(indexTypes) > 0;
        if (catchUpFirst) {
            try {
                // wait=false: skip rather than block when another thread is already indexing
                indexDeferredItems(indexTypes, new BatchStatus(), false);
            } catch (ServiceException e) {
                ZimbraLog.index.error("Failed to index deferred items", e);
            }
        }
        return search(query);
    }

    /**
     * Convenience overload: builds {@link SearchParams} from the individual arguments (null time zone and locale,
     * prefetch enabled, normal fetch mode) and runs the search using {@link SoapProtocol#Soap12}.
     *
     * @param octxt operation context
     * @param queryString query to run
     * @param types item types to search
     * @param sortBy sort order
     * @param chunkSize chunk size set on the search parameters
     * @param inDumpster value for the "in dumpster" search parameter
     * @return search result
     */
    public ZimbraQueryResults search(OperationContext octxt, String queryString, Set<MailItem.Type> types,
            SortBy sortBy, int chunkSize, boolean inDumpster) throws ServiceException {
        final SearchParams searchParams = new SearchParams();
        searchParams.setQueryString(queryString);
        searchParams.setTimeZone(null);
        searchParams.setLocale(null);
        searchParams.setTypes(types);
        searchParams.setSortBy(sortBy);
        searchParams.setChunkSize(chunkSize);
        searchParams.setPrefetch(true);
        searchParams.setFetchMode(SearchParams.Fetch.NORMAL);
        searchParams.setInDumpster(inDumpster);
        return search(SoapProtocol.Soap12, octxt, searchParams);
    }

    /**
     * Convenience overload that runs the search with {@link SoapProtocol#Soap12}.
     *
     * @param octxt operation context
     * @param params search parameters
     * @return search result
     */
    public ZimbraQueryResults search(OperationContext octxt, SearchParams params) throws ServiceException {
        return search(SoapProtocol.Soap12, octxt, params);
    }

    /**
     * Convenience overload that delegates to the six-argument form with {@code inDumpster} set to {@code false}.
     *
     * @param octxt operation context
     * @param queryString query to run
     * @param types item types to search
     * @param sortBy sort order
     * @param chunkSize chunk size set on the search parameters
     * @return search result
     */
    public ZimbraQueryResults search(OperationContext octxt, String queryString, Set<MailItem.Type> types,
            SortBy sortBy, int chunkSize) throws ServiceException {
        return search(octxt, queryString, types, sortBy, chunkSize, false);
    }

    /**
     * Executes a prepared query, wrapping the results in {@link ReSortingQueryResults} when the requested sort is
     * one of the task-only or localized-name sorts.
     *
     * @param zq query to execute; its parameters' sort order is rewritten to DATE_DESC for task-only sorts
     * @return query results, re-sorted by the originally requested order where necessary
     */
    private ZimbraQueryResults search(ZimbraQuery zq) throws ServiceException {
        SearchParams searchParams = zq.getParams();
        ZimbraLog.search.debug("query: %s", searchParams.getQueryString());
        ZimbraLog.searchstat.debug("query: %s", zq.toSanitizedtring());

        // Task-only sorts are special-cased: run the query with a "normal" sort and re-sort the results afterwards.
        // TODO: this conversion hack should be removable with the new SortBy implementation, once the lower-level
        // code uses SortBy.Criterion and SortBy.Direction instead of switching on the SortBy itself. The switch is
        // still needed to decide whether the results get wrapped in ReSortingQueryResults.
        final SortBy requestedSort = searchParams.getSortBy();
        boolean taskSort = false;
        boolean localizedSort = false;
        switch (requestedSort) {
        case TASK_DUE_ASC:
        case TASK_DUE_DESC:
        case TASK_STATUS_ASC:
        case TASK_STATUS_DESC:
        case TASK_PERCENT_COMPLETE_ASC:
        case TASK_PERCENT_COMPLETE_DESC:
            taskSort = true;
            searchParams.setSortBy(SortBy.DATE_DESC);
            break;
        case NAME_LOCALIZED_ASC:
        case NAME_LOCALIZED_DESC:
            localizedSort = true;
            break;
        default:
            break;
        }

        ZimbraQueryResults results = zq.execute();
        if (taskSort) {
            results = new ReSortingQueryResults(results, requestedSort, null);
        } else if (localizedSort) {
            results = new ReSortingQueryResults(results, requestedSort, searchParams);
        }
        return results;
    }

    /**
     * Returns true if any of the specified email addresses exists in contacts, otherwise false.
     *
     * Deferred contact items are indexed first on a best-effort basis (failures are only logged), then each
     * non-empty address is lower-cased and compared with the first term enumerated for it in the contact-data
     * field.
     *
     * @param addrs addresses to look up
     * @return true as soon as one address matches, false if none does
     * @throws IOException if the index searcher cannot be opened or read
     */
    public boolean existsInContacts(Collection<InternetAddress> addrs) throws IOException {
        Set<MailItem.Type> contactOnly = EnumSet.of(MailItem.Type.CONTACT);
        if (getDeferredCount(contactOnly) > 0) {
            try {
                indexDeferredItems(contactOnly, new BatchStatus(), false);
            } catch (ServiceException e) {
                ZimbraLog.index.error("Failed to index deferred items", e);
            }
        }

        ZimbraIndexSearcher searcher = indexStore.openSearcher();
        try {
            for (InternetAddress addr : addrs) {
                if (Strings.isNullOrEmpty(addr.getAddress())) {
                    continue; // nothing to look up
                }
                String needle = addr.getAddress().toLowerCase();
                TermFieldEnumeration terms = null;
                try {
                    terms = searcher.getIndexReader().getTermsForField(LuceneFields.L_CONTACT_DATA, needle);
                    // Only the first enumerated term is examined.
                    BrowseTerm first = terms.hasMoreElements() ? terms.nextElement() : null;
                    if (first != null && needle.equals(first.getText())) {
                        ZimbraLog.index.debug("Contact = %s present in indexed items", needle);
                        return true;
                    }
                } finally {
                    Closeables.closeQuietly(terms);
                }
            }
            return false;
        } finally {
            Closeables.closeQuietly(searcher);
        }
    }

    /**
     * Returns the maximum number of items to be batched in a single indexing pass. A search that needs the index
     * causes all deferred unindexed items to be indexed immediately, regardless of batch size. A value of
     * {@code 0} means every item is indexed immediately when it is added.
     *
     * @return the account's batched indexing size for Lucene index stores; {@code 0} for any other store or when
     *         the account setting cannot be read
     */
    public int getBatchThreshold() {
        if (!(indexStore instanceof LuceneIndex)) {
            return 0; // disable batch indexing for non Lucene index stores
        }
        try {
            return mailbox.getAccount().getBatchedIndexingSize();
        } catch (ServiceException e) {
            ZimbraLog.index.warn("Failed to get %s", Provisioning.A_zimbraBatchedIndexingSize, e);
            return 0;
        }
    }

    /**
     * Delegates to {@link IndexStore#evict()} on this mailbox's index store.
     */
    void evict() {
        indexStore.evict();
    }

    /**
     * Deletes the index of this mailbox. A re-index that is in progress is asked to cancel first; this method
     * does not wait for the cancellation to take effect before deleting.
     *
     * @throws IOException if the index store fails to delete the index
     */
    public void deleteIndex() throws IOException {
        if (isReIndexInProgress()) {
            cancelReIndex();
        }
        indexStore.deleteIndex();
    }

    /**
     * Submits a task to {@link #INDEX_EXECUTOR}. The Future returned by the executor is discarded.
     *
     * @param task index task
     * @throws RejectedExecutionException if all index threads are busy
     */
    public void submit(IndexTask task) {
        INDEX_EXECUTOR.submit(task);
    }

    /**
     * Suspends or resumes deferred indexing and logs the current deferred count (for all item types).
     *
     * @param suspended true to suspend, false to resume; resuming also resets the counter of
     *        {@link #maybeIndexDeferredItems()} calls made while suspended
     */
    void setIndexingSuspended(boolean suspended) {
        ZimbraLog.index.info("indexSuspended set to %s.  Current deferred count %s", suspended,
                getDeferredCount(EnumSet.noneOf(MailItem.Type.class)));
        indexingSuspended = suspended;
        if (!suspended) {
            numMaybeIndexDeferredItemsCalls = 0;
        }
    }

    /**
     * Attempts to index deferred items by handing a {@code BatchIndexTask} to {@link #INDEX_EXECUTOR}.
     *
     * Nothing is scheduled when the index is pending delete, when indexing is suspended, or when neither the
     * failure-retry delay has elapsed nor the deferred count has reached {@link #getBatchThreshold()}. If every
     * index thread is busy the batch is skipped with a warning.
     */
    void maybeIndexDeferredItems() {
        if ((indexStore != null) && indexStore.isPendingDelete()) {
            // No point in indexing if we are going to delete the index
            ZimbraLog.index.debug("index delete is in progress by other thread, skipping");
            return;
        }

        if (indexingSuspended) {
            // Log only once per 1000 calls so that a long suspension does not flood the log.
            if ((numMaybeIndexDeferredItemsCalls % 1000) == 0) {
                ZimbraLog.index.debug(
                        "Indexing suspended. maybeIndexDeferredItems called %s times whilst suspended",
                        numMaybeIndexDeferredItemsCalls);
                numMaybeIndexDeferredItemsCalls = 0;
            }
            numMaybeIndexDeferredItemsCalls++;
            return;
        }

        // After a failure, indexing is triggered once the delay has passed even if the deferred count is still low.
        boolean retryDue = lastFailedTime >= 0 && System.currentTimeMillis() - lastFailedTime > FAILURE_DELAY;
        if (!retryDue && getDeferredCount(EnumSet.noneOf(MailItem.Type.class)) < getBatchThreshold()) {
            return;
        }

        try {
            INDEX_EXECUTOR.submit(new BatchIndexTask());
        } catch (RejectedExecutionException e) {
            ZimbraLog.index.warn("Skipping batch index because all index threads are busy");
        }
    }

    /**
     * Resumes indexing; shorthand for {@code setIndexingSuspended(false)}.
     */
    void resumeIndexing() {
        setIndexingSuspended(false);
    }

    /**
     * Resumes indexing, makes one attempt to schedule indexing of the deferred items via
     * {@link #maybeIndexDeferredItems()}, and logs the deferred count observed right after that attempt.
     */
    void resumeIndexingAndDrainDeferred() {
        resumeIndexing();
        maybeIndexDeferredItems();
        ZimbraLog.index.info("resumeIndexingAndDrainDeferred deferred count=%s",
                getDeferredCount(EnumSet.noneOf(MailItem.Type.class)));
    }

    /**
     * Index deferred items.
     *
     * Must be called without holding the mailbox lock. Does nothing if the index is pending delete. Only one
     * thread indexes at a time: the caller either waits for {@code indexLock} or skips, depending on
     * {@code wait}.
     *
     * @param types item types to index, empty set means all types
     * @param status receives the progress of the batch
     * @param wait if an indexing is in progress by other threads, true to wait for them to complete, false to skip
     * indexing
     * @throws ServiceException if indexing the item list fails
     */
    private void indexDeferredItems(Set<MailItem.Type> types, BatchStatus status, boolean wait)
            throws ServiceException {
        assert (mailbox.lock.isUnlocked());
        if ((indexStore != null) && indexStore.isPendingDelete()) {
            ZimbraLog.index.debug("index delete is in progress by other thread, skipping");
            return; // No point in indexing if we are going to delete the index
        }

        if (wait) {
            indexLock.acquireUninterruptibly();
        } else if (!indexLock.tryAcquire()) {
            ZimbraLog.index.debug("index is in progress by other thread, skipping");
            return;
        }
        lastFailedTime = -1; // reset
        try {
            long start = System.currentTimeMillis();
            Collection<Integer> ids = getDeferredIds(types);
            indexItemList(ids, status);

            long elapsed = System.currentTimeMillis() - start;
            // An empty or very fast batch can finish within the same millisecond; clamp the divisor to 1ms so the
            // reported rate is a finite number instead of NaN/Infinity.
            double itemsPerSec = 1000.0 * (status.getProcessed() - status.getFailed()) / Math.max(elapsed, 1L);
            ZimbraLog.index.info("Batch complete processed=%d,failed=%d,elapsed=%d (%.2f items/sec)",
                    status.getProcessed(), status.getFailed(), elapsed, itemsPerSec);
        } finally {
            indexLock.release();
        }
    }

    /**
     * Indexes all deferred items of every type, waiting for any indexing that is already in progress.
     */
    @VisibleForTesting
    public void indexDeferredItems() throws ServiceException {
        indexDeferredItems(EnumSet.noneOf(MailItem.Type.class), new BatchStatus(), true);
    }

    /**
     * Kick off the requested re-index in a background thread. The re-index is run on a best-effort basis, if it fails
     * a WARN message is logged, but it won't be retried.
     *
     * This form passes {@code null} as the ID list, which the task treats as a full re-index.
     *
     * @throws ServiceException if the re-index cannot be submitted
     */
    public void startReIndex() throws ServiceException {
        startReIndex(new ReIndexTask(mailbox, null));
    }

    /**
     * Kicks off a background re-index restricted to the given item IDs.
     *
     * @param ids IDs of the items to re-index
     * @throws ServiceException if the re-index cannot be submitted
     */
    public void startReIndexById(Collection<Integer> ids) throws ServiceException {
        startReIndex(new ReIndexTask(mailbox, ids));
    }

    /**
     * Kicks off a background re-index of all items of the given types. The item IDs are looked up in the database
     * on a dedicated connection, which is closed before the re-index is submitted.
     *
     * @param types item types to re-index
     * @throws ServiceException if the ID lookup fails or the re-index cannot be submitted
     */
    public void startReIndexByType(Set<MailItem.Type> types) throws ServiceException {
        final DbConnection conn = DbPool.getConnection(mailbox);
        List<Integer> matchingIds;
        try {
            matchingIds = DbMailItem.getReIndexIds(conn, mailbox, types);
        } finally {
            conn.closeQuietly();
        }
        startReIndexById(matchingIds);
    }

    /**
     * Registers {@code task} as the current re-index of this mailbox and hands it to {@link #REINDEX_EXECUTOR}.
     *
     * @param task re-index task to run
     * @throws ServiceException FAILURE if the index is pending delete or the executor rejects the task;
     *         ALREADY_IN_PROGRESS if a re-index or a compact-index is already registered for this mailbox
     */
    private synchronized void startReIndex(ReIndexTask task) throws ServiceException {
        if ((indexStore != null) && indexStore.isPendingDelete()) {
            throw ServiceException.FAILURE("Unable to submit reindex request. Index is pending delete", null);
        }
        if (reIndex != null) {
            throw ServiceException.ALREADY_IN_PROGRESS(Integer.toString(mailbox.getId()),
                    reIndex.status.toString());
        }
        // reIndex and compactIndex cannot interleave
        if (isCompactIndexInProgress()) {
            throw ServiceException.ALREADY_IN_PROGRESS(Integer.toString(mailbox.getId()), "Compact Index");
        }
        // Register before submitting so the marker is already set when the task starts running.
        reIndex = task;
        try {
            REINDEX_EXECUTOR.submit(task);
        } catch (RejectedExecutionException e) {
            // A rejected task never runs, so its finally-block will never clear the marker. Clear it here,
            // otherwise every later request would fail with ALREADY_IN_PROGRESS.
            reIndex = null;
            throw ServiceException.FAILURE("Unable to submit reindex request. Try again later", e);
        }
    }

    /**
     * Requests cancellation of the re-index currently registered for this mailbox.
     *
     * @return status of the re-index that was asked to cancel, or {@code null} if none is registered
     */
    public synchronized ReIndexStatus cancelReIndex() {
        final ReIndexTask current = reIndex;
        if (current != null) {
            current.status.cancel();
            return current.status;
        }
        return null;
    }

    /**
     * Kicks off a compact-index of this mailbox as a background task.
     *
     * @throws ServiceException if the compact-index cannot be submitted
     */
    public void startCompactIndex() throws ServiceException {
        startCompactIndex(new CompactIndexTask(mailbox));
    }

    /**
     * Registers {@code task} as the current compact-index of this mailbox and hands it to
     * {@link #REINDEX_EXECUTOR}.
     *
     * @param task compact-index task to run
     * @throws ServiceException FAILURE if the index is pending delete or the executor rejects the task;
     *         ALREADY_IN_PROGRESS if a compact-index or a re-index is already registered for this mailbox
     */
    private synchronized void startCompactIndex(CompactIndexTask task) throws ServiceException {
        if ((indexStore != null) && indexStore.isPendingDelete()) {
            throw ServiceException.FAILURE("Unable to submit compact index request. Index is pending delete", null);
        }
        if (compactIndex != null) {
            throw ServiceException.ALREADY_IN_PROGRESS(Integer.toString(mailbox.getId()), "Compact Index");
        }
        // reIndex and compactIndex cannot interleave
        if (isReIndexInProgress()) {
            throw ServiceException.ALREADY_IN_PROGRESS(Integer.toString(mailbox.getId()),
                    reIndex.status.toString());
        }
        // Register before submitting so the marker is already set when the task starts running.
        compactIndex = task;
        try {
            REINDEX_EXECUTOR.submit(task);
        } catch (RejectedExecutionException e) {
            // A rejected task never runs, so its finally-block will never clear the marker. Clear it here,
            // otherwise every later request would fail with ALREADY_IN_PROGRESS.
            compactIndex = null;
            throw ServiceException.FAILURE("Unable to submit compact index request. Try again later", e);
        }
    }

    /**
     * Verifies the index store while holding {@code indexLock}, so that no indexing batch runs concurrently.
     *
     * @param out stream handed to the index store's verification
     * @return the result reported by {@link IndexStore#verify(PrintStream)}
     * @throws ServiceException FAILURE wrapping any {@link IOException} raised by the verification
     */
    public boolean verify(PrintStream out) throws ServiceException {
        indexLock.acquireUninterruptibly(); // make sure no writers are opened
        try {
            return indexStore.verify(out);
        } catch (IOException e) {
            throw ServiceException.FAILURE("Failed to verify index", e);
        } finally {
            indexLock.release(); // always give the permit back, even when verification fails
        }
    }

    /**
     * Background task that re-indexes either the whole mailbox ({@code ids == null}) or a given set of item IDs.
     * When the task ends, for whatever reason, it unregisters itself by clearing {@code MailboxIndex.reIndex}.
     */
    private class ReIndexTask extends IndexTask {
        private final Collection<Integer> ids;
        private final ReIndexStatus status = new ReIndexStatus();

        ReIndexTask(Mailbox mbox, Collection<Integer> ids) {
            super(mbox);
            this.ids = ids;
        }

        /**
         * Runs the re-index, logs throughput figures and invokes {@link #onCompletion()} on success. Failures are
         * logged rather than propagated, except for out-of-memory which halts the server.
         */
        @Override
        public void exec() {
            try {
                ZimbraLog.index.info("Re-index start");

                long startedAt = System.currentTimeMillis();
                reIndex();
                long elapsed = System.currentTimeMillis() - startedAt;

                // Both figures stay 0 when nothing was processed or when an item took less than 1ms on average.
                long msPerItem = 0;
                long itemsPerSec = 0;
                if (status.getProcessed() > 0) {
                    msPerItem = elapsed / status.getProcessed();
                    if (msPerItem > 0) {
                        itemsPerSec = 1000 / msPerItem;
                    }
                }
                ZimbraLog.index.info(
                        "Re-index completed items=%d,failed=%d,elapsed=%d (avg %d ms/item, %d items/sec)",
                        status.getTotal(), status.getFailed(), elapsed, msPerItem, itemsPerSec);
                onCompletion();
            } catch (ServiceException e) {
                if (e.getCode() == ServiceException.INTERRUPTED) {
                    ZimbraLog.index.info("Re-index cancelled %s", status);
                } else {
                    ZimbraLog.index.error("Re-index failed. This mailbox must be manually re-indexed.", e);
                }
            } catch (OutOfMemoryError e) {
                Zimbra.halt("out of memory", e);
            } catch (Throwable t) {
                ZimbraLog.index.error("Re-index failed. This mailbox must be manually re-indexed.", t);
            } finally {
                // Unregister under the same monitor that startReIndex()/cancelReIndex() synchronize on.
                synchronized (MailboxIndex.this) {
                    reIndex = null;
                }
            }
        }

        /**
         * Hook invoked after a successful re-index; does nothing by default. Subclasses may override it to
         * trigger follow-up work.
         *
         * @throws ServiceException error
         */
        protected void onCompletion() throws ServiceException {
        }

        /**
         * Re-Index some or all items in this mailbox. This can be a *very* expensive operation (upwards of an hour
         * to run on a large mailbox on slow hardware). The mailbox lock is held only while the existing index data
         * is reset, not while the items are being indexed.
         */
        void reIndex() throws ServiceException {
            if (ids != null) { // partial re-index
                indexLock.acquireUninterruptibly();
                try {
                    indexItemList(ids, status);
                } finally {
                    indexLock.release();
                }
                return;
            }

            // full re-index: wipe DB index data, the index store and the deferred IDs under the mailbox lock
            mailbox.lock.lock();
            try {
                ZimbraLog.index.info("Resetting DB index data");
                mailbox.resetIndex();
                ZimbraLog.index.info("Deleting index store data");
                try {
                    indexStore.deleteIndex();
                } catch (IOException e) {
                    throw ServiceException.FAILURE("Failed to delete index before re-index", e);
                }
                clearDeferredIds();
            } finally {
                mailbox.lock.release();
            }
            ZimbraLog.index.info("Re-indexing all items");
            indexDeferredItems(EnumSet.noneOf(MailItem.Type.class), status, true);
            // skipping the optimize!!
            // Note: Lucene 3.5.0 highly discourage optimizing the index as
            // it is horribly inefficient and very rarely justified. Please check the API doc for more details.
        }
    }

    /**
     * Background task that compacts the index of this mailbox. When the task ends, for whatever reason, it
     * unregisters itself by clearing {@code MailboxIndex.compactIndex}.
     */
    private class CompactIndexTask extends IndexTask {

        public CompactIndexTask(Mailbox mbox) {
            super(mbox);
        }

        /**
         * Runs the compaction and logs how long it took. Failures are logged rather than propagated, except for
         * out-of-memory which halts the server.
         */
        @Override
        protected void exec() throws Exception {
            try {
                ZimbraLog.index.info("Compact-index start");

                final long startedAt = System.currentTimeMillis();
                compact();
                final long tookMillis = System.currentTimeMillis() - startedAt;
                ZimbraLog.index.info("Compact-index completed elapsed=%d", tookMillis);
            } catch (ServiceException e) {
                if (e.getCode() == ServiceException.INTERRUPTED) {
                    ZimbraLog.index.info("Compact-index cancelled");
                } else {
                    ZimbraLog.index.error("Compact-index failed. This mailbox must be re-indexed.", e);
                }
            } catch (OutOfMemoryError e) {
                Zimbra.halt("out of memory", e);
            } catch (Throwable t) {
                ZimbraLog.index.error("Compact-index failed. This mailbox must be manually re-indexed.", t);
            } finally {
                // Unregister under the same monitor that startCompactIndex() synchronizes on.
                synchronized (MailboxIndex.this) {
                    compactIndex = null;
                }
            }
        }
    }

    /**
     * Migrate to mailbox version 1.5.
     *
     * Collects the IDs of all items tagged with {@code Flag.ID_INDEXING_DEFERRED}, re-indexes them through a
     * {@link ReIndexTask}, and on completion clears that flag from the items and raises the mailbox version to
     * 1.5 if it is lower. When no item carries the flag, the completion step runs synchronously on the calling
     * thread instead of going through the re-index executor.
     *
     * @throws ServiceException if collecting the flagged items fails or the re-index cannot be started
     */
    @SuppressWarnings("deprecation")
    void indexAllDeferredFlagItems() throws ServiceException {
        Set<Integer> ids = new HashSet<Integer>();
        boolean success = false;
        try {
            // Step 1: find every item that still carries the deferred-indexing flag (IDs only).
            mailbox.beginTransaction("indexAllDeferredFlagItems", null);
            DbSearchConstraints.Leaf c = new DbSearchConstraints.Leaf();
            c.tags.add(mailbox.getFlagById(Flag.ID_INDEXING_DEFERRED));
            List<DbSearch.Result> list = new DbSearch(mailbox).search(mailbox.getOperationConnection(), c,
                    SortBy.NONE, -1, -1, DbSearch.FetchMode.ID);
            for (DbSearch.Result sr : list) {
                ids.add(sr.getId());
            }
            success = true;
        } finally {
            mailbox.endTransaction(success);
        }

        // Step 2: re-index those items; the completion hook below clears the flag and bumps the version.
        ReIndexTask task = new ReIndexTask(mailbox, ids) {
            @Override
            protected void onCompletion() {
                try {
                    mailbox.lock.lock();
                    try {
                        boolean success = false;
                        try {
                            mailbox.beginTransaction("indexAllDeferredFlagItems", null);
                            // Query again: the set of flagged items is re-read here rather than reusing 'ids'.
                            DbSearchConstraints.Leaf c = new DbSearchConstraints.Leaf();
                            c.tags.add(mailbox.getFlagById(Flag.ID_INDEXING_DEFERRED));
                            List<DbSearch.Result> list = new DbSearch(mailbox).search(
                                    mailbox.getOperationConnection(), c, SortBy.NONE, -1, -1,
                                    DbSearch.FetchMode.MODCONTENT);

                            List<Integer> deferredTagsToClear = new ArrayList<Integer>();

                            Flag indexingDeferredFlag = mailbox.getFlagById(Flag.ID_INDEXING_DEFERRED);

                            for (DbSearch.Result sr : list) {
                                MailItem item = mailbox.getItemById(sr.getId(), sr.getType());
                                deferredTagsToClear.add(sr.getId());
                                // presumably updates the loaded item's state for the removed flag -- confirm
                                item.tagChanged(indexingDeferredFlag, false);
                            }
                            mailbox.getOperationConnection(); // we must call this before DbMailItem.alterTag
                            DbTag.alterTag(indexingDeferredFlag, deferredTagsToClear, false);
                            success = true;
                        } finally {
                            mailbox.endTransaction(success);
                        }

                        // The version bump is best-effort: a failure is logged and otherwise ignored.
                        if (!mailbox.getVersion().atLeast(1, 5)) {
                            try {
                                mailbox.updateVersion(new MailboxVersion((short) 1, (short) 5));
                            } catch (ServiceException se) {
                                ZimbraLog.mailbox.warn("Failed to update mbox version after "
                                        + "reindex all deferred items during mailbox upgrade initialization.", se);
                            }
                        }
                    } finally {
                        mailbox.lock.release();
                    }
                } catch (ServiceException se) {
                    ZimbraLog.mailbox.warn("Failed to clear deferred flag after "
                            + "reindex all deferred items during mailbox upgrade initialization.", se);
                }
            }
        };

        try {
            if (ids.isEmpty()) {
                // Nothing to re-index: run the completion step directly on this thread.
                task.onCompletion();
            } else {
                startReIndex(task);
            }
        } catch (RejectedExecutionException e) {
            // NOTE(review): startReIndex(ReIndexTask) converts RejectedExecutionException into a
            // ServiceException, so this handler looks unreachable via that path -- confirm whether the
            // ServiceException should be handled here instead.
            ZimbraLog.mailbox.warn("Failed to reindex deferred items on mailbox upgrade initialization."
                    + " Skipping (you will have to manually reindex this mailbox)");
        }
    }

    /**
     * Index a potentially very large list of {@link MailItem}s. Iterate through the list of items, fetch each one and
     * call generateIndexData(). Buffer the items, IndexData into a chunk and when the chunk gets sufficiently large,
     * run a Mailbox transaction to actually do the indexing
     *
     * @param ids item IDs to index
     * @param status progress will be written to the status
     * @throws ServiceException {@link ServiceException#INTERRUPTED} if {@link #cancelReIndex()} is called
     */
    private void indexItemList(Collection<Integer> ids, BatchStatus status) throws ServiceException {
        assert (mailbox.lock.isUnlocked());

        status.setTotal(ids.size());
        if (ids.isEmpty()) {
            return;
        }

        // we re-index 'chunks' of items -- up to a certain size or count
        List<Mailbox.IndexItemEntry> chunk = new ArrayList<Mailbox.IndexItemEntry>();
        long chunkByteSize = 0;
        int i = 0;
        for (int id : ids) {
            i++;
            status.addProcessed(1);

            // Fetch the item and generate the list of Lucene documents to index. Do this without holding the Mailbox
            // lock. Once we've accumulated a "chunk" of items, do a mailbox transaction to actually add them to the
            // index.
            ZimbraLog.index.debug("Tokenizing id=%d", id);
            MailItem item = null;
            try {
                mailbox.beginReadTransaction("IndexItemList-Fetch", null);
                item = mailbox.getItemById(id, MailItem.Type.UNKNOWN, false);
            } catch (MailServiceException.NoSuchItemException e) { // fallback to dumpster
                try {
                    item = mailbox.getItemById(id, MailItem.Type.UNKNOWN, true);
                } catch (MailServiceException.NoSuchItemException again) { // The item has just been deleted.
                    ZimbraLog.index.debug("deferred item no longer exist id=%d", id);
                    removeDeferredId(id);
                    continue;
                }
            } catch (MailServiceException e) {
                // fetch without metadata because reindex will regenerate metadata
                if (MailServiceException.INVALID_METADATA.equals(e.getCode()) && isReIndexInProgress()) {
                    UnderlyingData ud = DbMailItem.getById(mailbox, id, MailItem.Type.UNKNOWN, false);
                    ud.metadata = null; // ignore corrupted metadata
                    item = mailbox.getItem(ud);
                } else {
                    throw e;
                }
            } catch (Exception e) {
                ZimbraLog.index.warn("Failed to fetch deferred item id=%d", id, e);
                status.addFailed(1);
                continue;
            } finally {
                // the read transaction is marked successful only if an item was actually obtained
                mailbox.endTransaction(item != null);
            }
            try {
                chunk.add(new Mailbox.IndexItemEntry(item, item.generateIndexData()));
            } catch (MailItem.TemporaryIndexingException e) {
                ZimbraLog.index.warn("Temporary index failure id=%d", id, e);
                lastFailedTime = System.currentTimeMillis();
                status.addFailed(1);
                continue;
            }
            chunkByteSize += item.getSize();

            if (i == ids.size() || chunkByteSize > MAX_TX_BYTES || chunk.size() >= MAX_TX_ITEMS) {
                // we have a chunk of items and their corresponding index data -- add them to the index
                commitIndexChunk(chunk, i, ids.size(), status);
                chunkByteSize = 0;
            }
        }

        // The 'continue' statements above skip the end-of-iteration size check, so when the trailing id(s) of the
        // batch could not be fetched or tokenized, previously accumulated entries are still waiting here. Commit
        // them now instead of silently dropping them from this pass.
        if (!chunk.isEmpty()) {
            commitIndexChunk(chunk, i, ids.size(), status);
        }
    }

    /**
     * Adds the accumulated chunk to the index within one mailbox transaction and then empties the chunk.
     * <p>
     * A {@link ServiceException} raised by the transaction itself is logged and the whole chunk is counted as
     * failed. The chunk is cleared in every case, including when the batch has been cancelled.
     *
     * @param chunk entries to add to the index; always empty when this method returns or throws
     * @param processed number of ids of the batch handled so far (used for progress logging only)
     * @param total total number of ids in the batch (used for progress logging only)
     * @param status batch status consulted for cancellation and updated with failures
     * @throws ServiceException INTERRUPTED if {@code status} reports that the batch was cancelled
     */
    private void commitIndexChunk(List<Mailbox.IndexItemEntry> chunk, int processed, int total, BatchStatus status)
            throws ServiceException {
        try {
            ZimbraLog.index.debug("Batch progress %d/%d", processed, total);

            if (status.isCancelled()) {
                throw ServiceException.INTERRUPTED("cancelled");
            }

            try {
                boolean success = false;
                try {
                    mailbox.beginTransaction("IndexItemList-Commit", null);
                    for (Mailbox.IndexItemEntry entry : chunk) {
                        mailbox.addIndexItemToCurrentChange(entry);
                    }
                    success = true;
                } finally {
                    mailbox.endTransaction(success);
                }
            } catch (ServiceException e) {
                ZimbraLog.index.warn("Failed to index chunk=%s", chunk, e);
                status.addFailed(chunk.size());
            }
        } finally {
            chunk.clear();
        }
    }

    /**
     * Mailbox version (1.0,1.1)->1.2 Re-Index all contacts.
     * <p>
     * Looks up the ids of all {@link MailItem.Type#CONTACT} items to re-index and, if there are any, starts a
     * {@link ReIndexTask} for them. When that task completes, the mailbox version is bumped to 1.2 unless it is
     * already at least 1.2. A failure to start the re-index is logged (with its cause) and otherwise ignored.
     */
    void upgradeMailboxTo1_2() throws ServiceException {
        DbConnection conn = DbPool.getConnection(mailbox);
        try {
            List<Integer> ids = DbMailItem.getReIndexIds(conn, mailbox, EnumSet.of(MailItem.Type.CONTACT));
            if (ids.isEmpty()) {
                return;
            }
            ReIndexTask task = new ReIndexTask(mailbox, ids) {
                @Override
                protected void onCompletion() throws ServiceException {
                    mailbox.lock.lock();
                    try {
                        if (!mailbox.getVersion().atLeast(1, 2)) {
                            try {
                                mailbox.updateVersion(new MailboxVersion((short) 1, (short) 2));
                            } catch (ServiceException e) {
                                ZimbraLog.mailbox.warn("Failed to update mbox version after "
                                        + "reindex contacts on mailbox upgrade initialization.", e);
                            }
                        }
                    } finally {
                        mailbox.lock.release();
                    }
                }
            };
            startReIndex(task);
        } catch (ServiceException e) {
            // keep the cause in the log so the operator can tell why the re-index could not be started
            ZimbraLog.mailbox.warn("Failed to reindex contacts on mailbox upgrade initialization."
                    + " Skipping (you will have to manually reindex contacts for this mailbox)", e);
        } finally {
            conn.closeQuietly();
        }
    }

    /**
     * Entry point for Redo-logging system only. Everybody else should use queueItemForIndexing inside a transaction.
     * <p>
     * Holds the mailbox lock while the documents are added to the index under the item's folder. Any exception is
     * logged as a warning and not propagated.
     *
     * @param item item the documents belong to
     * @param itemId item id, used in the warning message only
     * @param docs index documents to add
     */
    public void redoIndexItem(MailItem item, int itemId, List<IndexDocument> docs) {
        mailbox.lock.lock();
        try {
            final Indexer writer = indexStore.openIndexer();
            try {
                writer.addDocument(item.getFolder(), item, docs);
            } finally {
                writer.close();
            }
        } catch (Exception failure) {
            ZimbraLog.index.warn("Skipping indexing; Unable to parse message %d", itemId, failure);
        } finally {
            mailbox.lock.release();
        }
    }

    /**
     * Deletes index documents. The caller doesn't necessarily hold the mailbox lock.
     * <p>
     * If the indexer cannot be opened, or cannot be closed afterwards, the method returns without touching the
     * deferred id set. Otherwise the ids are removed from the deferred set, even when the delete itself reported
     * an {@link IOException}.
     *
     * @param ids ids of the items whose index documents are to be deleted; an empty list is a no-op
     */
    synchronized void delete(List<Integer> ids) {
        if (ids.isEmpty()) {
            return;
        }

        final Indexer writer;
        try {
            writer = indexStore.openIndexer();
        } catch (IndexPendingDeleteException pendingDelete) {
            ZimbraLog.index.debug("delete of ids from index aborted as it is pending delete");
            lastFailedTime = System.currentTimeMillis();
            return;
        } catch (IOException openFailure) {
            ZimbraLog.index.warn("Failed to open Indexer", openFailure);
            lastFailedTime = System.currentTimeMillis();
            return;
        }

        try {
            writer.deleteDocument(ids);
        } catch (IOException deleteFailure) {
            ZimbraLog.index.warn("Failed to delete index documents", deleteFailure);
        } finally {
            try {
                writer.close();
            } catch (IOException closeFailure) {
                ZimbraLog.index.error("Failed to close Indexer", closeFailure);
                return;
            }
        }

        // reached only when the indexer was closed cleanly
        removeDeferredId(ids);
    }

    /**
     * Adds index documents. The caller must hold the mailbox lock.
     * <p>
     * Entries without index data, and entries whose indexing fails with an {@link IOException}, are skipped. For
     * the entries that were added, the index id is recorded in the database and on the in-memory item, and the id
     * is removed from the deferred set. Nothing is recorded if the indexer cannot be opened or closed, or if the
     * index turns out to be pending delete part-way through.
     *
     * @param entries items together with their generated index documents; an empty list is a no-op
     */
    synchronized void add(List<IndexItemEntry> entries) throws ServiceException {
        assert (mailbox.lock.isWriteLockedByCurrentThread());
        if (entries.isEmpty()) {
            return;
        }

        final Indexer writer;
        try {
            writer = indexStore.openIndexer();
        } catch (IndexPendingDeleteException pendingDelete) {
            ZimbraLog.index.debug("add of entries to index aborted as index is pending delete");
            lastFailedTime = System.currentTimeMillis();
            return;
        } catch (IOException openFailure) {
            ZimbraLog.index.warn("Failed to open Indexer", openFailure);
            lastFailedTime = System.currentTimeMillis();
            return;
        }

        List<MailItem> added = new ArrayList<MailItem>(entries.size());
        try {
            for (IndexItemEntry candidate : entries) {
                if ((indexStore != null) && indexStore.isPendingDelete()) {
                    ZimbraLog.index.debug("add of list of entries to index aborted as index is pending delete");
                    lastFailedTime = System.currentTimeMillis();
                    return; // No point in indexing if we are going to delete the index
                }
                if (candidate.documents == null) {
                    ZimbraLog.index.warn("NULL index data item=%s", candidate);
                    continue;
                }

                ZimbraLog.index.debug("Indexing id=%d", candidate.item.getId());

                try {
                    writer.addDocument(candidate.item.getFolder(), candidate.item, candidate.documents);
                } catch (IOException addFailure) {
                    ZimbraLog.index.warn("Failed to index item=%s", candidate, addFailure);
                    lastFailedTime = System.currentTimeMillis();
                    continue;
                }
                added.add(candidate.item);
            }
        } finally {
            try {
                writer.close();
            } catch (IOException closeFailure) {
                ZimbraLog.index.error("Failed to close Indexer", closeFailure);
                return;
            }
        }

        // persist the new index ids first, then reflect them on the in-memory items
        List<Integer> addedIds = new ArrayList<Integer>(added.size());
        for (MailItem done : added) {
            addedIds.add(done.getId());
        }
        DbMailItem.setIndexIds(mailbox.getOperationConnection(), mailbox, addedIds);
        for (MailItem done : added) {
            done.mData.indexId = done.getId();
            removeDeferredId(done.getId());
        }
    }

    /**
     * Primes the index for the fastest available search if useful to the underlying IndexStore.
     * This is a very expensive operation especially on large index.
     * <p>
     * All pending (deferred) items are indexed first, then the index store is asked to optimize.
     */
    public void optimize() throws ServiceException {
        // index all pending items
        EnumSet<MailItem.Type> noTypeFilter = EnumSet.noneOf(MailItem.Type.class);
        BatchStatus progress = new BatchStatus();
        indexDeferredItems(noTypeFilter, progress, true);

        indexStore.optimize();
    }

    /**
     * Compacts the index data by expunging deletes.
     * <p>
     * Failures are logged and not propagated: a pending-delete index is logged at debug level, any other
     * {@link IOException} at error level.
     *
     * @throws ServiceException declared by the signature; not thrown directly by the statements in this method
     */
    public void compact() throws ServiceException {
        try {
            final Indexer writer = indexStore.openIndexer();
            try {
                writer.compact();
            } finally {
                writer.close();
            }
        } catch (IndexPendingDeleteException pendingDelete) {
            ZimbraLog.index.debug("Compaction of index aborted as it is pending delete");
        } catch (IOException ioFailure) {
            ZimbraLog.index.error("Failed to compact index", ioFailure);
        }
    }

    /**
     * Immutable pair of index counters: the maximum document count and the number of deleted documents.
     */
    public static final class IndexStats {
        private final int maxDocs;
        private final int numDeletedDocs;

        /**
         * @param maxDocs value later returned by {@link #getMaxDocs()}
         * @param numDeletedDocs value later returned by {@link #getNumDeletedDocs()}
         */
        public IndexStats(int maxDocs, int numDeletedDocs) {
            this.numDeletedDocs = numDeletedDocs;
            this.maxDocs = maxDocs;
        }

        /** @return the maximum document count supplied at construction */
        public int getMaxDocs() {
            return this.maxDocs;
        }

        /** @return the deleted document count supplied at construction */
        public int getNumDeletedDocs() {
            return this.numDeletedDocs;
        }
    }

    /**
     * Collects the current index statistics.
     * <p>
     * The maximum document count is read from a freshly opened indexer (closed again right away); the deleted
     * document count comes from {@link #numDeletedDocs()}.
     *
     * @return statistics snapshot
     * @throws ServiceException FAILURE wrapping any {@link IOException} raised while gathering the numbers
     */
    public IndexStats getIndexStats() throws ServiceException {
        int docCeiling;
        int deleted;
        try {
            Indexer writer = indexStore.openIndexer();
            try {
                docCeiling = writer.maxDocs();
            } finally {
                writer.close();
            }
            deleted = numDeletedDocs();
        } catch (IOException ioFailure) {
            throw ServiceException.FAILURE("Failed to open Indexer", ioFailure);
        }
        return new IndexStats(docCeiling, deleted);
    }

    /**
     * Returns the number of deleted documents, as reported by the index reader of a freshly opened searcher.
     * The searcher is closed quietly before returning.
     *
     * @return number of deleted docs for this index
     * @throws ServiceException FAILURE wrapping any {@link IOException} raised while opening or reading
     */
    public int numDeletedDocs() throws ServiceException {
        try {
            final ZimbraIndexSearcher reader = indexStore.openSearcher();
            try {
                int deleted = reader.getIndexReader().numDeletedDocs();
                return deleted;
            } finally {
                Closeables.closeQuietly(reader);
            }
        } catch (IOException ioFailure) {
            throw ServiceException.FAILURE("Failed to open Searcher", ioFailure);
        }
    }

    /**
     * @return the status object of the current re-index task, or {@code null} when no re-index task is set
     */
    public synchronized ReIndexStatus getReIndexStatus() {
        if (reIndex == null) {
            return null;
        }
        return reIndex.status;
    }

    /**
     * @return {@code true} while a re-index task reference is set on this index
     */
    public boolean isReIndexInProgress() {
        return null != reIndex;
    }

    /**
     * @return {@code true} while a compact-index task reference is set on this index
     */
    public boolean isCompactIndexInProgress() {
        return null != compactIndex;
    }

    /**
     * Executes a DB search in a mailbox transaction.
     * <p>
     * When {@code fetch} is {@link DbSearch.FetchMode#MAIL_ITEM}, every hit is converted in place from its
     * underlying data into a result backed by a {@link MailItem}. Hits that cannot be converted are logged and
     * dropped from the returned list.
     *
     * @param constraints search constraints passed to {@link DbSearch}
     * @param fetch what to fetch for each hit
     * @param sort sort order
     * @param offset offset passed to {@link DbSearch}
     * @param size size passed to {@link DbSearch}
     * @param inDumpster passed to the {@link DbSearch} constructor
     * @return search results
     * @throws ServiceException if the transaction or the DB search fails
     */
    public List<DbSearch.Result> search(DbSearchConstraints constraints, DbSearch.FetchMode fetch, SortBy sort,
            int offset, int size, boolean inDumpster) throws ServiceException {
        List<DbSearch.Result> result;
        boolean success = false;
        try {
            mailbox.beginReadTransaction("search", null);
            result = new DbSearch(mailbox, inDumpster).search(mailbox.getOperationConnection(), constraints, sort,
                    offset, size, fetch);
            if (fetch == DbSearch.FetchMode.MAIL_ITEM) {
                // Convert UnderlyingData to MailItem
                ListIterator<DbSearch.Result> itr = result.listIterator();
                while (itr.hasNext()) {
                    DbSearch.Result sr = itr.next();
                    try {
                        MailItem item = mailbox.getItem(sr.getItemData());
                        itr.set(new ItemSearchResult(item, sr.getSortValue()));
                    } catch (ServiceException se) {
                        // The format string has exactly two specifiers; a third argument used to be passed here
                        // and was silently ignored by String.format.
                        ZimbraLog.index.info(String.format(
                                "Problem constructing Result for folder=%s item=%s from UnderlyingData - dropping item",
                                sr.getItemData().folderId, sr.getItemData().id), se);
                        itr.remove();
                    }
                }
            }
            success = true;
        } finally {
            mailbox.endTransaction(success);
        }
        return result;
    }

    /*
     * Number of accesses allowed to the underlying CharSequence during a regex match before deciding that too
     * much resource has been used. These regexes really shouldn't be complicated, so this value should be way
     * more than enough. Leaving hard coded.
     */
    final private static int MAX_REGEX_ACCESSES = 100000;

    /**
     * Returns all domain names from the index.
     * <p>
     * Only terms that start with '@' and contain a '.' are considered; the leading '@' is stripped from the
     * returned text. A non-empty {@code regex} that does not already start with '@' is prefixed with one before
     * it is compiled.
     *
     * @param field Lucene field name (e.g. LuceneFields.L_H_CC)
     * @param regex matching pattern or null to match everything
     * @return {@link BrowseTerm}s which correspond to all of the domain terms stored in a given field
     */
    public List<BrowseTerm> getDomains(String field, String regex) throws IOException, ServiceException {
        Pattern pattern = null;
        if (!Strings.isNullOrEmpty(regex)) {
            String prefixed = regex.startsWith("@") ? regex : "@" + regex;
            pattern = Pattern.compile(prefixed);
        }

        List<BrowseTerm> domains = new ArrayList<BrowseTerm>();
        ZimbraIndexSearcher searcher = indexStore.openSearcher();
        TermFieldEnumeration terms = null;
        try {
            terms = searcher.getIndexReader().getTermsForField(field, "");
            while (terms.hasMoreElements()) {
                BrowseTerm term = terms.nextElement();
                if (term == null) {
                    break;
                }
                String text = term.getText();
                // Domains are tokenized with '@' prefix. Exclude partial domain tokens.
                boolean fullDomainToken = text.startsWith("@") && text.contains(".");
                if (!fullDomainToken) {
                    continue;
                }
                if (pattern == null || AccessBoundedRegex.matches(text, pattern, MAX_REGEX_ACCESSES)) {
                    domains.add(new BrowseTerm(text.substring(1), term.getFreq()));
                }
            }
        } finally {
            Closeables.closeQuietly(terms);
            Closeables.closeQuietly(searcher);
        }
        return domains;
    }

    /**
     * Returns all attachment types from the index.
     * <p>
     * Enumeration stops at the first {@code null} term, matching the handling in {@code getDomains} and
     * {@code getObjects}.
     *
     * @param regex matching pattern or null to match everything
     * @return {@link BrowseTerm}s which correspond to all of the attachment types in the index
     */
    public List<BrowseTerm> getAttachmentTypes(String regex) throws IOException, ServiceException {
        Pattern pattern = Strings.isNullOrEmpty(regex) ? null : Pattern.compile(regex);
        List<BrowseTerm> result = new ArrayList<BrowseTerm>();
        ZimbraIndexSearcher searcher = indexStore.openSearcher();
        TermFieldEnumeration values = null;
        try {
            values = searcher.getIndexReader().getTermsForField(LuceneFields.L_ATTACHMENTS, "");
            while (values.hasMoreElements()) {
                BrowseTerm term = values.nextElement();
                if (term == null) {
                    // guard against a null element instead of failing on term.getText()
                    break;
                }
                if (pattern == null || AccessBoundedRegex.matches(term.getText(), pattern, MAX_REGEX_ACCESSES)) {
                    result.add(term);
                }
            }
        } finally {
            Closeables.closeQuietly(values);
            Closeables.closeQuietly(searcher);
        }
        return result;
    }

    /**
     * Returns all objects (e.g. PO, etc) from the index.
     * <p>
     * Terms are read from the {@code LuceneFields.L_OBJECTS} field; enumeration stops at the first {@code null}
     * term.
     *
     * @param regex matching pattern or null to match everything
     * @return {@link BrowseTerm}s which correspond to all of the objects in the index
     */
    public List<BrowseTerm> getObjects(String regex) throws IOException, ServiceException {
        Pattern pattern = null;
        if (!Strings.isNullOrEmpty(regex)) {
            pattern = Pattern.compile(regex);
        }

        List<BrowseTerm> objects = new ArrayList<BrowseTerm>();
        ZimbraIndexSearcher searcher = indexStore.openSearcher();
        TermFieldEnumeration terms = null;
        try {
            terms = searcher.getIndexReader().getTermsForField(LuceneFields.L_OBJECTS, "");
            while (terms.hasMoreElements()) {
                BrowseTerm term = terms.nextElement();
                if (term == null) {
                    break;
                }
                boolean accepted =
                        pattern == null || AccessBoundedRegex.matches(term.getText(), pattern, MAX_REGEX_ACCESSES);
                if (accepted) {
                    objects.add(term);
                }
            }
        } finally {
            Closeables.closeQuietly(terms);
            Closeables.closeQuietly(searcher);
        }
        return objects;
    }

    /**
     * Returns the index deferred item count for the types.
     * <p>
     * A failure to query the deferred ids is logged and reported as a count of 0.
     *
     * @param types item types, empty set means all types
     * @return index deferred count
     */
    private int getDeferredCount(Set<MailItem.Type> types) {
        SetMultimap<MailItem.Type, Integer> deferred;
        try {
            deferred = getDeferredIds();
        } catch (ServiceException e) {
            ZimbraLog.index.error("Failed to query deferred IDs", e);
            return 0;
        }

        // Test for null before wrapping: once wrapped, the reference can no longer be null, so a null check
        // performed afterwards would never fire. NOTE(review): Guava's synchronized wrappers are expected to
        // reject a null delegate -- confirm against the Guava version in use.
        if (deferred == null) {
            return 0;
        }
        SetMultimap<MailItem.Type, Integer> ids = Multimaps.synchronizedSetMultimap(deferred);

        if (ids.isEmpty()) {
            return 0;
        } else if (types.isEmpty()) {
            return ids.size();
        } else {
            int total = 0;
            for (MailItem.Type type : types) {
                total += ids.get(type).size();
            }
            return total;
        }
    }

    /**
     * Returns the deferred id multimap, loading it from the database on first use.
     * <p>
     * The load happens only while the {@code deferredIds} field is {@code null}; the DB connection used for it is
     * closed quietly afterwards.
     *
     * @return the current value of the {@code deferredIds} field
     */
    private SetMultimap<MailItem.Type, Integer> getDeferredIds() throws ServiceException {
        if (deferredIds != null) {
            return deferredIds;
        }

        DbConnection dbConn = DbPool.getConnection(mailbox);
        try {
            deferredIds = DbMailItem.getIndexDeferredIds(dbConn, mailbox);
        } finally {
            dbConn.closeQuietly();
        }
        return deferredIds;
    }

    /**
     * Returns the deferred item ids for the given types as an immutable snapshot.
     *
     * @param types item types, empty set means all types
     * @return ids of the deferred items of the requested types; empty when nothing is deferred
     */
    private synchronized Collection<Integer> getDeferredIds(Set<MailItem.Type> types) throws ServiceException {
        SetMultimap<MailItem.Type, Integer> all = getDeferredIds();
        if (all == null || all.isEmpty()) {
            return Collections.emptyList();
        }
        if (types.isEmpty()) {
            return ImmutableSet.copyOf(all.values());
        }

        ImmutableSet.Builder<Integer> selected = ImmutableSet.builder();
        for (MailItem.Type wanted : types) {
            Set<Integer> idsOfType = all.get(wanted);
            if (idsOfType != null) {
                selected.addAll(idsOfType);
            }
        }
        return selected.build();
    }

    /**
     * Adds the item to the deferred queue.
     * <p>
     * Items whose index status is {@code NO} are ignored. An item whose status is {@code DONE} has its index id
     * set to the {@code STALE} marker first. The id is queued only if the deferred id set has already been
     * loaded.
     *
     * @param item item to index
     */
    synchronized void add(MailItem item) {
        switch (item.getIndexStatus()) {
        case NO:
            return;
        case DONE:
            item.mData.indexId = MailItem.IndexStatus.STALE.id();
            break;
        default:
            break;
        }

        if (deferredIds != null) {
            deferredIds.put(item.getType(), item.getId());
            ZimbraLog.index.debug("deferredIds=%s", deferredIds);
        }
    }

    /**
     * Removes a single id from the deferred id set. Does nothing when the set has not been loaded.
     *
     * @param id item id; asserted to be positive
     */
    synchronized void removeDeferredId(int id) {
        assert id > 0 : id;
        if (deferredIds != null) {
            deferredIds.values().remove(id);
        }
    }

    /**
     * Removes all of the given ids from the deferred id set. Does nothing when the set has not been loaded.
     *
     * @param ids item ids to remove
     */
    synchronized void removeDeferredId(Collection<Integer> ids) {
        if (deferredIds != null) {
            deferredIds.values().removeAll(ids);
        }
    }

    /**
     * Discards the deferred id set by resetting the field to {@code null}.
     */
    synchronized void clearDeferredIds() {
        this.deferredIds = null;
    }

    /**
     * Converts conversation type to message type if the type set contains it. We need to index message items when
     * a conversation search is requested.
     *
     * @param types requested item types; never modified
     * @return {@code types} itself when it has no CONVERSATION entry, otherwise a copy in which CONVERSATION is
     *         replaced by MESSAGE
     */
    private Set<MailItem.Type> toIndexTypes(Set<MailItem.Type> types) {
        if (!types.contains(MailItem.Type.CONVERSATION)) {
            return types;
        }

        Set<MailItem.Type> converted = EnumSet.copyOf(types); // copy
        converted.remove(MailItem.Type.CONVERSATION);
        converted.add(MailItem.Type.MESSAGE);
        return converted;
    }

    /**
     * Batch index progress information. The counters are not thread safe.
     * <p>
     * {@code total} starts at -1 until {@link #setTotal(int)} is called; this base implementation never reports
     * cancellation.
     */
    private static class BatchStatus {
        private int total = -1;
        private int processed = 0;
        private int failed = 0;

        /** Sets the total number of items in the batch. */
        void setTotal(int value) {
            this.total = value;
        }

        /** Adds {@code delta} to the processed counter. */
        void addProcessed(int delta) {
            this.processed = this.processed + delta;
        }

        /** Adds {@code delta} to the failed counter. */
        void addFailed(int delta) {
            this.failed = this.failed + delta;
        }

        public int getTotal() {
            return this.total;
        }

        public int getProcessed() {
            return this.processed;
        }

        public int getFailed() {
            return this.failed;
        }

        /** @return always {@code false} in this base class */
        boolean isCancelled() {
            return false;
        }

        @Override
        public String toString() {
            // goes through the getters so that subclasses overriding them are reflected
            return Objects.toStringHelper(this)
                    .add("total", getTotal())
                    .add("processed", getProcessed())
                    .add("failed", getFailed())
                    .toString();
        }
    }

    /**
     * Re-index progress information.
     * <p>
     * The counters and the cancel flag are {@code volatile}, so values written by one thread are visible to
     * others. The increments themselves are plain read-modify-write operations and are not atomic.
     */
    public static final class ReIndexStatus extends BatchStatus {
        private volatile int total = -1;
        private volatile int processed = 0;
        private volatile int failed = 0;
        private volatile boolean cancel = false;

        private ReIndexStatus() {
        }

        @Override
        void setTotal(int value) {
            this.total = value;
        }

        /** Adds {@code delta} to the processed counter and logs progress whenever it is a multiple of 2000. */
        @Override
        void addProcessed(int delta) {
            this.processed = this.processed + delta;
            if (this.processed % 2000 == 0) {
                ZimbraLog.index.info("Re-index progress %d/%d", this.processed, this.total);
            }
        }

        @Override
        void addFailed(int delta) {
            this.failed = this.failed + delta;
        }

        @Override
        public int getTotal() {
            return this.total;
        }

        @Override
        public int getProcessed() {
            return this.processed;
        }

        @Override
        public int getFailed() {
            return this.failed;
        }

        /** Requests cancellation; observed through {@link #isCancelled()}. */
        void cancel() {
            this.cancel = true;
        }

        @Override
        boolean isCancelled() {
            return this.cancel;
        }
    }

    public static abstract class IndexTask implements Runnable {
        private final Mailbox mailbox;

        public IndexTask(Mailbox mbox) {
            mailbox = mbox;
        }

        @Override
        public final void run() {
            try {
                ZimbraLog.addMboxToContext(mailbox.getId());
                ZimbraLog.addAccountNameToContext(mailbox.getAccount().getName());
                exec();
            } catch (OutOfMemoryError e) {
                Zimbra.halt("out of memory", e);
            } catch (Throwable t) {
                ZimbraLog.index.error(t.getMessage(), t);
            } finally {
                ZimbraLog.clearContext();
            }
        }

        protected abstract void exec() throws Exception;
    }

    /**
     * Task that indexes the deferred items of the enclosing index's mailbox, with no type restriction.
     */
    private final class BatchIndexTask extends IndexTask {

        BatchIndexTask() {
            super(mailbox);
        }

        @Override
        protected void exec() throws Exception {
            EnumSet<MailItem.Type> noTypeFilter = EnumSet.noneOf(MailItem.Type.class);
            BatchStatus progress = new BatchStatus();
            indexDeferredItems(noTypeFilter, progress, false);
        }

    }

    /**
     * Search result backed by a fully constructed {@link MailItem}; id, index id and type are all read from that
     * item.
     */
    private static final class ItemSearchResult extends DbSearch.Result {
        private final MailItem item;

        /**
         * @param item the mail item this result represents
         * @param sortkey sort value handed to the superclass
         */
        ItemSearchResult(MailItem item, Object sortkey) {
            super(sortkey);
            this.item = item;
        }

        @Override
        public int getId() {
            return this.item.getId();
        }

        @Override
        public int getIndexId() {
            return this.item.getIndexId();
        }

        @Override
        public Type getType() {
            return this.item.getType();
        }

        @Override
        public MailItem getItem() {
            return this.item;
        }
    }

}