com.vmware.dcp.services.common.LuceneBlobIndexService.java Source code

Java tutorial

Introduction

Here is the source code for com.vmware.dcp.services.common.LuceneBlobIndexService.java

Source

/*
 * Copyright (c) 2014-2015 VMware, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License.  You may obtain a copy of
 * the License at http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed
 * under the License is distributed on an "AS IS" BASIS, without warranties or
 * conditions of any kind, EITHER EXPRESS OR IMPLIED.  See the License for the
 * specific language governing permissions and limitations under the License.
 */

package com.vmware.dcp.services.common;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.util.EnumSet;
import java.util.Map;
import java.util.concurrent.CancellationException;
import java.util.concurrent.ExecutorService;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.util.BytesRef;

import com.vmware.dcp.common.Operation;
import com.vmware.dcp.common.ServiceDocument;
import com.vmware.dcp.common.ServiceHost;
import com.vmware.dcp.common.ServiceHost.ServiceHostState.MemoryLimitType;
import com.vmware.dcp.common.StatelessService;
import com.vmware.dcp.common.UriUtils;
import com.vmware.dcp.common.Utils;

public class LuceneBlobIndexService extends StatelessService {
    /**
     * Options that alter how the blob index is opened and how entries are retained.
     */
    public static enum BlobIndexOption {
        /**
         * Key is deleted after a successful query
         */
        SINGLE_USE_KEYS,

        /**
         * Index is created on start. If an older index exists, it is deleted.
         */
        CREATE
    }

    /**
     * Builds a POST operation that stores {@code blob} under {@code key} in the blob index
     * located at {@link #SELF_LINK}.
     *
     * @param host host used to build the target URI
     * @param key  key the blob is stored under
     * @param blob object to store; set as the operation body without cloning
     * @return the POST operation, not yet sent
     */
    public static Operation createPost(ServiceHost host, String key, Object blob) {
        return createPost(host, SELF_LINK, key, blob);
    }

    /**
     * Builds a GET operation that looks up {@code key} in the blob index located at
     * {@link #SELF_LINK}.
     *
     * @param host host used to build the target URI
     * @param key  key to look up
     * @return the GET operation, not yet sent
     */
    public static Operation createGet(ServiceHost host, String key) {
        return createGet(host, SELF_LINK, key);
    }

    /**
     * Builds a POST operation that stores {@code blob} under {@code key} in the blob index
     * service found at {@code indexPath}. The key and the current time (in microseconds) are
     * passed as URI query parameters; the blob travels as the operation body.
     *
     * NOTE(review): the key is concatenated into the query string verbatim - confirm that
     * callers only pass keys that are safe to embed in a URI query.
     *
     * @param host      host used to build the target URI
     * @param indexPath path of the blob index service
     * @param key       key the blob is stored under
     * @param blob      object to store; set as the operation body without cloning
     * @return the POST operation, not yet sent
     */
    protected static Operation createPost(ServiceHost host, String indexPath, String key, Object blob) {
        String query = URI_PARAM_NAME_KEY + "=" + key
                + "&" + URI_PARAM_NAME_UPDATE_TIME + "=" + Utils.getNowMicrosUtc();
        Operation post = Operation.createPost(UriUtils.buildUri(host, indexPath, query));
        return post.setBodyNoCloning(blob);
    }

    /**
     * Builds a GET operation that looks up {@code key} in the blob index service found at
     * {@code indexPath}. The key is passed as a URI query parameter.
     *
     * NOTE(review): the key is concatenated into the query string verbatim - confirm that
     * callers only pass keys that are safe to embed in a URI query.
     *
     * @param host      host used to build the target URI
     * @param indexPath path of the blob index service
     * @param key       key to look up
     * @return the GET operation, not yet sent
     */
    protected static Operation createGet(ServiceHost host, String indexPath, String key) {
        String query = URI_PARAM_NAME_KEY + "=" + key;
        return Operation.createGet(UriUtils.buildUri(host, indexPath, query));
    }

    /** URI path of the default blob index service instance. */
    public static final String SELF_LINK = ServiceUriPaths.CORE_BLOB_INDEX;

    /** Index directory name used by the no-argument constructor; resolved under the host storage sandbox. */
    public static final String FILE_PATH = "lucene-blob-index";

    /** Name of the URI query parameter, and of the Lucene field, that carries the blob key. */
    public static final String URI_PARAM_NAME_KEY = "key";

    /** Name of the URI query parameter, and of the Lucene field, that carries the update time. */
    private static final String URI_PARAM_NAME_UPDATE_TIME = "updateTime";

    /** Name of the stored Lucene field that holds the serialized blob. */
    private static final String LUCENE_FIELD_NAME_BINARY_CONTENT = "binaryContent";

    /** Index directory name, relative to the host storage sandbox. */
    private String indexDirectory;

    /** Cached searcher; replaced in updateSearcher() while holding searchSync. */
    private IndexSearcher searcher = null;
    /** Active index writer; set on start, set to null when the service is deleted. */
    private IndexWriter writer = null;

    /** Lock guarding reads and writes of the cached searcher and its update time. */
    private Object searchSync = new Object();

    /** Time (microseconds) at which the cached searcher was last replaced. */
    private long searcherUpdateTimeMicros;

    /** Time (microseconds) of the last document addition or deletion. */
    private long indexUpdateTimeMicros;

    /** Options supplied at construction time. */
    private EnumSet<BlobIndexOption> indexOptions;

    /** Sort applied to key lookups; built in handleStart() over the update time field, reversed. */
    private Sort timeSort;

    /** Field type used for the update time field. */
    private final FieldType longStoredField = LuceneDocumentIndexService.numericDocType(FieldType.NumericType.LONG,
            true);

    /** Size, in bytes, of the buffer allocated per POST to serialize the blob. */
    private int maxBinaryContextSizeBytes = 1024 * 1024;

    /** Executor on which requests and maintenance run; allocated in handleStart(). */
    private ExecutorService executor;

    /**
     * Creates a blob index service that uses the default index directory
     * ({@link #FILE_PATH}) and no index options.
     *
     * NOTE(review): unlike the two-argument constructor, this one does not call
     * {@code super(ServiceDocument.class)} and does not toggle PERIODIC_MAINTENANCE or
     * INSTRUMENTATION - confirm this difference is intentional.
     */
    public LuceneBlobIndexService() {
        this.indexDirectory = FILE_PATH;
        this.indexOptions = EnumSet.noneOf(BlobIndexOption.class);
    }

    /**
     * Creates a blob index service with explicit options and index directory, and turns on
     * the PERIODIC_MAINTENANCE and INSTRUMENTATION service options.
     *
     * @param options        index options to apply
     * @param indexDirectory index directory name, relative to the host storage sandbox
     */
    public LuceneBlobIndexService(EnumSet<BlobIndexOption> options, String indexDirectory) {
        super(ServiceDocument.class);
        this.indexOptions = options;
        this.indexDirectory = indexDirectory;
        super.toggleOption(ServiceOption.PERIODIC_MAINTENANCE, true);
        super.toggleOption(ServiceOption.INSTRUMENTATION, true);
    }

    /**
     * Starts the service: allocates the executor used to process requests, sets the
     * maintenance interval to five times the host's interval, and opens the index writer on
     * the index directory under the host storage sandbox. The start operation is failed if
     * the writer cannot be created.
     *
     * @param post the start operation to complete or fail
     */
    @Override
    public void handleStart(final Operation post) {
        this.executor = getHost().allocateExecutor(this, 1);
        super.setMaintenanceIntervalMicros(getHost().getMaintenanceIntervalMicros() * 5);

        File sandbox = new File(getHost().getStorageSandbox());
        File indexDir = new File(sandbox, this.indexDirectory);
        // reverse sort on update time, so the most recent entry for a key comes first
        this.timeSort = new Sort(new SortField(URI_PARAM_NAME_UPDATE_TIME, SortField.Type.LONG, true));

        IndexWriter created;
        try {
            created = createWriter(indexDir);
        } catch (IOException ioe) {
            post.fail(ioe);
            return;
        }
        this.writer = created;
        post.complete();
    }

    /**
     * Opens an index writer on the supplied directory and performs an initial commit.
     * When {@link BlobIndexOption#CREATE} is set any existing index is replaced; otherwise
     * the index is created if missing or appended to. If the host reports an exact memory
     * limit for this service, it is used (minimum 1 MB) as the writer RAM buffer size.
     *
     * If opening the writer or the initial commit fails, the writer (if any) is rolled back
     * and the directory is closed before the exception is propagated, so a failed attempt
     * does not leak resources or keep the index write lock.
     *
     * @param directory file system location of the index
     * @return an open writer
     * @throws IOException if the directory or the writer cannot be opened, or the commit fails
     */
    public IndexWriter createWriter(File directory) throws IOException {
        Directory dir = MMapDirectory.open(directory.toPath());
        IndexWriter w = null;
        try {
            Analyzer analyzer = new SimpleAnalyzer();
            IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
            if (this.indexOptions.contains(BlobIndexOption.CREATE)) {
                iwc.setOpenMode(OpenMode.CREATE);
            } else {
                iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
            }
            Long totalMBs = getHost().getServiceMemoryLimitMB(getSelfLink(), MemoryLimitType.EXACT);
            if (totalMBs != null) {
                iwc.setRAMBufferSizeMB(Math.max(1, totalMBs));
            }
            w = new IndexWriter(dir, iwc);
            w.commit();
            return w;
        } catch (IOException | RuntimeException e) {
            // release whatever was opened; keep the original failure as the primary exception
            if (w != null) {
                try {
                    w.rollback();
                } catch (Throwable cleanupFailure) {
                    e.addSuppressed(cleanupFailure);
                }
            }
            try {
                dir.close();
            } catch (Throwable cleanupFailure) {
                e.addSuppressed(cleanupFailure);
            }
            throw e;
        }
    }

    /**
     * Entry point for all requests. PUT and PATCH are rejected immediately; DELETE, GET and
     * POST are dispatched to the service executor. Any other action is failed as not
     * supported so that the operation is never left without a completion. Exceptions thrown
     * by a handler fail the operation.
     *
     * @param op the incoming operation
     */
    @Override
    public void handleRequest(Operation op) {
        Action a = op.getAction();
        if (a == Action.PUT || a == Action.PATCH) {
            getHost().failRequestActionNotSupported(op);
            return;
        }

        this.executor.execute(() -> {
            try {
                switch (a) {
                case DELETE:
                    handleDelete(op);
                    break;
                case GET:
                    handleGet(op);
                    break;
                case POST:
                    handlePost(op);
                    break;
                default:
                    // an unhandled action must still complete the operation
                    getHost().failRequestActionNotSupported(op);
                    break;
                }
            } catch (Throwable e) {
                op.fail(e);
            }
        });
    }

    /**
     * Handles a lookup. The key is read from the {@code key} URI query parameter; the
     * request is failed when the parameter is missing or when the index query throws.
     *
     * @param get the GET operation
     */
    @Override
    public void handleGet(Operation get) {
        try {
            String requestedKey = UriUtils.parseUriQueryParams(get.getUri()).get(URI_PARAM_NAME_KEY);
            if (requestedKey != null) {
                queryIndex(requestedKey, get);
                return;
            }
            get.fail(new IllegalArgumentException("key query parameter is required"));
        } catch (Throwable failure) {
            logSevere(failure);
            get.fail(failure);
        }
    }

    /**
     * Looks up the first entry for {@code key} under the update-time sort and completes
     * the operation with the deserialized blob as its body. The operation completes with no
     * body when nothing matches, and fails with a {@link CancellationException} when the
     * writer is no longer available. After a hit, the blob retention policy is applied
     * before the operation completes.
     *
     * @param key key to look up
     * @param op  operation to complete or fail
     * @throws Throwable if searching, deserializing or applying the retention policy fails
     */
    private void queryIndex(String key, Operation op) throws Throwable {
        IndexWriter activeWriter = this.writer;
        if (activeWriter == null) {
            op.fail(new CancellationException());
            return;
        }

        IndexSearcher indexSearcher = updateSearcher(key, activeWriter);
        Query keyQuery = new TermQuery(new Term(URI_PARAM_NAME_KEY, key));
        TopDocs results = indexSearcher.search(keyQuery, 1, this.timeSort, false, false);
        if (results.totalHits != 0) {
            Document newest = indexSearcher.doc(results.scoreDocs[0].doc);
            BytesRef payload = newest.getBinaryValue(LUCENE_FIELD_NAME_BINARY_CONTENT);
            long updateTime = Long.parseLong(newest.get(URI_PARAM_NAME_UPDATE_TIME));
            Object blob = Utils.fromBytes(payload.bytes, payload.offset, payload.length);
            applyBlobRetentionPolicy(keyQuery, updateTime);
            op.setBodyNoCloning(blob).complete();
            return;
        }

        // nothing stored under this key
        op.complete();
    }

    /**
     * Stores the request body as a blob. Only local requests are accepted. The {@code key}
     * and {@code updateTime} URI query parameters are required, as is a body. The body is
     * serialized into a buffer of {@code maxBinaryContextSizeBytes} bytes and added to the
     * index as a new document together with its key and update time.
     *
     * @param post the POST operation; completed with a null body on success
     */
    protected void handlePost(Operation post) {
        if (post.isRemote()) {
            post.fail(new IllegalStateException("Remote requests not allowed"));
            return;
        }

        Map<String, String> queryParams = UriUtils.parseUriQueryParams(post.getUri());

        String blobKey = queryParams.get(URI_PARAM_NAME_KEY);
        if (blobKey == null) {
            post.fail(new IllegalArgumentException("key query parameter is required"));
            return;
        }

        String rawUpdateTime = queryParams.get(URI_PARAM_NAME_UPDATE_TIME);
        if (rawUpdateTime == null) {
            post.fail(new IllegalArgumentException("update time query parameter is required"));
            return;
        }
        long updateTimeMicros = Long.parseLong(rawUpdateTime);

        IndexWriter activeWriter = this.writer;
        if (activeWriter == null) {
            post.fail(new CancellationException());
            return;
        }

        try {
            Object blob = post.getBodyRaw();
            if (blob == null) {
                post.fail(new IllegalArgumentException("service instance is required"));
                return;
            }

            // serialize into a fixed-size buffer; only the first "length" bytes are stored
            byte[] buffer = new byte[this.maxBinaryContextSizeBytes];
            int length = Utils.toBytes(blob, buffer, 0);

            Document doc = new Document();
            doc.add(new StoredField(LUCENE_FIELD_NAME_BINARY_CONTENT, buffer, 0, length));
            doc.add(new StringField(URI_PARAM_NAME_KEY, blobKey, Field.Store.NO));
            doc.add(new LongField(URI_PARAM_NAME_UPDATE_TIME, updateTimeMicros, this.longStoredField));

            activeWriter.addDocument(doc);
            this.indexUpdateTimeMicros = Utils.getNowMicrosUtc();
            post.setBody(null).complete();
        } catch (Throwable failure) {
            logSevere(failure);
            post.fail(failure);
        }
    }

    /**
     * Stops the service. A DELETE that carries a body is rejected as not supported.
     * Otherwise the service is marked stopped, the writer is committed and closed, the
     * executor is shut down, and the operation is completed.
     *
     * @param delete the DELETE operation
     */
    @Override
    public void handleDelete(Operation delete) {
        if (!delete.hasBody()) {
            setProcessingStage(ProcessingStage.STOPPED);
            close(this.writer);
            this.writer = null;
            this.executor.shutdownNow();
            delete.complete();
        } else {
            getHost().failRequestActionNotSupported(delete);
        }
    }

    /**
     * Commits and closes the supplied writer on a best-effort basis. Failures are logged
     * rather than propagated, and {@code close()} is attempted even if the commit fails so
     * the writer's resources are not left open after a failed commit.
     *
     * @param wr writer to close; may be null, in which case nothing happens
     */
    private void close(IndexWriter wr) {
        if (wr == null) {
            return;
        }
        try {
            wr.commit();
        } catch (Throwable e) {
            logSevere(e);
        }
        try {
            wr.close();
        } catch (Throwable e) {
            logSevere(e);
        }
    }

    /**
     * Returns a searcher that reflects the latest index update. The cached searcher is
     * reused while it is newer than the last index update; otherwise a new reader is opened
     * from the writer and installed as the cached searcher, provided no other caller has
     * installed one since this call started. A freshly opened reader that is not installed
     * is closed here, because no other reference to it exists.
     *
     * NOTE(review): the reader behind a searcher that gets replaced is not closed here, as
     * it may still be in use by a caller - confirm whether it is released elsewhere.
     *
     * @param selfLink unused
     * @param w        writer to open the reader from
     * @return the searcher to use for the current query
     * @throws IOException if the reader cannot be opened
     */
    private IndexSearcher updateSearcher(String selfLink, IndexWriter w) throws IOException {
        long now = Utils.getNowMicrosUtc();
        synchronized (this.searchSync) {
            IndexSearcher current = this.searcher;
            if (current != null && this.searcherUpdateTimeMicros > this.indexUpdateTimeMicros) {
                return current;
            }
        }

        // open outside the lock; this can be slow
        DirectoryReader reader = DirectoryReader.open(w, true);
        IndexSearcher fresh = new IndexSearcher(reader);
        IndexSearcher installed;
        synchronized (this.searchSync) {
            if (this.searcherUpdateTimeMicros < now) {
                this.searcher = fresh;
                this.searcherUpdateTimeMicros = now;
                return fresh;
            }
            installed = this.searcher;
        }

        // another caller installed a searcher first: release the reader nobody will use
        try {
            reader.close();
        } catch (Throwable e) {
            logSevere(e);
        }
        return installed;
    }

    /**
     * Applies the retention policy after a successful lookup. With
     * {@link BlobIndexOption#SINGLE_USE_KEYS} set, all entries for the key whose update time
     * is at or before {@code updateTime} are deleted. Entries added for the same key after
     * the one that was just read are kept. Does nothing if the option is not set or the
     * writer is no longer available.
     *
     * NOTE(review): relies on the update time field being indexed for numeric range
     * queries - confirm against LuceneDocumentIndexService.numericDocType.
     *
     * @param linkQuery  query that matches the key
     * @param updateTime update time of the entry that was returned to the caller
     * @throws IOException if the deletion fails
     */
    private void applyBlobRetentionPolicy(Query linkQuery, long updateTime) throws IOException {
        IndexWriter wr = this.writer;
        if (wr == null) {
            return;
        }

        if (!this.indexOptions.contains(BlobIndexOption.SINGLE_USE_KEYS)) {
            return;
        }

        BooleanQuery bq = new BooleanQuery();
        bq.add(linkQuery, Occur.MUST);
        // open lower bound, inclusive upper bound: everything up to the entry just read
        NumericRangeQuery<Long> timeQuery = NumericRangeQuery.newLongRange(URI_PARAM_NAME_UPDATE_TIME, null,
                updateTime, false, true);
        bq.add(timeQuery, Occur.MUST);
        // delete with the combined query; the key-only query would also remove newer entries
        wr.deleteDocuments(bq);
        this.indexUpdateTimeMicros = Utils.getNowMicrosUtc();
    }

    /**
     * Schedules periodic maintenance on the service executor.
     *
     * @param post the maintenance operation; completed or failed by the scheduled task
     */
    @Override
    public void handleMaintenance(Operation post) {
        this.executor.execute(() -> handleMaintenanceSafe(post));
    }

    /**
     * Performs maintenance: commits the writer, publishes the writer's document count as a
     * stat, and re-opens the writer when the number of files in the index directory exceeds
     * the re-open threshold. Completes the operation, or logs and fails it on any error.
     * When no writer is available the operation is completed without doing anything.
     *
     * @param post the maintenance operation
     */
    private void handleMaintenanceSafe(Operation post) {
        try {
            IndexWriter activeWriter = this.writer;
            if (activeWriter != null) {
                activeWriter.commit();
                setStat(LuceneDocumentIndexService.STAT_NAME_INDEXED_DOCUMENT_COUNT, activeWriter.maxDoc());

                File sandbox = new File(getHost().getStorageSandbox());
                String[] entries = new File(sandbox, this.indexDirectory).list();
                int fileCount = (entries != null) ? entries.length : 0;
                // for debugging use only: we need to verify that the number of index files stays bounded
                if (fileCount > LuceneDocumentIndexService.INDEX_FILE_COUNT_THRESHOLD_FOR_REOPEN) {
                    consolidateIndexFiles();
                }
            }
            post.complete();
        } catch (Throwable failure) {
            logSevere(failure);
            post.fail(failure);
        }
    }

    /**
     * Closes the current writer and opens a new one on the same directory, logging the
     * directory file count and the writer document count before and after. A failure while
     * closing the old writer is logged and the re-open is still attempted. The "after"
     * figures are read from the newly created writer.
     *
     * @throws IOException if the new writer cannot be created
     */
    private void consolidateIndexFiles() throws IOException {
        IndexWriter w = this.writer;
        if (w == null) {
            return;
        }
        File directory = new File(new File(getHost().getStorageSandbox()), this.indexDirectory);
        String[] list = directory.list();
        int count = list == null ? 0 : list.length;
        try {
            logInfo("Before: File count: %d, document count: %d", count, w.maxDoc());
            w.close();
        } catch (Throwable e) {
            // best effort: keep going and try to re-open, but do not hide the failure
            logSevere(e);
        }

        IndexWriter reopened = createWriter(directory);
        this.writer = reopened;
        list = directory.list();
        count = list == null ? 0 : list.length;
        // report from the new writer; the old one has been closed
        logInfo("After: File count: %d, document count: %d", count, reopened.maxDoc());
    }
}