com.qwazr.search.index.IndexInstance.java Source code

Introduction

Here is the source code for com.qwazr.search.index.IndexInstance.java

Source

/**
 * Copyright 2015-2016 Emmanuel Keller / QWAZR
 * <p>
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * <p>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.qwazr.search.index;

import com.qwazr.search.analysis.AnalyzerContext;
import com.qwazr.search.analysis.AnalyzerDefinition;
import com.qwazr.search.analysis.CustomAnalyzer;
import com.qwazr.search.analysis.UpdatableAnalyzer;
import com.qwazr.search.field.FieldDefinition;
import com.qwazr.utils.IOUtils;
import com.qwazr.utils.StringUtils;
import com.qwazr.utils.json.JsonMapper;
import com.qwazr.utils.server.ServerException;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.DirectoryFileFilter;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.ws.rs.core.Response;
import java.io.Closeable;
import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.lang.reflect.Field;
import java.util.*;
import java.util.concurrent.Semaphore;

final public class IndexInstance implements Closeable {

    private static final Logger logger = LoggerFactory.getLogger(IndexInstance.class);

    private final static String INDEX_DATA = "data";
    private final static String INDEX_BACKUP = "backup";
    private final static String FIELDS_FILE = "fields.json";
    private final static String ANALYZERS_FILE = "analyzers.json";
    private final static String SETTINGS_FILE = "settings.json";

    private final FileSet fileSet;

    private final SchemaInstance schema;
    private final Directory dataDirectory;
    private final LiveIndexWriterConfig indexWriterConfig;
    private final SnapshotDeletionPolicy snapshotDeletionPolicy;
    private final IndexWriter indexWriter;
    private final SearcherManager searcherManager;
    private final IndexSettingsDefinition settings;

    private final UpdatableAnalyzer indexAnalyzer;
    private final UpdatableAnalyzer queryAnalyzer;
    private volatile LinkedHashMap<String, FieldDefinition> fieldMap;
    private volatile LinkedHashMap<String, AnalyzerDefinition> analyzerMap;

    private volatile Pair<IndexReader, SortedSetDocValuesReaderState> facetsReaderStateCache;

    private IndexInstance(SchemaInstance schema, Directory dataDirectory, IndexSettingsDefinition settings,
            LinkedHashMap<String, AnalyzerDefinition> analyzerMap, LinkedHashMap<String, FieldDefinition> fieldMap,
            FileSet fileSet, IndexWriter indexWriter, SearcherManager searcherManager,
            UpdatableAnalyzer queryAnalyzer) {
        this.schema = schema;
        this.fileSet = fileSet;
        this.dataDirectory = dataDirectory;
        this.analyzerMap = analyzerMap;
        this.fieldMap = fieldMap;
        this.indexWriter = indexWriter;
        this.indexWriterConfig = indexWriter.getConfig();
        this.indexAnalyzer = (UpdatableAnalyzer) indexWriterConfig.getAnalyzer();
        this.queryAnalyzer = queryAnalyzer;
        this.snapshotDeletionPolicy = (SnapshotDeletionPolicy) indexWriterConfig.getIndexDeletionPolicy();
        this.settings = settings;
        this.searcherManager = searcherManager;
        this.facetsReaderStateCache = null;
    }

    private static class FileSet {
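        // On-disk layout of one index: the settings.json, fields.json and analyzers.json
        // definition files, the "data" directory holding the Lucene segments, and the
        // "backup" directory holding snapshot copies.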

        private final File settingsFile;
        private final File indexDirectory;
        private final File backupDirectory;
        private final File dataDirectory;
        private final File analyzerMapFile;
        private final File fieldMapFile;

        private FileSet(File indexDirectory) {
            this.indexDirectory = indexDirectory;
            this.backupDirectory = new File(indexDirectory, INDEX_BACKUP);
            this.dataDirectory = new File(indexDirectory, INDEX_DATA);
            this.analyzerMapFile = new File(indexDirectory, ANALYZERS_FILE);
            this.fieldMapFile = new File(indexDirectory, FIELDS_FILE);
            this.settingsFile = new File(indexDirectory, SETTINGS_FILE);
        }
    }

    /**
     * Creates or reopens an index instance: opens the Lucene directory, loads the stored
     * settings, fields and analyzers, and builds the IndexWriter and SearcherManager.
     *
     * @param schema         the schema this index belongs to
     * @param indexDirectory the directory which contains the index files
     * @param settings       the settings to apply, or null to load the stored ones
     * @return the new index instance
     */
    final static IndexInstance newInstance(SchemaInstance schema, File indexDirectory,
            IndexSettingsDefinition settings)
            throws ServerException, IOException, ReflectiveOperationException, InterruptedException {
        UpdatableAnalyzer indexAnalyzer = null;
        UpdatableAnalyzer queryAnalyzer = null;
        IndexWriter indexWriter = null;
        Directory dataDirectory = null;
        try {

            if (!indexDirectory.exists())
                indexDirectory.mkdir();
            if (!indexDirectory.isDirectory())
                throw new IOException(
                        "The index directory does not exist and could not be created: " + indexDirectory);

            FileSet fileSet = new FileSet(indexDirectory);

            //Loading the settings
            if (settings == null) {
                settings = fileSet.settingsFile.exists()
                        ? JsonMapper.MAPPER.readValue(fileSet.settingsFile, IndexSettingsDefinition.class)
                        : IndexSettingsDefinition.EMPTY;
            } else {
                JsonMapper.MAPPER.writeValue(fileSet.settingsFile, settings);
            }

            //Loading the fields
            File fieldMapFile = new File(indexDirectory, FIELDS_FILE);
            LinkedHashMap<String, FieldDefinition> fieldMap = fieldMapFile.exists()
                    ? JsonMapper.MAPPER.readValue(fieldMapFile, FieldDefinition.MapStringFieldTypeRef)
                    : new LinkedHashMap<>();

            //Loading the analyzers
            File analyzerMapFile = new File(indexDirectory, ANALYZERS_FILE);
            LinkedHashMap<String, AnalyzerDefinition> analyzerMap = analyzerMapFile.exists()
                    ? JsonMapper.MAPPER.readValue(analyzerMapFile, AnalyzerDefinition.MapStringAnalyzerTypeRef)
                    : new LinkedHashMap<>();

            AnalyzerContext context = new AnalyzerContext(analyzerMap, fieldMap);
            indexAnalyzer = new UpdatableAnalyzer(context, context.indexAnalyzerMap);
            queryAnalyzer = new UpdatableAnalyzer(context, context.queryAnalyzerMap);

            // Open and lock the data directory
            dataDirectory = FSDirectory.open(fileSet.dataDirectory.toPath());

            // Configure and open the IndexWriter
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(indexAnalyzer);
            if (settings != null && settings.similarity_class != null)
                indexWriterConfig.setSimilarity(IndexUtils.findSimilarity(settings.similarity_class));
            indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            SnapshotDeletionPolicy snapshotDeletionPolicy = new SnapshotDeletionPolicy(
                    indexWriterConfig.getIndexDeletionPolicy());
            indexWriterConfig.setIndexDeletionPolicy(snapshotDeletionPolicy);
            indexWriter = new IndexWriter(dataDirectory, indexWriterConfig);
            if (indexWriter.hasUncommittedChanges())
                indexWriter.commit();

            // Finally we build the SearcherManager
            SearcherManager searcherManager = new SearcherManager(indexWriter, true, null);

            return new IndexInstance(schema, dataDirectory, settings, analyzerMap, fieldMap, fileSet, indexWriter,
                    searcherManager, queryAnalyzer);
        } catch (IOException | ServerException | ReflectiveOperationException | InterruptedException e) {
            // We failed in opening the index. We close everything we can
            if (queryAnalyzer != null)
                IOUtils.closeQuietly(queryAnalyzer);
            if (indexAnalyzer != null)
                IOUtils.closeQuietly(indexAnalyzer);
            if (indexWriter != null)
                IOUtils.closeQuietly(indexWriter);
            if (dataDirectory != null)
                IOUtils.closeQuietly(dataDirectory);
            throw e;
        }
    }

    public IndexSettingsDefinition getSettings() {
        return settings;
    }

    @Override
    public void close() {
        IOUtils.closeQuietly(searcherManager);
        if (indexWriter.isOpen())
            IOUtils.closeQuietly(indexWriter);
        IOUtils.closeQuietly(dataDirectory);
    }

    /**
     * Delete the index. The directory is deleted from the local file system.
     */
    void delete() {
        close();
        if (fileSet.indexDirectory.exists())
            FileUtils.deleteQuietly(fileSet.indexDirectory);
    }

    private IndexStatus getIndexStatus() throws IOException {
        final IndexSearcher indexSearcher = searcherManager.acquire();
        try {
            return new IndexStatus(indexSearcher.getIndexReader(), settings, analyzerMap.keySet(),
                    fieldMap.keySet());
        } finally {
            searcherManager.release(indexSearcher);
        }
    }

    LinkedHashMap<String, FieldDefinition> getFields() {
        return fieldMap;
    }

    IndexStatus getStatus() throws IOException, InterruptedException {
        final Semaphore sem = schema.acquireReadSemaphore();
        try {
            return getIndexStatus();
        } finally {
            if (sem != null)
                sem.release();
        }
    }

    synchronized void setFields(LinkedHashMap<String, FieldDefinition> fields) throws ServerException, IOException {
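        // Rebuild the analyzer context for both index-time and query-time analysis,
        // then persist the new field definitions to fields.json.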
        AnalyzerContext analyzerContext = new AnalyzerContext(analyzerMap, fields);
        indexAnalyzer.update(analyzerContext, analyzerContext.indexAnalyzerMap);
        queryAnalyzer.update(analyzerContext, analyzerContext.queryAnalyzerMap);
        JsonMapper.MAPPER.writeValue(fileSet.fieldMapFile, fields);
        fieldMap = fields;
    }

    void setField(String field_name, FieldDefinition field) throws IOException, ServerException {
        LinkedHashMap<String, FieldDefinition> fields = (LinkedHashMap<String, FieldDefinition>) fieldMap.clone();
        fields.put(field_name, field);
        setFields(fields);
    }

    void deleteField(String field_name) throws IOException, ServerException {
        LinkedHashMap<String, FieldDefinition> fields = (LinkedHashMap<String, FieldDefinition>) fieldMap.clone();
        if (fields.remove(field_name) == null)
            throw new ServerException(Response.Status.NOT_FOUND, "Field not found: " + field_name);
        setFields(fields);
    }

    LinkedHashMap<String, AnalyzerDefinition> getAnalyzers() {
        return analyzerMap;
    }

    synchronized void setAnalyzers(LinkedHashMap<String, AnalyzerDefinition> analyzers)
            throws ServerException, IOException {
        AnalyzerContext analyzerContext = new AnalyzerContext(analyzers, fieldMap);
        indexAnalyzer.update(analyzerContext, analyzerContext.indexAnalyzerMap);
        queryAnalyzer.update(analyzerContext, analyzerContext.queryAnalyzerMap);
        JsonMapper.MAPPER.writeValue(fileSet.analyzerMapFile, analyzers);
        analyzerMap = analyzers;
    }

    void setAnalyzer(String analyzerName, AnalyzerDefinition analyzer) throws IOException, ServerException {
        LinkedHashMap<String, AnalyzerDefinition> analyzers = (LinkedHashMap<String, AnalyzerDefinition>) analyzerMap
                .clone();
        analyzers.put(analyzerName, analyzer);
        setAnalyzers(analyzers);
    }

    List<TermDefinition> testAnalyzer(String analyzerName, String text)
            throws ServerException, InterruptedException, ReflectiveOperationException, IOException {
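        // Build a throw-away CustomAnalyzer from the stored definition and return the
        // terms it produces for the given text.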
        AnalyzerDefinition analyzerDefinition = analyzerMap.get(analyzerName);
        if (analyzerDefinition == null)
            throw new ServerException(Response.Status.NOT_FOUND, "Analyzer not found: " + analyzerName);
        Analyzer analyzer = new CustomAnalyzer(analyzerDefinition);
        try {
            return TermDefinition.buildTermList(analyzer, StringUtils.EMPTY, text);
        } finally {
            analyzer.close();
        }
    }

    void deleteAnalyzer(String analyzerName) throws IOException, ServerException {
        LinkedHashMap<String, AnalyzerDefinition> analyzers = (LinkedHashMap<String, AnalyzerDefinition>) analyzerMap
                .clone();
        if (analyzers.remove(analyzerName) == null)
            throw new ServerException(Response.Status.NOT_FOUND, "Analyzer not found: " + analyzerName);
        setAnalyzers(analyzers);
    }

    public Analyzer getIndexAnalyzer(String field) throws ServerException, IOException {
        return indexAnalyzer.getWrappedAnalyzer(field);
    }

    public Analyzer getQueryAnalyzer(String field) throws ServerException, IOException {
        return queryAnalyzer.getWrappedAnalyzer(field);
    }

    private void nrtCommit() throws IOException, ServerException {
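        // Near-real-time commit: persist the pending changes, then refresh the
        // SearcherManager (and the schema) so new searchers see them.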
        indexWriter.commit();
        searcherManager.maybeRefresh();
        schema.mayBeRefresh();
    }

    final synchronized BackupStatus backup(Integer keepLastCount) throws IOException, InterruptedException {
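        // A snapshot taken from the SnapshotDeletionPolicy pins the current commit while
        // its segment files are copied into backup/<generation>; files already present
        // with the same size and timestamp are skipped, and backups beyond keepLastCount
        // are purged afterwards.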
        Semaphore sem = schema.acquireReadSemaphore();
        try {
            File backupdir = null;
            final IndexCommit commit = snapshotDeletionPolicy.snapshot();
            try {
                int files_count = 0;
                long bytes_size = 0;
                if (!fileSet.backupDirectory.exists())
                    fileSet.backupDirectory.mkdir();
                backupdir = new File(fileSet.backupDirectory, Long.toString(commit.getGeneration()));
                if (!backupdir.exists())
                    backupdir.mkdir();
                if (!backupdir.exists())
                    throw new IOException("Cannot create the backup directory: " + backupdir);
                for (String fileName : commit.getFileNames()) {
                    File sourceFile = new File(fileSet.dataDirectory, fileName);
                    File targetFile = new File(backupdir, fileName);
                    files_count++;
                    bytes_size += sourceFile.length();
                    if (targetFile.exists() && targetFile.length() == sourceFile.length()
                            && targetFile.lastModified() == sourceFile.lastModified())
                        continue;
                    FileUtils.copyFile(sourceFile, targetFile, true);
                }
                purgeBackups(keepLastCount);
                return new BackupStatus(commit.getGeneration(), backupdir.lastModified(), bytes_size, files_count);
            } catch (IOException e) {
                if (backupdir != null)
                    FileUtils.deleteQuietly(backupdir);
                throw e;
            } finally {
                snapshotDeletionPolicy.release(commit);
            }
        } finally {
            if (sem != null)
                sem.release();
        }
    }

    private void purgeBackups(Integer keepLastCount) {
        if (keepLastCount == null)
            return;
        if (keepLastCount == 0)
            return;
        List<BackupStatus> backups = backups();
        if (backups.size() <= keepLastCount)
            return;
        for (int i = keepLastCount; i < backups.size(); i++) {
            File backupDir = new File(fileSet.backupDirectory, Long.toString(backups.get(i).generation));
            FileUtils.deleteQuietly(backupDir);
        }
    }

    private List<BackupStatus> backups() {
        List<BackupStatus> list = new ArrayList<BackupStatus>();
        if (!fileSet.backupDirectory.exists())
            return list;
        File[] dirs = fileSet.backupDirectory.listFiles((FileFilter) DirectoryFileFilter.INSTANCE);
        if (dirs == null)
            return list;
        for (File dir : dirs) {
            BackupStatus status = BackupStatus.newBackupStatus(dir);
            if (status != null)
                list.add(status);
        }
        list.sort((o1, o2) -> o2.generation.compareTo(o1.generation));
        return list;
    }

    final List<BackupStatus> getBackups() throws InterruptedException {
        final Semaphore sem = schema.acquireReadSemaphore();
        try {
            return backups();
        } finally {
            if (sem != null)
                sem.release();
        }
    }

    final void deleteAll() throws IOException, InterruptedException, ServerException {
        final Semaphore sem = schema.acquireWriteSemaphore();
        try {
            indexWriter.deleteAll();
            nrtCommit();
        } finally {
            if (sem != null)
                sem.release();
        }
    }

    private final RecordsPoster.UpdateObjectDocument getDocumentPoster(final Map<String, Field> fields) {
        return new RecordsPoster.UpdateObjectDocument(fields, indexAnalyzer.getContext(), indexWriter);
    }

    private final RecordsPoster.UpdateMapDocument getDocumentPoster() {
        return new RecordsPoster.UpdateMapDocument(indexAnalyzer.getContext(), indexWriter);
    }

    private final RecordsPoster.UpdateObjectDocValues getDocValuesPoster(final Map<String, Field> fields) {
        return new RecordsPoster.UpdateObjectDocValues(fields, indexAnalyzer.getContext(), indexWriter);
    }

    private final RecordsPoster.UpdateMapDocValues getDocValuesPoster() {
        return new RecordsPoster.UpdateMapDocValues(indexAnalyzer.getContext(), indexWriter);
    }

    final <T> Object postDocument(final Map<String, Field> fields, final T document)
            throws IOException, InterruptedException {
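        // Every write operation follows the same pattern: acquire the schema write
        // semaphore, check the schema size limit, feed the document(s) to a poster, then
        // make the changes visible with an NRT commit.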
        if (document == null)
            return null;
        final Semaphore sem = schema.acquireWriteSemaphore();
        try {
            schema.checkSize(1);
            RecordsPoster.UpdateObjectDocument poster = getDocumentPoster(fields);
            poster.accept(document);
            Object id = poster.ids.isEmpty() ? null : poster.ids.iterator().next();
            nrtCommit();
            return id;
        } finally {
            if (sem != null)
                sem.release();
        }
    }

    final Object postMappedDocument(final Map<String, Object> document) throws IOException, InterruptedException {
        if (document == null || document.isEmpty())
            return null;
        final Semaphore sem = schema.acquireWriteSemaphore();
        try {
            schema.checkSize(1);
            RecordsPoster.UpdateMapDocument poster = getDocumentPoster();
            poster.accept(document);
            Object id = poster.ids.isEmpty() ? null : poster.ids.iterator().next();
            nrtCommit();
            return id;
        } finally {
            if (sem != null)
                sem.release();
        }
    }

    final Collection<Object> postMappedDocuments(final Collection<Map<String, Object>> documents)
            throws IOException, InterruptedException {
        if (documents == null || documents.isEmpty())
            return null;
        final Semaphore sem = schema.acquireWriteSemaphore();
        try {
            schema.checkSize(documents.size());
            RecordsPoster.UpdateMapDocument poster = getDocumentPoster();
            documents.forEach(poster);
            nrtCommit();
            return poster.ids;
        } finally {
            if (sem != null)
                sem.release();
        }
    }

    final <T> Collection<Object> postDocuments(final Map<String, Field> fields, final Collection<T> documents)
            throws IOException, InterruptedException {
        if (documents == null || documents.isEmpty())
            return null;
        final Semaphore sem = schema.acquireWriteSemaphore();
        try {
            schema.checkSize(documents.size());
            RecordsPoster.UpdateObjectDocument poster = getDocumentPoster(fields);
            documents.forEach(poster);
            nrtCommit();
            return poster.ids;
        } finally {
            if (sem != null)
                sem.release();
        }
    }

    final <T> void updateDocValues(final Map<String, Field> fields, final T document)
            throws InterruptedException, IOException {
        if (document == null)
            return;
        final Semaphore sem = schema.acquireWriteSemaphore();
        try {
            RecordsPoster.UpdateObjectDocValues poster = getDocValuesPoster(fields);
            poster.accept(document);
            nrtCommit();
        } finally {
            if (sem != null)
                sem.release();
        }
    }

    final void updateMappedDocValues(final Map<String, Object> document) throws IOException, InterruptedException {
        if (document == null || document.isEmpty())
            return;
        final Semaphore sem = schema.acquireWriteSemaphore();
        try {
            RecordsPoster.UpdateMapDocValues poster = getDocValuesPoster();
            poster.accept(document);
            nrtCommit();
        } finally {
            if (sem != null)
                sem.release();
        }
    }

    final <T> void updateDocsValues(final Map<String, Field> fields, final Collection<T> documents)
            throws IOException, InterruptedException {
        if (documents == null || documents.isEmpty())
            return;
        final Semaphore sem = schema.acquireWriteSemaphore();
        try {
            RecordsPoster.UpdateObjectDocValues poster = getDocValuesPoster(fields);
            documents.forEach(poster);
            nrtCommit();
        } finally {
            if (sem != null)
                sem.release();
        }
    }

    final void updateMappedDocsValues(final Collection<Map<String, Object>> documents)
            throws IOException, ServerException, InterruptedException {
        if (documents == null || documents.isEmpty())
            return;
        final Semaphore sem = schema.acquireWriteSemaphore();
        try {
            RecordsPoster.UpdateMapDocValues poster = getDocValuesPoster();
            documents.forEach(poster);
            nrtCommit();
        } finally {
            if (sem != null)
                sem.release();
        }
    }

    final ResultDefinition.WithMap deleteByQuery(final QueryDefinition queryDefinition)
            throws IOException, InterruptedException, QueryNodeException, ParseException, ServerException,
            ReflectiveOperationException {
        final Semaphore sem = schema.acquireWriteSemaphore();
        try {
            final QueryContext queryContext = new QueryContext(null, queryAnalyzer, null, queryDefinition);
            final Query query = QueryUtils.getLuceneQuery(queryContext);
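            // The number of deleted documents is the difference between numDocs() before
            // the deletion and after the commit.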
            int docs = indexWriter.numDocs();
            indexWriter.deleteDocuments(query);
            nrtCommit();
            docs -= indexWriter.numDocs();
            return new ResultDefinition.WithMap(docs);
        } finally {
            if (sem != null)
                sem.release();
        }
    }

    private synchronized SortedSetDocValuesReaderState getFacetsState(final IndexReader indexReader)
            throws IOException {
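        // The facets reader state is cached per IndexReader, so it is only rebuilt when a
        // new reader (a new index generation) shows up.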
        Pair<IndexReader, SortedSetDocValuesReaderState> current = facetsReaderStateCache;
        if (current != null && current.getLeft() == indexReader)
            return current.getRight();
        SortedSetDocValuesReaderState newState = IndexUtils.getNewFacetsState(indexReader);
        facetsReaderStateCache = Pair.of(indexReader, newState);
        return newState;
    }

    final private QueryContext buildQueryContext(final IndexSearcher indexSearcher,
            final QueryDefinition queryDefinition) throws IOException {
        indexSearcher.setSimilarity(indexWriterConfig.getSimilarity());
        final SortedSetDocValuesReaderState facetsState = getFacetsState(indexSearcher.getIndexReader());
        return new QueryContext(indexSearcher, queryAnalyzer, facetsState, queryDefinition);
    }

    final ResultDefinition search(final QueryDefinition queryDefinition,
            ResultDocumentBuilder.BuilderFactory<?> documentBuilderFactory) throws IOException,
            InterruptedException, ParseException, ReflectiveOperationException, QueryNodeException {
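        // Searches run under the schema read semaphore, with an IndexSearcher acquired
        // from (and always released back to) the SearcherManager.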
        final Semaphore sem = schema.acquireReadSemaphore();
        try {
            final IndexSearcher indexSearcher = searcherManager.acquire();
            try {
                return QueryUtils.search(buildQueryContext(indexSearcher, queryDefinition), documentBuilderFactory);
            } finally {
                searcherManager.release(indexSearcher);
            }
        } finally {
            if (sem != null)
                sem.release();
        }
    }

    Directory getDataDirectory() {
        return dataDirectory;
    }

    void fillFields(final Map<String, FieldDefinition> fields) {
        if (fields == null)
            return;
        this.fieldMap.forEach((name, fieldDefinition) -> {
            if (!fields.containsKey(name))
                fields.put(name, fieldDefinition);
        });
    }

    void fillAnalyzers(final Map<String, AnalyzerDefinition> analyzers) {
        if (analyzers == null)
            return;
        this.analyzerMap.forEach((name, analyzerDefinition) -> {
            if (!analyzers.containsKey(name))
                analyzers.put(name, analyzerDefinition);
        });
    }

}