org.openspaces.textsearch.LuceneTextSearchQueryExtensionManager.java Source code

Java tutorial

Introduction

Here is the source code for org.openspaces.textsearch.LuceneTextSearchQueryExtensionManager.java

Source

/*
 * Copyright (c) 2008-2016, GigaSpaces Technologies, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.openspaces.textsearch;

import com.gigaspaces.SpaceRuntimeException;
import com.gigaspaces.internal.io.FileUtils;
import com.gigaspaces.internal.utils.Assert;
import com.gigaspaces.metadata.SpaceTypeDescriptor;
import com.gigaspaces.query.extension.QueryExtensionEntryIterator;
import com.gigaspaces.query.extension.QueryExtensionManager;
import com.gigaspaces.query.extension.QueryExtensionRuntimeInfo;
import com.gigaspaces.query.extension.metadata.TypeQueryExtensions;
import com.gigaspaces.server.SpaceServerEntry;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;

import java.io.File;
import java.io.IOException;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * @author Vitaliy_Zinchenko
 * @since 12.1
 */
public class LuceneTextSearchQueryExtensionManager extends QueryExtensionManager {
    private static final Logger _logger = Logger.getLogger(LuceneTextSearchQueryExtensionManager.class.getName());

    public static final String SEARCH_OPERATION_NAME = "match";

    protected static final String XAP_ID = "XAP_ID";
    private static final FieldType XAP_ID_TYPE = toFieldType(Field.Store.YES, IndexOptions.NONE, true);
    protected static final String XAP_ID_VERSION = "XAP_ID_VERSION";
    private static final FieldType XAP_ID_VERSION_TYPE = toFieldType(Field.Store.YES,
            IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, false);

    protected final String _namespace;
    protected final Map<String, LuceneTextSearchTypeIndex> _luceneHolderMap = new ConcurrentHashMap<String, LuceneTextSearchTypeIndex>();
    protected final LuceneTextSearchConfiguration _luceneConfiguration;

    protected LuceneTextSearchQueryExtensionManager(LuceneTextSearchQueryExtensionProvider provider,
            QueryExtensionRuntimeInfo info, LuceneTextSearchConfiguration configuration) {
        super(info);
        _namespace = provider.getNamespace();
        _luceneConfiguration = configuration;
        File location = new File(_luceneConfiguration.getLocation());
        FileUtils.deleteFileOrDirectoryIfExists(location);
    }

    @Override
    public void close() throws IOException {
        for (LuceneTextSearchTypeIndex luceneHolder : _luceneHolderMap.values())
            luceneHolder.close();

        _luceneHolderMap.clear();
        FileUtils.deleteFileOrDirectoryIfExists(new File(_luceneConfiguration.getLocation()));
        super.close();
    }

    @Override
    public void registerType(SpaceTypeDescriptor typeDescriptor) {
        super.registerType(typeDescriptor);
        final String typeName = typeDescriptor.getTypeName();
        if (!_luceneHolderMap.containsKey(typeName)) {
            try {
                _luceneHolderMap.put(typeName,
                        new LuceneTextSearchTypeIndex(_luceneConfiguration, _namespace, typeDescriptor));
            } catch (IOException e) {
                throw new SpaceRuntimeException("Failed to register type " + typeName, e);
            }
        } else {
            _logger.log(Level.WARNING, "Type [" + typeName + "] is already registered");
        }
    }

    protected LuceneTextSearchTypeIndex createTypeIndex(LuceneTextSearchConfiguration luceneConfig,
            String namespace, SpaceTypeDescriptor typeDescriptor) throws IOException {
        return new LuceneTextSearchTypeIndex(luceneConfig, namespace, typeDescriptor);
    }

    @Override
    public boolean accept(String typeName, String path, String operation, Object gridValue, Object luceneQuery) {
        Assert.notNull(gridValue, "Provided value from grid is null");
        Assert.notNull(luceneQuery, "Provided lucene query is null");
        validateOperationName(operation);

        if (_logger.isLoggable(Level.FINE))
            _logger.log(Level.FINE, "filter [operation=" + operation + ", leftOperand(value from grid)=" + gridValue
                    + ", rightOperand(lucene query)=" + luceneQuery + "]");

        try {
            Analyzer analyzer = getAnalyzer(typeName, path);
            MemoryIndex index = new MemoryIndex();
            index.addField("content", String.valueOf(gridValue), analyzer);
            Query query = new QueryParser("content", analyzer).parse(String.valueOf(luceneQuery));
            float score = index.search(query);
            return score > 0.0f;
        } catch (ParseException e) {
            throw new SpaceRuntimeException("Could not parse full text query [ " + luceneQuery + " ]", e);
        }
    }

    @Override
    public boolean insertEntry(SpaceServerEntry entry, boolean hasPrevious) {
        final String typeName = entry.getSpaceTypeDescriptor().getTypeName();
        final LuceneTextSearchTypeIndex luceneHolder = _luceneHolderMap.get(typeName);
        try {
            final Document doc = createDocumentIfNeeded(luceneHolder, entry);
            // Add new
            if (doc != null)
                luceneHolder.getIndexWriter().addDocument(doc);
            // Delete old
            if (hasPrevious) {
                TermQuery query = new TermQuery(
                        new Term(XAP_ID_VERSION, concat(entry.getUid(), entry.getVersion() - 1)));
                luceneHolder.getIndexWriter().deleteDocuments(query);
            }
            // Flush
            if (doc != null || hasPrevious)
                luceneHolder.commit(false);
            return doc != null;
        } catch (Exception e) {
            String operation = hasPrevious ? "update" : "insert";
            throw new SpaceRuntimeException(
                    "Failed to " + operation + " entry of type " + typeName + " with id [" + entry.getUid() + "]",
                    e);
        }
    }

    protected String concat(String uid, int version) {
        return uid + "_" + version;
    }

    protected Document createDocumentIfNeeded(LuceneTextSearchTypeIndex luceneHolder, SpaceServerEntry entry) {
        TypeQueryExtensions queryExtensions = entry.getSpaceTypeDescriptor().getQueryExtensions();
        Document doc = null;
        for (String path : luceneHolder.getQueryExtensionInfo().getPaths()) {
            if (!queryExtensions.isIndexed(_namespace, path)) {
                continue;
            }
            final Object fieldValue = entry.getPathValue(path);
            if (fieldValue == null) {
                continue;
            }
            Field[] fields = convertField(path, fieldValue);
            if (doc == null && fields.length != 0) {
                doc = new Document();
            }
            for (Field field : fields) {
                doc.add(field);
            }
        }
        if (doc != null) {
            //cater for uid & version
            doc.add(new Field(XAP_ID, entry.getUid(), XAP_ID_TYPE));
            doc.add(new Field(XAP_ID_VERSION, concat(entry.getUid(), entry.getVersion()), XAP_ID_VERSION_TYPE));
        }

        return doc;
    }

    protected Field[] convertField(String path, Object fieldValue) {
        if (!(fieldValue instanceof String)) {
            throw new IllegalArgumentException("Field '" + path + "' with value '" + fieldValue
                    + "' is not String. " + "Try to use 'path' of the @" + SpaceTextAnalyzer.class.getSimpleName()
                    + " or @" + SpaceTextIndex.class.getSimpleName());
        }
        Field field = new TextField(path, (String) fieldValue, Field.Store.NO);
        return new Field[] { field };
    }

    private Analyzer getAnalyzer(String typeName, String path) {
        LuceneTextSearchTypeIndex typeIndex = _luceneHolderMap.get(typeName);
        return typeIndex != null ? typeIndex.getAnalyzerForPath(path) : _luceneConfiguration.getDefaultAnalyzer();
    }

    @Override
    public QueryExtensionEntryIterator queryByIndex(String typeName, String path, String operationName,
            Object operand) {
        if (_logger.isLoggable(Level.FINE))
            _logger.log(Level.FINE, "query [typeName=" + typeName + ", path=" + path + ", operation="
                    + operationName + ", operand=" + operand + "]");

        final Query query = createQuery(typeName, path, operationName, operand);
        final LuceneTextSearchTypeIndex luceneHolder = _luceneHolderMap.get(typeName);
        try {
            // Flush
            luceneHolder.commit(true);

            DirectoryReader dr = DirectoryReader.open(luceneHolder.getDirectory());
            IndexSearcher is = new IndexSearcher(dr);
            ScoreDoc[] scores = is.search(query, _luceneConfiguration.getMaxResults()).scoreDocs;
            return new LuceneQueryExtensionEntryIterator(scores, is, dr);
        } catch (IOException e) {
            throw new SpaceRuntimeException("Failed to scan index", e);
        }
    }

    protected Query createQuery(String typeName, String path, String operationName, Object operand) {
        Assert.notNull(operand, "Provided operand is null");
        validateOperationName(operationName);
        try {
            LuceneTextSearchTypeIndex typeIndex = _luceneHolderMap.get(typeName);
            Analyzer analyzer = typeIndex.getAnalyzerForPath(path);
            return new QueryParser(path, analyzer).parse(path + ":" + operand);
        } catch (ParseException e) {
            throw new IllegalArgumentException("Couldn't create full text search query for path=" + path
                    + " operationName=" + operationName + " operand=" + operand, e);
        }
    }

    private void validateOperationName(String operationName) {
        if (!SEARCH_OPERATION_NAME.equals(operationName)) {
            throw new IllegalArgumentException("Provided operationName=" + operationName
                    + " is incorrect. Correct one is '" + SEARCH_OPERATION_NAME + "'");
        }
    }

    @Override
    public void removeEntry(SpaceTypeDescriptor typeDescriptor, String uid, int version) {
        final String typeName = typeDescriptor.getTypeName();
        final LuceneTextSearchTypeIndex luceneHolder = _luceneHolderMap.get(typeName);
        try {
            luceneHolder.getIndexWriter()
                    .deleteDocuments(new TermQuery(new Term(XAP_ID_VERSION, concat(uid, version))));
            luceneHolder.commit(false);
        } catch (IOException e) {
            throw new SpaceRuntimeException("Failed to remove entry of type " + typeName, e);
        }
    }

    private static FieldType toFieldType(Field.Store store, IndexOptions index, boolean analyzed) {
        final FieldType ft = new FieldType();

        ft.setStored(store == Field.Store.YES);
        ft.setIndexOptions(index);
        ft.setTokenized(analyzed);

        ft.freeze();
        return ft;
    }
}