Java tutorial
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.carbondata.datamap.lucene;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.carbondata.common.annotations.InterfaceAudience;
import org.apache.carbondata.common.logging.LogService;
import org.apache.carbondata.common.logging.LogServiceFactory;
import org.apache.carbondata.core.datamap.dev.DataMapModel;
import org.apache.carbondata.core.datamap.dev.cgdatamap.CoarseGrainDataMap;
import org.apache.carbondata.core.datastore.block.SegmentProperties;
import org.apache.carbondata.core.datastore.impl.FileFactory;
import org.apache.carbondata.core.indexstore.Blocklet;
import org.apache.carbondata.core.indexstore.PartitionSpec;
import org.apache.carbondata.core.memory.MemoryException;
import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.solr.store.hdfs.HdfsDirectory;

@InterfaceAudience.Internal
public class LuceneCoarseGrainDataMap extends CoarseGrainDataMap {

  /**
   * log information
   */
  private static final LogService LOGGER =
      LogServiceFactory.getLogService(LuceneCoarseGrainDataMap.class.getName());

  public static final int BLOCKID_ID = 0;

  public static final int BLOCKLETID_ID = 1;

  public static final int PAGEID_ID = 2;

  public static final int ROWID_ID = 3;

  /**
   * searcher object for this datamap
   */
  private IndexSearcher indexSearcher = null;

  /**
   * default max number of results to return
   */
  private static int MAX_RESULT_NUMBER = 100;

  /**
   * analyzer for the lucene index
   */
  private Analyzer analyzer;

  LuceneCoarseGrainDataMap(Analyzer analyzer) {
    this.analyzer = analyzer;
  }

  /**
   * It is called to load the data map to memory or to initialize it.
   */
  @Override
  public void init(DataMapModel dataMapModel) throws MemoryException, IOException {
    // get the index path from the file path
    Path indexPath = FileFactory.getPath(dataMapModel.getFilePath());
    LOGGER.info("Lucene index read path " + indexPath.toString());

    // get the file system; the HDFS-backed Lucene directory used below comes from the Solr project
    FileSystem fs = FileFactory.getFileSystem(indexPath);

    // check that this path is valid
    if (!fs.exists(indexPath)) {
      String errorMessage = String.format("index directory %s does not exist.", indexPath);
      LOGGER.error(errorMessage);
      throw new IOException(errorMessage);
    }

    if (!fs.isDirectory(indexPath)) {
      String errorMessage = String.format("invalid index path %s, it must be a directory", indexPath);
      LOGGER.error(errorMessage);
      throw new IOException(errorMessage);
    }

    // open this index path, using the default HDFS configuration
    Directory indexDir = new HdfsDirectory(indexPath, FileFactory.getConfiguration());
    IndexReader indexReader = DirectoryReader.open(indexDir);
    if (indexReader == null) {
      throw new RuntimeException("failed to create index reader object");
    }

    // create an index searcher object
    indexSearcher = new IndexSearcher(indexReader);
  }

  /**
   * Prune the datamap with the filter expression. It returns the list of
   * blocklets where these filters can exist.
   */
  @Override
  public List<Blocklet> prune(FilterResolverIntf filterExp, SegmentProperties segmentProperties,
      List<PartitionSpec> partitions) throws IOException {
    // convert the filter expression into a lucene query
    List<String> fields = new ArrayList<String>();

    // only for test: query all data
    String strQuery = "*:*";

    String[] sFields = new String[fields.size()];
    fields.toArray(sFields);

    // get the analyzer
    if (analyzer == null) {
      analyzer = new StandardAnalyzer();
    }

    // use MultiFieldQueryParser to parse the query
    QueryParser queryParser = new MultiFieldQueryParser(sFields, analyzer);
    Query query;
    try {
      query = queryParser.parse(strQuery);
    } catch (ParseException e) {
      String errorMessage = String.format(
          "failed to filter block with query %s, detail is %s", strQuery, e.getMessage());
      LOGGER.error(errorMessage);
      return null;
    }

    // execute the index search
    TopDocs result;
    try {
      result = indexSearcher.search(query, MAX_RESULT_NUMBER);
    } catch (IOException e) {
      String errorMessage =
          String.format("failed to search lucene data, detail is %s", e.getMessage());
      LOGGER.error(errorMessage);
      throw new IOException(errorMessage);
    }

    // temporary structure to remove duplicates: Map<BlockId, Set<BlockletId>>
    Map<String, Set<Number>> mapBlocks = new HashMap<String, Set<Number>>();

    for (ScoreDoc scoreDoc : result.scoreDocs) {
      // get a document
      Document doc = indexSearcher.doc(scoreDoc.doc);

      // get all fields
      List<IndexableField> fieldsInDoc = doc.getFields();

      // get the block id and its set of blocklet ids
      String blockId = fieldsInDoc.get(BLOCKID_ID).stringValue();
      Set<Number> setBlocklets = mapBlocks.get(blockId);
      if (setBlocklets == null) {
        setBlocklets = new HashSet<Number>();
        mapBlocks.put(blockId, setBlocklets);
      }

      // get the blocklet id and add it to the set for this block
      Number blockletId = fieldsInDoc.get(BLOCKLETID_ID).numericValue();
      if (!setBlocklets.contains(blockletId.intValue())) {
        setBlocklets.add(blockletId.intValue());
      }
    }

    // result blocklets
    List<Blocklet> blocklets = new ArrayList<Blocklet>();

    // transform all collected blocks into result-type blocklets
    for (Map.Entry<String, Set<Number>> mapBlock : mapBlocks.entrySet()) {
      String blockId = mapBlock.getKey();
      Set<Number> setBlocklets = mapBlock.getValue();

      // for each blocklet in this block, add a coarse grain Blocklet
      for (Number blockletId : setBlocklets) {
        blocklets.add(new Blocklet(blockId, blockletId.toString()));
      }
    }

    return blocklets;
  }

  @Override
  public boolean isScanRequired(FilterResolverIntf filterExp) {
    return true;
  }

  /**
   * Clear complete index table and release memory.
   */
  @Override
  public void clear() {
  }

  @Override
  public void finish() {
  }
}
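The prune() method above boils down to a standard Lucene pattern: parse a query with MultiFieldQueryParser, run it through an IndexSearcher, and read stored fields off each hit to recover (blockId, blockletId) pairs. The following standalone sketch shows that pattern in isolation so it can be run without a CarbonData deployment. It is a minimal illustration under assumptions: the class name LucenePruneSketch, the field names "content", "blockId" and "blockletId", the sample block id value, and the in-memory RAMDirectory (Lucene 6.x era API) are all placeholders chosen for the example, not CarbonData's actual index layout, which is written by the datamap writer into an HDFS-backed directory.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class LucenePruneSketch {

  public static void main(String[] args) throws IOException, ParseException {
    // build a tiny in-memory index; each document carries some searchable text plus the
    // stored (blockId, blockletId) pair it should map back to (illustrative layout only)
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));

    Document doc = new Document();
    doc.add(new TextField("content", "carbondata lucene coarse grain index", Field.Store.NO));
    doc.add(new StringField("blockId", "part-0-0_batchno0-0-1", Field.Store.YES));
    doc.add(new StoredField("blockletId", 0));
    writer.addDocument(doc);
    writer.close();

    // open a searcher over the directory, as init() does over the HdfsDirectory
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));

    // parse a text query over the searchable fields, as prune() does with MultiFieldQueryParser
    Query query = new MultiFieldQueryParser(new String[] { "content" }, new StandardAnalyzer())
        .parse("lucene");

    // collect (blockId, blockletId) pairs from the hits, the same shape prune()
    // turns into Blocklet objects for the query planner
    TopDocs hits = searcher.search(query, 100);
    List<String> blocklets = new ArrayList<String>();
    for (ScoreDoc scoreDoc : hits.scoreDocs) {
      Document hit = searcher.doc(scoreDoc.doc);
      blocklets.add(hit.get("blockId") + "/" + hit.getField("blockletId").numericValue());
    }
    System.out.println(blocklets);
  }
}

Being a coarse grain datamap, the class only has to name blocks and blocklets that may contain matches; that is why prune() deduplicates into a Map of block id to blocklet ids and discards the finer page and row information that the indexed documents could otherwise provide.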