org.apache.carbondata.datamap.bloom.BloomDataMapWriter.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.carbondata.datamap.bloom.BloomDataMapWriter.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.carbondata.datamap.bloom;

import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.carbondata.common.annotations.InterfaceAudience;
import org.apache.carbondata.core.datamap.Segment;
import org.apache.carbondata.core.datastore.block.SegmentProperties;
import org.apache.carbondata.core.keygenerator.columnar.ColumnarSplitter;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
import org.apache.carbondata.core.util.CarbonUtil;
import org.apache.carbondata.core.util.DataTypeUtil;

import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.collections.Predicate;

/**
 * BloomDataMap is constructed in CG level (blocklet level).
 * For each indexed column, a bloom filter is constructed to indicate whether a value
 * belongs to a blocklet. The bloom filters of the blocklets that belong to the same block
 * are written to one index file suffixed with .bloomindex, so the number of bloom index
 * files is equal to the number of blocks.
 */
@InterfaceAudience.Internal
public class BloomDataMapWriter extends AbstractBloomDataMapWriter {
    // splitter whose per-column block key sizes are used to decode surrogate keys
    private final ColumnarSplitter columnarSplitter;
    // for the dict/sort/date column, they are encoded in MDK,
    // this maps the index column name to the index in MDK
    private final Map<String, Integer> indexCol2MdkIdx;

    /**
     * Creates the writer and records, for every index column that is a global-dictionary or
     * direct-dictionary dimension, its position among those dimensions (its index in the MDK).
     *
     * <p>All arguments are forwarded unchanged to the parent constructor.
     *
     * @param segmentProperties source of the fixed-length key splitter and of the dimension list
     * @param indexColumns columns this datamap indexes; matched to dimensions by name,
     *                     ignoring case
     * @throws IOException propagated from the parent constructor
     */
    BloomDataMapWriter(String tablePath, String dataMapName, List<CarbonColumn> indexColumns, Segment segment,
            String shardName, SegmentProperties segmentProperties, int bloomFilterSize, double bloomFilterFpp,
            boolean compressBloom) throws IOException {
        super(tablePath, dataMapName, indexColumns, segment, shardName, segmentProperties, bloomFilterSize,
                bloomFilterFpp, compressBloom);

        this.columnarSplitter = segmentProperties.getFixedLengthKeySplitter();
        this.indexCol2MdkIdx = new HashMap<>();
        int mdkIdx = 0;
        for (final CarbonDimension dimension : segmentProperties.getDimensions()) {
            // only dictionary / direct-dictionary dimensions take a slot in the MDK
            if (!dimension.isGlobalDictionaryEncoding() && !dimension.isDirectDictionaryEncoding()) {
                continue;
            }
            Object matchedIndexColumn = CollectionUtils.find(indexColumns, new Predicate() {
                @Override
                public boolean evaluate(Object object) {
                    return ((CarbonColumn) object).getColName().equalsIgnoreCase(dimension.getColName());
                }
            });
            if (matchedIndexColumn != null) {
                // The match above ignores case, but convertDictionaryValue looks the entry up
                // by the index column's own name, so that exact name must be the key.
                this.indexCol2MdkIdx.put(((CarbonColumn) matchedIndexColumn).getColName(), mdkIdx);
            }
            mdkIdx++;
        }
    }

    /**
     * Converts a value of a non-dictionary index column into the bytes stored in the bloom filter.
     *
     * <p>NOTE(review): presumably implements a hook declared in AbstractBloomDataMapWriter;
     * add {@code @Override} once that is confirmed.
     *
     * @param indexColIdx position of the column in {@code indexColumns}
     * @param value the column value; cast to {@code byte[]} for VARCHAR and non-primitive columns
     * @return the bytes to put into the bloom filter
     */
    protected byte[] convertNonDictionaryValue(int indexColIdx, Object value) {
        CarbonColumn indexColumn = indexColumns.get(indexColIdx);
        if (DataTypes.VARCHAR == indexColumn.getDataType()) {
            return DataConvertUtil.getRawBytesForVarchar((byte[]) value);
        } else if (DataTypeUtil.isPrimitiveColumn(indexColumn.getDataType())) {
            // get bytes for the original value of the no dictionary column
            return CarbonUtil.getValueAsBytes(indexColumn.getDataType(), value);
        } else {
            return DataConvertUtil.getRawBytes((byte[]) value);
        }
    }

    /**
     * Decodes the surrogate key of a dictionary (or date) index column and returns it as
     * INT bytes for the bloom filter.
     *
     * @param indexColIdx position of the column in {@code indexColumns}
     * @param value the encoded key; input value from onPageAdded in load process is byte[]
     * @return the surrogate key converted with {@code CarbonUtil.getValueAsBytes(DataTypes.INT, ...)}
     * @throws IllegalStateException if the column was not registered as an MDK column
     *         by the constructor
     */
    @Override
    protected byte[] convertDictionaryValue(int indexColIdx, Object value) {
        // for dict columns including dictionary and date columns decode value to get the surrogate key
        String indexColName = indexColumns.get(indexColIdx).getColName();
        Integer thisKeyIdx = indexCol2MdkIdx.get(indexColName);
        if (thisKeyIdx == null) {
            // fail with a clear message instead of an NPE from auto-unboxing
            throw new IllegalStateException("Index column " + indexColName
                    + " is not a dictionary or direct-dictionary dimension encoded in the MDK");
        }
        int surrogateKey = CarbonUtil.getSurrogateInternal((byte[]) value, 0,
                columnarSplitter.getBlockKeySize()[thisKeyIdx]);
        // store the dictionary key in bloom
        return CarbonUtil.getValueAsBytes(DataTypes.INT, surrogateKey);
    }
}