io.druid.query.metadata.SegmentAnalyzer.java Source code

Java tutorial

Introduction

Here is the source code for io.druid.query.metadata.SegmentAnalyzer.java

Source

/*
 * Druid - a distributed column store.
 * Copyright 2012 - 2015 Metamarkets Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.druid.query.metadata;

import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.primitives.Longs;
import com.metamx.common.logger.Logger;
import com.metamx.common.StringUtils;
import io.druid.query.metadata.metadata.ColumnAnalysis;
import io.druid.query.metadata.metadata.SegmentMetadataQuery;
import io.druid.segment.QueryableIndex;
import io.druid.segment.StorageAdapter;
import io.druid.segment.column.BitmapIndex;
import io.druid.segment.column.Column;
import io.druid.segment.column.ColumnCapabilities;
import io.druid.segment.column.ComplexColumn;
import io.druid.segment.column.ValueType;
import io.druid.segment.data.Indexed;
import io.druid.segment.serde.ComplexMetricSerde;
import io.druid.segment.serde.ComplexMetrics;

import java.util.Collections;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;

/**
 * Produces a per-column {@link ColumnAnalysis} (type name, estimated size, cardinality) for a segment,
 * reading either from a {@link QueryableIndex} or from a {@link StorageAdapter}.
 *
 * <p>Sizes are only computed when {@link SegmentMetadataQuery.AnalysisType#SIZE} is requested and string
 * cardinalities only when {@link SegmentMetadataQuery.AnalysisType#CARDINALITY} is requested; otherwise the
 * corresponding value is reported as 0. All size arithmetic is carried out in {@code long} so that large
 * columns do not wrap around {@code Integer.MAX_VALUE}.
 */
public class SegmentAnalyzer {
    private static final Logger log = new Logger(SegmentAnalyzer.class);

    /**
     * This is based on the minimum size of a timestamp (POSIX seconds).  An ISO timestamp will actually be more like 24+
     */
    private static final int NUM_BYTES_IN_TIMESTAMP = 10;

    /**
     * This is based on assuming 6 units of precision, one decimal point and a single value left of the decimal
     */
    private static final int NUM_BYTES_IN_TEXT_FLOAT = 8;

    /**
     * Analyzes every column of the given index, dispatching on the column's {@link ValueType}.
     *
     * <p>Columns whose type is not LONG, FLOAT, STRING or COMPLEX are logged and reported as an error
     * analysis. The time column is analyzed last, so its entry replaces any entry stored under
     * {@link Column#TIME_COLUMN_NAME} by the loop.
     *
     * @param index         the index to analyze; must not be null
     * @param analysisTypes the kinds of analysis (size, cardinality) that were requested
     * @return a map, ordered by column name, from column name to its analysis
     * @throws NullPointerException if {@code index} is null
     */
    public Map<String, ColumnAnalysis> analyze(QueryableIndex index,
            EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes) {
        Preconditions.checkNotNull(index, "Index cannot be null");

        Map<String, ColumnAnalysis> columns = Maps.newTreeMap();

        for (String columnName : index.getColumnNames()) {
            final Column column = index.getColumn(columnName);
            final ColumnCapabilities capabilities = column.getCapabilities();

            final ColumnAnalysis analysis;
            final ValueType type = capabilities.getType();
            switch (type) {
            case LONG:
                analysis = analyzeLongColumn(column, analysisTypes);
                break;
            case FLOAT:
                analysis = analyzeFloatColumn(column, analysisTypes);
                break;
            case STRING:
                analysis = analyzeStringColumn(column, analysisTypes);
                break;
            case COMPLEX:
                analysis = analyzeComplexColumn(column, analysisTypes);
                break;
            default:
                log.warn("Unknown column type[%s].", type);
                analysis = ColumnAnalysis.error(String.format("unknown_type_%s", type));
            }

            columns.put(columnName, analysis);
        }

        // The time column is sized with the textual timestamp estimate rather than Longs.BYTES.
        columns.put(Column.TIME_COLUMN_NAME, lengthBasedAnalysis(index.getColumn(Column.TIME_COLUMN_NAME),
                NUM_BYTES_IN_TIMESTAMP, analysisTypes));

        return columns;
    }

    /**
     * Analyzes every dimension and metric exposed by the given adapter.
     *
     * <p>LONG and FLOAT columns get a size estimate of {@code numRows * bytesPerValue}. STRING columns are
     * reported with size 0 and, when requested, the adapter's dimension cardinality. COMPLEX columns are
     * reported with size 0 and no cardinality. The time column is added last under
     * {@link Column#TIME_COLUMN_NAME} with type LONG.
     *
     * @param adapter       the storage adapter to analyze; must not be null
     * @param analysisTypes the kinds of analysis (size, cardinality) that were requested
     * @return a map, ordered by column name, from column name to its analysis
     * @throws NullPointerException if {@code adapter} is null
     */
    public Map<String, ColumnAnalysis> analyze(StorageAdapter adapter,
            EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes) {
        Preconditions.checkNotNull(adapter, "Adapter cannot be null");
        Map<String, ColumnAnalysis> columns = Maps.newTreeMap();
        List<String> columnNames = getStorageAdapterColumnNames(adapter);

        int numRows = adapter.getNumRows();
        for (String columnName : columnNames) {
            final ColumnCapabilities capabilities = adapter.getColumnCapabilities(columnName);
            final ColumnAnalysis analysis;

            // StorageAdapter doesn't provide a way to get column values, so size is
            // not calculated for STRING and COMPLEX columns.
            ValueType capType = capabilities.getType();
            switch (capType) {
            case LONG:
                analysis = lengthBasedAnalysisForAdapter(analysisTypes, capType.name(), capabilities, numRows,
                        Longs.BYTES);
                break;
            case FLOAT:
                analysis = lengthBasedAnalysisForAdapter(analysisTypes, capType.name(), capabilities, numRows,
                        NUM_BYTES_IN_TEXT_FLOAT);
                break;
            case STRING:
                analysis = new ColumnAnalysis(capType.name(), 0,
                        analysisHasCardinality(analysisTypes) ? adapter.getDimensionCardinality(columnName) : 0,
                        null);
                break;
            case COMPLEX:
                analysis = new ColumnAnalysis(capType.name(), 0, null, null);
                break;
            default:
                log.warn("Unknown column type[%s].", capType);
                analysis = ColumnAnalysis.error(String.format("unknown_type_%s", capType));
            }

            columns.put(columnName, analysis);
        }

        // No capabilities are passed for the time column, so the multi-value check is skipped for it.
        columns.put(Column.TIME_COLUMN_NAME, lengthBasedAnalysisForAdapter(analysisTypes, ValueType.LONG.name(),
                null, numRows, NUM_BYTES_IN_TIMESTAMP));

        return columns;
    }

    /**
     * Analyzes a LONG column, estimating {@link Longs#BYTES} bytes per row.
     *
     * @param column        the column to analyze
     * @param analysisTypes the kinds of analysis that were requested
     * @return the analysis, or a "multi_value" error analysis if the column has multiple values
     */
    public ColumnAnalysis analyzeLongColumn(Column column,
            EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes) {
        return lengthBasedAnalysis(column, Longs.BYTES, analysisTypes);
    }

    /**
     * Analyzes a FLOAT column, estimating {@code NUM_BYTES_IN_TEXT_FLOAT} bytes per row.
     *
     * @param column        the column to analyze
     * @param analysisTypes the kinds of analysis that were requested
     * @return the analysis, or a "multi_value" error analysis if the column has multiple values
     */
    public ColumnAnalysis analyzeFloatColumn(Column column,
            EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes) {
        return lengthBasedAnalysis(column, NUM_BYTES_IN_TEXT_FLOAT, analysisTypes);
    }

    /**
     * Estimates a single-valued column's size as {@code length * numBytes}.
     *
     * @param column        the column to analyze
     * @param numBytes      the assumed number of bytes per row
     * @param analysisTypes the kinds of analysis that were requested; size is 0 unless SIZE is included
     * @return the analysis, or a "multi_value" error analysis if the column has multiple values
     */
    private ColumnAnalysis lengthBasedAnalysis(Column column, final int numBytes,
            EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes) {
        final ColumnCapabilities capabilities = column.getCapabilities();
        if (capabilities.hasMultipleValues()) {
            return ColumnAnalysis.error("multi_value");
        }

        long size = 0;
        if (analysisHasSize(analysisTypes)) {
            // Widen before multiplying: length * numBytes can exceed Integer.MAX_VALUE for large columns.
            size = (long) column.getLength() * numBytes;
        }

        return new ColumnAnalysis(capabilities.getType().name(), size, null, null);
    }

    /**
     * Analyzes a STRING column through its bitmap index.
     *
     * <p>When size is requested, it is the sum over all non-null dictionary values of
     * {@code utf8ByteLength(value) * bitmap(value).size()}.
     *
     * @param column        the column to analyze
     * @param analysisTypes the kinds of analysis that were requested
     * @return the analysis, or a "string_no_bitmap" error analysis if the column has no bitmap indexes
     */
    public ColumnAnalysis analyzeStringColumn(Column column,
            EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes) {
        final ColumnCapabilities capabilities = column.getCapabilities();

        if (!capabilities.hasBitmapIndexes()) {
            return ColumnAnalysis.error("string_no_bitmap");
        }

        final BitmapIndex bitmapIndex = column.getBitmapIndex();
        final int cardinality = bitmapIndex.getCardinality();
        long size = 0;

        if (analysisHasSize(analysisTypes)) {
            for (int i = 0; i < cardinality; ++i) {
                final String value = bitmapIndex.getValue(i);
                if (value != null) {
                    // Multiply in long: byte length times row count can exceed Integer.MAX_VALUE.
                    final long valueBytes = StringUtils.toUtf8(value).length;
                    size += valueBytes * bitmapIndex.getBitmap(value).size();
                }
            }
        }

        return new ColumnAnalysis(capabilities.getType().name(), size,
                analysisHasCardinality(analysisTypes) ? cardinality : 0, null);
    }

    /**
     * Analyzes a COMPLEX column using the serde registered for its type name.
     *
     * <p>When size is requested, it is the sum of the serde's input-size function applied to every row
     * value. If the serde provides no input-size function, the size is reported as 0.
     *
     * @param column        the column to analyze
     * @param analysisTypes the kinds of analysis that were requested
     * @return the analysis, or an "unknown_complex_&lt;type&gt;" error analysis if no serde is registered
     */
    public ColumnAnalysis analyzeComplexColumn(Column column,
            EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes) {
        final ComplexColumn complexColumn = column.getComplexColumn();

        final String typeName = complexColumn.getTypeName();
        final ComplexMetricSerde serde = ComplexMetrics.getSerdeForType(typeName);
        if (serde == null) {
            return ColumnAnalysis.error(String.format("unknown_complex_%s", typeName));
        }

        final Function<Object, Long> inputSizeFn = serde.inputSizeFn();
        if (inputSizeFn == null) {
            return new ColumnAnalysis(typeName, 0, null, null);
        }

        final int length = column.getLength();
        long size = 0;
        if (analysisHasSize(analysisTypes)) {
            for (int i = 0; i < length; ++i) {
                size += inputSizeFn.apply(complexColumn.getRowValue(i));
            }
        }

        return new ColumnAnalysis(typeName, size, null, null);
    }

    /**
     * Collects the adapter's dimension and metric names into a single list sorted in natural order.
     */
    private List<String> getStorageAdapterColumnNames(StorageAdapter adapter) {
        Indexed<String> dims = adapter.getAvailableDimensions();
        Iterable<String> metrics = adapter.getAvailableMetrics();
        Iterable<String> columnNames = Iterables.concat(dims, metrics);
        List<String> sortedColumnNames = Lists.newArrayList(columnNames);
        Collections.sort(sortedColumnNames);
        return sortedColumnNames;
    }

    /**
     * Estimates a column's size as {@code numRows * numBytes} when only row counts are available.
     *
     * @param analysisTypes the kinds of analysis that were requested; size is 0 unless SIZE is included
     * @param type          the type name to report
     * @param capabilities  the column's capabilities, or null to skip the multi-value check
     * @param numRows       the number of rows in the segment
     * @param numBytes      the assumed number of bytes per row
     * @return the analysis, or a "multi_value" error analysis if the capabilities report multiple values
     */
    private ColumnAnalysis lengthBasedAnalysisForAdapter(EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes,
            String type, ColumnCapabilities capabilities, int numRows, final int numBytes) {
        if (capabilities != null && capabilities.hasMultipleValues()) {
            return ColumnAnalysis.error("multi_value");
        }
        // Widen before multiplying: numRows * numBytes can exceed Integer.MAX_VALUE for large segments.
        final long size = analysisHasSize(analysisTypes) ? (long) numRows * numBytes : 0L;
        return new ColumnAnalysis(type, size, null, null);
    }

    /** Returns true if the SIZE analysis type was requested. */
    private boolean analysisHasSize(EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes) {
        return analysisTypes.contains(SegmentMetadataQuery.AnalysisType.SIZE);
    }

    /** Returns true if the CARDINALITY analysis type was requested. */
    private boolean analysisHasCardinality(EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes) {
        return analysisTypes.contains(SegmentMetadataQuery.AnalysisType.CARDINALITY);
    }

}