com.metamx.druid.query.metadata.SegmentAnalyzer.java Source code

Java tutorial

Introduction

Here is the source code for com.metamx.druid.query.metadata.SegmentAnalyzer.java

Source

/*
 * Druid - a distributed column store.
 * Copyright (C) 2012  Metamarkets Group Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */

package com.metamx.druid.query.metadata;

import com.google.common.base.Charsets;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
import com.google.common.primitives.Longs;
import com.metamx.common.logger.Logger;
import com.metamx.druid.index.QueryableIndex;
import com.metamx.druid.index.column.BitmapIndex;
import com.metamx.druid.index.column.Column;
import com.metamx.druid.index.column.ColumnCapabilities;
import com.metamx.druid.index.column.ComplexColumn;
import com.metamx.druid.index.column.ValueType;
import com.metamx.druid.index.v1.serde.ComplexMetricSerde;
import com.metamx.druid.index.v1.serde.ComplexMetrics;

import java.util.Map;

public class SegmentAnalyzer {
    private static final Logger log = new Logger(SegmentAnalyzer.class);

    /**
     * This is based on the minimum size of a timestamp (POSIX seconds).  An ISO timestamp will actually be more like 24+
     */
    private static final int NUM_BYTES_IN_TIMESTAMP = 10;

    /**
     * This is based on assuming 6 units of precision, one decimal point and a single value left of the decimal
     */
    private static final int NUM_BYTES_IN_TEXT_FLOAT = 8;

    public Map<String, ColumnAnalysis> analyze(QueryableIndex index) {
        Preconditions.checkNotNull(index, "Index cannot be null");

        Map<String, ColumnAnalysis> columns = Maps.newTreeMap();

        for (String columnName : index.getColumnNames()) {
            final Column column = index.getColumn(columnName);
            final ColumnCapabilities capabilities = column.getCapabilities();

            final ColumnAnalysis analysis;
            final ValueType type = capabilities.getType();
            switch (type) {
            case LONG:
                analysis = analyzeLongColumn(column);
                break;
            case FLOAT:
                analysis = analyzeFloatColumn(column);
                break;
            case STRING:
                analysis = analyzeStringColumn(column);
                break;
            case COMPLEX:
                analysis = analyzeComplexColumn(column);
                break;
            default:
                log.warn("Unknown column type[%s].", type);
                analysis = ColumnAnalysis.error(String.format("unknown_type_%s", type));
            }

            columns.put(columnName, analysis);
        }

        columns.put("__time", lengthBasedAnalysis(index.getTimeColumn(), NUM_BYTES_IN_TIMESTAMP));

        return columns;
    }

    public ColumnAnalysis analyzeLongColumn(Column column) {
        return lengthBasedAnalysis(column, Longs.BYTES);
    }

    public ColumnAnalysis analyzeFloatColumn(Column column) {
        return lengthBasedAnalysis(column, NUM_BYTES_IN_TEXT_FLOAT);
    }

    private ColumnAnalysis lengthBasedAnalysis(Column column, final int numBytes) {
        final ColumnCapabilities capabilities = column.getCapabilities();
        if (capabilities.hasMultipleValues()) {
            return ColumnAnalysis.error("multi_value");
        }

        return new ColumnAnalysis(capabilities.getType(), column.getLength() * numBytes, null, null);
    }

    public ColumnAnalysis analyzeStringColumn(Column column) {
        final ColumnCapabilities capabilities = column.getCapabilities();

        if (capabilities.hasBitmapIndexes()) {
            final BitmapIndex bitmapIndex = column.getBitmapIndex();

            int cardinality = bitmapIndex.getCardinality();
            long size = 0;
            for (int i = 0; i < cardinality; ++i) {
                String value = bitmapIndex.getValue(i);

                if (value != null) {
                    size += value.getBytes(Charsets.UTF_8).length * bitmapIndex.getConciseSet(value).size();
                }
            }

            return new ColumnAnalysis(capabilities.getType(), size, cardinality, null);
        }

        return ColumnAnalysis.error("string_no_bitmap");
    }

    public ColumnAnalysis analyzeComplexColumn(Column column) {
        final ColumnCapabilities capabilities = column.getCapabilities();
        final ComplexColumn complexColumn = column.getComplexColumn();

        final String typeName = complexColumn.getTypeName();
        final ComplexMetricSerde serde = ComplexMetrics.getSerdeForType(typeName);
        if (serde == null) {
            return ColumnAnalysis.error(String.format("unknown_complex_%s", typeName));
        }

        final Function<Object, Long> inputSizeFn = serde.inputSizeFn();
        if (inputSizeFn == null) {
            return ColumnAnalysis.error("noSizeFn");
        }

        final int length = column.getLength();
        long size = 0;
        for (int i = 0; i < length; ++i) {
            size += inputSizeFn.apply(complexColumn.getRowValue(i));
        }

        return new ColumnAnalysis(capabilities.getType(), size, null, null);
    }
}