Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.druid.query.metadata; import com.google.common.base.Function; import com.google.common.base.Preconditions; import com.google.common.collect.Iterables; import com.google.common.collect.Maps; import com.google.common.collect.Sets; import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Accumulator; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.metadata.metadata.ColumnAnalysis; import org.apache.druid.query.metadata.metadata.SegmentMetadataQuery; import org.apache.druid.segment.Cursor; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.Segment; import org.apache.druid.segment.StorageAdapter; import org.apache.druid.segment.VirtualColumns; import org.apache.druid.segment.column.BitmapIndex; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ComplexColumn; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.segment.serde.ComplexMetricSerde; import org.apache.druid.segment.serde.ComplexMetrics; import org.joda.time.DateTime; import org.joda.time.Interval; import javax.annotation.Nullable; import java.util.EnumSet; import java.util.Map; import java.util.Set; public class SegmentAnalyzer { private static final Logger log = new Logger(SegmentAnalyzer.class); /** * This is based on the minimum size of a timestamp (POSIX seconds). An ISO timestamp will actually be more like 24+ */ private static final int NUM_BYTES_IN_TIMESTAMP = 10; /** * This is based on assuming 6 units of precision, one decimal point and a single value left of the decimal */ private static final int NUM_BYTES_IN_TEXT_FLOAT = 8; private final EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes; public SegmentAnalyzer(EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes) { this.analysisTypes = analysisTypes; } public long numRows(Segment segment) { return Preconditions.checkNotNull(segment, "segment").asStorageAdapter().getNumRows(); } public Map<String, ColumnAnalysis> analyze(Segment segment) { Preconditions.checkNotNull(segment, "segment"); // index is null for incremental-index-based segments, but storageAdapter is always available final QueryableIndex index = segment.asQueryableIndex(); final StorageAdapter storageAdapter = segment.asStorageAdapter(); // get length and column names from storageAdapter final int length = storageAdapter.getNumRows(); final Set<String> columnNames = Sets.newHashSet(); Iterables.addAll(columnNames, storageAdapter.getAvailableDimensions()); Iterables.addAll(columnNames, storageAdapter.getAvailableMetrics()); Map<String, ColumnAnalysis> columns = Maps.newTreeMap(); for (String columnName : columnNames) { final ColumnHolder columnHolder = index == null ? null : index.getColumnHolder(columnName); final ColumnCapabilities capabilities = columnHolder != null ? columnHolder.getCapabilities() : storageAdapter.getColumnCapabilities(columnName); final ColumnAnalysis analysis; final ValueType type = capabilities.getType(); switch (type) { case LONG: analysis = analyzeNumericColumn(capabilities, length, Long.BYTES); break; case FLOAT: analysis = analyzeNumericColumn(capabilities, length, NUM_BYTES_IN_TEXT_FLOAT); break; case DOUBLE: analysis = analyzeNumericColumn(capabilities, length, Double.BYTES); break; case STRING: if (index != null) { analysis = analyzeStringColumn(capabilities, columnHolder); } else { analysis = analyzeStringColumn(capabilities, storageAdapter, columnName); } break; case COMPLEX: analysis = analyzeComplexColumn(capabilities, columnHolder, storageAdapter.getColumnTypeName(columnName)); break; default: log.warn("Unknown column type[%s].", type); analysis = ColumnAnalysis.error(StringUtils.format("unknown_type_%s", type)); } columns.put(columnName, analysis); } // Add time column too ColumnCapabilities timeCapabilities = storageAdapter.getColumnCapabilities(ColumnHolder.TIME_COLUMN_NAME); if (timeCapabilities == null) { timeCapabilities = new ColumnCapabilitiesImpl().setType(ValueType.LONG).setHasMultipleValues(false); } columns.put(ColumnHolder.TIME_COLUMN_NAME, analyzeNumericColumn(timeCapabilities, length, NUM_BYTES_IN_TIMESTAMP)); return columns; } public boolean analyzingSize() { return analysisTypes.contains(SegmentMetadataQuery.AnalysisType.SIZE); } public boolean analyzingCardinality() { return analysisTypes.contains(SegmentMetadataQuery.AnalysisType.CARDINALITY); } public boolean analyzingMinMax() { return analysisTypes.contains(SegmentMetadataQuery.AnalysisType.MINMAX); } private ColumnAnalysis analyzeNumericColumn(final ColumnCapabilities capabilities, final int length, final int sizePerRow) { long size = 0; if (analyzingSize()) { if (capabilities.hasMultipleValues()) { return ColumnAnalysis.error("multi_value"); } size = ((long) length) * sizePerRow; } return new ColumnAnalysis(capabilities.getType().name(), capabilities.hasMultipleValues(), size, null, null, null, null); } private ColumnAnalysis analyzeStringColumn(final ColumnCapabilities capabilities, final ColumnHolder columnHolder) { long size = 0; Comparable min = null; Comparable max = null; if (!capabilities.hasBitmapIndexes()) { return ColumnAnalysis.error("string_no_bitmap"); } final BitmapIndex bitmapIndex = columnHolder.getBitmapIndex(); final int cardinality = bitmapIndex.getCardinality(); if (analyzingSize()) { for (int i = 0; i < cardinality; ++i) { String value = bitmapIndex.getValue(i); if (value != null) { size += StringUtils.estimatedBinaryLengthAsUTF8(value) * bitmapIndex.getBitmap(bitmapIndex.getIndex(value)).size(); } } } if (analyzingMinMax() && cardinality > 0) { min = NullHandling.nullToEmptyIfNeeded(bitmapIndex.getValue(0)); max = NullHandling.nullToEmptyIfNeeded(bitmapIndex.getValue(cardinality - 1)); } return new ColumnAnalysis(capabilities.getType().name(), capabilities.hasMultipleValues(), size, analyzingCardinality() ? cardinality : 0, min, max, null); } private ColumnAnalysis analyzeStringColumn(final ColumnCapabilities capabilities, final StorageAdapter storageAdapter, final String columnName) { int cardinality = 0; long size = 0; Comparable min = null; Comparable max = null; if (analyzingCardinality()) { cardinality = storageAdapter.getDimensionCardinality(columnName); } if (analyzingSize()) { final DateTime start = storageAdapter.getMinTime(); final DateTime end = storageAdapter.getMaxTime(); final Sequence<Cursor> cursors = storageAdapter.makeCursors(null, new Interval(start, end), VirtualColumns.EMPTY, Granularities.ALL, false, null); size = cursors.accumulate(0L, new Accumulator<Long, Cursor>() { @Override public Long accumulate(Long accumulated, Cursor cursor) { DimensionSelector selector = cursor.getColumnSelectorFactory() .makeDimensionSelector(new DefaultDimensionSpec(columnName, columnName)); if (selector == null) { return accumulated; } long current = accumulated; while (!cursor.isDone()) { final IndexedInts row = selector.getRow(); for (int i = 0, rowSize = row.size(); i < rowSize; ++i) { final String dimVal = selector.lookupName(row.get(i)); if (dimVal != null && !dimVal.isEmpty()) { current += StringUtils.estimatedBinaryLengthAsUTF8(dimVal); } } cursor.advance(); } return current; } }); } if (analyzingMinMax()) { min = storageAdapter.getMinValue(columnName); max = storageAdapter.getMaxValue(columnName); } return new ColumnAnalysis(capabilities.getType().name(), capabilities.hasMultipleValues(), size, cardinality, min, max, null); } private ColumnAnalysis analyzeComplexColumn(@Nullable final ColumnCapabilities capabilities, @Nullable final ColumnHolder columnHolder, final String typeName) { try (final ComplexColumn complexColumn = columnHolder != null ? (ComplexColumn) columnHolder.getColumn() : null) { final boolean hasMultipleValues = capabilities != null && capabilities.hasMultipleValues(); long size = 0; if (analyzingSize() && complexColumn != null) { final ComplexMetricSerde serde = ComplexMetrics.getSerdeForType(typeName); if (serde == null) { return ColumnAnalysis.error(StringUtils.format("unknown_complex_%s", typeName)); } final Function<Object, Long> inputSizeFn = serde.inputSizeFn(); if (inputSizeFn == null) { return new ColumnAnalysis(typeName, hasMultipleValues, 0, null, null, null, null); } final int length = complexColumn.getLength(); for (int i = 0; i < length; ++i) { size += inputSizeFn.apply(complexColumn.getRowValue(i)); } } return new ColumnAnalysis(typeName, hasMultipleValues, size, null, null, null, null); } } }