io.druid.benchmark.FilterPartitionBenchmark.java Source code

Java tutorial

Introduction

Here is the source code for io.druid.benchmark.FilterPartitionBenchmark.java

Source

/*
 * Licensed to Metamarkets Group Inc. (Metamarkets) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Metamarkets licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package io.druid.benchmark;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Function;
import com.google.common.base.Predicate;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.common.io.Files;
import io.druid.benchmark.datagen.BenchmarkDataGenerator;
import io.druid.benchmark.datagen.BenchmarkSchemaInfo;
import io.druid.benchmark.datagen.BenchmarkSchemas;
import io.druid.data.input.InputRow;
import io.druid.hll.HyperLogLogHash;
import io.druid.jackson.DefaultObjectMapper;
import io.druid.java.util.common.granularity.Granularities;
import io.druid.java.util.common.guava.Sequence;
import io.druid.java.util.common.guava.Sequences;
import io.druid.java.util.common.logger.Logger;
import io.druid.js.JavaScriptConfig;
import io.druid.query.aggregation.hyperloglog.HyperUniquesSerde;
import io.druid.query.dimension.DefaultDimensionSpec;
import io.druid.query.extraction.ExtractionFn;
import io.druid.query.extraction.JavaScriptExtractionFn;
import io.druid.query.filter.AndDimFilter;
import io.druid.query.filter.BitmapIndexSelector;
import io.druid.query.filter.BoundDimFilter;
import io.druid.query.filter.DimFilter;
import io.druid.query.filter.DruidDoublePredicate;
import io.druid.query.filter.DruidFloatPredicate;
import io.druid.query.filter.DruidLongPredicate;
import io.druid.query.filter.DruidPredicateFactory;
import io.druid.query.filter.Filter;
import io.druid.query.filter.OrDimFilter;
import io.druid.query.filter.SelectorDimFilter;
import io.druid.query.ordering.StringComparators;
import io.druid.segment.BaseLongColumnValueSelector;
import io.druid.segment.Cursor;
import io.druid.segment.DimensionSelector;
import io.druid.segment.IndexIO;
import io.druid.segment.IndexMergerV9;
import io.druid.segment.IndexSpec;
import io.druid.segment.QueryableIndex;
import io.druid.segment.QueryableIndexStorageAdapter;
import io.druid.segment.StorageAdapter;
import io.druid.segment.VirtualColumns;
import io.druid.segment.column.Column;
import io.druid.segment.column.ColumnConfig;
import io.druid.segment.data.IndexedInts;
import io.druid.segment.filter.AndFilter;
import io.druid.segment.filter.BoundFilter;
import io.druid.segment.filter.DimensionPredicateFilter;
import io.druid.segment.filter.Filters;
import io.druid.segment.filter.OrFilter;
import io.druid.segment.filter.SelectorFilter;
import io.druid.segment.incremental.IncrementalIndex;
import io.druid.segment.serde.ComplexMetrics;
import org.apache.commons.io.FileUtils;
import org.joda.time.Interval;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.TimeUnit;

@State(Scope.Benchmark)
@Fork(value = 1)
@Warmup(iterations = 10)
@Measurement(iterations = 25)
public class FilterPartitionBenchmark {
    /** Number of rows generated and indexed in {@link #setup()}. JMH parameter, so it must stay non-final. */
    @Param({ "750000" })
    private int rowsPerSegment;

    /** Key used to look up the benchmark schema in {@code BenchmarkSchemas.SCHEMA_MAP}. JMH parameter. */
    @Param({ "basic" })
    private String schema;

    private static final Logger log = new Logger(FilterPartitionBenchmark.class);
    /** Fixed seed handed to the data generator so every run sees the same rows. */
    private static final int RNG_SEED = 9999;
    private static final IndexMergerV9 INDEX_MERGER_V9;
    private static final IndexIO INDEX_IO;
    public static final ObjectMapper JSON_MAPPER;

    /** In-memory index populated in {@link #setup()}. */
    private IncrementalIndex incIndex;
    /** Index loaded back from {@link #indexFile}; the benchmarks scan this one. */
    private QueryableIndex qIndex;
    /** Location returned by persisting {@link #incIndex}. */
    private File indexFile;
    /** Temporary directory holding the persisted segment; deleted in {@link #tearDown()}. */
    private File tmpDir;

    // Bound filters on the time column, built in setup() from the schema's data interval.
    private Filter timeFilterNone;
    private Filter timeFilterHalf;
    private Filter timeFilterAll;

    private BenchmarkSchemaInfo schemaInfo;

    // These two are constants: never reassigned anywhere in this class, hence final.
    private static final String JS_FN = "function(str) { return 'super-' + str; }";
    private static final ExtractionFn JS_EXTRACTION_FN = new JavaScriptExtractionFn(JS_FN, false,
            JavaScriptConfig.getEnabledInstance());

    static {
        JSON_MAPPER = new DefaultObjectMapper();
        INDEX_IO = new IndexIO(JSON_MAPPER, new ColumnConfig() {
            @Override
            public int columnCacheSizeBytes() {
                // Report a zero-byte column cache.
                return 0;
            }
        });
        INDEX_MERGER_V9 = new IndexMergerV9(JSON_MAPPER, INDEX_IO);
    }

    /**
     * Builds the benchmark fixture: generates {@code rowsPerSegment} rows for the selected schema, adds them to
     * an on-heap incremental index, persists that index into a fresh temp directory, loads it back as
     * {@code qIndex}, and prepares the three time-column bound filters.
     *
     * @throws IOException          declared for the persist/load steps
     * @throws NullPointerException if {@code schema} does not name an entry in
     *                              {@code BenchmarkSchemas.SCHEMA_MAP}
     */
    @Setup
    public void setup() throws IOException {
        log.info("SETUP CALLED AT " + System.currentTimeMillis());

        if (ComplexMetrics.getSerdeForType("hyperUnique") == null) {
            ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde(HyperLogLogHash.getDefault()));
        }

        // Fail fast with a readable message; previously an unknown schema name surfaced as an
        // anonymous NullPointerException on the first use of schemaInfo below.
        schemaInfo = Objects.requireNonNull(BenchmarkSchemas.SCHEMA_MAP.get(schema),
                "Unknown benchmark schema: " + schema);

        final Interval interval = schemaInfo.getDataInterval();

        BenchmarkDataGenerator gen = new BenchmarkDataGenerator(schemaInfo.getColumnSchemas(), RNG_SEED,
                interval, rowsPerSegment);

        incIndex = makeIncIndex();

        for (int j = 0; j < rowsPerSegment; j++) {
            InputRow row = gen.nextRow();
            if (j % 10000 == 0) {
                log.info(j + " rows generated.");
            }
            incIndex.add(row);
        }

        tmpDir = Files.createTempDir();
        log.info("Using temp dir: " + tmpDir.getAbsolutePath());

        indexFile = INDEX_MERGER_V9.persist(incIndex, tmpDir, new IndexSpec());
        qIndex = INDEX_IO.loadIndex(indexFile);

        // Both bounds are Long.MAX_VALUE; NOTE(review): the two `true` flags are presumably
        // lowerStrict/upperStrict, which would make this filter match no rows - confirm against BoundDimFilter.
        timeFilterNone = new BoundFilter(new BoundDimFilter(Column.TIME_COLUMN_NAME, String.valueOf(Long.MAX_VALUE),
                String.valueOf(Long.MAX_VALUE), true, true, null, null, StringComparators.ALPHANUMERIC));

        // Upper bound is the midpoint of the data interval.
        long halfEnd = (interval.getEndMillis() + interval.getStartMillis()) / 2;
        timeFilterHalf = new BoundFilter(
                new BoundDimFilter(Column.TIME_COLUMN_NAME, String.valueOf(interval.getStartMillis()),
                        String.valueOf(halfEnd), true, true, null, null, StringComparators.ALPHANUMERIC));

        // Bounds span the whole data interval.
        timeFilterAll = new BoundFilter(new BoundDimFilter(Column.TIME_COLUMN_NAME,
                String.valueOf(interval.getStartMillis()), String.valueOf(interval.getEndMillis()), true, true,
                null, null, StringComparators.ALPHANUMERIC));
    }

    /**
     * Deletes the temporary directory that holds the persisted segment.
     *
     * @throws IOException if the directory cannot be deleted
     */
    @TearDown
    public void tearDown() throws IOException {
        // tmpDir stays null when setup aborted before creating it; skip the delete in that case
        // rather than passing null to FileUtils.
        if (tmpDir != null) {
            FileUtils.deleteDirectory(tmpDir);
        }
    }

    /**
     * Creates the empty on-heap incremental index that {@code setup()} fills: it uses the schema's aggregators,
     * does not report parse exceptions, and is capped at {@code rowsPerSegment} rows.
     */
    private IncrementalIndex makeIncIndex() {
        final IncrementalIndex.Builder builder = new IncrementalIndex.Builder();
        return builder
                .setSimpleTestingIndexSchema(schemaInfo.getAggsArray())
                .setReportParseExceptions(false)
                .setMaxRowCount(rowsPerSegment)
                .buildOnheap();
    }

    /**
     * Unfiltered scan: walks the first cursor over {@code qIndex} with no filter, reading through
     * {@code readCursors}.
     */
    @Benchmark
    @BenchmarkMode(Mode.AverageTime)
    @OutputTimeUnit(TimeUnit.MICROSECONDS)
    public void stringRead(Blackhole blackhole) throws Exception {
        final StorageAdapter adapter = new QueryableIndexStorageAdapter(qIndex);
        final Sequence<Cursor> unfiltered = makeCursors(adapter, null);

        final Sequence<List<String>> perCursor = readCursors(unfiltered, blackhole);
        final Sequence<List<String>> firstOnly = Sequences.limit(perCursor, 1);
        final List<List<String>> drained = Sequences.toList(firstOnly, new ArrayList<List<String>>());
        // Consume whatever the first cursor returned so the result is not dead code.
        for (String value : drained.get(0)) {
            blackhole.consume(value);
        }
    }

    /**
     * Unfiltered scan of the long column: walks the first cursor over {@code qIndex} with no filter, reading
     * through {@code readCursorsLong}.
     */
    @Benchmark
    @BenchmarkMode(Mode.AverageTime)
    @OutputTimeUnit(TimeUnit.MICROSECONDS)
    public void longRead(Blackhole blackhole) throws Exception {
        final StorageAdapter adapter = new QueryableIndexStorageAdapter(qIndex);
        final Sequence<Cursor> unfiltered = makeCursors(adapter, null);

        final Sequence<List<Long>> perCursor = readCursorsLong(unfiltered, blackhole);
        final Sequence<List<Long>> firstOnly = Sequences.limit(perCursor, 1);
        final List<List<Long>> drained = Sequences.toList(firstOnly, new ArrayList<List<Long>>());
        // Consume whatever the first cursor returned so the result is not dead code.
        for (Long value : drained.get(0)) {
            blackhole.consume(value);
        }
    }

    /**
     * Scans the long column through {@code readCursorsLong} with the {@code timeFilterNone} bound filter
     * (both bounds set to {@code Long.MAX_VALUE} in setup) applied.
     */
    @Benchmark
    @BenchmarkMode(Mode.AverageTime)
    @OutputTimeUnit(TimeUnit.MICROSECONDS)
    public void timeFilterNone(Blackhole blackhole) throws Exception {
        final StorageAdapter adapter = new QueryableIndexStorageAdapter(qIndex);
        final Sequence<Cursor> filtered = makeCursors(adapter, timeFilterNone);

        final Sequence<List<Long>> perCursor = readCursorsLong(filtered, blackhole);
        final Sequence<List<Long>> firstOnly = Sequences.limit(perCursor, 1);
        final List<List<Long>> drained = Sequences.toList(firstOnly, new ArrayList<List<Long>>());
        for (Long value : drained.get(0)) {
            blackhole.consume(value);
        }
    }

    /**
     * Scans the long column through {@code readCursorsLong} with the {@code timeFilterHalf} bound filter
     * (interval start up to the interval midpoint) applied.
     */
    @Benchmark
    @BenchmarkMode(Mode.AverageTime)
    @OutputTimeUnit(TimeUnit.MICROSECONDS)
    public void timeFilterHalf(Blackhole blackhole) throws Exception {
        final StorageAdapter adapter = new QueryableIndexStorageAdapter(qIndex);
        final Sequence<Cursor> filtered = makeCursors(adapter, timeFilterHalf);

        final Sequence<List<Long>> perCursor = readCursorsLong(filtered, blackhole);
        final Sequence<List<Long>> firstOnly = Sequences.limit(perCursor, 1);
        final List<List<Long>> drained = Sequences.toList(firstOnly, new ArrayList<List<Long>>());
        for (Long value : drained.get(0)) {
            blackhole.consume(value);
        }
    }

    /**
     * Scans the long column through {@code readCursorsLong} with the {@code timeFilterAll} bound filter
     * (interval start up to interval end) applied.
     */
    @Benchmark
    @BenchmarkMode(Mode.AverageTime)
    @OutputTimeUnit(TimeUnit.MICROSECONDS)
    public void timeFilterAll(Blackhole blackhole) throws Exception {
        final StorageAdapter adapter = new QueryableIndexStorageAdapter(qIndex);
        final Sequence<Cursor> filtered = makeCursors(adapter, timeFilterAll);

        final Sequence<List<Long>> perCursor = readCursorsLong(filtered, blackhole);
        final Sequence<List<Long>> firstOnly = Sequences.limit(perCursor, 1);
        final List<List<Long>> drained = Sequences.toList(firstOnly, new ArrayList<List<Long>>());
        for (Long value : drained.get(0)) {
            blackhole.consume(value);
        }
    }

    /**
     * Reads {@code dimSequential} through {@code readCursors} while filtering with a plain
     * {@code SelectorFilter} on {@code dimSequential = "199"}.
     */
    @Benchmark
    @BenchmarkMode(Mode.AverageTime)
    @OutputTimeUnit(TimeUnit.MICROSECONDS)
    public void readWithPreFilter(Blackhole blackhole) throws Exception {
        final Filter selector = new SelectorFilter("dimSequential", "199");

        final StorageAdapter adapter = new QueryableIndexStorageAdapter(qIndex);
        final Sequence<Cursor> filtered = makeCursors(adapter, selector);

        final Sequence<List<String>> perCursor = readCursors(filtered, blackhole);
        final Sequence<List<String>> firstOnly = Sequences.limit(perCursor, 1);
        final List<List<String>> drained = Sequences.toList(firstOnly, new ArrayList<List<String>>());
        for (String value : drained.get(0)) {
            blackhole.consume(value);
        }
    }

    /**
     * Same selection as {@code readWithPreFilter} ({@code dimSequential = "199"}) but using
     * {@code NoBitmapSelectorFilter}, whose {@code supportsBitmapIndex} always answers {@code false}.
     */
    @Benchmark
    @BenchmarkMode(Mode.AverageTime)
    @OutputTimeUnit(TimeUnit.MICROSECONDS)
    public void readWithPostFilter(Blackhole blackhole) throws Exception {
        final Filter selector = new NoBitmapSelectorFilter("dimSequential", "199");

        final StorageAdapter adapter = new QueryableIndexStorageAdapter(qIndex);
        final Sequence<Cursor> filtered = makeCursors(adapter, selector);

        final Sequence<List<String>> perCursor = readCursors(filtered, blackhole);
        final Sequence<List<String>> firstOnly = Sequences.limit(perCursor, 1);
        final List<List<String>> drained = Sequences.toList(firstOnly, new ArrayList<List<String>>());
        for (String value : drained.get(0)) {
            blackhole.consume(value);
        }
    }

    /**
     * Reads {@code dimSequential} through {@code readCursors} while filtering with a {@code SelectorDimFilter}
     * that matches {@code "super-199"} after applying the JavaScript extraction function.
     */
    @Benchmark
    @BenchmarkMode(Mode.AverageTime)
    @OutputTimeUnit(TimeUnit.MICROSECONDS)
    public void readWithExFnPreFilter(Blackhole blackhole) throws Exception {
        final DimFilter dimFilter = new SelectorDimFilter("dimSequential", "super-199", JS_EXTRACTION_FN);
        final Filter extractionFilter = dimFilter.toFilter();

        final StorageAdapter adapter = new QueryableIndexStorageAdapter(qIndex);
        final Sequence<Cursor> filtered = makeCursors(adapter, extractionFilter);

        final Sequence<List<String>> perCursor = readCursors(filtered, blackhole);
        final Sequence<List<String>> firstOnly = Sequences.limit(perCursor, 1);
        final List<List<String>> drained = Sequences.toList(firstOnly, new ArrayList<List<String>>());
        for (String value : drained.get(0)) {
            blackhole.consume(value);
        }
    }

    /**
     * Same selection as {@code readWithExFnPreFilter} but built from {@code NoBitmapSelectorDimFilter}, whose
     * resulting filter reports no bitmap-index support.
     */
    @Benchmark
    @BenchmarkMode(Mode.AverageTime)
    @OutputTimeUnit(TimeUnit.MICROSECONDS)
    public void readWithExFnPostFilter(Blackhole blackhole) throws Exception {
        final DimFilter dimFilter = new NoBitmapSelectorDimFilter("dimSequential", "super-199", JS_EXTRACTION_FN);
        final Filter extractionFilter = dimFilter.toFilter();

        final StorageAdapter adapter = new QueryableIndexStorageAdapter(qIndex);
        final Sequence<Cursor> filtered = makeCursors(adapter, extractionFilter);

        final Sequence<List<String>> perCursor = readCursors(filtered, blackhole);
        final Sequence<List<String>> firstOnly = Sequences.limit(perCursor, 1);
        final List<List<String>> drained = Sequences.toList(firstOnly, new ArrayList<List<String>>());
        for (String value : drained.get(0)) {
            blackhole.consume(value);
        }
    }

    /**
     * Reads through {@code readCursors} with the filter
     * {@code dimSequential=199 OR (dimMultivalEnumerated2=Corundum AND dimMultivalEnumerated=Bar)}, where the
     * {@code dimSequential} and {@code dimMultivalEnumerated} selectors are the no-bitmap variants.
     */
    @Benchmark
    @BenchmarkMode(Mode.AverageTime)
    @OutputTimeUnit(TimeUnit.MICROSECONDS)
    public void readOrFilter(Blackhole blackhole) throws Exception {
        final Filter sequentialSelector = new NoBitmapSelectorFilter("dimSequential", "199");
        final List<Filter> conjuncts = Arrays.<Filter>asList(
                new SelectorFilter("dimMultivalEnumerated2", "Corundum"),
                new NoBitmapSelectorFilter("dimMultivalEnumerated", "Bar"));
        final Filter multivalConjunction = new AndFilter(conjuncts);
        final Filter disjunction = new OrFilter(Arrays.<Filter>asList(sequentialSelector, multivalConjunction));

        final StorageAdapter adapter = new QueryableIndexStorageAdapter(qIndex);
        final Sequence<Cursor> filtered = makeCursors(adapter, disjunction);

        final Sequence<List<String>> perCursor = readCursors(filtered, blackhole);
        final Sequence<List<String>> firstOnly = Sequences.limit(perCursor, 1);
        final List<List<String>> drained = Sequences.toList(firstOnly, new ArrayList<List<String>>());
        for (String value : drained.get(0)) {
            blackhole.consume(value);
        }
    }

    /**
     * Builds the same OR-of-AND filter as {@code readOrFilter} but passes it through
     * {@code Filters.convertToCNF} before creating the cursors.
     */
    @Benchmark
    @BenchmarkMode(Mode.AverageTime)
    @OutputTimeUnit(TimeUnit.MICROSECONDS)
    public void readOrFilterCNF(Blackhole blackhole) throws Exception {
        final Filter sequentialSelector = new NoBitmapSelectorFilter("dimSequential", "199");
        final List<Filter> conjuncts = Arrays.<Filter>asList(
                new SelectorFilter("dimMultivalEnumerated2", "Corundum"),
                new NoBitmapSelectorFilter("dimMultivalEnumerated", "Bar"));
        final Filter multivalConjunction = new AndFilter(conjuncts);
        final Filter disjunction = new OrFilter(Arrays.<Filter>asList(sequentialSelector, multivalConjunction));

        final StorageAdapter adapter = new QueryableIndexStorageAdapter(qIndex);
        final Filter normalized = Filters.convertToCNF(disjunction);
        final Sequence<Cursor> filtered = makeCursors(adapter, normalized);

        final Sequence<List<String>> perCursor = readCursors(filtered, blackhole);
        final Sequence<List<String>> firstOnly = Sequences.limit(perCursor, 1);
        final List<List<String>> drained = Sequences.toList(firstOnly, new ArrayList<List<String>>());
        for (String value : drained.get(0)) {
            blackhole.consume(value);
        }
    }

    /**
     * Reads through {@code readCursors} with a nested filter: an OR of two OR-branches plus one AND-branch.
     * Each AND pairs a no-bitmap selector on {@code dimMultivalEnumerated2} with a regular selector on
     * {@code dimMultivalEnumerated}.
     */
    @Benchmark
    @BenchmarkMode(Mode.AverageTime)
    @OutputTimeUnit(TimeUnit.MICROSECONDS)
    public void readComplexOrFilter(Blackhole blackhole) throws Exception {
        // Branch 1: dimSequential=199 OR (Corundum AND Bar)
        final DimFilter sequential199 = new SelectorDimFilter("dimSequential", "199", null);
        final DimFilter corundumAndBar = new AndDimFilter(Arrays.<DimFilter>asList(
                new NoBitmapSelectorDimFilter("dimMultivalEnumerated2", "Corundum", null),
                new SelectorDimFilter("dimMultivalEnumerated", "Bar", null)));
        final DimFilter firstBranch = new OrDimFilter(Arrays.<DimFilter>asList(sequential199, corundumAndBar));

        // Branch 2: dimSequential=299 OR dimSequential=399 OR (Xylophone AND Foo)
        final DimFilter sequential299 = new SelectorDimFilter("dimSequential", "299", null);
        final DimFilter sequential399 = new SelectorDimFilter("dimSequential", "399", null);
        final DimFilter xylophoneAndFoo = new AndDimFilter(Arrays.<DimFilter>asList(
                new NoBitmapSelectorDimFilter("dimMultivalEnumerated2", "Xylophone", null),
                new SelectorDimFilter("dimMultivalEnumerated", "Foo", null)));
        final DimFilter secondBranch = new OrDimFilter(
                Arrays.<DimFilter>asList(sequential299, sequential399, xylophoneAndFoo));

        // Top level: branch 1 OR branch 2 OR (Orange AND World)
        final DimFilter orangeAndWorld = new AndDimFilter(Arrays.<DimFilter>asList(
                new NoBitmapSelectorDimFilter("dimMultivalEnumerated2", "Orange", null),
                new SelectorDimFilter("dimMultivalEnumerated", "World", null)));
        final DimFilter combined = new OrDimFilter(
                Arrays.<DimFilter>asList(firstBranch, secondBranch, orangeAndWorld));

        final StorageAdapter adapter = new QueryableIndexStorageAdapter(qIndex);
        final Sequence<Cursor> filtered = makeCursors(adapter, combined.toFilter());

        final Sequence<List<String>> perCursor = readCursors(filtered, blackhole);
        final Sequence<List<String>> firstOnly = Sequences.limit(perCursor, 1);
        final List<List<String>> drained = Sequences.toList(firstOnly, new ArrayList<List<String>>());
        for (String value : drained.get(0)) {
            blackhole.consume(value);
        }
    }

    /**
     * Builds the same nested OR/AND filter as {@code readComplexOrFilter} but passes the resulting filter
     * through {@code Filters.convertToCNF} before creating the cursors.
     */
    @Benchmark
    @BenchmarkMode(Mode.AverageTime)
    @OutputTimeUnit(TimeUnit.MICROSECONDS)
    public void readComplexOrFilterCNF(Blackhole blackhole) throws Exception {
        // Branch 1: dimSequential=199 OR (Corundum AND Bar)
        final DimFilter sequential199 = new SelectorDimFilter("dimSequential", "199", null);
        final DimFilter corundumAndBar = new AndDimFilter(Arrays.<DimFilter>asList(
                new NoBitmapSelectorDimFilter("dimMultivalEnumerated2", "Corundum", null),
                new SelectorDimFilter("dimMultivalEnumerated", "Bar", null)));
        final DimFilter firstBranch = new OrDimFilter(Arrays.<DimFilter>asList(sequential199, corundumAndBar));

        // Branch 2: dimSequential=299 OR dimSequential=399 OR (Xylophone AND Foo)
        final DimFilter sequential299 = new SelectorDimFilter("dimSequential", "299", null);
        final DimFilter sequential399 = new SelectorDimFilter("dimSequential", "399", null);
        final DimFilter xylophoneAndFoo = new AndDimFilter(Arrays.<DimFilter>asList(
                new NoBitmapSelectorDimFilter("dimMultivalEnumerated2", "Xylophone", null),
                new SelectorDimFilter("dimMultivalEnumerated", "Foo", null)));
        final DimFilter secondBranch = new OrDimFilter(
                Arrays.<DimFilter>asList(sequential299, sequential399, xylophoneAndFoo));

        // Top level: branch 1 OR branch 2 OR (Orange AND World)
        final DimFilter orangeAndWorld = new AndDimFilter(Arrays.<DimFilter>asList(
                new NoBitmapSelectorDimFilter("dimMultivalEnumerated2", "Orange", null),
                new SelectorDimFilter("dimMultivalEnumerated", "World", null)));
        final DimFilter combined = new OrDimFilter(
                Arrays.<DimFilter>asList(firstBranch, secondBranch, orangeAndWorld));

        final StorageAdapter adapter = new QueryableIndexStorageAdapter(qIndex);
        final Filter normalized = Filters.convertToCNF(combined.toFilter());
        final Sequence<Cursor> filtered = makeCursors(adapter, normalized);

        final Sequence<List<String>> perCursor = readCursors(filtered, blackhole);
        final Sequence<List<String>> firstOnly = Sequences.limit(perCursor, 1);
        final List<List<String>> drained = Sequences.toList(firstOnly, new ArrayList<List<String>>());
        for (String value : drained.get(0)) {
            blackhole.consume(value);
        }
    }

    /**
     * Opens cursors on the given adapter over the schema's full data interval, with no virtual columns and
     * {@code Granularities.ALL}.
     *
     * @param sa     adapter to read from
     * @param filter filter to apply, or {@code null} for none (as the unfiltered benchmarks pass)
     * @return the cursor sequence produced by the adapter
     */
    private Sequence<Cursor> makeCursors(StorageAdapter sa, Filter filter) {
        final Interval wholeInterval = schemaInfo.getDataInterval();
        return sa.makeCursors(
                filter,
                wholeInterval,
                VirtualColumns.EMPTY,
                Granularities.ALL,
                false,
                null);
    }

    /**
     * Maps each cursor to a full walk over its rows, looking up the first {@code dimSequential} value of every
     * row and handing it to the blackhole.
     *
     * @param cursors   cursors to walk
     * @param blackhole sink for the looked-up names
     * @return one list per cursor; the list is never added to, so it is always empty
     */
    private Sequence<List<String>> readCursors(Sequence<Cursor> cursors, final Blackhole blackhole) {
        final Function<Cursor, List<String>> walkCursor = new Function<Cursor, List<String>>() {
            @Override
            public List<String> apply(Cursor input) {
                final List<String> collected = new ArrayList<>();
                final List<DimensionSelector> dimSelectors = new ArrayList<>();
                dimSelectors.add(input.getColumnSelectorFactory()
                        .makeDimensionSelector(new DefaultDimensionSpec("dimSequential", null)));
                for (; !input.isDone(); input.advance()) {
                    for (DimensionSelector dimSelector : dimSelectors) {
                        final IndexedInts row = dimSelector.getRow();
                        final int firstValueId = row.get(0);
                        // Values go straight to the blackhole rather than into the returned list.
                        blackhole.consume(dimSelector.lookupName(firstValueId));
                    }
                }
                return collected;
            }
        };
        return Sequences.map(cursors, walkCursor);
    }

    /**
     * Maps each cursor to a full walk over its rows, reading the {@code sumLongSequential} column as a long
     * and handing each value to the blackhole.
     *
     * @param cursors   cursors to walk
     * @param blackhole sink for the values read
     * @return one list per cursor; the list is never added to, so it is always empty
     */
    private Sequence<List<Long>> readCursorsLong(Sequence<Cursor> cursors, final Blackhole blackhole) {
        final Function<Cursor, List<Long>> walkCursor = new Function<Cursor, List<Long>>() {
            @Override
            public List<Long> apply(Cursor input) {
                final List<Long> collected = new ArrayList<>();
                final BaseLongColumnValueSelector longSelector = input.getColumnSelectorFactory()
                        .makeColumnValueSelector("sumLongSequential");
                for (; !input.isDone(); input.advance()) {
                    // Values go straight to the blackhole rather than into the returned list.
                    blackhole.consume(longSelector.getLong());
                }
                return collected;
            }
        };
        return Sequences.map(cursors, walkCursor);
    }

    /**
     * A {@link SelectorFilter} that always reports it cannot use a bitmap index.
     * NOTE(review): presumably this makes the engine evaluate the filter row by row - confirm against the
     * storage adapter.
     */
    private static class NoBitmapSelectorFilter extends SelectorFilter {
        public NoBitmapSelectorFilter(
                final String dimension,
                final String value) {
            super(dimension, value);
        }

        /** Always {@code false}, regardless of the selector passed in. */
        @Override
        public boolean supportsBitmapIndex(final BitmapIndexSelector selector) {
            return false;
        }
    }

    /**
     * A {@link DimensionPredicateFilter} that always reports it cannot use a bitmap index.
     */
    private static class NoBitmapDimensionPredicateFilter extends DimensionPredicateFilter {
        public NoBitmapDimensionPredicateFilter(
                final String dimension,
                final DruidPredicateFactory predicateFactory,
                final ExtractionFn extractionFn) {
            super(dimension, predicateFactory, extractionFn);
        }

        /** Always {@code false}, regardless of the selector passed in. */
        @Override
        public boolean supportsBitmapIndex(final BitmapIndexSelector selector) {
            return false;
        }
    }

    /**
     * A {@link SelectorDimFilter} whose {@link #toFilter()} only ever produces the no-bitmap filter variants
     * declared in this benchmark.
     */
    private static class NoBitmapSelectorDimFilter extends SelectorDimFilter {
        public NoBitmapSelectorDimFilter(String dimension, String value, ExtractionFn extractionFn) {
            super(dimension, value, extractionFn);
        }

        /**
         * Returns a {@code NoBitmapSelectorFilter} when no extraction function is set; otherwise a
         * {@code NoBitmapDimensionPredicateFilter} whose string predicate compares against the filter value
         * (with the empty string treated as {@code null}) and whose numeric predicates always reject.
         */
        @Override
        public Filter toFilter() {
            final ExtractionFn fn = getExtractionFn();
            final String dim = getDimension();
            final String rawValue = getValue();

            // Without an extraction function a plain selector is enough.
            if (fn == null) {
                return new NoBitmapSelectorFilter(dim, rawValue);
            }

            final String expected = Strings.emptyToNull(rawValue);

            final DruidPredicateFactory factory = new DruidPredicateFactory() {
                @Override
                public Predicate<String> makeStringPredicate() {
                    return new Predicate<String>() {
                        @Override
                        public boolean apply(String input) {
                            return Objects.equals(expected, input);
                        }
                    };
                }

                @Override
                public DruidLongPredicate makeLongPredicate() {
                    return DruidLongPredicate.ALWAYS_FALSE;
                }

                @Override
                public DruidFloatPredicate makeFloatPredicate() {
                    return DruidFloatPredicate.ALWAYS_FALSE;
                }

                @Override
                public DruidDoublePredicate makeDoublePredicate() {
                    return DruidDoublePredicate.ALWAYS_FALSE;
                }
            };

            return new NoBitmapDimensionPredicateFilter(dim, factory, fn);
        }
    }
}