Java tutorial: unit-testing Pinot's no-dictionary group key generators
/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.query.aggregation.groupby;

import com.linkedin.pinot.common.data.DimensionFieldSpec;
import com.linkedin.pinot.common.data.FieldSpec;
import com.linkedin.pinot.common.data.Schema;
import com.linkedin.pinot.common.request.transform.TransformExpressionTree;
import com.linkedin.pinot.common.segment.ReadMode;
import com.linkedin.pinot.core.data.GenericRow;
import com.linkedin.pinot.core.data.readers.GenericRowRecordReader;
import com.linkedin.pinot.core.data.readers.RecordReader;
import com.linkedin.pinot.core.indexsegment.IndexSegment;
import com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig;
import com.linkedin.pinot.core.operator.blocks.TransformBlock;
import com.linkedin.pinot.core.operator.transform.TransformOperator;
import com.linkedin.pinot.core.plan.TransformPlanNode;
import com.linkedin.pinot.core.query.aggregation.groupby.AggregationGroupByTrimmingService;
import com.linkedin.pinot.core.query.aggregation.groupby.GroupKeyGenerator;
import com.linkedin.pinot.core.query.aggregation.groupby.NoDictionaryMultiColumnGroupKeyGenerator;
import com.linkedin.pinot.core.query.aggregation.groupby.NoDictionarySingleColumnGroupKeyGenerator;
import com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl;
import com.linkedin.pinot.core.segment.index.loader.Loaders;
import com.linkedin.pinot.pql.parsers.Pql2Compiler;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;


/**
 * Unit test for {@link NoDictionarySingleColumnGroupKeyGenerator} and
 * {@link NoDictionaryMultiColumnGroupKeyGenerator}.
 */
public class NoDictionaryGroupKeyGeneratorTest {
  private static final String SEGMENT_NAME = "testSegment";
  private static final String INDEX_DIR_PATH = FileUtils.getTempDirectoryPath() + File.separator + SEGMENT_NAME;

  private static final String STRING_DICT_COLUMN = "string_dict_column";
  private static final String[] COLUMN_NAMES =
      {"int_column", "long_column", "float_column", "double_column", "string_column", STRING_DICT_COLUMN};
  private static final String[] NO_DICT_COLUMN_NAMES =
      {"int_column", "long_column", "float_column", "double_column", "string_column"};
  private static final FieldSpec.DataType[] DATA_TYPES =
      {FieldSpec.DataType.INT, FieldSpec.DataType.LONG, FieldSpec.DataType.FLOAT, FieldSpec.DataType.DOUBLE,
          FieldSpec.DataType.STRING, FieldSpec.DataType.STRING};
  private static final int NUM_COLUMNS = COLUMN_NAMES.length;
  private static final int NUM_ROWS = 1000;

  private RecordReader _recordReader;
  private TransformOperator _transformOperator;
  private TransformBlock _transformBlock;
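  // Test fixture: the segment generated in setup() contains five raw (no-dictionary)
  // columns and one dictionary-encoded string column, and is loaded in heap mode.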
  @BeforeClass
  public void setup() throws Exception {
    FileUtils.deleteQuietly(new File(INDEX_DIR_PATH));
    _recordReader = buildSegment();

    // Load the segment.
    IndexSegment indexSegment = Loaders.IndexSegment.load(new File(INDEX_DIR_PATH, SEGMENT_NAME), ReadMode.heap);

    // Create the transform operator and block.
    // NOTE: put all columns into the group-by clause so that the transform operator has expressions for all columns.
    String query = String.format("SELECT COUNT(*) FROM table GROUP BY %s", StringUtils.join(COLUMN_NAMES, ", "));
    TransformPlanNode transformPlanNode =
        new TransformPlanNode(indexSegment, new Pql2Compiler().compileToBrokerRequest(query));
    _transformOperator = transformPlanNode.run();
    _transformBlock = _transformOperator.nextBlock();
  }

  /**
   * Unit test for {@link NoDictionarySingleColumnGroupKeyGenerator}.
   * @throws Exception
   */
  @Test
  public void testSingleColumnGroupKeyGenerator() throws Exception {
    for (String column : COLUMN_NAMES) {
      testGroupKeyGenerator(new String[]{column});
    }
  }

  /**
   * Unit test for {@link NoDictionaryMultiColumnGroupKeyGenerator}.
   * @throws Exception
   */
  @Test
  public void testMultiColumnGroupKeyGenerator() throws Exception {
    testGroupKeyGenerator(COLUMN_NAMES);
  }

  /**
   * Tests the multi-column group key generator when at least one column has a dictionary and the others don't.
   */
  @Test
  public void testMultiColumnHybridGroupKeyGenerator() throws Exception {
    for (String noDictColumn : NO_DICT_COLUMN_NAMES) {
      testGroupKeyGenerator(new String[]{noDictColumn, STRING_DICT_COLUMN});
    }
  }

  private void testGroupKeyGenerator(String[] groupByColumns) throws Exception {
    int numGroupByColumns = groupByColumns.length;
    TransformExpressionTree[] groupByExpressions = new TransformExpressionTree[numGroupByColumns];
    for (int i = 0; i < numGroupByColumns; i++) {
      groupByExpressions[i] = TransformExpressionTree.compileToExpressionTree(groupByColumns[i]);
    }

    GroupKeyGenerator groupKeyGenerator;
    if (numGroupByColumns == 1) {
      groupKeyGenerator = new NoDictionarySingleColumnGroupKeyGenerator(_transformOperator, groupByExpressions[0]);
    } else {
      groupKeyGenerator = new NoDictionaryMultiColumnGroupKeyGenerator(_transformOperator, groupByExpressions);
    }
    groupKeyGenerator.generateKeysForBlock(_transformBlock, new int[NUM_ROWS]);

    // Assert that the total number of group keys is as expected.
    Set<String> expectedGroupKeys = getExpectedGroupKeys(_recordReader, groupByColumns);
    Assert.assertEquals(groupKeyGenerator.getCurrentGroupKeyUpperBound(), expectedGroupKeys.size(),
        "Number of group keys mis-match.");

    // Assert that all group key values are as expected.
    Iterator<GroupKeyGenerator.GroupKey> uniqueGroupKeys = groupKeyGenerator.getUniqueGroupKeys();
    while (uniqueGroupKeys.hasNext()) {
      GroupKeyGenerator.GroupKey groupKey = uniqueGroupKeys.next();
      String actual = groupKey._stringKey;
      Assert.assertTrue(expectedGroupKeys.contains(actual), "Unexpected group key: " + actual);
    }
  }
  /**
   * Helper method to compute the expected group keys for a given array of group-by columns by scanning
   * the record reader and joining the column values of each row with the group-key delimiter.
   *
   * @param recordReader Record reader for the rows in the segment.
   * @param groupByColumns Group-by columns for which to generate the group keys.
   * @return Set of unique group keys.
   * @throws Exception
   */
  private Set<String> getExpectedGroupKeys(RecordReader recordReader, String[] groupByColumns) throws Exception {
    Set<String> groupKeys = new HashSet<>();
    StringBuilder stringBuilder = new StringBuilder();

    recordReader.rewind();
    while (recordReader.hasNext()) {
      GenericRow row = recordReader.next();
      stringBuilder.setLength(0);
      for (int i = 0; i < groupByColumns.length; i++) {
        stringBuilder.append(row.getValue(groupByColumns[i]));
        if (i < groupByColumns.length - 1) {
          stringBuilder.append(AggregationGroupByTrimmingService.GROUP_KEY_DELIMITER);
        }
      }
      groupKeys.add(stringBuilder.toString());
    }
    return groupKeys;
  }

  /**
   * Helper method to build a segment with random data as follows:
   * <ul>
   *   <li> Five columns (int, long, float, double, string) without dictionary. </li>
   *   <li> One string column with dictionary. </li>
   * </ul>
   *
   * @return Record reader for the rows used to build the segment.
   * @throws Exception
   */
  private static RecordReader buildSegment() throws Exception {
    Schema schema = new Schema();
    for (int i = 0; i < COLUMN_NAMES.length; i++) {
      DimensionFieldSpec dimensionFieldSpec = new DimensionFieldSpec(COLUMN_NAMES[i], DATA_TYPES[i], true);
      schema.addField(dimensionFieldSpec);
    }

    SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
    config.setRawIndexCreationColumns(Arrays.asList(NO_DICT_COLUMN_NAMES));
    config.setOutDir(INDEX_DIR_PATH);
    config.setSegmentName(SEGMENT_NAME);

    Random random = new Random();
    List<GenericRow> rows = new ArrayList<>(NUM_ROWS);
    for (int i = 0; i < NUM_ROWS; i++) {
      Map<String, Object> map = new HashMap<>(NUM_COLUMNS);
      for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
        String column = fieldSpec.getName();
        FieldSpec.DataType dataType = fieldSpec.getDataType();
        switch (dataType) {
          case INT:
            map.put(column, random.nextInt());
            break;
          case LONG:
            map.put(column, random.nextLong());
            break;
          case FLOAT:
            map.put(column, random.nextFloat());
            break;
          case DOUBLE:
            map.put(column, random.nextDouble());
            break;
          case STRING:
            map.put(column, "value_" + i);
            break;
          default:
            throw new IllegalArgumentException("Illegal data type specified: " + dataType);
        }
      }
      GenericRow genericRow = new GenericRow();
      genericRow.init(map);
      rows.add(genericRow);
    }

    RecordReader recordReader = new GenericRowRecordReader(rows, schema);
    SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
    driver.init(config, recordReader);
    driver.build();
    return recordReader;
  }
}
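The generators exercised above assign each distinct group-by value combination an integer group id without consulting a dictionary, which is why the test can bound them with getCurrentGroupKeyUpperBound() and enumerate them via getUniqueGroupKeys(). The standalone sketch below illustrates that core idea only; the class name and API are invented for illustration and are not part of Pinot. The real generators consume TransformBlocks via generateKeysForBlock(), as the test shows.

import java.util.HashMap;
import java.util.Map;

/**
 * Minimal illustrative sketch (NOT Pinot's implementation) of mapping raw
 * column values to dense integer group ids on the fly, with no dictionary.
 */
public class SimpleGroupKeyGeneratorSketch {
  private final Map<Object, Integer> _groupKeyMap = new HashMap<>();
  private int _numGroupKeys = 0;

  /** Returns the group id for a raw value, assigning the next dense id on first sight. */
  public int getGroupId(Object rawValue) {
    Integer groupId = _groupKeyMap.get(rawValue);
    if (groupId == null) {
      groupId = _numGroupKeys++;
      _groupKeyMap.put(rawValue, groupId);
    }
    return groupId;
  }

  /** Upper bound on group ids handed out so far, analogous to getCurrentGroupKeyUpperBound(). */
  public int getNumGroupKeys() {
    return _numGroupKeys;
  }

  public static void main(String[] args) {
    SimpleGroupKeyGeneratorSketch generator = new SimpleGroupKeyGeneratorSketch();
    // Three distinct raw values yield three dense group ids; repeats reuse their id.
    for (String value : new String[]{"a", "b", "a", "c"}) {
      System.out.println(value + " -> group " + generator.getGroupId(value));
    }
    System.out.println("Total groups: " + generator.getNumGroupKeys());
  }
}

For multiple group-by columns, the expected key in the test is formed by joining the per-column values with AggregationGroupByTrimmingService.GROUP_KEY_DELIMITER, as getExpectedGroupKeys() shows; a multi-column version of this sketch would key the map on that joined string.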