com.linkedin.pinot.query.aggregation.AggregationMVGroupByMVQueriesTest.java Source code


Introduction

Here is the source code for com.linkedin.pinot.query.aggregation.AggregationMVGroupByMVQueriesTest.java. This TestNG class builds 20 segments from a multi-value Avro test file and verifies the sumMV, avgMV, minMV, maxMV and distinctCountMV aggregation functions when grouping by a single-value column (column6) and by the multi-value column itself (column7).

Source

/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.query.aggregation;

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

import com.linkedin.pinot.common.request.BrokerRequest;
import com.linkedin.pinot.common.response.ServerInstance;
import com.linkedin.pinot.common.response.broker.AggregationResult;
import com.linkedin.pinot.common.response.broker.BrokerResponseNative;
import com.linkedin.pinot.common.response.broker.GroupByResult;
import com.linkedin.pinot.common.segment.ReadMode;
import com.linkedin.pinot.common.utils.DataTable;
import com.linkedin.pinot.common.utils.NamedThreadFactory;
import com.linkedin.pinot.core.data.manager.offline.OfflineSegmentDataManager;
import com.linkedin.pinot.core.data.manager.offline.SegmentDataManager;
import com.linkedin.pinot.core.indexsegment.columnar.ColumnarSegmentLoader;
import com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig;
import com.linkedin.pinot.core.plan.Plan;
import com.linkedin.pinot.core.plan.maker.InstancePlanMakerImplV2;
import com.linkedin.pinot.core.plan.maker.PlanMaker;
import com.linkedin.pinot.core.query.config.QueryExecutorConfig;
import com.linkedin.pinot.core.query.reduce.BrokerReduceService;
import com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver;
import com.linkedin.pinot.core.segment.creator.impl.SegmentCreationDriverFactory;
import com.linkedin.pinot.pql.parsers.Pql2Compiler;
import com.linkedin.pinot.segments.v1.creator.SegmentTestUtils;
import com.linkedin.pinot.util.TestUtils;

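/**
 * Tests aggregation functions over a multi-value column (sumMV, avgMV, minMV, maxMV and
 * distinctCountMV) grouped by either a single-value column (column6) or the multi-value
 * column itself (column7), using segments generated from a test Avro file.
 */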
public class AggregationMVGroupByMVQueriesTest {

    private final static Logger LOGGER = LoggerFactory.getLogger(AggregationMVGroupByMVQueriesTest.class);

    private final static String AVRO_DATA = "data/test_data-mv.avro";
    private final static File INDEXES_DIR = new File(
            FileUtils.getTempDirectory() + File.separator + "AggregationMVGroupByMVQueriesTest");
    private final static Pql2Compiler PQL2_COMPILER = new Pql2Compiler();
    private final static List<SegmentDataManager> INDEX_SEGMENT_LIST = new ArrayList<SegmentDataManager>();
    private final int NUM_SEGMENTS = 20;
    private static PlanMaker INSTANCE_PLAN_MAKER;

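    // Creates the instance plan maker and builds NUM_SEGMENTS test segments before any test runs.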
    @BeforeClass
    public void setup() throws Exception {
        INSTANCE_PLAN_MAKER = new InstancePlanMakerImplV2(new QueryExecutorConfig(new PropertiesConfiguration()));
        setupSegmentList(NUM_SEGMENTS);
    }

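    // Deletes the temporary index directory and destroys all loaded segments after the tests complete.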
    @AfterClass
    public void tearDown() {
        if (INDEXES_DIR.exists()) {
            FileUtils.deleteQuietly(INDEXES_DIR);
        }
        for (SegmentDataManager segmentDataManager : INDEX_SEGMENT_LIST) {
            segmentDataManager.getSegment().destroy();
        }
        INDEX_SEGMENT_LIST.clear();
    }

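    /**
     * Builds the requested number of segments from the test Avro file under INDEXES_DIR and loads
     * each one into memory (heap read mode) wrapped in an OfflineSegmentDataManager.
     */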
    private void setupSegmentList(int numberOfSegments) throws Exception {
        final String filePath = TestUtils
                .getFileFromResourceUrl(getClass().getClassLoader().getResource(AVRO_DATA));

        if (INDEXES_DIR.exists()) {
            FileUtils.deleteQuietly(INDEXES_DIR);
        }
        INDEXES_DIR.mkdir();

        for (int i = 0; i < numberOfSegments; ++i) {
            final File segmentDir = new File(INDEXES_DIR, "segment_" + i);

            final SegmentGeneratorConfig config = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(
                    new File(filePath), segmentDir, "daysSinceEpoch", TimeUnit.DAYS, "test");

            final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
            driver.init(config);
            driver.build();

            LOGGER.debug("built at : {}", segmentDir.getAbsolutePath());
            INDEX_SEGMENT_LIST.add(new OfflineSegmentDataManager(
                    ColumnarSegmentLoader.load(new File(segmentDir, driver.getSegmentName()), ReadMode.heap)));
        }
    }

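    // sumMV aggregation grouped by the single-value column6: verifies the scanned document count,
    // the reported function name and group-by column, and a set of expected group values.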
    @Test
    public void testSumAggrMVGroupBySV() throws Exception {
        final BrokerRequest brokerRequest = PQL2_COMPILER
                .compileToBrokerRequest("select sumMV(column7) from myTable group by column6");
        final BrokerResponseNative brokerResponse = queryAndGetBrokerResponse(brokerRequest);
        Assert.assertEquals(100000 * NUM_SEGMENTS, brokerResponse.getNumDocsScanned());
        Assert.assertEquals(brokerRequest.getAggregationsInfo().size(),
                brokerResponse.getAggregationResults().size());

        // Assertion on GroupBy
        AggregationResult aggregationResult = brokerResponse.getAggregationResults().get(0);
        Assert.assertEquals(aggregationResult.getFunction(), "sum_column7");
        Assert.assertEquals(aggregationResult.getGroupByColumns().get(0), "column6");

        System.out.println("testSumAggrMVGroupBySV");
        System.out.println(aggregationResult.getGroupByResult());

        Map<String, String> expectedGroupByResults = new HashMap<>();
        expectedGroupByResults.put("[2147483647]", "1967352841480140.00000");
        expectedGroupByResults.put("[1417167]", "1460288879960.00000");
        expectedGroupByResults.put("[1044]", "1202591795900.00000");
        expectedGroupByResults.put("[1033]", "1159642469380.00000");
        expectedGroupByResults.put("[36614]", "1073742033720.00000");
        expectedGroupByResults.put("[200683]", "1073741823500.00000");
        expectedGroupByResults.put("[62009]", "1030792150560.00000");
        expectedGroupByResults.put("[1038]", "944893521040.00000");
        expectedGroupByResults.put("[113103]", "944892804680.00000");
        expectedGroupByResults.put("[1028]", "773096958740.00000");

        Map<String, String> actualGroupByResults = convertGroupByResultsToMap(aggregationResult.getGroupByResult());
        assertGroupByResults(actualGroupByResults, expectedGroupByResults);
    }

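    // Same sumMV aggregation, but grouped by the multi-value column7 itself.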
    @Test
    public void testSumAggrMVGroupByMV() throws Exception {
        final BrokerRequest brokerRequest = PQL2_COMPILER
                .compileToBrokerRequest("select sumMV(column7) from myTable group by column7");
        final BrokerResponseNative brokerResponse = queryAndGetBrokerResponse(brokerRequest);
        Assert.assertEquals(2000000, brokerResponse.getNumDocsScanned());
        Assert.assertEquals(brokerRequest.getAggregationsInfo().size(),
                brokerResponse.getAggregationResults().size());

        // Assertion on GroupBy
        AggregationResult aggregationResult = brokerResponse.getAggregationResults().get(0);
        Assert.assertEquals(aggregationResult.getFunction(), "sum_column7");
        Assert.assertEquals(aggregationResult.getGroupByColumns().get(0), "column7");

        System.out.println("testSumAggrMVGroupByMV");
        System.out.println(aggregationResult.getGroupByResult());

        Map<String, String> expectedGroupByResults = new HashMap<>();
        expectedGroupByResults.put("[2147483647]", "2144863716950660.00000");
        expectedGroupByResults.put("[363]", "115825340.00000");
        expectedGroupByResults.put("[469]", "111044220.00000");
        expectedGroupByResults.put("[564]", "84726960.00000");
        expectedGroupByResults.put("[246]", "84011760.00000");
        expectedGroupByResults.put("[523]", "60105640.00000");
        expectedGroupByResults.put("[211]", "53276920.00000");
        expectedGroupByResults.put("[288]", "52121900.00000");
        expectedGroupByResults.put("[225]", "36344500.00000");
        expectedGroupByResults.put("[478]", "31704560.00000");

        Map<String, String> actualGroupByResults = convertGroupByResultsToMap(aggregationResult.getGroupByResult());
        assertGroupByResults(actualGroupByResults, expectedGroupByResults);

    }

    @Test
    public void testAvgAggrMVGroupBySV() throws Exception {
        final BrokerRequest brokerRequest = PQL2_COMPILER
                .compileToBrokerRequest("select avgMV(column7) from myTable group by column6");
        final BrokerResponseNative brokerResponse = queryAndGetBrokerResponse(brokerRequest);
        Assert.assertEquals(100000 * NUM_SEGMENTS, brokerResponse.getNumDocsScanned());
        Assert.assertEquals(brokerRequest.getAggregationsInfo().size(),
                brokerResponse.getAggregationResults().size());

        // Assertion on GroupBy
        AggregationResult aggregationResult = brokerResponse.getAggregationResults().get(0);
        Assert.assertEquals(aggregationResult.getFunction(), "avg_column7");
        Assert.assertEquals(aggregationResult.getGroupByColumns().get(0), "column6");

        System.out.println("testAvgAggrMVGroupBySV");
        System.out.println(aggregationResult.getGroupByResult());
    }

    @Test
    public void testAvgAggrMVGroupByMV() throws Exception {
        final BrokerRequest brokerRequest = PQL2_COMPILER
                .compileToBrokerRequest("select avgMV(column7) from myTable group by column7");
        final BrokerResponseNative brokerResponse = queryAndGetBrokerResponse(brokerRequest);
        Assert.assertEquals(2000000, brokerResponse.getNumDocsScanned());
        Assert.assertEquals(brokerRequest.getAggregationsInfo().size(),
                brokerResponse.getAggregationResults().size());

        // Assertion on GroupBy
        AggregationResult aggregationResult = brokerResponse.getAggregationResults().get(0);
        Assert.assertEquals(aggregationResult.getFunction(), "avg_column7");
        Assert.assertEquals(aggregationResult.getGroupByColumns().get(0), "column7");

        System.out.println("testAvgAggrMVGroupByMV");
        System.out.println(aggregationResult.getGroupByResult());

        Map<String, String> expectedGroupByResults = new HashMap<>();
        expectedGroupByResults.put("[2147483647]", "2147483647.00000");
        expectedGroupByResults.put("[536]", "536.00000");
        expectedGroupByResults.put("[566]", "522.00000");
        expectedGroupByResults.put("[532]", "519.75000");
        expectedGroupByResults.put("[552]", "504.96429");
        expectedGroupByResults.put("[529]", "499.44444");
        expectedGroupByResults.put("[555]", "495.66667");
        expectedGroupByResults.put("[570]", "495.00000");
        expectedGroupByResults.put("[554]", "493.92857");
        expectedGroupByResults.put("[545]", "493.03659");

        Map<String, String> actualGroupByResults = convertGroupByResultsToMap(aggregationResult.getGroupByResult());
        assertGroupByResults(actualGroupByResults, expectedGroupByResults);
    }

    @Test
    public void testMinAggrMVGroupBySV() throws Exception {
        final BrokerRequest brokerRequest = PQL2_COMPILER
                .compileToBrokerRequest("select minMV(column7) from myTable group by column6");
        final BrokerResponseNative brokerResponse = queryAndGetBrokerResponse(brokerRequest);
        Assert.assertEquals(100000 * NUM_SEGMENTS, brokerResponse.getNumDocsScanned());
        Assert.assertEquals(brokerRequest.getAggregationsInfo().size(),
                brokerResponse.getAggregationResults().size());

        // Assertion on GroupBy
        AggregationResult aggregationResult = brokerResponse.getAggregationResults().get(0);
        Assert.assertEquals(aggregationResult.getFunction(), "min_column7");
        Assert.assertEquals(aggregationResult.getGroupByColumns().get(0), "column6");

        System.out.println("testMinAggrMVGroupBySV");
        System.out.println(aggregationResult.getGroupByResult());

    }

    @Test
    public void testMinAggrMVGroupByMV() throws Exception {
        final BrokerRequest brokerRequest = PQL2_COMPILER
                .compileToBrokerRequest("select minMV(column7) from myTable group by column7");
        final BrokerResponseNative brokerResponse = queryAndGetBrokerResponse(brokerRequest);
        Assert.assertEquals(2000000, brokerResponse.getNumDocsScanned());
        Assert.assertEquals(brokerRequest.getAggregationsInfo().size(),
                brokerResponse.getAggregationResults().size());

        // Assertion on GroupBy
        AggregationResult aggregationResult = brokerResponse.getAggregationResults().get(0);
        Assert.assertEquals(aggregationResult.getFunction(), "min_column7");
        Assert.assertEquals(aggregationResult.getGroupByColumns().get(0), "column7");

        System.out.println("testMinAggrMVGroupByMV");
        System.out.println(aggregationResult.getGroupByResult());

    }

    @Test
    public void testMaxAggrMVGroupBySV() throws Exception {
        final BrokerRequest brokerRequest = PQL2_COMPILER
                .compileToBrokerRequest("select maxMV(column7) from myTable group by column6");
        final BrokerResponseNative brokerResponse = queryAndGetBrokerResponse(brokerRequest);
        Assert.assertEquals(100000 * NUM_SEGMENTS, brokerResponse.getNumDocsScanned());
        Assert.assertEquals(brokerRequest.getAggregationsInfo().size(),
                brokerResponse.getAggregationResults().size());

        // Assertion on GroupBy
        AggregationResult aggregationResult = brokerResponse.getAggregationResults().get(0);
        Assert.assertEquals(aggregationResult.getFunction(), "max_column7");
        Assert.assertEquals(aggregationResult.getGroupByColumns().get(0), "column6");

        System.out.println("testMaxAggrMVGroupBySV");
        System.out.println(aggregationResult.getGroupByResult());

    }

    @Test
    public void testMaxAggrMVGroupByMV() throws Exception {
        final BrokerRequest brokerRequest = PQL2_COMPILER
                .compileToBrokerRequest("select maxMV(column7) from myTable group by column7");
        final BrokerResponseNative brokerResponse = queryAndGetBrokerResponse(brokerRequest);
        Assert.assertEquals(2000000, brokerResponse.getNumDocsScanned());
        Assert.assertEquals(brokerRequest.getAggregationsInfo().size(),
                brokerResponse.getAggregationResults().size());

        // Assertion on GroupBy
        AggregationResult aggregationResult = brokerResponse.getAggregationResults().get(0);
        Assert.assertEquals(aggregationResult.getFunction(), "max_column7");
        Assert.assertEquals(aggregationResult.getGroupByColumns().get(0), "column7");

        System.out.println("testMaxAggrMVGroupByMV");
        System.out.println(aggregationResult.getGroupByResult());

        Map<String, String> expectedGroupByResults = new HashMap<>();
        expectedGroupByResults.put("[2147483647]", "2147483647.00000");

        Map<String, String> actualGroupByResults = convertGroupByResultsToMap(aggregationResult.getGroupByResult());
        assertGroupByResults(actualGroupByResults, expectedGroupByResults);

    }

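    // distinctCountMV of column7 grouped by the single-value column6: counts the distinct
    // column7 values within each group.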
    @Test
    public void testDistinctCountAggrMVGroupBySV() throws Exception {
        final BrokerRequest brokerRequest = PQL2_COMPILER
                .compileToBrokerRequest("select distinctCountMV(column7) from myTable group by column6");
        final BrokerResponseNative brokerResponse = queryAndGetBrokerResponse(brokerRequest);
        Assert.assertEquals(100000 * NUM_SEGMENTS, brokerResponse.getNumDocsScanned());
        Assert.assertEquals(brokerRequest.getAggregationsInfo().size(),
                brokerResponse.getAggregationResults().size());

        // Assertion on GroupBy
        AggregationResult aggregationResult = brokerResponse.getAggregationResults().get(0);
        Assert.assertEquals(aggregationResult.getFunction(), "distinctCount_column7");
        Assert.assertEquals(aggregationResult.getGroupByColumns().get(0), "column6");

        System.out.println("testDistinctCountAggrMVGroupBySV");
        System.out.println(aggregationResult.getGroupByResult());
        Map<String, String> expectedGroupByResults = new HashMap<>();
        expectedGroupByResults.put("[2147483647]", "301");
        expectedGroupByResults.put("[1035]", "85");
        expectedGroupByResults.put("[1441]", "81");
        expectedGroupByResults.put("[1009]", "80");
        expectedGroupByResults.put("[1025]", "71");
        expectedGroupByResults.put("[1063]", "69");
        expectedGroupByResults.put("[1586]", "62");
        expectedGroupByResults.put("[1028]", "62");
        expectedGroupByResults.put("[1033]", "56");
        expectedGroupByResults.put("[162479]", "53");

        Map<String, String> actualGroupByResults = convertGroupByResultsToMap(aggregationResult.getGroupByResult());
        assertGroupByResults(actualGroupByResults, expectedGroupByResults);

    }

    @Test
    public void testDistinctCountAggrMVGroupByMV() throws Exception {
        final BrokerRequest brokerRequest = PQL2_COMPILER
                .compileToBrokerRequest("select distinctCountMV(column7) from myTable group by column7");
        final BrokerResponseNative brokerResponse = queryAndGetBrokerResponse(brokerRequest);
        Assert.assertEquals(2000000, brokerResponse.getNumDocsScanned());
        Assert.assertEquals(brokerRequest.getAggregationsInfo().size(),
                brokerResponse.getAggregationResults().size());

        // Assertion on GroupBy
        AggregationResult aggregationResult = brokerResponse.getAggregationResults().get(0);
        Assert.assertEquals(aggregationResult.getFunction(), "distinctCount_column7");
        Assert.assertEquals(aggregationResult.getGroupByColumns().get(0), "column7");

        System.out.println("testDistinctCountAggrMVGroupByMV");
        System.out.println(aggregationResult.getGroupByResult());
        Map<String, String> expectedGroupByResults = new HashMap<>();
        expectedGroupByResults.put("[469]", "228");
        expectedGroupByResults.put("[363]", "227");
        expectedGroupByResults.put("[288]", "226");
        expectedGroupByResults.put("[211]", "223");
        expectedGroupByResults.put("[246]", "222");
        expectedGroupByResults.put("[523]", "202");
        expectedGroupByResults.put("[225]", "192");
        expectedGroupByResults.put("[483]", "174");

        Map<String, String> actualGroupByResults = convertGroupByResultsToMap(aggregationResult.getGroupByResult());
        assertGroupByResults(actualGroupByResults, expectedGroupByResults);

    }

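    /**
     * Asserts that each expected group key is present in the actual results and carries the
     * expected aggregated value.
     */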
    private void assertGroupByResults(Map<String, String> actualGroupByResults,
            Map<String, String> expectedGroupByResults) {
        for (String groupKey : expectedGroupByResults.keySet()) {
            Assert.assertTrue(actualGroupByResults.containsKey(groupKey));
            Assert.assertEquals(actualGroupByResults.get(groupKey), expectedGroupByResults.get(groupKey));
        }
    }

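    /**
     * Flattens a list of GroupByResult entries into a map from the group's string representation
     * to the aggregated value's string form.
     */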
    private Map<String, String> convertGroupByResultsToMap(List<GroupByResult> groupByResultList) {
        Map<String, String> groupByResultsMap = new HashMap<>();
        for (GroupByResult groupByResult : groupByResultList) {
            String key = Arrays.toString(groupByResult.getGroup().toArray());
            String value = groupByResult.getValue().toString();
            groupByResultsMap.put(key, value);
        }
        return groupByResultsMap;
    }

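    /**
     * Builds and executes an inter-segment query plan over all loaded segments, then reduces the
     * single instance response into a BrokerResponseNative using the BrokerReduceService.
     */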
    private BrokerResponseNative queryAndGetBrokerResponse(final BrokerRequest brokerRequest) {
        final ExecutorService executorService = Executors
                .newCachedThreadPool(new NamedThreadFactory("test-plan-maker"));
        final Plan globalPlan = INSTANCE_PLAN_MAKER.makeInterSegmentPlan(INDEX_SEGMENT_LIST, brokerRequest,
                executorService, 150000);
        globalPlan.print();
        globalPlan.execute();
        final DataTable instanceResponse = globalPlan.getInstanceResponse();
        LOGGER.debug("Instance Response : {}", instanceResponse);

        final BrokerReduceService reduceService = new BrokerReduceService();
        final Map<ServerInstance, DataTable> instanceResponseMap = new HashMap<ServerInstance, DataTable>();
        instanceResponseMap.put(new ServerInstance("localhost:0000"), instanceResponse);
        final BrokerResponseNative brokerResponse = reduceService.reduceOnDataTable(brokerRequest,
                instanceResponseMap);
        return brokerResponse;
    }
}