com.linkedin.pinot.query.selection.SelectionQueriesTest.java Source code

Java tutorial

Introduction

Here is the source code for com.linkedin.pinot.query.selection.SelectionQueriesTest.java

Source

/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.query.selection;

import java.io.File;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import org.apache.commons.io.FileUtils;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import com.linkedin.pinot.common.request.BrokerRequest;
import com.linkedin.pinot.common.request.FilterOperator;
import com.linkedin.pinot.common.request.Selection;
import com.linkedin.pinot.common.request.SelectionSort;
import com.linkedin.pinot.common.response.ServerInstance;
import com.linkedin.pinot.common.response.broker.BrokerResponseNative;
import com.linkedin.pinot.common.response.broker.SelectionResults;
import com.linkedin.pinot.common.segment.ReadMode;
import com.linkedin.pinot.common.utils.DataTable;
import com.linkedin.pinot.common.utils.DataTableBuilder.DataSchema;
import com.linkedin.pinot.common.utils.NamedThreadFactory;
import com.linkedin.pinot.common.utils.request.FilterQueryTree;
import com.linkedin.pinot.common.utils.request.RequestUtils;
import com.linkedin.pinot.core.common.DataSource;
import com.linkedin.pinot.core.common.Operator;
import com.linkedin.pinot.core.data.manager.offline.OfflineSegmentDataManager;
import com.linkedin.pinot.core.data.manager.offline.SegmentDataManager;
import com.linkedin.pinot.core.indexsegment.IndexSegment;
import com.linkedin.pinot.core.indexsegment.columnar.ColumnarSegmentLoader;
import com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig;
import com.linkedin.pinot.core.operator.BReusableFilteredDocIdSetOperator;
import com.linkedin.pinot.core.operator.MProjectionOperator;
import com.linkedin.pinot.core.operator.blocks.IntermediateResultsBlock;
import com.linkedin.pinot.core.operator.filter.MatchEntireSegmentOperator;
import com.linkedin.pinot.core.operator.query.MSelectionOrderByOperator;
import com.linkedin.pinot.core.plan.Plan;
import com.linkedin.pinot.core.plan.PlanNode;
import com.linkedin.pinot.core.plan.maker.InstancePlanMakerImplV2;
import com.linkedin.pinot.core.plan.maker.PlanMaker;
import com.linkedin.pinot.core.query.reduce.BrokerReduceService;
import com.linkedin.pinot.core.query.selection.SelectionOperatorService;
import com.linkedin.pinot.core.query.selection.SelectionOperatorUtils;
import com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver;
import com.linkedin.pinot.core.segment.creator.impl.SegmentCreationDriverFactory;
import com.linkedin.pinot.core.segment.index.ColumnMetadata;
import com.linkedin.pinot.core.segment.index.IndexSegmentImpl;
import com.linkedin.pinot.core.segment.index.SegmentMetadataImpl;
import com.linkedin.pinot.segments.v1.creator.SegmentTestUtils;
import com.linkedin.pinot.util.TestUtils;

public class SelectionQueriesTest {

    // Classpath-relative location of the Avro sample data every test segment is built from.
    private final String AVRO_DATA = "data/test_sample_data.avro";
    // Output directory (under the temp directory) for the single segment built by setupSegment().
    private static File INDEX_DIR = new File(
            FileUtils.getTempDirectory() + File.separator + "TestSelectionQueries");
    // Parent directory (under the temp directory) of the per-segment directories built by setupSegmentList().
    private static File INDEXES_DIR = new File(
            FileUtils.getTempDirectory() + File.separator + "TestSelectionQueriesList");

    // Segment loaded by setupSegment() and used by the inner-segment tests.
    public static IndexSegment _indexSegment;
    // Column metadata of _indexSegment, captured by setupSegment(); not read anywhere else in this class.
    public Map<String, ColumnMetadata> _medataMap;

    // Segment data managers created by setupSegmentList() and passed to the inter-segment plan maker.
    private static List<SegmentDataManager> _indexSegmentList = new ArrayList<SegmentDataManager>();

    /**
     * Builds and loads the shared single test segment before the tests of this class run.
     *
     * @throws Exception propagated from segment creation or loading
     */
    @BeforeClass
    public void setup() throws Exception {
        this.setupSegment();
    }

    /**
     * Destroys every segment loaded by this class and then removes the on-disk index directories.
     *
     * <p>Segments are destroyed before their directories are deleted, so the files are not pulled
     * out from under a segment that is still loaded.
     */
    @AfterClass
    public void tearDown() {
        if (_indexSegment != null) {
            _indexSegment.destroy();
            _indexSegment = null;
        }
        for (SegmentDataManager segmentDataManager : _indexSegmentList) {
            segmentDataManager.getSegment().destroy();
        }
        _indexSegmentList.clear();

        // deleteQuietly never throws and simply returns false for a missing path,
        // so no exists() guard is required.
        FileUtils.deleteQuietly(INDEX_DIR);
        FileUtils.deleteQuietly(INDEXES_DIR);
    }

    /**
     * Builds one segment from the Avro sample data into {@code INDEX_DIR}, loads it in heap mode
     * into {@code _indexSegment} and stores its column metadata map in {@code _medataMap}.
     *
     * @throws Exception propagated from resource lookup, segment creation or segment loading
     */
    private void setupSegment() throws Exception {
        final String avroPath = TestUtils
                .getFileFromResourceUrl(getClass().getClassLoader().getResource(AVRO_DATA));

        // Remove any index directory left behind by an earlier run before building.
        if (INDEX_DIR.exists()) {
            FileUtils.deleteQuietly(INDEX_DIR);
        }

        final SegmentGeneratorConfig generatorConfig = SegmentTestUtils
                .getSegmentGenSpecWithSchemAndProjectedColumns(
                        new File(avroPath), INDEX_DIR, "time_day", TimeUnit.DAYS, "test");

        final SegmentIndexCreationDriver creationDriver = SegmentCreationDriverFactory.get(null);
        creationDriver.init(generatorConfig);
        creationDriver.build();
        System.out.println("built at : " + INDEX_DIR.getAbsolutePath());

        // The driver writes the segment into a sub-directory named after the segment.
        _indexSegment = ColumnarSegmentLoader.load(
                new File(INDEX_DIR, creationDriver.getSegmentName()), ReadMode.heap);

        final SegmentMetadataImpl loadedMetadata =
                (SegmentMetadataImpl) ((IndexSegmentImpl) _indexSegment).getSegmentMetadata();
        _medataMap = loadedMetadata.getColumnMetadataMap();
    }

    /**
     * Rebuilds {@code _indexSegmentList} so that it holds exactly {@code numberOfSegments} freshly
     * built segments. Each segment is generated from the Avro sample data into its own
     * {@code segment_<i>} sub-directory of {@code INDEXES_DIR} and loaded in heap mode.
     *
     * <p>Segments left over from an earlier call are destroyed and dropped first. Without that,
     * every call would append another batch to the shared static list while the directories of
     * the previous batch are deleted underneath it.
     *
     * @param numberOfSegments number of segments to build and load
     * @throws Exception propagated from resource lookup, directory creation, segment creation or
     *         segment loading
     */
    private void setupSegmentList(int numberOfSegments) throws Exception {
        final String filePath = TestUtils
                .getFileFromResourceUrl(getClass().getClassLoader().getResource(AVRO_DATA));

        // Release whatever a previous invocation loaded before its files are removed.
        for (SegmentDataManager staleManager : _indexSegmentList) {
            staleManager.getSegment().destroy();
        }
        _indexSegmentList.clear();

        FileUtils.deleteQuietly(INDEXES_DIR);
        // Unlike File.mkdir(), forceMkdir throws if the directory cannot be created.
        FileUtils.forceMkdir(INDEXES_DIR);

        for (int i = 0; i < numberOfSegments; ++i) {
            final File segmentDir = new File(INDEXES_DIR, "segment_" + i);

            final SegmentGeneratorConfig config = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(
                    new File(filePath), segmentDir, "time_day", TimeUnit.DAYS, "test");

            final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
            driver.init(config);
            driver.build();

            System.out.println("built at : " + segmentDir.getAbsolutePath());
            _indexSegmentList.add(new OfflineSegmentDataManager(
                    ColumnarSegmentLoader.load(new File(segmentDir, driver.getSegmentName()), ReadMode.heap)));
        }
    }

    /**
     * Wires a {@code MSelectionOrderByOperator} by hand on top of a
     * {@code MatchEntireSegmentOperator} over the shared segment, drains the resulting priority
     * queue and checks that the first column of every row equals "i".
     */
    @Test
    public void testSelectionIteration() {
        final Operator entireSegmentFilter =
                new MatchEntireSegmentOperator(_indexSegment.getSegmentMetadata().getTotalDocs());
        final BReusableFilteredDocIdSetOperator docIds = new BReusableFilteredDocIdSetOperator(
                entireSegmentFilter, _indexSegment.getSegmentMetadata().getTotalDocs(), 5000);
        final MProjectionOperator projection = new MProjectionOperator(getDataSourceMap(), docIds);
        final MSelectionOrderByOperator orderByOperator =
                new MSelectionOrderByOperator(_indexSegment, getSelectionQuery(), projection);

        final IntermediateResultsBlock resultsBlock = (IntermediateResultsBlock) orderByOperator.nextBlock();
        final PriorityQueue<Serializable[]> rowQueue =
                (PriorityQueue<Serializable[]>) resultsBlock.getSelectionResult();
        final DataSchema schema = resultsBlock.getSelectionDataSchema();
        System.out.println(schema);

        // poll() yields null once the queue is drained; a PriorityQueue cannot contain null elements.
        for (Serializable[] row = rowQueue.poll(); row != null; row = rowQueue.poll()) {
            System.out.println(SelectionOperatorUtils.getRowStringFromSerializable(row, schema));
            Assert.assertEquals(row[0], "i");
        }
    }

    /**
     * Builds and runs the inner-segment plan for the unfiltered selection request on the shared
     * segment, then checks that every reduced row has "i" in its first column.
     *
     * @throws Exception propagated from plan execution or result reduction
     */
    @Test
    public void testInnerSegmentPlanMakerForSelectionNoFilter() throws Exception {
        final BrokerRequest request = getSelectionNoFilterBrokerRequest();
        final PlanMaker planMaker = new InstancePlanMakerImplV2();
        final PlanNode planRoot = planMaker.makeInnerSegmentPlan(_indexSegment, request);
        planRoot.showTree("");

        final MSelectionOrderByOperator selectionOperator = (MSelectionOrderByOperator) planRoot.run();
        final IntermediateResultsBlock block = (IntermediateResultsBlock) selectionOperator.nextBlock();
        System.out.println("RunningTime : " + block.getTimeUsedMs());
        System.out.println("NumDocsScanned : " + block.getNumDocsScanned());
        System.out.println("TotalDocs : " + block.getTotalRawDocs());

        final List<Serializable[]> reducedRows = getSelectionRows(request, block);
        for (int i = 0; i < reducedRows.size(); ++i) {
            Assert.assertEquals(reducedRows.get(i)[0], "i");
        }
    }

    /**
     * Builds and runs the inner-segment plan for the filtered selection request on the shared
     * segment. Expects 582 scanned documents out of 10001 raw documents, and "U" in the first
     * column of every reduced row.
     *
     * @throws Exception propagated from plan execution or result reduction
     */
    @Test
    public void testInnerSegmentPlanMakerForSelectionWithFilter() throws Exception {
        final BrokerRequest request = getSelectionWithFilterBrokerRequest();
        final PlanMaker planMaker = new InstancePlanMakerImplV2();
        final PlanNode planRoot = planMaker.makeInnerSegmentPlan(_indexSegment, request);
        planRoot.showTree("");

        final MSelectionOrderByOperator selectionOperator = (MSelectionOrderByOperator) planRoot.run();
        final IntermediateResultsBlock block = (IntermediateResultsBlock) selectionOperator.nextBlock();
        System.out.println("RunningTime : " + block.getTimeUsedMs());
        System.out.println("NumDocsScanned : " + block.getNumDocsScanned());
        System.out.println("TotalDocs : " + block.getTotalRawDocs());
        Assert.assertEquals(block.getNumDocsScanned(), 582);
        Assert.assertEquals(block.getTotalRawDocs(), 10001);

        final List<Serializable[]> reducedRows = getSelectionRows(request, block);
        for (int i = 0; i < reducedRows.size(); ++i) {
            Assert.assertEquals(reducedRows.get(i)[0], "U");
        }
    }

    /**
     * Reduces and renders the selection result of one block as if ten different servers had each
     * returned that block's data table, and verifies the rendered column order.
     *
     * @param brokerRequest request whose selection drives the reduce and render steps
     * @param resultBlock block that supplies the data schema and the per-server data tables
     * @return the rendered selection rows
     * @throws Exception propagated from data table creation or reduction
     */
    private List<Serializable[]> getSelectionRows(BrokerRequest brokerRequest, IntermediateResultsBlock resultBlock)
            throws Exception {
        final SelectionOperatorService service = new SelectionOperatorService(
                brokerRequest.getSelections(), resultBlock.getSelectionDataSchema());

        final String[] fakeServers = {
            "localhost:0000", "localhost:1111", "localhost:2222", "localhost:3333", "localhost:4444",
            "localhost:5555", "localhost:6666", "localhost:7777", "localhost:8888", "localhost:9999"
        };
        final Map<ServerInstance, DataTable> responsesByServer = new HashMap<ServerInstance, DataTable>();
        for (String fakeServer : fakeServers) {
            // getDataTable() is invoked once per entry, exactly as many times as before.
            responsesByServer.put(new ServerInstance(fakeServer), resultBlock.getDataTable());
        }

        final Collection<Serializable[]> reducedRows = service.reduceWithOrdering(responsesByServer);
        final SelectionResults rendered = service.renderSelectionResultsWithOrdering(reducedRows);

        final List<String> renderedColumns = rendered.getColumns();
        final String[] expectedColumns = {"column11", "column12", "met_impressionCount"};
        for (int i = 0; i < expectedColumns.length; ++i) {
            Assert.assertEquals(renderedColumns.get(i), expectedColumns[i]);
        }

        return rendered.getRows();
    }

    /**
     * Builds 20 segments, executes the inter-segment selection plan over them and reduces the
     * instance response twice with independent {@code BrokerReduceService} instances.
     *
     * <p>The first reduction must return the columns in the requested order
     * (column12, met_impressionCount, column11) with "i" in the third column of every row.
     * The second reduction must return 10 rows identical to those of the first.
     *
     * @throws Exception propagated from segment setup, plan execution or reduction
     */
    @Test
    public void testInterSegmentSelectionPlanMakerAndRun() throws Exception {
        final int numSegments = 20;
        setupSegmentList(numSegments);
        final PlanMaker instancePlanMaker = new InstancePlanMakerImplV2();
        final BrokerRequest brokerRequest = getSelectionNoFilterBrokerRequest1();
        final ExecutorService executorService = Executors
                .newCachedThreadPool(new NamedThreadFactory("test-plan-maker"));
        final DataTable instanceResponse;
        try {
            final Plan globalPlan = instancePlanMaker.makeInterSegmentPlan(_indexSegmentList, brokerRequest,
                    executorService, 150000);
            globalPlan.print();
            globalPlan.execute();
            instanceResponse = globalPlan.getInstanceResponse();
        } finally {
            // The pool is only needed while the plan runs; shut it down so its worker
            // threads do not outlive the test, even when plan execution fails.
            executorService.shutdown();
        }
        System.out.println("instanceResponse : " + instanceResponse);

        final Map<ServerInstance, DataTable> instanceResponseMap = new HashMap<ServerInstance, DataTable>();
        instanceResponseMap.put(new ServerInstance("localhost:0000"), instanceResponse);
        final BrokerReduceService reduceService = new BrokerReduceService();
        final BrokerResponseNative brokerResponse = reduceService.reduceOnDataTable(brokerRequest,
                instanceResponseMap);

        SelectionResults selectionResults = brokerResponse.getSelectionResults();
        List<String> columns = selectionResults.getColumns();
        Assert.assertEquals(columns.get(0), "column12");
        Assert.assertEquals(columns.get(1), "met_impressionCount");
        Assert.assertEquals(columns.get(2), "column11");

        List<Serializable[]> rows = selectionResults.getRows();
        for (int i = 0; i < rows.size(); ++i) {
            Serializable[] row = rows.get(i);
            Assert.assertEquals(row.length, 3);
            Assert.assertEquals(row[2], "i");
        }

        // Reduce the same response a second time; the outcome must match the first reduction.
        final BrokerReduceService brokerReduceService = new BrokerReduceService();
        final BrokerResponseNative brokerResponseNative = brokerReduceService.reduceOnDataTable(brokerRequest,
                instanceResponseMap);
        System.out.println("Selection Result : " + brokerResponseNative.getSelectionResults().toString());
        System.out.println("Time used : " + brokerResponseNative.getTimeUsedMs());

        selectionResults = brokerResponseNative.getSelectionResults();
        List<String> columnArray = selectionResults.getColumns();
        Assert.assertEquals(columnArray.size(), 3);
        Assert.assertEquals(columnArray.get(0), "column12");
        Assert.assertEquals(columnArray.get(1), "met_impressionCount");
        Assert.assertEquals(columnArray.get(2), "column11");

        List<Serializable[]> resultRows = selectionResults.getRows();
        Assert.assertEquals(resultRows.size(), 10);
        for (int i = 0; i < resultRows.size(); ++i) {
            Serializable[] resultRow = resultRows.get(i);
            Assert.assertEquals(resultRow.length, 3);
            Serializable[] expectedValues = rows.get(i);
            Assert.assertEquals(resultRow[0], expectedValues[0]);
            Assert.assertEquals(resultRow[1], expectedValues[1]);
            Assert.assertEquals(resultRow[2], expectedValues[2]);
        }
    }

    /**
     * Builds 20 segments and executes the inter-segment plan for a selection that is ordered by
     * column13, which is not one of the selected columns. The instance response is reduced twice
     * with independent {@code BrokerReduceService} instances.
     *
     * <p>The first reduction must return the columns column11, column12, met_impressionCount, and
     * every row must be ("U", "db", "6240989492723764727"). The second reduction must return
     * 10 rows identical to those of the first.
     *
     * @throws Exception propagated from segment setup, plan execution or reduction
     */
    @Test
    public void testInterSegmentSelectionWithOrderByColumnNotInSelectionColumnsPlanMakerAndRun() throws Exception {
        final int numSegments = 20;
        setupSegmentList(numSegments);
        final PlanMaker instancePlanMaker = new InstancePlanMakerImplV2();
        final BrokerRequest brokerRequest = getSelectionNoFilterBrokerRequest2();
        final ExecutorService executorService = Executors
                .newCachedThreadPool(new NamedThreadFactory("test-plan-maker"));
        final DataTable instanceResponse;
        try {
            final Plan globalPlan = instancePlanMaker.makeInterSegmentPlan(_indexSegmentList, brokerRequest,
                    executorService, 150000);
            globalPlan.print();
            globalPlan.execute();
            instanceResponse = globalPlan.getInstanceResponse();
        } finally {
            // The pool is only needed while the plan runs; shut it down so its worker
            // threads do not outlive the test, even when plan execution fails.
            executorService.shutdown();
        }
        System.out.println("instanceResponse : " + instanceResponse);

        final Map<ServerInstance, DataTable> instanceResponseMap = new HashMap<ServerInstance, DataTable>();
        instanceResponseMap.put(new ServerInstance("localhost:0000"), instanceResponse);

        final BrokerReduceService reduceService = new BrokerReduceService();
        final BrokerResponseNative brokerResponse = reduceService.reduceOnDataTable(brokerRequest,
                instanceResponseMap);
        System.out.println("Selection Result : " + brokerResponse.getSelectionResults());
        System.out.println("Time used : " + brokerResponse.getTimeUsedMs());

        SelectionResults selectionResults = brokerResponse.getSelectionResults();
        List<String> columns = selectionResults.getColumns();
        Assert.assertEquals(columns.get(0), "column11");
        Assert.assertEquals(columns.get(1), "column12");
        Assert.assertEquals(columns.get(2), "met_impressionCount");

        List<Serializable[]> rows = selectionResults.getRows();
        for (int i = 0; i < rows.size(); ++i) {
            Serializable[] row = rows.get(i);
            Assert.assertEquals(row.length, 3);
            Assert.assertEquals(row[0], "U");
            Assert.assertEquals(row[1], "db");
            Assert.assertEquals(row[2], "6240989492723764727");
        }

        // Reduce the same response a second time; the outcome must match the first reduction.
        final BrokerReduceService brokerReduceService = new BrokerReduceService();
        final BrokerResponseNative brokerResponseNative = brokerReduceService.reduceOnDataTable(brokerRequest,
                instanceResponseMap);
        System.out.println("Selection Result : " + brokerResponseNative.getSelectionResults());
        System.out.println("Time used : " + brokerResponseNative.getTimeUsedMs());

        selectionResults = brokerResponseNative.getSelectionResults();
        List<String> columnArray = selectionResults.getColumns();
        Assert.assertEquals(columnArray.size(), 3);
        Assert.assertEquals(columnArray.get(0), "column11");
        Assert.assertEquals(columnArray.get(1), "column12");
        Assert.assertEquals(columnArray.get(2), "met_impressionCount");

        List<Serializable[]> resultRows = selectionResults.getRows();
        Assert.assertEquals(resultRows.size(), 10);
        for (int i = 0; i < resultRows.size(); ++i) {
            Serializable[] actualRow = resultRows.get(i);
            Assert.assertEquals(actualRow.length, 3);
            Serializable[] expectedRow = rows.get(i);
            Assert.assertEquals(actualRow[0], expectedRow[0]);
            Assert.assertEquals(actualRow[1], expectedRow[1]);
            Assert.assertEquals(actualRow[2], expectedRow[2]);
        }
    }

    /**
     * Collects the data sources of the three columns used by the selection tests from the shared
     * segment.
     *
     * @return a new map from column name to that column's data source in {@code _indexSegment}
     */
    private static Map<String, DataSource> getDataSourceMap() {
        final String[] columnNames = {"column11", "column12", "met_impressionCount"};
        final Map<String, DataSource> sourcesByColumn = new HashMap<String, DataSource>();
        for (String columnName : columnNames) {
            sourcesByColumn.put(columnName, _indexSegment.getDataSource(columnName));
        }
        return sourcesByColumn;
    }

    /**
     * Creates a broker request without a filter whose selection is {@code getSelectionQuery()}.
     *
     * @return the new broker request
     */
    private BrokerRequest getSelectionNoFilterBrokerRequest() {
        final BrokerRequest request = new BrokerRequest();
        final Selection selection = getSelectionQuery();
        request.setSelections(selection);
        return request;
    }

    /**
     * Creates a broker request without a filter whose selection is
     * {@code getSelectionOrderbyQuery2()}.
     *
     * @return the new broker request
     */
    private BrokerRequest getSelectionNoFilterBrokerRequest2() {
        final BrokerRequest request = new BrokerRequest();
        final Selection selection = getSelectionOrderbyQuery2();
        request.setSelections(selection);
        return request;
    }

    /**
     * Creates a broker request without a filter whose selection is {@code getSelectionQuery1()}.
     *
     * @return the new broker request
     */
    private BrokerRequest getSelectionNoFilterBrokerRequest1() {
        final BrokerRequest request = new BrokerRequest();
        final Selection selection = getSelectionQuery1();
        request.setSelections(selection);
        return request;
    }

    /**
     * Creates a broker request whose selection is {@code getSelectionQuery()} and attaches the
     * filter produced by {@code setFilterQuery}.
     *
     * @return the new broker request
     */
    private BrokerRequest getSelectionWithFilterBrokerRequest() {
        final BrokerRequest request = new BrokerRequest();
        final Selection selection = getSelectionQuery();
        request.setSelections(selection);
        setFilterQuery(request);
        return request;
    }

    /**
     * Attaches an equality filter {@code column11 = "U"} to the given broker request.
     *
     * <p>The filter column and value are fixed, so the filter is always a single EQUALITY node.
     * The earlier comma-splitting AND branch could never run and has been removed.
     *
     * @param brokerRequest request to receive the generated filter; modified in place
     * @return the same {@code brokerRequest} instance that was passed in
     */
    private static BrokerRequest setFilterQuery(BrokerRequest brokerRequest) {
        final String filterColumn = "column11";
        final String filterVal = "U";

        final List<String> vals = new ArrayList<String>();
        vals.add(filterVal);
        final FilterQueryTree filterQueryTree =
                new FilterQueryTree(0, filterColumn, vals, FilterOperator.EQUALITY, null);

        RequestUtils.generateFilterFromTree(filterQueryTree, brokerRequest);
        return brokerRequest;
    }

    /**
     * Builds the selection of column11, column12 and met_impressionCount (in that order),
     * offset 0, size 10, sorted by column11 descending.
     *
     * @return the new selection
     */
    private Selection getSelectionQuery() {
        final SelectionSort byColumn11Descending = new SelectionSort();
        byColumn11Descending.setColumn("column11");
        byColumn11Descending.setIsAsc(false);
        final List<SelectionSort> sortSequence = new ArrayList<SelectionSort>();
        sortSequence.add(byColumn11Descending);

        final List<String> columns = new ArrayList<String>();
        for (String column : new String[] {"column11", "column12", "met_impressionCount"}) {
            columns.add(column);
        }

        final Selection query = new Selection();
        query.setSelectionColumns(columns);
        query.setOffset(0);
        query.setSize(10);
        query.setSelectionSortSequence(sortSequence);
        return query;
    }

    /**
     * Builds the selection of column12, met_impressionCount and column11 (in that order),
     * offset 0, size 10, sorted by column11 descending.
     *
     * @return the new selection
     */
    private Selection getSelectionQuery1() {
        final SelectionSort byColumn11Descending = new SelectionSort();
        byColumn11Descending.setColumn("column11");
        byColumn11Descending.setIsAsc(false);
        final List<SelectionSort> sortSequence = new ArrayList<SelectionSort>();
        sortSequence.add(byColumn11Descending);

        final List<String> columns = new ArrayList<String>();
        for (String column : new String[] {"column12", "met_impressionCount", "column11"}) {
            columns.add(column);
        }

        final Selection query = new Selection();
        query.setSelectionColumns(columns);
        query.setOffset(0);
        query.setSize(10);
        query.setSelectionSortSequence(sortSequence);
        return query;
    }

    /**
     * Builds the selection of column11, column12 and met_impressionCount (in that order),
     * offset 0, size 10, sorted descending by column13, which is not one of the selected columns.
     *
     * @return the new selection
     */
    private Selection getSelectionOrderbyQuery2() {
        final SelectionSort byColumn13Descending = new SelectionSort();
        byColumn13Descending.setColumn("column13");
        byColumn13Descending.setIsAsc(false);
        final List<SelectionSort> sortSequence = new ArrayList<SelectionSort>();
        sortSequence.add(byColumn13Descending);

        final List<String> columns = new ArrayList<String>();
        for (String column : new String[] {"column11", "column12", "met_impressionCount"}) {
            columns.add(column);
        }

        final Selection query = new Selection();
        query.setSelectionColumns(columns);
        query.setOffset(0);
        query.setSize(10);
        query.setSelectionSortSequence(sortSequence);
        return query;
    }
}