// Java tutorial
/** * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.linkedin.pinot.query.selection; import java.io.File; import java.io.Serializable; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.PriorityQueue; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import org.apache.commons.io.FileUtils; import org.testng.Assert; import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; import com.linkedin.pinot.common.request.BrokerRequest; import com.linkedin.pinot.common.request.FilterOperator; import com.linkedin.pinot.common.request.Selection; import com.linkedin.pinot.common.request.SelectionSort; import com.linkedin.pinot.common.response.ServerInstance; import com.linkedin.pinot.common.response.broker.BrokerResponseNative; import com.linkedin.pinot.common.response.broker.SelectionResults; import com.linkedin.pinot.common.segment.ReadMode; import com.linkedin.pinot.common.utils.DataTable; import com.linkedin.pinot.common.utils.DataTableBuilder.DataSchema; import com.linkedin.pinot.common.utils.NamedThreadFactory; import com.linkedin.pinot.common.utils.request.FilterQueryTree; import com.linkedin.pinot.common.utils.request.RequestUtils; import com.linkedin.pinot.core.common.DataSource; 
import com.linkedin.pinot.core.common.Operator; import com.linkedin.pinot.core.data.manager.offline.OfflineSegmentDataManager; import com.linkedin.pinot.core.data.manager.offline.SegmentDataManager; import com.linkedin.pinot.core.indexsegment.IndexSegment; import com.linkedin.pinot.core.indexsegment.columnar.ColumnarSegmentLoader; import com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig; import com.linkedin.pinot.core.operator.BReusableFilteredDocIdSetOperator; import com.linkedin.pinot.core.operator.MProjectionOperator; import com.linkedin.pinot.core.operator.blocks.IntermediateResultsBlock; import com.linkedin.pinot.core.operator.filter.MatchEntireSegmentOperator; import com.linkedin.pinot.core.operator.query.MSelectionOrderByOperator; import com.linkedin.pinot.core.plan.Plan; import com.linkedin.pinot.core.plan.PlanNode; import com.linkedin.pinot.core.plan.maker.InstancePlanMakerImplV2; import com.linkedin.pinot.core.plan.maker.PlanMaker; import com.linkedin.pinot.core.query.reduce.BrokerReduceService; import com.linkedin.pinot.core.query.selection.SelectionOperatorService; import com.linkedin.pinot.core.query.selection.SelectionOperatorUtils; import com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver; import com.linkedin.pinot.core.segment.creator.impl.SegmentCreationDriverFactory; import com.linkedin.pinot.core.segment.index.ColumnMetadata; import com.linkedin.pinot.core.segment.index.IndexSegmentImpl; import com.linkedin.pinot.core.segment.index.SegmentMetadataImpl; import com.linkedin.pinot.segments.v1.creator.SegmentTestUtils; import com.linkedin.pinot.util.TestUtils; public class SelectionQueriesTest { private final String AVRO_DATA = "data/test_sample_data.avro"; private static File INDEX_DIR = new File( FileUtils.getTempDirectory() + File.separator + "TestSelectionQueries"); private static File INDEXES_DIR = new File( FileUtils.getTempDirectory() + File.separator + "TestSelectionQueriesList"); public static IndexSegment 
_indexSegment; public Map<String, ColumnMetadata> _medataMap; private static List<SegmentDataManager> _indexSegmentList = new ArrayList<SegmentDataManager>(); @BeforeClass public void setup() throws Exception { setupSegment(); } @AfterClass public void tearDown() { if (INDEX_DIR.exists()) { FileUtils.deleteQuietly(INDEX_DIR); } if (INDEXES_DIR.exists()) { FileUtils.deleteQuietly(INDEXES_DIR); } if (_indexSegment != null) { _indexSegment.destroy(); _indexSegment = null; } for (SegmentDataManager segmentDataManager : _indexSegmentList) { segmentDataManager.getSegment().destroy(); } _indexSegmentList.clear(); } private void setupSegment() throws Exception { final String filePath = TestUtils .getFileFromResourceUrl(getClass().getClassLoader().getResource(AVRO_DATA)); if (INDEX_DIR.exists()) { FileUtils.deleteQuietly(INDEX_DIR); } final SegmentGeneratorConfig config = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns( new File(filePath), INDEX_DIR, "time_day", TimeUnit.DAYS, "test"); final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null); driver.init(config); driver.build(); System.out.println("built at : " + INDEX_DIR.getAbsolutePath()); final File indexSegmentDir = new File(INDEX_DIR, driver.getSegmentName()); _indexSegment = ColumnarSegmentLoader.load(indexSegmentDir, ReadMode.heap); _medataMap = ((SegmentMetadataImpl) ((IndexSegmentImpl) _indexSegment).getSegmentMetadata()) .getColumnMetadataMap(); } private void setupSegmentList(int numberOfSegments) throws Exception { final String filePath = TestUtils .getFileFromResourceUrl(getClass().getClassLoader().getResource(AVRO_DATA)); if (INDEXES_DIR.exists()) { FileUtils.deleteQuietly(INDEXES_DIR); } INDEXES_DIR.mkdir(); for (int i = 0; i < numberOfSegments; ++i) { final File segmentDir = new File(INDEXES_DIR, "segment_" + i); final SegmentGeneratorConfig config = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns( new File(filePath), segmentDir, "time_day", 
TimeUnit.DAYS, "test"); final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null); driver.init(config); driver.build(); System.out.println("built at : " + segmentDir.getAbsolutePath()); _indexSegmentList.add(new OfflineSegmentDataManager( ColumnarSegmentLoader.load(new File(segmentDir, driver.getSegmentName()), ReadMode.heap))); } } @Test public void testSelectionIteration() { Operator filterOperator = new MatchEntireSegmentOperator(_indexSegment.getSegmentMetadata().getTotalDocs()); final BReusableFilteredDocIdSetOperator docIdSetOperator = new BReusableFilteredDocIdSetOperator( filterOperator, _indexSegment.getSegmentMetadata().getTotalDocs(), 5000); final Map<String, DataSource> dataSourceMap = getDataSourceMap(); final MProjectionOperator projectionOperator = new MProjectionOperator(dataSourceMap, docIdSetOperator); final Selection selection = getSelectionQuery(); final MSelectionOrderByOperator selectionOperator = new MSelectionOrderByOperator(_indexSegment, selection, projectionOperator); final IntermediateResultsBlock block = (IntermediateResultsBlock) selectionOperator.nextBlock(); final PriorityQueue<Serializable[]> pq = (PriorityQueue<Serializable[]>) block.getSelectionResult(); final DataSchema dataSchema = block.getSelectionDataSchema(); System.out.println(dataSchema); while (!pq.isEmpty()) { final Serializable[] row = pq.poll(); System.out.println(SelectionOperatorUtils.getRowStringFromSerializable(row, dataSchema)); Assert.assertEquals(row[0], "i"); } } @Test public void testInnerSegmentPlanMakerForSelectionNoFilter() throws Exception { final BrokerRequest brokerRequest = getSelectionNoFilterBrokerRequest(); final PlanMaker instancePlanMaker = new InstancePlanMakerImplV2(); final PlanNode rootPlanNode = instancePlanMaker.makeInnerSegmentPlan(_indexSegment, brokerRequest); rootPlanNode.showTree(""); final MSelectionOrderByOperator operator = (MSelectionOrderByOperator) rootPlanNode.run(); final IntermediateResultsBlock 
resultBlock = (IntermediateResultsBlock) operator.nextBlock(); System.out.println("RunningTime : " + resultBlock.getTimeUsedMs()); System.out.println("NumDocsScanned : " + resultBlock.getNumDocsScanned()); System.out.println("TotalDocs : " + resultBlock.getTotalRawDocs()); List<Serializable[]> rows = getSelectionRows(brokerRequest, resultBlock); for (Serializable[] row : rows) { Assert.assertEquals(row[0], "i"); } } @Test public void testInnerSegmentPlanMakerForSelectionWithFilter() throws Exception { final BrokerRequest brokerRequest = getSelectionWithFilterBrokerRequest(); final PlanMaker instancePlanMaker = new InstancePlanMakerImplV2(); final PlanNode rootPlanNode = instancePlanMaker.makeInnerSegmentPlan(_indexSegment, brokerRequest); rootPlanNode.showTree(""); final MSelectionOrderByOperator operator = (MSelectionOrderByOperator) rootPlanNode.run(); final IntermediateResultsBlock resultBlock = (IntermediateResultsBlock) operator.nextBlock(); System.out.println("RunningTime : " + resultBlock.getTimeUsedMs()); System.out.println("NumDocsScanned : " + resultBlock.getNumDocsScanned()); System.out.println("TotalDocs : " + resultBlock.getTotalRawDocs()); Assert.assertEquals(resultBlock.getNumDocsScanned(), 582); Assert.assertEquals(resultBlock.getTotalRawDocs(), 10001); List<Serializable[]> rows = getSelectionRows(brokerRequest, resultBlock); for (Serializable[] row : rows) { Assert.assertEquals(row[0], "U"); } } private List<Serializable[]> getSelectionRows(BrokerRequest brokerRequest, IntermediateResultsBlock resultBlock) throws Exception { final SelectionOperatorService selectionOperatorService = new SelectionOperatorService( brokerRequest.getSelections(), resultBlock.getSelectionDataSchema()); final Map<ServerInstance, DataTable> instanceResponseMap = new HashMap<ServerInstance, DataTable>(); instanceResponseMap.put(new ServerInstance("localhost:0000"), resultBlock.getDataTable()); instanceResponseMap.put(new ServerInstance("localhost:1111"), 
resultBlock.getDataTable()); instanceResponseMap.put(new ServerInstance("localhost:2222"), resultBlock.getDataTable()); instanceResponseMap.put(new ServerInstance("localhost:3333"), resultBlock.getDataTable()); instanceResponseMap.put(new ServerInstance("localhost:4444"), resultBlock.getDataTable()); instanceResponseMap.put(new ServerInstance("localhost:5555"), resultBlock.getDataTable()); instanceResponseMap.put(new ServerInstance("localhost:6666"), resultBlock.getDataTable()); instanceResponseMap.put(new ServerInstance("localhost:7777"), resultBlock.getDataTable()); instanceResponseMap.put(new ServerInstance("localhost:8888"), resultBlock.getDataTable()); instanceResponseMap.put(new ServerInstance("localhost:9999"), resultBlock.getDataTable()); final Collection<Serializable[]> reducedResults = selectionOperatorService .reduceWithOrdering(instanceResponseMap); SelectionResults selectionResults = selectionOperatorService .renderSelectionResultsWithOrdering(reducedResults); List<String> columns = selectionResults.getColumns(); Assert.assertEquals(columns.get(0), "column11"); Assert.assertEquals(columns.get(1), "column12"); Assert.assertEquals(columns.get(2), "met_impressionCount"); return selectionResults.getRows(); } @Test public void testInterSegmentSelectionPlanMakerAndRun() throws Exception { final int numSegments = 20; setupSegmentList(numSegments); final PlanMaker instancePlanMaker = new InstancePlanMakerImplV2(); final BrokerRequest brokerRequest = getSelectionNoFilterBrokerRequest1(); final ExecutorService executorService = Executors .newCachedThreadPool(new NamedThreadFactory("test-plan-maker")); final Plan globalPlan = instancePlanMaker.makeInterSegmentPlan(_indexSegmentList, brokerRequest, executorService, 150000); globalPlan.print(); globalPlan.execute(); final DataTable instanceResponse = globalPlan.getInstanceResponse(); System.out.println("instanceResponse : " + instanceResponse); final Map<ServerInstance, DataTable> instanceResponseMap = new 
HashMap<ServerInstance, DataTable>(); instanceResponseMap.put(new ServerInstance("localhost:0000"), instanceResponse); final BrokerReduceService reduceService = new BrokerReduceService(); final BrokerResponseNative brokerResponse = reduceService.reduceOnDataTable(brokerRequest, instanceResponseMap); SelectionResults selectionResults = brokerResponse.getSelectionResults(); List<String> columns = selectionResults.getColumns(); Assert.assertEquals(columns.get(0), "column12"); Assert.assertEquals(columns.get(1), "met_impressionCount"); Assert.assertEquals(columns.get(2), "column11"); List<Serializable[]> rows = selectionResults.getRows(); for (int i = 0; i < rows.size(); ++i) { Serializable[] row = rows.get(i); Assert.assertEquals(row.length, 3); Assert.assertEquals(row[2], "i"); } final BrokerReduceService brokerReduceService = new BrokerReduceService(); final BrokerResponseNative brokerResponseNative = brokerReduceService.reduceOnDataTable(brokerRequest, instanceResponseMap); System.out.println("Selection Result : " + brokerResponseNative.getSelectionResults().toString()); System.out.println("Time used : " + brokerResponseNative.getTimeUsedMs()); selectionResults = brokerResponseNative.getSelectionResults(); List<String> columnArray = selectionResults.getColumns(); Assert.assertEquals(columnArray.size(), 3); Assert.assertEquals(columnArray.get(0), "column12"); Assert.assertEquals(columnArray.get(1), "met_impressionCount"); Assert.assertEquals(columnArray.get(2), "column11"); List<Serializable[]> resultRows = selectionResults.getRows(); Assert.assertEquals(resultRows.size(), 10); for (int i = 0; i < resultRows.size(); ++i) { Serializable[] resultRow = resultRows.get(i); Assert.assertEquals(resultRow.length, 3); Serializable[] expectedValues = rows.get(i); Assert.assertEquals(resultRow[0], expectedValues[0]); Assert.assertEquals(resultRow[1], expectedValues[1]); Assert.assertEquals(resultRow[2], expectedValues[2]); } } @Test public void 
testInterSegmentSelectionWithOrderByColumnNotInSelectionColumnsPlanMakerAndRun() throws Exception { final int numSegments = 20; setupSegmentList(numSegments); final PlanMaker instancePlanMaker = new InstancePlanMakerImplV2(); final BrokerRequest brokerRequest = getSelectionNoFilterBrokerRequest2(); final ExecutorService executorService = Executors .newCachedThreadPool(new NamedThreadFactory("test-plan-maker")); final Plan globalPlan = instancePlanMaker.makeInterSegmentPlan(_indexSegmentList, brokerRequest, executorService, 150000); globalPlan.print(); globalPlan.execute(); final DataTable instanceResponse = globalPlan.getInstanceResponse(); System.out.println("instanceResponse : " + instanceResponse); final Map<ServerInstance, DataTable> instanceResponseMap = new HashMap<ServerInstance, DataTable>(); instanceResponseMap.put(new ServerInstance("localhost:0000"), instanceResponse); final BrokerReduceService reduceService = new BrokerReduceService(); final BrokerResponseNative brokerResponse = reduceService.reduceOnDataTable(brokerRequest, instanceResponseMap); System.out.println("Selection Result : " + brokerResponse.getSelectionResults()); System.out.println("Time used : " + brokerResponse.getTimeUsedMs()); SelectionResults selectionResults = brokerResponse.getSelectionResults(); List<String> columns = selectionResults.getColumns(); Assert.assertEquals(columns.get(0), "column11"); Assert.assertEquals(columns.get(1), "column12"); Assert.assertEquals(columns.get(2), "met_impressionCount"); List<Serializable[]> rows = selectionResults.getRows(); for (int i = 0; i < rows.size(); ++i) { Serializable[] row = rows.get(i); Assert.assertEquals(row.length, 3); Assert.assertEquals(row[0], "U"); Assert.assertEquals(row[1], "db"); Assert.assertEquals(row[2], "6240989492723764727"); } final BrokerReduceService brokerReduceService = new BrokerReduceService(); final BrokerResponseNative brokerResponseNative = brokerReduceService.reduceOnDataTable(brokerRequest, 
instanceResponseMap); System.out.println("Selection Result : " + brokerResponseNative.getSelectionResults()); System.out.println("Time used : " + brokerResponseNative.getTimeUsedMs()); selectionResults = brokerResponseNative.getSelectionResults(); List<String> columnArray = selectionResults.getColumns(); Assert.assertEquals(columnArray.size(), 3); Assert.assertEquals(columnArray.get(0), "column11"); Assert.assertEquals(columnArray.get(1), "column12"); Assert.assertEquals(columnArray.get(2), "met_impressionCount"); List<Serializable[]> resultRows = selectionResults.getRows(); Assert.assertEquals(resultRows.size(), 10); for (int i = 0; i < resultRows.size(); ++i) { Serializable[] actualRow = resultRows.get(i); Assert.assertEquals(actualRow.length, 3); Serializable[] expectedRow = rows.get(i); Assert.assertEquals(actualRow[0], expectedRow[0]); Assert.assertEquals(actualRow[1], expectedRow[1]); Assert.assertEquals(actualRow[2], expectedRow[2]); } } private static Map<String, DataSource> getDataSourceMap() { final Map<String, DataSource> dataSourceMap = new HashMap<String, DataSource>(); dataSourceMap.put("column11", _indexSegment.getDataSource("column11")); dataSourceMap.put("column12", _indexSegment.getDataSource("column12")); dataSourceMap.put("met_impressionCount", _indexSegment.getDataSource("met_impressionCount")); return dataSourceMap; } private BrokerRequest getSelectionNoFilterBrokerRequest() { final BrokerRequest brokerRequest = new BrokerRequest(); brokerRequest.setSelections(getSelectionQuery()); return brokerRequest; } private BrokerRequest getSelectionNoFilterBrokerRequest2() { final BrokerRequest brokerRequest = new BrokerRequest(); brokerRequest.setSelections(getSelectionOrderbyQuery2()); return brokerRequest; } private BrokerRequest getSelectionNoFilterBrokerRequest1() { final BrokerRequest brokerRequest = new BrokerRequest(); brokerRequest.setSelections(getSelectionQuery1()); return brokerRequest; } private BrokerRequest 
getSelectionWithFilterBrokerRequest() { final BrokerRequest brokerRequest = new BrokerRequest(); brokerRequest.setSelections(getSelectionQuery()); setFilterQuery(brokerRequest); return brokerRequest; } private static BrokerRequest setFilterQuery(BrokerRequest brokerRequest) { FilterQueryTree filterQueryTree; final String filterColumn = "column11"; final String filterVal = "U"; if (filterColumn.contains(",")) { final String[] filterColumns = filterColumn.split(","); final String[] filterValues = filterVal.split(","); final List<FilterQueryTree> nested = new ArrayList<FilterQueryTree>(); for (int i = 0; i < filterColumns.length; i++) { final List<String> vals = new ArrayList<String>(); vals.add(filterValues[i]); final FilterQueryTree d = new FilterQueryTree(i + 1, filterColumns[i], vals, FilterOperator.EQUALITY, null); nested.add(d); } filterQueryTree = new FilterQueryTree(0, null, null, FilterOperator.AND, nested); } else { final List<String> vals = new ArrayList<String>(); vals.add(filterVal); filterQueryTree = new FilterQueryTree(0, filterColumn, vals, FilterOperator.EQUALITY, null); } RequestUtils.generateFilterFromTree(filterQueryTree, brokerRequest); return brokerRequest; } private Selection getSelectionQuery() { final Selection selection = new Selection(); final List<String> selectionColumns = new ArrayList<String>(); selectionColumns.add("column11"); selectionColumns.add("column12"); selectionColumns.add("met_impressionCount"); selection.setSelectionColumns(selectionColumns); selection.setOffset(0); selection.setSize(10); final List<SelectionSort> selectionSortSequence = new ArrayList<SelectionSort>(); final SelectionSort selectionSort = new SelectionSort(); selectionSort.setColumn("column11"); selectionSort.setIsAsc(false); selectionSortSequence.add(selectionSort); selection.setSelectionSortSequence(selectionSortSequence); return selection; } private Selection getSelectionQuery1() { final Selection selection = new Selection(); final List<String> 
selectionColumns = new ArrayList<String>(); selectionColumns.add("column12"); selectionColumns.add("met_impressionCount"); selectionColumns.add("column11"); selection.setSelectionColumns(selectionColumns); selection.setOffset(0); selection.setSize(10); final List<SelectionSort> selectionSortSequence = new ArrayList<SelectionSort>(); final SelectionSort selectionSort = new SelectionSort(); selectionSort.setColumn("column11"); selectionSort.setIsAsc(false); selectionSortSequence.add(selectionSort); selection.setSelectionSortSequence(selectionSortSequence); return selection; } private Selection getSelectionOrderbyQuery2() { final Selection selection = new Selection(); final List<String> selectionColumns = new ArrayList<String>(); selectionColumns.add("column11"); selectionColumns.add("column12"); selectionColumns.add("met_impressionCount"); selection.setSelectionColumns(selectionColumns); selection.setOffset(0); selection.setSize(10); final List<SelectionSort> selectionSortSequence = new ArrayList<SelectionSort>(); final SelectionSort selectionSort = new SelectionSort(); selectionSort.setColumn("column13"); selectionSort.setIsAsc(false); selectionSortSequence.add(selectionSort); selection.setSelectionSortSequence(selectionSortSequence); return selection; } }