Java tutorial
/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.query.aggregation;

import com.linkedin.pinot.common.query.ReduceService;
import com.linkedin.pinot.common.request.AggregationInfo;
import com.linkedin.pinot.common.request.BrokerRequest;
import com.linkedin.pinot.common.request.FilterOperator;
import com.linkedin.pinot.common.response.ServerInstance;
import com.linkedin.pinot.common.response.broker.AggregationResult;
import com.linkedin.pinot.common.response.broker.BrokerResponseNative;
import com.linkedin.pinot.common.segment.ReadMode;
import com.linkedin.pinot.common.utils.DataTable;
import com.linkedin.pinot.common.utils.NamedThreadFactory;
import com.linkedin.pinot.common.utils.request.FilterQueryTree;
import com.linkedin.pinot.common.utils.request.RequestUtils;
import com.linkedin.pinot.core.common.DataSource;
import com.linkedin.pinot.core.common.Operator;
import com.linkedin.pinot.core.data.manager.offline.OfflineSegmentDataManager;
import com.linkedin.pinot.core.data.manager.offline.SegmentDataManager;
import com.linkedin.pinot.core.indexsegment.IndexSegment;
import com.linkedin.pinot.core.indexsegment.columnar.ColumnarSegmentLoader;
import com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig;
import com.linkedin.pinot.core.operator.BReusableFilteredDocIdSetOperator;
import com.linkedin.pinot.core.operator.MProjectionOperator;
import com.linkedin.pinot.core.operator.UReplicatedProjectionOperator;
import com.linkedin.pinot.core.operator.blocks.IntermediateResultsBlock;
import com.linkedin.pinot.core.operator.filter.MatchEntireSegmentOperator;
import com.linkedin.pinot.core.operator.query.BAggregationFunctionOperator;
import com.linkedin.pinot.core.operator.query.MAggregationOperator;
import com.linkedin.pinot.core.plan.Plan;
import com.linkedin.pinot.core.plan.PlanNode;
import com.linkedin.pinot.core.plan.maker.InstancePlanMakerImplV2;
import com.linkedin.pinot.core.plan.maker.PlanMaker;
import com.linkedin.pinot.core.query.aggregation.CombineService;
import com.linkedin.pinot.core.query.reduce.BrokerReduceService;
import com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver;
import com.linkedin.pinot.core.segment.creator.impl.SegmentCreationDriverFactory;
import com.linkedin.pinot.core.segment.index.ColumnMetadata;
import com.linkedin.pinot.core.segment.index.SegmentMetadataImpl;
import com.linkedin.pinot.core.util.DoubleComparisonUtil;
import com.linkedin.pinot.segments.v1.creator.SegmentTestUtils;
import com.linkedin.pinot.util.TestUtils;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import org.apache.commons.io.FileUtils;
import org.json.JSONException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;


/**
 * TestNG tests for aggregation queries (count, sum, max, min, avg, distinctCount).
 *
 * <p>The tests build index segments from the Avro resource {@code data/test_sample_data.avro}
 * and exercise three paths: hand-wired aggregation operators, the inner-segment plan produced by
 * {@link InstancePlanMakerImplV2}, and the inter-segment plan followed by a broker-side reduce.
 */
public class AggregationQueriesTest {
  protected static final Logger LOGGER = LoggerFactory.getLogger(AggregationQueriesTest.class);

  /** Classpath location of the Avro file every segment in this test is built from. */
  private static final String AVRO_DATA = "data/test_sample_data.avro";

  /** Total document count the assertions below expect for one segment built from the Avro file. */
  private static final int TOTAL_DOCS_PER_SEGMENT = 10001;

  /** Output directory of the single segment used by the operator and inner-segment tests. */
  private static final File INDEX_DIR = new File(FileUtils.getTempDirectory(), "TestAggregationQueries");

  /** Parent directory of the segments used by the inter-segment test. */
  private static final File INDEXES_DIR = new File(FileUtils.getTempDirectory(), "TestAggregationQueriesList");

  public static IndexSegment _indexSegment;
  private static List<SegmentDataManager> _indexSegmentList;
  public static List<AggregationInfo> _aggregationInfos;

  /** Number of aggregations issued by every request; must match the size of {@link #getAggregationsInfo()}. */
  public static int _numAggregations = 7;

  // NOTE(review): the name is presumably a typo for "_metadataMap"; it is kept because the field is public.
  public Map<String, ColumnMetadata> _medataMap;

  /**
   * Builds the single test segment and the aggregation list shared by the tests.
   *
   * @throws Exception if the segment cannot be built or loaded
   */
  @BeforeClass
  public void setup() throws Exception {
    // Created first so the list is never null once any part of setup has run.
    _indexSegmentList = new ArrayList<SegmentDataManager>();
    setupSegment();
    setupQuery();
  }

  /** Destroys every loaded segment and then removes the on-disk segment directories. */
  @AfterClass
  public void tearDown() {
    // Release the segments before deleting the directories they were loaded from.
    if (_indexSegment != null) {
      _indexSegment.destroy();
    }
    if (_indexSegmentList != null) {
      for (SegmentDataManager segmentDataManager : _indexSegmentList) {
        segmentDataManager.getSegment().destroy();
      }
      _indexSegmentList.clear();
    }
    // deleteQuietly is a no-op for a path that does not exist, so no exists() check is needed.
    FileUtils.deleteQuietly(INDEX_DIR);
    FileUtils.deleteQuietly(INDEXES_DIR);
  }

  /**
   * Builds one segment under {@link #INDEX_DIR}, loads it into {@link #_indexSegment} and caches
   * its column metadata in {@link #_medataMap}.
   *
   * @throws Exception if the segment cannot be built or loaded
   */
  private void setupSegment() throws Exception {
    final String filePath = TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(AVRO_DATA));
    FileUtils.deleteQuietly(INDEX_DIR);

    _indexSegment = buildAndLoadSegment(filePath, INDEX_DIR);
    _medataMap = ((SegmentMetadataImpl) _indexSegment.getSegmentMetadata()).getColumnMetadataMap();
  }

  /**
   * Builds {@code numberOfSegments} segments under {@link #INDEXES_DIR}, one per
   * {@code segment_<i>} sub-directory, and appends a data manager for each to the segment list.
   *
   * @param numberOfSegments how many segments to build
   * @throws Exception if the parent directory cannot be created or a segment cannot be built or loaded
   */
  private void setupSegmentList(int numberOfSegments) throws Exception {
    final String filePath = TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(AVRO_DATA));
    FileUtils.deleteQuietly(INDEXES_DIR);
    // forceMkdir throws when the directory cannot be created instead of failing silently.
    FileUtils.forceMkdir(INDEXES_DIR);

    for (int i = 0; i < numberOfSegments; ++i) {
      final File segmentDir = new File(INDEXES_DIR, "segment_" + i);
      _indexSegmentList.add(new OfflineSegmentDataManager(buildAndLoadSegment(filePath, segmentDir)));
    }
  }

  /**
   * Generates a segment from the given Avro file into {@code outputDir} and loads it in heap mode.
   *
   * @param avroFilePath path of the Avro input file
   * @param outputDir directory the segment generator writes to
   * @return the loaded segment
   * @throws Exception if the segment cannot be built or loaded
   */
  private static IndexSegment buildAndLoadSegment(String avroFilePath, File outputDir) throws Exception {
    final SegmentGeneratorConfig config = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(
        new File(avroFilePath), outputDir, "time_day", TimeUnit.DAYS, "test");

    final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
    driver.init(config);
    driver.build();
    LOGGER.debug("built at : {}", outputDir.getAbsolutePath());

    return ColumnarSegmentLoader.load(new File(outputDir, driver.getSegmentName()), ReadMode.heap);
  }

  /** Initializes the aggregation list used by the hand-wired operator tests. */
  public void setupQuery() {
    _aggregationInfos = getAggregationsInfo();
  }

  /** Runs the hand-wired aggregation operator chain once; passes if no exception is thrown. */
  @Test
  public void testAggregationFunctions() {
    getIntermediateResultsBlock();
  }

  /**
   * Runs the hand-wired aggregation operator chain over the whole test segment.
   *
   * @return the block produced by the aggregation operator
   */
  private IntermediateResultsBlock getIntermediateResultsBlock() {
    return runAggregationOperators("Result : {}");
  }

  /**
   * Wires match-all filter, doc-id-set, projection and aggregation operators over
   * {@link #_indexSegment}, fetches the first result block and logs each aggregation result.
   *
   * @param logFormat SLF4J format string (one placeholder) used to log every aggregation result
   * @return the block produced by the aggregation operator
   */
  private IntermediateResultsBlock runAggregationOperators(String logFormat) {
    final Operator filterOperator = new MatchEntireSegmentOperator(_indexSegment.getSegmentMetadata().getTotalDocs());
    final BReusableFilteredDocIdSetOperator docIdSetOperator = new BReusableFilteredDocIdSetOperator(
        filterOperator, _indexSegment.getSegmentMetadata().getTotalDocs(), 5000);
    final MProjectionOperator projectionOperator = new MProjectionOperator(getDataSourceMap(), docIdSetOperator);

    final List<BAggregationFunctionOperator> aggregationFunctionOperatorList =
        new ArrayList<BAggregationFunctionOperator>();
    for (int i = 0; i < _numAggregations; ++i) {
      aggregationFunctionOperatorList.add(new BAggregationFunctionOperator(
          _aggregationInfos.get(i), new UReplicatedProjectionOperator(projectionOperator), true));
    }

    final MAggregationOperator aggregationOperator = new MAggregationOperator(
        _indexSegment, _aggregationInfos, projectionOperator, aggregationFunctionOperatorList);
    final IntermediateResultsBlock block = (IntermediateResultsBlock) aggregationOperator.nextBlock();
    for (int i = 0; i < _numAggregations; ++i) {
      LOGGER.debug(logFormat, block.getAggregationResult().get(i));
    }
    return block;
  }

  /** Produces two independent result blocks and merges the second into the first. */
  @Test
  public void testAggregationFunctionsWithCombine() {
    final IntermediateResultsBlock block = getIntermediateResultsBlock();
    final IntermediateResultsBlock block1 = runAggregationOperators("Result 2: {}");

    CombineService.mergeTwoBlocks(getAggregationNoFilterBrokerRequest(), block, block1);

    for (int i = 0; i < _numAggregations; ++i) {
      LOGGER.debug("Combined Result : {}", block.getAggregationResult().get(i));
    }
  }

  /**
   * Runs the inner-segment plan for the unfiltered request and reduces its result.
   *
   * @throws Exception if the reduce step fails
   */
  @Test
  public void testInnerSegmentPlanMakerForAggregationFunctionOperatorNoFilter() throws Exception {
    final IntermediateResultsBlock resultBlock = runInnerSegmentPlan(getAggregationNoFilterBrokerRequest());
    logReducedResults(resultBlock);
  }

  /**
   * Runs the inner-segment plan for the filtered request, checks the scanned and total document
   * counts, and reduces the result.
   *
   * @throws Exception if the reduce step fails
   */
  @Test
  public void testInnerSegmentPlanMakerForAggregationFunctionOperatorWithFilter() throws Exception {
    final IntermediateResultsBlock resultBlock = runInnerSegmentPlan(getAggregationWithFilterBrokerRequest());
    Assert.assertEquals(resultBlock.getNumDocsScanned(), 582);
    Assert.assertEquals(resultBlock.getTotalRawDocs(), TOTAL_DOCS_PER_SEGMENT);
    logReducedResults(resultBlock);
  }

  /**
   * Builds the inner-segment plan for {@code brokerRequest} on {@link #_indexSegment}, runs it and
   * logs the execution statistics of the first result block.
   *
   * @param brokerRequest the request to plan
   * @return the first block returned by the plan's root operator
   */
  private static IntermediateResultsBlock runInnerSegmentPlan(BrokerRequest brokerRequest) {
    final PlanMaker instancePlanMaker = new InstancePlanMakerImplV2();
    final PlanNode rootPlanNode = instancePlanMaker.makeInnerSegmentPlan(_indexSegment, brokerRequest);
    rootPlanNode.showTree("");

    final IntermediateResultsBlock resultBlock = (IntermediateResultsBlock) rootPlanNode.run().nextBlock();
    LOGGER.debug("RunningTime : {}", resultBlock.getTimeUsedMs());
    LOGGER.debug("NumDocsScanned : {}", resultBlock.getNumDocsScanned());
    LOGGER.debug("TotalDocs : {}", resultBlock.getTotalRawDocs());
    return resultBlock;
  }

  /**
   * Feeds the block's data table to the broker reduce service as the response of ten server
   * instances ({@code localhost:0000} ... {@code localhost:9999}) and logs the reduced response.
   *
   * <p>The reduce is always driven by the unfiltered request, including when the block came from
   * the filtered test.
   *
   * @param resultBlock block whose aggregation data table is reduced
   * @throws Exception if building the data table or the reduce step fails
   */
  private void logReducedResults(IntermediateResultsBlock resultBlock) throws Exception {
    final ReduceService reduceService = new BrokerReduceService();
    final Map<ServerInstance, DataTable> instanceResponseMap = new HashMap<ServerInstance, DataTable>();
    for (char digit = '0'; digit <= '9'; ++digit) {
      // Yields "localhost:0000", "localhost:1111", ..., "localhost:9999".
      final String address = "localhost:" + digit + digit + digit + digit;
      instanceResponseMap.put(new ServerInstance(address), resultBlock.getAggregationResultDataTable());
    }

    final BrokerResponseNative reducedResults = (BrokerResponseNative) reduceService
        .reduceOnDataTable(getAggregationNoFilterBrokerRequest(), instanceResponseMap);
    LOGGER.debug("Reduced Result : {}", reducedResults);
  }

  /**
   * Builds 20 segments, executes the inter-segment plan over them, reduces the single instance
   * response and verifies every aggregation result.
   *
   * @throws Exception if segment creation, plan execution or the reduce step fails
   */
  @Test
  public void testInterSegmentAggregationFunctionPlanMakerAndRun() throws Exception {
    final int numSegments = 20;
    setupSegmentList(numSegments);

    final PlanMaker instancePlanMaker = new InstancePlanMakerImplV2();
    final BrokerRequest brokerRequest = getAggregationNoFilterBrokerRequest();
    final ExecutorService executorService =
        Executors.newCachedThreadPool(new NamedThreadFactory("test-plan-maker"));
    try {
      final Plan globalPlan =
          instancePlanMaker.makeInterSegmentPlan(_indexSegmentList, brokerRequest, executorService, 150000);
      globalPlan.print();
      globalPlan.execute();

      final DataTable instanceResponse = globalPlan.getInstanceResponse();
      LOGGER.debug("Instance Response : {}", instanceResponse);

      final BrokerReduceService reduceService = new BrokerReduceService();
      final Map<ServerInstance, DataTable> instanceResponseMap = new HashMap<ServerInstance, DataTable>();
      instanceResponseMap.put(new ServerInstance("localhost:0000"), instanceResponse);
      final BrokerResponseNative brokerResponse = reduceService.reduceOnDataTable(brokerRequest, instanceResponseMap);

      assertBrokerResponse(numSegments, brokerResponse);
    } finally {
      // The pool is owned by this test; shut it down so its worker threads do not outlive the test.
      executorService.shutdown();
    }
  }

  /**
   * Verifies the reduced response of the unfiltered request over {@code numSegments} segments.
   *
   * @param numSegments number of segments the query ran over
   * @param brokerResponse reduced broker response to check
   * @throws JSONException declared for the result accessors, as in the callers' contract
   */
  private void assertBrokerResponse(int numSegments, BrokerResponseNative brokerResponse) throws JSONException {
    // TestNG's assertEquals takes (actual, expected).
    Assert.assertEquals(brokerResponse.getNumDocsScanned(), TOTAL_DOCS_PER_SEGMENT * numSegments);
    Assert.assertEquals(brokerResponse.getAggregationResults().size(), _numAggregations);

    final List<AggregationResult> results = brokerResponse.getAggregationResults();
    // Assertion on Count
    assertIntegerResult(results.get(0), "count_star", TOTAL_DOCS_PER_SEGMENT * numSegments);
    assertDoubleResult(results.get(1), "sum_met_impressionCount", 1343930646015719300000000.00000);
    assertDoubleResult(results.get(2), "max_met_impressionCount", 8637957270245934100.0);
    assertDoubleResult(results.get(3), "min_met_impressionCount", 614819680033322500.0);
    assertDoubleResult(results.get(4), "avg_met_impressionCount", 6718981331945402400.0);
    assertIntegerResult(results.get(5), "distinctCount_column12", 146);
    assertIntegerResult(results.get(6), "distinctCount_met_impressionCount", 21);
  }

  /** Asserts the function name of {@code result} and that its value parses to {@code expectedValue}. */
  private static void assertIntegerResult(AggregationResult result, String expectedFunction, int expectedValue)
      throws JSONException {
    Assert.assertEquals(result.getFunction(), expectedFunction);
    Assert.assertEquals(Integer.parseInt(result.getValue().toString()), expectedValue);
  }

  /**
   * Asserts the function name of {@code result} and that its value compares equal to
   * {@code expectedValue} under {@link DoubleComparisonUtil#defaultDoubleCompare}.
   */
  private static void assertDoubleResult(AggregationResult result, String expectedFunction, double expectedValue)
      throws JSONException {
    Assert.assertEquals(result.getFunction(), expectedFunction);
    Assert.assertEquals(
        DoubleComparisonUtil.defaultDoubleCompare(expectedValue, Double.parseDouble(result.getValue().toString())),
        0);
  }

  /** Returns a broker request carrying the seven test aggregations and no filter. */
  private static BrokerRequest getAggregationNoFilterBrokerRequest() {
    final BrokerRequest brokerRequest = new BrokerRequest();
    brokerRequest.setAggregationsInfo(getAggregationsInfo());
    return brokerRequest;
  }

  /**
   * Returns a fresh list of the seven test aggregations, in the order the assertions index them:
   * count, sum, max, min, avg, distinctCount(column12), distinctCount(met_impressionCount).
   */
  private static List<AggregationInfo> getAggregationsInfo() {
    final List<AggregationInfo> aggregationsInfo = new ArrayList<AggregationInfo>();
    aggregationsInfo.add(getCountAggregationInfo());
    aggregationsInfo.add(getSumAggregationInfo());
    aggregationsInfo.add(getMaxAggregationInfo());
    aggregationsInfo.add(getMinAggregationInfo());
    aggregationsInfo.add(getAvgAggregationInfo());
    aggregationsInfo.add(getDistinctCountAggregationInfo("column12"));
    aggregationsInfo.add(getDistinctCountAggregationInfo("met_impressionCount"));
    return aggregationsInfo;
  }

  /** Returns the data sources of the three columns the tests read, keyed by column name. */
  private static Map<String, DataSource> getDataSourceMap() {
    final Map<String, DataSource> dataSourceMap = new HashMap<String, DataSource>();
    for (String column : new String[] {"column11", "column12", "met_impressionCount"}) {
      dataSourceMap.put(column, _indexSegment.getDataSource(column));
    }
    return dataSourceMap;
  }

  /** Returns the {@code count} aggregation on column {@code *}. */
  private static AggregationInfo getCountAggregationInfo() {
    return newAggregationInfo("count", "*");
  }

  /** Returns the {@code sum} aggregation on {@code met_impressionCount}. */
  private static AggregationInfo getSumAggregationInfo() {
    return getAggregationInfo("sum");
  }

  /**
   * Returns an aggregation of the given type on column {@code met_impressionCount}.
   *
   * @param type aggregation type, e.g. {@code "sum"}
   */
  private static AggregationInfo getAggregationInfo(String type) {
    return newAggregationInfo(type, "met_impressionCount");
  }

  /** Returns the {@code max} aggregation on {@code met_impressionCount}. */
  private static AggregationInfo getMaxAggregationInfo() {
    return getAggregationInfo("max");
  }

  /** Returns the {@code min} aggregation on {@code met_impressionCount}. */
  private static AggregationInfo getMinAggregationInfo() {
    return getAggregationInfo("min");
  }

  /** Returns the {@code avg} aggregation on {@code met_impressionCount}. */
  private static AggregationInfo getAvgAggregationInfo() {
    return getAggregationInfo("avg");
  }

  /**
   * Returns the {@code distinctCount} aggregation on the given column.
   *
   * @param dim column to count distinct values of
   */
  private static AggregationInfo getDistinctCountAggregationInfo(String dim) {
    return newAggregationInfo("distinctCount", dim);
  }

  /** Creates an aggregation of {@code type} whose only parameter is {@code "column" -> column}. */
  private static AggregationInfo newAggregationInfo(String type, String column) {
    final Map<String, String> params = new HashMap<String, String>();
    params.put("column", column);

    final AggregationInfo aggregationInfo = new AggregationInfo();
    aggregationInfo.setAggregationType(type);
    aggregationInfo.setAggregationParams(params);
    return aggregationInfo;
  }

  /** Returns a broker request carrying the seven test aggregations and the {@code column11 = U} filter. */
  private static BrokerRequest getAggregationWithFilterBrokerRequest() {
    final BrokerRequest brokerRequest = new BrokerRequest();
    brokerRequest.setAggregationsInfo(getAggregationsInfo());
    setFilterQuery(brokerRequest);
    return brokerRequest;
  }

  /**
   * Attaches the equality filter {@code column11 = U} to the request.
   *
   * @param brokerRequest request to add the filter to
   * @return the same request instance
   */
  private static BrokerRequest setFilterQuery(BrokerRequest brokerRequest) {
    final FilterQueryTree filterQueryTree = buildFilterQueryTree("column11", "U");
    RequestUtils.generateFilterFromTree(filterQueryTree, brokerRequest);
    return brokerRequest;
  }

  /**
   * Builds an equality filter tree. A plain column yields a single EQUALITY node with id 0; a
   * comma-separated column list yields an AND node with id 0 whose children are EQUALITY nodes
   * with ids 1..n, pairing the i-th column with the i-th comma-separated value.
   *
   * @param filterColumn column name, or comma-separated column names
   * @param filterVal value, or comma-separated values matching {@code filterColumn} positionally
   * @return the root of the filter tree
   * @throws IllegalArgumentException if fewer values than columns are supplied
   */
  private static FilterQueryTree buildFilterQueryTree(String filterColumn, String filterVal) {
    if (!filterColumn.contains(",")) {
      final List<String> vals = new ArrayList<String>();
      vals.add(filterVal);
      return new FilterQueryTree(0, filterColumn, vals, FilterOperator.EQUALITY, null);
    }

    final String[] filterColumns = filterColumn.split(",");
    final String[] filterValues = filterVal.split(",");
    if (filterValues.length < filterColumns.length) {
      throw new IllegalArgumentException("Expected " + filterColumns.length + " filter values for columns '"
          + filterColumn + "' but got " + filterValues.length + ": '" + filterVal + "'");
    }

    final List<FilterQueryTree> nested = new ArrayList<FilterQueryTree>();
    for (int i = 0; i < filterColumns.length; i++) {
      final List<String> vals = new ArrayList<String>();
      vals.add(filterValues[i]);
      nested.add(new FilterQueryTree(i + 1, filterColumns[i], vals, FilterOperator.EQUALITY, null));
    }
    return new FilterQueryTree(0, null, null, FilterOperator.AND, nested);
  }
}