Java tutorial: consuming a real-time Pinot segment and scanning its columns

The listing below is RealtimeTableDataManagerTest from the Pinot server real-time integration tests. Its setup() method builds a REALTIME table config backed by a Kafka high-level consumer, and testSetup() starts an HLRealtimeSegmentDataManager and then uses timer tasks to repeatedly scan a metric column, two single-value columns, and one multi-value column of the consuming segment, logging the scan times.
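All three single-value scans in the listing share the same iteration pattern. As a quick reference, here is a minimal sketch of that pattern pulled out into a helper; the class name ColumnScanSketch and method name scanSingleValueColumn are my own and do not appear in the listing, while the Pinot types and calls are exactly the ones the test uses. Note that this sketch adds each value before checking for the EOF sentinel, whereas the loops in the original listing skip the first value and fold the EOF sentinel into the sum.

// Sketch only: a distilled version of the single-value column scan used by the timer tasks below.
import com.linkedin.pinot.core.common.BlockSingleValIterator;
import com.linkedin.pinot.core.common.BlockValSet;
import com.linkedin.pinot.core.common.Constants;
import com.linkedin.pinot.core.realtime.RealtimeSegment;
import com.linkedin.pinot.core.realtime.impl.datasource.RealtimeColumnDataSource;

class ColumnScanSketch {
  static long scanSingleValueColumn(RealtimeSegment segment, String column) {
    // Fetch the data source for the column and iterate over its current block.
    RealtimeColumnDataSource ds = (RealtimeColumnDataSource) segment.getDataSource(column);
    BlockValSet valSet = ds.nextBlock().getBlockValueSet();
    BlockSingleValIterator it = (BlockSingleValIterator) valSet.iterator();
    long sum = 0;
    int val = it.nextIntVal();
    while (val != Constants.EOF) { // EOF sentinel marks the end of the block
      sum += val;
      val = it.nextIntVal();
    }
    return sum;
  }
}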
/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.server.integration.realtime;

import com.linkedin.pinot.common.metrics.ServerMetrics;
import com.yammer.metrics.core.MetricsRegistry;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.TimerTask;
import java.util.concurrent.TimeUnit;
import org.apache.commons.configuration.Configuration;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.helix.ZNRecord;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.annotations.BeforeClass;
import com.linkedin.pinot.common.config.AbstractTableConfig;
import com.linkedin.pinot.common.data.FieldSpec;
import com.linkedin.pinot.common.data.FieldSpec.FieldType;
import com.linkedin.pinot.common.data.Schema;
import com.linkedin.pinot.common.metadata.instance.InstanceZKMetadata;
import com.linkedin.pinot.common.metadata.segment.RealtimeSegmentZKMetadata;
import com.linkedin.pinot.common.segment.ReadMode;
import com.linkedin.pinot.common.utils.CommonConstants.Segment.Realtime.Status;
import com.linkedin.pinot.common.utils.CommonConstants.Segment.SegmentType;
import com.linkedin.pinot.core.common.Block;
import com.linkedin.pinot.core.common.BlockMetadata;
import com.linkedin.pinot.core.common.BlockMultiValIterator;
import com.linkedin.pinot.core.common.BlockSingleValIterator;
import com.linkedin.pinot.core.common.BlockValSet;
import com.linkedin.pinot.core.common.Constants;
import com.linkedin.pinot.core.data.manager.config.TableDataManagerConfig;
import com.linkedin.pinot.core.data.manager.realtime.HLRealtimeSegmentDataManager;
import com.linkedin.pinot.core.data.manager.realtime.TimerService;
import com.linkedin.pinot.core.realtime.RealtimeFileBasedReaderTest;
import com.linkedin.pinot.core.realtime.RealtimeSegment;
import com.linkedin.pinot.core.realtime.impl.datasource.RealtimeColumnDataSource;
import com.linkedin.pinot.segments.v1.creator.SegmentTestUtils;

public class RealtimeTableDataManagerTest {
  private static final Logger LOGGER = LoggerFactory.getLogger(RealtimeTableDataManagerTest.class);

  private static AbstractTableConfig tableConfig;
  private static InstanceZKMetadata instanceZKMetadata;
  private static RealtimeSegmentZKMetadata realtimeSegmentZKMetadata;
  private static TableDataManagerConfig tableDataManagerConfig;

  private static final String AVRO_DATA = "data/test_data-mv.avro";
  private static String filePath;
  private static Map<String, FieldType> fieldTypeMap;

  private static final String TABLE_DATA_MANAGER_NUM_QUERY_EXECUTOR_THREADS = "numQueryExecutorThreads";
  private static final String TABLE_DATA_MANAGER_TYPE = "dataManagerType";
  private static final String READ_MODE = "readMode";
  private static final String TABLE_DATA_MANAGER_DATA_DIRECTORY = "directory";
  private static final String TABLE_DATA_MANAGER_NAME = "name";

  private static final long SEGMENT_CONSUMING_TIME = 1000 * 60 * 3;
  private static volatile boolean keepOnRunning = true;

  @BeforeClass
  public static void setup() throws Exception {
    instanceZKMetadata = getInstanceZKMetadata();
    realtimeSegmentZKMetadata = getRealtimeSegmentZKMetadata();
    tableDataManagerConfig = getTableDataManagerConfig();

    // Build a minimal REALTIME table config with a Kafka high-level consumer stream definition.
    JSONObject request = new JSONObject();
    request.put("tableName", "mirror");
    request.put("tableType", "REALTIME");
    JSONObject indexing = new JSONObject();
    indexing.put("loadMode", "HEAP");
    JSONObject stream = new JSONObject();
    stream.put("streamType", "kafka");
    stream.put("stream.kafka.consumer.type", "highLevel");
    stream.put("stream.kafka.topic.name", "MirrorDecoratedProfileViewEvent");
    stream.put("stream.kafka.decoder.class.name",
        "com.linkedin.pinot.core.realtime.impl.kafka.KafkaAvroMessageDecoder");
    stream.put("stream.kafka.hlc.zk.connect.string",
        "zk-eat1-kafka.corp.linkedin.com:12913/kafka-aggregate-tracking");
    stream.put("stream.kafka.decoder.prop.schema.registry.rest.url",
        "http://eat1-ei2-schema-vip-z.stg.linkedin.com:10252/schemaRegistry/schemas");
    indexing.put("streamConfigs", stream);
    request.put("tableIndexConfig", indexing);
    request.put("segmentsConfig", new JSONObject());
    request.put("tenants", new JSONObject());
    request.put("metadata", new JSONObject());
    tableConfig = AbstractTableConfig.init(request.toString());
  }

  private static TableDataManagerConfig getTableDataManagerConfig() throws ConfigurationException {
    String tableName = "testTable_R";
    Configuration defaultConfig = new PropertiesConfiguration();
    defaultConfig.addProperty(TABLE_DATA_MANAGER_NAME, tableName);
    String dataDir = "/tmp/" + tableName;
    defaultConfig.addProperty(TABLE_DATA_MANAGER_DATA_DIRECTORY, dataDir);
    defaultConfig.addProperty(READ_MODE, ReadMode.heap.toString());
    defaultConfig.addProperty(TABLE_DATA_MANAGER_NUM_QUERY_EXECUTOR_THREADS, 20);
    TableDataManagerConfig tableDataManagerConfig = new TableDataManagerConfig(defaultConfig);
    defaultConfig.addProperty(TABLE_DATA_MANAGER_TYPE, "realtime");
    return tableDataManagerConfig;
  }

  public void testSetup() throws Exception {
    final HLRealtimeSegmentDataManager manager =
        new HLRealtimeSegmentDataManager(realtimeSegmentZKMetadata, tableConfig, instanceZKMetadata, null,
            tableDataManagerConfig.getDataDir(), ReadMode.valueOf(tableDataManagerConfig.getReadMode()),
            getTestSchema(), new ServerMetrics(new MetricsRegistry()));

    final long start = System.currentTimeMillis();

    // Stop the test once the segment has been consuming for SEGMENT_CONSUMING_TIME.
    TimerService.timer.scheduleAtFixedRate(new TimerTask() {
      @Override
      public void run() {
        if (System.currentTimeMillis() - start >= (SEGMENT_CONSUMING_TIME)) {
          keepOnRunning = false;
        }
      }
    }, 1000, 1000 * 60 * 1);

    // Periodically scan the "count" metric column and log the scan time.
    TimerService.timer.scheduleAtFixedRate(new TimerTask() {
      @Override
      public void run() {
        long start = System.currentTimeMillis();
        long sum = 0;
        try {
          RealtimeSegment segment = (RealtimeSegment) manager.getSegment();
          RealtimeColumnDataSource mDs = (RealtimeColumnDataSource) segment.getDataSource("count");
          BlockValSet valSet = mDs.nextBlock().getBlockValueSet();
          BlockSingleValIterator valIt = (BlockSingleValIterator) valSet.iterator();
          int val = valIt.nextIntVal();
          while (val != Constants.EOF) {
            val = valIt.nextIntVal();
            sum += val;
          }
        } catch (Exception e) {
          LOGGER.info("count column exception");
          e.printStackTrace();
        }
        long stop = System.currentTimeMillis();
        LOGGER.info("time to scan metric col count : " + (stop - start) + " sum : " + sum);
      }
    }, 20000, 1000 * 5);

    // Periodically scan the "viewerId" single-value dimension column and log the scan time.
    TimerService.timer.scheduleAtFixedRate(new TimerTask() {
      @Override
      public void run() {
        long start = System.currentTimeMillis();
        long sum = 0;
        try {
          RealtimeSegment segment = (RealtimeSegment) manager.getSegment();
          RealtimeColumnDataSource mDs = (RealtimeColumnDataSource) segment.getDataSource("viewerId");
          BlockValSet valSet = mDs.nextBlock().getBlockValueSet();
          BlockSingleValIterator valIt = (BlockSingleValIterator) valSet.iterator();
          int val = valIt.nextIntVal();
          while (val != Constants.EOF) {
            val = valIt.nextIntVal();
            sum += val;
          }
        } catch (Exception e) {
          LOGGER.info("viewerId column exception");
          e.printStackTrace();
        }
        long stop = System.currentTimeMillis();
        LOGGER.info("time to scan SV dimension col viewerId : " + (stop - start) + " sum : " + sum);
      }
    }, 20000, 1000 * 5);

    // Periodically scan the "daysSinceEpoch" single-value time column and log the scan time.
    TimerService.timer.scheduleAtFixedRate(new TimerTask() {
      @Override
      public void run() {
        long start = System.currentTimeMillis();
        long sum = 0;
        try {
          RealtimeSegment segment = (RealtimeSegment) manager.getSegment();
          RealtimeColumnDataSource mDs = (RealtimeColumnDataSource) segment.getDataSource("daysSinceEpoch");
          BlockValSet valSet = mDs.nextBlock().getBlockValueSet();
          BlockSingleValIterator valIt = (BlockSingleValIterator) valSet.iterator();
          int val = valIt.nextIntVal();
          while (val != Constants.EOF) {
            val = valIt.nextIntVal();
            sum += val;
          }
        } catch (Exception e) {
          LOGGER.info("daysSinceEpoch column exception");
          e.printStackTrace();
        }
        long stop = System.currentTimeMillis();
        LOGGER.info("time to scan SV time col daysSinceEpoch : " + (stop - start) + " sum : " + sum);
      }
    }, 20000, 1000 * 5);

    // Periodically scan the "viewerCompanies" multi-value dimension column and log the scan time
    // along with the average number of values per row.
    TimerService.timer.scheduleAtFixedRate(new TimerTask() {
      @Override
      public void run() {
        long start = System.currentTimeMillis();
        long sum = 0;
        float sumOfLengths = 0F;
        float counter = 0F;
        try {
          RealtimeSegment segment = (RealtimeSegment) manager.getSegment();
          RealtimeColumnDataSource mDs = (RealtimeColumnDataSource) segment.getDataSource("viewerCompanies");
          Block b = mDs.nextBlock();
          BlockValSet valSet = b.getBlockValueSet();
          BlockMultiValIterator valIt = (BlockMultiValIterator) valSet.iterator();
          BlockMetadata m = b.getMetadata();
          int maxVams = m.getMaxNumberOfMultiValues();
          while (valIt.hasNext()) {
            int[] vals = new int[maxVams];
            int len = valIt.nextIntVal(vals);
            for (int i = 0; i < len; i++) {
              sum += vals[i];
            }
            sumOfLengths += len;
            counter++;
          }
        } catch (Exception e) {
          LOGGER.info("viewerCompanies column exception");
          e.printStackTrace();
        }
        long stop = System.currentTimeMillis();
        LOGGER.info("time to scan MV col viewerCompanies : " + (stop - start) + " sum : " + sum + " average len : "
            + (sumOfLengths / counter));
      }
    }, 20000, 1000 * 5);

    while (keepOnRunning) {
      // Busy-wait until the first timer task flips keepOnRunning to false.
    }
  }

  private static InstanceZKMetadata getInstanceZKMetadata() {
    ZNRecord record = new ZNRecord("Server_lva1-app0120.corp.linkedin.com_8001");
    Map<String, String> groupIdMap = new HashMap<String, String>();
    Map<String, String> partitionMap = new HashMap<String, String>();
    groupIdMap.put("mirror", "groupId_testTable_" + String.valueOf(System.currentTimeMillis()));
    partitionMap.put("testTable_R", "0");
    record.setMapField("KAFKA_HLC_GROUP_MAP", groupIdMap);
    record.setMapField("KAFKA_HLC_PARTITION_MAP", partitionMap);
    return new InstanceZKMetadata(record);
  }

  private static RealtimeSegmentZKMetadata getRealtimeSegmentZKMetadata() {
    RealtimeSegmentZKMetadata realtimeSegmentMetadata = new RealtimeSegmentZKMetadata();
    realtimeSegmentMetadata.setSegmentName("testTable_R_1000_groupId0_part0");
    realtimeSegmentMetadata.setTableName("testTable");
    realtimeSegmentMetadata.setSegmentType(SegmentType.REALTIME);
    realtimeSegmentMetadata.setIndexVersion("v1");
    realtimeSegmentMetadata.setStartTime(1000);
    realtimeSegmentMetadata.setEndTime(-1);
    realtimeSegmentMetadata.setTimeUnit(TimeUnit.HOURS);
    realtimeSegmentMetadata.setStatus(Status.IN_PROGRESS);
    realtimeSegmentMetadata.setTotalRawDocs(-1);
    realtimeSegmentMetadata.setCrc(-1);
    realtimeSegmentMetadata.setCreationTime(1000);
    return realtimeSegmentMetadata;
  }

  private static Schema getTestSchema() throws FileNotFoundException, IOException {
    filePath = RealtimeFileBasedReaderTest.class.getClassLoader().getResource(AVRO_DATA).getFile();
    fieldTypeMap = new HashMap<String, FieldSpec.FieldType>();
    fieldTypeMap.put("viewerId", FieldType.DIMENSION);
    fieldTypeMap.put("vieweeId", FieldType.DIMENSION);
    fieldTypeMap.put("viewerPrivacySetting", FieldType.DIMENSION);
    fieldTypeMap.put("vieweePrivacySetting", FieldType.DIMENSION);
    fieldTypeMap.put("viewerObfuscationType", FieldType.DIMENSION);
    fieldTypeMap.put("viewerCompanies", FieldType.DIMENSION);
    fieldTypeMap.put("viewerOccupations", FieldType.DIMENSION);
    fieldTypeMap.put("viewerRegionCode", FieldType.DIMENSION);
    fieldTypeMap.put("viewerIndustry", FieldType.DIMENSION);
    fieldTypeMap.put("viewerSchool", FieldType.DIMENSION);
    fieldTypeMap.put("weeksSinceEpochSunday", FieldType.DIMENSION);
    fieldTypeMap.put("daysSinceEpoch", FieldType.DIMENSION);
    fieldTypeMap.put("minutesSinceEpoch", FieldType.TIME);
    fieldTypeMap.put("count", FieldType.METRIC);
    return SegmentTestUtils.extractSchemaFromAvro(new File(filePath), fieldTypeMap, TimeUnit.MINUTES);
  }
}
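Note that testSetup() is not annotated with @Test, so TestNG will not run it as part of the normal suite; it is effectively a manual, time-boxed consumption test. One way to exercise it is a small driver like the sketch below. The driver class name is hypothetical and assumes it lives in the same package as the test; it simply calls the two public methods defined in the listing.

// Hypothetical manual driver; not part of the original test class.
package com.linkedin.pinot.server.integration.realtime;

public class RealtimeTableDataManagerManualRunner {
  public static void main(String[] args) throws Exception {
    // Build the table config, ZK metadata and data manager config first.
    RealtimeTableDataManagerTest.setup();
    // Start consumption and the periodic column scans; testSetup() returns
    // once the timer task flips keepOnRunning after SEGMENT_CONSUMING_TIME.
    new RealtimeTableDataManagerTest().testSetup();
  }
}

Running this only makes sense where the Kafka ZooKeeper connect string and schema-registry URL hard-coded in setup() are reachable; outside that environment the consumer will not receive any events and the scans will log empty results.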