Java tutorial
/** * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.linkedin.pinot.core.segment.index; import com.linkedin.pinot.common.data.FieldSpec; import com.linkedin.pinot.common.data.MetricFieldSpec; import com.linkedin.pinot.common.segment.ReadMode; import com.linkedin.pinot.common.segment.StarTreeMetadata; import com.linkedin.pinot.core.indexsegment.IndexSegment; import com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig; import com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver; import com.linkedin.pinot.core.segment.creator.impl.SegmentCreationDriverFactory; import com.linkedin.pinot.core.segment.index.loader.Loaders; import com.linkedin.pinot.core.startree.hll.HllConfig; import com.linkedin.pinot.core.startree.hll.SegmentWithHllIndexCreateHelper; import com.linkedin.pinot.segments.v1.creator.SegmentTestUtils; import com.linkedin.pinot.util.TestUtils; import java.io.File; import java.util.Arrays; import java.util.HashSet; import java.util.concurrent.TimeUnit; import org.apache.commons.io.FileUtils; import org.testng.Assert; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; public class ColumnMetadataTest { private static final String AVRO_DATA = "data/test_data-mv.avro"; private static final File INDEX_DIR = new File(ColumnMetadataTest.class.toString()); private static final String CREATOR_VERSION = "TestHadoopJar.1.1.1"; @BeforeMethod public void setUp() throws Exception { FileUtils.deleteQuietly(INDEX_DIR); } @AfterMethod public void tearDown() { FileUtils.deleteQuietly(INDEX_DIR); } public SegmentGeneratorConfig CreateSegmentConfigWithoutCreator() throws Exception { final String filePath = TestUtils .getFileFromResourceUrl(ColumnMetadataTest.class.getClassLoader().getResource(AVRO_DATA)); // Intentionally changed this to TimeUnit.Hours to make it non-default for testing. SegmentGeneratorConfig config = SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns( new File(filePath), INDEX_DIR, "daysSinceEpoch", TimeUnit.HOURS, "testTable"); config.setSegmentNamePostfix("1"); config.setTimeColumnName("daysSinceEpoch"); return config; } public SegmentGeneratorConfig createSegmentConfigWithCreator() throws Exception { SegmentGeneratorConfig config = CreateSegmentConfigWithoutCreator(); config.setCreatorVersion(CREATOR_VERSION); return config; } public void verifySegmentAfterLoading(SegmentMetadataImpl metadata) { // Multi-value numeric dimension column. ColumnMetadata col7Meta = metadata.getColumnMetadataFor("column7"); Assert.assertEquals(col7Meta.getColumnName(), "column7"); Assert.assertEquals(col7Meta.getCardinality(), 359); Assert.assertEquals(col7Meta.getTotalDocs(), 100000); Assert.assertEquals(col7Meta.getTotalRawDocs(), 100000); Assert.assertEquals(col7Meta.getTotalAggDocs(), 0); Assert.assertEquals(col7Meta.getDataType(), FieldSpec.DataType.INT); Assert.assertEquals(col7Meta.getBitsPerElement(), 9); Assert.assertEquals(col7Meta.getStringColumnMaxLength(), 0); Assert.assertEquals(col7Meta.getFieldType(), FieldSpec.FieldType.DIMENSION); Assert.assertFalse(col7Meta.isSorted()); Assert.assertFalse(col7Meta.hasNulls()); Assert.assertTrue(col7Meta.hasDictionary()); Assert.assertTrue(col7Meta.hasInvertedIndex()); Assert.assertFalse(col7Meta.isSingleValue()); Assert.assertEquals(col7Meta.getMaxNumberOfMultiValues(), 24); Assert.assertEquals(col7Meta.getTotalNumberOfEntries(), 134090); Assert.assertFalse(col7Meta.isAutoGenerated()); Assert.assertEquals(col7Meta.getDefaultNullValueString(), String.valueOf(Integer.MIN_VALUE)); // Single-value string dimension column. ColumnMetadata col3Meta = metadata.getColumnMetadataFor("column3"); Assert.assertEquals(col3Meta.getColumnName(), "column3"); Assert.assertEquals(col3Meta.getCardinality(), 5); Assert.assertEquals(col3Meta.getTotalDocs(), 100000); Assert.assertEquals(col3Meta.getTotalRawDocs(), 100000); Assert.assertEquals(col3Meta.getTotalAggDocs(), 0); Assert.assertEquals(col3Meta.getDataType(), FieldSpec.DataType.STRING); Assert.assertEquals(col3Meta.getBitsPerElement(), 3); Assert.assertEquals(col3Meta.getStringColumnMaxLength(), 4); Assert.assertEquals(col3Meta.getFieldType(), FieldSpec.FieldType.DIMENSION); Assert.assertFalse(col3Meta.isSorted()); Assert.assertFalse(col3Meta.hasNulls()); Assert.assertTrue(col3Meta.hasDictionary()); Assert.assertTrue(col3Meta.hasInvertedIndex()); Assert.assertTrue(col3Meta.isSingleValue()); Assert.assertEquals(col3Meta.getMaxNumberOfMultiValues(), 0); Assert.assertEquals(col3Meta.getTotalNumberOfEntries(), 100000); Assert.assertFalse(col3Meta.isAutoGenerated()); Assert.assertEquals(col3Meta.getDefaultNullValueString(), "null"); // Time column. ColumnMetadata timeColumn = metadata.getColumnMetadataFor("daysSinceEpoch"); Assert.assertEquals(timeColumn.getColumnName(), "daysSinceEpoch"); Assert.assertEquals(timeColumn.getCardinality(), 1); Assert.assertEquals(timeColumn.getTotalDocs(), 100000); Assert.assertEquals(timeColumn.getTotalRawDocs(), 100000); Assert.assertEquals(timeColumn.getTotalAggDocs(), 0); Assert.assertEquals(timeColumn.getDataType(), FieldSpec.DataType.INT); Assert.assertEquals(timeColumn.getBitsPerElement(), 1); Assert.assertEquals(timeColumn.getStringColumnMaxLength(), 0); Assert.assertEquals(timeColumn.getFieldType(), FieldSpec.FieldType.DIMENSION); Assert.assertTrue(timeColumn.isSorted()); Assert.assertFalse(timeColumn.hasNulls()); Assert.assertTrue(timeColumn.hasDictionary()); Assert.assertTrue(timeColumn.hasInvertedIndex()); Assert.assertTrue(timeColumn.isSingleValue()); Assert.assertEquals(timeColumn.getMaxNumberOfMultiValues(), 0); Assert.assertEquals(timeColumn.getTotalNumberOfEntries(), 100000); Assert.assertFalse(timeColumn.isAutoGenerated()); Assert.assertEquals(timeColumn.getDefaultNullValueString(), String.valueOf(Integer.MIN_VALUE)); } @Test public void testAllFieldsInitialized() throws Exception { // Build the Segment metadata. SegmentGeneratorConfig config = createSegmentConfigWithCreator(); SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null); driver.init(config); driver.build(); // Load segment metadata. IndexSegment segment = Loaders.IndexSegment.load(INDEX_DIR.listFiles()[0], ReadMode.mmap); SegmentMetadataImpl metadata = (SegmentMetadataImpl) segment.getSegmentMetadata(); verifySegmentAfterLoading(metadata); // Make sure we got the creator name as well. String creatorName = metadata.getCreatorName(); Assert.assertEquals(creatorName, CREATOR_VERSION); } @Test public void testAllFieldsExceptCreatorName() throws Exception { // Build the Segment metadata. SegmentGeneratorConfig config = CreateSegmentConfigWithoutCreator(); SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null); driver.init(config); driver.build(); // Load segment metadata. IndexSegment segment = Loaders.IndexSegment.load(INDEX_DIR.listFiles()[0], ReadMode.mmap); SegmentMetadataImpl metadata = (SegmentMetadataImpl) segment.getSegmentMetadata(); verifySegmentAfterLoading(metadata); // Make sure we get null for creator name. String creatorName = metadata.getCreatorName(); Assert.assertEquals(creatorName, null); } @Test public void testPaddingCharacter() throws Exception { // Build the Segment metadata. SegmentGeneratorConfig config = CreateSegmentConfigWithoutCreator(); config.setPaddingCharacter('\0'); SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null); driver.init(config); driver.build(); // Load segment metadata. IndexSegment segment = Loaders.IndexSegment.load(INDEX_DIR.listFiles()[0], ReadMode.mmap); SegmentMetadataImpl metadata = (SegmentMetadataImpl) segment.getSegmentMetadata(); verifySegmentAfterLoading(metadata); // Make sure we get null for creator name. char paddingCharacter = metadata.getPaddingCharacter(); Assert.assertEquals(paddingCharacter, '\0'); } @Test public void testHllIndexRelatedMetadata() throws Exception { SegmentWithHllIndexCreateHelper helper = null; try { // Build the Segment metadata. helper = new SegmentWithHllIndexCreateHelper("testHllIndexRelatedMetadata", "data/test_data-sv.avro", "daysSinceEpoch", TimeUnit.DAYS); helper.build(true, new HllConfig(9, new HashSet<String>(Arrays.asList("column7")), "_hllSuffix")); // Load segment metadata. IndexSegment segment = Loaders.IndexSegment.load(helper.getSegmentDirectory(), ReadMode.mmap); SegmentMetadataImpl metadata = (SegmentMetadataImpl) segment.getSegmentMetadata(); Assert.assertEquals(metadata.getHllLog2m(), 9); // Verify Hll Related Info StarTreeMetadata starTreeMetadata = metadata.getStarTreeMetadata(); Assert.assertNotNull(starTreeMetadata); ColumnMetadata column = metadata.getColumnMetadataFor("column7_hllSuffix"); Assert.assertEquals(column.getDerivedMetricType(), MetricFieldSpec.DerivedMetricType.HLL); Assert.assertEquals(column.getOriginColumnName(), "column7"); } finally { if (helper != null) { helper.cleanTempDir(); } } } }