Java tutorial
/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.segments.v1.creator;

import com.linkedin.pinot.util.TestUtils;
import java.io.File;
import java.io.FileInputStream;
import java.util.concurrent.TimeUnit;

import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.util.Utf8;
import org.apache.commons.io.FileUtils;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

import com.linkedin.pinot.common.segment.ReadMode;
import com.linkedin.pinot.core.indexsegment.columnar.ColumnarSegmentLoader;
import com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig;
import com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver;
import com.linkedin.pinot.core.segment.creator.impl.SegmentCreationDriverFactory;
import com.linkedin.pinot.core.segment.index.IndexSegmentImpl;
import com.linkedin.pinot.core.segment.index.SegmentMetadataImpl;


/**
 * Builds a segment from the sample Avro file and checks that the inverted index of the loaded
 * segment agrees with the raw Avro records, once per read mode (heap and mmap).
 */
public class BitmapInvertedIndexTest {
  /** Classpath location of the Avro file the segment is built from and verified against. */
  private static final String AVRO_DATA = "data/test_sample_data.avro";

  /** Directory the segment is written to in {@link #setup()} and removed from in {@link #teardown()}. */
  private static final File INDEX_DIR = new File(BitmapInvertedIndexTest.class.toString());

  /**
   * Generates the segment under {@link #INDEX_DIR} before any test runs, deleting a leftover
   * directory from a previous run first.
   *
   * @throws Exception if the Avro resource cannot be resolved or segment creation fails
   */
  @BeforeClass
  public void setup() throws Exception {
    final String filePath =
        TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(AVRO_DATA));

    if (INDEX_DIR.exists()) {
      FileUtils.deleteQuietly(INDEX_DIR);
    }

    final SegmentGeneratorConfig config =
        SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), INDEX_DIR, "time_day",
            TimeUnit.DAYS, "test");
    final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
    driver.init(config);
    driver.build();

    System.out.println("built at : " + INDEX_DIR.getAbsolutePath());
  }

  /** Removes the generated segment directory once all tests have run. */
  @AfterClass
  public void teardown() {
    FileUtils.deleteQuietly(INDEX_DIR);
  }

  /**
   * Verifies the inverted index of the segment loaded in heap mode.
   *
   * @throws Exception if the segment or the Avro file cannot be read
   */
  @Test
  public void test1() throws Exception {
    final IndexSegmentImpl heapSegment = (IndexSegmentImpl) ColumnarSegmentLoader.load(INDEX_DIR, ReadMode.heap);
    assertInvertedIndexMatchesAvro(heapSegment);
  }

  /**
   * Verifies the inverted index of the segment loaded in mmap mode.
   *
   * @throws Exception if the segment or the Avro file cannot be read
   */
  @Test
  public void test2() throws Exception {
    final IndexSegmentImpl mmapSegment = (IndexSegmentImpl) ColumnarSegmentLoader.load(INDEX_DIR, ReadMode.mmap);
    assertInvertedIndexMatchesAvro(mmapSegment);
  }

  /**
   * Walks the Avro file record by record, using the record's position in the file as its doc id,
   * and asserts for every column in the segment metadata that the doc id is present in the
   * inverted-index entry of the record's dictionary id and absent from every other entry.
   *
   * @param segment the loaded segment whose dictionaries and inverted indexes are checked
   * @throws Exception if the Avro file cannot be opened or read
   */
  private void assertInvertedIndexMatchesAvro(final IndexSegmentImpl segment) throws Exception {
    final FileInputStream avroInput =
        new FileInputStream(new File(getClass().getClassLoader().getResource(AVRO_DATA).getFile()));
    try {
      final DataFileStream<GenericRecord> reader =
          new DataFileStream<GenericRecord>(avroInput, new GenericDatumReader<GenericRecord>());
      try {
        int docId = 0;
        while (reader.hasNext()) {
          final GenericRecord rec = reader.next();
          for (final String column : ((SegmentMetadataImpl) segment.getSegmentMetadata()).getColumnMetadataMap()
              .keySet()) {
            Object entry = rec.get(column);
            // Avro hands strings back as Utf8; convert so the dictionary lookup sees a String.
            if (entry instanceof Utf8) {
              entry = ((Utf8) entry).toString();
            }
            final int dicId = segment.getDictionaryFor(column).indexOf(entry);

            // make sure that docId for dicId exist in the inverted index
            Assert.assertTrue(segment.getInvertedIndexFor(column).getImmutable(dicId).contains(docId));

            final int size = segment.getDictionaryFor(column).length();
            for (int i = 0; i < size; ++i) { // remove this for-loop for quick test
              if (i == dicId) {
                continue;
              }
              // make sure that docId does not exist in the entry of any other dictionary id
              Assert.assertFalse(segment.getInvertedIndexFor(column).getImmutable(i).contains(docId));
            }
          }
          ++docId;
        }
      } finally {
        // Close the reader even when an assertion fails part-way through the scan.
        reader.close();
      }
    } finally {
      // Also covers the case where constructing the DataFileStream itself throws.
      avroInput.close();
    }
  }
}