com.linkedin.pinot.core.data.readers.PinotSegmentRecordReaderTest.java Source code

Java tutorial

Introduction

Here is the source code for com.linkedin.pinot.core.data.readers.PinotSegmentRecordReaderTest.java

Source

/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.core.data.readers;

import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.TimeUnit;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.RandomStringUtils;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

import com.google.common.io.Files;
import com.linkedin.pinot.common.data.DimensionFieldSpec;
import com.linkedin.pinot.common.data.FieldSpec;
import com.linkedin.pinot.common.data.FieldSpec.DataType;
import com.linkedin.pinot.common.data.MetricFieldSpec;
import com.linkedin.pinot.common.data.Schema;
import com.linkedin.pinot.common.data.TimeFieldSpec;
import com.linkedin.pinot.common.data.TimeGranularitySpec;
import com.linkedin.pinot.core.data.GenericRow;
import com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig;
import com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl;

/**
 * Tests the PinotSegmentRecordReader to check that the records being generated
 * are the same as the records used to create the segment.
 *
 * <p>The fixture generates random rows, builds a segment from them in a temporary
 * directory, and the test then reads the segment back and compares it row by row
 * with the generated input.
 */
public class PinotSegmentRecordReaderTest {

    /** Number of rows generated for the test segment. */
    private static final int NUM_ROWS = 10000;
    /** Number of values placed in the multi-value dimension of every row. */
    private static final int NUM_MV_VALUES = 5;

    private static final String D_SV_1 = "d_sv_1";
    private static final String D_MV_1 = "d_mv_1";
    private static final String M1 = "m1";
    private static final String M2 = "m2";
    private static final String TIME = "t";

    private String segmentName;
    private Schema schema;
    private String segmentOutputDir;
    private File segmentIndexDir;
    private List<GenericRow> rows;
    private TestRecordReader recordReader;

    /**
     * Creates the schema, the random input rows and the segment that the test reads back.
     *
     * @throws Exception if segment creation fails
     */
    @BeforeClass
    public void setup() throws Exception {
        segmentName = "pinotSegmentRecordReaderTest";
        schema = createPinotSchema();
        segmentOutputDir = Files.createTempDir().toString();
        segmentIndexDir = new File(segmentOutputDir, segmentName);
        rows = createTestData();
        recordReader = new TestRecordReader(rows, schema);
        createSegment();
    }

    /**
     * Generates {@link #NUM_ROWS} rows with random values for every column of the schema.
     *
     * @return the generated rows, in the order they are fed to the segment creator
     */
    private List<GenericRow> createTestData() {
        List<GenericRow> rows = new ArrayList<>(NUM_ROWS);
        Random random = new Random();

        for (int i = 0; i < NUM_ROWS; i++) {
            Map<String, Object> fields = new HashMap<>();
            fields.put(D_SV_1, D_SV_1 + "_" + RandomStringUtils.randomAlphabetic(2));

            Object[] d2Array = new Object[NUM_MV_VALUES];
            for (int j = 0; j < NUM_MV_VALUES; j++) {
                d2Array[j] = D_MV_1 + "_" + j + "_" + RandomStringUtils.randomAlphabetic(2);
            }
            fields.put(D_MV_1, d2Array);

            // Clear the sign bit instead of calling Math.abs(): Math.abs(MIN_VALUE) is
            // still negative, so abs() on a random int/long is not guaranteed non-negative.
            fields.put(M1, random.nextInt() & Integer.MAX_VALUE);
            // nextFloat() is already in [0, 1), so no abs() is required.
            fields.put(M2, random.nextFloat());
            fields.put(TIME, random.nextLong() & Long.MAX_VALUE);

            GenericRow row = new GenericRow();
            row.init(fields);
            rows.add(row);
        }
        return rows;
    }

    /**
     * Builds the schema used by the test: two string dimensions, an int metric,
     * a float metric and a long time column with hour granularity.
     *
     * @return the schema describing the generated rows
     */
    private Schema createPinotSchema() {
        Schema testSchema = new Schema();
        testSchema.setSchemaName("schema");
        FieldSpec spec;
        spec = new DimensionFieldSpec(D_SV_1, DataType.STRING, true);
        testSchema.addField(spec);
        spec = new DimensionFieldSpec(D_MV_1, DataType.STRING, false);
        testSchema.addField(spec);
        spec = new MetricFieldSpec(M1, DataType.INT);
        testSchema.addField(spec);
        spec = new MetricFieldSpec(M2, DataType.FLOAT);
        testSchema.addField(spec);
        spec = new TimeFieldSpec(new TimeGranularitySpec(DataType.LONG, TimeUnit.HOURS, TIME));
        testSchema.addField(spec);
        return testSchema;
    }

    /**
     * Runs the segment creation driver over {@code recordReader} and verifies that
     * the expected segment directory exists afterwards.
     *
     * @throws Exception if the driver fails
     * @throws IllegalStateException if the segment directory was not created
     */
    private void createSegment() throws Exception {

        SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(schema);
        segmentGeneratorConfig.setTableName(segmentName);
        segmentGeneratorConfig.setOutDir(segmentOutputDir);
        segmentGeneratorConfig.setSegmentName(segmentName);

        SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
        driver.init(segmentGeneratorConfig, recordReader);
        driver.build();

        if (!segmentIndexDir.exists()) {
            throw new IllegalStateException("Segment generation failed");
        }
    }

    /**
     * Reads every record back from the generated segment and checks that the row
     * count and each column value match the rows used to build it.
     *
     * @throws Exception if the reader cannot be created, read or closed
     */
    @Test
    public void testPinotSegmentRecordReader() throws Exception {
        List<GenericRow> outputRows = new ArrayList<>();

        PinotSegmentRecordReader pinotSegmentRecordReader = new PinotSegmentRecordReader(segmentIndexDir);
        try {
            pinotSegmentRecordReader.init();
            while (pinotSegmentRecordReader.hasNext()) {
                outputRows.add(pinotSegmentRecordReader.next());
            }
        } finally {
            // Release the reader even when reading fails part-way through.
            pinotSegmentRecordReader.close();
        }

        Assert.assertEquals(outputRows.size(), rows.size(),
                "Number of rows returned by PinotSegmentRecordReader is incorrect");
        for (int i = 0; i < outputRows.size(); i++) {
            GenericRow outputRow = outputRows.get(i);
            GenericRow row = rows.get(i);
            Assert.assertEquals(outputRow.getValue(D_SV_1), row.getValue(D_SV_1),
                    "Mismatch in column " + D_SV_1 + " at row " + i);
            Assert.assertEquals(outputRow.getValue(D_MV_1), row.getValue(D_MV_1),
                    "Mismatch in column " + D_MV_1 + " at row " + i);
            Assert.assertEquals(outputRow.getValue(M1), row.getValue(M1),
                    "Mismatch in column " + M1 + " at row " + i);
            Assert.assertEquals(outputRow.getValue(M2), row.getValue(M2),
                    "Mismatch in column " + M2 + " at row " + i);
            Assert.assertEquals(outputRow.getValue(TIME), row.getValue(TIME),
                    "Mismatch in column " + TIME + " at row " + i);
        }
    }

    /**
     * Deletes the temporary segment output directory, if one was created.
     */
    @AfterClass
    public void cleanup() {
        // setup() may have failed before the directory was assigned; new File(null) would
        // throw and hide the original failure.
        if (segmentOutputDir != null) {
            FileUtils.deleteQuietly(new File(segmentOutputDir));
        }
    }
}