com.linkedin.pinot.perf.ForwardIndexWriterBenchmark.java Source code

Java tutorial

Introduction

Here is the source code for com.linkedin.pinot.perf.ForwardIndexWriterBenchmark.java

Source

/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.perf;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileReader;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.io.IOUtils;
import org.roaringbitmap.buffer.MutableRoaringBitmap;

import com.linkedin.pinot.core.io.writer.impl.v1.FixedBitMultiValueWriter;

/**
 * Stand-alone benchmark that reads a raw multi-value column dump (one document per line,
 * comma-separated integer values), writes it out through {@link FixedBitMultiValueWriter} and
 * prints size estimates for several ways of encoding the per-document boundaries
 * (offset + length, offset only, per-chunk roaring bitmap, per-chunk plain bitset).
 */
public class ForwardIndexWriterBenchmark {

    /** Number of documents grouped into one chunk for the bitmap/bitset size estimates. */
    private static final int CHUNK_SIZE = 2048;

    /**
     * Parses {@code rawFile}, writes the values to {@code output.mv.fwd} in the working directory
     * and prints the size statistics to standard output.
     *
     * @param rawFile text file with one document per line; each line is a comma-separated list of
     *                integers
     * @throws Exception if the file cannot be read, a token is not a valid integer, or the
     *                   forward index writer fails
     */
    public static void convertRawToForwardIndex(File rawFile) throws Exception {
        List<String> lines;
        FileReader reader = new FileReader(rawFile);
        try {
            lines = IOUtils.readLines(reader);
        } finally {
            // The original leaked this reader; always release the file handle.
            reader.close();
        }

        int totalDocs = lines.size();
        int max = Integer.MIN_VALUE;
        int totalNumValues = 0;
        int[][] data = new int[totalDocs][];
        for (int i = 0; i < totalDocs; i++) {
            String[] split = lines.get(i).split(",");
            totalNumValues += split.length;
            data[i] = new int[split.length];
            for (int j = 0; j < split.length; j++) {
                int val = Integer.parseInt(split[j]);
                data[i][j] = val;
                if (val > max) {
                    max = val;
                }
            }
        }

        int maxBitsNeeded = bitsNeeded(max);
        int size = CHUNK_SIZE;
        // Start offset (in values) of every document of the current chunk, relative to the
        // chunk's first value.
        // NOTE(review): the original allocated this array but never wrote to it, so every bitmap
        // was built from all-zero offsets. Chunk-relative start offsets are the assumed intent
        // (they match the "custom bitset" estimate below) - confirm.
        int[] offsets = new int[size];
        long bitMapSize = 0;
        File outputFile = new File("output.mv.fwd");

        FixedBitMultiValueWriter fixedBitSkipListSCMVWriter = new FixedBitMultiValueWriter(outputFile, totalDocs,
                totalNumValues, maxBitsNeeded);
        try {
            int docsInChunk = 0;
            int offsetInChunk = 0;
            for (int i = 0; i < totalDocs; i++) {
                fixedBitSkipListSCMVWriter.setIntArray(i, data[i]);
                offsets[docsInChunk++] = offsetInChunk;
                offsetInChunk += data[i].length;
                // Flush on a full chunk or on the trailing partial chunk. The partial chunk
                // holds docsInChunk entries (the original copied one entry too few).
                if (docsInChunk == size || i == totalDocs - 1) {
                    bitMapSize += serializedBitmapSize(offsets, docsInChunk);
                    docsInChunk = 0;
                    offsetInChunk = 0;
                }
            }
        } finally {
            // Close even when writing fails so the output file handle is not leaked.
            fixedBitSkipListSCMVWriter.close();
        }

        // Ceiling division: a trailing partial chunk counts, an exact multiple adds no extra chunk.
        long numChunks = ((long) totalDocs + size - 1) / size;
        // All size arithmetic is done in long; the int products overflow for large inputs.
        long totalBits = (long) totalNumValues * maxBitsNeeded;
        long dataSizeinBytes = (totalBits + 7) / 8;
        long customBitsetSize = ((long) totalNumValues + 7) / 8;

        System.out.println("Output file size:" + outputFile.length());
        System.out.println("totalNumberOfDoc\t\t\t:" + totalDocs);
        System.out.println("totalNumberOfValues\t\t\t:" + totalNumValues);
        System.out.println("chunk size\t\t\t\t:" + size);
        System.out.println("Num chunks\t\t\t\t:" + numChunks);

        System.out.println("Raw data size with fixed bit encoding\t:" + dataSizeinBytes);
        System.out.println("\nPer encoding size");
        System.out.println();
        System.out.println("size (offset + length)\t\t\t:" + (((long) totalDocs * (4 + 4)) + dataSizeinBytes));
        System.out.println();
        System.out.println("size (offset only)\t\t\t:" + (((long) totalDocs * 4) + dataSizeinBytes));
        System.out.println();
        System.out.println("bitMapSize\t\t\t\t:" + bitMapSize);
        System.out.println("size (with bitmap)\t\t\t:" + (bitMapSize + (numChunks * 4) + dataSizeinBytes));

        System.out.println();
        System.out.println("Custom Bitset\t\t\t\t:" + customBitsetSize);
        System.out.println("size (with custom bitset)\t\t\t:"
                + (customBitsetSize + (numChunks * 4) + dataSizeinBytes));
    }

    /**
     * Returns the number of bits required to represent every value in {@code [0, maxValue]}.
     * Unlike {@code ceil(log2(maxValue))} this is exact for powers of two (8 needs 4 bits) and
     * never returns less than one bit, which also covers an empty input.
     */
    private static int bitsNeeded(int maxValue) {
        if (maxValue < 1) {
            return 1;
        }
        return Integer.SIZE - Integer.numberOfLeadingZeros(maxValue);
    }

    /**
     * Builds a roaring bitmap from the first {@code count} entries of {@code offsets} and returns
     * the number of bytes its serialized form occupies.
     */
    private static int serializedBitmapSize(int[] offsets, int count) throws Exception {
        MutableRoaringBitmap bitmap = MutableRoaringBitmap.bitmapOf(Arrays.copyOf(offsets, count));
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(bos);
        try {
            bitmap.serialize(dos);
        } finally {
            dos.close();
        }
        return bos.size();
    }

    /**
     * Runs the benchmark against the fixed input file {@code /tmp/output.mv.raw}.
     *
     * @param args ignored
     * @throws Exception if the conversion fails
     */
    public static void main(String[] args) throws Exception {
        convertRawToForwardIndex(new File("/tmp/output.mv.raw"));
    }
}