com.linkedin.pinot.core.startree.StarTreeDataTable.java Source code

Java tutorial

Introduction

Here is the source code for com.linkedin.pinot.core.startree.StarTreeDataTable.java

Source

/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.core.startree;

import com.linkedin.pinot.common.utils.Pairs.IntPair;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.IntBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import org.apache.commons.lang3.tuple.Pair;
import xerial.larray.mmap.MMapBuffer;
import xerial.larray.mmap.MMapMode;

/**
 * File-backed table of fixed-width records. Each record occupies {@code totalSizeInBytes} bytes:
 * {@code dimensionSizeInBytes} bytes of dimension values followed by {@code metricSizeInBytes}
 * bytes of metric values. Dimension values are read as 4-byte big-endian ints.
 *
 * <p>The StarTreeDataTable should be able to handle the memory range greater than 2GB.
 * As a result, all fields related to memory position should be declared as long to avoid int overflow.
 */
public class StarTreeDataTable {

    private final File file;
    private final int dimensionSizeInBytes;
    private final int metricSizeInBytes;
    private final int totalSizeInBytes;
    final int[] sortOrder;

    /**
     * @param file file holding the records
     * @param dimensionSizeInBytes number of bytes taken by the dimension section of a record
     * @param metricSizeInBytes number of bytes taken by the metric section of a record
     * @param sortOrder dimension indexes, in priority order, used to compare records in {@link #sort}
     */
    public StarTreeDataTable(File file, int dimensionSizeInBytes, int metricSizeInBytes, int[] sortOrder) {
        this.file = file;
        this.dimensionSizeInBytes = dimensionSizeInBytes;
        this.metricSizeInBytes = metricSizeInBytes;
        this.sortOrder = sortOrder;
        this.totalSizeInBytes = dimensionSizeInBytes + metricSizeInBytes;
    }

    /**
     * Sorts the records in the given range in place, comparing the dimensions listed in
     * {@code sortOrder}.
     *
     * @param startRecordId inclusive
     * @param endRecordId exclusive
     */
    public void sort(int startRecordId, int endRecordId) {
        sort(startRecordId, endRecordId, 0, dimensionSizeInBytes);
    }

    /**
     * Sorts the records in the given range in place, in ascending order of the dimension values
     * selected by {@code sortOrder} (earlier entries take precedence).
     *
     * <p>Ranges holding fewer than two records (including empty or inverted ranges) are left
     * untouched. An {@link IOException} raised while mapping or closing the file is printed to
     * standard error and not propagated, so a failed sort is not visible to the caller.
     * NOTE(review): consider surfacing this failure once callers can handle it.
     *
     * @param startRecordId inclusive
     * @param endRecordId exclusive
     * @param startOffsetInRecord not used by this implementation; whole dimension section is read
     * @param endOffsetInRecord not used by this implementation; whole dimension section is read
     */
    public void sort(int startRecordId, int endRecordId, final int startOffsetInRecord,
            final int endOffsetInRecord) {
        final int length = endRecordId - startRecordId;
        if (length < 2) {
            // Zero or one record is already sorted; avoid mapping the file at all.
            return;
        }
        MMapBuffer mappedByteBuffer = null;
        try {
            final long startOffset = startRecordId * (long) totalSizeInBytes;
            mappedByteBuffer = new MMapBuffer(file, startOffset, length * (long) totalSizeInBytes,
                    MMapMode.READ_WRITE);
            List<Integer> sortedIds = computeSortedOrder(mappedByteBuffer, length);
            applyOrder(mappedByteBuffer, sortedIds);
            mappedByteBuffer.flush();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Always release the mapping, even when sorting fails with a runtime exception.
            if (mappedByteBuffer != null) {
                try {
                    mappedByteBuffer.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Returns the record ids {@code 0..length-1} (relative to the start of the mapping) ordered by
     * their dimension values according to {@code sortOrder}.
     */
    private List<Integer> computeSortedOrder(final MMapBuffer mappedByteBuffer, int length) {
        List<Integer> idList = new ArrayList<Integer>(length);
        for (int i = 0; i < length; i++) {
            idList.add(i);
        }
        Comparator<Integer> comparator = new Comparator<Integer>() {
            // Scratch arrays are refilled on every comparison; the int views share their storage.
            private final byte[] buf1 = new byte[dimensionSizeInBytes];
            private final byte[] buf2 = new byte[dimensionSizeInBytes];
            private final IntBuffer dims1 = ByteBuffer.wrap(buf1).asIntBuffer();
            private final IntBuffer dims2 = ByteBuffer.wrap(buf2).asIntBuffer();

            @Override
            public int compare(Integer o1, Integer o2) {
                long pos1 = o1 * (long) totalSizeInBytes;
                long pos2 = o2 * (long) totalSizeInBytes;
                mappedByteBuffer.toDirectByteBuffer(pos1, dimensionSizeInBytes).get(buf1);
                mappedByteBuffer.toDirectByteBuffer(pos2, dimensionSizeInBytes).get(buf2);
                for (int dimIndex : sortOrder) {
                    int v1 = dims1.get(dimIndex);
                    int v2 = dims2.get(dimIndex);
                    if (v1 != v2) {
                        // Integer.compare instead of subtraction: v1 - v2 can overflow.
                        return Integer.compare(v1, v2);
                    }
                }
                return 0;
            }
        };
        Collections.sort(idList, comparator);
        return idList;
    }

    /**
     * Rearranges the records inside the mapping so that position {@code i} ends up holding the
     * record whose original id is {@code sortedIds.get(i)}, swapping records pairwise.
     */
    private void applyOrder(MMapBuffer mappedByteBuffer, List<Integer> sortedIds) {
        final int length = sortedIds.size();
        // currentPositions[id] = position currently holding the record with original id "id";
        // indexToRecordIdMapping[pos] = original id of the record currently stored at "pos".
        int[] currentPositions = new int[length];
        int[] indexToRecordIdMapping = new int[length];
        for (int i = 0; i < length; i++) {
            currentPositions[i] = i;
            indexToRecordIdMapping[i] = i;
        }
        byte[] buf1 = new byte[totalSizeInBytes];
        byte[] buf2 = new byte[totalSizeInBytes];

        for (int i = 0; i < length; i++) {
            int thatRecordId = sortedIds.get(i);
            int thatRecordIdPos = currentPositions[thatRecordId];
            if (thatRecordIdPos == i) {
                // The wanted record already sits here; swapping it with itself would be a no-op.
                continue;
            }
            int thisRecordId = indexToRecordIdMapping[i];
            long thisOffset = i * (long) totalSizeInBytes;
            long thatOffset = thatRecordIdPos * (long) totalSizeInBytes;

            // Swap the two records: load both, then write each one into the other's slot.
            mappedByteBuffer.toDirectByteBuffer(thisOffset, totalSizeInBytes).get(buf1);
            mappedByteBuffer.toDirectByteBuffer(thatOffset, totalSizeInBytes).get(buf2);
            mappedByteBuffer.readFrom(buf2, 0, thisOffset, totalSizeInBytes);
            mappedByteBuffer.readFrom(buf1, 0, thatOffset, totalSizeInBytes);

            // Keep both lookup tables in sync with the swap.
            indexToRecordIdMapping[i] = thatRecordId;
            indexToRecordIdMapping[thatRecordIdPos] = thisRecordId;
            currentPositions[thatRecordId] = i;
            currentPositions[thisRecordId] = thatRecordIdPos;
        }
    }

    /**
     * Scans the given document range and reports, for each run of consecutive documents sharing
     * the same value in dimension {@code colIndex}, the document range of that run.
     *
     * <p>Entries are inserted in scan order. If the same value occurs in more than one run (that
     * is, the range is not grouped by this column), the later run replaces the earlier one in the
     * returned map. An empty range yields an empty map. An {@link IOException} is printed to
     * standard error and also yields an empty map.
     *
     * @param startDocId inclusive
     * @param endDocId exclusive
     * @param colIndex index of the dimension to group by; must not be null
     * @return start,end for each value. inclusive start, exclusive end
     */
    public Map<Integer, IntPair> groupByIntColumnCount(int startDocId, int endDocId, Integer colIndex) {
        final int length = endDocId - startDocId;
        if (length <= 0) {
            return Collections.emptyMap();
        }
        final int column = colIndex;
        MMapBuffer mappedByteBuffer = null;
        try {
            Map<Integer, IntPair> rangeMap = new LinkedHashMap<>();
            final long startOffset = startDocId * (long) totalSizeInBytes;
            mappedByteBuffer = new MMapBuffer(file, startOffset, length * (long) totalSizeInBytes,
                    MMapMode.READ_WRITE);
            byte[] dimBuff = new byte[dimensionSizeInBytes];
            // View over dimBuff; reflects whatever record was most recently copied into it.
            IntBuffer dimValues = ByteBuffer.wrap(dimBuff).asIntBuffer();
            int prevValue = 0;
            int prevStart = 0;
            for (int i = 0; i < length; i++) {
                mappedByteBuffer.toDirectByteBuffer(i * (long) totalSizeInBytes, dimensionSizeInBytes).get(dimBuff);
                int value = dimValues.get(column);
                // "i > 0" rather than a sentinel value, so that any int (including -1) can be a
                // legitimate column value without being merged into the following run.
                if (i > 0 && prevValue != value) {
                    rangeMap.put(prevValue, new IntPair(startDocId + prevStart, startDocId + i));
                    prevStart = i;
                }
                prevValue = value;
            }
            // Close the final run.
            rangeMap.put(prevValue, new IntPair(startDocId + prevStart, endDocId));
            return rangeMap;
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (mappedByteBuffer != null) {
                try {
                    mappedByteBuffer.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return Collections.emptyMap();
    }

    /**
     * Returns an iterator over the records in the given document range. Each element is a pair of
     * freshly allocated arrays: the dimension bytes (left) and the metric bytes (right).
     *
     * <p>The underlying file mapping is released when the last record has been returned; an
     * iterator that is abandoned before that point keeps its mapping open. For an empty range no
     * mapping is created and the iterator has no elements.
     *
     * @param startDocId inclusive
     * @param endDocId exclusive
     * @throws IOException if the file cannot be mapped
     */
    public Iterator<Pair<byte[], byte[]>> iterator(int startDocId, int endDocId) throws IOException {
        final int length = endDocId - startDocId;
        final long startOffset = startDocId * (long) totalSizeInBytes;
        // Only map the file when there is something to read, so that an empty range cannot
        // leave behind a mapping that nothing will ever close.
        final MMapBuffer mappedByteBuffer = length > 0
                ? new MMapBuffer(file, startOffset, length * (long) totalSizeInBytes, MMapMode.READ_WRITE)
                : null;
        return new Iterator<Pair<byte[], byte[]>>() {
            private int pointer = 0;

            @Override
            public boolean hasNext() {
                return pointer < length;
            }

            @Override
            public void remove() {
                throw new UnsupportedOperationException();
            }

            @Override
            public Pair<byte[], byte[]> next() {
                if (pointer >= length) {
                    // The mapping is already closed at this point; never touch it again.
                    throw new NoSuchElementException();
                }
                byte[] dimBuff = new byte[dimensionSizeInBytes];
                byte[] metBuff = new byte[metricSizeInBytes];
                long recordOffset = pointer * (long) totalSizeInBytes;
                mappedByteBuffer.toDirectByteBuffer(recordOffset, dimensionSizeInBytes).get(dimBuff);
                if (metricSizeInBytes > 0) {
                    mappedByteBuffer.toDirectByteBuffer(recordOffset + dimensionSizeInBytes, metricSizeInBytes)
                            .get(metBuff);
                }
                pointer = pointer + 1;
                if (pointer == length) {
                    // Last record handed out: release the mapping.
                    try {
                        mappedByteBuffer.close();
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
                return Pair.of(dimBuff, metBuff);
            }
        };
    }
}