com.addthis.ccompressor.ColumnRowCompressor.java Source code

Java tutorial

Introduction

Here is the source code for com.addthis.ccompressor.ColumnRowCompressor.java

Source

/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.addthis.ccompressor;

import javax.annotation.concurrent.GuardedBy;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

import com.addthis.bundle.core.Bundle;
import com.addthis.bundle.io.DataChannelCodec;
import com.addthis.bundle.value.ValueObject;

import com.google.common.collect.Iterables;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Buffers rows ({@link Bundle}s) and writes them to an output stream in blocks of up to
 * {@code blockSize} rows, with each block laid out column by column behind a block header.
 *
 * <p>This class is thread safe in the sense that multiple threads calling write should
 * not both be able to actually modify the output stream they passed in at the same time.
 * This is regardless of whether or not they are the same output stream object. It is not
 * okay to modify the columnList after constructing the object though.
 *
 * <p>All callers share one row buffer. Whichever thread holds the flush lock drains that
 * shared buffer into the output stream <em>it</em> was given, so rows added by one thread
 * may be written to the stream supplied by another thread.
 */
public class ColumnRowCompressor implements RowWriter {

    private static final Logger logger = LoggerFactory.getLogger(ColumnRowCompressor.class);

    /** Version byte handed to {@code BlockHeader.writeHeader} for every block. */
    private static final byte VERSION = 1;

    /** Column definitions, in the order their payloads are written. Must not be modified. */
    private final List<Column> columnList;

    /** Maximum number of rows per block; always strictly positive. */
    private final int blockSize;

    /** Rows accepted by {@code write} that have not yet been flushed into a block. */
    private final BlockingQueue<Bundle> rowBuffer = new LinkedBlockingQueue<>();

    /** Serializes every drain of {@link #rowBuffer} and every write to an output stream. */
    private final Lock flushLock = new ReentrantLock();

    /**
     * Creates a compressor for the given columns.
     *
     * @param columnMetaDataList columns to extract from each row; the list is kept by reference
     *                           and must not be modified afterwards
     * @param blockSize          maximum number of rows per block; must be greater than zero
     * @throws NullPointerException     if {@code columnMetaDataList} is null
     * @throws IllegalArgumentException if {@code blockSize} is not positive
     */
    public ColumnRowCompressor(List<Column> columnMetaDataList, int blockSize) {
        if (columnMetaDataList == null) {
            // Fail here rather than inside a flush, where the drained rows would already be lost.
            throw new NullPointerException("columnMetaDataList must not be null");
        }
        if (blockSize <= 0) {
            // drainTo(..., n) removes nothing for n <= 0 while "size() >= blockSize" stays true,
            // so a non-positive block size would make the flush loops spin forever.
            throw new IllegalArgumentException("blockSize must be positive, got " + blockSize);
        }
        this.columnList = columnMetaDataList;
        this.blockSize = blockSize;
    }

    /**
     * Buffers all given rows and flushes full blocks if enough rows are buffered.
     *
     * @param rows         rows to buffer; must not be null
     * @param outputStream stream that receives any blocks flushed by this call
     * @return {@code true} if this call wrote at least one block to {@code outputStream}
     * @throws IOException if writing a block fails
     */
    @Override
    public boolean write(Iterable<Bundle> rows, OutputStream outputStream) throws IOException {
        assert (rows != null);
        Iterables.addAll(rowBuffer, rows);
        return maybeFlush(outputStream);
    }

    /**
     * Buffers a single row and flushes full blocks if enough rows are buffered.
     *
     * @param row          row to buffer; a null row is ignored
     * @param outputStream stream that receives any blocks flushed by this call
     * @return {@code true} if this call wrote at least one block to {@code outputStream};
     *         {@code false} otherwise, including when {@code row} is null
     * @throws IOException if writing a block fails
     */
    @Override
    public boolean write(Bundle row, OutputStream outputStream) throws IOException {
        if (row == null) {
            return false;
        }
        rowBuffer.add(row);
        return maybeFlush(outputStream);
    }

    /**
     * Writes every buffered row to {@code outputStream}, including a final partial block.
     * Blocks until the flush lock is available.
     *
     * @param outputStream stream that receives the blocks
     * @throws IOException if writing a block fails
     */
    @Override
    public void flush(OutputStream outputStream) throws IOException {
        flushLock.lock();
        try {
            while (!rowBuffer.isEmpty()) {
                singleFlush(outputStream);
            }
        } finally {
            flushLock.unlock();
        }
    }

    /**
     * Writes full blocks to {@code out} while at least {@code blockSize} rows are buffered.
     * Does not wait for the flush lock: if another thread is already flushing, this returns
     * {@code false} immediately and the rows stay buffered.
     *
     * @param out stream that receives the blocks
     * @return {@code true} if this call wrote at least one block
     * @throws IOException if writing a block fails
     */
    private boolean maybeFlush(OutputStream out) throws IOException {
        boolean flushed = false;
        if (rowBuffer.size() >= blockSize) {
            if (flushLock.tryLock()) {
                try {
                    // Re-check under the lock; the previous holder may have drained the buffer.
                    while (rowBuffer.size() >= blockSize) {
                        singleFlush(out);
                        flushed = true;
                    }
                } finally {
                    flushLock.unlock();
                }
            }
        }
        return flushed;
    }

    /**
     * Drains up to {@code blockSize} rows from the buffer, pushes each row's values into the
     * columns, then writes one block (header followed by each column's bytes, in column order).
     * Must be called with {@link #flushLock} held.
     *
     * @param outputStream stream that receives the block
     * @throws IOException if a column has an unknown type or writing fails
     */
    @GuardedBy("flushLock")
    private void singleFlush(OutputStream outputStream) throws IOException {
        List<Bundle> blockBundles = new ArrayList<>();
        rowBuffer.drainTo(blockBundles, blockSize);
        // Scratch buffer reused for every RAW value; reset after each use.
        ByteArrayOutputStream valueOutputStream = new ByteArrayOutputStream();
        DataChannelCodec.ClassIndexMap classIndexMap = DataChannelCodec.createClassIndexMap();
        for (Bundle blockBundle : blockBundles) {
            for (Column column : columnList) {
                ValueObject val = column.getValue(blockBundle);
                switch (column.getColumnType()) {
                case RAW:
                    DataChannelCodec.encodeValue(val, valueOutputStream, classIndexMap);
                    column.push(valueOutputStream.toByteArray());
                    valueOutputStream.reset();
                    break;
                case TEXT255:
                case RUNLENGTH:
                    // A missing value is pushed as an empty byte array.
                    // NOTE(review): getBytes() uses the platform default charset, so the bytes
                    // depend on JVM configuration; confirm what the reader expects before
                    // pinning an explicit charset here.
                    column.push(val == null ? new byte[0] : val.asString().toString().getBytes());
                    break;
                case DELTAINT:
                    // A missing value is pushed as 0; the long value is narrowed to int.
                    column.push(val == null ? 0 : ((int) val.asLong().getLong()));
                    break;
                case DELTALONG:
                    // A missing value is pushed as 0.
                    column.push(val == null ? 0L : val.asLong().getLong());
                    break;
                default:
                    logger.error("Unknown column type {}", column.getColumnType());
                    throw new IOException("Unknown column type " + column.getColumnType());
                }

            }
        }
        // Collect every column's bytes first: the header needs the total payload size.
        int totalValueBytes = 0;
        List<byte[]> columnByteList = new ArrayList<>(columnList.size());
        for (Column column : columnList) {
            byte[] bytes = column.flush();
            totalValueBytes += bytes.length;
            columnByteList.add(bytes);
        }

        // write header
        BlockHeader.writeHeader(VERSION, totalValueBytes, columnList, outputStream);
        // write payload
        for (byte[] bytes : columnByteList) {
            outputStream.write(bytes);
        }
    }
}