io.airlift.compress.SnappyBench.java Source code

Java tutorial

Introduction

Here is the source code for io.airlift.compress.SnappyBench.java

Source

/*
 * Copyright (C) 2011 the original author or authors.
 * See the notice.md file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.airlift.compress;

import com.google.common.base.Throwables;
import com.google.common.io.ByteStreams;
import com.google.common.io.Files;
import com.google.common.primitives.Longs;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.concurrent.TimeUnit;

import static java.lang.String.format;
import static io.airlift.compress.BenchmarkDriver.JAVA_BLOCK;
import static io.airlift.compress.BenchmarkDriver.JAVA_STREAM;
import static io.airlift.compress.BenchmarkDriver.JNI_BLOCK;
import static io.airlift.compress.BenchmarkDriver.JNI_STREAM;

/**
 * Port of the micro-benchmarks for Snappy.
 * <p>
 * Make sure to run these with the server version of HotSpot.  I use the following configuration:
 * <pre>
 * {@code
 *   -Dorg.xerial.snappy.lib.name=libsnappyjava.jnilib -server -XX:+UseCompressedOops -Xms128M -Xmx128M -XX:+UseConcMarkSweepGC
 * }
 * </pre>
 */
public class SnappyBench {
    /** Number of timed runs taken per measurement; the median of these runs is what gets reported. */
    private static final int NUMBER_OF_RUNS = 5;
    /** Iteration count of the calibration pass, and the lower bound on iterations for any timed run. */
    private static final int CALIBRATE_ITERATIONS = 100;
    /** Length, in seconds, of each individual warm-up phase executed by {@code warmUp()}. */
    private static final int WARM_UP_SECONDS = 45;
    /** Target duration, in seconds, that calibration aims for when sizing a single timed run. */
    private static final int SECONDS_PER_RUN = 1;

    /**
     * Command-line entry point: verifies the codec, warms up the JIT, then prints the
     * block and stream benchmark tables to {@code System.err}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        System.err.printf("Running micro-benchmarks.\n");

        final SnappyBench bench = new SnappyBench();

        // Fail fast: every input must survive a compress/uncompress round trip
        // before any timing is worth reporting.
        bench.verify();

        // Exercise the measured code paths up front so the JIT has compiled them
        // by the time the timed runs start.
        bench.warmUp();

        // Block API: first driver is the baseline, second is compared against it.
        bench.runCompress("Block Compress", JNI_BLOCK, JAVA_BLOCK);
        bench.runUncompress("Block Uncompress", JNI_BLOCK, JAVA_BLOCK);
        bench.runRoundTrip("Block Round Trip", JNI_BLOCK, JAVA_BLOCK);

        // Stream API, same baseline/candidate ordering.
        bench.runCompress("Stream Compress (no checksum)", JNI_STREAM, JAVA_STREAM);
        bench.runUncompress("Stream Uncompress (no checksum)", JNI_STREAM, JAVA_STREAM);
        bench.runRoundTrip("Stream RoundTrip (no checksum)", JNI_STREAM, JAVA_STREAM);
    }

    /**
     * Round-trips every {@link TestData} input, first through the {@code Snappy} block
     * API and then through {@code SnappyOutputStream}/{@code SnappyInputStream}.
     * All block checks run before the first stream check.
     *
     * @throws AssertionError if any round trip does not reproduce the original bytes
     */
    public void verify() {
        // Pass 1: block API for every input.
        final TestData[] blockInputs = TestData.values();
        for (int i = 0; i < blockInputs.length; i++) {
            verifyBlockRoundTrip(blockInputs[i]);
        }

        // Pass 2: stream API for every input.
        final TestData[] streamInputs = TestData.values();
        for (int i = 0; i < streamInputs.length; i++) {
            verifyStreamRoundTrip(streamInputs[i]);
        }
    }

    /** Compresses one input with the block API and decodes it twice, checking both results. */
    private static void verifyBlockRoundTrip(TestData input) {
        final byte[] original = input.getContents();
        final byte[] worstCaseBuffer = new byte[Snappy.maxCompressedLength(original.length)];
        final int packedLength = Snappy.compress(original, 0, original.length, worstCaseBuffer, 0);

        final byte[] restored = new byte[original.length];

        // First decode straight out of the oversized worst-case buffer.
        Snappy.uncompress(worstCaseBuffer, 0, packedLength, restored, 0);
        if (!Arrays.equals(restored, input.getContents())) {
            throw new AssertionError("Failed for " + input);
        }

        // Wipe the output and decode again from an exactly-sized copy of the compressed
        // bytes -- presumably to catch a decoder that depends on slack after the data.
        Arrays.fill(restored, (byte) 0);
        final byte[] exactFit = Arrays.copyOf(worstCaseBuffer, packedLength);
        Snappy.uncompress(exactFit, 0, packedLength, restored, 0);
        if (!Arrays.equals(restored, input.getContents())) {
            throw new AssertionError("Failed for " + input);
        }
    }

    /** Writes one input through the Snappy output stream and reads it back through the input stream. */
    private static void verifyStreamRoundTrip(TestData input) {
        try {
            final byte[] original = input.getContents();

            final ByteArrayOutputStream sink = new ByteArrayOutputStream(
                    Snappy.maxCompressedLength(original.length));
            final SnappyOutputStream encoder = new SnappyOutputStream(sink);
            encoder.write(original);
            encoder.close();

            final byte[] encoded = sink.toByteArray();
            final SnappyInputStream decoder = new SnappyInputStream(new ByteArrayInputStream(encoded));
            final byte[] restored = ByteStreams.toByteArray(decoder);

            if (!Arrays.equals(restored, input.getContents())) {
                throw new AssertionError("Failed for " + input);
            }
        } catch (IOException e) {
            throw Throwables.propagate(e);
        }
    }

    /**
     * Warms up the Java code paths so they are JIT-compiled before anything is timed.
     * Four phases run back to back, each lasting about {@code WARM_UP_SECONDS}:
     * block compress, block uncompress, stream compress, stream uncompress.
     */
    public void warmUp() {
        runWarmUpPhase(JAVA_BLOCK, true);
        runWarmUpPhase(JAVA_BLOCK, false);
        runWarmUpPhase(JAVA_STREAM, true);
        runWarmUpPhase(JAVA_STREAM, false);
    }

    /**
     * Repeatedly benchmarks every {@link TestData} input with the given driver until
     * {@code WARM_UP_SECONDS} have elapsed. At least one full pass over the inputs is
     * always made, and a pass that is in progress when the deadline expires is finished.
     *
     * @param driver      driver whose code path is being warmed up
     * @param compression {@code true} to exercise compression, {@code false} for uncompression
     */
    private void runWarmUpPhase(BenchmarkDriver driver, boolean compression) {
        final long deadline = System.nanoTime() + TimeUnit.SECONDS.toNanos(WARM_UP_SECONDS);
        do {
            for (TestData testData : TestData.values()) {
                if (compression) {
                    benchmarkCompress(testData, driver, 100);
                } else {
                    benchmarkUncompress(testData, driver, 100);
                }
            }
            // nanoTime() values have an arbitrary origin and may overflow, so they must be
            // compared via their difference rather than with a direct '<'.
        } while (System.nanoTime() - deadline < 0);
    }

    /**
     * Prints the opening of one result table to {@code System.err}: two blank lines,
     * a {@code ###} title line, the opening {@code <pre><code>} tag, two column-heading
     * rows and a dashed separator.
     *
     * @param benchmarkTitle title printed after the {@code ###} marker
     */
    private static void printHeader(String benchmarkTitle) {
        // Both heading rows share one layout so the columns line up.
        final String headingRow = "%-8s %8s %9s %9s %11s %11s %7s\n";

        System.err.println();
        System.err.println();
        System.err.println("### " + benchmarkTitle);
        System.err.println("<pre><code>");
        System.err.printf(headingRow, "", "", "JNI", "Java", "JNI", "Java", "");
        System.err.printf(headingRow, "Input", "Size", "Compress", "Compress", "Throughput", "Throughput",
                "Change");
        System.err.printf("---------------------------------------------------------------------\n");
    }

    /** Prints the closing {@code </code></pre>} tags of a result table to {@code System.err}. */
    private static void printFooter() {
        final String closingTags = "</code></pre>";
        System.err.println(closingTags);
    }

    /**
     * Prints a complete compression table: header, one row per {@link TestData} input, footer.
     *
     * @param benchmarkTitle title shown in the table header
     * @param oldDriver      baseline driver
     * @param newDriver      driver compared against the baseline
     */
    public void runCompress(String benchmarkTitle, BenchmarkDriver oldDriver, BenchmarkDriver newDriver) {
        printHeader(benchmarkTitle);

        final TestData[] inputs = TestData.values();
        for (int i = 0; i < inputs.length; i++) {
            runCompress(inputs[i], oldDriver, newDriver);
        }

        printFooter();
    }

    /**
     * Benchmarks compression of one input with both drivers and prints one table row:
     * input, size, each driver's compression ratio (as a percentage), each driver's
     * throughput, and the throughput change of {@code newDriver} relative to {@code oldDriver}.
     * The iteration count is calibrated once, against {@code oldDriver}, and used for both.
     */
    private void runCompress(TestData testData, BenchmarkDriver oldDriver, BenchmarkDriver newDriver) {
        final long iterations = calibrateIterations(testData, oldDriver, true);

        final long baselineRate = benchmarkCompress(testData, oldDriver, iterations);
        final long candidateRate = benchmarkCompress(testData, newDriver, iterations);

        // Relative change of the candidate over the baseline, in percent.
        final double changePercent = 100.0d * (candidateRate - baselineRate) / baselineRate;

        System.err.printf("%-8s %8d %8.1f%% %8.1f%% %11s %11s %+6.1f%%  %s\n",
                testData,
                testData.size(),
                oldDriver.getCompressionRatio(testData) * 100.0,
                newDriver.getCompressionRatio(testData) * 100.0,
                toHumanReadableSpeed(baselineRate),
                toHumanReadableSpeed(candidateRate),
                changePercent,
                testData.getInfo());
    }

    /**
     * Measures compression throughput for one input.
     * Calls {@code driver.compress} {@code NUMBER_OF_RUNS} times, treats each returned
     * value as an elapsed time in nanoseconds, and derives the rate from the median run.
     *
     * @return uncompressed bytes processed per second, truncated to a {@code long}
     */
    private long benchmarkCompress(TestData testData, BenchmarkDriver driver, long iterations) {
        final long[] runTimes = new long[NUMBER_OF_RUNS];
        int run = 0;
        while (run < NUMBER_OF_RUNS) {
            runTimes[run] = driver.compress(testData, iterations);
            run++;
        }

        final double totalBytes = 1.0 * iterations * testData.size();
        final double medianSeconds = nanosToSeconds(getMedianValue(runTimes));
        return (long) (totalBytes / medianSeconds);
    }

    /**
     * Prints a complete uncompression table: header, one row per {@link TestData} input, footer.
     *
     * @param benchmarkTitle title shown in the table header
     * @param oldDriver      baseline driver
     * @param newDriver      driver compared against the baseline
     */
    public void runUncompress(String benchmarkTitle, BenchmarkDriver oldDriver, BenchmarkDriver newDriver) {
        printHeader(benchmarkTitle);

        final TestData[] inputs = TestData.values();
        for (int i = 0; i < inputs.length; i++) {
            runUncompress(inputs[i], oldDriver, newDriver);
        }

        printFooter();
    }

    /**
     * Benchmarks uncompression of one input with both drivers and prints one table row:
     * input, size, each driver's compression ratio (as a percentage), each driver's
     * throughput, and the throughput change of {@code newDriver} relative to {@code oldDriver}.
     * The iteration count is calibrated once, against {@code oldDriver}, and used for both.
     */
    private void runUncompress(TestData testData, BenchmarkDriver oldDriver, BenchmarkDriver newDriver) {
        final long iterations = calibrateIterations(testData, oldDriver, false);

        final long referenceSpeed = benchmarkUncompress(testData, oldDriver, iterations);
        final long contenderSpeed = benchmarkUncompress(testData, newDriver, iterations);

        // Relative change of the contender over the reference, in percent.
        final double deltaPercent = 100.0d * (contenderSpeed - referenceSpeed) / referenceSpeed;

        final double referenceRatio = oldDriver.getCompressionRatio(testData) * 100.0;
        final double contenderRatio = newDriver.getCompressionRatio(testData) * 100.0;

        System.err.printf("%-8s %8d %8.1f%% %8.1f%% %11s %11s %+6.1f%%  %s\n", testData, testData.size(),
                referenceRatio, contenderRatio,
                toHumanReadableSpeed(referenceSpeed), toHumanReadableSpeed(contenderSpeed),
                deltaPercent, testData.getInfo());
    }

    /**
     * Measures uncompression throughput for one input with the given driver.
     * Calls {@code driver.uncompress} {@code NUMBER_OF_RUNS} times, treats each returned
     * value as an elapsed time in nanoseconds, and derives the rate from the median run.
     *
     * @return uncompressed bytes produced per second, truncated to a {@code long}
     */
    private long benchmarkUncompress(TestData testData, BenchmarkDriver driver, long iterations) {
        final long[] samples = new long[NUMBER_OF_RUNS];
        for (int i = 0; i != samples.length; i++) {
            samples[i] = driver.uncompress(testData, iterations);
        }

        final long medianNanos = getMedianValue(samples);
        final double bytesProcessed = 1.0 * iterations * testData.size();
        return (long) (bytesProcessed / nanosToSeconds(medianNanos));
    }

    /**
     * Prints a complete round-trip table: header, one row per {@link TestData} input, footer.
     *
     * @param benchmarkTitle title shown in the table header
     * @param oldDriver      baseline driver
     * @param newDriver      driver compared against the baseline
     */
    public void runRoundTrip(String benchmarkTitle, BenchmarkDriver oldDriver, BenchmarkDriver newDriver) {
        printHeader(benchmarkTitle);

        final TestData[] inputs = TestData.values();
        for (int i = 0; i < inputs.length; i++) {
            runRoundTrip(inputs[i], oldDriver, newDriver);
        }

        printFooter();
    }

    /**
     * Benchmarks a round trip of one input with both drivers and prints one table row:
     * input, size, each driver's compression ratio (as a percentage), each driver's
     * throughput, and the throughput change of {@code newDriver} relative to {@code oldDriver}.
     * Note that the iteration count is calibrated from a compression-only pass of
     * {@code oldDriver}, not from a full round trip.
     */
    private void runRoundTrip(TestData testData, BenchmarkDriver oldDriver, BenchmarkDriver newDriver) {
        final long iterations = calibrateIterations(testData, oldDriver, true);

        final long beforeRate = benchmarkRoundTrip(testData, oldDriver, iterations);
        final long afterRate = benchmarkRoundTrip(testData, newDriver, iterations);

        final String beforeText = toHumanReadableSpeed(beforeRate);
        final String afterText = toHumanReadableSpeed(afterRate);

        // Relative change from "before" to "after", in percent.
        final double gainPercent = 100.0d * (afterRate - beforeRate) / beforeRate;

        System.err.printf("%-8s %8d %8.1f%% %8.1f%% %11s %11s %+6.1f%%  %s\n",
                testData, testData.size(),
                oldDriver.getCompressionRatio(testData) * 100.0,
                newDriver.getCompressionRatio(testData) * 100.0,
                beforeText, afterText,
                gainPercent, testData.getInfo());
    }

    /**
     * Measures round-trip throughput for one input with the given driver.
     * Calls {@code driver.roundTrip} {@code NUMBER_OF_RUNS} times, treats each returned
     * value as an elapsed time in nanoseconds, and derives the rate from the median run.
     *
     * @return input bytes per second (input size times iterations over the median time),
     *         truncated to a {@code long}
     */
    private long benchmarkRoundTrip(TestData testData, BenchmarkDriver driver, long iterations) {
        final long[] timings = new long[NUMBER_OF_RUNS];
        for (int attempt = NUMBER_OF_RUNS - timings.length; attempt < timings.length; attempt++) {
            timings[attempt] = driver.roundTrip(testData, iterations);
        }

        final double volume = 1.0 * iterations * testData.size();
        final double seconds = nanosToSeconds(getMedianValue(timings));
        return (long) (volume / seconds);
    }

    /**
     * Estimates how many iterations make one timed run last about {@code SECONDS_PER_RUN}
     * seconds, by timing a short pass of {@code CALIBRATE_ITERATIONS} iterations.
     *
     * @param testData    input to calibrate with
     * @param driver      driver to calibrate with
     * @param compression {@code true} to time compression, {@code false} to time uncompression
     * @return the estimated iteration count, never less than {@code CALIBRATE_ITERATIONS}
     */
    private long calibrateIterations(TestData testData, BenchmarkDriver driver, boolean compression) {
        // Time a short pass to learn roughly how fast this input is.
        final long startedAt = System.nanoTime();
        if (!compression) {
            driver.uncompress(testData, CALIBRATE_ITERATIONS);
        } else {
            driver.compress(testData, CALIBRATE_ITERATIONS);
        }
        final long elapsedNanos = System.nanoTime() - startedAt;

        // No measurable time elapsed: nothing to extrapolate from, use the floor.
        if (elapsedNanos <= 0) {
            return CALIBRATE_ITERATIONS;
        }

        // Scale up to the target duration, but never run fewer iterations than calibration did.
        final double iterationsPerSecond = CALIBRATE_ITERATIONS / nanosToSeconds(elapsedNanos);
        final long projected = (long) (SECONDS_PER_RUN * iterationsPerSecond);
        return Math.max(projected, CALIBRATE_ITERATIONS);
    }

    /**
     * Converts a nanosecond count to fractional seconds.
     *
     * @param nanos duration in nanoseconds
     * @return the same duration in seconds
     */
    private double nanosToSeconds(long nanos) {
        final long nanosPerSecond = TimeUnit.SECONDS.toNanos(1);
        return (double) nanos / nanosPerSecond;
    }

    /**
     * Formats a throughput for display using 1024-based units.
     * Values below 1024 are printed as whole bytes; larger values are printed with one
     * decimal place as {@code kB/s}, {@code MB/s} or {@code GB/s}.
     *
     * @param bytesPerSecond throughput in bytes per second
     * @return the formatted speed, e.g. {@code "1.5kB/s"}
     */
    private String toHumanReadableSpeed(long bytesPerSecond) {
        final float unit = 1024.0f;

        if (bytesPerSecond < 1024) {
            return format("%dB/s", bytesPerSecond);
        }
        if (bytesPerSecond < 1024 * 1024) {
            return format("%.1fkB/s", bytesPerSecond / unit);
        }
        if (bytesPerSecond < 1024 * 1024 * 1024) {
            return format("%.1fMB/s", bytesPerSecond / (unit * unit));
        }
        return format("%.1fGB/s", bytesPerSecond / (unit * unit * unit));
    }

    /**
     * Returns the median of the given run times.
     * The input array is left untouched; a private copy is sorted instead. For an
     * even number of values the upper of the two middle values is returned.
     *
     * @param benchmarkRuns measured values, must contain at least one element
     * @return the element at index {@code length / 2} of the sorted values
     * @throws IndexOutOfBoundsException if {@code benchmarkRuns} is empty
     */
    private long getMedianValue(long[] benchmarkRuns) {
        // Sort a primitive copy: no boxing, and the caller's array keeps its order.
        final long[] sorted = benchmarkRuns.clone();
        Arrays.sort(sorted);
        return sorted[sorted.length / 2];
    }

    /**
     * The benchmark inputs. Each constant loads its file from the {@code testdata}
     * directory when the enum is initialized, and pre-compresses it once with
     * {@code org.xerial.snappy.Snappy}, so neither step happens inside a timed run.
     */
    @SuppressWarnings({ "UnusedDeclaration" })
    public enum TestData {
        html("html"),
        urls("urls.10K"),
        jpg("house.jpg", false),
        pdf("mapreduce-osdi-1.pdf"),
        html4("html_x_4"),
        cp("cp.html"),
        c("fields.c"),
        lsp("grammar.lsp"),
        xls("kennedy.xls"),
        txt1("alice29.txt"),
        txt2("asyoulik.txt"),
        txt3("lcet10.txt"),
        txt4("plrabn12.txt"),
        bin("ptt5"),
        sum("sum"),
        man("xargs.1"),
        pb("geo.protodata"),
        gaviota("kppkn.gtb");

        private final String fileName;
        private final boolean compressibleData;
        private final byte[] contents;
        private final byte[] compressed;

        /** Creates an input that is flagged as compressible. */
        TestData(String fileName) {
            this(fileName, true);
        }

        /**
         * Loads the file and pre-computes its compressed form.
         *
         * @param fileName         file name inside the {@code testdata} directory
         * @param compressibleData flag reported by {@link #isCompressibleData()} and {@link #getInfo()}
         */
        TestData(String fileName, boolean compressibleData) {
            this.fileName = fileName;
            this.compressibleData = compressibleData;
            this.contents = readTestFile(fileName);
            this.compressed = precompress(this.contents);
        }

        /** Reads {@code testdata/<fileName>} fully into memory. */
        private static byte[] readTestFile(String fileName) {
            try {
                return Files.toByteArray(new File("testdata", fileName));
            } catch (IOException e) {
                throw Throwables.propagate(e);
            }
        }

        /** Compresses {@code raw} with the org.xerial Snappy and returns an exactly-sized result. */
        private static byte[] precompress(byte[] raw) {
            final byte[] worstCase = new byte[Snappy.maxCompressedLength(raw.length)];
            try {
                final int length = org.xerial.snappy.Snappy.compress(raw, 0, raw.length, worstCase, 0);
                return Arrays.copyOf(worstCase, length);
            } catch (IOException e) {
                throw Throwables.propagate(e);
            }
        }

        /** @return the file name this input was loaded from */
        public String getFileName() {
            return fileName;
        }

        /** @return the compressibility flag given at construction */
        public boolean isCompressibleData() {
            return compressibleData;
        }

        /** @return the constant's name, suffixed with {@code " (not compressible)"} when so flagged */
        public String getInfo() {
            return compressibleData ? name() : name() + " (not compressible)";
        }

        /** @return a fresh copy of the uncompressed bytes */
        public byte[] getContents() {
            return Arrays.copyOf(contents, contents.length);
        }

        /** @return the uncompressed length in bytes */
        public int size() {
            return contents.length;
        }

        /** @return a fresh copy of the pre-compressed bytes */
        public byte[] getCompressed() {
            return Arrays.copyOf(compressed, compressed.length);
        }

        /** @return the pre-compressed length in bytes */
        public int compressedSize() {
            return compressed.length;
        }
    }
}