de.unentscheidbar.csv2.ParserBenchmark.java Source code

Java tutorial

Introduction

Here is the source code for de.unentscheidbar.csv2.ParserBenchmark.java

Source

/*
 * Copyright 2015 Daniel Huss
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 */
package de.unentscheidbar.csv2;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.TimeUnit;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.io.IOUtils;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Warmup;
import org.skife.csv.SimpleReader;
import org.supercsv.io.CsvListReader;
import org.supercsv.prefs.CsvPreference;

import au.com.bytecode.opencsv.CSVReader;

/**
 * Compares the speed of some CSV readers/parsers, including ours. All readers are fed the same CSV data from memory, so
 * we're really just testing the parser speed, not the underlying I/O.
 * <p>
 * The input size ({@code kiloBytes}), the kind of input ({@link Dataset}) and the parser under test ({@link CsvImpl})
 * are all JMH parameters.
 */
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.SECONDS)
@State(Scope.Thread)
@Warmup(iterations = 10)
@Measurement(iterations = 10)
@Fork(value = 1, jvmArgsPrepend = { "-Xmx1024m", "-Xms512m" })
@SuppressWarnings("javadoc")
public class ParserBenchmark {

    /** Requested size of the encoded CSV input, in units of 1000 bytes (see {@link #setup()}). */
    @Param({ "1", "10", "100", "1000", "10000" })
    private int kiloBytes;

    /** Where the CSV text comes from. */
    @Param
    private Dataset dataset;

    /** The CSV parser implementation under test. */
    @Param
    private CsvImpl impl;

    /** The CSV text encoded as UTF-16BE; created in {@link #setup()} and released in {@link #teardown()}. */
    private byte[] csv;

    /**
     * Sentinel that is compared against every parsed row in {@link #readFromMemory()}, so that the benchmark actually
     * walks over the complete result.
     */
    public static final Object notARow = new Object();

    /**
     * Alphabet handed to {@code RndCharReader} for the {@link Dataset#random} dataset: lower-case letters, digits,
     * spaces, field separators and line-break characters.
     */
    public static final String randomCsvChars = "abcdefghijklmnopqrstuvwxyz0123456789        ,,\r\n";

    /**
     * Builds the in-memory CSV input once per trial.
     *
     * @throws AssertionError
     *             if the dataset cannot be created
     */
    @Setup(Level.Trial)
    public void setup() {

        int bytesPerCharacter = 2; // UTF-16
        int charCount = (kiloBytes * 1000 / bytesPerCharacter);

        try {
            csv = dataset.get(charCount).getBytes(StandardCharsets.UTF_16BE);
        } catch (Exception e) {
            throw new AssertionError("Could not create dataset " + dataset, e);
        }
    }

    /**
     * Parses the prepared CSV bytes with the selected implementation and touches every returned row.
     *
     * @throws AssertionError
     *             if the parser fails, returns no rows, or returns the {@link #notARow} sentinel
     */
    @Benchmark
    public void readFromMemory() {

        try {
            Reader r = new InputStreamReader(new ByteArrayInputStream(csv), StandardCharsets.UTF_16BE);
            Collection<?> rows = impl.getRows(r);
            if (rows.isEmpty()) {
                throw new AssertionError(impl + " returned no rows for dataset " + dataset);
            }
            for (Object row : rows) {
                if (row == notARow) {
                    throw new AssertionError(impl + " returned the notARow sentinel");
                }
            }
        } catch (Exception e) {
            // Messages are only built on the failure path, so the measured path is unaffected.
            throw new AssertionError(impl + " failed on dataset " + dataset, e);
        }
    }

    /** Drops the references held by this state object at the end of a trial. */
    @TearDown(Level.Trial)
    public void teardown() {

        // Prevent memory leak in case JMH keeps a reference to this object (don't think it does, though)
        csv = null;
        impl = null;
    }

    /** A way of turning CSV character input into a collection of rows. */
    interface Operation {

        /**
         * Reads all rows from the given input.
         *
         * @param input
         *            the CSV characters to parse
         * @return the parsed rows; the element type depends on the implementation
         * @throws Exception
         *             if the input cannot be read or parsed
         */
        Collection<?> getRows(Reader input) throws Exception;
    }

    /** The kinds of CSV text the parsers are fed. */
    public enum Dataset {

        /** Repeats the bundled sample text until at least the requested number of characters is reached. */
        sample1 {

            @Override
            String get(int charCount) throws IOException {

                String resourceName = "/csv-500-chars.txt";
                java.net.URL resource = ParserBenchmark.class.getResource(resourceName);
                if (resource == null) {
                    throw new IOException("Missing classpath resource: " + resourceName);
                }
                // Decode with an explicit charset so the input does not depend on the platform default.
                // NOTE(review): assumes the sample file is UTF-8 (or plain ASCII) - confirm.
                String sample = IOUtils.toString(resource, StandardCharsets.UTF_8);
                if (sample.isEmpty()) {
                    // An empty sample would never advance the loop below.
                    throw new IOException("Classpath resource is empty: " + resourceName);
                }
                // The loop may overshoot charCount by up to one sample, so reserve room for that as well.
                StringBuilder rows = new StringBuilder(charCount + sample.length());
                while (rows.length() < charCount) {
                    rows.append(sample);
                }
                return rows.toString();
            }
        },

        /** Exactly the requested number of characters, read from a {@code RndCharReader} over {@link #randomCsvChars}. */
        random {

            @Override
            String get(int charCount) throws IOException {

                char[] chars = new char[charCount];
                try (Reader r = new RndCharReader(randomCsvChars.toCharArray())) {
                    IOUtils.readFully(r, chars);
                }
                return String.valueOf(chars);
            }
        };

        /**
         * Creates the CSV text for this dataset.
         *
         * @param charCount
         *            the requested number of characters
         * @return the CSV text
         * @throws Exception
         *             if the text cannot be created
         */
        abstract String get(int charCount) throws Exception;
    }

    /** The CSV parser implementations being compared. */
    public enum CsvImpl implements Operation {

        /** Apache Commons CSV with its default format. */
        commonsCsv1 {

            @Override
            public Collection<?> getRows(Reader input) throws Exception {

                return CSVFormat.DEFAULT.parse(input).getRecords();
            }
        },

        /** This project's parser in RFC 4180 mode, with the row length limit raised to {@code Integer.MAX_VALUE}. */
        csv2 {

            // NOTE(review): one parser instance is reused for every invocation; whether CsvParser is safe to share
            // between benchmark threads is not visible here - confirm before running with more than one thread.
            private final CsvParser parser = CsvFormat.rfc4180Format().parser().setMaxRowLength(Integer.MAX_VALUE);

            @Override
            public Collection<?> getRows(Reader input) throws Exception {

                return parser.parse(input).getRows();
            }
        },

        /** opencsv with a comma separator. */
        openCsv {

            @Override
            public Collection<?> getRows(Reader input) throws Exception {

                try (CSVReader reader = new CSVReader(input, ',')) {
                    return reader.readAll();
                }
            }
        },

        /** skife CSV's {@code SimpleReader}. */
        skifeCsv {

            // NOTE(review): one reader instance is reused for every invocation; thread-safety of SimpleReader is not
            // visible here - confirm before running with more than one thread.
            private final SimpleReader reader = new SimpleReader();

            @Override
            public Collection<?> getRows(Reader input) throws Exception {

                return reader.parse(input);
            }
        },

        /** Super CSV's list reader with the standard preference. */
        superCsv {

            @Override
            public Collection<?> getRows(Reader input) throws Exception {

                // Deliberately left unclosed: the benchmark only passes readers over an in-memory byte array, and
                // closing here would change what is being measured.
                @SuppressWarnings("resource")
                CsvListReader r = new CsvListReader(input, CsvPreference.STANDARD_PREFERENCE);
                List<Object> result = new ArrayList<>();
                List<?> line;
                while ((line = r.read()) != null) {
                    result.add(line);
                }
                return result;
            }
        };

        @Override
        public abstract Collection<?> getRows(Reader input) throws Exception;

    }
}