Java tutorial
/* * Copyright 2015 Daniel Huss * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * specific language governing permissions and limitations under the License. */ package de.unentscheidbar.csv2; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.Reader; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.concurrent.TimeUnit; import org.apache.commons.csv.CSVFormat; import org.apache.commons.io.IOUtils; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; import org.openjdk.jmh.annotations.Level; import org.openjdk.jmh.annotations.Measurement; import org.openjdk.jmh.annotations.Mode; import org.openjdk.jmh.annotations.OutputTimeUnit; import org.openjdk.jmh.annotations.Param; import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; import org.openjdk.jmh.annotations.TearDown; import org.openjdk.jmh.annotations.Warmup; import org.skife.csv.SimpleReader; import org.supercsv.io.CsvListReader; import org.supercsv.prefs.CsvPreference; import au.com.bytecode.opencsv.CSVReader; /** * Compares the speed of some CSV readers/parsers, including ours. All readers are fed the same CSV data from memory, so * we're really just testing the parser speed, not the underlying I/O. */ @BenchmarkMode(Mode.Throughput) @OutputTimeUnit(TimeUnit.SECONDS) @State(Scope.Thread) @Warmup(iterations = 10) @Measurement(iterations = 10) @Fork(value = 1, jvmArgsPrepend = { "-Xmx1024m", "-Xms512m" }) @SuppressWarnings("javadoc") public class ParserBenchmark { @Param({ "1", "10", "100", "1000", "10000" }) private int kiloBytes; @Param private Dataset dataset; @Param private CsvImpl impl; private byte[] csv; public static final Object notARow = new Object(); public static final String randomCsvChars = "abcdefghijklmnopqrstuvqxyz0123456789 ,,\r\n"; @Setup(Level.Trial) public void setup() { int bytesPerCharacter = 2; // UTF-16 int charCount = (kiloBytes * 1000 / bytesPerCharacter); try { csv = dataset.get(charCount).getBytes(StandardCharsets.UTF_16BE); } catch (Exception e) { throw new AssertionError(e); } } @Benchmark public void readFromMemory() { try { Reader r = new InputStreamReader(new ByteArrayInputStream(csv), StandardCharsets.UTF_16BE); Collection<?> rows = impl.getRows(r); if (rows.size() == 0) throw new AssertionError(); for (Object row : rows) { if (row == notARow) throw new AssertionError(); } } catch (Exception e) { throw new AssertionError(e); } } @TearDown(Level.Trial) public void teardown() { // Prevent memory leak in case JMH keeps a reference to this object (don't think it does, though) csv = null; impl = null; } static interface Operation { Collection<?> getRows(Reader input) throws Exception; } public enum Dataset { sample1 { @Override String get(int charCount) throws IOException { String sample = IOUtils.toString(ParserBenchmark.class.getResource("/csv-500-chars.txt")); StringBuilder rows = new StringBuilder(charCount); while (rows.length() < charCount) rows.append(sample); return rows.toString(); } }, random { @Override String get(int charCount) throws IOException { char[] chars = new char[charCount]; try (Reader r = new RndCharReader(randomCsvChars.toCharArray())) { IOUtils.readFully(r, chars); } return String.valueOf(chars); } }; abstract String get(int charCount) throws Exception; } public enum CsvImpl implements Operation { commonsCsv1 { @Override public Collection<?> getRows(Reader input) throws Exception { return CSVFormat.DEFAULT.parse(input).getRecords(); } }, csv2 { private final CsvParser parser = CsvFormat.rfc4180Format().parser().setMaxRowLength(Integer.MAX_VALUE); @Override public Collection<?> getRows(Reader input) throws Exception { return parser.parse(input).getRows(); } }, openCsv { @Override public Collection<?> getRows(Reader input) throws Exception { try (CSVReader reader = new CSVReader(input, ',')) { return reader.readAll(); } } }, skifeCsv { private final SimpleReader reader = new SimpleReader(); @Override public Collection<?> getRows(Reader input) throws Exception { return reader.parse(input); } }, superCsv { @Override public Collection<?> getRows(Reader input) throws Exception { @SuppressWarnings("resource") CsvListReader r = new CsvListReader(input, CsvPreference.STANDARD_PREFERENCE); List<Object> result = new ArrayList<>(); List<?> line; while ((line = r.read()) != null) { result.add(line); } return result; } }; @Override public abstract Collection<?> getRows(Reader input) throws Exception; } }