dk.statsbiblioteket.util.LineReaderTest.java Source code

Java tutorial

Introduction

Here is the source code for dk.statsbiblioteket.util.LineReaderTest.java

Source

/* $Id: LineReaderTest.java,v 1.2 2007/12/04 13:22:01 mke Exp $
 * $Revision: 1.2 $
 * $Date: 2007/12/04 13:22:01 $
 * $Author: mke $
 *
 * The SB Util Library.
 * Copyright (C) 2005-2007  The State and University Library of Denmark
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */
package dk.statsbiblioteket.util;

import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import java.io.*;
import java.net.URISyntaxException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.*;

@SuppressWarnings({ "DuplicateStringLiteralInspection" })
public class LineReaderTest extends TestCase {
    private static Log log = LogFactory.getLog(LineReaderTest.class);

    private static final int LINES = 376;
    private final File tempFolder;
    File logfile;
    private File TMPFOLDER = new File(System.getProperty("java.io.tmpdir"), "linereadertest");

    public LineReaderTest(String name) throws URISyntaxException {
        super(name);
        logfile = new File(Thread.currentThread().getContextClassLoader()
                .getResource("data/website-performance-info.log.2007-04-01").toURI());
        tempFolder = logfile.getParentFile();
    }

    @Override
    public void setUp() throws Exception {
        super.setUp();
        if (TMPFOLDER.exists()) {
            Files.delete(TMPFOLDER);
        }
        TMPFOLDER.mkdirs();
    }

    @Override
    public void tearDown() throws Exception {
        super.tearDown();
    }

    public void testNIO() throws Exception {
        byte[] INITIAL = new byte[] { 1, 2, 3, 4 };
        byte[] EXTRA = new byte[] { 5, 6, 7, 8 };
        byte[] FULL = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8 };
        byte[] FIFTH = new byte[] { 87 };
        byte[] FULL_WITH_FIFTH = new byte[] { 1, 2, 3, 4, 87, 6, 7, 8 };

        // Create temp-file with content
        File temp = createTempFile();
        FileOutputStream fileOut = new FileOutputStream(temp, true);
        fileOut.write(INITIAL);
        fileOut.close();

        checkContent("The plain test-file should be correct", temp, INITIAL);
        {
            // Read the 4 bytes
            RandomAccessFile input = new RandomAccessFile(temp, "r");
            FileChannel channelIn = input.getChannel();
            ByteBuffer buffer = ByteBuffer.allocate(4096);
            channelIn.position(0);
            assertEquals("Buffer read should read full length", INITIAL.length, channelIn.read(buffer));
            buffer.position(0);

            checkContent("Using buffer should produce the right bytes", INITIAL, buffer);
            channelIn.close();
            input.close();
        }
        {
            // Fill new buffer
            ByteBuffer outBuffer = ByteBuffer.allocate(4096);
            outBuffer.put(EXTRA);
            outBuffer.flip();
            assertEquals("The limit of the outBuffer should be correct", EXTRA.length, outBuffer.limit());

            // Append new buffer to end
            RandomAccessFile output = new RandomAccessFile(temp, "rw");
            FileChannel channelOut = output.getChannel();
            channelOut.position(INITIAL.length);
            assertEquals("All bytes should be written", EXTRA.length, channelOut.write(outBuffer));
            channelOut.close();
            output.close();
            checkContent("The resulting file should have the full output", temp, FULL);
        }

        {
            // Fill single byte buffer
            ByteBuffer outBuffer2 = ByteBuffer.allocate(4096);
            outBuffer2.put(FIFTH);
            outBuffer2.flip();
            assertEquals("The limit of the second outBuffer should be correct", FIFTH.length, outBuffer2.limit());

            // Insert byte in the middle
            RandomAccessFile output2 = new RandomAccessFile(temp, "rw");
            FileChannel channelOut2 = output2.getChannel();
            channelOut2.position(4);
            assertEquals("The FIFTH should be written", FIFTH.length, channelOut2.write(outBuffer2));
            channelOut2.close();
            output2.close();
            checkContent("The resulting file with fifth should be complete", temp, FULL_WITH_FIFTH);
        }
    }

    public void testBitFiddling() throws Exception {
        assertEquals("Simple byte => int 129", 129, (byte) -127 & 0xFF);
        assertEquals("Simple byte => int 131", 131, (byte) -125 & 0xFF);
        assertEquals("Simple byte => int 80", 80, (byte) 80 & 0xFF);
    }

    private String fixISO(String in) {
        if (in == null) {
            return in;
        }
        return in.replaceAll("", "").replaceAll("", "").replaceAll("", "")
                .replaceAll("", "").replaceAll("", "").replaceAll("", "").replaceAll("", "")
                .replaceAll("", "");
    }

    public void testEOF() throws Exception {
        LineReader lr = new LineReader(logfile, "r");
        long fileSize = logfile.length();
        long counter = 0;
        while (!lr.eof()) {
            lr.readByte();
            counter++;
        }
        assertEquals("The amount of read bytes should match the file size", fileSize, counter);
        try {
            lr.readByte();
            fail("Reading past the file length should throw an exception");
        } catch (EOFException e) {
            // Expected
        }
        lr.close();
    }

    public void testReadByte() throws Exception {
        RandomAccessFile ra = new RandomAccessFile(logfile, "r");
        LineReader lr = new LineReader(logfile, "r");
        int counter = 1;
        while (!lr.eof()) {
            assertEquals("Byte #" + counter++ + " should be read correct", ra.readByte(), lr.readByte());
        }
        ra.close();
        lr.close();
    }

    public void testVsRandomAccess() throws Exception {
        RandomAccessFile ra = new RandomAccessFile(logfile, "r");
        LineReader lr = new LineReader(logfile, "r");
        int count = 0;
        while (count++ < LINES) {
            assertEquals("The lr line should match ra line", ++count + "'" + fixISO(ra.readLine()) + "'",
                    count + "'" + lr.readLine() + "'");
        }
    }

    public void testEOFLines() throws Exception {
        LineReader lr = new LineReader(logfile, "r");
        assertFalse("EOL should not be reached for fresh file", lr.eof());
        for (int i = 0; i < LINES - 1; i++) {
            lr.readLine();
            assertFalse("EOL should not be reached at line " + (i + 1), lr.eof());
        }
        lr.readLine();
        assertTrue("EOL should be reached after " + LINES + " lines", lr.eof());
    }

    public void dumpSpeeds() throws Exception {
        dumpSequentialLR();
        dumpSequentialRA();
        dumpSpeedLR();
        dumpSpeedRA();
    }

    /*    public void testMem() throws Exception {
    System.out.println("Total mem before allocation: "
                       + Runtime.getRuntime().totalMemory());
    //noinspection MismatchedReadAndWriteOfArray
    int[] a = new int[1024*1024*10];
    for (int i = 0 ; i < a.length ; i++) {
        a[i] = i;
    }
    System.out.println("Total mem after allocation: " 
                       + Runtime.getRuntime().totalMemory());
        }*/

    private int SPEED_SEEKS = 20000;
    private int SEQUENTIAL_RUNS = 50;

    public void dumpSpeedLR() throws Exception {
        Random random = new Random();
        LineReader lr = new LineReader(logfile, "r");
        long[] pos = getPositions();
        // Warming up
        for (int i = 0; i < 1000; i++) {
            lr.seek(pos[random.nextInt(LINES)]);
            lr.readLine();
        }
        Profiler profiler = new Profiler();
        profiler.setExpectedTotal(SPEED_SEEKS);
        for (int i = 0; i < SPEED_SEEKS; i++) {
            lr.seek(pos[random.nextInt(LINES)]);
            lr.readLine();
            profiler.beat();
        }
        System.out.println("Performed " + SPEED_SEEKS + " LR seeks & " + "reads at "
                + Math.round(profiler.getBps(false)) + " seeks/second");
    }

    public void dumpSequentialLR() throws Exception {
        LineReader lr = new LineReader(logfile, "r");
        for (int i = 0; i < LINES; i++) {
            lr.readLine();
        }
        lr.seek(0);
        Profiler profiler = new Profiler();
        profiler.setExpectedTotal(SEQUENTIAL_RUNS);
        for (int i = 0; i < SEQUENTIAL_RUNS; i++) {
            lr.seek(0);
            for (int j = 0; j < LINES; j++) {
                lr.readLine();
            }
            profiler.beat();
        }
        System.out.println("Performed " + SEQUENTIAL_RUNS + " full LR reads at "
                + Math.round(profiler.getBps(false)) + " reads/second");
    }

    public void dumpSpeedRA() throws Exception {
        Random random = new Random();
        RandomAccessFile ra = new RandomAccessFile(logfile, "r");
        long[] pos = getPositions();
        // Warming up
        for (int i = 0; i < 1000; i++) {
            ra.seek(pos[random.nextInt(LINES)]);
            ra.readLine();
        }
        Profiler profiler = new Profiler();
        profiler.setExpectedTotal(SPEED_SEEKS);
        for (int i = 0; i < SPEED_SEEKS; i++) {
            ra.seek(pos[random.nextInt(LINES)]);
            ra.readLine();
            profiler.beat();
        }
        System.out.println("Performed " + SPEED_SEEKS + " RA seeks & " + "reads at "
                + Math.round(profiler.getBps(false)) + " seeks/second");
    }

    public void dumpSequentialRA() throws Exception {
        RandomAccessFile ra = new RandomAccessFile(logfile, "r");
        for (int i = 0; i < LINES; i++) {
            ra.readLine();
        }
        Profiler profiler = new Profiler();
        profiler.setExpectedTotal(SEQUENTIAL_RUNS);
        for (int i = 0; i < SEQUENTIAL_RUNS; i++) {
            ra.seek(0);
            for (int j = 0; j < LINES; j++) {
                ra.readLine();
            }
            profiler.beat();
        }
        System.out.println("Performed " + SEQUENTIAL_RUNS + " full RA reads at "
                + Math.round(profiler.getBps(false)) + " reads/second");
    }

    private String[] getLines() throws Exception {
        String[] lines = new String[LINES];
        LineReader lr = new LineReader(logfile, "r");
        int counter = 0;
        while (counter < LINES) {
            lines[counter] = fixISO(lr.readLine());
            counter++;
        }
        lr.close();
        return lines;
    }

    private long[] getPositions() throws Exception {
        long[] pos = new long[LINES];
        LineReader lr = new LineReader(logfile, "r");
        int counter = 0;
        while (counter < LINES) {
            pos[counter] = lr.getPosition();
            lr.readLine();
            counter++;
        }
        lr.close();
        return pos;
    }

    public void testRandomisedAccess() throws Exception {
        int RUNS = 10000;
        Random random = new Random();
        // Collect starting points
        long[] pos = getPositions();
        String[] lines = getLines();

        LineReader lr = new LineReader(logfile, "r");
        for (int i = 0; i < RUNS; i++) {
            int line = random.nextInt(LINES);
            lr.seek(pos[line]);
            assertEquals("Random access to line " + line + " should give the same output for both readers",
                    lines[line], lr.readLine());
        }
    }

    public void testPseudoRandomisedAccess() throws Exception {
        int[] wantedLines = new int[] { 1, 2, 3, 218, 3, 216 };
        long[] pos = getPositions();
        String[] lines = getLines();

        LineReader lr = new LineReader(logfile, "r");
        for (int line : wantedLines) {
            lr.seek(pos[line]);
            assertEquals("Access to line " + line + " should give the same output for both readers", lines[line],
                    lr.readLine());
        }
    }

    public void testConstruction() throws Exception {
        assertTrue("The logfile " + logfile.getAbsoluteFile() + " should exist", logfile.exists());
        RandomAccessFile rReader = new RandomAccessFile(logfile, "r");
        LineReader lReader = new LineReader(logfile, "r");
        assertEquals("The first line should match",
                "INFO  [TP-Processor128] [2007-04-01 00:00:00,109] [website.performance.search_classic] FEBBCAC5ABBA604784A4990025CF0197|hitcount[9988]|searchwsc[1033]|clusterwsc[119]|didyoumeanwsc[342]|didyoumean_check[0]|page_render[1137]|66.249.66.193|Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)einstein lma_long:\"tekst\"",
                lReader.readLine());
        rReader.readLine();
        String secondLine = "INFO  [TP-Processor185] [2007-04-01 00:00:47,074] [website.performance.search_classic] 9BC1F1B16AAE36840B6A13C63F67B806|hitcount[214]|searchwsc[182]|clusterwsc[121]|didyoumeanwsc[420]|didyoumean_check[0]|page_render[879]|66.249.66.193|Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)einstein cluster:\"systems\"";
        assertEquals("The second line should match", secondLine, lReader.readLine());
        assertEquals("The second RAF line should match", secondLine, fixISO(rReader.readLine()));
        for (int i = 0; i < LINES - 4; i++) {
            String l = lReader.readLine();
            assertNotNull("Line " + i + " should be extractable from the logfile", l);
            assertEquals("Line #" + i + " should be the same for RAF and LR", fixISO(rReader.readLine()), l);
        }

        String secondLast = "INFO  [TP-Processor225] [2007-04-01 23:59:32,128] [website.performance.search_cl"
                + "assic] 617D24C65E2F53E56E95FBADCF390189|hitcount[6]|searchwsc[42]|clusterwsc[95]"
                + "|didyoumeanwsc[382]|didyoumean_check[15]|page_render[475]|66.249.66.193|Mozilla/"
                + "5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)au:\"branner h c"
                + "\" author_normalised:\"vosmar j\" lkl:\"bl skr 33\"";
        String rLast = fixISO(rReader.readLine());
        assertEquals("The second last line using RAF should be known", secondLast, fixISO(rLast));

        assertEquals("The second last line should be known", secondLast, lReader.readLine());
        assertNotNull("The last line should be something", lReader.readLine());
    }

    public void testReadBytes() throws Exception {
        LineReader lr = new LineReader(logfile, "r");
        byte[] buf = new byte[5000];
        assertEquals("All bytes should be read at the start of the file", buf.length, lr.read(buf));
        lr.seek(lr.length() - buf.length);
        assertEquals("All bytes should be read at the end of the file", buf.length, lr.read(buf));
        lr.seek(lr.length() - buf.length + 1);
        assertEquals("All bytes - 1 should be read exceeding the length", buf.length - 1, lr.read(buf));
        assertEquals("-1 should be returned when reading past EOF", -1, lr.read(buf));
    }

    public static Test suite() {
        return new TestSuite(LineReaderTest.class);
    }

    public void testWrite() throws Exception {
        File temp = new File(tempFolder, "testWrite.tmp");
        temp.deleteOnExit();
        temp.createNewFile();
        LineReader lr = new LineReader(temp, "rw");
        assertEquals("Newly created file should be empty", 0, lr.length());

        byte[] myBytes = new byte[10];
        lr.write(myBytes, 0, myBytes.length);
        assertEquals("After writing 10 bytes, the length should be 10", 10, lr.length());

        lr.seek(0);
        lr.write(myBytes, 0, myBytes.length);
        assertEquals("After writing 10 bytes from position 0, the length " + "should still be 10", 10, lr.length());

        lr.seek(5);
        lr.write(myBytes, 0, myBytes.length);
        assertEquals("After writing 10 bytes from position 5, the length " + "should be incremented", 15,
                lr.length());

        lr.write(myBytes, 0, myBytes.length);
        assertEquals("After writing 10 bytes without changing position, " + "the length should be increased", 25,
                lr.length());
    }

    public void testWritePermission() throws Exception {
        File temp = createTempFile();
        LineReader lr = new LineReader(temp, "r");
        try {
            lr.write("Hello");
            fail("Writing should not be allowed");
        } catch (Exception e) {
            // Expected
        }
    }

    public void writeSample(DataOutput out) throws Exception {
        out.writeInt(12345);
        out.writeInt(-87);
        out.writeLong(123456789L);
        out.write("Hello World!\n".getBytes("utf-8"));
        out.write("Another world\n".getBytes("utf-8"));
        out.writeFloat(0.5f);
        out.writeBoolean(true);
        out.writeBoolean(false);
        out.writeByte(12);
        out.writeByte(-12);
        out.write(129);
        out.writeShort(-4567);
        out.writeBytes("ASCII");
    }

    public void testSample(String type, DataInput in) throws Exception {
        assertEquals("Int 1 should work for " + type, 12345, in.readInt());
        assertEquals("Int 2 should work for " + type, -87, in.readInt());
        assertEquals("Long should work for " + type, 123456789L, in.readLong());
        assertEquals("String 1 should work for " + type, "Hello World!", in.readLine());
        assertEquals("String 2 should work for " + type, "Another world", in.readLine());
        assertEquals("Float should work for " + type, 0.5f, in.readFloat());
        assertEquals("Boolean 1 should work for " + type, true, in.readBoolean());
        assertEquals("Boolean 2 should work for " + type, false, in.readBoolean());
        assertEquals("Byte 1 should work for " + type, (byte) 12, in.readByte());
        assertEquals("Byte 2 should work for " + type, (byte) -12, in.readByte());
        assertEquals("Unsigned byte should work for " + type, 129, in.readUnsignedByte());
        assertEquals("Short should work for " + type, -4567, in.readShort());
        byte[] loaded = new byte[5];
        byte[] expected = new byte[] { (byte) 'A', (byte) 'S', (byte) 'C', (byte) 'I', (byte) 'I' };
        in.readFully(loaded);
        for (int i = 0; i < loaded.length; i++) {
            assertEquals("Byte-stored string should be equal at byte " + i + " for " + type, expected[i],
                    loaded[i]);
        }
    }

    public void testReadTypes() throws Exception {
        File temp = createTempFile();
        RandomAccessFile raf = new RandomAccessFile(temp, "rw");
        writeSample(raf);
        raf.close();

        LineReader lr = new LineReader(temp, "rw");
        testSample("LR", lr);
        lr.close();

        temp.createNewFile();
        lr = new LineReader(temp, "rw");
        writeSample(lr);
        lr.close();

        raf = new RandomAccessFile(temp, "rw");
        testSample("RA", raf);
        raf.close();
    }

    public void dumpSpeed2Helper(LineReader lr, RandomAccessFile ra, boolean warmup) throws Exception {
        int seeks = 10000;
        Profiler profiler = new Profiler();
        profiler.setExpectedTotal(seeks);
        profiler.setBpsSpan(1000);
        long size = lr.length();
        Random random = new Random();

        profiler.reset();
        for (int i = 0; i < seeks; i++) {
            long pos = Math.round(Math.floor(random.nextDouble() * (size - 6)));
            try {
                lr.seek(pos);
            } catch (EOFException e) {
                fail("Reached EOF at position " + pos);
            }
            lr.readInt();
            profiler.beat();
        }
        if (!warmup) {
            System.out.println("Seeked and read an int " + seeks + " times with LR " + "on a file of size " + size
                    + " at " + Math.round(profiler.getBps(true)) + " seeks/second");
        }

        profiler.reset();
        for (int i = 0; i < seeks; i++) {
            long pos = Math.round(Math.floor(random.nextDouble() * (size - 6)));
            try {
                ra.seek(pos);
            } catch (EOFException e) {
                fail("Reached EOF at position " + pos);
            }
            ra.readInt();
            profiler.beat();
        }
        if (!warmup) {
            System.out.println("Seeked and read an int " + seeks + " times with RA " + "on a file of size " + size
                    + " at " + Math.round(profiler.getBps(true)) + " seeks/second");
        }
    }

    public void dumpSpeed2Helper(File file) throws Exception {
        int WARMUP = 2;
        int RUNS = 3;
        LineReader lr = new LineReader(file, "r");
        RandomAccessFile raf = new RandomAccessFile(file, "r");
        for (int i = 0; i < WARMUP; i++) {
            dumpSpeed2Helper(lr, raf, true);
        }
        for (int i = 0; i < RUNS; i++) {
            dumpSpeed2Helper(lr, raf, false);
        }
        lr.close();
        raf.close();
    }

    public void dumpSpeed() throws Exception {
        Random random = new Random();
        int[] sizes = new int[] { 100, 10000, 1000000, 10000000 };
        Profiler profiler = new Profiler();
        for (int size : sizes) {
            System.out.print("Creating test-file of size " + size + "...");
            File temp = createTempFile();
            LineReader lr = new LineReader(temp, "rw");
            byte[] bytes = new byte[size];
            random.nextBytes(bytes);
            System.gc();
            profiler.reset();
            lr.write(bytes);
            lr.close();
            System.out.println(" in " + profiler.getSpendTime());
            dumpSpeed2Helper(temp);
            temp.delete();
        }
    }

    public void testBufferOverflow() throws Exception {
        LineReader lr = getLR();
        for (int i = 0; i < LineReader.BUFFER_SIZE + 2; i++) {
            lr.writeByte(87);
        }
        lr.close();
    }

    public void testWriteLarge() throws Exception {
        int size = LineReader.BUFFER_SIZE + 10;
        File temp = createTempFile();
        LineReader lr = new LineReader(temp, "rw");
        lr.write(new byte[size]);
        lr.close();
        assertEquals("The generated file should be of the right size", size, temp.length());
    }

    public void testWriteSmall() throws Exception {
        int size = LineReader.BUFFER_SIZE - 10;
        File temp = createTempFile();
        LineReader lr = new LineReader(temp, "rw");
        lr.write(new byte[size]);
        lr.close();
        assertEquals("The generated file should be of the right size", size, temp.length());
    }

    public void testWrite2Bytes() throws Exception {
        File temp = new File(tempFolder, "testWrite2Bytes.tmp");
        temp.deleteOnExit();
        temp.createNewFile();
        LineReader lr = new LineReader(temp, "rw");
        byte[] b1 = new byte[] { 1, 2 };
        byte[] b2 = new byte[] { 3, 4 };
        lr.write(b1);
        lr.write(b2);
        lr.close();

        LineReader lread = new LineReader(temp, "r");
        byte[] buf = new byte[4];
        lread.readFully(buf);
        for (int i = 0; i < buf.length; i++) {
            assertEquals("The byte at position " + i + " should be as expected", i + 1, buf[i]);
        }

    }

    public void testWriteCloseReadWrite() throws Exception {
        File temp = new File(tempFolder, "testWriteCloseReadWrite.tmp");
        temp.deleteOnExit();
        temp.createNewFile();
        LineReader lr = new LineReader(temp, "rw");
        byte[] b1 = new byte[] { 1, 2 };
        byte[] b2 = new byte[] { 3, 4 };
        lr.write(b1);
        lr.close();

        checkContent("Simple write of 2 bytes should work", temp, b1);

        lr = new LineReader(temp, "rw");
        lr.seek(0);
        lr.readFully(b1);
        assertEquals("The first byte should be correct", 1, b1[0]);
        assertEquals("The second byte should be correct", 2, b1[1]);
        lr.close();

        checkContent("Open-read should not change anything", temp, b1);

        lr = new LineReader(temp, "rw");
        lr.seek(lr.length());
        lr.write(b2);
        lr.close();

        checkContent("The final file should be as expected", temp, new byte[] { 1, 2, 3, 4 });
    }

    public void testWriteCloseNewWrite() throws Exception {
        File temp = new File(tempFolder, "testWriteCloseNewWrite.tmp");
        temp.deleteOnExit();
        temp.createNewFile();
        LineReader lr = new LineReader(temp, "rw");
        byte[] b1 = new byte[] { 1, 2 };
        byte[] b2 = new byte[] { 3, 4 };
        lr.write(b1);
        lr.close();

        checkContent("Simple write of 2 bytes should work", temp, b1);
        checkContent("Checking bytes should not disturb anything", temp, b1);

        lr = new LineReader(temp, "rw");
        log.debug("The file length is " + lr.length());
        lr.seek(lr.length());
        lr.write(b2);
        lr.close();

        checkContent("The final file should be as expected", temp, new byte[] { 1, 2, 3, 4 });
    }

    public void testWriteSeekWrite() throws Exception {
        File temp = new File(tempFolder, "testWriteSeekWrite.tmp");
        temp.deleteOnExit();
        temp.createNewFile();
        LineReader lr = new LineReader(temp, "rw");
        byte[] b1 = new byte[] { 1, 2 };
        byte[] b2 = new byte[] { 3, 4 };
        lr.write(b1);

        lr.seek(lr.length());
        lr.write(b2);
        lr.close();

        checkContent("The final file should be as expected", temp, new byte[] { 1, 2, 3, 4 });
    }

    private void checkContent(String message, File temp, byte[] expected) throws IOException {
        LineReader lread = new LineReader(temp, "r");
        checkContent(message, expected, lread);
        lread.close();
    }

    private void checkContent(String message, byte[] expected, LineReader lread) throws IOException {
        byte[] buf = new byte[expected.length];
        lread.readFully(buf);
        log.debug("Buffer: " + Logs.expand(buf, 100, 10));
        for (int i = 0; i < buf.length; i++) {
            assertEquals(message + ": The byte at position " + i + " should be as expected", expected[i], buf[i]);
        }
    }

    private void checkContent(String message, byte[] expected, ByteBuffer buffer) throws IOException {
        byte[] actual = new byte[expected.length];
        buffer.get(actual);
        for (int i = 0; i < expected.length; i++) {
            assertEquals(message + ": The byte at position " + i + " should be as expected", expected[i],
                    actual[i]);
        }
    }

    public void testWritereadAlternate() throws Exception {
        File temp = createTempFile();
        LineReader lr = new LineReader(temp, "rw");
        byte[] b1 = new byte[] { 1, 2 };
        byte[] b2 = new byte[] { 3, 4 };
        assertEquals("The position should be at the beginning from start", 0, lr.getPosition());
        log.debug("Writing 2 bytes");
        lr.write(b1);
        assertEquals("The length of the file should updated after write", 2, lr.length());
        log.debug("Seeking 0");
        lr.seek(0);
        assertEquals("The position should be at the beginning after seek", 0, lr.getPosition());
        log.debug("Reading 2 bytes");
        lr.readFully(b1);
        assertEquals("The position should be at the end", 2, lr.getPosition());
        assertEquals("The first byte should be correct", 1, b1[0]);
        assertEquals("The second byte should be correct", 2, b1[1]);
        assertEquals("The length of the file should unchanged after read", 2, lr.length());
        log.debug("Seeking to length " + lr.length());
        lr.seek(lr.length());
        log.debug("Writing 2 bytes more");
        lr.write(b2);
        log.debug("Closing");
        lr.close();

        checkContent("Final content should be correct", temp, new byte[] { 1, 2, 3, 4 });
    }

    public File createTempFile() throws IOException {
        File temp = File.createTempFile("filereader", ".tmp");
        temp.deleteOnExit();
        return temp;
    }

    public LineReader getLR() throws IOException {
        return new LineReader(createTempFile(), "rw");
    }

    public void testMonkeyBinarySearch() throws Exception {
        File testFile = File.createTempFile("binarySearch", ".tmp");
        testFile.deleteOnExit();
        Random random = new Random(87);

        int LINES = 50;
        int MINWORDLENGTH = 1;
        int MAXWORDLENGTH = 10;
        int MINCHAR = 32;
        int MAXCHAR = 14200;
        Set<String> lineSet = new HashSet<String>(LINES);
        for (int line = 0; line < LINES; line++) {
            int wordLength = random.nextInt(MAXWORDLENGTH - MINWORDLENGTH) + MINWORDLENGTH;
            StringWriter word = new StringWriter(wordLength);
            for (int i = 0; i < wordLength; i++) {
                word.append((char) (random.nextInt(MAXCHAR - MINCHAR) + MINCHAR));
            }
            lineSet.add(word.toString());
        }

        List<String> lines = new ArrayList<String>(lineSet);
        Collections.sort(lines);
        log.info("Created content: " + Strings.join(lines, "  "));
        StringWriter sw = new StringWriter(MAXWORDLENGTH * LINES);
        for (String word : lines) {
            sw.append(word);
            sw.append("\n");
        }
        String content = sw.toString();
        Files.saveString(content, testFile);

        LineReader reader = new LineReader(testFile, "r");
        int pos = 0;
        for (String word : lines) {
            assertPos(reader, pos, word);
            pos += word.getBytes("utf-8").length + 1;
        }
        reader.close();
    }

    public void testBinarySearch2() throws Exception {
        File testFile = File.createTempFile("binarySearch", ".tmp");
        testFile.deleteOnExit();
        String content = "a\nbb";
        Files.saveString(content, testFile);
        LineReader reader = new LineReader(testFile, "r");
        assertPos(reader, 0, "a");
        assertPos(reader, 2, "bb");
        reader.close();
    }

    public void testBinarySearch() throws Exception {
        File testFile = File.createTempFile("binarySearch", ".tmp");
        testFile.deleteOnExit();
        String content = "a\naabb\nab\nc\nd\nde\nzz\nzzz";
        Files.saveString(content, testFile);
        LineReader reader = new LineReader(testFile, "r");
        assertPos(reader, 0, "a");
        assertPos(reader, 7, "ab");
        assertPos(reader, 2, "aabb");
        assertPos(reader, 10, "c");
        reader.close();
    }

    private void assertPos(LineReader reader, int expectedPos, String query) throws IOException {
        assertEquals(String.format("The query '%s' should have the correct position", query), expectedPos,
                reader.binaryLineSearch(null, query));
    }

    public void testPerformanceSmall() throws Exception {
        testBSPerformance(new File(TMPFOLDER, "small"), 100, 1000);
    }

    public void disabledtestPerformanceMediumCorpusManyLookups() throws Exception {
        testBSPerformance(new File(TMPFOLDER, "small"), 100000, 100000);
    }

    public void disabledtestPerformanceMediumCorpusEternalLookups() throws Exception {
        testBSPerformance(new File(TMPFOLDER, "small"), 100000, Integer.MAX_VALUE);
    }

    public static void testBSPerformance(File location, int terms, int runs) throws Exception {
        System.out.println(String.format("testPerformance('%s', %d runs) called", location, runs));
        createTS(terms, location);
        LineReader ts = new LineReader(location, "r");
        ts.setBufferSize(200);
        int digits = Integer.toString(terms).length();

        Random random = new Random(87);
        Profiler profiler = new Profiler(runs);
        profiler.setBpsSpan(10000);
        int feedback = Math.max(1, runs / 100);
        for (int i = 0; i < runs; i++) {
            int test = random.nextInt(terms);
            //noinspection DuplicateStringLiteralInspection
            String term = "term_" + leader(test, digits);
            ts.binaryLineSearch(termLocator, term);
            profiler.beat();
            if (i % feedback == 0) {
                System.out.println(String.format("Executed %d/%d lookups. Average lookups/second: %d. " + "ETA: %s",
                        i, runs, (int) profiler.getBps(true), profiler.getETAAsString(true)));
            }
        }
        System.out.println(String.format("Executed %d lookups in %s. Average lookups/second: %s", runs,
                profiler.getSpendTime(), profiler.getBps(false)));
        ts.close();
    }

    public static void createTS(int terms, File location) throws Exception {
        System.out.println(String.format("Creating sample term stats with %d terms at '%s'", terms, location));
        int digits = Integer.toString(terms).length();
        Profiler profiler = new Profiler();
        LineReader ts = new LineReader(location, "rw");
        for (int i = 0; i < terms; i++) {
            //noinspection DuplicateStringLiteralInspection
            ts.write("term_" + leader(i, digits) + " " + (i + 2) + "\n");
        }
        ts.close();
        System.out.println(
                "Finished creating sample term stats with " + terms + " terms in " + profiler.getSpendTime());
    }

    private static String leader(int num, int digits) {
        String result = Integer.toString(num);
        if (result.length() > digits) {
            throw new IllegalArgumentException(
                    String.format("The number %d already has more than %d digits", num, digits));
        }
        while (result.length() < digits) {
            result = "0" + result;
        }
        return result;
    }

    private static Comparator<String> termLocator = new Comparator<String>() {
        // o1 will always be the search value, o2 is the line
        public int compare(String o1, String o2) {
            return o1.compareTo(extractTermStringWithEscapes(o2));
        }
    };

    private static String extractTermStringWithEscapes(String line) {
        // No explicit check for existence of delimiter as we don't care whether
        // a "No DELIMITER found or an ArrayIndexOutOfBounds is thrown
        // The same goes for the Integer.parseInt-call. In all three cases, we
        // can infer what happened.
        return line.substring(0, line.lastIndexOf(" "));
    }
}