org.apache.jackrabbit.oak.segment.file.ReversedLinesFileReader.java Source code

Introduction

Here is the source code for org.apache.jackrabbit.oak.segment.file.ReversedLinesFileReader.java
Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.jackrabbit.oak.segment.file;

import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.UnsupportedCharsetException;

import org.apache.commons.io.Charsets;

/**
 * Reads lines in a file reversely (similar to a BufferedReader, but starting at
 * the last line). Useful for e.g. searching in log files.
 *
 * FIXME: this is a copy of org.apache.commons.io.input.ReversedLinesFileReader
 * with a fix for IO-471. Replace again once commons-io has released a fixed version.
 */
class ReversedLinesFileReader implements Closeable {

    private final int blockSize;
    private final Charset encoding;

    private final RandomAccessFile randomAccessFile;

    private final long totalByteLength;
    private final long totalBlockCount;

    private final byte[][] newLineSequences;
    private final int avoidNewlineSplitBufferSize;
    private final int byteDecrement;

    private FilePart currentFilePart;

    private boolean trailingNewlineOfFileSkipped = false;

    /**
     * Creates a ReversedLinesFileReader with default block size of 4KB and the
     * platform's default encoding.
     *
     * @param file
     *            the file to be read
     * @throws IOException  if an I/O error occurs
     */
    public ReversedLinesFileReader(final File file) throws IOException {
        this(file, 4096, Charset.defaultCharset().toString());
    }

    /**
     * Creates a ReversedLinesFileReader with the given block size and encoding.
     *
     * @param file
     *            the file to be read
     * @param blockSize
     *            size of the internal buffer (for ideal performance this should
     *            match with the block size of the underlying file system).
     * @param encoding
     *            the encoding of the file
     * @throws IOException  if an I/O error occurs
     * @since 2.3
     */
    public ReversedLinesFileReader(final File file, final int blockSize, final Charset encoding)
            throws IOException {
        this.blockSize = blockSize;
        this.encoding = encoding;

        randomAccessFile = new RandomAccessFile(file, "r");
        totalByteLength = randomAccessFile.length();
        int lastBlockLength = (int) (totalByteLength % blockSize);
        if (lastBlockLength > 0) {
            totalBlockCount = totalByteLength / blockSize + 1;
        } else {
            totalBlockCount = totalByteLength / blockSize;
            if (totalByteLength > 0) {
                lastBlockLength = blockSize;
            }
        }
        currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);

        // --- check & prepare encoding ---
        Charset charset = Charsets.toCharset(encoding);
        CharsetEncoder charsetEncoder = charset.newEncoder();
        float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
        if (maxBytesPerChar == 1f) {
            // all one byte encodings are no problem
            byteDecrement = 1;
        } else if (charset == Charset.forName("UTF-8")) {
            // UTF-8 works fine out of the box, for multibyte sequences a second UTF-8 byte can never be a newline byte
            // http://en.wikipedia.org/wiki/UTF-8
            byteDecrement = 1;
        } else if (charset == Charset.forName("Shift_JIS") || // Same as for UTF-8 http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
                charset == Charset.forName("windows-31j") || // Windows code page 932 (Japanese)
                charset == Charset.forName("x-windows-949") || // Windows code page 949 (Korean)
                charset == Charset.forName("gbk") || // Windows code page 936 (Simplified Chinese)
                charset == Charset.forName("x-windows-950")) { // Windows code page 950 (Traditional Chinese)
            byteDecrement = 1;
        } else if (charset == Charset.forName("UTF-16BE") || charset == Charset.forName("UTF-16LE")) {
            // UTF-16 new line sequences are not allowed as second tuple of four byte sequences,
            // however byte order has to be specified
            byteDecrement = 2;
        } else if (charset == Charset.forName("UTF-16")) {
            throw new UnsupportedEncodingException(
                    "For UTF-16, you need to specify the byte order (use UTF-16BE or UTF-16LE)");
        } else {
            throw new UnsupportedEncodingException(
                    "Encoding " + encoding + " is not supported yet (feel free to submit a patch)");
        }
        // NOTE: The new line sequences are matched in the order given, so it is important that \r\n is BEFORE \n
        newLineSequences = new byte[][] { "\r\n".getBytes(encoding), "\n".getBytes(encoding),
                "\r".getBytes(encoding) };

        avoidNewlineSplitBufferSize = newLineSequences[0].length;
    }

    /**
     * Creates a ReversedLinesFileReader with the given block size and encoding.
     *
     * @param file
     *            the file to be read
     * @param blockSize
     *            size of the internal buffer (for ideal performance this should
     *            match with the block size of the underlying file system).
     * @param encoding
     *            the encoding of the file
     * @throws IOException  if an I/O error occurs
     * @throws UnsupportedCharsetException
     *             thrown instead of {@link UnsupportedEncodingException} in version 2.2 if the encoding is not
     *             supported.
     */
    public ReversedLinesFileReader(final File file, final int blockSize, final String encoding) throws IOException {
        this(file, blockSize, Charsets.toCharset(encoding));
    }

    /**
     * Returns the lines of the file from bottom to top.
     *
     * @return the next line or null if the start of the file is reached
     * @throws IOException  if an I/O error occurs
     */
    public String readLine() throws IOException {

        String line = currentFilePart.readLine();
        while (line == null) {
            currentFilePart = currentFilePart.rollOver();
            if (currentFilePart != null) {
                line = currentFilePart.readLine();
            } else {
                // no more fileparts: we're done, leave line set to null
                break;
            }
        }

        // aligned behaviour with BufferedReader that doesn't return a last, empty line
        if ("".equals(line) && !trailingNewlineOfFileSkipped) {
            trailingNewlineOfFileSkipped = true;
            line = readLine();
        }

        return line;
    }

    /**
     * Closes underlying resources.
     *
     * @throws IOException  if an I/O error occurs
     */
    @Override
    public void close() throws IOException {
        randomAccessFile.close();
    }

    private class FilePart {
        private final long no;

        private final byte[] data;

        private byte[] leftOver;

        private int currentLastBytePos;

        /**
         * ctor
         * @param no the part number
         * @param length its length
         * @param leftOverOfLastFilePart remainder
         * @throws IOException if there is a problem reading the file
         */
        private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
            this.no = no;
            int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0);
            this.data = new byte[dataLength];
            final long off = (no - 1) * blockSize;

            // read data
            if (no > 0 /* file not empty */) {
                randomAccessFile.seek(off);
                final int countRead = randomAccessFile.read(data, 0, length);
                if (countRead != length) {
                    throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
                }
            }
            // copy left over part into data arr
            if (leftOverOfLastFilePart != null) {
                System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length);
            }
            this.currentLastBytePos = data.length - 1;
            this.leftOver = null;
        }

        /**
         * Handles block rollover
         *
         * @return the new FilePart or null
         * @throws IOException if there was a problem reading the file
         */
        private FilePart rollOver() throws IOException {

            if (currentLastBytePos > -1) {
                throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
                        + "last readLine() should have returned something! currentLastCharPos="
                        + currentLastBytePos);
            }

            if (no > 1) {
                return new FilePart(no - 1, blockSize, leftOver);
            } else {
                // NO 1 was the last FilePart, we're finished
                if (leftOver != null) {
                    throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
                            + new String(leftOver, encoding));
                }
                return null;
            }
        }

        /**
         * Reads a line.
         *
         * @return the line or null
         * @throws IOException if there is an error reading from the file
         */
        private String readLine() throws IOException {

            String line = null;
            int newLineMatchByteCount;

            boolean isLastFilePart = no == 1;

            int i = currentLastBytePos;
            while (i > -1) {

                if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
                    // avoidNewlineSplitBuffer: for all except the last file part we
                    // take a few bytes to the next file part to avoid splitting of newlines
                    createLeftOver();
                    break; // skip last few bytes and leave it to the next file part
                }

                // --- check for newline ---
                if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
                    final int lineStart = i + 1;
                    int lineLengthBytes = currentLastBytePos - lineStart + 1;

                    if (lineLengthBytes < 0) {
                        throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes);
                    }
                    byte[] lineData = new byte[lineLengthBytes];
                    System.arraycopy(data, lineStart, lineData, 0, lineLengthBytes);

                    line = new String(lineData, encoding);

                    currentLastBytePos = i - newLineMatchByteCount;
                    break; // found line
                }

                // --- move cursor ---
                i -= byteDecrement;

                // --- end of file part handling ---
                if (i < 0) {
                    createLeftOver();
                    break; // end of file part
                }
            }

            // --- last file part handling ---
            if (isLastFilePart && leftOver != null) {
                // there will be no line break anymore, this is the first line of the file
                line = new String(leftOver, encoding);
                leftOver = null;
            }

            return line;
        }

        /**
         * Creates the buffer containing any left over bytes.
         */
        private void createLeftOver() {
            int lineLengthBytes = currentLastBytePos + 1;
            if (lineLengthBytes > 0) {
                // create left over for next block
                leftOver = new byte[lineLengthBytes];
                System.arraycopy(data, 0, leftOver, 0, lineLengthBytes);
            } else {
                leftOver = null;
            }
            currentLastBytePos = -1;
        }

        /**
         * Finds the new-line sequence and return its length.
         *
         * @param data buffer to scan
         * @param i start offset in buffer
         * @return length of newline sequence or 0 if none found
         */
        private int getNewLineMatchByteCount(byte[] data, int i) {
            for (byte[] newLineSequence : newLineSequences) {
                boolean match = true;
                for (int j = newLineSequence.length - 1; j >= 0; j--) {
                    int k = i + j - (newLineSequence.length - 1);
                    match &= k >= 0 && data[k] == newLineSequence[j];
                }
                if (match) {
                    return newLineSequence.length;
                }
            }
            return 0;
        }
    }

}