CsvReader.java Source code

Introduction

Here is the source code for CsvReader.java
Source

/*
 * Java CSV is a stream based library for reading and writing
 * CSV and other delimited data.
 *   
 * Copyright (C) Bruce Dunwiddie bruce@csvreader.com
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
 */

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.nio.charset.Charset;
import java.text.NumberFormat;
import java.util.HashMap;

/**
 * A stream based parser for parsing delimited text data from a file or a
 * stream.
 */
public class CsvReader {
    private Reader inputStream = null;

    private String fileName = null;

    // this holds all the values for switches that the user is allowed to set
    private UserSettings userSettings = new UserSettings();

    private Charset charset = null;

    private boolean useCustomRecordDelimiter = false;

    // this will be our working buffer to hold data chunks
    // read in from the data file

    private DataBuffer dataBuffer = new DataBuffer();

    private ColumnBuffer columnBuffer = new ColumnBuffer();

    private RawRecordBuffer rawBuffer = new RawRecordBuffer();

    private boolean[] isQualified = null;

    private String rawRecord = "";

    private HeadersHolder headersHolder = new HeadersHolder();

    // these are all more or less global loop variables
    // to keep from needing to pass them all into various
    // methods during parsing

    private boolean startedColumn = false;

    private boolean startedWithQualifier = false;

    private boolean hasMoreData = true;

    private char lastLetter = '\0';

    private boolean hasReadNextLine = false;

    private int columnsCount = 0;

    private long currentRecord = 0;

    private String[] values = new String[StaticSettings.INITIAL_COLUMN_COUNT];

    private boolean initialized = false;

    private boolean closed = false;

    /**
     * Double up the text qualifier to represent an occurance of the text
     * qualifier.
     */
    public static final int ESCAPE_MODE_DOUBLED = 1;

    /**
     * Use a backslash character before the text qualifier to represent an
     * occurance of the text qualifier.
     */
    public static final int ESCAPE_MODE_BACKSLASH = 2;

    /**
     * Creates a {@link com.csvreader.CsvReader CsvReader} object using a file
     * as the data source.
     * 
     * @param fileName
     *            The path to the file to use as the data source.
     * @param delimiter
     *            The character to use as the column delimiter.
     * @param charset
     *            The {@link java.nio.charset.Charset Charset} to use while
     *            parsing the data.
     */
    public CsvReader(String fileName, char delimiter, Charset charset) throws FileNotFoundException {
        if (fileName == null) {
            throw new IllegalArgumentException("Parameter fileName can not be null.");
        }

        if (charset == null) {
            throw new IllegalArgumentException("Parameter charset can not be null.");
        }

        if (!new File(fileName).exists()) {
            throw new FileNotFoundException("File " + fileName + " does not exist.");
        }

        this.fileName = fileName;
        this.userSettings.Delimiter = delimiter;
        this.charset = charset;

        isQualified = new boolean[values.length];
    }

    /**
     * Creates a {@link com.csvreader.CsvReader CsvReader} object using a file
     * as the data source.&nbsp;Uses ISO-8859-1 as the
     * {@link java.nio.charset.Charset Charset}.
     * 
     * @param fileName
     *            The path to the file to use as the data source.
     * @param delimiter
     *            The character to use as the column delimiter.
     */
    public CsvReader(String fileName, char delimiter) throws FileNotFoundException {
        this(fileName, delimiter, Charset.forName("ISO-8859-1"));
    }

    /**
     * Creates a {@link com.csvreader.CsvReader CsvReader} object using a file
     * as the data source.&nbsp;Uses a comma as the column delimiter and
     * ISO-8859-1 as the {@link java.nio.charset.Charset Charset}.
     * 
     * @param fileName
     *            The path to the file to use as the data source.
     */
    public CsvReader(String fileName) throws FileNotFoundException {
        this(fileName, Letters.COMMA);
    }

    /**
     * Constructs a {@link com.csvreader.CsvReader CsvReader} object using a
     * {@link java.io.Reader Reader} object as the data source.
     * 
     * @param inputStream
     *            The stream to use as the data source.
     * @param delimiter
     *            The character to use as the column delimiter.
     */
    public CsvReader(Reader inputStream, char delimiter) {
        if (inputStream == null) {
            throw new IllegalArgumentException("Parameter inputStream can not be null.");
        }

        this.inputStream = inputStream;
        this.userSettings.Delimiter = delimiter;
        initialized = true;

        isQualified = new boolean[values.length];
    }

    /**
     * Constructs a {@link com.csvreader.CsvReader CsvReader} object using a
     * {@link java.io.Reader Reader} object as the data source.&nbsp;Uses a
     * comma as the column delimiter.
     * 
     * @param inputStream
     *            The stream to use as the data source.
     */
    public CsvReader(Reader inputStream) {
        this(inputStream, Letters.COMMA);
    }

    /**
     * Constructs a {@link com.csvreader.CsvReader CsvReader} object using an
     * {@link java.io.InputStream InputStream} object as the data source.
     * 
     * @param inputStream
     *            The stream to use as the data source.
     * @param delimiter
     *            The character to use as the column delimiter.
     * @param charset
     *            The {@link java.nio.charset.Charset Charset} to use while
     *            parsing the data.
     */
    public CsvReader(InputStream inputStream, char delimiter, Charset charset) {
        this(new InputStreamReader(inputStream, charset), delimiter);
    }

    /**
     * Constructs a {@link com.csvreader.CsvReader CsvReader} object using an
     * {@link java.io.InputStream InputStream} object as the data
     * source.&nbsp;Uses a comma as the column delimiter.
     * 
     * @param inputStream
     *            The stream to use as the data source.
     * @param charset
     *            The {@link java.nio.charset.Charset Charset} to use while
     *            parsing the data.
     */
    public CsvReader(InputStream inputStream, Charset charset) {
        this(new InputStreamReader(inputStream, charset));
    }

    public boolean getCaptureRawRecord() {
        return userSettings.CaptureRawRecord;
    }

    public void setCaptureRawRecord(boolean captureRawRecord) {
        userSettings.CaptureRawRecord = captureRawRecord;
    }

    public String getRawRecord() {
        return rawRecord;
    }

    /**
     * Gets whether leading and trailing whitespace characters are being trimmed
     * from non-textqualified column data. Default is true.
     * 
     * @return Whether leading and trailing whitespace characters are being
     *         trimmed from non-textqualified column data.
     */
    public boolean getTrimWhitespace() {
        return userSettings.TrimWhitespace;
    }

    /**
     * Sets whether leading and trailing whitespace characters should be trimmed
     * from non-textqualified column data or not. Default is true.
     * 
     * @param trimWhitespace
     *            Whether leading and trailing whitespace characters should be
     *            trimmed from non-textqualified column data or not.
     */
    public void setTrimWhitespace(boolean trimWhitespace) {
        userSettings.TrimWhitespace = trimWhitespace;
    }

    /**
     * Gets the character being used as the column delimiter. Default is comma,
     * ','.
     * 
     * @return The character being used as the column delimiter.
     */
    public char getDelimiter() {
        return userSettings.Delimiter;
    }

    /**
     * Sets the character to use as the column delimiter. Default is comma, ','.
     * 
     * @param delimiter
     *            The character to use as the column delimiter.
     */
    public void setDelimiter(char delimiter) {
        userSettings.Delimiter = delimiter;
    }

    public char getRecordDelimiter() {
        return userSettings.RecordDelimiter;
    }

    /**
     * Sets the character to use as the record delimiter.
     * 
     * @param recordDelimiter
     *            The character to use as the record delimiter. Default is
     *            combination of standard end of line characters for Windows,
     *            Unix, or Mac.
     */
    public void setRecordDelimiter(char recordDelimiter) {
        useCustomRecordDelimiter = true;
        userSettings.RecordDelimiter = recordDelimiter;
    }

    /**
     * Gets the character to use as a text qualifier in the data.
     * 
     * @return The character to use as a text qualifier in the data.
     */
    public char getTextQualifier() {
        return userSettings.TextQualifier;
    }

    /**
     * Sets the character to use as a text qualifier in the data.
     * 
     * @param textQualifier
     *            The character to use as a text qualifier in the data.
     */
    public void setTextQualifier(char textQualifier) {
        userSettings.TextQualifier = textQualifier;
    }

    /**
     * Whether text qualifiers will be used while parsing or not.
     * 
     * @return Whether text qualifiers will be used while parsing or not.
     */
    public boolean getUseTextQualifier() {
        return userSettings.UseTextQualifier;
    }

    /**
     * Sets whether text qualifiers will be used while parsing or not.
     * 
     * @param useTextQualifier
     *            Whether to use a text qualifier while parsing or not.
     */
    public void setUseTextQualifier(boolean useTextQualifier) {
        userSettings.UseTextQualifier = useTextQualifier;
    }

    /**
     * Gets the character being used as a comment signal.
     * 
     * @return The character being used as a comment signal.
     */
    public char getComment() {
        return userSettings.Comment;
    }

    /**
     * Sets the character to use as a comment signal.
     * 
     * @param comment
     *            The character to use as a comment signal.
     */
    public void setComment(char comment) {
        userSettings.Comment = comment;
    }

    /**
     * Gets whether comments are being looked for while parsing or not.
     * 
     * @return Whether comments are being looked for while parsing or not.
     */
    public boolean getUseComments() {
        return userSettings.UseComments;
    }

    /**
     * Sets whether comments are being looked for while parsing or not.
     * 
     * @param useComments
     *            Whether comments are being looked for while parsing or not.
     */
    public void setUseComments(boolean useComments) {
        userSettings.UseComments = useComments;
    }

    /**
     * Gets the current way to escape an occurance of the text qualifier inside
     * qualified data.
     * 
     * @return The current way to escape an occurance of the text qualifier
     *         inside qualified data.
     */
    public int getEscapeMode() {
        return userSettings.EscapeMode;
    }

    /**
     * Sets the current way to escape an occurance of the text qualifier inside
     * qualified data.
     * 
     * @param escapeMode
     *            The way to escape an occurance of the text qualifier inside
     *            qualified data.
     * @exception IllegalArgumentException
     *                When an illegal value is specified for escapeMode.
     */
    public void setEscapeMode(int escapeMode) throws IllegalArgumentException {
        if (escapeMode != ESCAPE_MODE_DOUBLED && escapeMode != ESCAPE_MODE_BACKSLASH) {
            throw new IllegalArgumentException("Parameter escapeMode must be a valid value.");
        }

        userSettings.EscapeMode = escapeMode;
    }

    public boolean getSkipEmptyRecords() {
        return userSettings.SkipEmptyRecords;
    }

    public void setSkipEmptyRecords(boolean skipEmptyRecords) {
        userSettings.SkipEmptyRecords = skipEmptyRecords;
    }

    /**
     * Safety caution to prevent the parser from using large amounts of memory
     * in the case where parsing settings like file encodings don't end up
     * matching the actual format of a file. This switch can be turned off if
     * the file format is known and tested. With the switch off, the max column
     * lengths and max column count per record supported by the parser will
     * greatly increase. Default is true.
     * 
     * @return The current setting of the safety switch.
     */
    public boolean getSafetySwitch() {
        return userSettings.SafetySwitch;
    }

    /**
     * Safety caution to prevent the parser from using large amounts of memory
     * in the case where parsing settings like file encodings don't end up
     * matching the actual format of a file. This switch can be turned off if
     * the file format is known and tested. With the switch off, the max column
     * lengths and max column count per record supported by the parser will
     * greatly increase. Default is true.
     * 
     * @param safetySwitch
     */
    public void setSafetySwitch(boolean safetySwitch) {
        userSettings.SafetySwitch = safetySwitch;
    }

    /**
     * Gets the count of columns found in this record.
     * 
     * @return The count of columns found in this record.
     */
    public int getColumnCount() {
        return columnsCount;
    }

    /**
     * Gets the index of the current record.
     * 
     * @return The index of the current record.
     */
    public long getCurrentRecord() {
        return currentRecord - 1;
    }

    /**
     * Gets the count of headers read in by a previous call to
     * {@link com.csvreader.CsvReader#readHeaders readHeaders()}.
     * 
     * @return The count of headers read in by a previous call to
     *         {@link com.csvreader.CsvReader#readHeaders readHeaders()}.
     */
    public int getHeaderCount() {
        return headersHolder.Length;
    }

    /**
     * Returns the header values as a string array.
     * 
     * @return The header values as a String array.
     * @exception IOException
     *                Thrown if this object has already been closed.
     */
    public String[] getHeaders() throws IOException {
        checkClosed();

        if (headersHolder.Headers == null) {
            return null;
        } else {
            // use clone here to prevent the outside code from
            // setting values on the array directly, which would
            // throw off the index lookup based on header name
            String[] clone = new String[headersHolder.Length];
            System.arraycopy(headersHolder.Headers, 0, clone, 0, headersHolder.Length);
            return clone;
        }
    }

    public void setHeaders(String[] headers) {
        headersHolder.Headers = headers;

        headersHolder.IndexByName.clear();

        if (headers != null) {
            headersHolder.Length = headers.length;
        } else {
            headersHolder.Length = 0;
        }

        // use headersHolder.Length here in case headers is null
        for (int i = 0; i < headersHolder.Length; i++) {
            headersHolder.IndexByName.put(headers[i], Integer.valueOf(i));
        }
    }

    public String[] getValues() throws IOException {
        checkClosed();

        // need to return a clone, and can't use clone because values.Length
        // might be greater than columnsCount
        String[] clone = new String[columnsCount];
        System.arraycopy(values, 0, clone, 0, columnsCount);
        return clone;
    }

    /**
     * Returns the current column value for a given column index.
     * 
     * @param columnIndex
     *            The index of the column.
     * @return The current column value.
     * @exception IOException
     *                Thrown if this object has already been closed.
     */
    public String get(int columnIndex) throws IOException {
        checkClosed();

        if (columnIndex > -1 && columnIndex < columnsCount) {
            return values[columnIndex];
        } else {
            return "";
        }
    }

    /**
     * Returns the current column value for a given column header name.
     * 
     * @param headerName
     *            The header name of the column.
     * @return The current column value.
     * @exception IOException
     *                Thrown if this object has already been closed.
     */
    public String get(String headerName) throws IOException {
        checkClosed();

        return get(getIndex(headerName));
    }

    /**
     * Creates a {@link com.csvreader.CsvReader CsvReader} object using a string
     * of data as the source.&nbsp;Uses ISO-8859-1 as the
     * {@link java.nio.charset.Charset Charset}.
     * 
     * @param data
     *            The String of data to use as the source.
     * @return A {@link com.csvreader.CsvReader CsvReader} object using the
     *         String of data as the source.
     */
    public static CsvReader parse(String data) {
        if (data == null) {
            throw new IllegalArgumentException("Parameter data can not be null.");
        }

        return new CsvReader(new StringReader(data));
    }

    /**
     * Reads another record.
     * 
     * @return Whether another record was successfully read or not.
     * @exception IOException
     *                Thrown if an error occurs while reading data from the
     *                source stream.
     */
    public boolean readRecord() throws IOException {
        checkClosed();

        columnsCount = 0;
        rawBuffer.Position = 0;

        dataBuffer.LineStart = dataBuffer.Position;

        hasReadNextLine = false;

        // check to see if we've already found the end of data

        if (hasMoreData) {
            // loop over the data stream until the end of data is found
            // or the end of the record is found

            do {
                if (dataBuffer.Position == dataBuffer.Count) {
                    checkDataLength();
                } else {
                    startedWithQualifier = false;

                    // grab the current letter as a char

                    char currentLetter = dataBuffer.Buffer[dataBuffer.Position];

                    if (userSettings.UseTextQualifier && currentLetter == userSettings.TextQualifier) {
                        // this will be a text qualified column, so
                        // we need to set startedWithQualifier to make it
                        // enter the seperate branch to handle text
                        // qualified columns

                        lastLetter = currentLetter;

                        // read qualified
                        startedColumn = true;
                        dataBuffer.ColumnStart = dataBuffer.Position + 1;
                        startedWithQualifier = true;
                        boolean lastLetterWasQualifier = false;

                        char escapeChar = userSettings.TextQualifier;

                        if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH) {
                            escapeChar = Letters.BACKSLASH;
                        }

                        boolean eatingTrailingJunk = false;
                        boolean lastLetterWasEscape = false;
                        boolean readingComplexEscape = false;
                        int escape = ComplexEscape.UNICODE;
                        int escapeLength = 0;
                        char escapeValue = (char) 0;

                        dataBuffer.Position++;

                        do {
                            if (dataBuffer.Position == dataBuffer.Count) {
                                checkDataLength();
                            } else {
                                // grab the current letter as a char

                                currentLetter = dataBuffer.Buffer[dataBuffer.Position];

                                if (eatingTrailingJunk) {
                                    dataBuffer.ColumnStart = dataBuffer.Position + 1;

                                    if (currentLetter == userSettings.Delimiter) {
                                        endColumn();
                                    } else if ((!useCustomRecordDelimiter
                                            && (currentLetter == Letters.CR || currentLetter == Letters.LF))
                                            || (useCustomRecordDelimiter
                                                    && currentLetter == userSettings.RecordDelimiter)) {
                                        endColumn();

                                        endRecord();
                                    }
                                } else if (readingComplexEscape) {
                                    escapeLength++;

                                    switch (escape) {
                                    case ComplexEscape.UNICODE:
                                        escapeValue *= (char) 16;
                                        escapeValue += hexToDec(currentLetter);

                                        if (escapeLength == 4) {
                                            readingComplexEscape = false;
                                        }

                                        break;
                                    case ComplexEscape.OCTAL:
                                        escapeValue *= (char) 8;
                                        escapeValue += (char) (currentLetter - '0');

                                        if (escapeLength == 3) {
                                            readingComplexEscape = false;
                                        }

                                        break;
                                    case ComplexEscape.DECIMAL:
                                        escapeValue *= (char) 10;
                                        escapeValue += (char) (currentLetter - '0');

                                        if (escapeLength == 3) {
                                            readingComplexEscape = false;
                                        }

                                        break;
                                    case ComplexEscape.HEX:
                                        escapeValue *= (char) 16;
                                        escapeValue += hexToDec(currentLetter);

                                        if (escapeLength == 2) {
                                            readingComplexEscape = false;
                                        }

                                        break;
                                    }

                                    if (!readingComplexEscape) {
                                        appendLetter(escapeValue);
                                    } else {
                                        dataBuffer.ColumnStart = dataBuffer.Position + 1;
                                    }
                                } else if (currentLetter == userSettings.TextQualifier) {
                                    if (lastLetterWasEscape) {
                                        lastLetterWasEscape = false;
                                        lastLetterWasQualifier = false;
                                    } else {
                                        updateCurrentValue();

                                        if (userSettings.EscapeMode == ESCAPE_MODE_DOUBLED) {
                                            lastLetterWasEscape = true;
                                        }

                                        lastLetterWasQualifier = true;
                                    }
                                } else if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH
                                        && lastLetterWasEscape) {
                                    switch (currentLetter) {
                                    case 'n':
                                        appendLetter(Letters.LF);
                                        break;
                                    case 'r':
                                        appendLetter(Letters.CR);
                                        break;
                                    case 't':
                                        appendLetter(Letters.TAB);
                                        break;
                                    case 'b':
                                        appendLetter(Letters.BACKSPACE);
                                        break;
                                    case 'f':
                                        appendLetter(Letters.FORM_FEED);
                                        break;
                                    case 'e':
                                        appendLetter(Letters.ESCAPE);
                                        break;
                                    case 'v':
                                        appendLetter(Letters.VERTICAL_TAB);
                                        break;
                                    case 'a':
                                        appendLetter(Letters.ALERT);
                                        break;
                                    case '0':
                                    case '1':
                                    case '2':
                                    case '3':
                                    case '4':
                                    case '5':
                                    case '6':
                                    case '7':
                                        escape = ComplexEscape.OCTAL;
                                        readingComplexEscape = true;
                                        escapeLength = 1;
                                        escapeValue = (char) (currentLetter - '0');
                                        dataBuffer.ColumnStart = dataBuffer.Position + 1;
                                        break;
                                    case 'u':
                                    case 'x':
                                    case 'o':
                                    case 'd':
                                    case 'U':
                                    case 'X':
                                    case 'O':
                                    case 'D':
                                        switch (currentLetter) {
                                        case 'u':
                                        case 'U':
                                            escape = ComplexEscape.UNICODE;
                                            break;
                                        case 'x':
                                        case 'X':
                                            escape = ComplexEscape.HEX;
                                            break;
                                        case 'o':
                                        case 'O':
                                            escape = ComplexEscape.OCTAL;
                                            break;
                                        case 'd':
                                        case 'D':
                                            escape = ComplexEscape.DECIMAL;
                                            break;
                                        }

                                        readingComplexEscape = true;
                                        escapeLength = 0;
                                        escapeValue = (char) 0;
                                        dataBuffer.ColumnStart = dataBuffer.Position + 1;

                                        break;
                                    default:
                                        break;
                                    }

                                    lastLetterWasEscape = false;

                                    // can only happen for ESCAPE_MODE_BACKSLASH
                                } else if (currentLetter == escapeChar) {
                                    updateCurrentValue();
                                    lastLetterWasEscape = true;
                                } else {
                                    if (lastLetterWasQualifier) {
                                        if (currentLetter == userSettings.Delimiter) {
                                            endColumn();
                                        } else if ((!useCustomRecordDelimiter
                                                && (currentLetter == Letters.CR || currentLetter == Letters.LF))
                                                || (useCustomRecordDelimiter
                                                        && currentLetter == userSettings.RecordDelimiter)) {
                                            endColumn();

                                            endRecord();
                                        } else {
                                            dataBuffer.ColumnStart = dataBuffer.Position + 1;

                                            eatingTrailingJunk = true;
                                        }

                                        // make sure to clear the flag for next
                                        // run of the loop

                                        lastLetterWasQualifier = false;
                                    }
                                }

                                // keep track of the last letter because we need
                                // it for several key decisions

                                lastLetter = currentLetter;

                                if (startedColumn) {
                                    dataBuffer.Position++;

                                    if (userSettings.SafetySwitch && dataBuffer.Position - dataBuffer.ColumnStart
                                            + columnBuffer.Position > 100000) {
                                        close();

                                        throw new IOException("Maximum column length of 100,000 exceeded in column "
                                                + NumberFormat.getIntegerInstance().format(columnsCount)
                                                + " in record "
                                                + NumberFormat.getIntegerInstance().format(currentRecord)
                                                + ". Set the SafetySwitch property to false"
                                                + " if you're expecting column lengths greater than 100,000 characters to"
                                                + " avoid this error.");
                                    }
                                }
                            } // end else

                        } while (hasMoreData && startedColumn);
                    } else if (currentLetter == userSettings.Delimiter) {
                        // we encountered a column with no data, so
                        // just send the end column

                        lastLetter = currentLetter;

                        endColumn();
                    } else if (useCustomRecordDelimiter && currentLetter == userSettings.RecordDelimiter) {
                        // this will skip blank lines
                        if (startedColumn || columnsCount > 0 || !userSettings.SkipEmptyRecords) {
                            endColumn();

                            endRecord();
                        } else {
                            dataBuffer.LineStart = dataBuffer.Position + 1;
                        }

                        lastLetter = currentLetter;
                    } else if (!useCustomRecordDelimiter
                            && (currentLetter == Letters.CR || currentLetter == Letters.LF)) {
                        // this will skip blank lines
                        if (startedColumn || columnsCount > 0 || (!userSettings.SkipEmptyRecords
                                && (currentLetter == Letters.CR || lastLetter != Letters.CR))) {
                            endColumn();

                            endRecord();
                        } else {
                            dataBuffer.LineStart = dataBuffer.Position + 1;
                        }

                        lastLetter = currentLetter;
                    } else if (userSettings.UseComments && columnsCount == 0
                            && currentLetter == userSettings.Comment) {
                        // encountered a comment character at the beginning of
                        // the line so just ignore the rest of the line

                        lastLetter = currentLetter;

                        skipLine();
                    } else if (userSettings.TrimWhitespace
                            && (currentLetter == Letters.SPACE || currentLetter == Letters.TAB)) {
                        // do nothing, this will trim leading whitespace
                        // for both text qualified columns and non

                        startedColumn = true;
                        dataBuffer.ColumnStart = dataBuffer.Position + 1;
                    } else {
                        // since the letter wasn't a special letter, this
                        // will be the first letter of our current column

                        startedColumn = true;
                        dataBuffer.ColumnStart = dataBuffer.Position;
                        boolean lastLetterWasBackslash = false;
                        boolean readingComplexEscape = false;
                        int escape = ComplexEscape.UNICODE;
                        int escapeLength = 0;
                        char escapeValue = (char) 0;

                        boolean firstLoop = true;

                        do {
                            if (!firstLoop && dataBuffer.Position == dataBuffer.Count) {
                                checkDataLength();
                            } else {
                                if (!firstLoop) {
                                    // grab the current letter as a char
                                    currentLetter = dataBuffer.Buffer[dataBuffer.Position];
                                }

                                if (!userSettings.UseTextQualifier
                                        && userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH
                                        && currentLetter == Letters.BACKSLASH) {
                                    if (lastLetterWasBackslash) {
                                        lastLetterWasBackslash = false;
                                    } else {
                                        updateCurrentValue();
                                        lastLetterWasBackslash = true;
                                    }
                                } else if (readingComplexEscape) {
                                    escapeLength++;

                                    switch (escape) {
                                    case ComplexEscape.UNICODE:
                                        escapeValue *= (char) 16;
                                        escapeValue += hexToDec(currentLetter);

                                        if (escapeLength == 4) {
                                            readingComplexEscape = false;
                                        }

                                        break;
                                    case ComplexEscape.OCTAL:
                                        escapeValue *= (char) 8;
                                        escapeValue += (char) (currentLetter - '0');

                                        if (escapeLength == 3) {
                                            readingComplexEscape = false;
                                        }

                                        break;
                                    case ComplexEscape.DECIMAL:
                                        escapeValue *= (char) 10;
                                        escapeValue += (char) (currentLetter - '0');

                                        if (escapeLength == 3) {
                                            readingComplexEscape = false;
                                        }

                                        break;
                                    case ComplexEscape.HEX:
                                        escapeValue *= (char) 16;
                                        escapeValue += hexToDec(currentLetter);

                                        if (escapeLength == 2) {
                                            readingComplexEscape = false;
                                        }

                                        break;
                                    }

                                    if (!readingComplexEscape) {
                                        appendLetter(escapeValue);
                                    } else {
                                        dataBuffer.ColumnStart = dataBuffer.Position + 1;
                                    }
                                } else if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH
                                        && lastLetterWasBackslash) {
                                    switch (currentLetter) {
                                    case 'n':
                                        appendLetter(Letters.LF);
                                        break;
                                    case 'r':
                                        appendLetter(Letters.CR);
                                        break;
                                    case 't':
                                        appendLetter(Letters.TAB);
                                        break;
                                    case 'b':
                                        appendLetter(Letters.BACKSPACE);
                                        break;
                                    case 'f':
                                        appendLetter(Letters.FORM_FEED);
                                        break;
                                    case 'e':
                                        appendLetter(Letters.ESCAPE);
                                        break;
                                    case 'v':
                                        appendLetter(Letters.VERTICAL_TAB);
                                        break;
                                    case 'a':
                                        appendLetter(Letters.ALERT);
                                        break;
                                    case '0':
                                    case '1':
                                    case '2':
                                    case '3':
                                    case '4':
                                    case '5':
                                    case '6':
                                    case '7':
                                        escape = ComplexEscape.OCTAL;
                                        readingComplexEscape = true;
                                        escapeLength = 1;
                                        escapeValue = (char) (currentLetter - '0');
                                        dataBuffer.ColumnStart = dataBuffer.Position + 1;
                                        break;
                                    case 'u':
                                    case 'x':
                                    case 'o':
                                    case 'd':
                                    case 'U':
                                    case 'X':
                                    case 'O':
                                    case 'D':
                                        switch (currentLetter) {
                                        case 'u':
                                        case 'U':
                                            escape = ComplexEscape.UNICODE;
                                            break;
                                        case 'x':
                                        case 'X':
                                            escape = ComplexEscape.HEX;
                                            break;
                                        case 'o':
                                        case 'O':
                                            escape = ComplexEscape.OCTAL;
                                            break;
                                        case 'd':
                                        case 'D':
                                            escape = ComplexEscape.DECIMAL;
                                            break;
                                        }

                                        readingComplexEscape = true;
                                        escapeLength = 0;
                                        escapeValue = (char) 0;
                                        dataBuffer.ColumnStart = dataBuffer.Position + 1;

                                        break;
                                    default:
                                        break;
                                    }

                                    lastLetterWasBackslash = false;
                                } else {
                                    if (currentLetter == userSettings.Delimiter) {
                                        endColumn();
                                    } else if ((!useCustomRecordDelimiter
                                            && (currentLetter == Letters.CR || currentLetter == Letters.LF))
                                            || (useCustomRecordDelimiter
                                                    && currentLetter == userSettings.RecordDelimiter)) {
                                        endColumn();

                                        endRecord();
                                    }
                                }

                                // keep track of the last letter because we need
                                // it for several key decisions

                                lastLetter = currentLetter;
                                firstLoop = false;

                                if (startedColumn) {
                                    dataBuffer.Position++;

                                    if (userSettings.SafetySwitch && dataBuffer.Position - dataBuffer.ColumnStart
                                            + columnBuffer.Position > 100000) {
                                        close();

                                        throw new IOException("Maximum column length of 100,000 exceeded in column "
                                                + NumberFormat.getIntegerInstance().format(columnsCount)
                                                + " in record "
                                                + NumberFormat.getIntegerInstance().format(currentRecord)
                                                + ". Set the SafetySwitch property to false"
                                                + " if you're expecting column lengths greater than 100,000 characters to"
                                                + " avoid this error.");
                                    }
                                }
                            } // end else
                        } while (hasMoreData && startedColumn);
                    }

                    if (hasMoreData) {
                        dataBuffer.Position++;
                    }
                } // end else
            } while (hasMoreData && !hasReadNextLine);

            // check to see if we hit the end of the file
            // without processing the current record

            if (startedColumn || lastLetter == userSettings.Delimiter) {
                endColumn();

                endRecord();
            }
        }

        if (userSettings.CaptureRawRecord) {
            if (hasMoreData) {
                if (rawBuffer.Position == 0) {
                    rawRecord = new String(dataBuffer.Buffer, dataBuffer.LineStart,
                            dataBuffer.Position - dataBuffer.LineStart - 1);
                } else {
                    rawRecord = new String(rawBuffer.Buffer, 0, rawBuffer.Position) + new String(dataBuffer.Buffer,
                            dataBuffer.LineStart, dataBuffer.Position - dataBuffer.LineStart - 1);
                }
            } else {
                // for hasMoreData to ever be false, all data would have had to
                // have been
                // copied to the raw buffer
                rawRecord = new String(rawBuffer.Buffer, 0, rawBuffer.Position);
            }
        } else {
            rawRecord = "";
        }

        return hasReadNextLine;
    }

    /**
     * @exception IOException
     *                Thrown if an error occurs while reading data from the
     *                source stream.
     */
    private void checkDataLength() throws IOException {
        if (!initialized) {
            if (fileName != null) {
                inputStream = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), charset),
                        StaticSettings.MAX_FILE_BUFFER_SIZE);
            }

            charset = null;
            initialized = true;
        }

        updateCurrentValue();

        if (userSettings.CaptureRawRecord && dataBuffer.Count > 0) {
            if (rawBuffer.Buffer.length - rawBuffer.Position < dataBuffer.Count - dataBuffer.LineStart) {
                int newLength = rawBuffer.Buffer.length
                        + Math.max(dataBuffer.Count - dataBuffer.LineStart, rawBuffer.Buffer.length);

                char[] holder = new char[newLength];

                System.arraycopy(rawBuffer.Buffer, 0, holder, 0, rawBuffer.Position);

                rawBuffer.Buffer = holder;
            }

            System.arraycopy(dataBuffer.Buffer, dataBuffer.LineStart, rawBuffer.Buffer, rawBuffer.Position,
                    dataBuffer.Count - dataBuffer.LineStart);

            rawBuffer.Position += dataBuffer.Count - dataBuffer.LineStart;
        }

        try {
            dataBuffer.Count = inputStream.read(dataBuffer.Buffer, 0, dataBuffer.Buffer.length);
        } catch (IOException ex) {
            close();

            throw ex;
        }

        // if no more data could be found, set flag stating that
        // the end of the data was found

        if (dataBuffer.Count == -1) {
            hasMoreData = false;
        }

        dataBuffer.Position = 0;
        dataBuffer.LineStart = 0;
        dataBuffer.ColumnStart = 0;
    }

    /**
     * Read the first record of data as column headers.
     * 
     * @return Whether the header record was successfully read or not.
     * @exception IOException
     *                Thrown if an error occurs while reading data from the
     *                source stream.
     */
    public boolean readHeaders() throws IOException {
        boolean result = readRecord();

        // copy the header data from the column array
        // to the header string array

        headersHolder.Length = columnsCount;

        headersHolder.Headers = new String[columnsCount];

        for (int i = 0; i < headersHolder.Length; i++) {
            String columnValue = get(i);

            headersHolder.Headers[i] = columnValue;

            // if there are duplicate header names, we will save the last one
            headersHolder.IndexByName.put(columnValue, Integer.valueOf(i));
        }

        if (result) {
            currentRecord--;
        }

        columnsCount = 0;

        return result;
    }

    /**
     * Returns the column header value for a given column index.
     * 
     * @param columnIndex
     *            The index of the header column being requested.
     * @return The value of the column header at the given column index.
     * @exception IOException
     *                Thrown if this object has already been closed.
     */
    public String getHeader(int columnIndex) throws IOException {
        checkClosed();

        // check to see if we have read the header record yet

        // check to see if the column index is within the bounds
        // of our header array

        if (columnIndex > -1 && columnIndex < headersHolder.Length) {
            // return the processed header data for this column

            return headersHolder.Headers[columnIndex];
        } else {
            return "";
        }
    }

    public boolean isQualified(int columnIndex) throws IOException {
        checkClosed();

        if (columnIndex < columnsCount && columnIndex > -1) {
            return isQualified[columnIndex];
        } else {
            return false;
        }
    }

    /**
     * @exception IOException
     *                Thrown if a very rare extreme exception occurs during
     *                parsing, normally resulting from improper data format.
     */
    private void endColumn() throws IOException {
        String currentValue = "";

        // must be called before setting startedColumn = false
        if (startedColumn) {
            if (columnBuffer.Position == 0) {
                if (dataBuffer.ColumnStart < dataBuffer.Position) {
                    int lastLetter = dataBuffer.Position - 1;

                    if (userSettings.TrimWhitespace && !startedWithQualifier) {
                        while (lastLetter >= dataBuffer.ColumnStart
                                && (dataBuffer.Buffer[lastLetter] == Letters.SPACE
                                        || dataBuffer.Buffer[lastLetter] == Letters.TAB)) {
                            lastLetter--;
                        }
                    }

                    currentValue = new String(dataBuffer.Buffer, dataBuffer.ColumnStart,
                            lastLetter - dataBuffer.ColumnStart + 1);
                }
            } else {
                updateCurrentValue();

                int lastLetter = columnBuffer.Position - 1;

                if (userSettings.TrimWhitespace && !startedWithQualifier) {
                    while (lastLetter >= 0 && (columnBuffer.Buffer[lastLetter] == Letters.SPACE
                            || columnBuffer.Buffer[lastLetter] == Letters.SPACE)) {
                        lastLetter--;
                    }
                }

                currentValue = new String(columnBuffer.Buffer, 0, lastLetter + 1);
            }
        }

        columnBuffer.Position = 0;

        startedColumn = false;

        if (columnsCount >= 100000 && userSettings.SafetySwitch) {
            close();

            throw new IOException("Maximum column count of 100,000 exceeded in record "
                    + NumberFormat.getIntegerInstance().format(currentRecord)
                    + ". Set the SafetySwitch property to false"
                    + " if you're expecting more than 100,000 columns per record to" + " avoid this error.");
        }

        // check to see if our current holder array for
        // column chunks is still big enough to handle another
        // column chunk

        if (columnsCount == values.length) {
            // holder array needs to grow to be able to hold another column
            int newLength = values.length * 2;

            String[] holder = new String[newLength];

            System.arraycopy(values, 0, holder, 0, values.length);

            values = holder;

            boolean[] qualifiedHolder = new boolean[newLength];

            System.arraycopy(isQualified, 0, qualifiedHolder, 0, isQualified.length);

            isQualified = qualifiedHolder;
        }

        values[columnsCount] = currentValue;

        isQualified[columnsCount] = startedWithQualifier;

        currentValue = "";

        columnsCount++;
    }

    private void appendLetter(char letter) {
        if (columnBuffer.Position == columnBuffer.Buffer.length) {
            int newLength = columnBuffer.Buffer.length * 2;

            char[] holder = new char[newLength];

            System.arraycopy(columnBuffer.Buffer, 0, holder, 0, columnBuffer.Position);

            columnBuffer.Buffer = holder;
        }
        columnBuffer.Buffer[columnBuffer.Position++] = letter;
        dataBuffer.ColumnStart = dataBuffer.Position + 1;
    }

    private void updateCurrentValue() {
        if (startedColumn && dataBuffer.ColumnStart < dataBuffer.Position) {
            if (columnBuffer.Buffer.length - columnBuffer.Position < dataBuffer.Position - dataBuffer.ColumnStart) {
                int newLength = columnBuffer.Buffer.length
                        + Math.max(dataBuffer.Position - dataBuffer.ColumnStart, columnBuffer.Buffer.length);

                char[] holder = new char[newLength];

                System.arraycopy(columnBuffer.Buffer, 0, holder, 0, columnBuffer.Position);

                columnBuffer.Buffer = holder;
            }

            System.arraycopy(dataBuffer.Buffer, dataBuffer.ColumnStart, columnBuffer.Buffer, columnBuffer.Position,
                    dataBuffer.Position - dataBuffer.ColumnStart);

            columnBuffer.Position += dataBuffer.Position - dataBuffer.ColumnStart;
        }

        dataBuffer.ColumnStart = dataBuffer.Position + 1;
    }

    /**
     * @exception IOException
     *                Thrown if an error occurs while reading data from the
     *                source stream.
     */
    private void endRecord() throws IOException {
        // this flag is used as a loop exit condition
        // during parsing

        hasReadNextLine = true;

        currentRecord++;
    }

    /**
     * Gets the corresponding column index for a given column header name.
     * 
     * @param headerName
     *            The header name of the column.
     * @return The column index for the given column header name.&nbsp;Returns
     *         -1 if not found.
     * @exception IOException
     *                Thrown if this object has already been closed.
     */
    public int getIndex(String headerName) throws IOException {
        checkClosed();

        Integer indexValue = headersHolder.IndexByName.get(headerName);

        if (indexValue != null) {
            return indexValue.intValue();
        } else {
            return -1;
        }
    }

    /**
     * Skips the next record of data by parsing each column.&nbsp;Does not
     * increment
     * {@link com.csvreader.CsvReader#getCurrentRecord getCurrentRecord()}.
     * 
     * @return Whether another record was successfully skipped or not.
     * @exception IOException
     *                Thrown if an error occurs while reading data from the
     *                source stream.
     */
    public boolean skipRecord() throws IOException {
        checkClosed();

        boolean recordRead = false;

        if (hasMoreData) {
            recordRead = readRecord();

            if (recordRead) {
                currentRecord--;
            }
        }

        return recordRead;
    }

    /**
     * Skips the next line of data using the standard end of line characters and
     * does not do any column delimited parsing.
     * 
     * @return Whether a line was successfully skipped or not.
     * @exception IOException
     *                Thrown if an error occurs while reading data from the
     *                source stream.
     */
    public boolean skipLine() throws IOException {
        checkClosed();

        // clear public column values for current line

        columnsCount = 0;

        boolean skippedLine = false;

        if (hasMoreData) {
            boolean foundEol = false;

            do {
                if (dataBuffer.Position == dataBuffer.Count) {
                    checkDataLength();
                } else {
                    skippedLine = true;

                    // grab the current letter as a char

                    char currentLetter = dataBuffer.Buffer[dataBuffer.Position];

                    if (currentLetter == Letters.CR || currentLetter == Letters.LF) {
                        foundEol = true;
                    }

                    // keep track of the last letter because we need
                    // it for several key decisions

                    lastLetter = currentLetter;

                    if (!foundEol) {
                        dataBuffer.Position++;
                    }

                } // end else
            } while (hasMoreData && !foundEol);

            columnBuffer.Position = 0;

            dataBuffer.LineStart = dataBuffer.Position + 1;
        }

        rawBuffer.Position = 0;
        rawRecord = "";

        return skippedLine;
    }

    /**
     * Closes and releases all related resources.
     */
    public void close() {
        if (!closed) {
            close(true);

            closed = true;
        }
    }

    /**
     * 
     */
    private void close(boolean closing) {
        if (!closed) {
            if (closing) {
                charset = null;
                headersHolder.Headers = null;
                headersHolder.IndexByName = null;
                dataBuffer.Buffer = null;
                columnBuffer.Buffer = null;
                rawBuffer.Buffer = null;
            }

            try {
                if (initialized) {
                    inputStream.close();
                }
            } catch (Exception e) {
                // just eat the exception
            }

            inputStream = null;

            closed = true;
        }
    }

    /**
     * @exception IOException
     *                Thrown if this object has already been closed.
     */
    private void checkClosed() throws IOException {
        if (closed) {
            throw new IOException("This instance of the CsvReader class has already been closed.");
        }
    }

    /**
     * 
     */
    protected void finalize() {
        close(false);
    }

    private class ComplexEscape {
        private static final int UNICODE = 1;

        private static final int OCTAL = 2;

        private static final int DECIMAL = 3;

        private static final int HEX = 4;
    }

    private static char hexToDec(char hex) {
        char result;

        if (hex >= 'a') {
            result = (char) (hex - 'a' + 10);
        } else if (hex >= 'A') {
            result = (char) (hex - 'A' + 10);
        } else {
            result = (char) (hex - '0');
        }

        return result;
    }

    private class DataBuffer {
        public char[] Buffer;

        public int Position;

        // / <summary>
        // / How much usable data has been read into the stream,
        // / which will not always be as long as Buffer.Length.
        // / </summary>
        public int Count;

        // / <summary>
        // / The position of the cursor in the buffer when the
        // / current column was started or the last time data
        // / was moved out to the column buffer.
        // / </summary>
        public int ColumnStart;

        public int LineStart;

        public DataBuffer() {
            Buffer = new char[StaticSettings.MAX_BUFFER_SIZE];
            Position = 0;
            Count = 0;
            ColumnStart = 0;
            LineStart = 0;
        }
    }

    private class ColumnBuffer {
        public char[] Buffer;

        public int Position;

        public ColumnBuffer() {
            Buffer = new char[StaticSettings.INITIAL_COLUMN_BUFFER_SIZE];
            Position = 0;
        }
    }

    private class RawRecordBuffer {
        public char[] Buffer;

        public int Position;

        public RawRecordBuffer() {
            Buffer = new char[StaticSettings.INITIAL_COLUMN_BUFFER_SIZE * StaticSettings.INITIAL_COLUMN_COUNT];
            Position = 0;
        }
    }

    private class Letters {
        public static final char LF = '\n';

        public static final char CR = '\r';

        public static final char QUOTE = '"';

        public static final char COMMA = ',';

        public static final char SPACE = ' ';

        public static final char TAB = '\t';

        public static final char POUND = '#';

        public static final char BACKSLASH = '\\';

        public static final char NULL = '\0';

        public static final char BACKSPACE = '\b';

        public static final char FORM_FEED = '\f';

        public static final char ESCAPE = '\u001B'; // ASCII/ANSI escape

        public static final char VERTICAL_TAB = '\u000B';

        public static final char ALERT = '\u0007';
    }

    private class UserSettings {
        // having these as publicly accessible members will prevent
        // the overhead of the method call that exists on properties
        public boolean CaseSensitive;

        public char TextQualifier;

        public boolean TrimWhitespace;

        public boolean UseTextQualifier;

        public char Delimiter;

        public char RecordDelimiter;

        public char Comment;

        public boolean UseComments;

        public int EscapeMode;

        public boolean SafetySwitch;

        public boolean SkipEmptyRecords;

        public boolean CaptureRawRecord;

        public UserSettings() {
            CaseSensitive = true;
            TextQualifier = Letters.QUOTE;
            TrimWhitespace = true;
            UseTextQualifier = true;
            Delimiter = Letters.COMMA;
            RecordDelimiter = Letters.NULL;
            Comment = Letters.POUND;
            UseComments = false;
            EscapeMode = CsvReader.ESCAPE_MODE_DOUBLED;
            SafetySwitch = true;
            SkipEmptyRecords = true;
            CaptureRawRecord = true;
        }
    }

    private class HeadersHolder {
        public String[] Headers;

        public int Length;

        public HashMap<String, Integer> IndexByName;

        public HeadersHolder() {
            Headers = null;
            Length = 0;
            IndexByName = new HashMap<String, Integer>();
        }
    }

    private class StaticSettings {
        // these are static instead of final so they can be changed in unit test
        // isn't visible outside this class and is only accessed once during
        // CsvReader construction
        public static final int MAX_BUFFER_SIZE = 1024;

        public static final int MAX_FILE_BUFFER_SIZE = 4 * 1024;

        public static final int INITIAL_COLUMN_COUNT = 10;

        public static final int INITIAL_COLUMN_BUFFER_SIZE = 50;
    }
}