CSVFile.java Source code

Java tutorial

Introduction

Here is the source code for CSVFile.java

Source

/*
 * Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
 * All rights reserved. Software written by Ian F. Darwin and others.
 * $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * 
 * Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
 * cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
 * pioneering role in inventing and promulgating (and standardizing) the Java 
 * language and environment is gratefully acknowledged.
 * 
 * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
 * inventing predecessor languages C and C++ is also gratefully acknowledged.
 */
import java.io.*;
import java.util.*;

/** CSV in action: lines from a file and print. */
public class CSVFile {

    public static void main(String[] args) throws IOException {

        // Construct a new CSV parser.
        CSV csv = new CSV();

        if (args.length == 0) { // read standard input
            BufferedReader is = new BufferedReader(new InputStreamReader(System.in));
            process(csv, is);
        } else {
            for (int i = 0; i < args.length; i++) {
                process(csv, new BufferedReader(new FileReader(args[i])));
            }
        }
    }

    protected static void process(CSV csv, BufferedReader is) throws IOException {
        String line;
        while ((line = is.readLine()) != null) {
            System.out.println("line = `" + line + "'");
            Iterator e = csv.parse(line).iterator();
            int i = 0;
            while (e.hasNext())
                System.out.println("field[" + i++ + "] = `" + e.next() + "'");
        }
    }
}

/** Parse comma-separated values (CSV), a common Windows file format.
 * Sample input: "LU",86.25,"11/4/1998","2:19PM",+4.0625
 * <p>
 * Inner logic adapted from a C++ original that was
 * Copyright (C) 1999 Lucent Technologies
 * Excerpted from 'The Practice of Programming'
 * by Brian W. Kernighan and Rob Pike.
 * <p>
 * Included by permission of the http://tpop.awl.com/ web site, 
 * which says:
 * "You may use this code for any purpose, as long as you leave 
 * the copyright notice and book citation attached." I have done so.
 * @author Brian W. Kernighan and Rob Pike (C++ original)
 * @author Ian F. Darwin (translation into Java and removal of I/O)
 * @author Ben Ballard (rewrote advQuoted to handle '""' and for readability)
 */
class CSV {

    public static final char DEFAULT_SEP = ',';

    /** Construct a CSV parser, with the default separator (`,'). */
    public CSV() {
        this(DEFAULT_SEP);
    }

    /** Construct a CSV parser with a given separator. 
     * @param sep The single char for the separator (not a list of
     * separator characters)
     */
    public CSV(char sep) {
        fieldSep = sep;
    }

    /** The fields in the current String */
    protected List list = new ArrayList();

    /** the separator char for this parser */
    protected char fieldSep;

    /** parse: break the input String into fields
     * @return java.util.Iterator containing each field 
     * from the original as a String, in order.
     */
    public List parse(String line) {
        StringBuffer sb = new StringBuffer();
        list.clear(); // recycle to initial state
        int i = 0;

        if (line.length() == 0) {
            list.add(line);
            return list;
        }

        do {
            sb.setLength(0);
            if (i < line.length() && line.charAt(i) == '"')
                i = advQuoted(line, sb, ++i); // skip quote
            else
                i = advPlain(line, sb, i);
            list.add(sb.toString());
            i++;
        } while (i < line.length());

        return list;
    }

    /** advQuoted: quoted field; return index of next separator */
    protected int advQuoted(String s, StringBuffer sb, int i) {
        int j;
        int len = s.length();
        for (j = i; j < len; j++) {
            if (s.charAt(j) == '"' && j + 1 < len) {
                if (s.charAt(j + 1) == '"') {
                    j++; // skip escape char
                } else if (s.charAt(j + 1) == fieldSep) { //next delimeter
                    j++; // skip end quotes
                    break;
                }
            } else if (s.charAt(j) == '"' && j + 1 == len) { // end quotes at end of line
                break; //done
            }
            sb.append(s.charAt(j)); // regular character.
        }
        return j;
    }

    /** advPlain: unquoted field; return index of next separator */
    protected int advPlain(String s, StringBuffer sb, int i) {
        int j;

        j = s.indexOf(fieldSep, i); // look for separator
        if (j == -1) { // none found
            sb.append(s.substring(i));
            return s.length();
        } else {
            sb.append(s.substring(i, j));
            return j;
        }
    }
}
//File:CSV.dat
/*
"Hello","World"
123,456
"LU",86.25|"11/4/1998"|"2:19PM"|+4.0625
bad "input",123e01
XYZZY,""|"OReilly & Associates| Inc."|"Darwin| Ian"|"a \"glug\" bit|"|5|"Memory fault| core NOT dumped"
    
*/