CSV in action: lines from a file and print
/*
* Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java
* language and environment is gratefully acknowledged.
*
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/
import java.io.*;
import java.util.*;
/** CSV in action: lines from a file and print. */
public class CSVFile {
public static void main(String[] args) throws IOException {
// Construct a new CSV parser.
CSV csv = new CSV();
if (args.length == 0) { // read standard input
BufferedReader is = new BufferedReader(
new InputStreamReader(System.in));
process(csv, is);
} else {
for (int i=0; i<args.length; i++) {
process(csv, new BufferedReader(new FileReader(args[i])));
}
}
}
protected static void process(CSV csv, BufferedReader is) throws IOException {
String line;
while ((line = is.readLine()) != null) {
System.out.println("line = `" + line + "'");
Iterator e = csv.parse(line).iterator();
int i = 0;
while (e.hasNext())
System.out.println("field[" + i++ + "] = `" +
e.next() + "'");
}
}
}
/** Parse comma-separated values (CSV), a common Windows file format.
* Sample input: "LU",86.25,"11/4/1998","2:19PM",+4.0625
* <p>
* Inner logic adapted from a C++ original that was
* Copyright (C) 1999 Lucent Technologies
* Excerpted from 'The Practice of Programming'
* by Brian W. Kernighan and Rob Pike.
* <p>
* Included by permission of the http://tpop.awl.com/ web site,
* which says:
* "You may use this code for any purpose, as long as you leave
* the copyright notice and book citation attached." I have done so.
* @author Brian W. Kernighan and Rob Pike (C++ original)
* @author Ian F. Darwin (translation into Java and removal of I/O)
* @author Ben Ballard (rewrote advQuoted to handle '""' and for readability)
*/
class CSV {
public static final char DEFAULT_SEP = ',';
/** Construct a CSV parser, with the default separator (`,'). */
public CSV() {
this(DEFAULT_SEP);
}
/** Construct a CSV parser with a given separator.
* @param sep The single char for the separator (not a list of
* separator characters)
*/
public CSV(char sep) {
fieldSep = sep;
}
/** The fields in the current String */
protected List list = new ArrayList();
/** the separator char for this parser */
protected char fieldSep;
/** parse: break the input String into fields
* @return java.util.Iterator containing each field
* from the original as a String, in order.
*/
public List parse(String line)
{
StringBuffer sb = new StringBuffer();
list.clear(); // recycle to initial state
int i = 0;
if (line.length() == 0) {
list.add(line);
return list;
}
do {
sb.setLength(0);
if (i < line.length() && line.charAt(i) == '"')
i = advQuoted(line, sb, ++i); // skip quote
else
i = advPlain(line, sb, i);
list.add(sb.toString());
i++;
} while (i < line.length());
return list;
}
/** advQuoted: quoted field; return index of next separator */
protected int advQuoted(String s, StringBuffer sb, int i)
{
int j;
int len= s.length();
for (j=i; j<len; j++) {
if (s.charAt(j) == '"' && j+1 < len) {
if (s.charAt(j+1) == '"') {
j++; // skip escape char
} else if (s.charAt(j+1) == fieldSep) { //next delimeter
j++; // skip end quotes
break;
}
} else if (s.charAt(j) == '"' && j+1 == len) { // end quotes at end of line
break; //done
}
sb.append(s.charAt(j)); // regular character.
}
return j;
}
/** advPlain: unquoted field; return index of next separator */
protected int advPlain(String s, StringBuffer sb, int i)
{
int j;
j = s.indexOf(fieldSep, i); // look for separator
if (j == -1) { // none found
sb.append(s.substring(i));
return s.length();
} else {
sb.append(s.substring(i, j));
return j;
}
}
}
//File:CSV.dat
/*
"Hello","World"
123,456
"LU",86.25|"11/4/1998"|"2:19PM"|+4.0625
bad "input",123e01
XYZZY,""|"OReilly & Associates| Inc."|"Darwin| Ian"|"a \"glug\" bit|"|5|"Memory fault| core NOT dumped"
*/
Related examples in the same category