CSVRE.java Source code

Java tutorial

Introduction

Here is the source code for CSVRE.java

Source

/*
 * Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
 * All rights reserved. Software written by Ian F. Darwin and others.
 * $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * 
 * Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
 * cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
 * pioneering role in inventing and promulgating (and standardizing) the Java 
 * language and environment is gratefully acknowledged.
 * 
 * The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
 * inventing predecessor languages C and C++ is also gratefully acknowledged.
 */

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Simple demo of CSV matching using Regular Expressions.
 * Does NOT use the "CSV" class defined in the Java Cookbook, but uses
 * a regex pattern simplified from Chapter 7 of <em>Mastering Regular
 * Expressions</em> (p. 205, first edn.)
 * @version $Id: CSVRE.java,v 1.16 2004/04/25 19:43:32 ian Exp $
 */
public class CSVRE {
    /**
     * The pattern used to match one CSV field. It consists of three
     * alternatives, tried in order: a quoted field (group 1 captures the
     * text between the quotes), an unquoted field (group 2 captures the
     * text), and a lone comma, which stands for an empty field. The first
     * two alternatives also consume the comma following the field, if any.
     */
    public static final String CSV_PATTERN = "\"([^\"]+?)\",?|([^,]+),?|,";

    /**
     * Compiled form of {@link #CSV_PATTERN}. Compiled once for the class
     * instead of on every construction; a Pattern is immutable and can be
     * shared by all instances.
     */
    private static final Pattern CSV_RE = Pattern.compile(CSV_PATTERN);

    /**
     * Prints the pattern, then parses standard input line by line.
     *
     * @param argv ignored
     * @throws IOException if reading standard input fails
     */
    public static void main(String[] argv) throws IOException {
        System.out.println(CSV_PATTERN);
        new CSVRE().process(new BufferedReader(new InputStreamReader(System.in)));
    }

    /** Construct a regex-based CSV parser. */
    public CSVRE() {
        // Nothing to set up: the pattern is compiled once at class load.
    }

    /**
     * Process one file. Each line is echoed, handed to
     * {@link #parse(String)}, and the resulting fields are printed on one
     * line, each followed by a comma.
     *
     * @param in source of the lines to parse; read until end of input
     * @throws IOException if reading from {@code in} fails
     */
    public void process(BufferedReader in) throws IOException {
        String line;

        // For each line...
        while ((line = in.readLine()) != null) {
            System.out.println("line = `" + line + "'");
            List<String> fields = parse(line);
            System.out.println("Found " + fields.size() + " items.");
            for (String field : fields) {
                // An empty field is stored as null and therefore prints as "null".
                System.out.print(field + ",");
            }
            System.out.println();
        }
    }

    /**
     * Parse one line.
     *
     * @param line the CSV text to split; must not be null
     * @return List of Strings, minus their enclosing double quotes; an
     *         empty field is represented by a {@code null} element. A field
     *         with an unbalanced quote is returned verbatim.
     */
    public List<String> parse(String line) {
        List<String> fields = new ArrayList<String>();
        Matcher m = CSV_RE.matcher(line);
        // For each field
        while (m.find()) {
            // Group 1 is set only when the quoted-field alternative matched;
            // it already excludes the quotes and the separating comma.
            String field = m.group(1);
            if (field == null) {
                // Group 2 is the unquoted text; it stays null for a lone comma.
                field = m.group(2);
                if (field != null) {
                    // The unquoted alternative also picks up text such as ""
                    // that the quoted alternative cannot match.
                    field = stripEnclosingQuotes(field);
                }
            }
            if (field != null && field.length() == 0) {
                field = null;
            }
            fields.add(field);
        }
        return fields;
    }

    /**
     * Removes one leading and one trailing double quote, but only when both
     * are present, so that a lone or unbalanced quote is never cut off.
     */
    private static String stripEnclosingQuotes(String text) {
        boolean quoted = text.length() >= 2
                && text.startsWith("\"")
                && text.endsWith("\"");
        return quoted ? text.substring(1, text.length() - 1) : text;
    }
}