Java CSV String Split splitCSV(String str, String delim)

Here you can find the source of splitCSV(String str, String delim)

Description

Splits a string by a specified delimiter into all tokens, including empty tokens, while respecting the rules for quotes and escapes defined in RFC 4180.

License

Apache License

Parameter

Parameter Description
str string to split
delim delimiter

Return

string array

Declaration

public static String[] splitCSV(String str, String delim) 

Method Source Code

//package com.java2s;
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import java.util.ArrayList;

public class Main {
    private static final char CSV_QUOTE_CHAR = '"';

    /**
     * Splits a string by a specified delimiter into all tokens, including empty
     * tokens, while respecting the rules for quotes and escapes defined in
     * RFC 4180.
     *
     * <p>Tokens are returned verbatim: a quoted field keeps its surrounding
     * quotes and its doubled inner quotes. NOTE: use
     * StringEscapeUtils.unescapeCsv(tmp) if needed afterwards.
     *
     * <p>Malformed input is handled leniently instead of failing: a field whose
     * opening quote is never closed is split like an unquoted field, and
     * characters that follow a closing quote are kept as part of that token.
     *
     * @param str string to split; {@code null} or empty yields a single empty token
     * @param delim delimiter, must not be empty
     * @return string array with one entry per token
     * @throws IllegalArgumentException if {@code str} is non-empty and {@code delim} is empty
     * @throws NullPointerException if {@code str} is non-empty and {@code delim} is {@code null}
     */
    public static String[] splitCSV(String str, String delim) {
        // check for empty input (delimiter is deliberately not inspected here)
        if (str == null || str.isEmpty()) {
            return new String[] { "" };
        }
        checkDelimiter(delim);

        // scan string and create individual tokens
        ArrayList<String> tokens = new ArrayList<String>();
        int len = str.length();
        int from = 0;
        while (from < len) {
            int to = findTokenEnd(str, delim, from);
            tokens.add(str.substring(from, to));
            from = to + delim.length();
        }

        // a delimiter at the very end is followed by one more, empty token
        if (from == len) {
            tokens.add("");
        }

        return tokens.toArray(new String[0]);
    }

    /**
     * Same splitting rules as {@link #splitCSV(String, String)}, but writes the
     * tokens into a caller-provided array to avoid allocating a new one.
     *
     * <p>Tokens are stored from index 0 upwards; entries beyond the last token
     * are left untouched. For {@code null} or empty input the given array is
     * not modified and a new single-element array is returned instead.
     *
     * @param str string to split; {@code null} or empty yields a single empty token
     * @param delim delimiter, must not be empty
     * @param tokens output array, must be large enough to hold all tokens
     * @return {@code tokens}, or a new {@code {""}} array for null/empty input
     * @throws IllegalArgumentException if {@code str} is non-empty and {@code delim} is empty
     * @throws NullPointerException if {@code str} is non-empty and {@code delim} is {@code null}
     * @throws ArrayIndexOutOfBoundsException if {@code tokens} is too small
     */
    public static String[] splitCSV(String str, String delim, String[] tokens) {
        // check for empty input (delimiter is deliberately not inspected here)
        if (str == null || str.isEmpty()) {
            return new String[] { "" };
        }
        checkDelimiter(delim);

        // scan string and store individual tokens
        int len = str.length();
        int from = 0;
        int pos = 0;
        while (from < len) {
            int to = findTokenEnd(str, delim, from);
            tokens[pos++] = str.substring(from, to);
            from = to + delim.length();
        }

        // a delimiter at the very end is followed by one more, empty token
        if (from == len) {
            tokens[pos] = "";
        }

        return tokens;
    }

    /** Rejects delimiters that would prevent the scan from making progress. */
    private static void checkDelimiter(String delim) {
        if (delim == null) {
            throw new NullPointerException("delim must not be null");
        }
        if (delim.isEmpty()) {
            // an empty delimiter matches everywhere and never advances the scan
            throw new IllegalArgumentException("delim must not be empty");
        }
    }

    /** Returns the exclusive end index of the token that starts at {@code from}. */
    private static int findTokenEnd(String str, String delim, int from) {
        int len = str.length();

        if (str.charAt(from) == CSV_QUOTE_CHAR) {
            int close = findClosingQuote(str, from);
            if (close >= 0) {
                int end = close + 1; // include the closing quote
                if (end < len && !str.startsWith(delim, end)) {
                    // malformed field such as "aa"a: keep the trailing
                    // characters in this token instead of skipping them
                    int next = str.indexOf(delim, end);
                    end = (next >= 0) ? next : len;
                }
                return end;
            }
            // no closing quote: fall through and treat the field as unquoted
        }

        if (str.startsWith(delim, from)) {
            return from; // empty token
        }

        // default: unquoted non-empty token, runs up to the next delimiter
        int next = str.indexOf(delim, from + 1);
        return (next >= 0) ? next : len;
    }

    /**
     * Returns the index of the quote closing the field opened at {@code open},
     * skipping escaped inner quotes (e.g. "aa""a"), or -1 if there is none.
     */
    private static int findClosingQuote(String str, int open) {
        int len = str.length();
        int pos = str.indexOf(CSV_QUOTE_CHAR, open + 1);
        while (pos >= 0 && pos + 1 < len && str.charAt(pos + 1) == CSV_QUOTE_CHAR) {
            pos = str.indexOf(CSV_QUOTE_CHAR, pos + 2); // skip the "" pair
        }
        return pos;
    }
}

Related

  1. SplitCSV(String csv)
  2. splitCSV(String inputString)
  3. splitCSV(String str)
  4. splitCSV(String str)
  5. splitCSV(String str)
  6. SplitCSVString(String str)
  7. tokenizeCsv(String input)
  8. toTable(String csv)