Here you can find the source of split(String inputStr, String delimeter, String enclosureStr)
Use case #1: the delimiter does not appear inside any pair of enclosure characters in the input data.
Example: inputData = "'20003', 'johndoe111@yahoo.com', 'John Doe', 'user', '0'";
Use case #2: the delimiter DOES appear inside one or more pairs of enclosure characters.
Example: inputData = "'20003', 'johndoe111@yahoo.com', 'John,, ,Doe', 'user', '0'";
Use case #3: the input data contains no enclosure characters at all.
Example: inputData = "20003, johndoe111@yahoo.com, John Doe, user, 0";
The maximum splitting limit is imposed and set at 1000 internally.
Parameter | Description |
---|---|
inputStr | input string to be split |
delimeter | string containing one or more characters; e.g. "," |
enclosureStr | string containing one or more characters; e.g. "'" |
public static String[] split(String inputStr, String delimeter, String enclosureStr)
//package com.java2s;
/*
 * Copyright (C) 2010-2012, Wan Lee, wan5332@gmail.com
 * Source can be obtained from git://github.com/wanclee/datashaper.git
 * BSD-style license. Please read license.txt that comes with source files
 */
import java.util.ArrayList;
import java.util.List;

public class Main {
    /** Maximum number of raw pieces produced by the initial split (prevents run-away splitting). */
    private static final int MAX_LIMIT = 1000;

    /**
     * Splits a string on a literal delimiter while honouring an optional
     * enclosure (quote) string around individual tokens.
     *
     * <p>Use case #1: the delimiter never appears inside an enclosure pair.
     * {@code "'20003', 'johndoe111@yahoo.com', 'John Doe', 'user', '0'"}
     *
     * <p>Use case #2: the delimiter DOES appear inside an enclosure pair; the
     * enclosed text, including its delimiters and inner white space, is kept
     * as one token.
     * {@code "'20003', 'johndoe111@yahoo.com', 'John,, ,Doe', 'user', '0'"}
     *
     * <p>Use case #3: the input contains no enclosures at all.
     * {@code "20003, johndoe111@yahoo.com, John Doe, user, 0"}
     *
     * <p>Rules applied to every token:
     * <ul>
     * <li>White space at both ends of a token is trimmed before the enclosure
     * is examined; text inside an enclosure pair is returned untouched.</li>
     * <li>The delimiter is matched literally, never as a regular
     * expression.</li>
     * <li>At most 1000 raw pieces are produced; the last piece then holds the
     * unsplit remainder of the input.</li>
     * <li>A token that is not enclosed is returned as-is (trimmed), even when
     * an enclosure string was supplied.</li>
     * <li>A token that opens an enclosure which is never closed is returned
     * as-is (trimmed, opening enclosure retained), and the tokens after it are
     * processed normally.</li>
     * </ul>
     *
     * @param inputStr input string to be split
     * @param delimeter string containing one or more characters; e.g. ","
     * @param enclosureStr string containing one or more characters; e.g. "'";
     *        {@code null} or empty means tokens are not enclosed
     * @return the tokens in input order; a zero-size array when
     *         {@code inputStr} is null or empty; a one-element array holding
     *         {@code inputStr} when {@code delimeter} is null or empty
     */
    public static String[] split(String inputStr, String delimeter, String enclosureStr) {
        // Note: do not change the order of these checks.
        if (inputStr == null || inputStr.isEmpty()) {
            return new String[0];
        }
        if (delimeter == null || delimeter.isEmpty()) {
            return new String[] { inputStr };
        }
        String enclosure = (enclosureStr != null) ? enclosureStr : "";

        // Quote the delimiter so characters such as '|' or '.' are not
        // interpreted as regex syntax. Fully qualified to leave the file's
        // import list untouched.
        String[] pieces = inputStr.split(java.util.regex.Pattern.quote(delimeter), MAX_LIMIT);

        List<String> tokens = new ArrayList<String>(pieces.length);
        for (int i = 0; i < pieces.length; i++) {
            String token = pieces[i].trim();

            if (enclosure.isEmpty()) {
                tokens.add(token);
                continue;
            }
            if (isEnclosed(token, enclosure)) {
                // Case 1: the piece is a complete, enclosed token.
                tokens.add(stripEnclosure(token, enclosure));
                continue;
            }

            // Case 2: the piece may open an enclosure that was cut apart by
            // delimiters inside it; look ahead for the piece that closes it.
            int closeAt = token.startsWith(enclosure)
                    ? findClosingPiece(pieces, i + 1, enclosure)
                    : -1;
            if (closeAt < 0) {
                // Plain unenclosed token, or an opener that is never closed.
                tokens.add(token);
                continue;
            }

            // Re-assemble from the untrimmed pieces so inner white space and
            // the delimiters themselves are preserved.
            StringBuilder joined = new StringBuilder(pieces[i]);
            for (int j = i + 1; j <= closeAt; j++) {
                joined.append(delimeter).append(pieces[j]);
            }
            tokens.add(stripEnclosure(joined.toString().trim(), enclosure));
            i = closeAt; // the consumed pieces must not be visited again
        }
        return tokens.toArray(new String[0]);
    }

    /**
     * Tells whether the token both starts and ends with the enclosure, using
     * two non-overlapping occurrences (a lone enclosure is not a pair).
     */
    private static boolean isEnclosed(String token, String enclosure) {
        return token.length() >= 2 * enclosure.length()
                && token.startsWith(enclosure)
                && token.endsWith(enclosure);
    }

    /** Removes one leading and one trailing enclosure from an enclosed token. */
    private static String stripEnclosure(String token, String enclosure) {
        return token.substring(enclosure.length(), token.length() - enclosure.length());
    }

    /**
     * Returns the index of the first piece at or after {@code from} whose
     * trimmed text ends with the enclosure, or -1 when there is none.
     */
    private static int findClosingPiece(String[] pieces, int from, String enclosure) {
        for (int j = from; j < pieces.length; j++) {
            if (pieces[j].trim().endsWith(enclosure)) {
                return j;
            }
        }
        return -1;
    }
}