Java tutorial
/* * This file is part of the Spider Web Framework. * * The Spider Web Framework is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * The Spider Web Framework is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with the Spider Web Framework. If not, see <http://www.gnu.org/licenses/>. */ package com.medallia.tiny; import java.io.UnsupportedEncodingException; import java.nio.charset.Charset; import java.util.Arrays; import java.util.Collection; import java.util.List; import java.util.Map; import java.util.StringTokenizer; import java.util.regex.Pattern; import junit.framework.TestCase; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; /** * Various utility functions for working with String objects. * * @author kristian */ public class Strings { private static final Log log = LogFactory.getLog(Strings.class); /** @return false if any of the strings are null or have zero trimmed length */ public static boolean allHaveContent(String... strings) { for (String s : strings) if (!hasContent(s)) return false; return true; } /** @return false if the string is null or has zero trimmed length */ public static boolean hasContent(String s) { return s != null && s.trim().length() != 0; } /** Parses the input string as an integer. Returns null if the string is null, empty or does not contain a parsable integer. */ public static Integer parseIntOrNull(String s) { if (!hasContent(s)) return null; try { return Integer.parseInt(s); } catch (NumberFormatException nfe) { return null; } } /** Throws AssertionError if any of the strings are null or have zero length */ public static void assertAllHaveContent(String... strings) { if (!allHaveContent(strings)) throw new AssertionError("String is null or blank: " + Arrays.asList(strings)); } /** Does exactly what you think */ public static String rot13(String s) { StringBuilder sb = new StringBuilder(s); for (int i = 0; i < sb.length(); i++) { char c = sb.charAt(i); if ((c >= 'a' && c <= 'm') || (c >= 'A' && c <= 'M')) { c += 13; } else if ((c >= 'n' && c <= 'z') || (c >= 'N' && c <= 'Z')) { c -= 13; } sb.setCharAt(i, c); } return sb.toString(); } /** Return the given string with any white space on the right side removed */ public static String trimRight(String s) { int k = s.length(); while (k > 0 && Character.isWhitespace(s.charAt(k - 1))) k--; return s.substring(0, k); } /** Return the given string with any white space on the left side removed */ public static String trimLeft(String s) { int k = 0; while (k < s.length() && Character.isWhitespace(s.charAt(k))) k++; return s.substring(k, s.length()); } /** * @return a new array including only those strings which are non-empty * after trim(). If trim is true then the array returned will include the * trimmed version of the strings. */ public static List<String> removeEmptyAfterTrim(boolean trim, List<String> strings) { List<String> l = Empty.list(); for (String s : strings) { if (s != null) { String t = s.trim(); if (t.length() > 0) l.add(trim ? t : s); } } return l; } /** Forwards to {@link #removeEmptyAfterTrim(boolean, Collection)} */ public static String[] removeEmptyAfterTrim(boolean trim, String... s) { return removeEmptyAfterTrim(trim, Arrays.asList(s)).toArray(new String[0]); } private static final Pattern COLLAPSE_WHITESPACE_PATTERN = Pattern.compile("\\s+"); /** Replace any sequence of whitespace with a single space and trim */ public static String collapseWhitespace(String s) { return COLLAPSE_WHITESPACE_PATTERN.matcher(s).replaceAll(" ").trim(); } private static final Pattern SINGLE_WORDS_PATTERN = Pattern.compile("\\W+"); /** Replace all non-word characters with a space and trim */ public static String singleWords(String s) { return SINGLE_WORDS_PATTERN.matcher(s).replaceAll(" ").trim(); } /** * Compare two strings for equality, after running collapseWhitespace * @return whether the two strings have the same black content */ public static boolean equalsAfterWhitespaceCollapse(String a, String b) { if (a == null && b == null) return true; if (a == null || b == null) return false; return collapseWhitespace(a).equals(collapseWhitespace(b)); } /** Forwards to {@link #removeEmptyAfterTrim(boolean, List)} */ public static String[] trimRemoveEmpty(String... s) { return removeEmptyAfterTrim(true, s); } /** @return the array with each string trimmed if it is non-null */ public static String[] trim(String... s) { for (int i = 0; i < s.length; i++) if (s[i] != null) s[i] = s[i].trim(); return s; } /** @return the array with each string trimmed if it is non-null */ public static List<String> trim(List<String> l) { return Arrays.asList(trim(l.toArray(new String[0]))); } /** Forwards to {@link #removeEmptyAfterTrim(boolean, List)} with 'true' for the first argument */ public static List<String> trimRemoveEmpty(List<String> l) { return removeEmptyAfterTrim(true, l); } /** @return the string trimmed if it is non-null, otherwise null */ public static String trimmed(String s) { return trim(s)[0]; } /** Extract String from obj; if it is a String[], take the first element, otherwise toString */ public static String extract(Object obj) { if (obj == null) return null; if (obj instanceof String[]) { String[] ss = (String[]) obj; if (ss.length > 1) { log.warn("more than one entry in request parameter: " + Arrays.toString(ss), new Throwable()); } if (ss.length == 0) { log.warn("no entries in request parameter", new Throwable()); return null; } return ss[0]; } return obj.toString(); } /** Function for properly capitializing english names. * * Original code found here: http://freejava.info/capitalize-english-names/ * Permission was given (by email to Kristian Eide on 2006-06-29 from the author * Dimiter Petrov <hinotori2772@gmail.com> to use the code as we see fit. */ public static String capitalizeAndTrimEnglishNames(String txt) { if (txt == null) return ""; String lcName = txt.toLowerCase().trim(); if (lcName.indexOf(' ') == -1) return capitalizeSingleEnglishName(lcName); StringBuilder res = new StringBuilder(lcName.length()); String[] names = lcName.split(" "); for (int i = 0; i < names.length; i++) { if (i > 0) res.append(" "); res.append(capitalizeSingleEnglishName(names[i])); } return res.toString(); } private static String capitalizeSingleEnglishName(String lcName) { if (lcName == null || lcName.length() == 0) return ""; String exFound = enCapExceptions.get(lcName); if (exFound != null) return exFound; StringBuilder res = new StringBuilder(lcName.length()); int i; int n = lcName.length(); if (lcName.startsWith("d'")) { res.append("d'"); i = 2; } else if (lcName.startsWith("mc")) { res.append("Mc"); if (n > 2) res.append(Character.toUpperCase(lcName.charAt(2))); i = 3; } else if (lcName.startsWith("mac")) { res.append("Mac"); if (n > 3) res.append(Character.toUpperCase(lcName.charAt(3))); i = 4; } else { res.append(Character.toUpperCase(lcName.charAt(0))); i = 1; } for (; i < n; i++) { if (lcName.charAt(i) == ' ' && (i > 0) && (lcName.charAt(i - 1) != ' ')) res.append(' '); if (i == 0) res.append(Character.toUpperCase(lcName.charAt(i))); else { switch (lcName.charAt(i - 1)) { case '-': case '.': case ' ': res.append(Character.toUpperCase(lcName.charAt(i))); break; case '\'': if (i == (n - 1)) res.append(lcName.charAt(i)); else res.append(Character.toUpperCase(lcName.charAt(i))); break; default: res.append(lcName.charAt(i)); break; } } } return res.toString(); } private static final Map<String, String> enCapExceptions = Empty.hashMap(); static { enCapExceptions.put("macintosh", "Macintosh"); enCapExceptions.put("von", "von"); enCapExceptions.put("van", "van"); enCapExceptions.put("de", "de"); enCapExceptions.put("la", "la"); enCapExceptions.put("da", "da"); enCapExceptions.put("di", "di"); } /** * Splits the given string into a list of tokens separated by sep. * Returns an empty list if str is null. */ public static List<String> split(String str, String sep) { return split(str, sep, false); } /** * Splits the given string into a list of tokens separated by sep. * @param trim If true, tokens are trimmed. * @return The list of tokens or an empty list if str is null. */ public static List<String> split(String str, String sep, boolean trim) { List<String> l = Empty.list(); if (str != null) { StringTokenizer st = new StringTokenizer(str, sep); while (st.hasMoreTokens()) l.add(trim ? st.nextToken().trim() : st.nextToken()); } return l; } /** split the given string on the given regex and return all non-empty elements */ public static List<String> splitNoEmpty(String s, String regex) { List<String> l = Empty.list(); for (String k : s.split(regex)) { if (hasContent(k)) l.add(k); } return l; } /** * Splits the given string into a list of integers separated by sep. * 'null' strings are not allowed and will throw a NumberFormatException. */ public static List<Integer> splitToInt(String str, String sep) { return splitToInt(str, sep, false); } /** * Splits the given string into a list of integers separated by sep. * If allowNull is true, the string 'null' is parsed as a null Integer object. * If allowNull is false, the string 'null' is not allowed and throws a NumberFormatException. */ public static List<Integer> splitToInt(String str, String sep, boolean allowNull) { List<String> sl = split(str, sep); List<Integer> l = Empty.list(); for (String s : sl) { String st = s.trim(); if (allowNull && st.equals("null")) l.add(null); else l.add(Integer.valueOf(st)); } return l; } /** * Splits the given string into a list of doubles separated by sep. */ public static List<Double> splitToDouble(String str, String sep) { List<String> sl = split(str, sep); List<Double> l = Empty.list(); for (String s : sl) l.add(Double.valueOf(s.trim())); return l; } /** * Join strings by separator, folding nulls and empty strings. */ public static String join(String sep, String... ting) { return join(sep, true, ting); } /** * Join strings by separator, folding nulls (and optionally empty strings) */ public static String join(String sep, boolean foldEmpty, String... ting) { if (ting.length == 0) return ""; if (ting.length == 1) return ting[0]; StringBuilder sb = Empty.sb(); String mysep = ""; for (String s : ting) { if (s == null || (foldEmpty && s.length() == 0)) continue; sb.append(mysep); sb.append(s); mysep = sep; } return sb.toString(); } /** Collection version of join */ public static String join(String sep, Collection<String> c) { return join(sep, c.toArray(new String[0])); } /** * Join the toString of each object element into a single string, with * each element separated by the given sep (which can be empty). */ public static String joinObjects(String sep, Collection<? extends Object> l) { return sepList(sep, l, -1); } /** Collection version of join */ public static String join(String sep, boolean foldEmpty, Collection<String> c) { return join(sep, foldEmpty, c.toArray(new String[0])); } /** Array version of sepList */ public static String sepList(String sep, Object[] arg, int max) { return sepList(sep, Arrays.asList(arg), max); } /** Same as sepList with no wrapping */ public static String sepList(String sep, Iterable<?> os, int max) { return sepList(sep, null, os, max); } /** Return the concatenation of toString of the objects obtained from the iterable, separated by sep, and if max * is > 0 include no more than that number of objects. If wrap is non-null, prepend and append each object with it */ public static String sepList(String sep, String wrap, Iterable<?> os, int max) { StringBuilder sb = Empty.sb(); String s = ""; if (max == 0) max = -1; for (Object o : os) { sb.append(s); s = sep; if (max-- == 0) { sb.append("..."); break; } if (wrap != null) sb.append(wrap); sb.append(o); if (wrap != null) sb.append(wrap); } return sb.toString(); } /** Return toString() of the given object, or null if the returned value is a * zero length string */ public static String toStringBlankIsNull(Object o) { if (o == null) return null; String s = String.valueOf(o).trim(); return s.length() == 0 ? null : s; } /** Return toString() of the given object, or a zero length string if either the argument * or the value returned from toString is null */ public static String toStringNullIsBlank(Object o) { if (o == null) return ""; String s = o.toString(); if (s == null) return ""; return s; } /** Return the bytes of toString on the given object in the given charset */ public static byte[] getBytes(Object o, Charset cs) { try { return o.toString().getBytes(cs.name()); } catch (UnsupportedEncodingException ex) { throw new AssertionError(ex); } } /** Convert the given bytes to a String using the given charset */ public static String fromBytes(byte[] bytes, Charset cs) { try { return new String(bytes, cs.name()); } catch (UnsupportedEncodingException ex) { throw new AssertionError(ex); } } /** Return null if the argument is null, otherwise s.toLowerCase */ public static String lower(String s) { return s == null ? s : s.toLowerCase(); } /** Return a version of the given string with line feeds inserted in an attempt to make * no line wider than the given margin. Note that some lines might still exceed the * margin if there is no space to wrap on (long words are not wrapped) */ public static String wrapLines(String text, int margin) { StringBuilder sb = Empty.sb(); int lastSpace = -1, curLineLength = 0; for (int i = 0; i < text.length(); i++) { char c = text.charAt(i); if (c == '\n') { curLineLength = 0; lastSpace = -1; } if (curLineLength >= margin && lastSpace >= 0) { sb.setCharAt(lastSpace, '\n'); curLineLength = 0; lastSpace = -1; } else if (c == ' ') { lastSpace = sb.length(); } curLineLength++; sb.append(c); } return sb.toString(); } /** Capitalize the first character of the given string and lowercase the rest; uses the default locale, so beware */ public static String capitalizeFirstCharacterLowercaseRest(String s) { if (!hasContent(s)) return s; return s.substring(0, 1).toUpperCase() + s.substring(1).toLowerCase(); } /** Capitalize the first character of the given string; uses the default locale, so beware */ public static String capitalizeFirstCharacter(String s) { if (!hasContent(s)) return s; return s.substring(0, 1).toUpperCase() + s.substring(1); } /** @return The string in the given collection of strings that best matches our string (case insensitive), or null if the given collection is empty */ public static String findBestMatch(String mine, Collection<String> others) { return findBestMatchOrNull(mine, others, Integer.MAX_VALUE, Integer.MAX_VALUE); } /** * Returns string in the given collection of strings that best matches our string (case insensitive), or null if none of the strings * matches particularly well. If you need to specify what counts as 'particularly well', please use the * findBestMatchOrNull(mine, others, minSize, maxDistance) method instead. * @param mine The string we want to match against * @param others The collection of string we want to match against our string to find the best match * @return The best matching others-string or null if none matches */ public static String findBestMatchOrNull(String mine, Collection<String> others) { return findBestMatchOrNull(mine, others, 3, Math.max(1, (int) (mine.length() * 0.25))); } /** * Returns string in the given collection of strings that best matches our string (case insensitive), or null if none of the strings * matches particularly well. * @param mine The string we want to match against * @param others The collection of string we want to match against our string to find the best match * @param minSize The minimum length our string has to be trigger the algorithm. 0 here counts as * @param maxDistance The maximum distance between the strings before they are considered as not matching * @return The best matching others-string or null if all strings have a distance longer than maxDistance */ public static String findBestMatchOrNull(String mine, Collection<String> others, int minSize, int maxDistance) { String bestMatch = null; if (mine.length() < minSize && minSize > 0) return null; int bestDistance = mine.length() + 1; for (String other : others) { if (Math.abs(other.length() - mine.length()) > maxDistance) continue; int distance = Strings.editDist(mine, other, false); if (distance < bestDistance && distance <= maxDistance) { bestDistance = distance; bestMatch = other; } } return bestMatch; } /** * Damerau-Levenshtein edit distance * param a misspelled string * param b (sub)string to look for * param substring look for substring or match whole word * return edit distance */ public static int editDist(String a, String b, boolean substring) { return editDist(a.toLowerCase().toCharArray(), b.toLowerCase().toCharArray(), substring); } /** * Damerau-Levenshtein edit distance * param a misspelled string * param b (sub)string to look for * param substring look for substring or match whole word * return edit distance */ public static int editDist(char[] a, char[] b, boolean substring) { int alen = a.length; int blen = b.length; int[][] d = new int[alen + 1][blen + 1]; if (substring) { // free removal at start int cnt = 0; for (int i = alen - blen + 1; i <= alen; ++i) d[i][0] = cnt++; } else { for (int i = 0; i <= alen; ++i) d[i][0] = i; } for (int j = 1; j <= blen; ++j) d[0][j] = j; for (int i = 1; i <= alen; ++i) { for (int j = 1; j <= blen; ++j) { int swap = 1; if (a[i - 1] == b[j - 1]) swap = 0; int val = d[i - 1][j] + 1; if (val > d[i][j - 1] + 1) val = d[i][j - 1] + 1; if (val > d[i - 1][j - 1] + swap) val = d[i - 1][j - 1] + swap; if (i > 1 && j > 1 && a[i - 1] == b[j - 2] && a[i - 2] == b[j - 1] && val > d[i - 2][j - 2]) { val = d[i - 2][j - 2] + 1; } d[i][j] = val; } } int best = d[alen][blen]; if (substring) { // free removal at end for (int i = blen - 1; i <= alen; ++i) { if (d[i][blen] < best) best = d[i][blen]; } //print(d); } return best; } /** Return a max number of chars of the given string for display purposes; some extra chars are * added at the end to explain how much is missing. */ public static String maxForDisplay(String str, int max) { int l = str.length(); if (l <= max) return str; return str.substring(0, max) + " (" + (l - max) + " more chars)"; } /** * Splits the string s on characters c not preceded by an odd number of escape characters '\'. * All escape characters are left intact. * @param s The string to split * @param c The charater to split on * @return A tokenized version of s */ public static String[] splitUnescaped(String s, char c) { List<String> r = Empty.list(); char[] car = s.toCharArray(); boolean isEscaped = false; int lastp = 0; for (int i = 0; i < car.length; i++) { if (isEscaped) { isEscaped = false; continue; } else if (car[i] == '\\') isEscaped = true; else if (car[i] == c) { r.add(s.substring(lastp, i)); lastp = i + 1; } } r.add(s.substring(lastp)); return r.toArray(new String[0]); } /** * Unescapes all characters escaped by '\' in the string. * No characters are taken to have any special meaning. For instance, an escaped 'n' just become a regular 'n'. * @param s * @return s with escape characters removed. */ public static String unescape(String s) { return s.replaceAll("\\\\(.)", "$1"); } /** * Splits the string s on characters c not preceded by an odd number of escape characters '\'. * All escaped characters are unescaped. * @param s The string to split * @param c The charater to split on * @return A tokenized version of s */ public static String[] splitAndUnescape(String s, char c) { String[] ans = splitUnescaped(s, c); for (int i = 0; i < ans.length; ++i) { ans[i] = unescape(ans[i]); } return ans; } /** test cases for the Strings methods */ public static class Test extends TestCase { /** test that the {@link Strings#capitalizeFirstCharacter(String)} method works */ public void testCapitalizeFirst() { assertEquals(null, capitalizeFirstCharacter(null)); assertEquals("", capitalizeFirstCharacter("")); assertEquals("A", capitalizeFirstCharacter("a")); assertEquals("Aa", capitalizeFirstCharacter("aa")); assertEquals("AA", capitalizeFirstCharacter("aA")); } /** test that the {@link Strings#capitalizeFirstCharacterLowercaseRest(String)} method works */ public void testCapitalizeFirstLowerRest() { assertEquals(null, capitalizeFirstCharacterLowercaseRest(null)); assertEquals("", capitalizeFirstCharacterLowercaseRest("")); assertEquals("A", capitalizeFirstCharacterLowercaseRest("a")); assertEquals("Aa", capitalizeFirstCharacterLowercaseRest("aa")); assertEquals("Aa", capitalizeFirstCharacterLowercaseRest("aA")); } /** test that the {@link Strings#wrapLines(String, int)} method works */ public void testLineWrap() { String[] s = { "abcdef\nab cdef", "abcdef\nab\ncdef", "ab d", "ab \nd", "abcd", "abcd", "abcd\n", "abcd\n", "abcdefgh", "abcdefgh", "abcd\n", "abcd\n", "ab ", "ab \n ", "ab a\nab cd\na cdef\nabcdef", "ab a\nab\ncd\na\ncdef\nabcdef", }; for (int i = 0; i < s.length; i += 2) { check(wrapLines(s[i], 4), s[i + 1]); } } private void check(String expect, String was) { String p1 = expect.replace('\n', '$'); String p2 = was.replace('\n', '$'); if (!p1.equals(p2)) System.out.println(p1 + "|\n---\n" + p2 + "|\n==="); assertEquals(p1 + " not " + p2, expect, was); } /** Test the bestMatch feature, both default values and corner cases */ public void testBestMatch() { doTest("a", 0, 1, "b", "b"); doTest("a", 0, 1, "B", "B"); doTest("a", null, "B"); // too small for defaults doTest("jllkkesrjjls", "jlktesrjgls", "hfdklaj", "jlktesrjgls", "kdplasfre"); doTest("jkerlwjksdf", null, "hfdklaj", "jlktesrjgls", "kdplasfre"); // too far apart doTest("heisann!", "heisannja!", "heisannja!", "fjjflaf"); doTest("heisann!", 12, 3, null, "heisannja!", "fjjflaf"); // too short string doTest("heisann!", 4, 1, null, "heisannja!", "fjjflaf"); // too far apart } private void doTest(String mine, String solution, String... others) { assertEquals(solution, findBestMatchOrNull(mine, Arrays.asList(others))); } private void doTest(String mine, int minSize, int maxDistance, String solution, String... others) { assertEquals(solution, findBestMatchOrNull(mine, Arrays.asList(others), minSize, maxDistance)); } } }