edu.stanford.nlp.util.StringUtils.java Source code

Introduction

Here is the source code for edu.stanford.nlp.util.StringUtils.java
Source

package edu.stanford.nlp.util;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasOffset;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.math.SloppyMath;
import edu.stanford.nlp.pipeline.LanguageInfo;
import edu.stanford.nlp.util.logging.Redwood;

import java.io.*;
import java.lang.reflect.Field;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.text.Normalizer;
import java.util.*;
import java.util.Map.Entry;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;

/**
 * StringUtils is a class for random String things, including output formatting and command line argument parsing.
 * <p>
 * Many of these methods will be familiar to perl users: {@link #join(Iterable)}, {@link #split(String, String)}, {@link
 * #trim(String, int)}, {@link #find(String, String)}, {@link #lookingAt(String, String)}, and {@link #matches(String,
 * String)}.
 * <p>
 * There are also useful methods for padding Strings/Objects with spaces on the right or left for printing even-width
 * table columns: {@link #padLeft(int, int)}, {@link #pad(String, int)}.
 *
 * <p>Example: print a comma-separated list of numbers:</p>
 * <p>{@code System.out.println(StringUtils.pad(nums, &quot;, &quot;));}</p>
 * <p>Example: print a 2D array of numbers with 8-char cells:</p>
 * <p><code>for(int i = 0; i &lt; nums.length; i++) {<br>
 * &nbsp;&nbsp;&nbsp; for(int j = 0; j &lt; nums[i].length; j++) {<br>
 * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
 * System.out.print(StringUtils.leftPad(nums[i][j], 8));<br>
 * &nbsp;&nbsp;&nbsp; <br>
 * &nbsp;&nbsp;&nbsp; System.out.println();<br>
 * </code></p>
 *
 * @author Dan Klein
 * @author Christopher Manning
 * @author Tim Grow (grow@stanford.edu)
 * @author Chris Cox
 * @version 2006/02/03
 */
public class StringUtils {

    // todo [cdm 2016]: Remove CoreMap/CoreLabel methods from this class
    // todo [cdm 2016]: Write a really good join method for this class, like William's Ruby one

    /** A logger for this class */
    private static final Redwood.RedwoodChannels log = Redwood.channels(StringUtils.class);

    /**
     * Don't let anyone instantiate this class.
     */
    private StringUtils() {
    }

    public static final String[] EMPTY_STRING_ARRAY = new String[0];
    private static final String LANG = "lang";
    private static final String PROP = "prop";
    private static final String PROPS = "props";
    private static final String PROPERTIES = "properties";
    private static final String ARGS = "args";
    private static final String ARGUMENTS = "arguments";

    /**
     * Say whether this regular expression can be found inside
     * this String.  This method provides one of the two "missing"
     * convenience methods for regular expressions in the String class
     * in JDK1.4.  This is the one you'll want to use all the time if
     * you're used to Perl.  What were they smoking?
     *
     * @param str   String to search for match in
     * @param regex String to compile as the regular expression
     * @return Whether the regex can be found in str
     */
    public static boolean find(String str, String regex) {
        return Pattern.compile(regex).matcher(str).find();
    }

    /**
     * Convenience method: a case-insensitive variant of Collection.contains.
     *
     * @param c Collection&lt;String&gt;
     * @param s String
     * @return true if s case-insensitively matches a string in c
     */
    public static boolean containsIgnoreCase(Collection<String> c, String s) {
        for (String sPrime : c) {
            if (sPrime.equalsIgnoreCase(s))
                return true;
        }
        return false;
    }

    /**
     * Say whether this regular expression can be found at the beginning of
     * this String.  This method provides one of the two "missing"
     * convenience methods for regular expressions in the String class
     * in JDK1.4.
     *
     * @param str   String to search for match at start of
     * @param regex String to compile as the regular expression
     * @return Whether the regex can be found at the start of str
     */
    public static boolean lookingAt(String str, String regex) {
        return Pattern.compile(regex).matcher(str).lookingAt();
    }

    /**
     * Takes a string of the form "x1=y1,x2=y2,..." such
     * that each y is an integer and each x is a key.  A
     * String[] s is returned such that s[yn]=xn.
     *
     * @param map A string of the form "x1=y1,x2=y2,..." such
     *     that each y is an integer and each x is a key.
     * @return  A String[] s is returned such that s[yn]=xn
     */
    public static String[] mapStringToArray(String map) {
        String[] m = map.split("[,;]");
        int maxIndex = 0;
        String[] keys = new String[m.length];
        int[] indices = new int[m.length];
        for (int i = 0; i < m.length; i++) {
            int index = m[i].lastIndexOf('=');
            keys[i] = m[i].substring(0, index);
            indices[i] = Integer.parseInt(m[i].substring(index + 1));
            if (indices[i] > maxIndex) {
                maxIndex = indices[i];
            }
        }
        String[] mapArr = new String[maxIndex + 1];
        // Arrays.fill(mapArr, null); // not needed; Java arrays zero initialized
        for (int i = 0; i < m.length; i++) {
            mapArr[indices[i]] = keys[i];
        }
        return mapArr;
    }

    /**
     * Takes a string of the form "x1=y1,x2=y2,..." and returns Map.
     *
     * @param map A string of the form "x1=y1,x2=y2,..."
     * @return  A Map m is returned such that m.get(xn) = yn
     */
    public static Map<String, String> mapStringToMap(String map) {
        String[] m = map.split("[,;]");
        Map<String, String> res = Generics.newHashMap();
        for (String str : m) {
            int index = str.lastIndexOf('=');
            String key = str.substring(0, index);
            String val = str.substring(index + 1);
            res.put(key.trim(), val.trim());
        }
        return res;
    }

    public static List<Pattern> regexesToPatterns(Iterable<String> regexes) {
        List<Pattern> patterns = new ArrayList<>();
        for (String regex : regexes) {
            patterns.add(Pattern.compile(regex));
        }
        return patterns;
    }

    /**
     * Given a pattern, which contains one or more capturing groups, and a String,
     * returns a list with the values of the
     * captured groups in the pattern. If the pattern does not match, returns
     * null. Note that this uses Matcher.find() rather than Matcher.matches().
     * If str is null, returns null.
     */
    public static List<String> regexGroups(Pattern regex, String str) {
        if (str == null) {
            return null;
        }

        Matcher matcher = regex.matcher(str);
        if (!matcher.find()) {
            return null;
        }

        List<String> groups = new ArrayList<>(matcher.groupCount());
        for (int index = 1; index <= matcher.groupCount(); index++) {
            groups.add(matcher.group(index));
        }

        return groups;
    }

    /**
     * Say whether this regular expression matches
     * this String.  This method is the same as the String.matches() method,
     * and is included just to give a call that is parallel to the other
     * static regex methods in this class.
     *
     * @param str   String to search for match at start of
     * @param regex String to compile as the regular expression
     * @return Whether the regex matches the whole of this str
     */
    public static boolean matches(String str, String regex) {
        return Pattern.compile(regex).matcher(str).matches();
    }

    public static Set<String> stringToSet(String str, String delimiter) {
        Set<String> ret = null;
        if (str != null) {
            String[] fields = str.split(delimiter);
            ret = Generics.newHashSet(fields.length);
            for (String field : fields) {
                field = field.trim();
                ret.add(field);
            }
        }
        return ret;
    }

    public static String joinWords(Iterable<? extends HasWord> l, String glue) {
        StringBuilder sb = new StringBuilder(l instanceof Collection ? ((Collection) l).size() : 64);
        boolean first = true;
        for (HasWord o : l) {
            if (!first) {
                sb.append(glue);
            } else {
                first = false;
            }
            sb.append(o.word());
        }
        return sb.toString();
    }

    public static <E> String join(List<? extends E> l, String glue, Function<E, String> toStringFunc, int start,
            int end) {
        StringBuilder sb = new StringBuilder();
        boolean first = true;
        start = Math.max(start, 0);
        end = Math.min(end, l.size());
        for (int i = start; i < end; i++) {
            if (!first) {
                sb.append(glue);
            } else {
                first = false;
            }
            sb.append(toStringFunc.apply(l.get(i)));
        }
        return sb.toString();
    }

    public static String joinWords(List<? extends HasWord> l, String glue, int start, int end) {
        return join(l, glue, HasWord::word, start, end);
    }

    private static final Function<Object, String> DEFAULT_TOSTRING = Object::toString;

    public static String joinFields(List<? extends CoreMap> l, final Class field, final String defaultFieldValue,
            String glue, int start, int end, final Function<Object, String> toStringFunc) {
        return join(l, glue, (Function<CoreMap, String>) in -> {
            Object val = in.get(field);
            return (val != null) ? toStringFunc.apply(val) : defaultFieldValue;
        }, start, end);
    }

    public static String joinFields(List<? extends CoreMap> l, final Class field, final String defaultFieldValue,
            String glue, int start, int end) {
        return joinFields(l, field, defaultFieldValue, glue, start, end, DEFAULT_TOSTRING);
    }

    public static String joinFields(List<? extends CoreMap> l, final Class field,
            final Function<Object, String> toStringFunc) {
        return joinFields(l, field, "-", " ", 0, l.size(), toStringFunc);
    }

    public static String joinFields(List<? extends CoreMap> l, final Class field) {
        return joinFields(l, field, "-", " ", 0, l.size());
    }

    public static String joinMultipleFields(List<? extends CoreMap> l, final Class[] fields,
            final String defaultFieldValue, final String fieldGlue, String glue, int start, int end,
            final Function<Object, String> toStringFunc) {
        return join(l, glue, (Function<CoreMap, String>) in -> {
            StringBuilder sb = new StringBuilder();
            for (Class field : fields) {
                if (sb.length() > 0) {
                    sb.append(fieldGlue);
                }
                Object val = in.get(field);
                String str = (val != null) ? toStringFunc.apply(val) : defaultFieldValue;
                sb.append(str);
            }
            return sb.toString();
        }, start, end);
    }

    public static String joinMultipleFields(List<? extends CoreMap> l, final Class[] fields,
            final Function<Object, String> toStringFunc) {
        return joinMultipleFields(l, fields, "-", "/", " ", 0, l.size(), toStringFunc);
    }

    public static String joinMultipleFields(List<? extends CoreMap> l, final Class[] fields,
            final String defaultFieldValue, final String fieldGlue, String glue, int start, int end) {
        return joinMultipleFields(l, fields, defaultFieldValue, fieldGlue, glue, start, end, DEFAULT_TOSTRING);
    }

    public static String joinMultipleFields(List<? extends CoreMap> l, final Class[] fields) {
        return joinMultipleFields(l, fields, "-", "/", " ", 0, l.size());
    }

    /**
     * Joins all the tokens together (more or less) according to their original whitespace.
     * It assumes all whitespace was " ".
     *
     * @param tokens list of tokens which implement {@link HasOffset} and {@link HasWord}
     * @return a string of the tokens with the appropriate amount of spacing
     */
    public static String joinWithOriginalWhiteSpace(List<CoreLabel> tokens) {
        if (tokens.isEmpty()) {
            return "";
        }

        CoreLabel lastToken = tokens.get(0);
        StringBuilder buffer = new StringBuilder(lastToken.word());

        for (int i = 1; i < tokens.size(); i++) {
            CoreLabel currentToken = tokens.get(i);
            int numSpaces = currentToken.beginPosition() - lastToken.endPosition();
            if (numSpaces < 0) {
                numSpaces = 0;
            }

            buffer.append(repeat(' ', numSpaces)).append(currentToken.word());
            lastToken = currentToken;
        }

        return buffer.toString();
    }

    /**
     * Joins each elem in the {@link Iterable} with the given glue.
     * For example, given a list of {@code Integers}, you can create
     * a comma-separated list by calling {@code join(numbers, ", ")}.
     *
     * @see StringUtils#join(Stream, String)
     */
    public static <X> String join(Iterable<X> l, String glue) {
        StringBuilder sb = new StringBuilder();
        boolean first = true;
        for (X o : l) {
            if (!first) {
                sb.append(glue);
            } else {
                first = false;
            }
            sb.append(o);
        }
        return sb.toString();
    }

    /**
     * Joins each elem in the {@link Stream} with the given glue.
     * For example, given a list of {@code Integers}, you can create
     * a comma-separated list by calling {@code join(numbers, ", ")}.
     *
     * @see StringUtils#join(Iterable, String)
     */
    public static <X> String join(Stream<X> l, String glue) {
        StringBuilder sb = new StringBuilder();
        boolean first = true;
        Iterator<X> iter = l.iterator();
        while (iter.hasNext()) {
            if (!first) {
                sb.append(glue);
            } else {
                first = false;
            }
            sb.append(iter.next());
        }
        return sb.toString();
    }

    /**
     * Joins each elem in the array with the given glue. For example, given a
     * list of ints, you can create a comma-separated list by calling
     * {@code join(numbers, ", ")}.
     */
    public static String join(Object[] elements, String glue) {
        return (join(Arrays.asList(elements), glue));
    }

    /**
     * Joins an array of elements in a given span.
     * @param elements The elements to join.
     * @param start The start index to join from.
     * @param end The end (non-inclusive) to join until.
     * @param glue The glue to hold together the elements.
     * @return The string form of the sub-array, joined on the given glue.
     */
    public static String join(Object[] elements, int start, int end, String glue) {
        StringBuilder b = new StringBuilder(127);
        boolean isFirst = true;
        for (int i = start; i < end; ++i) {
            if (isFirst) {
                b.append(elements[i]);
                isFirst = false;
            } else {
                b.append(glue).append(elements[i]);
            }
        }
        return b.toString();
    }

    /**
     * Joins each element in the given array with the given glue. For example,
     * given an array of Integers, you can create a comma-separated list by calling
     * {@code join(numbers, ", ")}.
     */
    public static String join(String[] items, String glue) {
        return join(Arrays.asList(items), glue);
    }

    /**
     * Joins elements with a space.
     */
    public static String join(Iterable<?> l) {
        return join(l, " ");
    }

    /**
     * Joins elements with a space.
     */
    public static String join(Object[] elements) {
        return (join(elements, " "));
    }

    /**
     * Splits on whitespace (\\s+).
     * <br>
     * Note that this follows Java split style, meaning leading spaces
     * will result in an empty string and trailing spaces will be cut off.
     * @param s String to split
     * @return List<String> of split strings
     */
    public static List<String> split(String s) {
        return split(s, "\\s+");
    }

    /**
     * Splits on the given delimiter and returns the delimiters as well.
     * <br>
     * For expressions where the expression appears inside itself this may not work.
     * <br>
     * See http://stackoverflow.com/a/2206432
     *
     * @param s String to split
     * @param separator Delimiter to use for splitting
     * @return List<String> of split strings
     */
    public static List<String> splitKeepDelimiter(String s, String separator) {
        return split(s, String.format("((?<=(?:%1$s))|(?=(?:%1$s)))", separator));
    }

    /**
     * Split the text into pieces based on newlines.  Include the newline tokens in the pieces.
     *
     * @param s String to splits
     */
    public static List<String> splitLinesKeepNewlines(String s) {
        List<String> pieces = StringUtils.splitKeepDelimiter(s, "\\R");

        // The delimeter regex trick seems to split \r \n into two
        // separate matches.  We want to treat them as the same newline
        List<String> newPieces = new ArrayList<>();
        for (int i = 0; i < pieces.size(); ++i) {
            if (i < pieces.size() - 1 && pieces.get(i).equals("\r") && pieces.get(i + 1).equals("\n")) {
                newPieces.add("\r\n");
                i = i + 1;
            } else {
                newPieces.add(pieces.get(i));
            }
        }

        return newPieces;
    }

    /**
     * Splits the given string using the given regex as delimiters.
     * This method is the same as the String.split() method (except it throws
     * the results in a List),
     * and is included just to give a call that is parallel to the other
     * static regex methods in this class.
     *
     * @param str   String to split up
     * @param regex String to compile as the regular expression
     * @return List of Strings resulting from splitting on the regex
     */
    public static List<String> split(String str, String regex) {
        return (Arrays.asList(str.split(regex)));
    }

    /**
     * Split a string on a given single character.
     * This method is often faster than the regular split() method.
     * @param input The input to split.
     * @param delimiter The character to split on.
     * @return An array of Strings corresponding to the original input split on the delimiter character.
     */
    public static String[] splitOnChar(String input, char delimiter) {
        // State
        String[] out = new String[input.length() + 1];
        int nextIndex = 0;
        int lastDelimiterIndex = -1;
        char[] chars = input.toCharArray();
        // Split
        for (int i = 0; i <= chars.length; ++i) {
            if (i >= chars.length || chars[i] == delimiter) {
                char[] tokenChars = new char[i - (lastDelimiterIndex + 1)];
                System.arraycopy(chars, lastDelimiterIndex + 1, tokenChars, 0, tokenChars.length);
                out[nextIndex] = new String(tokenChars);
                nextIndex += 1;
                lastDelimiterIndex = i;
            }
        }
        // Clean Result
        String[] trimmedOut = new String[nextIndex];
        System.arraycopy(out, 0, trimmedOut, 0, trimmedOut.length);
        return trimmedOut;
    }

    /**
     * Splits a string into whitespace tokenized fields based on a delimiter and then whitespace.
     * For example, "aa bb | bb cc | ccc ddd" would be split into "[aa,bb],[bb,cc],[ccc,ddd]" based on
     * the delimiter "|". This method uses the old StringTokenizer class, which is up to
     * 3x faster than the regex-based "split()" methods.
     *
     * @param delimiter String to split on
     * @return List of lists of strings.
     */
    public static List<List<String>> splitFieldsFast(String str, String delimiter) {
        List<List<String>> fields = Generics.newArrayList();
        StringTokenizer tokenizer = new StringTokenizer(str.trim());
        List<String> currentField = Generics.newArrayList();
        while (tokenizer.hasMoreTokens()) {
            String token = tokenizer.nextToken();
            if (token.equals(delimiter)) {
                fields.add(currentField);
                currentField = Generics.newArrayList();
            } else {
                currentField.add(token.trim());
            }
        }
        if (currentField.size() > 0) {
            fields.add(currentField);
        }
        return fields;
    }

    /**
     * Split on a given character, filling out the fields in the output array.
     * This is suitable for, e.g., splitting a TSV file of known column count.
     * @param out The output array to fill
     * @param input The input to split
     * @param delimiter The delimiter to split on.
     */
    public static void splitOnChar(String[] out, String input, char delimiter) {
        int lastSplit = 0;
        int outI = 0;
        char[] chars = input.toCharArray();
        for (int i = 0; i < chars.length; ++i) {
            if (chars[i] == delimiter) {
                out[outI] = new String(chars, lastSplit, i - lastSplit);
                outI += 1;
                lastSplit = i + 1;
            }
        }
        if (outI < out.length) {
            out[outI] = input.substring(lastSplit);
        }
    }

    /** Split a string into tokens.  Because there is a tokenRegex as well as a
     *  separatorRegex (unlike for the conventional split), you can do things
     *  like correctly split quoted strings or parenthesized arguments.
     *  However, it doesn't do the unquoting of quoted Strings for you.
     *  An empty String argument is returned at the beginning, if valueRegex
     *  accepts the empty String and str begins with separatorRegex.
     *  But str can end with either valueRegex or separatorRegex and this does
     *  not generate an empty String at the end (indeed, valueRegex need not
     *  even accept the empty String in this case.  However, if it does accept
     *  the empty String and there are multiple trailing separators, then
     *  empty values will be returned.
     *
     *  @param str The String to split
     *  @param valueRegex Must match a token. You may wish to let it match the empty String
     *  @param separatorRegex Must match a separator
     *  @return The List of tokens
     *  @throws IllegalArgumentException if str cannot be tokenized by the two regex
     */
    public static List<String> valueSplit(String str, String valueRegex, String separatorRegex) {
        Pattern vPat = Pattern.compile(valueRegex);
        Pattern sPat = Pattern.compile(separatorRegex);
        List<String> ret = new ArrayList<>();
        while (!str.isEmpty()) {
            Matcher vm = vPat.matcher(str);
            if (vm.lookingAt()) {
                ret.add(vm.group());
                str = str.substring(vm.end());
                // String got = vm.group();
                // log.info("vmatched " + got + "; now str is " + str);
            } else {
                throw new IllegalArgumentException("valueSplit: " + valueRegex + " doesn't match " + str);
            }
            if (!str.isEmpty()) {
                Matcher sm = sPat.matcher(str);
                if (sm.lookingAt()) {
                    str = str.substring(sm.end());
                    // String got = sm.group();
                    // log.info("smatched " + got + "; now str is " + str);
                } else {
                    throw new IllegalArgumentException("valueSplit: " + separatorRegex + " doesn't match " + str);
                }
            }
        } // end while
        return ret;
    }

    /**
     * Return a String of length a minimum of totalChars characters by
     * padding the input String str at the right end with spaces.
     * If str is already longer
     * than totalChars, it is returned unchanged.
     */
    public static String pad(String str, int totalChars) {
        return pad(str, totalChars, ' ');
    }

    /**
     * Return a String of length a minimum of totalChars characters by
     * padding the input String str at the right end with spaces.
     * If str is already longer
     * than totalChars, it is returned unchanged.
     */
    public static String pad(String str, int totalChars, char pad) {
        if (str == null) {
            str = "null";
        }
        int slen = str.length();
        StringBuilder sb = new StringBuilder(str);
        for (int i = 0; i < totalChars - slen; i++) {
            sb.append(pad);
        }
        return sb.toString();
    }

    /**
     * Pads the toString value of the given Object.
     */
    public static String pad(Object obj, int totalChars) {
        return pad(obj.toString(), totalChars);
    }

    /**
     * Pad or trim so as to produce a string of exactly a certain length.
     *
     * @param str The String to be padded or truncated
     * @param num The desired length
     */
    public static String padOrTrim(String str, int num) {
        if (str == null) {
            str = "null";
        }
        int leng = str.length();
        if (leng < num) {
            StringBuilder sb = new StringBuilder(str);
            for (int i = 0; i < num - leng; i++) {
                sb.append(' ');
            }
            return sb.toString();
        } else if (leng > num) {
            return str.substring(0, num);
        } else {
            return str;
        }
    }

    /**
     * Pad or trim so as to produce a string of exactly a certain length.
     *
     * @param str The String to be padded or truncated
     * @param num The desired length
     */
    public static String padLeftOrTrim(String str, int num) {
        if (str == null) {
            str = "null";
        }
        int leng = str.length();
        if (leng < num) {
            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < num - leng; i++) {
                sb.append(' ');
            }
            sb.append(str);
            return sb.toString();
        } else if (leng > num) {
            return str.substring(str.length() - num);
        } else {
            return str;
        }
    }

    /**
     * Pad or trim the toString value of the given Object.
     */
    public static String padOrTrim(Object obj, int totalChars) {
        return padOrTrim(obj.toString(), totalChars);
    }

    /**
     * Pads the given String to the left with the given character ch to ensure that
     * it's at least totalChars long.
     */
    public static String padLeft(String str, int totalChars, char ch) {
        if (str == null) {
            str = "null";
        }
        StringBuilder sb = new StringBuilder();
        for (int i = 0, num = totalChars - str.length(); i < num; i++) {
            sb.append(ch);
        }
        sb.append(str);
        return sb.toString();
    }

    /**
     * Pads the given String to the left with spaces to ensure that it's
     * at least totalChars long.
     */
    public static String padLeft(String str, int totalChars) {
        return padLeft(str, totalChars, ' ');
    }

    public static String padLeft(Object obj, int totalChars) {
        return padLeft(obj.toString(), totalChars);
    }

    public static String padLeft(int i, int totalChars) {
        return padLeft(Integer.valueOf(i), totalChars);
    }

    public static String padLeft(double d, int totalChars) {
        return padLeft(new Double(d), totalChars);
    }

    /**
     * Returns s if it's at most maxWidth chars, otherwise chops right side to fit.
     */
    public static String trim(String s, int maxWidth) {
        if (s.length() <= maxWidth) {
            return (s);
        }
        return s.substring(0, maxWidth);
    }

    public static String trim(Object obj, int maxWidth) {
        return trim(obj.toString(), maxWidth);
    }

    public static String trimWithEllipsis(String s, int width) {
        if (s.length() > width)
            s = s.substring(0, width - 3) + "...";
        return s;
    }

    public static String trimWithEllipsis(Object o, int width) {
        return trimWithEllipsis(o.toString(), width);
    }

    public static String repeat(String s, int times) {
        if (times == 0) {
            return "";
        }
        StringBuilder sb = new StringBuilder(times * s.length());
        for (int i = 0; i < times; i++) {
            sb.append(s);
        }
        return sb.toString();
    }

    public static String repeat(char ch, int times) {
        if (times == 0) {
            return "";
        }
        StringBuilder sb = new StringBuilder(times);
        for (int i = 0; i < times; i++) {
            sb.append(ch);
        }
        return sb.toString();
    }

    /**
     * Returns a "clean" version of the given filename in which spaces have
     * been converted to dashes and all non-alphanumeric chars are underscores.
     */
    public static String fileNameClean(String s) {
        char[] chars = s.toCharArray();
        StringBuilder sb = new StringBuilder();
        for (char c : chars) {
            if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || (c == '_')) {
                sb.append(c);
            } else {
                if (c == ' ' || c == '-') {
                    sb.append('_');
                } else {
                    sb.append('x').append((int) c).append('x');
                }
            }
        }
        return sb.toString();
    }

    /**
     * Returns the index of the <i>n</i>th occurrence of ch in s, or -1
     * if there are less than n occurrences of ch.
     */
    public static int nthIndex(String s, char ch, int n) {
        int index = 0;
        for (int i = 0; i < n; i++) {
            // if we're already at the end of the string,
            // and we need to find another ch, return -1
            if (index == s.length() - 1) {
                return -1;
            }
            index = s.indexOf(ch, index + 1);
            if (index == -1) {
                return (-1);
            }
        }
        return index;
    }

    /**
     * This returns a string from decimal digit smallestDigit to decimal digit
     * biggest digit. Smallest digit is labeled 1, and the limits are
     * inclusive.
     */
    public static String truncate(int n, int smallestDigit, int biggestDigit) {
        int numDigits = biggestDigit - smallestDigit + 1;
        char[] result = new char[numDigits];
        for (int j = 1; j < smallestDigit; j++) {
            n = n / 10;
        }
        for (int j = numDigits - 1; j >= 0; j--) {
            result[j] = Character.forDigit(n % 10, 10);
            n = n / 10;
        }
        return new String(result);
    }

    /**
     * Parses command line arguments into a Map. Arguments of the form
     * <br>
     * {@code -flag1 arg1a arg1b ... arg1m -flag2 -flag3 arg3a ... arg3n}
     * <br>
     * will be parsed so that the flag is a key in the Map (including
     * the hyphen) and its value will be a {@link String}[] containing
     * the optional arguments (if present).  The non-flag values not
     * captured as flag arguments are collected into a String[] array
     * and returned as the value of {@code null} in the Map.  In
     * this invocation, flags cannot take arguments, so all the {@link
     * String} array values other than the value for {@code null}
     * will be zero-length.
     *
     * @param args A command-line arguments array
     * @return a {@link Map} of flag names to flag argument {@link
     *         String} arrays.
     */
    public static Map<String, String[]> argsToMap(String[] args) {
        return argsToMap(args, Collections.emptyMap());
    }

    /**
     * Parses command line arguments into a Map. Arguments of the form
     * <br>
     * {@code -flag1 arg1a arg1b ... arg1m -flag2 -flag3 arg3a ... arg3n}
     * <br>
     * will be parsed so that the flag is a key in the Map (including
     * the hyphen) and its value will be a {@link String}[] containing
     * the optional arguments (if present).  The non-flag values not
     * captured as flag arguments are collected into a String[] array
     * and returned as the value of {@code null} in the Map.  In
     * this invocation, the maximum number of arguments for each flag
     * can be specified as an {@link Integer} value of the appropriate
     * flag key in the {@code flagsToNumArgs} {@link Map}
     * argument. (By default, flags cannot take arguments.)
     * <br>
     * Example of usage:
     * <br>
     * <code>
     * Map flagsToNumArgs = new HashMap();
     * flagsToNumArgs.put("-x",new Integer(2));
     * flagsToNumArgs.put("-d",new Integer(1));
     * Map result = argsToMap(args,flagsToNumArgs);
     * </code>
     * <br>
     * If a given flag appears more than once, the extra args are appended to
     * the String[] value for that flag.
     *
     * @param args           the argument array to be parsed
     * @param flagsToNumArgs a {@link Map} of flag names to {@link Integer}
     *                       values specifying the number of arguments
     *                       for that flag (default min 0, max 1).
     * @return a {@link Map} of flag names to flag argument {@link String}
     */
    public static Map<String, String[]> argsToMap(String[] args, Map<String, Integer> flagsToNumArgs) {
        Map<String, String[]> result = Generics.newHashMap();
        List<String> remainingArgs = new ArrayList<>();
        for (int i = 0; i < args.length; i++) {
            String key = args[i];
            if (key.charAt(0) == '-') { // found a flag
                Integer numFlagArgs = flagsToNumArgs.get(key);
                int max = numFlagArgs == null ? 1 : numFlagArgs.intValue();
                int min = numFlagArgs == null ? 0 : numFlagArgs.intValue();
                List<String> flagArgs = new ArrayList<>();
                for (int j = 0; j < max && i + 1 < args.length
                        && (j < min || args[i + 1].isEmpty() || args[i + 1].charAt(0) != '-'); i++, j++) {
                    flagArgs.add(args[i + 1]);
                }
                if (result.containsKey(key)) { // append the second specification into the args.
                    String[] newFlagArg = new String[result.get(key).length + flagsToNumArgs.get(key)];
                    int oldNumArgs = result.get(key).length;
                    System.arraycopy(result.get(key), 0, newFlagArg, 0, oldNumArgs);
                    for (int j = 0; j < flagArgs.size(); j++) {
                        newFlagArg[j + oldNumArgs] = flagArgs.get(j);
                    }
                    result.put(key, newFlagArg);
                } else {
                    result.put(key, flagArgs.toArray(new String[flagArgs.size()]));
                }
            } else {
                remainingArgs.add(args[i]);
            }
        }
        result.put(null, remainingArgs.toArray(new String[remainingArgs.size()]));
        return result;
    }

    /**
     * In this version each flag has zero or one argument. It has one argument
     * if there is a thing following a flag that does not begin with '-'.  See
     * {@link #argsToProperties(String[], Map)} for full documentation.
     *
     * @param args Command line arguments
     * @return A Properties object representing the arguments.
     */
    public static Properties argsToProperties(String... args) {
        return argsToProperties(args, Collections.emptyMap());
    }

    /**
     * Analogous to {@link #argsToMap}.  However, there are several key differences between this method and {@link #argsToMap}:
     * <ul>
     * <li> Hyphens are stripped from flag names </li>
     * <li> Since Properties objects are String to String mappings, the default number of arguments to a flag is
     * assumed to be 1 and not 0. </li>
     * <li> Furthermore, the list of arguments not bound to a flag is mapped to the "" property, not null </li>
     * <li> The special flags "-prop", "-props", "-properties", "-args", or "-arguments" will load the property file
     *      specified by its argument. </li>
     * <li> The value for flags without arguments is set to "true" </li>
     * <li> If a flag has multiple arguments, the value of the property is all
     * of the arguments joined together with a space (" ") character between them.</li>
     * <li> The value strings are trimmed so trailing spaces do not stop you from loading a file.</li>
     * </ul>
     * Properties are read from left to right, and later properties will override earlier ones with the same name.
     * Properties loaded from a Properties file with the special args are defaults that can be overridden by command line
     * flags (or earlier Properties files if there is nested usage of the special args.
     *
     * @param args Command line arguments
     * @param flagsToNumArgs Map of how many arguments flags should have. The keys are without the minus signs.
     * @return A Properties object representing the arguments.
     */
    public static Properties argsToProperties(String[] args, Map<String, Integer> flagsToNumArgs) {
        Properties result = new Properties();
        List<String> remainingArgs = new ArrayList<>();
        for (int i = 0; i < args.length; i++) {
            String key = args[i];
            if (!key.isEmpty() && key.charAt(0) == '-') { // found a flag
                if (key.length() > 1 && key.charAt(1) == '-') {
                    key = key.substring(2); // strip off 2 hyphens
                } else {
                    key = key.substring(1); // strip off the hyphen
                }

                Integer maxFlagArgs = flagsToNumArgs.get(key);
                int max = maxFlagArgs == null ? 1 : maxFlagArgs;
                int min = maxFlagArgs == null ? 0 : maxFlagArgs;
                if (maxFlagArgs != null && maxFlagArgs == 0 && i < args.length - 1
                        && ("true".equalsIgnoreCase(args[i + 1]) || "false".equalsIgnoreCase(args[i + 1]))) {
                    max = 1; // case: we're reading a boolean flag. TODO(gabor) there's gotta be a better way...
                }
                List<String> flagArgs = new ArrayList<>();
                // cdm oct 2007: add length check to allow for empty string argument!
                for (int j = 0; j < max && i + 1 < args.length
                        && (j < min || args[i + 1].isEmpty() || args[i + 1].charAt(0) != '-'); i++, j++) {
                    flagArgs.add(args[i + 1]);
                }
                String value;
                if (flagArgs.isEmpty()) {
                    value = "true";
                } else {
                    value = join(flagArgs, " ");
                }
                if (key.equalsIgnoreCase(PROP) || key.equalsIgnoreCase(PROPS) || key.equalsIgnoreCase(PROPERTIES)
                        || key.equalsIgnoreCase(ARGUMENTS) || key.equalsIgnoreCase(ARGS)
                        || key.equalsIgnoreCase(LANG)) {
                    result.setProperty(PROPERTIES, value);
                } else {
                    result.setProperty(key, value);
                }
            } else {
                remainingArgs.add(args[i]);
            }
        }
        if (!remainingArgs.isEmpty()) {
            result.setProperty("", join(remainingArgs, " "));
        }

        /* Processing in reverse order, add properties that aren't present only. Thus, later ones override earlier ones. */
        while (result.containsKey(PROPERTIES)) {
            String file = result.getProperty(PROPERTIES);
            if (LanguageInfo.isStanfordCoreNLPSupportedLang(file))
                file = LanguageInfo.getLanguagePropertiesFile(file);
            result.remove(PROPERTIES);
            Properties toAdd = new Properties();
            BufferedReader reader = null;
            try {
                reader = IOUtils.readerFromString(file);
                toAdd.load(reader);
                // trim all values
                for (String propKey : toAdd.stringPropertyNames()) {
                    String newVal = toAdd.getProperty(propKey);
                    toAdd.setProperty(propKey, newVal.trim());
                }
            } catch (IOException e) {
                String msg = "argsToProperties could not read properties file: " + file;
                throw new RuntimeIOException(msg, e);
            } finally {
                IOUtils.closeIgnoringExceptions(reader);
            }

            for (String key : toAdd.stringPropertyNames()) {
                String val = toAdd.getProperty(key);
                if (!result.containsKey(key)) {
                    result.setProperty(key, val);
                }
            }
        }

        return result;
    }

    /**
     * This method reads in properties listed in a file in the format prop=value, one property per line.
     * Although {@code Properties.load(InputStream)} exists, I implemented this method to trim the lines,
     * something not implemented in the {@code load()} method.
     *
     * @param filename A properties file to read
     * @return The corresponding Properties object
     */
    public static Properties propFileToProperties(String filename) {
        try {
            InputStream is = new BufferedInputStream(new FileInputStream(filename));
            Properties result = new Properties();
            result.load(is);
            // trim all values
            for (String propKey : result.stringPropertyNames()) {
                String newVal = result.getProperty(propKey);
                result.setProperty(propKey, newVal.trim());
            }
            is.close();
            return result;
        } catch (IOException e) {
            throw new RuntimeIOException("propFileToProperties could not read properties file: " + filename, e);
        }
    }

    /**
     * This method converts a comma-separated String (with whitespace
     * optionally allowed after the comma) representing properties
     * to a Properties object.  Each property is "property=value".  The value
     * for properties without an explicitly given value is set to "true". This can be used for a 2nd level
     * of properties, for example, when you have a commandline argument like "-outputOptions style=xml,tags".
     */
    public static Properties stringToProperties(String str) {
        Properties result = new Properties();
        return stringToProperties(str, result);
    }

    /**
     * This method updates a Properties object based on
     * a comma-separated String (with whitespace
     * optionally allowed after the comma) representing properties
     * to a Properties object.  Each property is "property=value".  The value
     * for properties without an explicitly given value is set to "true".
     */
    public static Properties stringToProperties(String str, Properties props) {
        String[] propsStr = str.trim().split(",\\s*");
        for (String term : propsStr) {
            int divLoc = term.indexOf('=');
            String key;
            String value;
            if (divLoc >= 0) {
                key = term.substring(0, divLoc).trim();
                value = term.substring(divLoc + 1).trim();
            } else {
                key = term.trim();
                value = "true";
            }
            props.setProperty(key, value);
        }
        return props;
    }

    /**
     * If any of the given list of properties are not found, returns the
     * name of that property.  Otherwise, returns null.
     */
    public static String checkRequiredProperties(Properties props, String... requiredProps) {
        for (String required : requiredProps) {
            if (props.getProperty(required) == null) {
                return required;
            }
        }
        return null;
    }

    /**
     * Prints to a file.  If the file already exists, appends if
     * {@code append=true}, and overwrites if {@code append=false}.
     */
    public static void printToFile(File file, String message, boolean append, boolean printLn, String encoding) {
        PrintWriter pw = null;
        try {
            Writer fw;
            if (encoding != null) {
                fw = new OutputStreamWriter(new FileOutputStream(file, append), encoding);
            } else {
                fw = new FileWriter(file, append);
            }
            pw = new PrintWriter(fw);
            if (printLn) {
                pw.println(message);
            } else {
                pw.print(message);
            }
        } catch (Exception e) {
            log.warn("Exception: in printToFile " + file.getAbsolutePath());
            log.warn(e);
        } finally {
            if (pw != null) {
                pw.flush();
                pw.close();
            }
        }
    }

    /**
     * Prints to a file.  If the file already exists, appends if
     * {@code append=true}, and overwrites if {@code append=false}.
     */
    public static void printToFileLn(File file, String message, boolean append) {
        PrintWriter pw = null;
        try {
            Writer fw = new FileWriter(file, append);
            pw = new PrintWriter(fw);
            pw.println(message);
        } catch (Exception e) {
            log.warn("Exception: in printToFileLn " + file.getAbsolutePath() + ' ' + message);
            log.warn(e);
        } finally {
            if (pw != null) {
                pw.flush();
                pw.close();
            }
        }
    }

    /**
     * Prints to a file.  If the file already exists, appends if
     * {@code append=true}, and overwrites if {@code append=false}.
     */
    public static void printToFile(File file, String message, boolean append) {
        PrintWriter pw = null;
        try {
            Writer fw = new FileWriter(file, append);
            pw = new PrintWriter(fw);
            pw.print(message);
        } catch (Exception e) {
            throw new RuntimeIOException("Exception in printToFile " + file.getAbsolutePath(), e);
        } finally {
            IOUtils.closeIgnoringExceptions(pw);
        }
    }

    /**
     * Prints to a file.  If the file does not exist, rewrites the file;
     * does not append.
     */
    public static void printToFile(File file, String message) {
        printToFile(file, message, false);
    }

    /**
     * Prints to a file.  If the file already exists, appends if
     * {@code append=true}, and overwrites if {@code append=false}.
     */
    public static void printToFile(String filename, String message, boolean append) {
        printToFile(new File(filename), message, append);
    }

    /**
     * Prints to a file.  If the file already exists, appends if
     * {@code append=true}, and overwrites if {@code append=false}.
     */
    public static void printToFileLn(String filename, String message, boolean append) {
        printToFileLn(new File(filename), message, append);
    }

    /**
     * Prints to a file.  If the file does not exist, rewrites the file;
     * does not append.
     */
    public static void printToFile(String filename, String message) {
        printToFile(new File(filename), message, false);
    }

    /**
     * A simpler form of command line argument parsing.
     * Dan thinks this is highly superior to the overly complexified code that
     * comes before it.
     * Parses command line arguments into a Map. Arguments of the form
     * -flag1 arg1 -flag2 -flag3 arg3
     * will be parsed so that the flag is a key in the Map (including the hyphen)
     * and the
     * optional argument will be its value (if present).
     *
     * @return A Map from keys to possible values (String or null)
     */
    @SuppressWarnings("unchecked")
    public static Map<String, String> parseCommandLineArguments(String[] args) {
        return (Map) parseCommandLineArguments(args, false);
    }

    /**
     * A simpler form of command line argument parsing.
     * Dan thinks this is highly superior to the overly complexified code that
     * comes before it.
     * Parses command line arguments into a Map. Arguments of the form
     * -flag1 arg1 -flag2 -flag3 arg3
     * will be parsed so that the flag is a key in the Map (including the hyphen)
     * and the
     * optional argument will be its value (if present).
     * In this version, if the argument is numeric, it will be a Double value
     * in the map, not a String.
     *
     * @return A Map from keys to possible values (String or null)
     */
    public static Map<String, Object> parseCommandLineArguments(String[] args, boolean parseNumbers) {
        Map<String, Object> result = Generics.newHashMap();
        for (int i = 0; i < args.length; i++) {
            String key = args[i];
            if (key.charAt(0) == '-') {
                if (i + 1 < args.length) {
                    String value = args[i + 1];
                    if (value.charAt(0) != '-') {
                        if (parseNumbers) {
                            Object numericValue = value;
                            try {
                                numericValue = Double.parseDouble(value);
                            } catch (NumberFormatException e2) {
                                // ignore
                            }
                            result.put(key, numericValue);
                        } else {
                            result.put(key, value);
                        }
                        i++;
                    } else {
                        result.put(key, null);
                    }
                } else {
                    result.put(key, null);
                }
            }
        }
        return result;
    }

    public static String stripNonAlphaNumerics(String orig) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < orig.length(); i++) {
            char c = orig.charAt(i);
            if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')) {
                sb.append(c);
            }
        }
        return sb.toString();
    }

    public static String stripSGML(String orig) {
        Pattern sgmlPattern = Pattern.compile("<.*?>", Pattern.DOTALL);
        Matcher sgmlMatcher = sgmlPattern.matcher(orig);
        return sgmlMatcher.replaceAll("");
    }

    public static void printStringOneCharPerLine(String s) {
        for (int i = 0; i < s.length(); i++) {
            int c = s.charAt(i);
            System.out.println(c + " \'" + (char) c + "\' ");
        }
    }

    public static String escapeString(String s, char[] charsToEscape, char escapeChar) {
        StringBuilder result = new StringBuilder();
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            if (c == escapeChar) {
                result.append(escapeChar);
            } else {
                for (char charToEscape : charsToEscape) {
                    if (c == charToEscape) {
                        result.append(escapeChar);
                        break;
                    }
                }
            }
            result.append(c);
        }
        return result.toString();
    }

    /**
     * This function splits the String s into multiple Strings using the
     * splitChar.  However, it provides a quoting facility: it is possible to
     * quote strings with the quoteChar.
     * If the quoteChar occurs within the quotedExpression, it must be prefaced
     * by the escapeChar.
     * This routine can be useful for processing a line of a CSV file.
     *
     * @param s         The String to split into fields. Cannot be null.
     * @param splitChar The character to split on
     * @param quoteChar The character to quote items with
     * @param escapeChar The character to escape the quoteChar with
     * @return An array of Strings that s is split into
     */
    public static String[] splitOnCharWithQuoting(String s, char splitChar, char quoteChar, char escapeChar) {
        // todo [cdm 2018]: rewrite this to use code points so can work with any Unicode characters
        List<String> result = new ArrayList<>();
        int i = 0;
        int length = s.length();
        StringBuilder b = new StringBuilder();
        while (i < length) {
            char curr = s.charAt(i);
            if (curr == splitChar) {
                // add last buffer
                // cdm 2014: Do this even if the field is empty!
                // if (b.length() > 0) {
                result.add(b.toString());
                b = new StringBuilder();
                // }
                i++;
            } else if (curr == quoteChar) {
                // find next instance of quoteChar
                i++;
                while (i < length) {
                    curr = s.charAt(i);
                    // mrsmith: changed this condition from
                    // if (curr == escapeChar) {
                    if ((curr == escapeChar) && (i + 1 < length) && (s.charAt(i + 1) == quoteChar)) {
                        b.append(s.charAt(i + 1));
                        i += 2;
                    } else if (curr == quoteChar) {
                        i++;
                        break; // break this loop
                    } else {
                        b.append(s.charAt(i));
                        i++;
                    }
                }
            } else {
                b.append(curr);
                i++;
            }
        }
        // RFC 4180 disallows final comma. At any rate, don't produce a field after it unless non-empty
        if (b.length() > 0) {
            result.add(b.toString());
        }
        return result.toArray(EMPTY_STRING_ARRAY);
    }

    /**
     * Computes the longest common substring of s and t.
     * The longest common substring of a and b is the longest run of
     * characters that appear in order inside both a and b. Both a and b
     * may have other extraneous characters along the way. This is like
     * edit distance but with no substitution and a higher number means
     * more similar. For example, the LCS of "abcD" and "aXbc" is 3 (abc).
     */
    public static int longestCommonSubstring(String s, String t) {
        int[][] d; // matrix
        int n; // length of s
        int m; // length of t
        int i; // iterates through s
        int j; // iterates through t
        // int cost; // cost
        // Step 1
        n = s.length();
        m = t.length();
        if (n == 0) {
            return 0;
        }
        if (m == 0) {
            return 0;
        }
        d = new int[n + 1][m + 1];
        // Step 2
        for (i = 0; i <= n; i++) {
            d[i][0] = 0;
        }
        for (j = 0; j <= m; j++) {
            d[0][j] = 0;
        }
        // Step 3
        for (i = 1; i <= n; i++) {
            char s_i = s.charAt(i - 1); // ith character of s
            // Step 4
            for (j = 1; j <= m; j++) {
                char t_j = t.charAt(j - 1); // jth character of t
                // Step 5
                // js: if the chars match, you can get an extra point
                // otherwise you have to skip an insertion or deletion (no subs)
                if (s_i == t_j) {
                    d[i][j] = SloppyMath.max(d[i - 1][j], d[i][j - 1], d[i - 1][j - 1] + 1);
                } else {
                    d[i][j] = Math.max(d[i - 1][j], d[i][j - 1]);
                }
            }
        }
        /* ----
          // num chars needed to display longest num
          int numChars = (int) Math.ceil(Math.log(d[n][m]) / Math.log(10));
          for (i = 0; i < numChars + 3; i++) {
            log.info(' ');
          }
          for (j = 0; j < m; j++) {
            log.info(t.charAt(j) + " ");
          }
          log.info();
          for (i = 0; i <= n; i++) {
            log.info((i == 0 ? ' ' : s.charAt(i - 1)) + " ");
            for (j = 0; j <= m; j++) {
              log.info(d[i][j] + " ");
            }
            log.info();
          }
        ---- */
        // Step 7
        return d[n][m];
    }

    /**
     * Computes the longest common contiguous substring of s and t.
     * The LCCS is the longest run of characters that appear consecutively in
     * both s and t. For instance, the LCCS of "color" and "colour" is 4, because
     * of "colo".
     */
    public static int longestCommonContiguousSubstring(String s, String t) {
        if (s.isEmpty() || t.isEmpty()) {
            return 0;
        }
        int M = s.length();
        int N = t.length();
        int[][] d = new int[M + 1][N + 1];
        for (int j = 0; j <= N; j++) {
            d[0][j] = 0;
        }
        for (int i = 0; i <= M; i++) {
            d[i][0] = 0;
        }

        int max = 0;
        for (int i = 1; i <= M; i++) {
            for (int j = 1; j <= N; j++) {
                if (s.charAt(i - 1) == t.charAt(j - 1)) {
                    d[i][j] = d[i - 1][j - 1] + 1;
                } else {
                    d[i][j] = 0;
                }

                if (d[i][j] > max) {
                    max = d[i][j];
                }
            }
        }
        // log.info("LCCS(" + s + "," + t + ") = " + max);
        return max;
    }

    /**
     * Computes the Levenshtein (edit) distance of the two given Strings.
     * This method doesn't allow transposition, so one character transposed between two strings has a cost of 2 (one insertion, one deletion).
     * The EditDistance class also implements the Levenshtein distance, but does allow transposition.
     */
    public static int editDistance(String s, String t) {
        // Step 1
        int n = s.length(); // length of s
        int m = t.length(); // length of t
        if (n == 0) {
            return m;
        }
        if (m == 0) {
            return n;
        }
        int[][] d = new int[n + 1][m + 1]; // matrix
        // Step 2
        for (int i = 0; i <= n; i++) {
            d[i][0] = i;
        }
        for (int j = 0; j <= m; j++) {
            d[0][j] = j;
        }
        // Step 3
        for (int i = 1; i <= n; i++) {
            char s_i = s.charAt(i - 1); // ith character of s
            // Step 4
            for (int j = 1; j <= m; j++) {
                char t_j = t.charAt(j - 1); // jth character of t
                // Step 5
                int cost; // cost
                if (s_i == t_j) {
                    cost = 0;
                } else {
                    cost = 1;
                }
                // Step 6
                d[i][j] = SloppyMath.min(d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1] + cost);
            }
        }

        // Step 7
        return d[n][m];
    }

    /**
     * Computes the WordNet 2.0 POS tag corresponding to the PTB POS tag s.
     *
     * @param s a Penn TreeBank POS tag.
     */
    public static String pennPOSToWordnetPOS(String s) {
        if (s.matches("NN|NNP|NNS|NNPS")) {
            return "noun";
        }
        if (s.matches("VB|VBD|VBG|VBN|VBZ|VBP|MD")) {
            return "verb";
        }
        if (s.matches("JJ|JJR|JJS|CD")) {
            return "adjective";
        }
        if (s.matches("RB|RBR|RBS|RP|WRB")) {
            return "adverb";
        }
        return null;
    }

    /**
     * Returns a short class name for an object.
     * This is the class name stripped of any package name.
     *
     * @return The name of the class minus a package name, for example {@code ArrayList}
     */
    public static String getShortClassName(Object o) {
        if (o == null) {
            return "null";
        }
        String name = o.getClass().getName();
        int index = name.lastIndexOf('.');
        if (index >= 0) {
            name = name.substring(index + 1);
        }
        return name;
    }

    /**
     * Converts a tab delimited string into an object with given fields
     * Requires the object has setXxx functions for the specified fields
     *
     * @param objClass Class of object to be created
     * @param str string to convert
     * @param delimiterRegex delimiter regular expression
     * @param fieldNames fieldnames
     * @param <T> type to return
     * @return Object created from string
     */
    public static <T> T columnStringToObject(Class objClass, String str, String delimiterRegex, String[] fieldNames)
            throws InstantiationException, IllegalAccessException, NoSuchFieldException, NoSuchMethodException,
            InvocationTargetException {
        Pattern delimiterPattern = Pattern.compile(delimiterRegex);
        return StringUtils.columnStringToObject(objClass, str, delimiterPattern, fieldNames);
    }

    /**
     * Converts a tab delimited string into an object with given fields
     * Requires the object has public access for the specified fields
     *
     * @param objClass Class of object to be created
     * @param str string to convert
     * @param delimiterPattern delimiter
     * @param fieldNames fieldnames
     * @param <T> type to return
     * @return Object created from string
     */
    public static <T> T columnStringToObject(Class<?> objClass, String str, Pattern delimiterPattern,
            String[] fieldNames) throws InstantiationException, IllegalAccessException, NoSuchMethodException,
            NoSuchFieldException, InvocationTargetException {
        String[] fields = delimiterPattern.split(str);
        T item = ErasureUtils.uncheckedCast(objClass.newInstance());
        for (int i = 0; i < fields.length; i++) {
            try {
                Field field = objClass.getDeclaredField(fieldNames[i]);
                field.set(item, fields[i]);
            } catch (IllegalAccessException ex) {
                Method method = objClass.getDeclaredMethod("set" + StringUtils.capitalize(fieldNames[i]),
                        String.class);
                method.invoke(item, fields[i]);
            }
        }
        return item;
    }

    /**
     * Converts an object into a tab delimited string with given fields
     * Requires the object has public access for the specified fields
     *
     * @param object Object to convert
     * @param delimiter delimiter
     * @param fieldNames fieldnames
     * @return String representing object
     */
    public static String objectToColumnString(Object object, String delimiter, String[] fieldNames)
            throws IllegalAccessException, NoSuchFieldException, NoSuchMethodException, InvocationTargetException {
        StringBuilder sb = new StringBuilder();
        for (String fieldName : fieldNames) {
            if (sb.length() > 0) {
                sb.append(delimiter);
            }
            try {
                Field field = object.getClass().getDeclaredField(fieldName);
                sb.append(field.get(object));
            } catch (IllegalAccessException ex) {
                Method method = object.getClass().getDeclaredMethod("get" + StringUtils.capitalize(fieldName));
                sb.append(method.invoke(object));
            }
        }
        return sb.toString();
    }

    /**
     * Uppercases the first character of a string.
     *
     * @param s a string to capitalize
     * @return a capitalized version of the string
     */
    public static String capitalize(String s) {
        if (Character.isLowerCase(s.charAt(0))) {
            return Character.toUpperCase(s.charAt(0)) + s.substring(1);
        } else {
            return s;
        }
    }

    /**
     * Check if a string begins with an uppercase.
     *
     * @param s a string
     * @return true if the string is capitalized
     *         false otherwise
     */
    public static boolean isCapitalized(String s) {
        return (Character.isUpperCase(s.charAt(0)));
    }

    public static String toTitleCase(String s) {
        StringBuilder sb = new StringBuilder();
        int off = 0;
        int len = s.length();
        char prevChar = ' ';
        while (off < len) {
            char currChar = (char) s.codePointAt(off);
            if (Character.isWhitespace(prevChar) && !Character.isWhitespace(currChar)) {
                sb.append(Character.toUpperCase(currChar));
            } else {
                sb.append(Character.toLowerCase(currChar));
            }
            prevChar = currChar;
            off += Character.charCount(currChar);
        }
        return sb.toString();
    }

    /**
     * Check if every word in a string begins with an uppercase and the rest is lower case (e.g. Title Case)
     *
     * @param s a string
     * @return true if the string is Title case
     *         false otherwise
     */
    public static boolean isTitleCase(String s) {
        String[] words = s.split("\\s+");
        for (String w : words) {
            if (!(Character.isUpperCase(w.charAt(0)) && (w.substring(1).toLowerCase().equals(w.substring(1))))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Check if every non whitespace character is upper case (e.g. TITLE CASE)
     *
     * @param s a string
     * @return true if the string is Title case
     *         false otherwise
     */
    public static boolean isAllUpperCase(String s) {
        int off = 0;
        int len = s.length();
        while (off < len) {
            char currChar = (char) s.codePointAt(off);
            if (!Character.isUpperCase(currChar) && !Character.isWhitespace(currChar)) {
                return false;
            }
            off += Character.charCount(currChar);
        }
        return true;
    }

    public static String searchAndReplace(String text, String from, String to) {
        from = escapeString(from, new char[] { '.', '[', ']', '\\' }, '\\'); // special chars in regex
        Pattern p = Pattern.compile(from);
        Matcher m = p.matcher(text);
        return m.replaceAll(to);
    }

    /**
     * Returns an HTML table containing the matrix of Strings passed in.
     * The first dimension of the matrix should represent the rows, and the
     * second dimension the columns.
     */
    public static String makeHTMLTable(String[][] table, String[] rowLabels, String[] colLabels) {
        StringBuilder buff = new StringBuilder();
        buff.append("<table class=\"auto\" border=\"1\" cellspacing=\"0\">\n");
        // top row
        buff.append("<tr>\n");
        buff.append("<td></td>\n"); // the top left cell
        for (int j = 0; j < table[0].length; j++) { // assume table is a rectangular matrix
            buff.append("<td class=\"label\">").append(colLabels[j]).append("</td>\n");
        }
        buff.append("</tr>\n");
        // all other rows
        for (int i = 0; i < table.length; i++) {
            // one row
            buff.append("<tr>\n");
            buff.append("<td class=\"label\">").append(rowLabels[i]).append("</td>\n");
            for (int j = 0; j < table[i].length; j++) {
                buff.append("<td class=\"data\">");
                buff.append(((table[i][j] != null) ? table[i][j] : ""));
                buff.append("</td>\n");
            }
            buff.append("</tr>\n");
        }
        buff.append("</table>");
        return buff.toString();
    }

    /**
     * Returns a text table containing the matrix of objects passed in.
     * The first dimension of the matrix should represent the rows, and the
     * second dimension the columns. Each object is printed in a cell with toString().
     * The printing may be padded with spaces on the left and then on the right to
     * ensure that the String form is of length at least padLeft or padRight.
     * If tsv is true, a tab is put between columns.
     *
     * @return A String form of the table
     */
    public static String makeTextTable(Object[][] table, Object[] rowLabels, Object[] colLabels, int padLeft,
            int padRight, boolean tsv) {
        StringBuilder buff = new StringBuilder();
        if (colLabels != null) {
            // top row
            buff.append(makeAsciiTableCell("", padLeft, padRight, tsv)); // the top left cell
            for (int j = 0; j < table[0].length; j++) { // assume table is a rectangular matrix
                buff.append(makeAsciiTableCell(colLabels[j], padLeft, padRight, (j != table[0].length - 1) && tsv));
            }
            buff.append('\n');
        }
        // all other rows
        for (int i = 0; i < table.length; i++) {
            // one row
            if (rowLabels != null) {
                buff.append(makeAsciiTableCell(rowLabels[i], padLeft, padRight, tsv));
            }
            for (int j = 0; j < table[i].length; j++) {
                buff.append(makeAsciiTableCell(table[i][j], padLeft, padRight, (j != table[0].length - 1) && tsv));
            }
            buff.append('\n');
        }
        return buff.toString();
    }

    /** The cell String is the string representation of the object.
     *  If padLeft is greater than 0, it is padded. Ditto right
     *
     */
    private static String makeAsciiTableCell(Object obj, int padLeft, int padRight, boolean tsv) {
        String result = obj.toString();
        if (padLeft > 0) {
            result = padLeft(result, padLeft);
        }
        if (padRight > 0) {
            result = pad(result, padRight);
        }
        if (tsv) {
            result = result + '\t';
        }
        return result;
    }

    /**
     * Tests the string edit distance function.
     */
    public static void main(String[] args) {

        String[] s = { "there once was a man", "this one is a manic", "hey there", "there once was a mane",
                "once in a manger.", "where is one match?", "Jo3seph Smarr!", "Joseph R Smarr" };
        for (int i = 0; i < 8; i++) {
            for (int j = 0; j < 8; j++) {
                System.out.println("s1: " + s[i]);
                System.out.println("s2: " + s[j]);
                System.out.println("edit distance: " + editDistance(s[i], s[j]));
                System.out.println("LCS:           " + longestCommonSubstring(s[i], s[j]));
                System.out.println("LCCS:          " + longestCommonContiguousSubstring(s[i], s[j]));
                System.out.println();
            }
        }
    }

    public static String toAscii(String s) {
        StringBuilder b = new StringBuilder();
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            if (c > 127) {
                String result = "?";
                if (c >= 0x00c0 && c <= 0x00c5) {
                    result = "A";
                } else if (c == 0x00c6) {
                    result = "AE";
                } else if (c == 0x00c7) {
                    result = "C";
                } else if (c >= 0x00c8 && c <= 0x00cb) {
                    result = "E";
                } else if (c >= 0x00cc && c <= 0x00cf) {
                    result = "F";
                } else if (c == 0x00d0) {
                    result = "D";
                } else if (c == 0x00d1) {
                    result = "N";
                } else if (c >= 0x00d2 && c <= 0x00d6) {
                    result = "O";
                } else if (c == 0x00d7) {
                    result = "x";
                } else if (c == 0x00d8) {
                    result = "O";
                } else if (c >= 0x00d9 && c <= 0x00dc) {
                    result = "U";
                } else if (c == 0x00dd) {
                    result = "Y";
                } else if (c >= 0x00e0 && c <= 0x00e5) {
                    result = "a";
                } else if (c == 0x00e6) {
                    result = "ae";
                } else if (c == 0x00e7) {
                    result = "c";
                } else if (c >= 0x00e8 && c <= 0x00eb) {
                    result = "e";
                } else if (c >= 0x00ec && c <= 0x00ef) {
                    result = "i";
                } else if (c == 0x00f1) {
                    result = "n";
                } else if (c >= 0x00f2 && c <= 0x00f8) {
                    result = "o";
                } else if (c >= 0x00f9 && c <= 0x00fc) {
                    result = "u";
                } else if (c >= 0x00fd && c <= 0x00ff) {
                    result = "y";
                } else if (c >= 0x2018 && c <= 0x2019) {
                    result = "\'";
                } else if (c >= 0x201c && c <= 0x201e) {
                    result = "\"";
                } else if (c >= 0x0213 && c <= 0x2014) {
                    result = "-";
                } else if (c >= 0x00A2 && c <= 0x00A5) {
                    result = "$";
                } else if (c == 0x2026) {
                    result = ".";
                }
                b.append(result);
            } else {
                b.append(c);
            }
        }
        return b.toString();
    }

    public static String toCSVString(String[] fields) {
        StringBuilder b = new StringBuilder();
        for (String fld : fields) {
            if (b.length() > 0) {
                b.append(',');
            }
            String field = escapeString(fld, new char[] { '\"' }, '\"'); // escape quotes with double quotes
            b.append('\"').append(field).append('\"');
        }
        return b.toString();
    }

    /**
     * Swap any occurrences of any characters in the from String in the input String with
     * the corresponding character from the to String.  As Perl tr, for example,
     * tr("chris", "irs", "mop").equals("chomp"), except it does not
     * support regular expression character ranges.
     * <p>
     * <i>Note:</i> This is now optimized to not allocate any objects if the
     * input is returned unchanged.
     */
    public static String tr(String input, String from, String to) {
        assert from.length() == to.length();
        StringBuilder sb = null;
        int len = input.length();
        for (int i = 0; i < len; i++) {
            int ind = from.indexOf(input.charAt(i));
            if (ind >= 0) {
                if (sb == null) {
                    sb = new StringBuilder(input);
                }
                sb.setCharAt(i, to.charAt(ind));
            }
        }
        if (sb == null) {
            return input;
        } else {
            return sb.toString();
        }
    }

    /**
     * Returns the supplied string with any trailing '\n' or '\r\n' removed.
     */
    public static String chomp(String s) {
        if (s == null) {
            return null;
        }
        int l_1 = s.length() - 1;
        if (l_1 >= 0 && s.charAt(l_1) == '\n') {
            int l_2 = l_1 - 1;
            if (l_2 >= 0 && s.charAt(l_2) == '\r') {
                return s.substring(0, l_2);
            } else {
                return s.substring(0, l_1);
            }
        } else {
            return s;
        }
    }

    /**
     * Returns the result of calling toString() on the supplied Object, but with
     * any trailing '\n' or '\r\n' removed.
     */
    public static String chomp(Object o) {
        return chomp(o.toString());
    }

    /**
     * Strip directory from filename.  Like Unix 'basename'. <br>
     *
     * Example: {@code getBaseName("/u/wcmac/foo.txt") ==> "foo.txt"}
     */
    public static String getBaseName(String fileName) {
        return getBaseName(fileName, "");
    }

    /**
     * Strip directory and suffix from filename.  Like Unix 'basename'.
     *
     * Example: {@code getBaseName("/u/wcmac/foo.txt", "") ==> "foo.txt"}<br>
     * Example: {@code getBaseName("/u/wcmac/foo.txt", ".txt") ==> "foo"}<br>
     * Example: {@code getBaseName("/u/wcmac/foo.txt", ".pdf") ==> "foo.txt"}<br>
     */
    public static String getBaseName(String fileName, String suffix) {
        return getBaseName(fileName, suffix, "/");
    }

    /**
     * Strip directory and suffix from the given name.  Like Unix 'basename'.
     *
     * Example: {@code getBaseName("/tmp/foo/bar/foo", "", "/") ==> "foo"}<br>
     * Example: {@code getBaseName("edu.stanford.nlp", "", "\\.") ==> "nlp"}<br>
     */
    public static String getBaseName(String fileName, String suffix, String sep) {
        String[] elts = fileName.split(sep);
        if (elts.length == 0)
            return "";
        String lastElt = elts[elts.length - 1];
        if (lastElt.endsWith(suffix)) {
            lastElt = lastElt.substring(0, lastElt.length() - suffix.length());
        }
        return lastElt;
    }

    /**
     * Given a String the method uses Regex to check if the String only contains alphabet characters
     *
     * @param s a String to check using regex
     * @return true if the String is valid
     */
    public static boolean isAlpha(String s) {
        Pattern p = Pattern.compile("^[\\p{Alpha}\\s]+$");
        Matcher m = p.matcher(s);
        return m.matches();
    }

    /**
     * Given a String the method uses Regex to check if the String only contains numeric characters
     *
     * @param s a String to check using regex
     * @return true if the String is valid
     */
    public static boolean isNumeric(String s) {
        Pattern p = Pattern.compile("^[\\p{Digit}\\s\\.]+$");
        Matcher m = p.matcher(s);
        return m.matches();
    }

    /**
     * Given a String the method uses Regex to check if the String only contains alphanumeric characters
     *
     * @param s a String to check using regex
     * @return true if the String is valid
     */
    public static boolean isAlphanumeric(String s) {
        Pattern p = Pattern.compile("^[\\p{Alnum}\\s\\.]+$");
        Matcher m = p.matcher(s);
        return m.matches();
    }

    /**
     * Given a String the method uses Regex to check if the String only contains punctuation characters
     *
     * @param s a String to check using regex
     * @return true if the String is valid
     */
    public static boolean isPunct(String s) {
        Pattern p = Pattern.compile("^[\\p{Punct}]+$");
        Matcher m = p.matcher(s);
        return m.matches();
    }

    /**
     * Given a String the method uses Regex to check if the String looks like an acronym
     *
     * @param s a String to check using regex
     * @return true if the String is valid
     */
    public static boolean isAcronym(String s) {
        Pattern p = Pattern.compile("^[\\p{Upper}]+$");
        Matcher m = p.matcher(s);
        return m.matches();
    }

    public static String getNotNullString(String s) {
        if (s == null)
            return "";
        else
            return s;
    }

    /** Returns whether a String is either null or empty.
     *  (Copies the Guava method for this.)
     *
     *  @param str The String to test
     *  @return Whether the String is either null or empty
     */
    public static boolean isNullOrEmpty(String str) {
        return str == null || str.isEmpty();
    }

    /**
     * Resolve variable. If it is the props file, then substitute that variable with
     * the value mentioned in the props file, otherwise look for the variable in the environment variables.
     * If the variable is not found then substitute it for empty string.
     */
    public static String resolveVars(String str, Map props) {
        if (str == null)
            return null;
        // ${VAR_NAME} or $VAR_NAME
        Pattern p = Pattern.compile("\\$\\{(\\w+)\\}");
        Matcher m = p.matcher(str);
        StringBuffer sb = new StringBuffer();
        while (m.find()) {
            String varName = null == m.group(1) ? m.group(2) : m.group(1);
            String vrValue;
            //either in the props file
            if (props.containsKey(varName)) {
                vrValue = ((String) props.get(varName));
            } else {
                //or as the environment variable
                vrValue = System.getenv(varName);
            }
            m.appendReplacement(sb, null == vrValue ? "" : vrValue);
        }
        m.appendTail(sb);
        return sb.toString();
    }

    /**
     * convert args to properties with variable names resolved. for each value
     * having a ${VAR} or $VAR, its value is first resolved using the variables
     * listed in the props file, and if not found then using the environment
     * variables. if the variable is not found then substitute it for empty string
     */
    public static Properties argsToPropertiesWithResolve(String[] args) {
        LinkedHashMap<String, String> result = new LinkedHashMap<>();
        Map<String, String> existingArgs = new LinkedHashMap<>();

        for (int i = 0; i < args.length; i++) {
            String key = args[i];
            if (key.length() > 0 && key.charAt(0) == '-') { // found a flag
                if (key.length() > 1 && key.charAt(1) == '-')
                    key = key.substring(2); // strip off 2 hyphens
                else
                    key = key.substring(1); // strip off the hyphen

                int max = 1;
                int min = 0;
                List<String> flagArgs = new ArrayList<>();
                // cdm oct 2007: add length check to allow for empty string argument!
                for (int j = 0; j < max && i + 1 < args.length
                        && (j < min || args[i + 1].isEmpty() || args[i + 1].charAt(0) != '-'); i++, j++) {
                    flagArgs.add(args[i + 1]);
                }

                if (flagArgs.isEmpty()) {
                    existingArgs.put(key, "true");
                } else {

                    if (key.equalsIgnoreCase(PROP) || key.equalsIgnoreCase(PROPS)
                            || key.equalsIgnoreCase(PROPERTIES) || key.equalsIgnoreCase(ARGUMENTS)
                            || key.equalsIgnoreCase(ARGS)) {
                        for (String flagArg : flagArgs)
                            result.putAll(propFileToLinkedHashMap(flagArg, existingArgs));

                        existingArgs.clear();
                    } else
                        existingArgs.put(key, join(flagArgs, " "));
                }
            }
        }
        result.putAll(existingArgs);

        for (Entry<String, String> o : result.entrySet()) {
            String val = resolveVars(o.getValue(), result);
            result.put(o.getKey(), val);
        }
        Properties props = new Properties();
        props.putAll(result);
        return props;
    }

    /**
     * This method reads in properties listed in a file in the format prop=value,
     * one property per line. and reads them into a LinkedHashMap (insertion order preserving)
     * Flags not having any arguments is set to "true".
     *
     * @param filename A properties file to read
     * @return The corresponding LinkedHashMap where the ordering is the same as in the
     *         props file
     */
    public static LinkedHashMap<String, String> propFileToLinkedHashMap(String filename,
            Map<String, String> existingArgs) {

        LinkedHashMap<String, String> result = new LinkedHashMap<>(existingArgs);
        for (String l : IOUtils.readLines(filename)) {
            l = l.trim();
            if (l.isEmpty() || l.startsWith("#"))
                continue;
            int index = l.indexOf('=');

            if (index == -1)
                result.put(l, "true");
            else
                result.put(l.substring(0, index).trim(), l.substring(index + 1).trim());
        }
        return result;
    }

    /**
     * n grams for already split string. the ngrams are joined with a single space
     */
    public static Collection<String> getNgrams(List<String> words, int minSize, int maxSize) {
        List<List<String>> ng = CollectionUtils.getNGrams(words, minSize, maxSize);
        Collection<String> ngrams = new ArrayList<>();
        for (List<String> n : ng)
            ngrams.add(StringUtils.join(n, " "));

        return ngrams;
    }

    /**
     * n grams for already split string. the ngrams are joined with a single space
     */
    public static Collection<String> getNgramsFromTokens(List<CoreLabel> words, int minSize, int maxSize) {
        List<String> wordsStr = new ArrayList<>();
        for (CoreLabel l : words)
            wordsStr.add(l.word());
        List<List<String>> ng = CollectionUtils.getNGrams(wordsStr, minSize, maxSize);
        Collection<String> ngrams = new ArrayList<>();
        for (List<String> n : ng)
            ngrams.add(StringUtils.join(n, " "));

        return ngrams;
    }

    /**
     * The string is split on whitespace and the ngrams are joined with a single space
     */
    public static Collection<String> getNgramsString(String s, int minSize, int maxSize) {
        return getNgrams(Arrays.asList(s.split("\\s+")), minSize, maxSize);
    }

    /**
     * Build a list of character-based ngrams from the given string.
     */
    public static Collection<String> getCharacterNgrams(String s, int minSize, int maxSize) {
        Collection<String> ngrams = new ArrayList<>();
        int len = s.length();

        for (int i = 0; i < len; i++) {
            for (int ngramSize = minSize; ngramSize > 0 && ngramSize <= maxSize
                    && i + ngramSize <= len; ngramSize++) {
                ngrams.add(s.substring(i, i + ngramSize));
            }
        }

        return ngrams;
    }

    private static Pattern diacriticalMarksPattern = Pattern.compile("\\p{InCombiningDiacriticalMarks}");

    public static String normalize(String s) {
        // Normalizes string and strips diacritics (map to ascii) by
        // 1. taking the NFKD (compatibility decomposition -
        //   in compatibility equivalence, formatting such as subscripting is lost -
        //   see http://unicode.org/reports/tr15/)
        // 2. Removing diacriticals
        // 3. Recombining into NFKC form (compatibility composition)
        // This process may be slow.
        //
        // The main purpose of the function is to remove diacritics for asciis,
        //  but it may normalize other stuff as well.
        // A more conservative approach is to do explicit folding just for ascii character
        //   (see RuleBasedNameMatcher.normalize)
        String d = Normalizer.normalize(s, Normalizer.Form.NFKD);
        d = diacriticalMarksPattern.matcher(d).replaceAll("");
        return Normalizer.normalize(d, Normalizer.Form.NFKC);
    }

    /**
     * Convert a list of labels into a string, by simply joining them with spaces.
     * @param words The words to join.
     * @return A string representation of the sentence, tokenized by a single space.
     */
    public static String toString(List<CoreLabel> words) {
        return join(words.stream().map(CoreLabel::word), " ");
    }

    /**
     * Convert a CoreMap representing a sentence into a string, by simply joining them with spaces.
     * @param sentence The sentence to stringify.
     * @return A string representation of the sentence, tokenized by a single space.
     */
    public static String toString(CoreMap sentence) {
        return toString(sentence.get(CoreAnnotations.TokensAnnotation.class));
    }

    /** I shamefully stole this from: http://rosettacode.org/wiki/Levenshtein_distance#Java --Gabor */
    public static int levenshteinDistance(String s1, String s2) {
        s1 = s1.toLowerCase();
        s2 = s2.toLowerCase();

        int[] costs = new int[s2.length() + 1];
        for (int i = 0; i <= s1.length(); i++) {
            int lastValue = i;
            for (int j = 0; j <= s2.length(); j++) {
                if (i == 0)
                    costs[j] = j;
                else {
                    if (j > 0) {
                        int newValue = costs[j - 1];
                        if (s1.charAt(i - 1) != s2.charAt(j - 1))
                            newValue = Math.min(Math.min(newValue, lastValue), costs[j]) + 1;
                        costs[j - 1] = lastValue;
                        lastValue = newValue;
                    }
                }
            }
            if (i > 0)
                costs[s2.length()] = lastValue;
        }
        return costs[s2.length()];
    }

    /** I shamefully stole this from: http://rosettacode.org/wiki/Levenshtein_distance#Java --Gabor */
    public static <E> int levenshteinDistance(E[] s1, E[] s2) {

        int[] costs = new int[s2.length + 1];
        for (int i = 0; i <= s1.length; i++) {
            int lastValue = i;
            for (int j = 0; j <= s2.length; j++) {
                if (i == 0)
                    costs[j] = j;
                else {
                    if (j > 0) {
                        int newValue = costs[j - 1];
                        if (!s1[i - 1].equals(s2[j - 1]))
                            newValue = Math.min(Math.min(newValue, lastValue), costs[j]) + 1;
                        costs[j - 1] = lastValue;
                        lastValue = newValue;
                    }
                }
            }
            if (i > 0)
                costs[s2.length] = lastValue;
        }
        return costs[s2.length];
    }

    /**
     * Unescape an HTML string.
     * Taken from: http://stackoverflow.com/questions/994331/java-how-to-decode-html-character-entities-in-java-like-httputility-htmldecode
     * @param input The string to unescape
     * @return The unescaped String
     */
    public static String unescapeHtml3(final String input) {
        StringWriter writer = null;
        int len = input.length();
        int i = 1;
        int st = 0;
        while (true) {
            // look for '&'
            while (i < len && input.charAt(i - 1) != '&')
                i++;
            if (i >= len)
                break;

            // found '&', look for ';'
            int j = i;
            while (j < len && j < i + 6 + 1 && input.charAt(j) != ';')
                j++;
            if (j == len || j < i + 2 || j == i + 6 + 1) {
                i++;
                continue;
            }

            // found escape
            if (input.charAt(i) == '#') {
                // numeric escape
                int k = i + 1;
                int radix = 10;

                final char firstChar = input.charAt(k);
                if (firstChar == 'x' || firstChar == 'X') {
                    k++;
                    radix = 16;
                }

                try {
                    int entityValue = Integer.parseInt(input.substring(k, j), radix);

                    if (writer == null)
                        writer = new StringWriter(input.length());
                    writer.append(input.substring(st, i - 1));

                    if (entityValue > 0xFFFF) {
                        final char[] chrs = Character.toChars(entityValue);
                        writer.write(chrs[0]);
                        writer.write(chrs[1]);
                    } else {
                        writer.write(entityValue);
                    }

                } catch (NumberFormatException ex) {
                    i++;
                    continue;
                }
            } else {
                // named escape
                CharSequence value = htmlUnescapeLookupMap.get(input.substring(i, j));
                if (value == null) {
                    i++;
                    continue;
                }

                if (writer == null)
                    writer = new StringWriter(input.length());
                writer.append(input.substring(st, i - 1));

                writer.append(value);
            }

            // skip escape
            st = j + 1;
            i = st;
        }

        if (writer != null) {
            writer.append(input.substring(st, len));
            return writer.toString();
        }
        return input;
    }

    private static final String[][] HTML_ESCAPES = { { "\"", "quot" }, // " - double-quote
            { "&", "amp" }, // & - ampersand
            { "<", "lt" }, // < - less-than
            { ">", "gt" }, // > - greater-than
            { "-", "ndash" }, // - - dash

            // Mapping to escape ISO-8859-1 characters to their named HTML 3.x equivalents.
            { "\u00A0", "nbsp" }, // non-breaking space
            { "\u00A1", "iexcl" }, // inverted exclamation mark
            { "\u00A2", "cent" }, // cent sign
            { "\u00A3", "pound" }, // pound sign
            { "\u00A4", "curren" }, // currency sign
            { "\u00A5", "yen" }, // yen sign = yuan sign
            { "\u00A6", "brvbar" }, // broken bar = broken vertical bar
            { "\u00A7", "sect" }, // section sign
            { "\u00A8", "uml" }, // diaeresis = spacing diaeresis
            { "\u00A9", "copy" }, //  - copyright sign
            { "\u00AA", "ordf" }, // feminine ordinal indicator
            { "\u00AB", "laquo" }, // left-pointing double angle quotation mark = left pointing guillemet
            { "\u00AC", "not" }, // not sign
            { "\u00AD", "shy" }, // soft hyphen = discretionary hyphen
            { "\u00AE", "reg" }, //  - registered trademark sign
            { "\u00AF", "macr" }, // macron = spacing macron = overline = APL overbar
            { "\u00B0", "deg" }, // degree sign
            { "\u00B1", "plusmn" }, // plus-minus sign = plus-or-minus sign
            { "\u00B2", "sup2" }, // superscript two = superscript digit two = squared
            { "\u00B3", "sup3" }, // superscript three = superscript digit three = cubed
            { "\u00B4", "acute" }, // acute accent = spacing acute
            { "\u00B5", "micro" }, // micro sign
            { "\u00B6", "para" }, // pilcrow sign = paragraph sign
            { "\u00B7", "middot" }, // middle dot = Georgian comma = Greek middle dot
            { "\u00B8", "cedil" }, // cedilla = spacing cedilla
            { "\u00B9", "sup1" }, // superscript one = superscript digit one
            { "\u00BA", "ordm" }, // masculine ordinal indicator
            { "\u00BB", "raquo" }, // right-pointing double angle quotation mark = right pointing guillemet
            { "\u00BC", "frac14" }, // vulgar fraction one quarter = fraction one quarter
            { "\u00BD", "frac12" }, // vulgar fraction one half = fraction one half
            { "\u00BE", "frac34" }, // vulgar fraction three quarters = fraction three quarters
            { "\u00BF", "iquest" }, // inverted question mark = turned question mark
            { "\u00C0", "Agrave" }, // ? - uppercase A, grave accent
            { "\u00C1", "Aacute" }, //  - uppercase A, acute accent
            { "\u00C2", "Acirc" }, //  - uppercase A, circumflex accent
            { "\u00C3", "Atilde" }, //  - uppercase A, tilde
            { "\u00C4", "Auml" }, //  - uppercase A, umlaut
            { "\u00C5", "Aring" }, //  - uppercase A, ring
            { "\u00C6", "AElig" }, //  - uppercase AE
            { "\u00C7", "Ccedil" }, //  - uppercase C, cedilla
            { "\u00C8", "Egrave" }, //  - uppercase E, grave accent
            { "\u00C9", "Eacute" }, //  - uppercase E, acute accent
            { "\u00CA", "Ecirc" }, //  - uppercase E, circumflex accent
            { "\u00CB", "Euml" }, //  - uppercase E, umlaut
            { "\u00CC", "Igrave" }, //  - uppercase I, grave accent
            { "\u00CD", "Iacute" }, // ? - uppercase I, acute accent
            { "\u00CE", "Icirc" }, //  - uppercase I, circumflex accent
            { "\u00CF", "Iuml" }, //  - uppercase I, umlaut
            { "\u00D0", "ETH" }, //  - uppercase Eth, Icelandic
            { "\u00D1", "Ntilde" }, //  - uppercase N, tilde
            { "\u00D2", "Ograve" }, //  - uppercase O, grave accent
            { "\u00D3", "Oacute" }, //  - uppercase O, acute accent
            { "\u00D4", "Ocirc" }, //  - uppercase O, circumflex accent
            { "\u00D5", "Otilde" }, //  - uppercase O, tilde
            { "\u00D6", "Ouml" }, //  - uppercase O, umlaut
            { "\u00D7", "times" }, // multiplication sign
            { "\u00D8", "Oslash" }, //  - uppercase O, slash
            { "\u00D9", "Ugrave" }, //  - uppercase U, grave accent
            { "\u00DA", "Uacute" }, //  - uppercase U, acute accent
            { "\u00DB", "Ucirc" }, //  - uppercase U, circumflex accent
            { "\u00DC", "Uuml" }, //  - uppercase U, umlaut
            { "\u00DD", "Yacute" }, //  - uppercase Y, acute accent
            { "\u00DE", "THORN" }, //  - uppercase THORN, Icelandic
            { "\u00DF", "szlig" }, //  - lowercase sharps, German
            { "\u00E0", "agrave" }, //  - lowercase a, grave accent
            { "\u00E1", "aacute" }, //  - lowercase a, acute accent
            { "\u00E2", "acirc" }, //  - lowercase a, circumflex accent
            { "\u00E3", "atilde" }, //  - lowercase a, tilde
            { "\u00E4", "auml" }, //  - lowercase a, umlaut
            { "\u00E5", "aring" }, //  - lowercase a, ring
            { "\u00E6", "aelig" }, //  - lowercase ae
            { "\u00E7", "ccedil" }, //  - lowercase c, cedilla
            { "\u00E8", "egrave" }, //  - lowercase e, grave accent
            { "\u00E9", "eacute" }, //  - lowercase e, acute accent
            { "\u00EA", "ecirc" }, //  - lowercase e, circumflex accent
            { "\u00EB", "euml" }, //  - lowercase e, umlaut
            { "\u00EC", "igrave" }, //  - lowercase i, grave accent
            { "\u00ED", "iacute" }, //  - lowercase i, acute accent
            { "\u00EE", "icirc" }, //  - lowercase i, circumflex accent
            { "\u00EF", "iuml" }, //  - lowercase i, umlaut
            { "\u00F0", "eth" }, //  - lowercase eth, Icelandic
            { "\u00F1", "ntilde" }, // ? - lowercase n, tilde
            { "\u00F2", "ograve" }, //  - lowercase o, grave accent
            { "\u00F3", "oacute" }, //  - lowercase o, acute accent
            { "\u00F4", "ocirc" }, //  - lowercase o, circumflex accent
            { "\u00F5", "otilde" }, //  - lowercase o, tilde
            { "\u00F6", "ouml" }, //  - lowercase o, umlaut
            { "\u00F7", "divide" }, // division sign
            { "\u00F8", "oslash" }, //  - lowercase o, slash
            { "\u00F9", "ugrave" }, //  - lowercase u, grave accent
            { "\u00FA", "uacute" }, //  - lowercase u, acute accent
            { "\u00FB", "ucirc" }, //  - lowercase u, circumflex accent
            { "\u00FC", "uuml" }, //  - lowercase u, umlaut
            { "\u00FD", "yacute" }, // ? - lowercase y, acute accent
            { "\u00FE", "thorn" }, //  - lowercase thorn, Icelandic
            { "\u00FF", "yuml" }, // ? - lowercase y, umlaut
    };

    private static final HashMap<String, CharSequence> htmlUnescapeLookupMap;
    static {
        htmlUnescapeLookupMap = new HashMap<>();
        for (final CharSequence[] seq : HTML_ESCAPES)
            htmlUnescapeLookupMap.put(seq[1].toString(), seq[0]);
    }

    /**
     * Decode an array encoded as a String. This entails a comma separated value enclosed in brackets
     * or parentheses.
     *
     * @param encoded The String encoding an array
     * @return A String array corresponding to the encoded array
     */
    public static String[] decodeArray(String encoded) {
        if (encoded.isEmpty())
            return EMPTY_STRING_ARRAY;
        char[] chars = encoded.trim().toCharArray();

        //--Parse the String
        // (state)
        char quoteCloseChar = (char) 0;
        List<String> terms = new ArrayList<>();
        StringBuilder current = new StringBuilder();
        //(start/stop overhead)
        int start = 0;
        int end = chars.length;
        if (chars[0] == '(') {
            start += 1;
            end -= 1;
            if (chars[end] != ')')
                throw new IllegalArgumentException("Unclosed paren in encoded array: " + encoded);
        }
        if (chars[0] == '[') {
            start += 1;
            end -= 1;
            if (chars[end] != ']')
                throw new IllegalArgumentException("Unclosed bracket in encoded array: " + encoded);
        }
        if (chars[0] == '{') {
            start += 1;
            end -= 1;
            if (chars[end] != '}')
                throw new IllegalArgumentException("Unclosed bracke in encoded array: " + encoded);
        }
        // (finite state automaton)
        for (int i = start; i < end; i++) {
            if (chars[i] == '\r') {
                // Ignore funny windows carriage return
                continue;
            } else if (quoteCloseChar != 0) {
                //(case: in quotes)
                if (chars[i] == quoteCloseChar) {
                    quoteCloseChar = (char) 0;
                } else {
                    current.append(chars[i]);
                }
            } else if (chars[i] == '\\') {
                //(case: escaped character)
                if (i == chars.length - 1)
                    throw new IllegalArgumentException(
                            "Last character of encoded array is escape character: " + encoded);
                current.append(chars[i + 1]);
                i += 1;
            } else {
                //(case: normal)
                if (chars[i] == '"') {
                    quoteCloseChar = '"';
                } else if (chars[i] == '\'') {
                    quoteCloseChar = '\'';
                } else if (chars[i] == ',' || chars[i] == ';' || chars[i] == ' ' || chars[i] == '\t'
                        || chars[i] == '\n') {
                    //break
                    if (current.length() > 0) {
                        terms.add(current.toString().trim());
                    }
                    current = new StringBuilder();
                } else {
                    current.append(chars[i]);
                }
            }
        }

        //--Return
        if (current.length() > 0) {
            terms.add(current.toString().trim());
        }
        return terms.toArray(EMPTY_STRING_ARRAY);
    }

    /**
     * Decode a map encoded as a string.
     *
     * @param encoded The String encoded map
     * @return A String map corresponding to the encoded map
     */
    public static Map<String, String> decodeMap(String encoded) {
        if (encoded.isEmpty())
            return new HashMap<>();
        char[] chars = encoded.trim().toCharArray();

        //--Parse the String
        //(state)
        char quoteCloseChar = (char) 0;
        Map<String, String> map = new HashMap<>();
        String key = "";
        String value = "";
        boolean onKey = true;
        StringBuilder current = new StringBuilder();
        //(start/stop overhead)
        int start = 0;
        int end = chars.length;
        if (chars[0] == '(') {
            start += 1;
            end -= 1;
            if (chars[end] != ')')
                throw new IllegalArgumentException("Unclosed paren in encoded map: " + encoded);
        }
        if (chars[0] == '[') {
            start += 1;
            end -= 1;
            if (chars[end] != ']')
                throw new IllegalArgumentException("Unclosed bracket in encoded map: " + encoded);
        }
        if (chars[0] == '{') {
            start += 1;
            end -= 1;
            if (chars[end] != '}')
                throw new IllegalArgumentException("Unclosed bracket in encoded map: " + encoded);
        }
        //(finite state automata)
        for (int i = start; i < end; i++) {
            if (chars[i] == '\r') {
                // Ignore funny windows carriage return
                continue;
            } else if (quoteCloseChar != 0) {
                //(case: in quotes)
                if (chars[i] == quoteCloseChar) {
                    quoteCloseChar = (char) 0;
                } else {
                    current.append(chars[i]);
                }
            } else if (chars[i] == '\\') {
                //(case: escaped character)
                if (i == chars.length - 1) {
                    throw new IllegalArgumentException(
                            "Last character of encoded pair is escape character: " + encoded);
                }
                current.append(chars[i + 1]);
                i += 1;
            } else {
                //(case: normal)
                if (chars[i] == '"') {
                    quoteCloseChar = '"';
                } else if (chars[i] == '\'') {
                    quoteCloseChar = '\'';
                } else if (chars[i] == '\n' && current.length() == 0) {
                    current.append(""); // do nothing
                } else if (chars[i] == ',' || chars[i] == ';' || chars[i] == '\t' || chars[i] == '\n') {
                    // case: end a value
                    if (onKey) {
                        throw new IllegalArgumentException("Encountered key without value");
                    }
                    if (current.length() > 0) {
                        value = current.toString().trim();
                    }
                    current = new StringBuilder();
                    onKey = true;
                    map.put(key, value); // <- add value
                } else if ((chars[i] == '-' || chars[i] == '=') && (i < chars.length - 1 && chars[i + 1] == '>')) {
                    // case: end a key
                    if (!onKey) {
                        throw new IllegalArgumentException("Encountered a value without a key");
                    }
                    if (current.length() > 0) {
                        key = current.toString().trim();
                    }
                    current = new StringBuilder();
                    onKey = false;
                    i += 1; // skip '>' character
                } else if (chars[i] == ':') {
                    // case: end a key
                    if (!onKey) {
                        throw new IllegalArgumentException("Encountered a value without a key");
                    }
                    if (current.length() > 0) {
                        key = current.toString().trim();
                    }
                    current = new StringBuilder();
                    onKey = false;
                } else {
                    current.append(chars[i]);
                }
            }
        }

        //--Return
        if (current.toString().trim().length() > 0 && !onKey) {
            map.put(key.trim(), current.toString().trim());
        }
        return map;
    }

    /**
     * Takes an input String, and replaces any bash-style variables (e.g., $VAR_NAME)
     * with its actual environment variable from the passed environment specification.
     *
     * @param raw The raw String to replace variables in.
     * @param env The environment specification; e.g., {@link System#getenv()}.
     * @return The input String, but with all variables replaced.
     */
    public static String expandEnvironmentVariables(String raw, Map<String, String> env) {
        String pattern = "\\$\\{?([a-zA-Z_]+[a-zA-Z0-9_]*)\\}?";
        Pattern expr = Pattern.compile(pattern);
        String text = raw;
        Matcher matcher = expr.matcher(text);
        while (matcher.find()) {
            String envValue = env.get(matcher.group(1));
            if (envValue == null) {
                envValue = "";
            } else {
                envValue = envValue.replace("\\", "\\\\");
            }
            Pattern subexpr = Pattern.compile(Pattern.quote(matcher.group(0)));
            text = subexpr.matcher(text).replaceAll(envValue);
        }
        return text;
    }

    /**
     * Takes an input String, and replaces any bash-style variables (e.g., $VAR_NAME)
     * with its actual environment variable from {@link System#getenv()}.
     *
     * @param raw The raw String to replace variables in.
     * @return The input String, but with all variables replaced.
     */
    public static String expandEnvironmentVariables(String raw) {
        return expandEnvironmentVariables(raw, System.getenv());
    }

    /**
     * Logs the command line arguments to Redwood on the given channels.
     * The logger should be a RedwoodChannels of a single channel: the main class.
     *
     * @param logger The redwood logger to log to.
     * @param args The command-line arguments to log.
     */
    public static void logInvocationString(Redwood.RedwoodChannels logger, String[] args) {
        StringBuilder sb = new StringBuilder("Invoked on ");
        sb.append(new Date());
        sb.append(" with arguments:");
        for (String arg : args) {
            sb.append(' ').append(arg);
        }
        logger.info(sb.toString());
    }

    private static boolean containsJsonEscape(String str) {
        for (int i = 0; i < str.length(); i++) {
            char ch = str.charAt(i);
            if (ch < '\u0020' || ch == '\\' || ch == '"') {
                return true;
            }
        }
        return false;
    }

    private static final String escapeLetters = "btnvfr";

    public static String escapeJsonString(String str) {
        // check if there are any, else return same str without allocation
        if (!containsJsonEscape(str)) {
            return str;
        }
        StringBuilder sb = new StringBuilder(str.length() * 2);
        for (int i = 0; i < str.length(); i++) {
            char ch = str.charAt(i);
            if (ch == '\\') {
                sb.append("\\\\");
            } else if (ch == '"') {
                sb.append("\\\"");
            } else if (ch < '\u0020') {
                if (ch >= '\b' && ch <= '\r' && ch != '\u000B') {
                    sb.append('\\');
                    sb.append(escapeLetters.charAt(ch - '\b'));
                } else {
                    sb.append("\\u00");
                    sb.append(String.format("%02X", (int) ch));
                }
            } else {
                sb.append(ch);
            }
        }
        return sb.toString();
    }

    /** @return index of pattern (i.e., char position of start of pattern) in s or -1, if not found. */
    public static int indexOfRegex(Pattern pattern, String s) {
        Matcher matcher = pattern.matcher(s);
        return matcher.find() ? matcher.start() : -1;
    }

    /** @return index of pattern in s or -1, if not found, starting from index. */
    public static int indexOfRegex(Pattern pattern, String s, int index) {
        Matcher matcher = pattern.matcher(s);
        return matcher.find(index) ? matcher.start() : -1;
    }

}