com.opendoorlogistics.core.utils.strings.Strings.java Source code

Java tutorial

Introduction

Here is the source code for com.opendoorlogistics.core.utils.strings.Strings.java

Source

/*******************************************************************************
 * Copyright (c) 2014 Open Door Logistics (www.opendoorlogistics.com)
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the GNU Lesser Public License v3
 * which accompanies this distribution, and is available at http://www.gnu.org/licenses/lgpl.txt
 ******************************************************************************/
package com.opendoorlogistics.core.utils.strings;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.InputStream;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.Charsets;
import org.apache.commons.io.IOUtils;

import com.opendoorlogistics.core.utils.Numbers;

final public class Strings {
    // Set of double-quote style characters (the plain ASCII double quote plus several Unicode
    // quotation and prime marks). Populated once in the static initialiser below and only read afterwards.
    private static final HashSet<Character> unicodeSpeechMarks;

    // Matches any single character that is NOT an ASCII letter, a digit, whitespace or a hyphen.
    private static Pattern nonAlphaNumericOrWhitespace = Pattern.compile("[^a-zA-Z\\d\\s\\-]");

    static {
        // fill the speech mark lookup set from a fixed list of code points
        unicodeSpeechMarks = new HashSet<>();
        for (char c : new char[] { '\u0022', '\u201C', '\u201D', '\u201E', '\u201F', '\u301D', '\u301E', '\u301F',
                '\uFF02', '\u2033', '\u2036' }) {
            unicodeSpeechMarks.add(c);
        }
    }

    /**
     * Private constructor: prevents instantiation from outside this class, which is used through its static members.
     */
    private Strings() {
    }

    /**
     * Callback which tells {@code makeUnique} whether a candidate string is already taken.
     */
    public interface DoesStringExist {
        /**
         * @param s the candidate string
         * @return true if the candidate is already in use
         */
        boolean isExisting(String s);
    }

    /**
     * Makes a string unique by appending a counter. The unmodified string is tried first, followed by the
     * string with 1, 2, 3... appended, and the first candidate the callback reports as not existing is returned.
     * 
     * @param s the base string
     * @param cb callback used to test whether a candidate already exists
     * @return the first unused candidate, or null if every candidate was rejected
     */
    public static String makeUnique(String s, DoesStringExist cb) {
        for (long suffix = 0; suffix < Long.MAX_VALUE; suffix++) {
            // suffix 0 means "no suffix"; the concatenation with "" mirrors normal string conversion of s
            String candidate = suffix == 0 ? s + "" : s + suffix;
            if (!cb.isExisting(candidate)) {
                return candidate;
            }
        }

        // every candidate was rejected
        return null;
    }

    /**
     * Replaces every character held in the speech mark lookup set with a plain double quote character,
     * leaving all other characters untouched.
     * 
     * @param s the input string (must not be null)
     * @return a copy of the input with the speech marks standardised
     */
    public static String standardiseSpeechMarks(String s) {
        StringBuilder result = new StringBuilder(s.length());
        for (char ch : s.toCharArray()) {
            result.append(unicodeSpeechMarks.contains(ch) ? '"' : ch);
        }
        return result.toString();
    }

    /**
     * Joins the strings into a single string, separated by commas (no space after the comma).
     * 
     * @param strs the strings to join
     * @return the comma separated string
     */
    public static String toCommas(String... strs) {
        final String comma = ",";
        return toString(comma, strs);
    }

    /**
     * Joins the strings in the collection, in iteration order, into a single comma separated string.
     * 
     * @param strs the strings to join (must not be null)
     * @return the comma separated string
     */
    public static String toCommas(Collection<String> strs) {
        String[] values = strs.toArray(new String[strs.size()]);
        return toCommas(values);
    }

    /**
     * Converts the string so that the first character, and every character directly following a whitespace
     * character, is upper case and all remaining characters are lower case.
     * 
     * @param s the input string (must not be null)
     * @return the converted string
     */
    public static String toFirstLetterInWordCapitalised(String s) {
        StringBuilder out = new StringBuilder();

        // the very first character counts as the start of a word
        boolean startOfWord = true;
        for (char ch : s.toCharArray()) {
            out.append(startOfWord ? Character.toUpperCase(ch) : Character.toLowerCase(ch));
            startOfWord = Character.isWhitespace(ch);
        }
        return out.toString();
    }

    /**
     * Converts an object of type T to the string which should represent it in joined output.
     */
    public static interface ToString<T> {
        /**
         * @param o the object to convert
         * @return the string representation of the object
         */
        String toString(T o);
    }

    /**
     * Converts each object using the converter and joins the results with commas.
     * 
     * @param toString converter applied to each object
     * @param objs the objects to join, in iteration order (must not be null)
     * @return the comma separated string
     */
    public static <T> String toCommas(ToString<T> toString, Collection<T> objs) {
        return toString(toString, ",", objs);
    }

    /**
     * Converts each object using the converter and joins the results, placing the separator between
     * consecutive elements (never before the first or after the last).
     * 
     * @param toString converter applied to each object
     * @param separator text placed between consecutive elements
     * @param objs the objects to join, in iteration order (must not be null)
     * @return the joined string, empty if the collection is empty
     */
    public static <T> String toString(ToString<T> toString, String separator, Collection<T> objs) {
        StringBuilder builder = new StringBuilder();

        // tracked explicitly so that the separator is emitted before every element except the first
        boolean first = true;
        for (T obj : objs) {
            if (!first) {
                builder.append(separator);
            }
            builder.append(toString.toString(obj));
            first = false;
        }
        return builder.toString();
    }

    /**
     * Joins the strings in the collection, in iteration order, placing the separator between consecutive elements.
     * 
     * @param separator text placed between consecutive elements
     * @param strs the strings to join (must not be null)
     * @return the joined string
     */
    public static String toString(String separator, Collection<String> strs) {
        String[] values = strs.toArray(new String[strs.size()]);
        return toString(separator, values);
    }

    /**
     * Tests whether any string in the iterable equals the input string when both are compared
     * using the standardised equality test.
     * 
     * @param find the string to look for
     * @param findIn the strings to search
     * @return true if at least one element matches
     */
    public static boolean containsStandardised(String find, Iterable<String> findIn) {
        boolean found = false;
        for (String candidate : findIn) {
            if (equalsStd(find, candidate)) {
                found = true;
                break;
            }
        }
        return found;
    }

    /**
     * Joins the strings, placing the separator between consecutive elements
     * (never before the first or after the last).
     * 
     * @param separator text placed between consecutive elements
     * @param strs the strings to join
     * @return the joined string, empty if no strings were supplied
     */
    public static String toString(String separator, String... strs) {
        StringBuilder joined = new StringBuilder();
        boolean first = true;
        for (String str : strs) {
            if (!first) {
                joined.append(separator);
            }
            joined.append(str);
            first = false;
        }
        return joined.toString();
    }

    /**
     * Creates a string array holding the result of calling toString() on each element of the input array.
     * 
     * @param objs the objects to convert (neither the array nor its elements may be null)
     * @return a new array of the same length as the input
     */
    public static <T> String[] toStringArray(T[] objs) {
        String[] strings = new String[objs.length];
        int index = 0;
        for (T obj : objs) {
            strings[index++] = obj.toString();
        }
        return strings;
    }

    /**
     * Replaces each run of one or more characters which are not ASCII letters, digits, dots or underscores
     * with a single underscore. See
     * http://stackoverflow.com/questions/1155107/is-there-a-cross-platform-java-method-to-remove-filename-special-chars
     * 
     * @param s the input string (must not be null)
     * @return the string with the unsafe runs replaced
     */
    public static String toFileSafeString(String s) {
        final String unsafeRun = "[^a-zA-Z0-9\\._]+";
        final String substitute = "_";
        return s.replaceAll(unsafeRun, substitute);
    }

    /**
     * Creates a copy of the array in which every element has been converted to lower case.
     * The input array is not modified.
     * 
     * @param arr the strings to convert (neither the array nor its elements may be null)
     * @return a new array of the same length as the input
     */
    public static String[] toLowerCaseArray(String[] arr) {
        String[] lowered = arr.clone();
        for (int pos = 0; pos < lowered.length; pos++) {
            lowered[pos] = lowered[pos].toLowerCase();
        }
        return lowered;
    }

    /**
     * Builds a sorted set holding the standardised form of every string in the iterable.
     * Null elements, and elements which are empty once standardised, are left out.
     * 
     * @param iterable the strings to standardise; null is treated as empty
     * @return a new set (never null)
     */
    public static TreeSet<String> stdTreeSet(Iterable<String> iterable) {
        TreeSet<String> result = new TreeSet<>();
        if (iterable == null) {
            return result;
        }

        for (String raw : iterable) {
            if (raw == null) {
                continue;
            }
            String standardised = std(raw);
            if (standardised.length() > 0) {
                result.add(standardised);
            }
        }
        return result;
    }

    //   /**
    //    * Standardised version of a string value
    //    * 
    //    * @param s
    //    * @return
    //    */
    //   public static String std(String s) {
    //      if (s == null) {
    //         return "";
    //      }
    //
    //      // trim whitespace at start and end and convert to lowercase
    //      s = s.trim().toLowerCase();
    //
    //      // ensure only have single spaces
    //      s = s.replaceAll("  ", " ");
    //      return s;
    //   }

    /**
     * Standardised version of a string value: leading and trailing whitespace is removed, every
     * character is converted to lower case and each internal run of whitespace becomes a single space.
     * Implemented as a single pass over the characters rather than with regular expressions.
     * 
     * @param s the input string; null is treated as empty
     * @return the standardised string (never null)
     */
    public static String std(String s) {
        if (s == null) {
            return "";
        }

        final int length = s.length();

        // skip leading whitespace
        int start = 0;
        while (start < length && Character.isWhitespace(s.charAt(start))) {
            start++;
        }

        // skip trailing whitespace
        int end = length - 1;
        while (end >= 0 && Character.isWhitespace(s.charAt(end))) {
            end--;
        }

        StringBuilder out = new StringBuilder(length);
        boolean previousWasWhitespace = false;
        for (int pos = start; pos <= end; pos++) {
            char lower = Character.toLowerCase(s.charAt(pos));
            if (Character.isWhitespace(lower)) {
                // a run of whitespace collapses to one space, written when the run starts
                if (!previousWasWhitespace) {
                    out.append(' ');
                }
                previousWasWhitespace = true;
            } else {
                out.append(lower);
                previousWasWhitespace = false;
            }
        }

        return out.toString();
    }

    /**
     * Replaces every occurrence of the target in the source with the replacement, ignoring case when
     * matching. Occurrences are found left to right and replaced text is never re-scanned. See
     * http://stackoverflow.com/questions/5054995/how-to-replace-case-insensitive-literal-substrings-in-java
     * 
     * Matching is done on the lower-cased source and target, so it assumes lower-casing does not change
     * the length of the source.
     * 
     * @param source the string to search (must not be null)
     * @param target the text to find; if empty the source is returned unchanged
     * @param replacement the text inserted in place of each match (must not be null)
     * @return the string with all matches replaced
     */
    public static String caseInsensitiveReplace(String source, String target, String replacement) {
        String searchString = target.toLowerCase();
        if (searchString.length() == 0) {
            // an empty target matches at every index, which would make the loop below grow the string forever
            return source;
        }

        // both builders are edited in step so an index found in the lower-cased copy is valid in the original
        StringBuilder sbSource = new StringBuilder(source);
        StringBuilder sbSourceLower = new StringBuilder(source.toLowerCase());

        int idx = 0;
        while ((idx = sbSourceLower.indexOf(searchString, idx)) != -1) {
            sbSource.replace(idx, idx + searchString.length(), replacement);
            sbSourceLower.replace(idx, idx + searchString.length(), replacement);

            // continue after the inserted text so that the replacement itself is never matched
            idx += replacement.length();
        }

        return sbSource.toString();
    }

    /**
     * Builds a string made of the input repeated the given number of times.
     * 
     * @param s the string to repeat
     * @param nTimes the number of repetitions; zero or a negative number gives an empty string
     * @return the repeated string
     */
    public static String repeat(String s, int nTimes) {
        StringBuilder repeated = new StringBuilder();
        int remaining = nTimes;
        while (remaining > 0) {
            repeated.append(s);
            remaining--;
        }
        return repeated.toString();
    }

    /**
     * Finds the index of the string in the array. The whole array is first searched for an exact match
     * and only if none is found is it searched again using the standardised equality test, so an exact
     * match is always favoured over a standardised one.
     * 
     * @param find the string to look for
     * @param strs the array to search
     * @return the index of the first match, or -1 if not found
     */
    public static int indexOfStd(String find, String[] strs) {
        // first pass - exact matches only
        int index = 0;
        for (String candidate : strs) {
            if (find.equals(candidate)) {
                return index;
            }
            index++;
        }

        // second pass - standardised matches
        index = 0;
        for (String candidate : strs) {
            if (equalsStd(find, candidate)) {
                return index;
            }
            index++;
        }

        return -1;
    }

    /**
     * Null-safe, case-sensitive equality test. Two nulls are considered equal;
     * a null is never equal to a non-null string.
     * 
     * @param a first string, may be null
     * @param b second string, may be null
     * @return true if both are null or both hold the same characters
     */
    public static boolean equals(String a, String b) {
        return a == null ? b == null : a.equals(b);
    }

    /**
     * Tests whether two strings are equal once standardised, without using a cache of standardised values.
     * 
     * @param a first string, may be null
     * @param b second string, may be null
     * @return true if the strings are equal under the standardised comparison
     */
    public static boolean equalsStd(String a, String b) {
        // passing no cache means each string is standardised on the fly
        return equalsStd(a, b, (StandardisedCache) null);
    }

    /**
     * Tests whether two strings are equal once standardised. If both strings are null or zero length they
     * are equal; otherwise a null is never equal to a non-null string.
     * 
     * @param a first string, may be null
     * @param b second string, may be null
     * @param standardisedCache if not null, the standardised values are obtained from this cache instead of
     *        being calculated directly
     * @return true if the strings are equal under the standardised comparison
     */
    public static boolean equalsStd(String a, String b, StandardisedCache standardisedCache) {
        if (isEmpty(a) && isEmpty(b)) {
            return true;
        }

        // at most one of them is empty here, so a null cannot match
        if (a == null || b == null) {
            return false;
        }

        return standardisedCache == null ? std(a).equals(std(b))
                : standardisedCache.std(a).equals(standardisedCache.std(b));
    }

    /**
     * Comparer which splits each standardised string into a leading non-numeric part, the first number
     * found and the text after that number, then compares those three parts in turn. The number part is
     * compared numerically.
     */
    private static class StdStringComparer {
        // regex fragments: leading run of non-digit characters, text following the number, the number itself
        final private String notNumberGroup1 = "([^0-9]*)";
        final private String notNumberGroup2 = "([^0-9]*.*)";
        final private String number = "(\\d+)";

        // full pattern: group 1 = text before the first digit, group 2 = the digits, group 3 = text after the digits
        final private Pattern pattern = Pattern
                .compile(notNumberGroup1 + "\\s*" + number + "\\s*" + notNumberGroup2 + "(.*)");
        final private static StdStringComparer singleton = new StdStringComparer();

        /**
         * A string broken down into the parts used for comparison.
         */
        private class StringComponents implements Comparable<StringComponents> {
            // the string exactly as supplied to the constructor
            String original;
            // standardised text before the number, or the whole standardised string if no number was extracted
            String nonNumber;
            // the extracted number, or Long.MIN_VALUE if no number was extracted
            Long number;
            // standardised text after the number, or empty if no number was extracted
            String secondNonNumber;

            public StringComponents(String s) {
                original = s;
                s = Strings.std(s);

                // defaults used when the string does not contain a usable number
                nonNumber = s;
                number = Long.MIN_VALUE;
                secondNonNumber = "";
                Matcher matcher = pattern.matcher(s);
                if (matcher.matches()) {
                    // presumably Numbers.toLong returns null when the digits cannot be converted - verify against Numbers
                    Long l = Numbers.toLong(matcher.group(2));
                    if (l != null) {
                        nonNumber = Strings.std(matcher.group(1));
                        number = l;
                        secondNonNumber = Strings.std(matcher.group(3));
                    }
                }
            }

            /**
             * Orders by the leading text, then by the number, then by the trailing text.
             */
            @Override
            public int compareTo(StringComponents o) {
                int diff = nonNumber.compareTo(o.nonNumber);
                if (diff == 0) {
                    diff = Long.compare(number, o.number);
                }
                if (diff == 0) {
                    diff = secondNonNumber.compareTo(o.secondNonNumber);
                }
                return diff;
            }

            @Override
            public String toString() {
                return original;
            }
        }

        private StdStringComparer() {
        }

        /**
         * Compares two strings by their components. Returns 0 straight away when a is null and b is
         * null or zero length.
         * 
         * NOTE(review): in every other case the result of the emptiness comparison is overwritten by
         * the component comparison below - confirm whether empty strings were meant to be ordered separately.
         */
        public int compare(String a, String b) {
            int diff = Boolean.compare(isEmpty(a), isEmpty(b));
            if (diff == 0 && a == null) {
                // must both be empty...
                return 0;
            }
            StringComponents ca = new StringComponents(a);
            StringComponents cb = new StringComponents(b);
            diff = ca.compareTo(cb);
            return diff;
        }

        /**
         * @return the single shared instance of the comparer
         */
        public static StdStringComparer singleton() {
            return singleton;
        }
    }

    /**
     * Creates a new array holding all the elements of the first array followed by all the elements of the second.
     * 
     * @param arr1 the elements placed first (must not be null)
     * @param arr2 the elements placed last (must not be null)
     * @return a new array whose length is the sum of both input lengths
     */
    public static String[] addArrays(String[] arr1, String... arr2) {
        final int firstLength = arr1.length;
        final int secondLength = arr2.length;
        String[] combined = new String[firstLength + secondLength];
        System.arraycopy(arr1, 0, combined, 0, firstLength);
        System.arraycopy(arr2, 0, combined, firstLength, secondLength);
        return combined;
    }

    /**
     * Creates a copy of the array with the string added as an extra last element.
     * 
     * @param arr the original elements (must not be null)
     * @param s the string to place at the end
     * @return a new array one element longer than the input
     */
    public static String[] addToArray(String[] arr, String s) {
        final int originalLength = arr.length;
        String[] extended = Arrays.copyOf(arr, originalLength + 1);
        extended[originalLength] = s;
        return extended;
    }

    /**
     * Creates a new array holding the string as its first element followed by all the elements of the array.
     * 
     * @param s the string to place first
     * @param arr the elements placed after the string (must not be null)
     * @return a new array one element longer than the input array
     */
    public static String[] addArrayToStr(String s, String[] arr) {
        String[] ret = new String[arr.length + 1];
        ret[0] = s;

        // the copied elements occupy indices 1..arr.length and must not be overwritten afterwards
        System.arraycopy(arr, 0, ret, 1, arr.length);
        return ret;
    }

    /**
     * Standardised comparison of two strings, performed on the standardised version of each string.
     * 
     * @param a first string
     * @param b second string
     * @param useNumberSortationLogic if true, strings made of a word followed by a number, e.g. "vehicle 9" and
     *        "vehicle 11", have the number part compared numerically; if false the standardised strings are
     *        compared as plain text
     * @return negative, zero or positive depending on whether a sorts before, with or after b
     */
    public static int compareStd(String a, String b, boolean useNumberSortationLogic) {
        return useNumberSortationLogic ? StdStringComparer.singleton().compare(a, b) : std(a).compareTo(std(b));
    }

    /**
     * Tests whether the string represents a true value. Once standardised, the accepted values are
     * "1", "t", "true", "y" and "yes".
     * 
     * @param s the string to test; null gives false
     * @return true if the string is one of the accepted values
     */
    public static boolean isTrue(String s) {
        if (s == null) {
            return false;
        }

        switch (std(s)) {
        case "1":
        case "t":
        case "true":
        case "y":
        case "yes":
            return true;
        default:
            return false;
        }
    }

    /**
     * Tests whether the string contains at least one whitespace character, as defined by the regular
     * expression class \s (space, tab, line feed, vertical tab, form feed or carriage return).
     * 
     * @param s the string to test (must not be null)
     * @return true if any whitespace character is present
     */
    public static boolean hasWhiteSpace(String s) {
        // find() is used rather than matches() so that line terminators elsewhere in the
        // string cannot prevent whitespace from being detected
        return hasWhitespace.matcher(s).find();
    }

    // a single whitespace character anywhere in the input
    private final static Pattern hasWhitespace = Pattern.compile("\\s");

    // optional minus sign, one or more digits, then an optional dot and optional further digits
    private static final Pattern isNumber = Pattern.compile("\\s*-?\\d+\\.?\\d*\\s*");

    // optional minus sign followed by one or more digits
    private static final Pattern isIntNumber = Pattern.compile("\\s*-?\\d+\\s*$");

    /**
     * Tests if the input string is an integer or decimal number written with plain digits, an optional
     * leading minus sign and an optional decimal point. Leading and trailing whitespace is ignored.
     * 
     * @param s the string to test; null or empty gives false
     * @return true if the whole string is a number
     */
    public static boolean isNumber(String s) {
        return !Strings.isEmpty(s) && isNumber.matcher(s).matches();
    }

    // email pattern: local part, @, domain labels and a final label of at least two letters
    private static final Pattern isEmail = Pattern
            .compile("^[_A-Za-z0-9-\\+]+(\\.[_A-Za-z0-9-]+)*@[A-Za-z0-9-]+(\\.[A-Za-z0-9]+)*(\\.[A-Za-z]{2,})$");

    /**
     * Tests whether the whole string is an email address, using the regular expression from
     * http://examples.javacodegeeks.com/core-java/util/regex/matcher/validate-email-address-with-java-regular-expression-example/
     * 
     * @param s the string to test (must not be null)
     * @return true if the string matches the email pattern
     */
    public static boolean isEmailAddress(String s) {
        final boolean wholeStringMatches = isEmail.matcher(s).matches();
        return wholeStringMatches;
    }

    /**
     * Tests if the input string is a whole number written with plain digits and an optional leading
     * minus sign. Leading and trailing whitespace is ignored.
     * 
     * @param s the string to test; null or empty gives false
     * @return true if the whole string is an integer number
     */
    public static boolean isIntNumber(String s) {
        return !Strings.isEmpty(s) && isIntNumber.matcher(s).matches();
    }

    /**
     * Tests whether the string both starts and ends with a plain double quote character. A string
     * consisting of a single double quote is not considered enclosed.
     * 
     * @param s the string to test (must not be null)
     * @return true if the string is at least two characters long and wrapped in double quotes
     */
    public static boolean isEnclosedBySpeechMarks(String s) {
        if (s.length() < 2) {
            return false;
        }
        return s.startsWith("\"") && s.endsWith("\"");
    }

    /**
     * Parses a whole number without throwing an exception. The string must pass the integer number
     * test, which tolerates leading and trailing whitespace; that whitespace is removed before parsing.
     * 
     * @param s the string to parse, may be null
     * @return the parsed value, or null if the string is not an integer number or does not fit in a long
     */
    public static Long parseLong(String s) {
        if (!isIntNumber(s)) {
            return null;
        }

        try {
            // Long.parseLong rejects surrounding whitespace which the integer number test accepts
            return Long.parseLong(s.trim());
        } catch (NumberFormatException e) {
            // the digits are valid but the value is outside the range of a long
            return null;
        }
    }

    /**
     * Builds a string made of the given number of tab characters.
     * 
     * @param nbTabs the number of tabs; zero or a negative number gives an empty string
     * @return the string of tabs
     */
    public static String getTabs(int nbTabs) {
        char[] tabs = new char[Math.max(0, nbTabs)];
        Arrays.fill(tabs, '\t');
        return new String(tabs);
    }

    /**
     * Adds additional tabs to the start of each line. Tabs are placed at the start of the string and after
     * every line feed character, except for a line feed which is the last character of the string.
     * An empty string is returned unchanged.
     * 
     * @param s the text to indent (must not be null)
     * @param nbTabs the number of tabs to add to each line
     * @return the indented text
     */
    public static String getTabIndented(String s, int nbTabs) {
        final String indent = getTabs(nbTabs);
        final int length = s.length();
        if (length == 0) {
            return "";
        }

        StringBuilder indented = new StringBuilder(indent);
        final int lastIndex = length - 1;
        for (int pos = 0; pos < length; pos++) {
            char ch = s.charAt(pos);
            indented.append(ch);

            // a trailing line feed does not start a new line, so nothing is added after it
            boolean startsNewLine = ch == '\n' && pos != lastIndex;
            if (startsNewLine) {
                indented.append(indent);
            }
        }
        return indented.toString();
    }

    /**
     * Tests whether the object is null or its string representation is empty.
     * 
     * @param o the object to test, may be null
     * @return true if the object is null or converts to an empty string
     */
    public static boolean isEmpty(Object o) {
        if (o == null) {
            return true;
        }
        return isEmpty(o.toString());
    }

    /**
     * Tests whether the object is null or its string representation is empty once standardised.
     * 
     * @param o the object to test, may be null
     * @return true if the object is null or converts to a string which standardises to empty
     */
    public static boolean isEmptyWhenStandardised(Object o) {
        if (o == null) {
            return true;
        }
        return isEmptyWhenStandardised(o.toString());
    }

    /**
     * Tests whether the string is null or has zero length. Whitespace counts as content.
     * 
     * @param s the string to test, may be null
     * @return true if the string is null or zero length
     */
    public static boolean isEmpty(String s) {
        if (s == null) {
            return true;
        }
        return s.isEmpty();
    }

    /**
     * Tests whether the standardised version of the string is empty.
     * 
     * @param s the string to test, may be null
     * @return true if the string standardises to an empty string
     */
    public static boolean isEmptyWhenStandardised(String s) {
        String standardised = std(s);
        return isEmpty(standardised);
    }

    /**
     * Writes the string to the file, replacing any existing content. The text is encoded using the
     * default charset of the PrintWriter.
     * 
     * @param s the text to write
     * @param file the file to write to
     * @throws RuntimeException wrapping the underlying problem if the file cannot be opened or written
     */
    public static void writeToFile(String s, File file) {
        // try-with-resources closes the writer even if the write fails
        try (PrintWriter out = new PrintWriter(file)) {
            out.append(s);

            // PrintWriter never throws on a failed write; it only records the error, so ask for it explicitly
            if (out.checkError()) {
                throw new java.io.IOException("Error writing to file " + file);
            }
        } catch (Throwable e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Reads the whole file and returns its lines in order, without line terminators. The file is decoded
     * using the default charset of FileReader.
     * 
     * @param path the path of the file to read
     * @return the lines of the file (never null)
     * @throws RuntimeException wrapping the underlying problem if the file cannot be opened or read
     */
    public static List<String> readFileAsLines(String path) {
        List<String> lines = new ArrayList<>();

        // try-with-resources closes the reader and keeps a read failure as the reported exception
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                lines.add(line);
            }
        } catch (Throwable e) {
            throw new RuntimeException(e);
        }
        return lines;
    }

    /**
     * Reads a resource, located relative to this class, as UTF-8 text.
     * 
     * @param name the resource name, as understood by Class.getResourceAsStream
     * @return the content of the resource
     * @throws RuntimeException if the resource does not exist or cannot be read
     */
    public static String readUTF8Resource(String name) {
        // Use own class loader to prevent problems when jar loaded by reflection
        InputStream is = Strings.class.getResourceAsStream(name);
        if (is == null) {
            // report a missing resource clearly rather than via a null pointer exception during the copy
            throw new RuntimeException("Resource not found: " + name);
        }

        StringWriter writer = new StringWriter();

        // try-with-resources closes the stream even if the copy fails
        try (InputStream in = is) {
            IOUtils.copy(in, writer, Charsets.UTF_8);
        } catch (Throwable e) {
            throw new RuntimeException(e);
        }

        return writer.toString();
    }

    /**
     * Reads the whole file as text, decoding it with the default charset of the platform.
     * 
     * @param path the path of the file to read
     * @return the content of the file
     */
    public static String readFile(String path) {
        Charset platformDefault = Charset.defaultCharset();
        return readFile(path, platformDefault);
    }

    /**
     * Reads the whole file as text, decoding its bytes with the given charset.
     * 
     * @param path the path of the file to read
     * @param encoding the charset used to decode the file
     * @return the content of the file
     * @throws RuntimeException wrapping the underlying problem if the file cannot be read or decoded
     */
    public static String readFile(String path, Charset encoding) {
        try {
            ByteBuffer contents = ByteBuffer.wrap(Files.readAllBytes(Paths.get(path)));
            return encoding.decode(contents).toString();
        } catch (Throwable e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Converts text to a more display-friendly format: underscores become spaces, runs of spaces are
     * collapsed to a single space, surrounding whitespace is removed and the text is lower case with
     * its first character upper case, e.g. "HELLO_WORLD" becomes "Hello world".
     * 
     * @param text the text to convert (must not be null)
     * @return the display-friendly text
     */
    public static String convertEnumToDisplayFriendly(String text) {
        text = text.replaceAll("_", " ");

        // collapse runs of any length; replacing only pairs would leave double spaces for runs of three or more
        text = text.replaceAll(" {2,}", " ");
        text = text.trim();
        text = text.toLowerCase();
        if (text.length() > 0) {
            text = text.substring(0, 1).toUpperCase() + text.substring(1);
        }
        return text;
    }

    /**
     * Converts the name of an enum constant to a more display-friendly format.
     * 
     * @param en the enum constant whose name is converted (must not be null)
     * @return the display-friendly text
     */
    public static String convertEnumToDisplayFriendly(Enum<?> en) {
        String constantName = en.name();
        return convertEnumToDisplayFriendly(constantName);
    }

    /**
     * Returns the stack trace of the throwable as a string, in the format produced by printStackTrace.
     * 
     * @param e the throwable (must not be null)
     * @return the printed stack trace
     */
    public static String getStackTrace(Throwable e) {
        StringWriter buffer = new StringWriter();
        e.printStackTrace(new PrintWriter(buffer));
        return buffer.toString();
    }

    /**
     * Returns the characters of the string which appear in the list of accepted characters,
     * keeping their original order. All other characters are dropped.
     * 
     * @param s the string to filter (must not be null)
     * @param acceptChars the characters to keep
     * @return the filtered string
     */
    public static String getFiltered(String s, char... acceptChars) {
        final String accepted = new String(acceptChars);
        StringBuilder kept = new StringBuilder();
        for (char ch : s.toCharArray()) {
            if (accepted.indexOf(ch) >= 0) {
                kept.append(ch);
            }
        }
        return kept.toString();
    }

    /**
     * Joins the numbers, placing the separator between consecutive elements.
     * 
     * @param separator text placed between consecutive elements
     * @param ints the numbers to join; null is treated as empty
     * @return the joined string, empty if there are no numbers
     */
    public static String toString(String separator, int... ints) {
        StringBuilder joined = new StringBuilder();
        if (ints == null) {
            return joined.toString();
        }

        boolean first = true;
        for (int value : ints) {
            if (!first) {
                joined.append(separator);
            }
            joined.append(value);
            first = false;
        }
        return joined.toString();
    }

    /**
     * Joins the numbers into a single comma separated string.
     * 
     * @param ints the numbers to join; null gives an empty string
     * @return the comma separated string
     */
    public static String toCommas(int... ints) {
        return ints == null ? "" : toString(",", ints);
    }

    /**
     * Remove chars from the string which could prove problematic when exporting. Currently these are the
     * chars not allowed in workbook sheet names: forward slash, backslash, question mark, asterisk,
     * square brackets and colon.
     * 
     * @param s the input string (must not be null)
     * @return the string without the illegal chars
     */
    public static String removeExportIllegalChars(String s) {
        final String illegalChars = "/\\?*][:";

        StringBuilder cleaned = new StringBuilder();
        for (char ch : s.toCharArray()) {
            if (illegalChars.indexOf(ch) < 0) {
                cleaned.append(ch);
            }
        }
        return cleaned.toString();
    }

    /**
     * Returns the left part of the string, cut at a comma or space so that a word is not split.
     * A string no longer than maxNbChars is returned unchanged. Otherwise the string is cut just before the
     * last recorded break character (comma or space) lying within the first maxNbChars characters. If the first
     * break character lies beyond maxNbChars the string is cut there instead, so the result can be longer than
     * maxNbChars. If no cut position is found (for example there is no break character at all) an empty string
     * is returned.
     * 
     * @param s the input string (must not be null)
     * @param maxNbChars the maximum number of characters wanted
     * @return the left part of the string, without the break character it was cut at
     */
    public static String getLeftWithoutWordSplitting(String s, int maxNbChars) {
        int n = s.length();
        if (n <= maxNbChars) {
            return s;
        }

        // work out the max char to take
        int nbInclude = 0;

        // true when the previous character was not handled as a break; only then can a break be recorded
        boolean onNonBreak = true;
        for (int i = 0; i < n; i++) {
            char c = s.charAt(i);

            // 1-based position of the current character
            int nbChar = i + 1;

            if (onNonBreak && (c == ',' || c == ' ')) {
                // record the cut position if it is within the limit, or if nothing has been recorded yet
                if (nbChar <= maxNbChars || nbInclude == 0) {
                    nbInclude = nbChar - 1;
                }

                // past the limit, so no later break can be used
                if (nbChar > maxNbChars) {
                    break;
                }

                onNonBreak = false;
            } else {
                // also reached for a break character directly after a handled break, which is not recorded
                onNonBreak = true;
            }
        }

        if (nbInclude == 0) {
            return "";
        }
        return s.substring(0, nbInclude);
    }

    /**
     * Small manual test which prints a set of sample strings next to their standardised versions.
     * 
     * @param args not used
     */
    public static void main(String[] args) {
        String[] samples = { "   VEHILCE 9", "vehicle   ", "    vehicle 01   ", "vehicle 10", "vehicle    23b",
                "ve   HIC  le 23", "  artic 1" };

        for (int i = 0; i < samples.length; i++) {
            String sample = samples[i];
            StringBuilder line = new StringBuilder();
            line.append("\"").append(sample).append("\"");
            line.append(" -> ");
            line.append("\"").append(std(sample)).append("\"");
            System.out.println(line.toString());
        }
    }

    /**
     * Creates a sorted set holding the input strings. Duplicates are stored once.
     * 
     * @param strs the strings to add (null elements are not supported)
     * @return a new set
     */
    public static TreeSet<String> toTreeSet(String... strs) {
        TreeSet<String> sorted = new TreeSet<>();
        Collections.addAll(sorted, strs);
        return sorted;
    }

    /**
     * Gets the messages from the exception and the exception(s) that caused it as a single string,
     * with a line separator after every message.
     * 
     * @param e the exception
     * @return the messages, one per line
     */
    public static String getExceptionMessagesAsSingleStr(Throwable e) {
        final String newline = System.lineSeparator();
        StringBuilder combined = new StringBuilder();
        for (String message : getExceptionMessages(e)) {
            combined.append(message).append(newline);
        }
        return combined.toString();
    }

    /**
     * Gets the list of all messages from the exception and the ancestor exception(s) that caused it.
     * The list is returned in chronological order, i.e. the root cause comes first. Exceptions without a
     * message are skipped, as are messages containing (ignoring case) an already listed message of more
     * than three characters. Messages are prefixed with the exception type unless the type is exactly
     * Exception or RuntimeException.
     * 
     * @param e the exception; null gives an empty list
     * @return the messages (never null)
     */
    public static List<String> getExceptionMessages(Throwable e) {
        // walk the chain of causes, inserting at the front so that the root cause ends up first
        ArrayList<Throwable> chain = new ArrayList<>();
        for (Throwable current = e; current != null; current = current.getCause()) {
            chain.add(0, current);
        }

        // raw messages already listed, used to detect repeats
        ArrayList<String> shownMessages = new ArrayList<>();

        ArrayList<String> formatted = new ArrayList<>();
        for (Throwable exception : chain) {
            String msg = exception.getMessage();
            if (isEmpty(msg)) {
                continue;
            }

            // skip if part of the message has already been shown as its likely just the same message with
            // the exception class name added to the start
            boolean repeated = false;
            String lowerMsg = msg.toLowerCase();
            for (String shown : shownMessages) {
                if (shown.length() > 3 && lowerMsg.contains(shown.toLowerCase())) {
                    repeated = true;
                    break;
                }
            }
            if (repeated) {
                continue;
            }

            shownMessages.add(msg);

            // only print the exception type if it gives the user some information
            Class<?> type = exception.getClass();
            boolean genericType = type == Exception.class || type == RuntimeException.class;
            formatted.add(genericType ? msg : "Exception of type \"" + type.getSimpleName() + "\" : " + msg);
        }
        return formatted;
    }

    /**
     * Callback which receives the lines of a file one at a time.
     */
    public static interface LineCB {
        /**
         * @param line the line which was read, without its line terminator
         * @return true to continue with the next line, false to stop reading
         */
        boolean lineCB(String line);
    }

    /**
     * Reads the file line by line, passing each line to the callback until the end of the file is
     * reached or the callback returns false.
     * 
     * @param file the file to read
     * @param cb callback which receives each line
     * @throws RuntimeException wrapping the underlying problem if the file cannot be opened or read, or
     *         if the callback throws
     */
    public static void parseFileAsLines(File file, LineCB cb) {
        // try-with-resources only closes a reader which was actually opened, so a failure to
        // open the file is reported as itself rather than as a null pointer exception
        try (BufferedReader br = new BufferedReader(new FileReader(file))) {
            String line = br.readLine();
            while (line != null) {
                if (!cb.lineCB(line)) {
                    break;
                }
                line = br.readLine();
            }
        } catch (Throwable e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns true if the string is empty, or contains solely ASCII letters, digits, whitespace or
     * hyphens. Any other character makes the result false.
     * 
     * @param s the string to test (must not be null)
     * @return true if no disallowed character is present
     */
    public static boolean isEmptyAlphaNumericWhitespaceOrDash(String s) {
        return !nonAlphaNumericOrWhitespace.matcher(s).find();
    }

    /**
     * Create a copy of the array where the first element is removed. The input array is not modified.
     * 
     * @param arr the original array (must not be null)
     * @return a new array holding the second and later elements; empty if the input has fewer than two elements
     */
    public static String[] removeFirstElement(String[] arr) {
        final int remaining = arr.length - 1;
        String[] tail = new String[Math.max(0, remaining)];
        if (remaining > 0) {
            System.arraycopy(arr, 1, tail, 0, remaining);
        }
        return tail;
    }
}