com.cinchapi.concourse.util.Strings.java Source code

Introduction

Here is the source code for com.cinchapi.concourse.util.Strings.java
Source

/*
 * Copyright (c) 2013-2016 Cinchapi Inc.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.cinchapi.concourse.util;

import java.util.List;
import java.util.Set;

import javax.annotation.Nullable;

import org.slf4j.helpers.MessageFormatter;

import com.cinchapi.common.base.Characters;
import com.google.common.base.MoreObjects;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.common.primitives.Doubles;
import com.google.common.primitives.Floats;
import com.google.common.primitives.Ints;
import com.google.common.primitives.Longs;
import com.google.gson.JsonParseException;

/**
 * Yet another collection of utility functions for Strings that tries to add new
 * functionality or optimize existing ones.
 * 
 * @author Jeff Nelson
 */
public final class Strings {

    /**
     * Ensure that {@code string} ends with {@code suffix} by appending it to
     * {@code string} if and only if it is not already the last sequence of
     * characters in the string.
     * 
     * @param string the {@link String} to that should end with {@code suffix}
     * @param suffix the {@link String} of characters with which {@code string}
     *            should end
     * @return {@code string} if it already ends with {@code suffix} or a new
     *         {@link String} that contains {@code suffix} appended to
     *         {@code string}
     */
    public static String ensureEndsWith(String string, String suffix) {
        if (string.endsWith(suffix)) {
            return string;
        } else {
            return joinSimple(string, suffix);
        }
    }

    /**
     * Ensure that {@code string} starts with {@code prefix} by prepending it to
     * {@code string} if and only if it is not already the first sequence of
     * characters in the string.
     * 
     * @param string the {@link String} to that should start with {@code prefix}
     * @param prefix the {@link String} of characters with which {@code string}
     *            should start
     * @return {@code string} if it already begins with {@code prefix} or a new
     *         {@link String} that contains {@code prefix} prepended to
     *         {@code string}
     */
    public static String ensureStartsWith(String string, String prefix) {
        if (string.startsWith(prefix)) {
            return string;
        } else {
            return joinSimple(prefix, string);
        }
    }

    /**
     * Ensure that {@code string} is surrounded by quotes. If that is not the
     * case, alter the string so that it is and return the altered form.
     * 
     * <p>
     * Calling {@link Strings#isWithinQuotes(String)} on the result of this
     * method will always return {@code true}.
     * </p>
     * 
     * @param string the string that must be quoted
     * @return {@code string} or {@code string} surrounded by quotes if it is
     *         not already
     */
    public static String ensureWithinQuotes(String string) {
        return isWithinQuotes(string) ? string : joinSimple("\"", string, "\"");
    }

    /**
     * Wrap {@code string} within quotes if it is necessary to do so. Otherwise,
     * return the original {@code string}.
     * 
     * <p>
     * The original {@code string} will be wrapped in quotes and returned as
     * such if:
     * <ul>
     * <li>it is not already wrapped {@link #isWithinQuotes(String) within
     * quotes}, and</li>
     * <li>{@code delimiter} appears at least once</li>
     * </ul>
     * If those conditions are met, the original string will be wrapped in
     * either
     * <ul>
     * <li>double quotes if a single quote appears in the original string, or</li>
     * <li>single quotes if a double quote appears in the original string, or</li>
     * <li>double quotes if both a single and double quote appear in the
     * original string; furthermore, all instances of double quotes within the
     * original string will be escaped</li>
     * </ul>
     * </p>
     * 
     * @param string the string to potentially quote
     * @param delimiter the delimiter that determines whether quoting should
     *            happen
     * @return the original {@code string} or a properly quoted alternative
     */
    public static String ensureWithinQuotesIfNeeded(String string, char delimiter) {
        boolean foundDouble = false;
        boolean foundSingle = false;
        boolean foundDelimiter = false;
        StringBuilder escaped = new StringBuilder();
        escaped.append('"');
        if (!isWithinQuotes(string)) {
            char[] chars = string.toCharArray();
            for (int i = 0; i < chars.length; ++i) {
                char c = chars[i];
                if (c == delimiter) {
                    foundDelimiter = true;
                } else if (c == '"') {
                    foundDouble = true;
                    escaped.append('\\');
                } else if (c == '\'') {
                    foundSingle = true;
                }
                escaped.append(c);
            }
            escaped.append('"');
            if (foundDelimiter) {
                if (foundDouble && foundSingle) {
                    return escaped.toString();
                } else if (foundDouble) {
                    return Strings.format("'{}'", string);
                } else { // foundSingle or found no quotes
                    return Strings.format("\"{}\"", string);
                }
            }
        }
        return string;
    }

    /**
     * Efficiently escape inner occurrences of each of the {@code characters}
     * within the {@code string}, if necessary.
     * <p>
     * Escaped characters are prepended with the backslash ('\') character.
     * </p>
     * <p>
     * An "inner occurrence" for a character is one that is not at the head or
     * tail of the string.
     * </p>
     * 
     * @param string the string to escape
     * @param characters the characters to escape within the {@code string}
     * @return the escaped {@code string}
     */
    public static String escapeInner(String string, char... characters) {
        char c = '\0';
        char pchar = '\0';
        StringBuilder sb = null;
        Set<Character> chars = null;
        if (characters.length == 1) {
            c = characters[0];
        } else {
            chars = Sets.newHashSetWithExpectedSize(characters.length);
            for (char ch : characters) {
                chars.add(ch);
            }
        }
        char[] schars = string.toCharArray();
        int offset = 0;
        int i = 0;
        while (i < schars.length) {
            if (i > 0 && i < schars.length - 1) {
                char schar = schars[i];
                if (pchar != '\\' && ((c != '\0' && c == schar) || (chars != null && chars.contains(schar)))) {
                    sb = MoreObjects.firstNonNull(sb, new StringBuilder());
                    sb.append(schars, offset, i - offset);
                    sb.append('\\');
                    char escaped = Characters.getEscapedCharOrNullLiteral(schar);
                    if (escaped != '0') {
                        sb.append(escaped);
                    } else {
                        sb.append(schar);
                    }
                    offset = i + 1;
                }
                pchar = schar;
            }
            ++i;
        }
        if (sb != null) {
            sb.append(schars, offset, i - offset);
            return sb.toString();
        } else {
            return string;
        }
    }

/**
 * Replace all instances of "confusable" unicode characters with a
 * canoncial/normalized character.
 * <p>
 * See <a href=
 * "http://www.unicode.org/Public/security/revision-03/confusablesSummary.txt"
 * >http://www.unicode.org/Public/security/revision-03/confusablesSummary.
 * txt</a> for a list of characters that are considered to be confusable.
 * </p>
 * 
 * @param string the {@link String} in which the replacements should occur
 * @return a {@link String} free of confusable unicode characters
 */
public static String replaceUnicodeConfusables(String string) {
    char[] chars = string.toCharArray();
    for (int i = 0; i < chars.length; ++i) {
        char c = chars[i];
        switch (c) {
        default:
            break;
        case '':
        case '?':
        case '':
        case '':
        case '':
        case '':
        case '':
        case '?':
        case '':
        case '':
        case '':
        case '':
        case '':
            c = '"';
            break;
        }
        chars[i] = c;
    }
    return String.valueOf(chars);
}

    /**
     * Perform string substitution and formatting in a manner that is similar to
     * the SLF4J library.
     * 
     * <pre>
     * Strings#format("Bob is very {} because he has no {}", "brave", "fear") = "Bob is very brave because he has no fear"
     * </pre>
     * <p>
     * <strong>NOTE:</strong> This method is less efficient than using a
     * {@link StringBuilder} and manually appending variable arguments for
     * interpolation. This is provided for convenience, but don't use it for
     * anything that is performance critical.
     * </p>
     * 
     * @param pattern the message pattern which will be parsed and formatted
     * @param params an array of arguments to be substituted in place of
     *            formatting anchors
     * @return The formatted message
     */
    public static String format(String pattern, Object... params) {
        return MessageFormatter.arrayFormat(pattern, params).getMessage();
    }

    /**
     * Return a set that contains every possible substring of {@code string}
     * excluding pure whitespace strings.
     * 
     * @param string the string to divide into substrings
     * @return the set of substrings
     */
    public static Set<String> getAllSubStrings(String string) {
        Set<String> result = Sets.newHashSet();
        for (int i = 0; i < string.length(); ++i) {
            for (int j = i + 1; j <= string.length(); ++j) {
                String substring = string.substring(i, j).trim();
                if (!com.google.common.base.Strings.isNullOrEmpty(substring)) {
                    result.add(substring);
                }
            }
        }
        return result;
    }

    /**
     * An optimized version of {@link String#contains(CharSequence)} to see if
     * {@code needle} is a substring of {@code haystack}.
     * 
     * @param needle the substring for which to search
     * @param haystack the string in which to search for the substring
     * @return {@code true} if {@code needle} is a substring
     */
    public static boolean isSubString(String needle, String haystack) {
        if (needle.length() > haystack.length()) {
            return false;
        } else if (needle.length() == haystack.length()) {
            return needle.equals(haystack);
        } else {
            char[] n = needle.toCharArray();
            char[] h = haystack.toCharArray();
            int npos = 0;
            int hpos = 0;
            int stop = h.length - n.length;
            int hstart = -1;
            while (hpos < h.length && npos < n.length) {
                char hi = h[hpos];
                char ni = n[npos];
                if (hi == ni) {
                    if (hstart == -1) {
                        hstart = hpos;
                    }
                    ++npos;
                    ++hpos;
                } else {
                    if (npos > 0) {
                        npos = 0;
                        hpos = hstart + 1;
                        hstart = -1;
                    } else {
                        ++hpos;
                    }
                    if (hpos > stop) {
                        return false;
                    }
                }
            }
            return npos == n.length;
        }
    }

    /**
     * Return {@code true} if the {@code json} string is valid, otherwise return
     * {@code false}.
     * 
     * @param json a json formatted string
     * @return {@code true} if the {@code json} is valid
     */
    public static boolean isValidJson(String json) {
        char first = json.charAt(0);
        char last = json.charAt(json.length() - 1);
        if ((first == '[' || first == '{') && (last == ']' || last == '}')) {
            try {
                DataServices.jsonParser().parse(json);
                return true;
            } catch (JsonParseException e) {
                return false;
            }
        } else {
            return false;
        }
    }

    /**
     * Return {@code true} if {@code string} both starts and ends with single or
     * double quotes.
     * 
     * @param string
     * @return {@code true} if the string is between quotes
     */
    public static boolean isWithinQuotes(String string) {
        if (string.length() > 2) {
            char first = string.charAt(0);
            if (first == '"' || first == '\'') {
                char last = string.charAt(string.length() - 1);
                return first == last;
            }
        }
        return false;
    }

    /**
     * Concatenates the {@link Object#toString string} representation of all the
     * {@code args}, separated by the {@code separator} char in an efficient
     * manner.
     * 
     * @param separator the separator to place between each of the {@code args}
     * @param args the args to join
     * @return the resulting String
     */
    public static String join(char separator, Object... args) {
        StringBuilder builder = new StringBuilder();
        for (int i = 0; i < args.length; ++i) {
            builder.append(args[i]);
            builder.append(separator);
        }
        builder.deleteCharAt(builder.length() - 1);
        return builder.toString();
    }

    /**
     * Concatenates the {@link Object#toString string} representation of all the
     * {@code args}, separated by the {@code separator} string in an efficient
     * manner.
     * 
     * @param separator the separator to place between each of the {@code args}
     * @param args the args to join
     * @return the resulting String
     */
    public static String join(String separator, Object... args) {
        StringBuilder builder = new StringBuilder();
        for (int i = 0; i < args.length; ++i) {
            builder.append(args[i]);
            builder.append(separator);
        }
        builder.deleteCharAt(builder.length() - 1);
        return builder.toString();
    }

    /**
     * Concatenates the toString values of all the {@code args} in an efficient
     * manner.
     * 
     * @param args
     * @return the resulting String
     */
    public static String joinSimple(Object... args) {
        StringBuilder builder = new StringBuilder();
        for (int i = 0; i < args.length; ++i) {
            builder.append(args[i]);
        }
        return builder.toString();
    }

    /**
     * Concatenates the toString values of all the {@code args}, separated by
     * whitespace in an efficient manner.
     * 
     * @param args
     * @return the resulting String
     */
    public static String joinWithSpace(Object... args) {
        return join(' ', args);
    }

    /**
     * Split a string, using whitespace as a delimiter, as long as the
     * whitespace is not wrapped in double or single quotes.
     * 
     * @param string
     * @return the tokens that result from the split
     * @deprecated in version 0.5.0, use {@link QuoteAwareStringSplitter}
     *             instead.
     */
    @Deprecated
    public static String[] splitButRespectQuotes(String string) {
        return splitStringByDelimiterButRespectQuotes(string, " ");
    }

    /**
     * Split a camel case {@code string} into tokens that represent the distinct
     * words.
     * <p>
     * <h1>Example</h1>
     * <ul>
     * thisIsACamelCaseSTRING -> [this, Is, A, Camel, Case, S, T, R, I, N, G]
     * </ul>
     * <ul>
     * ThisIsACamelCaseSTRING -> [This, Is, A, Camel, Case, S, T, R, I, N, G]
     * </ul>
     * <ul>
     * thisisacamelcasestring -> [thisisacamelcasestring]
     * </ul>
     * </p>
     * 
     * @param string
     * @return a list of tokens after splitting the string on camel case word
     *         boundaries
     */
    public static List<String> splitCamelCase(String string) {
        List<String> words = Lists.newArrayList();
        char[] chars = string.toCharArray();
        StringBuilder word = new StringBuilder();
        for (int i = 0; i < chars.length; ++i) {
            char c = chars[i];
            if (Character.isUpperCase(c) || c == '$') {
                if (word.length() > 0) {
                    words.add(word.toString());
                }
                word.setLength(0);
            }
            word.append(c);
        }
        words.add(word.toString());
        return words;
    }

    /**
     * Split a string on a delimiter as long as that delimiter is not wrapped in
     * double or single quotes.
     * <p>
     * If {@code delimiter} is a single character string, it is more efficient
     * to use a {@link StringSplitter} as opposed to this method.
     * </p>
     * 
     * @param string the string to split
     * @param delimiter the delimiting string/regex on which the input
     *            {@code string} is split
     * @return the tokens that result from the split
     */
    public static String[] splitStringByDelimiterButRespectQuotes(String string, String delimiter) {
        // This is pretty inefficient: convert all single quotes to double
        // quotes (except one off single quotes that are used as apostrophes) so
        // the regex below works
        string = string.replaceAll(" '", " \"");
        string = string.replaceAll("' ", "\" ");
        string = string.replaceAll("'$", "\"");
        return string.split(delimiter + "(?=([^\"]*\"[^\"]*\")*[^\"]*$)");
    }

    /**
     * This method efficiently tries to parse {@code value} into a
     * {@link Boolean} object if possible. If the string is not a boolean, then
     * the method returns {@code null} as quickly as possible.
     * 
     * @param value
     * @return a Boolean object that represents the string or {@code null} if it
     *         is not possible to parse the string into a boolean
     */
    public static Boolean tryParseBoolean(String value) {
        if (value.equalsIgnoreCase("true")) {
            return true;
        } else if (value.equalsIgnoreCase("false")) {
            return false;
        } else {
            return null;
        }
    }

    /**
     * This method efficiently tries to parse {@code value} into a
     * {@link Number} object if possible. If the string is not a number, then
     * the method returns {@code null} as quickly as possible.
     * 
     * @param value
     * @return a Number object that represents the string or {@code null} if it
     *         is not possible to parse the string into a number
     */
    @Nullable
    public static Number tryParseNumber(String value) {
        int size = value.length();
        if (value == null || size == 0) {
            return null;
        } else if (value.charAt(0) == '0' && size > 1 && value.charAt(1) != '.') {
            // Do not parse a string as a number if it has a leading 0 that is
            // not followed by a decimal (i.e. 007)
            return null;
        }
        boolean decimal = false;
        for (int i = 0; i < size; ++i) {
            char c = value.charAt(i);
            if (!Character.isDigit(c)) {
                if (i == 0 && c == '-') {
                    continue;
                } else if (c == '.') {
                    if (!decimal && size > 1) {
                        decimal = true;
                    } else {
                        // Since we've already seen a decimal, the appearance of
                        // another one suggests this is an IP address instead of
                        // a number
                        return null;
                    }
                } else if (i == size - 1 && c == 'D' && size > 1) {
                    // Respect the convention to coerce numeric strings to
                    // Double objects by appending a single 'D' character.
                    return Double.valueOf(value.substring(0, i));
                } else {
                    return null;
                }
            }
        }
        try {
            if (decimal) {
                // Try to return a float (for space compactness) if it is
                // possible to fit the entire decimal without any loss of
                // precision. In order to do this, we have to compare the string
                // output of both the parsed double and the parsed float. This
                // is kind of inefficient, so substitute for a better way if it
                // exists.
                double d = Doubles.tryParse(value);
                float f = Floats.tryParse(value);
                if (String.valueOf(d).equals(String.valueOf(f))) {
                    return f;
                } else {
                    return d;
                }
            } else {
                return MoreObjects.firstNonNull(Ints.tryParse(value), Longs.tryParse(value));
            }
        } catch (NullPointerException e) {
            throw new NumberFormatException(
                    Strings.format("{} appears to be a number but cannot be parsed as such", value));
        }
    }

    /**
     * A stricter version of {@link #tryParseNumber(String)} that does not parse
     * strings that masquerade as numbers (i.e. 3.124D). Instead this method
     * will only parse the string into a Number if it contains characters that
     * are either a decimal digit, a decimal separator or a negative sign.
     * 
     * @param value
     * @return a Number object that represents the string or {@code null} if it
     *         is not possible to parse the string into a number
     */
    @Nullable
    public static Number tryParseNumberStrict(String value) {
        if (value == null || value.length() == 0) {
            return null;
        }
        char last = value.charAt(value.length() - 1);
        if (Character.isDigit(last)) {
            return tryParseNumber(value);
        } else {
            return null;
        }
    }

    /**
     * Similar to the {@link String#valueOf(char)} method, but this one will
     * return a cached copy of the string for frequently used characters.
     * 
     * @param c the character to convert
     * @return a string of length 1 containing the input char
     */
    public static String valueOfCached(char c) {
        if (c == '(') {
            return "(";
        } else if (c == ')') {
            return ")";
        } else {
            return String.valueOf(c);
        }
    }

    private Strings() {
        /* noop */}

}