com.aliyun.odps.utils.StringUtils.java Source code

Java tutorial

Introduction

Here is the source code for com.aliyun.odps.utils.StringUtils.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.aliyun.odps.utils;

import java.io.PrintWriter;
import java.io.StringWriter;
import java.net.InetAddress;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.UnknownHostException;
import java.text.DateFormat;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.StringTokenizer;

/**
 * General string utils
 */
public class StringUtils {

    private static final DecimalFormat decimalFormat;

    static {
        NumberFormat numberFormat = NumberFormat.getNumberInstance(Locale.ENGLISH);
        decimalFormat = (DecimalFormat) numberFormat;
        decimalFormat.applyPattern("#.##");
    }

    /**
     * Make a string representation of the exception.
     *
     * @param e
     *     The exception to stringify
     * @return A string with exception name and call stack.
     */
    public static String stringifyException(Throwable e) {
        StringWriter stm = new StringWriter();
        PrintWriter wrt = new PrintWriter(stm);
        e.printStackTrace(wrt);
        wrt.close();
        return stm.toString();
    }

    /**
     * Given a full hostname, return the word upto the first dot.
     *
     * @param fullHostname
     *     the full hostname
     * @return the hostname to the first dot
     */
    public static String simpleHostname(String fullHostname) {
        int offset = fullHostname.indexOf('.');
        if (offset != -1) {
            return fullHostname.substring(0, offset);
        }
        return fullHostname;
    }

    private static DecimalFormat oneDecimal = new DecimalFormat("0.0");

    /**
     * Given an integer, return a string that is in an approximate, but human
     * readable format. It uses the bases 'k', 'm', and 'g' for 1024, 1024**2, and
     * 1024**3.
     *
     * @param number
     *     the number to format
     * @return a human readable form of the integer
     */
    public static String humanReadableInt(long number) {
        long absNumber = Math.abs(number);
        double result = number;
        String suffix = "";
        if (absNumber < 1024) {
            // nothing
        } else if (absNumber < 1024 * 1024) {
            result = number / 1024.0;
            suffix = "k";
        } else if (absNumber < 1024 * 1024 * 1024) {
            result = number / (1024.0 * 1024);
            suffix = "m";
        } else {
            result = number / (1024.0 * 1024 * 1024);
            suffix = "g";
        }
        return oneDecimal.format(result) + suffix;
    }

    /**
     * Format a percentage for presentation to the user.
     *
     * @param done
     *     the percentage to format (0.0 to 1.0)
     * @param digits
     *     the number of digits past the decimal point
     * @return a string representation of the percentage
     */
    public static String formatPercent(double done, int digits) {
        DecimalFormat percentFormat = new DecimalFormat("0.00%");
        double scale = Math.pow(10.0, digits + 2);
        double rounded = Math.floor(done * scale);
        percentFormat.setDecimalSeparatorAlwaysShown(false);
        percentFormat.setMinimumFractionDigits(digits);
        percentFormat.setMaximumFractionDigits(digits);
        return percentFormat.format(rounded / scale);
    }

    /**
     * Given an array of strings, return a comma-separated list of its elements.
     *
     * @param strs
     *     Array of strings
     * @return Empty string if strs.length is 0, comma separated list of strings
     * otherwise
     */

    public static String arrayToString(String[] strs) {
        if (strs.length == 0) {
            return "";
        }
        StringBuffer sbuf = new StringBuffer();
        sbuf.append(strs[0]);
        for (int idx = 1; idx < strs.length; idx++) {
            sbuf.append(",");
            sbuf.append(strs[idx]);
        }
        return sbuf.toString();
    }

    /**
     * Given an array of bytes it will convert the bytes to a hex string
     * representation of the bytes
     *
     * @param bytes
     * @param start
     *     start index, inclusively
     * @param end
     *     end index, exclusively
     * @return hex string representation of the byte array
     */
    public static String byteToHexString(byte[] bytes, int start, int end) {
        if (bytes == null) {
            throw new IllegalArgumentException("bytes == null");
        }
        StringBuilder s = new StringBuilder();
        for (int i = start; i < end; i++) {
            s.append(String.format("%02x", bytes[i]));
        }
        return s.toString();
    }

    /**
     * Same as byteToHexString(bytes, 0, bytes.length).
     */
    public static String byteToHexString(byte bytes[]) {
        return byteToHexString(bytes, 0, bytes.length);
    }

    /**
     * Given a hexstring this will return the byte array corresponding to the
     * string
     *
     * @param hex
     *     the hex String array
     * @return a byte array that is a hex string representation of the given
     * string. The size of the byte array is therefore hex.length/2
     */
    public static byte[] hexStringToByte(String hex) {
        byte[] bts = new byte[hex.length() / 2];
        for (int i = 0; i < bts.length; i++) {
            bts[i] = (byte) Integer.parseInt(hex.substring(2 * i, 2 * i + 2), 16);
        }
        return bts;
    }

    /**
     * @param uris
     */
    public static String uriToString(URI[] uris) {
        if (uris == null) {
            return null;
        }
        StringBuffer ret = new StringBuffer(uris[0].toString());
        for (int i = 1; i < uris.length; i++) {
            ret.append(",");
            ret.append(uris[i].toString());
        }
        return ret.toString();
    }

    /**
     * @param str
     */
    public static URI[] stringToURI(String[] str) {
        if (str == null) {
            return null;
        }
        URI[] uris = new URI[str.length];
        for (int i = 0; i < str.length; i++) {
            try {
                uris[i] = new URI(str[i]);
            } catch (URISyntaxException ur) {
                System.out.println("Exception in specified URI's " + StringUtils.stringifyException(ur));
                // making sure its asssigned to null in case of an error
                uris[i] = null;
            }
        }
        return uris;
    }

    /**
     * Given a finish and start time in long milliseconds, returns a String in the
     * format Xhrs, Ymins, Z sec, for the time difference between two times. If
     * finish time comes before start time then negative valeus of X, Y and Z wil
     * return.
     *
     * @param finishTime
     *     finish time
     * @param startTime
     *     start time
     */
    public static String formatTimeDiff(long finishTime, long startTime) {
        long timeDiff = finishTime - startTime;
        return formatTime(timeDiff);
    }

    /**
     * Given the time in long milliseconds, returns a String in the format Xhrs,
     * Ymins, Z sec.
     *
     * @param timeDiff
     *     The time difference to format
     */
    public static String formatTime(long timeDiff) {
        StringBuffer buf = new StringBuffer();
        long hours = timeDiff / (60 * 60 * 1000);
        long rem = (timeDiff % (60 * 60 * 1000));
        long minutes = rem / (60 * 1000);
        rem = rem % (60 * 1000);
        long seconds = rem / 1000;

        if (hours != 0) {
            buf.append(hours);
            buf.append("hrs, ");
        }
        if (minutes != 0) {
            buf.append(minutes);
            buf.append("mins, ");
        }
        // return "0sec if no difference
        buf.append(seconds);
        buf.append("sec");
        return buf.toString();
    }

    /**
     * Formats time in ms and appends difference (finishTime - startTime) as
     * returned by formatTimeDiff(). If finish time is 0, empty string is
     * returned, if start time is 0 then difference is not appended to return
     * value.
     *
     * @param dateFormat
     *     date format to use
     * @param finishTime
     *     fnish time
     * @param startTime
     *     start time
     * @return formatted value.
     */
    public static String getFormattedTimeWithDiff(DateFormat dateFormat, long finishTime, long startTime) {
        StringBuffer buf = new StringBuffer();
        if (0 != finishTime) {
            buf.append(dateFormat.format(new Date(finishTime)));
            if (0 != startTime) {
                buf.append(" (" + formatTimeDiff(finishTime, startTime) + ")");
            }
        }
        return buf.toString();
    }

    /**
     * Returns an arraylist of strings.
     *
     * @param str
     *     the comma seperated string values
     * @return the arraylist of the comma seperated string values
     */
    public static String[] getStrings(String str) {
        Collection<String> values = getStringCollection(str);
        if (values.size() == 0) {
            return null;
        }
        return values.toArray(new String[values.size()]);
    }

    /**
     * Returns a collection of strings.
     *
     * @param str
     *     comma seperated string values
     * @return an <code>ArrayList</code> of string values
     */
    public static Collection<String> getStringCollection(String str) {
        List<String> values = new ArrayList<String>();
        if (str == null) {
            return values;
        }
        StringTokenizer tokenizer = new StringTokenizer(str, ",");
        values = new ArrayList<String>();
        while (tokenizer.hasMoreTokens()) {
            values.add(tokenizer.nextToken());
        }
        return values;
    }

    final public static char COMMA = ',';
    final public static String COMMA_STR = ",";
    final public static char ESCAPE_CHAR = '\\';

    /**
     * Split a string using the default separator
     *
     * @param str
     *     a string that may have escaped separator
     * @return an array of strings
     */
    public static String[] split(String str) {
        return split(str, ESCAPE_CHAR, COMMA);
    }

    /**
     * Split a string using the given separator
     *
     * @param str
     *     a string that may have escaped separator
     * @param escapeChar
     *     a char that be used to escape the separator
     * @param separator
     *     a separator char
     * @return an array of strings
     */
    public static String[] split(String str, char escapeChar, char separator) {
        if (str == null) {
            return null;
        }
        ArrayList<String> strList = new ArrayList<String>();
        StringBuilder split = new StringBuilder();
        int index = 0;
        while ((index = findNext(str, separator, escapeChar, index, split)) >= 0) {
            ++index; // move over the separator for next search
            strList.add(split.toString());
            split.setLength(0); // reset the buffer
        }
        strList.add(split.toString());
        // remove trailing empty split(s)
        int last = strList.size(); // last split
        while (--last >= 0 && "".equals(strList.get(last))) {
            strList.remove(last);
        }
        return strList.toArray(new String[strList.size()]);
    }

    public static String[] split(String str, char separatorChar) {
        return splitWorker(str, separatorChar, false);
    }

    /**
     * Finds the first occurrence of the separator character ignoring the escaped
     * separators starting from the index. Note the substring between the index
     * and the position of the separator is passed.
     *
     * @param str
     *     the source string
     * @param separator
     *     the character to find
     * @param escapeChar
     *     character used to escape
     * @param start
     *     from where to search
     * @param split
     *     used to pass back the extracted string
     */
    public static int findNext(String str, char separator, char escapeChar, int start, StringBuilder split) {
        int numPreEscapes = 0;
        for (int i = start; i < str.length(); i++) {
            char curChar = str.charAt(i);
            if (numPreEscapes == 0 && curChar == separator) { // separator
                return i;
            } else {
                split.append(curChar);
                numPreEscapes = (curChar == escapeChar) ? (++numPreEscapes) % 2 : 0;
            }
        }
        return -1;
    }

    /**
     * Escape commas in the string using the default escape char
     *
     * @param str
     *     a string
     * @return an escaped string
     */
    public static String escapeString(String str) {
        return escapeString(str, ESCAPE_CHAR, COMMA);
    }

    /**
     * Escape <code>charToEscape</code> in the string with the escape char
     * <code>escapeChar</code>
     *
     * @param str
     *     string
     * @param escapeChar
     *     escape char
     * @param charToEscape
     *     the char to be escaped
     * @return an escaped string
     */
    public static String escapeString(String str, char escapeChar, char charToEscape) {
        return escapeString(str, escapeChar, new char[] { charToEscape });
    }

    // check if the character array has the character
    private static boolean hasChar(char[] chars, char character) {
        for (char target : chars) {
            if (character == target) {
                return true;
            }
        }
        return false;
    }

    /**
     * @param charsToEscape
     *     array of characters to be escaped
     */
    public static String escapeString(String str, char escapeChar, char[] charsToEscape) {
        if (str == null) {
            return null;
        }
        StringBuilder result = new StringBuilder();
        for (int i = 0; i < str.length(); i++) {
            char curChar = str.charAt(i);
            if (curChar == escapeChar || hasChar(charsToEscape, curChar)) {
                // special char
                result.append(escapeChar);
            }
            result.append(curChar);
        }
        return result.toString();
    }

    /**
     * Unescape commas in the string using the default escape char
     *
     * @param str
     *     a string
     * @return an unescaped string
     */
    public static String unEscapeString(String str) {
        return unEscapeString(str, ESCAPE_CHAR, COMMA);
    }

    /**
     * Unescape <code>charToEscape</code> in the string with the escape char
     * <code>escapeChar</code>
     *
     * @param str
     *     string
     * @param escapeChar
     *     escape char
     * @param charToEscape
     *     the escaped char
     * @return an unescaped string
     */
    public static String unEscapeString(String str, char escapeChar, char charToEscape) {
        return unEscapeString(str, escapeChar, new char[] { charToEscape });
    }

    /**
     * @param charsToEscape
     *     array of characters to unescape
     */
    public static String unEscapeString(String str, char escapeChar, char[] charsToEscape) {
        if (str == null) {
            return null;
        }
        StringBuilder result = new StringBuilder(str.length());
        boolean hasPreEscape = false;
        for (int i = 0; i < str.length(); i++) {
            char curChar = str.charAt(i);
            if (hasPreEscape) {
                if (curChar != escapeChar && !hasChar(charsToEscape, curChar)) {
                    // no special char
                    throw new IllegalArgumentException(
                            "Illegal escaped string " + str + " unescaped " + escapeChar + " at " + (i - 1));
                }
                // otherwise discard the escape char
                result.append(curChar);
                hasPreEscape = false;
            } else {
                if (hasChar(charsToEscape, curChar)) {
                    throw new IllegalArgumentException(
                            "Illegal escaped string " + str + " unescaped " + curChar + " at " + i);
                } else if (curChar == escapeChar) {
                    hasPreEscape = true;
                } else {
                    result.append(curChar);
                }
            }
        }
        if (hasPreEscape) {
            throw new IllegalArgumentException(
                    "Illegal escaped string " + str + ", not expecting " + escapeChar + " in the end.");
        }
        return result.toString();
    }

    /**
     * Return hostname without throwing exception.
     *
     * @return hostname
     */
    public static String getHostname() {
        try {
            return "" + InetAddress.getLocalHost();
        } catch (UnknownHostException uhe) {
            return "" + uhe;
        }
    }

    /**
     * Return a message for logging.
     *
     * @param prefix
     *     prefix keyword for the message
     * @param msg
     *     content of the message
     * @return a message for logging
     */
    private static String toStartupShutdownString(String prefix, String[] msg) {
        StringBuffer b = new StringBuffer(prefix);
        b.append("\n/************************************************************");
        for (String s : msg) {
            b.append("\n" + prefix + s);
        }
        b.append("\n************************************************************/");
        return b.toString();
    }

    /**
     * Print a log message for starting up and shutting down
     *
     * @param clazz
     *     the class of the server
     * @param args
     *     arguments
     * @param LOG
     *     the target log object
     */
    public static void startupShutdownMessage(Class<?> clazz, String[] args,
            final org.apache.commons.logging.Log LOG) {
        final String hostname = getHostname();
        final String classname = clazz.getSimpleName();
        LOG.info(toStartupShutdownString("STARTUP_MSG: ", new String[] { "Starting " + classname,
                "  host = " + hostname, "  args = " + Arrays.asList(args) }));

        Runtime.getRuntime().addShutdownHook(new Thread() {
            public void run() {
                LOG.info(toStartupShutdownString("SHUTDOWN_MSG: ",
                        new String[] { "Shutting down " + classname + " at " + hostname }));
            }
        });
    }

    /**
     * The traditional binary prefixes, kilo, mega, ..., exa, which can be
     * represented by a 64-bit integer. TraditionalBinaryPrefix symbol are case
     * insensitive.
     */
    public static enum TraditionalBinaryPrefix {
        KILO(1024), MEGA(KILO.value << 10), GIGA(MEGA.value << 10), TERA(GIGA.value << 10), PETA(
                TERA.value << 10), EXA(PETA.value << 10);

        public final long value;
        public final char symbol;

        TraditionalBinaryPrefix(long value) {
            this.value = value;
            this.symbol = toString().charAt(0);
        }

        /**
         * @return The TraditionalBinaryPrefix object corresponding to the symbol.
         */
        public static TraditionalBinaryPrefix valueOf(char symbol) {
            symbol = Character.toUpperCase(symbol);
            for (TraditionalBinaryPrefix prefix : TraditionalBinaryPrefix.values()) {
                if (symbol == prefix.symbol) {
                    return prefix;
                }
            }
            throw new IllegalArgumentException("Unknown symbol '" + symbol + "'");
        }

        /**
         * Convert a string to long. The input string is first be trimmed and then
         * it is parsed with traditional binary prefix.
         *
         * For example, "-1230k" will be converted to -1230 * 1024 = -1259520;
         * "891g" will be converted to 891 * 1024^3 = 956703965184;
         *
         * @param s
         *     input string
         * @return a long value represented by the input string.
         */
        public static long string2long(String s) {
            s = s.trim();
            final int lastpos = s.length() - 1;
            final char lastchar = s.charAt(lastpos);
            if (Character.isDigit(lastchar)) {
                return Long.parseLong(s);
            } else {
                long prefix = TraditionalBinaryPrefix.valueOf(lastchar).value;
                long num = Long.parseLong(s.substring(0, lastpos));
                if (num > (Long.MAX_VALUE / prefix) || num < (Long.MIN_VALUE / prefix)) {
                    throw new IllegalArgumentException(s + " does not fit in a Long");
                }
                return num * prefix;
            }
        }
    }

    /**
     * Escapes HTML Special characters present in the string.
     *
     * @param string
     * @return HTML Escaped String representation
     */
    public static String escapeHTML(String string) {
        if (string == null) {
            return null;
        }
        StringBuffer sb = new StringBuffer();
        boolean lastCharacterWasSpace = false;
        char[] chars = string.toCharArray();
        for (char c : chars) {
            if (c == ' ') {
                if (lastCharacterWasSpace) {
                    lastCharacterWasSpace = false;
                    sb.append("&nbsp;");
                } else {
                    lastCharacterWasSpace = true;
                    sb.append(" ");
                }
            } else {
                lastCharacterWasSpace = false;
                switch (c) {
                case '<':
                    sb.append("&lt;");
                    break;
                case '>':
                    sb.append("&gt;");
                    break;
                case '&':
                    sb.append("&amp;");
                    break;
                case '"':
                    sb.append("&quot;");
                    break;
                default:
                    sb.append(c);
                    break;
                }
            }
        }

        return sb.toString();
    }

    /**
     * Return an abbreviated English-language desc of the byte length
     */
    public static String byteDesc(long len) {
        double val = 0.0;
        String ending = "";
        if (len < 1024 * 1024) {
            val = (1.0 * len) / 1024;
            ending = " KB";
        } else if (len < 1024 * 1024 * 1024) {
            val = (1.0 * len) / (1024 * 1024);
            ending = " MB";
        } else if (len < 1024L * 1024 * 1024 * 1024) {
            val = (1.0 * len) / (1024 * 1024 * 1024);
            ending = " GB";
        } else if (len < 1024L * 1024 * 1024 * 1024 * 1024) {
            val = (1.0 * len) / (1024L * 1024 * 1024 * 1024);
            ending = " TB";
        } else {
            val = (1.0 * len) / (1024L * 1024 * 1024 * 1024 * 1024);
            ending = " PB";
        }
        return limitDecimalTo2(val) + ending;
    }

    public static synchronized String limitDecimalTo2(double d) {
        return decimalFormat.format(d);
    }

    public static boolean isEmpty(String str) {
        return str == null || str.length() == 0;
    }

    public static boolean isNullOrEmpty(String str) {
        return (str == null) || str.trim().isEmpty();
    }

    public static boolean isBlank(String str) {
        int strLen;
        if (str == null || (strLen = str.length()) == 0) {
            return true;
        }
        for (int i = 0; i < strLen; i++) {
            if ((Character.isWhitespace(str.charAt(i)) == false)) {
                return false;
            }
        }
        return true;
    }

    public static boolean isNotBlank(String str) {
        return !StringUtils.isBlank(str);
    }

    public static String join(Object[] array, char separator) {
        if (array == null) {
            return null;
        }

        return join(array, separator, 0, array.length);
    }

    public static String join(Object[] array, char separator, int startIndex, int endIndex) {
        if (array == null) {
            return null;
        }
        int bufSize = (endIndex - startIndex);
        if (bufSize <= 0) {
            return "";
        }

        bufSize *= ((array[startIndex] == null ? 16 : array[startIndex].toString().length()) + 1);
        StringBuffer buf = new StringBuffer(bufSize);

        for (int i = startIndex; i < endIndex; i++) {
            if (i > startIndex) {
                buf.append(separator);
            }
            if (array[i] != null) {
                buf.append(array[i]);
            }
        }
        return buf.toString();
    }

    public static String join(Object[] array, String separator) {
        if (array == null) {
            return null;
        }
        return join(array, separator, 0, array.length);
    }

    public static String join(Object[] array, String separator, int startIndex, int endIndex) {
        if (array == null) {
            return null;
        }
        if (separator == null) {
            separator = "";
        }

        // endIndex - startIndex > 0:   Len = NofStrings *(len(firstString) + len(separator))
        //           (Assuming that all Strings are roughly equally long)
        int bufSize = (endIndex - startIndex);
        if (bufSize <= 0) {
            return "";
        }

        bufSize *= ((array[startIndex] == null ? 16 : array[startIndex].toString().length()) + separator.length());

        StringBuilder buf = new StringBuilder(bufSize);

        for (int i = startIndex; i < endIndex; i++) {
            if (i > startIndex) {
                buf.append(separator);
            }
            if (array[i] != null) {
                buf.append(array[i]);
            }
        }
        return buf.toString();
    }

    public static String[] splitPreserveAllTokens(String str, char separatorChar) {
        return splitWorker(str, separatorChar, true);
    }

    private static String[] splitWorker(String str, char separatorChar, boolean preserveAllTokens) {
        // Performance tuned for 2.0 (JDK1.4)

        if (str == null) {
            return null;
        }
        int len = str.length();
        if (len == 0) {
            return new String[0];
        }
        List list = new ArrayList();
        int i = 0, start = 0;
        boolean match = false;
        boolean lastMatch = false;
        while (i < len) {
            if (str.charAt(i) == separatorChar) {
                if (match || preserveAllTokens) {
                    list.add(str.substring(start, i));
                    match = false;
                    lastMatch = true;
                }
                start = ++i;
                continue;
            }
            lastMatch = false;
            match = true;
            i++;
        }
        if (match || (preserveAllTokens && lastMatch)) {
            list.add(str.substring(start, i));
        }
        return (String[]) list.toArray(new String[list.size()]);
    }

    public static String strip(String str, String stripChars) {
        if (isEmpty(str)) {
            return str;
        }
        str = stripStart(str, stripChars);
        return stripEnd(str, stripChars);
    }

    public static String stripStart(String str, String stripChars) {
        int strLen;
        if (str == null || (strLen = str.length()) == 0) {
            return str;
        }
        int start = 0;
        if (stripChars == null) {
            while ((start != strLen) && Character.isWhitespace(str.charAt(start))) {
                start++;
            }
        } else if (stripChars.length() == 0) {
            return str;
        } else {
            while ((start != strLen) && (stripChars.indexOf(str.charAt(start)) != -1)) {
                start++;
            }
        }
        return str.substring(start);
    }

    public static String stripEnd(String str, String stripChars) {
        int end;
        if (str == null || (end = str.length()) == 0) {
            return str;
        }

        if (stripChars == null) {
            while ((end != 0) && Character.isWhitespace(str.charAt(end - 1))) {
                end--;
            }
        } else if (stripChars.length() == 0) {
            return str;
        } else {
            while ((end != 0) && (stripChars.indexOf(str.charAt(end - 1)) != -1)) {
                end--;
            }
        }
        return str.substring(0, end);
    }

    public static boolean equals(String str1, String str2) {
        return str1 == null ? str2 == null : str1.equals(str2);
    }
}