com.salas.bb.utils.StringUtils.java Source code

Introduction

Here is the source code for com.salas.bb.utils.StringUtils.java
Source

// BlogBridge -- RSS feed reader, manager, and web based service
// Copyright (C) 2002-2006 by R. Pito Salas
//
// This program is free software; you can redistribute it and/or modify it under
// the terms of the GNU General Public License as published by the Free Software Foundation;
// either version 2 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
// without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along with this program;
// if not, write to the Free Software Foundation, Inc., 59 Temple Place,
// Suite 330, Boston, MA 02111-1307 USA
//
// Contact: R. Pito Salas
// mailto:pitosalas@users.sourceforge.net
// More information: about BlogBridge
// http://www.blogbridge.com
// http://sourceforge.net/projects/blogbridge
//
// $Id: StringUtils.java,v 1.42 2007/11/07 17:16:48 spyromus Exp $
//

package com.salas.bb.utils;

import com.salas.bb.utils.i18n.Strings;
import sun.io.Converters;
import sun.misc.BASE64Encoder;

import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Collection of string utilities.
 */
public final class StringUtils extends org.apache.commons.lang.StringUtils {
    /** Logger of this class; used to report a missing UTF-8 encoding. */
    private static final Logger LOG = Logger.getLogger(StringUtils.class.getName());

    /** Unit names used by <code>sizeToString</code>, ordered from the smallest to the largest. */
    private static final String[] SIZE_UNIT = { "Bytes", "Kb", "Mb", "Gb" };

    /** Number format used by <code>sizeToString</code>; configured in the static initializer. */
    private static final DecimalFormat FORMAT = new DecimalFormat();

    /**
     * Matches a single keyword token: either a double-quoted phrase (group 3 holds the text
     * between the quotes) or a run of characters that are neither whitespace nor quotes (group 4).
     * Group 2 holds the token as written, without the surrounding whitespace.
     */
    private static final Pattern PATTERN_KEYWORDS = Pattern.compile("\\s*((\\\"([^\\\"]*)\\\"|([^\\s\\\"]+))\\s*)");
    /** Matches a string which starts with letters followed by <code>:/</code>. */
    private static final Pattern PATTERN_URL_WITH_PROTOCOL = Pattern.compile("^[a-zA-Z]+:/");
    /** Matches a run of one or more punctuation or whitespace characters. */
    private static final Pattern PATTERN_PUNCTUATION = Pattern
            .compile("([\\-\\+#\\$%^&\\_*\\s,.\\(\\)\\[\\]<>!\\?\"':;/\\\\])+");

    /** Entity name (without <code>&amp;</code> and <code>;</code>) to character; filled by the static initializer. */
    private static final Map<String, Character> ENTITIES = new HashMap<String, Character>();

    // Configures the size format and fills the entity table.
    static {
        // Sizes are always shown with exactly one fraction digit.
        FORMAT.setMaximumFractionDigits(1);
        FORMAT.setMinimumFractionDigits(1);

        // The position of a name in this array defines its character: the i-th name is
        // mapped to the code point 0xA0 + i by the loop below, so the order must not change.
        String[] fromA0 = { "nbsp", "iexcl", "cent", "pound", "curren", "yen", "brvbar", "sect", "uml", "copy",
                "ordf", "laquo", "not", "shy", "reg", "macr", "deg", "plusmn", "sup2", "sup3", "acute", "micro",
                "para", "middot", "cedil", "sup1", "ordm", "raquo", "frac14", "frac12", "frac34", "iquest",
                "Agrave", "Aacute", "Acirc", "Atilde", "Auml", "Aring", "AElig", "Ccedil", "Egrave", "Eacute",
                "Ecirc", "Euml", "Igrave", "Iacute", "Icirc", "Iuml", "ETH", "Ntilde", "Ograve", "Oacute", "Ocirc",
                "Otilde", "Ouml", "times", "Oslash", "Ugrave", "Uacute", "Ucirc", "Uuml", "Yacute", "THORN",
                "szlig", "agrave", "aacute", "acirc", "atilde", "auml", "aring", "aelig", "ccedil", "egrave",
                "eacute", "ecirc", "euml", "igrave", "iacute", "icirc", "iuml", "eth", "ntilde", "ograve", "oacute",
                "ocirc", "otilde", "ouml", "divide", "oslash", "ugrave", "uacute", "ucirc", "uuml", "yacute",
                "thorn", "yuml" };
        for (int i = 0; i < fromA0.length; i++)
            ENTITIES.put(fromA0[i], (char) (0xa0 + i));
        // Entities outside of the contiguous 0xA0.. range are registered with explicit decimal code points.
        ENTITIES.put("trade", (char) 8482);
        ENTITIES.put("OElig", (char) 338);
        ENTITIES.put("oelig", (char) 339);
        ENTITIES.put("Scaron", (char) 352);
        ENTITIES.put("scaron", (char) 353);
        ENTITIES.put("Yuml", (char) 376);
        ENTITIES.put("circ", (char) 710);
        ENTITIES.put("tilde", (char) 732);
        ENTITIES.put("ensp", (char) 8194);
        ENTITIES.put("emsp", (char) 8195);
        ENTITIES.put("thinsp", (char) 8201);
        ENTITIES.put("zwnj", (char) 8204);
        ENTITIES.put("zwj", (char) 8205);
        ENTITIES.put("lrm", (char) 8206);
        ENTITIES.put("rrm", (char) 8207);
        ENTITIES.put("ndash", (char) 8211);
        ENTITIES.put("mdash", (char) 8212);
        ENTITIES.put("lsquo", (char) 8216);
        ENTITIES.put("rsquo", (char) 8217);
        ENTITIES.put("sbquo", (char) 8218);
        ENTITIES.put("ldquo", (char) 8220);
        ENTITIES.put("rdquo", (char) 8221);
        ENTITIES.put("bdquo", (char) 8222);
        ENTITIES.put("dagger", (char) 8224);
        ENTITIES.put("Dagger", (char) 8225);
        ENTITIES.put("hellip", (char) 8230);
        ENTITIES.put("permil", (char) 8240);
        ENTITIES.put("lsaquo", (char) 8249);
        ENTITIES.put("rsaquo", (char) 8250);
        ENTITIES.put("euro", (char) 8364);
        // Markup-significant characters.
        ENTITIES.put("amp", '&');
        ENTITIES.put("lt", '<');
        ENTITIES.put("gt", '>');
        ENTITIES.put("apos", '\'');
        ENTITIES.put("quot", '"');
    }

    /**
     * Hidden utility class constructor. The class exposes only static members,
     * so the constructor is private to prevent instantiation.
     */
    private StringUtils() {
    }

    /**
     * Decodes UTF-8 bytes into a string. Should the runtime report UTF-8 as unsupported,
     * a severe record is logged and the bytes are decoded with the default encoding instead.
     *
     * @param string    bytes forming the string; may be <code>NULL</code>.
     *
     * @return decoded string or <code>NULL</code> if the bytes were <code>NULL</code>.
     */
    public static String fromUTF8(byte[] string) {
        if (string == null) {
            return null;
        }

        try {
            return new String(string, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            // Fall back to the default encoding, but leave a trace in the log.
            LOG.severe(Strings.error("utf8.not.supported"));
            return new String(string);
        }
    }

    /**
     * Decodes every UTF-8 byte array of the given array into a string. Each element is
     * decoded by <code>fromUTF8(byte[])</code>, so the same fallback rules apply.
     *
     * @param strings   array of byte arrays to decode; may be <code>NULL</code>.
     *
     * @return array of decoded strings or <code>NULL</code> if the source was <code>NULL</code>.
     */
    public static String[] fromUTF8(byte[][] strings) {
        if (strings == null) {
            return null;
        }

        String[] decoded = new String[strings.length];
        int index = 0;
        for (byte[] encoded : strings) {
            decoded[index++] = fromUTF8(encoded);
        }

        return decoded;
    }

    /**
     * Encodes a string into UTF-8 bytes. Should the runtime report UTF-8 as unsupported,
     * a severe record is logged and the string is encoded with the default encoding instead.
     *
     * @param string    string to encode; may be <code>NULL</code>.
     *
     * @return encoded bytes or <code>NULL</code> if the string was <code>NULL</code>.
     */
    public static byte[] toUTF8(String string) {
        if (string == null) {
            return null;
        }

        try {
            return string.getBytes("UTF-8");
        } catch (UnsupportedEncodingException e) {
            // Fall back to the default encoding, but leave a trace in the log.
            LOG.severe(Strings.error("utf8.not.supported"));
            return string.getBytes();
        }
    }

    /**
     * Breaks multi-line text into separate lines using the line feed as a separator.
     *
     * @param text      text to break; may be <code>NULL</code>.
     *
     * @return array of lines or <code>NULL</code> if the text was <code>NULL</code>.
     */
    public static String[] multilineToArray(String text) {
        if (text == null) {
            return null;
        }

        return split(text, "\n");
    }

    /**
     * Converts an arbitrary value to multi-line text. An array of strings is rendered with
     * one element per line, any other non-null value through its <code>toString()</code>,
     * and <code>NULL</code> becomes the empty string constant.
     *
     * @param value arbitrary value.
     *
     * @return multi-line text.
     */
    public static String anyToMultiline(Object value) {
        if (value instanceof String[]) {
            return arrayToMultiline((String[]) value);
        }

        return value == null ? Constants.EMPTY_STRING : value.toString();
    }

    /**
     * Joins the strings into multi-line text, placing a line feed between neighbours.
     *
     * @param aStrings  array of strings; may be <code>NULL</code>.
     *
     * @return multi-line text or <code>NULL</code> if the array was <code>NULL</code>.
     */
    public static String arrayToMultiline(String[] aStrings) {
        if (aStrings == null) {
            return null;
        }

        return join(aStrings, "\n");
    }

    /**
     * Re-decodes text which was read as ISO8859-1 using the given source encoding. The text is
     * returned untouched when no encoding is given, when the encoding equals the default
     * encoding name, or when the encoding is not supported.
     *
     * @param text              text; may be <code>NULL</code>.
     * @param sourceEncoding    source encoding; may be <code>NULL</code>.
     *
     * @return converted text or <code>NULL</code> if the text was <code>NULL</code>.
     */
    public static String decodeForced(String text, String sourceEncoding) {
        if (text == null) {
            return null;
        }

        boolean conversionNeeded = sourceEncoding != null
                && !sourceEncoding.equals(Converters.getDefaultEncodingName());
        if (!conversionNeeded) {
            return text;
        }

        String decoded = text;
        try {
            decoded = new String(text.getBytes("ISO8859-1"), sourceEncoding);
        } catch (UnsupportedEncodingException ignored) {
            // Deliberately ignored: the original text is returned for unsupported encodings.
        }

        return decoded;
    }

    /**
     * Returns the first sentence of the text: leading whitespace is skipped and everything
     * up to (but not including) the first sentence terminator is returned trimmed.
     *
     * @param aText text to scan; may be <code>NULL</code>.
     *
     * @return first sentence, empty string if there is none, or <code>NULL</code>
     *         if the text was <code>NULL</code>.
     */
    public static String getFirstSentense(String aText) {
        if (aText == null) {
            return null;
        }

        final int size = aText.length();

        // Skip the leading whitespace
        int start = 0;
        while (start < size && Character.isWhitespace(aText.charAt(start))) {
            start++;
        }

        // Advance to the first terminator or the end of the text
        int end = start;
        while (end < size && !isSentenseTerminator(aText.charAt(end))) {
            end++;
        }

        if (end == start) {
            return Constants.EMPTY_STRING;
        }

        return aText.substring(start, end).trim();
    }

    /**
     * Tells whether the character ends a sentence. Period, question mark and
     * exclamation mark are treated as terminators.
     *
     * @param ch    char to test.
     *
     * @return <code>TRUE</code> if the char is a sentence terminator.
     */
    public static boolean isSentenseTerminator(char ch) {
        switch (ch) {
            case '.':
            case '?':
            case '!':
                return true;
            default:
                return false;
        }
    }

    /**
     * Renders a size given in bytes as a number followed by a unit name.
     *
     * @param size size in bytes.
     *
     * @return string representation.
     */
    public static String sizeToString(double size) {
        return sizeToString(size, 0);
    }

    /**
     * Renders a size expressed in the given unit. While the value is not below 512 and a
     * bigger unit is available, the value is divided by 1024 and the next unit is taken.
     *
     * @param size      size in units.
     * @param unitIndex index of the unit in <code>SIZE_UNIT</code>.
     *
     * @return string representation.
     */
    private static String sizeToString(double size, int unitIndex) {
        final int biggestUnit = SIZE_UNIT.length - 1;

        double scaled = size;
        int unit = unitIndex;
        // Written as "not below 512" (instead of ">= 512") to keep NaN handling unchanged
        while (!(scaled < 512) && unit != biggestUnit) {
            scaled = scaled / 1024;
            unit++;
        }

        return FORMAT.format(scaled) + " " + SIZE_UNIT[unit];
    }

    /**
     * URL-encodes the string using the UTF-8 encoding.
     *
     * <p>Example:</p>
     * <pre>
     *  input string: 'a &?b'
     *  output string: 'a+%26%3Fb'
     * </pre>
     *
     * @param str string to encode; may be <code>NULL</code>.
     *
     * @return encoded string or <code>NULL</code> if the source was <code>NULL</code>.
     *
     * @throws RuntimeException if UTF-8 is reported as unsupported.
     */
    public static String encodeForURL(String str) {
        if (str == null) {
            return null;
        }

        final String encoded;
        try {
            encoded = URLEncoder.encode(str, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(Strings.error("utf8.not.supported"), e);
        }

        return encoded;
    }

    /**
     * Parses a whitespace-separated keywords string into an array. A keyword may consist
     * of several words when it is enclosed in double quotes.
     *
     * @param keywords keywords string; may be <code>NULL</code>.
     *
     * @return array of keywords or <code>NULL</code> if <code>keywords</code>
     *         were <code>NULL</code>.
     */
    public static String[] keywordsToArray(String keywords) {
        List<String> parsed = keywordsToList(keywords);
        if (parsed == null) {
            return null;
        }

        return parsed.toArray(new String[parsed.size()]);
    }

    /**
     * Parses a whitespace-separated keywords string into a list. A keyword may consist
     * of several words when it is enclosed in double quotes. Duplicates and the lone
     * <code>*</code> and <code>+</code> keywords are left out.
     *
     * @param keywords keywords string; may be <code>NULL</code>.
     *
     * @return list of keywords or <code>NULL</code> if <code>keywords</code>
     *         were <code>NULL</code>.
     */
    public static List<String> keywordsToList(String keywords) {
        if (keywords == null) {
            return null;
        }

        List<String> unique = new ArrayList<String>();

        Matcher tokens = PATTERN_KEYWORDS.matcher(keywords);
        while (tokens.find()) {
            // Group 3 is the text between quotes; group 2 is the token as written
            String quoted = tokens.group(3);
            String keyword = quoted != null ? quoted : tokens.group(2);
            if (keyword == null) {
                continue;
            }

            boolean wildcardOnly = "*".equals(keyword) || "+".equals(keyword);
            if (!wildcardOnly && !unique.contains(keyword)) {
                unique.add(keyword);
            }
        }

        return unique;
    }

    /**
     * Trims the keyword and wraps it in double quotes when it contains a space and neither
     * its first nor its last character is a double quote.
     *
     * @param keyword keyword to quote if necessary; must not be <code>NULL</code>.
     *
     * @return trimmed and possibly quoted keyword.
     */
    public static String quoteKeywordIfNecessary(String keyword) {
        String trimmed = keyword.trim();

        // Single-word (or empty) keywords never need quotes
        if (trimmed.indexOf(' ') == -1) {
            return trimmed;
        }

        boolean opensWithQuote = trimmed.charAt(0) == '"';
        boolean closesWithQuote = trimmed.charAt(trimmed.length() - 1) == '"';
        if (opensWithQuote || closesWithQuote) {
            return trimmed;
        }

        return "\"" + trimmed + "\"";
    }

    /**
     * Converts keywords from the <code>a|b|c d|e</code> or <code>a, b, c d, e</code>
     * look to <code>a b "c d" e</code>. The pipe takes precedence over the comma; keywords
     * having neither separator are returned as they are.
     *
     * @param aKeywords keywords to convert; must not be <code>NULL</code>.
     *
     * @return new-look keywords.
     */
    public static String convertKeywordsToNewFormat(String aKeywords) {
        if (aKeywords.indexOf('|') != -1) {
            return breakAndRejoinKeywords(aKeywords, "|");
        }

        if (aKeywords.indexOf(',') != -1) {
            return breakAndRejoinKeywords(aKeywords, ",");
        }

        return aKeywords;
    }

    /**
     * Breaks the keywords list apart at the given separator and rejoins it using
     * the current keywords rules (space-delimited, multi-word items quoted).
     *
     * @param aKeywords         list of keywords.
     * @param currentSeparator  the separator to be used for breaking.
     *
     * @return newly formed keywords list.
     */
    private static String breakAndRejoinKeywords(String aKeywords, String currentSeparator) {
        return arrayToQuotedKeywords(split(aKeywords, currentSeparator));
    }

    /**
     * Converts the array of keywords into a space-delimited list where every multi-word
     * item is quoted.
     *
     * @param aKeywordsList array of keywords; may be <code>NULL</code>.
     *
     * @return space-delimited and quoted list of keywords, empty string for an empty array
     *         or <code>NULL</code> if the array was <code>NULL</code>.
     */
    public static String arrayToQuotedKeywords(String[] aKeywordsList) {
        if (aKeywordsList == null) {
            return null;
        }

        if (aKeywordsList.length == 0) {
            return "";
        }

        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < aKeywordsList.length; i++) {
            if (i > 0) {
                joined.append(" ");
            }
            joined.append(quoteKeywordIfNecessary(aKeywordsList[i]));
        }

        return joined.toString();
    }

    /**
     * Digests the buffer followed by the key using the MD5 algorithm.
     *
     * <p>Both strings are converted to bytes in UTF-8, so the digest does not depend on the
     * default encoding of the platform. For ASCII-only input the bytes are the same as
     * with any ASCII-compatible default encoding.</p>
     *
     * @param buffer    buffer; must not be <code>NULL</code>.
     * @param key       key is secret key (password or something else which isn't
     *                  going to be passed over network); must not be <code>NULL</code>.
     *
     * @return digested buffer.
     *
     * @throws NoSuchAlgorithmException if there's no MD5 algorithm implementation.
     * @throws IllegalStateException    if UTF-8 is reported as unsupported.
     */
    public static byte[] digestMD5(String buffer, String key) throws NoSuchAlgorithmException {
        MessageDigest md5 = MessageDigest.getInstance("MD5");

        try {
            md5.update(buffer.getBytes("UTF-8"));
            return md5.digest(key.getBytes("UTF-8"));
        } catch (UnsupportedEncodingException e) {
            throw new IllegalStateException("UTF-8 is not supported", e);
        }
    }

    /**
     * Creates a basic authentication token (<code>"Basic " + base64(user:password)</code>)
     * given user name and password.
     *
     * @param user      user name.
     * @param password  password.
     *
     * @return token; never contains line breaks.
     */
    public static String createBasicAuthToken(String user, String password) {
        String token = user + ":" + password;
        String base64Token = new BASE64Encoder().encode(token.getBytes());

        // The encoder wraps long output into several lines. A line break inside of an HTTP
        // header value corrupts the request, so the breaks are removed.
        base64Token = base64Token.replaceAll("[\\r\\n]", "");

        return "Basic " + base64Token;
    }

    /**
     * Creates a regex pattern from the keywords string. The string is parsed into
     * separate keywords first and then converted by the array-based overload.
     *
     * @param keywords keywords string.
     *
     * @return keywords regex pattern.
     */
    public static String keywordsToPattern(String keywords) {
        String[] parsed = keywordsToArray(keywords);
        return keywordsToPattern(parsed);
    }

    /**
     * Creates a regex pattern from the keywords list. The keywords are joined as alternatives,
     * a fixed set of regex meta-characters is escaped, and the keyword wildcards are
     * translated: a run of <code>+</code> becomes <code>\w+</code> and a run of <code>*</code>
     * becomes <code>\w*</code>. The result has three groups: leading boundary, the keyword
     * alternatives and trailing boundary.
     *
     * @param aKeywords keywords list.
     *
     * @return keywords regex pattern or <code>NULL</code> if the list is <code>NULL</code> or empty.
     */
    public static String keywordsToPattern(String[] aKeywords) {
        String pattern;

        if (aKeywords != null && aKeywords.length > 0) {
            pattern = join(aKeywords, "|");
            // Escape the backslash first, so that the backslashes added by the steps below stay intact
            pattern = pattern.replaceAll("\\\\", "\\\\\\\\");
            pattern = pattern.replaceAll("\\.", "\\\\.");
            // Line feeds inside of keywords act as alternative separators
            pattern = pattern.replaceAll("\\n+", "|");
            pattern = pattern.replaceAll("\\?", "\\\\?");
            pattern = pattern.replaceAll("\\(", "\\\\(");
            pattern = pattern.replaceAll("\\)", "\\\\)");
            pattern = pattern.replaceAll("\\[", "\\\\[");
            pattern = pattern.replaceAll("\\]", "\\\\]");

            // Wildcards and whitespace. The order matters: '+' is translated before the
            // whitespace step introduces its own '+' quantifier, and '*' is translated last.
            pattern = pattern.replaceAll("\\++", "\\\\w+");
            pattern = pattern.replaceAll("\\s+", "\\\\s+");
            pattern = pattern.replaceAll("\\*+", "\\\\w*");
            // A '*' standing between whitespace ("\s+\w*\s+") is rewritten to "\s+(\w*\s+)?",
            // making the wildcard word together with its trailing whitespace optional
            pattern = pattern.replaceAll("\\\\s\\+\\\\w\\*\\\\s\\+", "\\\\s+(\\\\w*\\\\s+)?");

            // Collapse repeated alternative separators and drop the leading / trailing ones
            pattern = pattern.replaceAll("\\|\\|+", "\\|");
            pattern = pattern.replaceAll("(^\\||\\|$)", "");

            // By default the match has to be surrounded by non-word characters (or text bounds)
            String start = "\\W";
            String end = start;

            // A leading "\s+" (3 characters) is cut off and turned into a whitespace boundary
            if (pattern.startsWith("\\s+")) {
                start = "\\s";
                pattern = pattern.substring(3);
            }

            // The same for a trailing "\s+"
            if (pattern.endsWith("\\s+")) {
                end = "\\s";
                pattern = pattern.substring(0, pattern.length() - 3);
            }

            pattern = "(^|" + start + ")(" + pattern.trim() + ")($|" + end + ")";
        } else
            pattern = null;

        return pattern;
    }

    /**
     * Cleans up a URL dragged into the application: surrounding whitespace is removed,
     * the "feed:" scheme is replaced with "http:" and only the first line is kept
     * (FireFox 1.5 under Windows puts the page description on the second line).
     *
     * @param link  link being dragged into application; may be <code>NULL</code>.
     *
     * @return final link or <code>NULL</code> if the link was <code>NULL</code>.
     */
    public static String cleanDraggedURL(String link) {
        if (link == null) {
            return null;
        }

        String cleaned = link.trim();

        // "feed:..." becomes "http:..."
        if (cleaned.startsWith("feed:")) {
            cleaned = "http:" + cleaned.substring("feed:".length());
        }

        // Keep the first line only -- that's the URL itself
        int lineBreak = cleaned.indexOf('\n');
        if (lineBreak == -1) {
            return cleaned;
        }

        return cleaned.substring(0, lineBreak).trim();
    }

    /**
     * Scans the text for tag definitions in micro-format: A-links having the "rel" attribute
     * equal to "tag". The last section of the link URL is taken as the tag and every
     * <code>+</code> in it is replaced with a space.
     *
     * @param aText text to parse; may be <code>NULL</code>.
     *
     * @return list of tags detected.
     */
    public static String[] collectTags(String aText) {
        if (aText == null) {
            return Constants.EMPTY_STRING_LIST;
        }

        Matcher anchors = Pattern
                .compile("<a\\s+[^>]*rel\\s*=\\s*['\"]tag['\"][^>]*>", Pattern.CASE_INSENSITIVE)
                .matcher(aText);

        // No tag links at all -- return the shared empty list
        if (!anchors.find()) {
            return Constants.EMPTY_STRING_LIST;
        }

        Pattern hrefPattern = Pattern.compile("href\\s*=\\s*['\"]([^'\"/]+/+)+([\\+a-zA-Z0-9]+)['\"]");
        List<String> tags = new ArrayList<String>();
        do {
            Matcher href = hrefPattern.matcher(anchors.group());
            if (href.find()) {
                tags.add(href.group(2).replaceAll("\\+", " "));
            }
        } while (anchors.find());

        return tags.toArray(new String[tags.size()]);
    }

    /**
     * Normalizes a URL entered by the user: surrounding spaces are removed, the "feed:" prefix
     * (with the slashes right after it) is dropped and "http://" is prepended when the URL
     * doesn't start with a protocol.
     *
     * @param url source URL; may be <code>NULL</code>.
     *
     * @return modified URL or <code>NULL</code> if the URL was <code>NULL</code> or blank.
     */
    public static String fixURL(String url) {
        if (url == null) {
            return null;
        }

        String trimmed = url.trim();
        if (trimmed.length() == 0) {
            return null;
        }

        if (trimmed.startsWith("feed:")) {
            // Drop the prefix and fix whatever is left
            String withoutFeed = trimmed.substring(5).replaceAll("^/+", "");
            return fixURL(withoutFeed);
        }

        if (PATTERN_URL_WITH_PROTOCOL.matcher(trimmed).find()) {
            return trimmed;
        }

        return "http://" + trimmed;
    }

    /**
     * Unescapes the five basic entities: <code>&amp;lt;</code>, <code>&amp;gt;</code>,
     * <code>&amp;apos;</code>, <code>&amp;quot;</code> and <code>&amp;amp;</code>.
     *
     * <p>Every entity is unescaped exactly once: <code>&amp;amp;lt;</code> becomes
     * <code>&amp;lt;</code>, not <code>&lt;</code>.</p>
     *
     * @param str string; may be <code>NULL</code>.
     *
     * @return unescaped version or <code>NULL</code> if the string was <code>NULL</code>.
     */
    public static String quickUnescape(String str) {
        if (str == null)
            return null;

        // Literal (non-regex) replacements
        str = str.replace("&lt;", "<");
        str = str.replace("&gt;", ">");
        str = str.replace("&apos;", "'");
        str = str.replace("&quot;", "\"");
        // The ampersand goes last: otherwise the '&' it produces could form a new entity
        // with the text following it and would be unescaped for the second time
        str = str.replace("&amp;", "&");

        return str;
    }

    /**
     * Recodes HTML entities into Unicode symbols. Named entities (3 to 6 characters long),
     * decimal entities (1 to 4 digits) and hexadecimal entities (1 to 4 digits) are
     * recognized. Named entities missing from the entities table are left as they are.
     *
     * @param str string; may be <code>NULL</code>.
     *
     * @return result; <code>NULL</code> and empty strings are returned as they are.
     */
    public static String unescape(String str) {
        if (isEmpty(str))
            return str;

        // NOTE(review): names shorter than 3 characters don't match, so "&lt;" and "&gt;" are
        // not converted although both are present in the entities table -- confirm the intent.
        Pattern p = Pattern.compile("&(([^#;\\s]{3,6})|#([0-9]{1,4})|#x([0-9a-fA-F]{1,4}));");
        Matcher m = p.matcher(str);
        StringBuffer sb = new StringBuffer();
        while (m.find()) {
            Character c;

            String strEntity = m.group(2);
            String decEntity = m.group(3);
            String hexEntity = m.group(4);
            if (strEntity != null) {
                // String entity
                c = ENTITIES.get(strEntity);
            } else {
                // Numeric entity -- decimal or hexadecimal
                c = decEntity != null ? (char) Integer.parseInt(decEntity) : (char) Integer.parseInt(hexEntity, 16);
            }

            // Unknown entities are written back unchanged
            String replacement = c == null ? m.group() : c.toString();

            // The replacement has to be quoted: '$' and '\' have a special meaning for
            // appendReplacement, so "&#36;" or "&#92;" would fail without quoting
            m.appendReplacement(sb, Matcher.quoteReplacement(replacement));
        }
        m.appendTail(sb);

        return sb.toString();
    }

    /**
     * Checks whether the given string looks like an e-mail address: some text before
     * <code>@</code> and at least two dot-separated sections after it. The string is
     * trimmed before matching.
     *
     * @param email address; may be <code>NULL</code>.
     *
     * @return <code>TRUE</code> if valid.
     */
    public static boolean isValidEmail(String email) {
        if (isEmpty(email)) {
            return false;
        }

        String candidate = email.trim();
        return candidate.matches("^[^@]+@[^\\.]+(\\.[^\\.]+)+$");
    }

    /**
     * Converts the list of URLs in string form into the array of URL objects.
     * Malformed URLs are skipped.
     *
     * @param str   URLs string with items separated by <code>Constants.URL_SEPARATOR</code>.
     *
     * @return array of URLs or <code>NULL</code> if the string is <code>NULL</code> or empty.
     */
    public static URL[] strToURLs(String str) {
        if (!isNotEmpty(str)) {
            return null;
        }

        String[] items = split(str, Constants.URL_SEPARATOR);
        List<URL> parsed = new ArrayList<URL>(items.length);
        for (String item : items) {
            try {
                parsed.add(new URL(item));
            } catch (MalformedURLException e) {
                // Wrong url specified -- skipping
            }
        }

        return parsed.toArray(new URL[parsed.size()]);
    }

    /**
     * Returns space-separated list of first <code>N</code> words. If the string has fewer
     * words than requested, all of them are returned.
     *
     * @param str   string.
     * @param n     number of words.
     *
     * @return list or <code>NULL</code> if string is <code>NULL</code> or <code>N</code> is less than <code>1</code>.
     */
    public static String getUpToNWords(String str, int n) {
        if (str == null || n < 1)
            return null;

        // At most N + 1 pieces are requested: N words plus the remainder of the string
        String[] split = split(str, " ,.()[]<>!?\"':;/\\", n + 1);

        // The last piece is the remainder only when there are more than N pieces. With
        // N pieces or fewer every piece is a word and has to be kept.
        if (split.length > n)
            split[split.length - 1] = "";

        return join(split, " ").trim();
    }

    // Single-entry cache: the pattern compiled for the most recent sentences count.
    private static Pattern lastPattern;
    private static int lastSentences = -1;

    /**
     * Returns the excerpt made of the given number of leading sentences. When the sentences are
     * shorter than <code>min</code> or longer than <code>max</code>, the excerpt is cut by
     * characters at the violated limit instead. When the sentences can't be found, the excerpt
     * is cut at <code>max</code> characters.
     *
     * @param str       string to process; may be <code>NULL</code>.
     * @param sentences the number of sentences.
     * @param min       minimum characters.
     * @param max       maximum characters.
     *
     * @return the result or <code>NULL</code> if the string was <code>NULL</code>.
     */
    public static String excerpt(String str, int sentences, int min, int max) {
        if (str == null) {
            return null;
        }

        // Reuse the cached pattern unless the sentences count has changed
        Pattern pat = lastPattern;
        if (lastSentences != sentences) {
            pat = Pattern.compile("^([^\\.!?]+(\\.+|!+|\\?+)+){" + sentences + "}");

            lastSentences = sentences;
            lastPattern = pat;
        }

        Matcher m = pat.matcher(str);
        if (!m.find()) {
            return excerpt(str, max);
        }

        String found = m.group().trim();
        int len = found.length();
        if (len < min) {
            return excerpt(str, min);
        }
        if (len > max) {
            return excerpt(str, max);
        }

        return found;
    }

    /**
     * Returns the first <code>len</code> characters of the string followed by "...".
     * Strings which aren't longer than the limit are returned as they are.
     *
     * @param str   string to process; may be <code>NULL</code>.
     * @param len   number of characters.
     *
     * @return the result.
     */
    public static String excerpt(String str, int len) {
        if (str == null || str.length() <= len) {
            return str;
        }

        return str.substring(0, len) + "...";
    }

    /**
     * Takes the string, removes punctuation, lowercases it and returns the words in a given range glued
     * together with spaces. If there's not enough words, the maximum available number of them is returned.
     *
     * @param str   string.
     * @param from  the first word to return (zero-based index).
     * @param to    the last word to return (zero-based index, inclusive); any value beyond the last
     *              word, including <code>Integer.MAX_VALUE</code>, means "up to the last word".
     *
     * @return the result; <code>NULL</code> and empty strings are returned as they are.
     *
     * @throws IllegalArgumentException if <code>from</code> is bigger than <code>to</code>.
     */
    public static String getWordsInRange(String str, int from, int to) {
        if (isEmpty(str))
            return str;
        if (from > to)
            throw new IllegalArgumentException("From can't be bigger than To.");

        // Remove all punctuation and collapse spaces plus lowercase
        str = PATTERN_PUNCTUATION.matcher(str).replaceAll(" ").toLowerCase().trim();
        String[] strs = str.split(" ");

        // Figure out what are our limits. "to" becomes exclusive here; it is compared
        // before being incremented, so that Integer.MAX_VALUE doesn't overflow.
        from = Math.min(from, strs.length);
        to = to >= strs.length ? strs.length : to + 1;

        // Glue words back together
        String[] arr = new String[to - from];
        System.arraycopy(strs, from, arr, 0, to - from);

        return join(arr, " ");
    }

    /**
     * Null-safe version of <code>String.intern()</code>.
     *
     * @param s string; may be <code>NULL</code>.
     *
     * @return intern'ed version or <code>NULL</code> if the string was <code>NULL</code>.
     */
    public static String intern(String s) {
        if (s == null) {
            return null;
        }

        return s.intern();
    }
}