org.apache.solr.common.util.StrUtils.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.solr.common.util.StrUtils.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.common.util;

import java.io.IOException;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Locale;

import org.apache.solr.common.SolrException;

/**
 * Static helpers for splitting, joining, escaping, and parsing strings.
 */
public class StrUtils {
    /**
     * The sixteen lowercase hexadecimal digit characters, ordered so that the
     * character at index {@code n} is the digit for the value {@code n} (0-15).
     * <p>
     * NOTE(review): this is a public, mutable array; callers must not modify it.
     */
    public static final char[] HEX_DIGITS = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd',
            'e', 'f' };

    /**
     * Splits {@code s} on {@code separator}, collecting the pieces into a new list.
     * The actual splitting rules (quotes and backslash escapes) are those of
     * {@link #splitSmart(String, char, List)}.
     *
     * @param s         the string to split
     * @param separator the character to split on
     * @return a new, mutable list holding the pieces
     */
    public static List<String> splitSmart(String s, char separator) {
        final List<String> pieces = new ArrayList<>(4);
        splitSmart(s, separator, pieces);
        return pieces;
    }

    /**
     * Characters that {@link #split(String, char)} accepts as a replacement
     * separator when one of them is the first character of its input.
     */
    static final String DELIM_CHARS = "/:;.,%#";

    /**
     * Splits {@code s} on a separator, letting the input choose its own separator.
     * <p>
     * If the first character of {@code s} is one of {@link #DELIM_CHARS}, that
     * character replaces {@code sep} as the separator. The split itself is
     * delegated to {@link #splitSmart(String, char, boolean)} with
     * {@code trimEmpty} set, so a leading empty element is dropped.
     *
     * @param s   the string to split; an empty string yields an empty list
     * @param sep the separator to use unless {@code s} starts with a delimiter character
     * @return the pieces of {@code s}
     */
    public static List<String> split(String s, char sep) {
        // Guard the peek at the first character: charAt(0) on "" would throw
        // StringIndexOutOfBoundsException.
        if (!s.isEmpty() && DELIM_CHARS.indexOf(s.charAt(0)) > -1) {
            sep = s.charAt(0);
        }
        return splitSmart(s, sep, true);
    }

    /**
     * Splits like {@link #splitSmart(String, char)}, optionally discarding a
     * leading empty element.
     *
     * @param s         the string to split
     * @param separator the character to split on
     * @param trimEmpty when {@code true}, remove the first element if it is the
     *                  empty string; any later empty elements are kept
     * @return the pieces of {@code s}
     */
    public static List<String> splitSmart(String s, char separator, boolean trimEmpty) {
        final List<String> pieces = splitSmart(s, separator);
        if (trimEmpty && !pieces.isEmpty() && pieces.get(0).isEmpty()) {
            pieces.remove(0);
        }
        return pieces;
    }

    /**
     * Splits {@code s} on {@code separator} and appends the pieces to {@code lst},
     * without splitting inside a quoted string.
     * <p>
     * A backslash protects the character that follows it, both inside and outside
     * quoted strings; backslashes and quotes are kept in the emitted pieces.
     * A trailing empty piece is not emitted.
     *
     * @param s         the string to split
     * @param separator the character to split on
     * @param lst       the list that receives the pieces
     */
    public static void splitSmart(String s, char separator, List<String> lst) {
        final int length = s.length();
        int tokenStart = 0;
        int next = 0;
        char openQuote = 0; // 0 while outside a quoted string, else the quote that opened it
        char current = 0;

        while (next < length) {
            final char previous = current;
            current = s.charAt(next);
            next++;

            if (current == '\\') {
                // Escaped: step over the following character without inspecting it.
                next++;
                continue;
            }
            if (openQuote != 0 && current == openQuote) {
                openQuote = 0;
                continue;
            }
            if (current == '\'' || current == '"') {
                // A quote directly preceded by a letter or digit does not open a
                // string. Examples: 50" TV, or can't
                if (!Character.isLetterOrDigit(previous)) {
                    openQuote = current;
                }
                continue;
            }
            if (current == separator && openQuote == 0) {
                lst.add(s.substring(tokenStart, next - 1));
                tokenStart = next;
            }
        }

        if (tokenStart < length) {
            lst.add(s.substring(tokenStart, length));
        }
    }

    /**
     * Splits a backslash escaped string on the separator.
     * <p>
     * Current backslash escaping supported:
     * <br> \n \t \r \b \f are escaped the same as a Java String
     * <br> Other characters following a backslash are produced verbatim (\c =&gt; c)
     * <p>
     * Empty pieces (from leading, trailing, or adjacent separators) are not
     * emitted. An empty {@code separator} never matches, so the whole input is
     * returned as a single piece instead of looping forever.
     *
     * @param s         the string to split
     * @param separator the separator to split on
     * @param decode    decode backslash escaping; when {@code false} the backslash
     *                  and the character after it are both kept as-is
     * @return not null
     */
    public static List<String> splitSmart(String s, String separator, boolean decode) {
        ArrayList<String> lst = new ArrayList<>(2);
        StringBuilder sb = new StringBuilder();
        // startsWith("", pos) is always true and would never advance pos, so an
        // empty separator must be treated as "no separator" to guarantee termination.
        final boolean canSplit = !separator.isEmpty();
        int pos = 0, end = s.length();
        while (pos < end) {
            if (canSplit && s.startsWith(separator, pos)) {
                if (sb.length() > 0) {
                    lst.add(sb.toString());
                    sb = new StringBuilder();
                }
                pos += separator.length();
                continue;
            }

            char ch = s.charAt(pos++);
            if (ch == '\\') {
                if (!decode)
                    sb.append(ch);
                // A trailing backslash has nothing to escape: stop here. With
                // decode it is dropped; without decode it was appended just above.
                if (pos >= end)
                    break;
                ch = s.charAt(pos++);
                if (decode) {
                    switch (ch) {
                    case 'n':
                        ch = '\n';
                        break;
                    case 't':
                        ch = '\t';
                        break;
                    case 'r':
                        ch = '\r';
                        break;
                    case 'b':
                        ch = '\b';
                        break;
                    case 'f':
                        ch = '\f';
                        break;
                    }
                }
            }

            sb.append(ch);
        }

        if (sb.length() > 0) {
            lst.add(sb.toString());
        }

        return lst;
    }

    /**
     * Splits a comma-separated list of file names.
     * A comma that belongs to a file name must be escaped with a backslash '\'.
     *
     * @param fileNames the string containing file names; may be {@code null}
     * @return the file names with the escaping backslashes removed, or an empty
     *         list when {@code fileNames} is {@code null}
     */
    public static List<String> splitFileNames(String fileNames) {
        if (fileNames == null) {
            return Collections.emptyList();
        }

        // Split only on commas that are not preceded by a backslash.
        final String[] rawNames = fileNames.split("(?<!\\\\),");
        final List<String> names = new ArrayList<>();
        for (int i = 0; i < rawNames.length; i++) {
            // Drop the backslash in front of each escaped comma.
            names.add(rawNames[i].replaceAll("\\\\(?=,)", ""));
        }
        return names;
    }

    /**
     * Joins the string form of every item with {@code separator}, backslash
     * escaping each item so the separator can be told apart from item content.
     *
     * @param items     the items to join; {@code null} yields the empty string
     * @param separator the character placed between items
     * @return the joined, escaped string
     * @see #escapeTextWithSeparator
     */
    public static String join(Collection<?> items, char separator) {
        if (items == null) {
            return "";
        }
        final StringBuilder joined = new StringBuilder(items.size() << 3);
        boolean needsSeparator = false;
        for (Object element : items) {
            if (needsSeparator) {
                joined.append(separator);
            }
            needsSeparator = true;
            appendEscapedTextToBuilder(joined, String.valueOf(element), separator);
        }
        return joined.toString();
    }

    /**
     * Splits a backslash escaped string on runs of whitespace.
     * <p>
     * A backslash protects the character after it, so an escaped whitespace
     * character stays inside its token. Empty tokens are never emitted.
     *
     * @param s      the string to split
     * @param decode when {@code true}, drop the backslash and translate
     *               \n \t \r \b \f as a Java String would; when {@code false},
     *               keep the backslash and the following character as-is
     * @return not null
     */
    public static List<String> splitWS(String s, boolean decode) {
        final List<String> tokens = new ArrayList<>(2);
        StringBuilder token = new StringBuilder();
        final int length = s.length();
        int i = 0;

        while (i < length) {
            char c = s.charAt(i);
            i++;

            if (Character.isWhitespace(c)) {
                if (token.length() != 0) {
                    tokens.add(token.toString());
                    token = new StringBuilder();
                }
                continue;
            }

            if (c == '\\') {
                if (!decode) {
                    token.append(c);
                }
                if (i >= length) {
                    // Trailing backslash with nothing left to escape.
                    break;
                }
                c = s.charAt(i);
                i++;
                if (decode) {
                    if (c == 'n') {
                        c = '\n';
                    } else if (c == 't') {
                        c = '\t';
                    } else if (c == 'r') {
                        c = '\r';
                    } else if (c == 'b') {
                        c = '\b';
                    } else if (c == 'f') {
                        c = '\f';
                    }
                }
            }

            token.append(c);
        }

        if (token.length() != 0) {
            tokens.add(token.toString());
        }
        return tokens;
    }

    /**
     * Returns a new list holding the lower-case form of every input string,
     * converted with {@link Locale#ROOT} so the result does not depend on the
     * default locale.
     *
     * @param strings the strings to convert
     * @return a new list of the same size, in the same order
     */
    public static List<String> toLower(List<String> strings) {
        final List<String> lowered = new ArrayList<>(strings.size());
        for (String original : strings) {
            final String lowerCase = original.toLowerCase(Locale.ROOT);
            lowered.add(lowerCase);
        }
        return lowered;
    }

    /**
     * Returns {@code true} if the string starts with '1', 't', or 'T',
     * and {@code false} otherwise.
     *
     * @param s the string to inspect; {@code null} and the empty string are
     *          treated as {@code false}
     * @return whether the first character of {@code s} is '1', 't', or 'T'
     */
    public static boolean parseBoolean(String s) {
        // null has no first character, so it falls under "false otherwise"
        // rather than failing with a NullPointerException.
        if (s == null || s.isEmpty()) {
            return false;
        }
        final char first = s.charAt(0);
        return first == '1' || first == 't' || first == 'T';
    }

    /**
     * Turns a String into a boolean; more flexible than
     * Boolean.parseBoolean() to enable easier integration with html forms.
     * <p>
     * Strings starting with "true", "on", or "yes" are {@code true}; strings
     * starting with "false" or "off", or exactly equal to "no", are {@code false}.
     *
     * @param s the string to parse
     * @return the parsed value
     * @throws SolrException with {@code BAD_REQUEST} if {@code s} is {@code null}
     *                       or matches none of the recognized values
     */
    public static boolean parseBool(String s) {
        if (s != null) {
            final boolean truthy = s.startsWith("true") || s.startsWith("on") || s.startsWith("yes");
            if (truthy) {
                return true;
            }
            final boolean falsy = s.startsWith("false") || s.startsWith("off") || s.equals("no");
            if (falsy) {
                return false;
            }
        }
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "invalid boolean value: " + s);
    }

    /**
     * {@link NullPointerException} and {@link SolrException} free version of {@link #parseBool(String)}.
     * <p>
     * Strings starting with "true", "on", or "yes" are {@code true}; strings
     * starting with "false" or "off", or exactly equal to "no", are {@code false}.
     *
     * @param s   the string to parse; may be {@code null}
     * @param def the value to return when {@code s} is null or not recognized
     * @return parsed boolean value (or def, if s is null or invalid)
     */
    public static boolean parseBool(String s, boolean def) {
        if (s == null) {
            return def;
        }
        final boolean truthy = s.startsWith("true") || s.startsWith("on") || s.startsWith("yes");
        if (truthy) {
            return true;
        }
        final boolean falsy = s.startsWith("false") || s.startsWith("off") || s.equals("no");
        return falsy ? false : def;
    }

    /**
     * URL-encodes a value, replacing only enough chars so that
     * the URL may be unambiguously pasted back into a browser.
     * <p>
     * Characters with a numeric value less than 32 are written as a two-digit
     * lowercase hex escape. A space becomes '+', and &amp;, %, =, + become their
     * percent escapes. Every other character is copied unchanged.
     *
     * @param dest receives the encoded characters
     * @param val  the value to encode
     * @throws IOException if appending to {@code dest} fails
     */
    public static void partialURLEncodeVal(Appendable dest, String val) throws IOException {
        final int length = val.length();
        for (int index = 0; index < length; index++) {
            final char c = val.charAt(index);

            if (c < 32) {
                dest.append('%');
                if (c < 0x10) {
                    // Pad to two hex digits.
                    dest.append('0');
                }
                dest.append(Integer.toHexString(c));
                continue;
            }

            if (c == ' ') {
                dest.append('+');
            } else if (c == '&') {
                dest.append("%26");
            } else if (c == '%') {
                dest.append("%25");
            } else if (c == '=') {
                dest.append("%3D");
            } else if (c == '+') {
                dest.append("%2B");
            } else {
                dest.append(c);
            }
        }
    }

    /**
     * Returns a copy of {@code item} in which the separator is backslash
     * escaped, as done by {@link #appendEscapedTextToBuilder}.
     *
     * @param item      the text to escape
     * @param separator the separator character to escape
     * @return the escaped copy
     * @see #join
     */
    public static String escapeTextWithSeparator(String item, char separator) {
        // Sized for the worst case, where every character needs a backslash.
        final int worstCaseLength = item.length() * 2;
        final StringBuilder escaped = new StringBuilder(worstCaseLength);
        appendEscapedTextToBuilder(escaped, item, separator);
        return escaped.toString();
    }

    /**
     * Appends the chars of {@code item} to {@code out}, putting a backslash in
     * front of every backslash and every occurrence of {@code separator}.
     * The separator itself is not appended after the item.
     *
     * @param out       the builder that receives the escaped text
     * @param item      the text to escape
     * @param separator the separator character to escape
     */
    public static void appendEscapedTextToBuilder(StringBuilder out, String item, char separator) {
        for (char c : item.toCharArray()) {
            final boolean needsEscape = c == '\\' || c == separator;
            if (needsEscape) {
                out.append('\\');
            }
            out.append(c);
        }
    }

    /**
     * Formats {@code args} with a {@link MessageFormat} built from
     * {@code pattern}, using the ROOT locale.
     *
     * @param pattern the {@link MessageFormat} pattern
     * @param args    the arguments referenced by the pattern
     * @return the formatted string
     */
    public static String formatString(String pattern, Object... args) {
        final MessageFormat formatter = new MessageFormat(pattern, Locale.ROOT);
        return formatter.format(args);
    }
}