Example usage for java.lang Character isWhitespace

List of usage examples for java.lang Character isWhitespace

Introduction

On this page you can find example usages of java.lang.Character.isWhitespace.

Prototype

public static boolean isWhitespace(int codePoint) 

Source Link

Document

Determines if the specified character (Unicode code point) is white space according to Java.

Usage

From source file:henplus.commands.SQLCommand.java

/**
 * Parses a table declaration of the form {@code tablename ((AS)? alias)? [,...]}
 * and returns a map from each name (the alias if one is given, otherwise the
 * table name itself) to the table name.
 *
 * <p>Malformed input is tolerated rather than rejected: a missing alias after
 * {@code AS} maps the table to itself, an unexpected token where a comma is
 * expected is dropped, and stray commas (leading or repeated) are ignored.
 *
 * @param tableDecl the comma separated table declaration; must not be {@code null}
 * @return a new map from alias (or table name) to table name
 */
private Map<String, String> tableDeclParser(final String tableDecl) {
    // Delimiters are returned as tokens so that ',' can drive the state machine.
    final StringTokenizer tokenizer = new StringTokenizer(tableDecl, " \t\n\r\f,", true);
    final Map<String, String> result = new HashMap<String, String>();
    String table = null;
    String alias = null;
    int state = 0;
    while (tokenizer.hasMoreTokens()) {
        final String tok = tokenizer.nextToken();
        if (tok.length() == 1 && Character.isWhitespace(tok.charAt(0))) {
            continue;
        }
        final boolean separator = ",".equals(tok);
        switch (state) {
        case 0: { // initial/endstate
            if (separator) {
                // A stray comma is not a table name; keep waiting for one.
                continue;
            }
            table = tok;
            alias = tok;
            state = 1;
            break;
        }
        case 1: { // table seen, waiting for potential alias.
            if ("AS".equalsIgnoreCase(tok)) {
                state = 2;
            } else if (separator) {
                state = 0; // we are done.
            } else {
                alias = tok;
                state = 3;
            }
            break;
        }
        case 2: { // 'AS' seen, definitely waiting for alias.
            if (separator) {
                // error: alias missing for $table; the table is stored under its own name.
                state = 0;
            } else {
                alias = tok;
                state = 3;
            }
            break;
        }
        case 3: { // waiting for ',' at end of 'table (as)? alias'
            // error if the token is not ','; either way the declaration is finished.
            state = 0;
            break;
        }
        default:
            break;
        }

        // Reaching state 0 here means a declaration has just been completed.
        if (state == 0) {
            result.put(alias, table);
        }
    }
    // store any unfinished state..
    if (state == 1 || state == 3) {
        result.put(alias, table);
    }
    // state == 2: error, alias expected for $table; nothing is stored.
    return result;
}

From source file:egovframework.oe1.utl.fcc.service.EgovStringUtil.java

License:asdf

/**
 * <p>
 * Removes every character for which {@link Character#isWhitespace(char)}
 * returns {@code true} from the given String.
 * </p>
 *
 * <pre>
 * StringUtil.removeWhitespace(null)         = null
 * StringUtil.removeWhitespace(&quot;&quot;)           = &quot;&quot;
 * StringUtil.removeWhitespace(&quot;abc&quot;)        = &quot;abc&quot;
 * StringUtil.removeWhitespace(&quot;   ab  c  &quot;) = &quot;abc&quot;
 * </pre>
 * @param str
 *        the String to delete whitespace from, may
 *        be null
 * @return the String without whitespaces (the very same instance
 *         when it contains none),
 *         <code>null</code> if null String input
 */
public static String removeWhitespace(String str) {
    if (isEmpty(str)) {
        return str;
    }
    final int length = str.length();
    final StringBuilder kept = new StringBuilder(length);
    for (int idx = 0; idx < length; idx++) {
        final char current = str.charAt(idx);
        if (!Character.isWhitespace(current)) {
            kept.append(current);
        }
    }
    // Nothing was dropped: hand back the original instance instead of a copy.
    return kept.length() == length ? str : kept.toString();
}

From source file:de.knowwe.diaflux.utils.DOTImporter.java

/**
 * Returns the index of the first character at or after {@code start} that is
 * neither whitespace (per {@link Character#isWhitespace(char)}) nor {@code '='}.
 *
 * @param section the text to scan
 * @param start the index at which scanning begins
 * @return the index of the first character that is not skipped
 * @throws ImportException if the end of {@code section} is reached before such
 *         a character is found, including when {@code start} already lies at
 *         or beyond the end of {@code section}
 */
private int skipWhiteSpace(String section, int start) throws ImportException {
    int pos = start;
    while (true) {
        // Check the bound before every read, including the very first one, so
        // running off the end is always reported as an ImportException.
        if (pos >= section.length()) {
            throw new ImportException("Invalid attributes");
        }
        final char current = section.charAt(pos);
        if (!Character.isWhitespace(current) && current != '=') {
            return pos;
        }
        pos++;
    }
}

From source file:ths.commons.util.StringUtils.java

/**
 * <p>Removes leading characters from a String for as long as they belong to
 * a given set of characters.</p>
 *
 * <p>A {@code null} input String returns {@code null}.
 * An empty string ("") input returns the empty string.</p>
 *
 * <p>If the stripChars String is {@code null}, whitespace is
 * stripped as defined by {@link Character#isWhitespace(char)}.
 * If it is empty, the input is returned unchanged.</p>
 *
 * <pre>
 * StringUtils.stripStart(null, *)          = null
 * StringUtils.stripStart("", *)            = ""
 * StringUtils.stripStart("abc", "")        = "abc"
 * StringUtils.stripStart("abc", null)      = "abc"
 * StringUtils.stripStart("  abc", null)    = "abc"
 * StringUtils.stripStart("abc  ", null)    = "abc  "
 * StringUtils.stripStart(" abc ", null)    = "abc "
 * StringUtils.stripStart("yxabc  ", "xyz") = "abc  "
 * </pre>
 *
 * @param str  the String to remove characters from, may be null
 * @param stripChars  the characters to remove, null treated as whitespace
 * @return the stripped String, {@code null} if null String input
 */
public static String stripStart(String str, String stripChars) {
    if (str == null) {
        return str;
    }
    final int length = str.length();
    if (length == 0) {
        return str;
    }
    if (stripChars != null && stripChars.length() == 0) {
        // An empty strip set can never match anything.
        return str;
    }

    int first = 0;
    if (stripChars == null) {
        for (; first < length; first++) {
            if (!Character.isWhitespace(str.charAt(first))) {
                break;
            }
        }
    } else {
        for (; first < length; first++) {
            if (stripChars.indexOf(str.charAt(first)) == INDEX_NOT_FOUND) {
                break;
            }
        }
    }
    return str.substring(first);
}

From source file:ca.mcgill.cs.swevo.qualyzer.editors.RTFDocumentProvider2.java

/**
 * Reads the next character from the stream and, depending on what it is,
 * collects the corresponding control text:
 * <ul>
 * <li>{@code UNICODE_COUNT}: a number is read via {@code getNumber} and appended
 * after {@code UNICODE_COUNT_FULL}; a single whitespace delimiter following the
 * number is consumed.</li>
 * <li>a digit: the number starting with that digit is read, followed by the
 * result of {@code getUnicodeReplacement}; both are appended after
 * {@code UNICODE}.</li>
 * <li>anything else: the work is delegated to {@code handleControl}.</li>
 * </ul>
 *
 * @param contentStream the stream to read from
 * @param state the state map handed through to the delegated helpers
 * @return a pair of the last character obtained (-1 if the stream was already
 *         exhausted) and the collected control text
 * @throws IOException if reading from the stream fails
 */
private ParserPair getUnicode(InputStream contentStream, Map<String, Integer> state) throws IOException {
    StringBuilder text = new StringBuilder();
    int next = contentStream.read();
    if (next == -1) {
        // End of stream: nothing to collect.
        return new ParserPair(next, text.toString());
    }

    final char first = (char) next;
    if (first == UNICODE_COUNT) {
        final ParserPair count = getNumber(contentStream);
        text.append(UNICODE_COUNT_FULL);
        text.append(count.fString);
        next = count.fChar;
        // This is a control so a space is a delimiter.
        if (Character.isWhitespace((char) next)) {
            next = contentStream.read();
        }
    } else if (Character.isDigit(first)) {
        final ParserPair digits = getNumber(contentStream, Integer.parseInt(String.valueOf(first)));
        final int replacement = digits.fChar;
        final ParserPair replacementPair = getUnicodeReplacement(contentStream, replacement, state);
        next = replacementPair.fChar;

        text.append(UNICODE);
        text.append(digits.fString);
        text.append(replacementPair.fString);
    } else {
        final ParserPair handled = handleControl(contentStream, next, String.valueOf(UNICODE), state);
        next = handled.fChar;
        text = new StringBuilder(handled.fString);
    }

    return new ParserPair(next, text.toString());
}

From source file:com.openerp.addons.messages.Message.java

/**
 * Lower-cases the given string and then upper-cases the first letter of the
 * string and the first letter following any whitespace character, {@code '.'}
 * or {@code '\''}.
 *
 * @param string the text to capitalize; must not be {@code null}
 * @return the capitalized text
 */
public static String capitalizeString(String string) {
    final String lowered = string.toLowerCase();
    final StringBuilder out = new StringBuilder(lowered.length());
    // True while the next letter encountered starts a new word.
    boolean awaitingLetter = true;
    for (int pos = 0; pos < lowered.length(); pos++) {
        final char current = lowered.charAt(pos);
        if (awaitingLetter && Character.isLetter(current)) {
            out.append(Character.toUpperCase(current));
            awaitingLetter = false;
            continue;
        }
        out.append(current);
        if (Character.isWhitespace(current) || current == '.' || current == '\'') { // You can add other chars here
            awaitingLetter = true;
        }
    }
    return out.toString();
}

From source file:HtmlEncoder.java

/**
 *  Do "smart" encodging on a string. This means that valid HTML entities and tags,
 *  Helma macros and HTML comments are passed through unescaped, while
 *  other occurrences of '<', '>' and '&' are encoded to HTML entities.
 *
 *  @param str the string to encode/*  ww w.ja v  a 2 s  . c  o m*/
 *  @param ret the string buffer to encode to
 *  @param paragraphs if true use p tags for paragraphs, otherwise just use br's
 *  @param allowedTags a set containing the names of allowed tags as strings. All other
 *                     tags will be escaped
 */
public final static void encode(String str, StringBuffer ret, boolean paragraphs, Set allowedTags) {
    if (str == null) {
        return;
    }

    int l = str.length();

    // where to insert the <p> tag in case we want to create a paragraph later on
    int paragraphStart = ret.length();

    // what kind of element/text are we leaving and entering?
    // this is one of TEXT|SEMIBLOCK|BLOCK|INTERNAL
    // depending on this information, we decide whether and how to insert
    // paragraphs and line breaks. "entering" a tag means we're at the '<'
    // and exiting means we're at the '>', not that it's a start or close tag.
    byte entering = TEXT;
    byte exiting = TEXT;

    Stack openTags = new Stack();

    // are we currently within a < and a > that consitute some kind of tag?
    // we use tag balancing to know whether we are inside a tag (and should
    // pass things through unchanged) or outside (and should encode stuff).
    boolean insideTag = false;

    // are we inside an HTML tag?
    boolean insideHtmlTag = false;
    boolean insideCloseTag = false;
    byte htmlTagMode = TAG_NAME;

    // if we are inside a <code> tag, we encode everything to make
    // documentation work easier
    boolean insideCodeTag = false;
    boolean insidePreTag = false;

    // are we within a Helma <% macro %> tag? We treat macro tags and
    // comments specially, since we can't rely on tag balancing
    // to know when we leave a macro tag or comment.
    boolean insideMacroTag = false;

    // are we inside an HTML comment?
    boolean insideComment = false;

    // the quotation mark we are in within an HTML or Macro tag, if any
    char htmlQuoteChar = '\u0000';
    char macroQuoteChar = '\u0000';

    // number of newlines met since the last non-whitespace character
    int linebreaks = 0;

    // did we meet a backslash escape?
    boolean escape = false;

    boolean triggerBreak = false;

    for (int i = 0; i < l; i++) {
        char c = str.charAt(i);

        // step one: check if this is the beginning of an HTML tag, comment or
        // Helma macro.
        if (c == '<') {
            if (i < (l - 2)) {
                if (!insideMacroTag && ('%' == str.charAt(i + 1))) {
                    // this is the beginning of a Helma macro tag
                    if (!insideCodeTag) {
                        insideMacroTag = insideTag = true;
                        macroQuoteChar = '\u0000';
                    }
                } else if (('!' == str.charAt(i + 1)) && ('-' == str.charAt(i + 2))) {
                    // the beginning of an HTML comment?
                    if (!insideCodeTag) {
                        insideComment = insideTag = ((i < (l - 3)) && ('-' == str.charAt(i + 3)));
                    }
                } else if (!insideTag) {
                    // check if this is a HTML tag.
                    insideCloseTag = ('/' == str.charAt(i + 1));
                    int tagStart = insideCloseTag ? (i + 2) : (i + 1);
                    int j = tagStart;

                    while ((j < l) && Character.isLetterOrDigit(str.charAt(j)))
                        j++;

                    if ((j > tagStart) && (j < l)) {
                        String tagName = str.substring(tagStart, j).toLowerCase();

                        if ("code".equals(tagName) && insideCloseTag && insideCodeTag) {
                            insideCodeTag = false;
                        }

                        if (((allowedTags == null) || allowedTags.contains(tagName))
                                && allTags.contains(tagName) && !insideCodeTag) {
                            insideHtmlTag = insideTag = true;
                            htmlQuoteChar = '\u0000';
                            htmlTagMode = TAG_NAME;

                            exiting = entering;
                            entering = TEXT;

                            if (internalTags.contains(tagName)) {
                                entering = INTERNAL;
                            } else if (blockTags.contains(tagName)) {
                                entering = BLOCK;
                            } else if (semiBlockTags.contains(tagName)) {
                                entering = paragraphs ? BLOCK : SEMIBLOCK;
                            }

                            if (entering > 0) {
                                triggerBreak = !insidePreTag;
                            }

                            if (insideCloseTag) {
                                int t = openTags.search(tagName);

                                if (t == -1) {
                                    i = j;
                                    insideHtmlTag = insideTag = false;

                                    continue;
                                } else if (t > 1) {
                                    for (int k = 1; k < t; k++) {
                                        Object tag = openTags.pop();
                                        if (!emptyTags.contains(tag)) {
                                            ret.append("</");
                                            ret.append(tag);
                                            ret.append(">");
                                        }
                                    }
                                }

                                openTags.pop();
                            } else {
                                openTags.push(tagName);
                            }

                            if ("code".equals(tagName) && !insideCloseTag) {
                                insideCodeTag = true;
                            }

                            if ("pre".equals(tagName)) {
                                insidePreTag = !insideCloseTag;
                            }
                        }
                    }
                }
            } // if (i < l-2)
        }

        if ((triggerBreak || linebreaks > 0) && !Character.isWhitespace(c)) {

            if (!insideTag) {
                exiting = entering;
                entering = TEXT;
                if (exiting >= SEMIBLOCK) {
                    paragraphStart = ret.length();
                }
            }

            if (entering != INTERNAL && exiting != INTERNAL) {
                int swallowBreaks = 0;
                if (paragraphs && (entering != BLOCK || exiting != BLOCK) && (exiting < BLOCK)
                        && (linebreaks > 1) && paragraphStart < ret.length()) {
                    ret.insert(paragraphStart, "<p>");
                    ret.append("</p>");
                    swallowBreaks = 2;
                }

                // treat entering a SEMIBLOCK as entering a TEXT 
                int _entering = entering == SEMIBLOCK ? TEXT : entering;
                for (int k = linebreaks - 1; k >= 0; k--) {
                    if (k >= swallowBreaks && k >= _entering && k >= exiting) {
                        ret.append("<br />");
                    }
                    ret.append(newLine);
                }
                if (exiting >= SEMIBLOCK || linebreaks > 1) {
                    paragraphStart = ret.length();
                }

            }

            linebreaks = 0;
            triggerBreak = false;
        }

        switch (c) {
        case '<':

            if (insideTag) {
                ret.append('<');
            } else {
                ret.append("&lt;");
            }

            break;

        case '&':

            // check if this is an HTML entity already,
            // in which case we pass it though unchanged
            if ((i < (l - 3)) && !insideCodeTag) {
                // is this a numeric entity?
                if (str.charAt(i + 1) == '#') {
                    int j = i + 2;

                    while ((j < l) && Character.isDigit(str.charAt(j)))
                        j++;

                    if ((j < l) && (str.charAt(j) == ';')) {
                        ret.append("&");

                        break;
                    }
                } else {
                    int j = i + 1;

                    while ((j < l) && Character.isLetterOrDigit(str.charAt(j)))
                        j++;

                    if ((j < l) && (str.charAt(j) == ';')) {
                        ret.append("&");

                        break;
                    }
                }
            }

            // we didn't reach a break, so encode the ampersand as HTML entity
            ret.append("&amp;");

            break;

        case '\\':
            ret.append(c);

            if (insideTag && !insideComment) {
                escape = !escape;
            }

            break;

        case '"':
        case '\'':
            ret.append(c);

            if (!insideComment) {
                // check if the quote is escaped
                if (insideMacroTag) {
                    if (escape) {
                        escape = false;
                    } else if (macroQuoteChar == c) {
                        macroQuoteChar = '\u0000';
                    } else if (macroQuoteChar == '\u0000') {
                        macroQuoteChar = c;
                    }
                } else if (insideHtmlTag) {
                    if (escape) {
                        escape = false;
                    } else if (htmlQuoteChar == c) {
                        htmlQuoteChar = '\u0000';
                        htmlTagMode = TAG_SPACE;
                    } else if (htmlQuoteChar == '\u0000') {
                        htmlQuoteChar = c;
                    }
                }
            }

            break;

        case '\n':
            if (insideTag || insidePreTag) {
                ret.append('\n');
            } else {
                linebreaks++;
            }

            break;
        case '\r':
            if (insideTag || insidePreTag) {
                ret.append('\r');
            }
            break;

        case '>':

            // For Helma macro tags and comments, we overrule tag balancing,
            // i.e. we don't require that '<' and '>' be balanced within
            // macros and comments. Rather, we check for the matching closing tag.
            if (insideComment) {
                ret.append('>');
                insideComment = !((str.charAt(i - 2) == '-') && (str.charAt(i - 1) == '-'));
            } else if (insideMacroTag) {
                ret.append('>');
                insideMacroTag = !((str.charAt(i - 1) == '%') && (macroQuoteChar == '\u0000'));
            } else if (insideHtmlTag) {
                ret.append('>');

                // only leave HTML tag if quotation marks are balanced
                // within that tag.
                insideHtmlTag = htmlQuoteChar != '\u0000';

                // Check if this is an empty tag so we don't generate an
                // additional </close> tag.
                if (str.charAt(i - 1) == '/') {
                    // this is to avoid misinterpreting tags like
                    // <a href=http://foo/> as empty
                    if (htmlTagMode != TAG_ATT_VAL && htmlTagMode != TAG_ATT_NAME) {
                        openTags.pop();
                    }
                }

                exiting = entering;
                if (exiting > 0) {
                    triggerBreak = !insidePreTag;
                }

            } else {
                ret.append("&gt;");
            }

            // check if we still are inside any kind of tag
            insideTag = insideComment || insideMacroTag || insideHtmlTag;
            insideCloseTag = insideTag;

            break;

        default:

            if (insideHtmlTag && !insideCloseTag) {
                switch (htmlTagMode) {
                case TAG_NAME:
                    if (!Character.isLetterOrDigit(c)) {
                        htmlTagMode = TAG_SPACE;
                    }
                    break;
                case TAG_SPACE:
                    if (Character.isLetterOrDigit(c)) {
                        htmlTagMode = TAG_ATT_NAME;
                    }
                    break;
                case TAG_ATT_NAME:
                    if (c == '=') {
                        htmlTagMode = TAG_ATT_VAL;
                    } else if (c == ' ') {
                        htmlTagMode = TAG_SPACE;
                    }
                    break;
                case TAG_ATT_VAL:
                    if (Character.isWhitespace(c) && htmlQuoteChar == '\u0000') {
                        htmlTagMode = TAG_SPACE;
                    }
                    break;
                }
            }
            if (c < 128) {
                ret.append(c);
            } else if ((c >= 128) && (c < 256)) {
                ret.append(transform[c - 128]);
            } else {
                ret.append("&#");
                ret.append((int) c);
                ret.append(";");
            }

            escape = false;
        }
    }

    // if tags were opened but not closed, close them.
    int o = openTags.size();

    if (o > 0) {
        for (int k = 0; k < o; k++) {
            Object tag = openTags.pop();
            if (!emptyTags.contains(tag)) {
                ret.append("</");
                ret.append(tag);
                ret.append(">");
            }
        }
    }

    // add remaining newlines we may have collected
    int swallowBreaks = 0;
    if (paragraphs && entering < BLOCK) {
        ret.insert(paragraphStart, "<p>");
        ret.append("</p>");
        swallowBreaks = 2;
    }

    if (linebreaks > 0) {
        for (int i = linebreaks - 1; i >= 0; i--) {
            if (i >= swallowBreaks && i > exiting) {
                ret.append("<br />");
            }
            ret.append(newLine);
        }
    }
}

From source file:edu.jhu.hlt.concrete.ingesters.bolt.BoltForumPostIngester.java

/**
 * Counts the leading whitespace characters of a string, as determined by
 * {@link Character#isWhitespace(char)}.
 *
 * @param str the string to inspect; must not be {@code null}
 * @return the length of the longest whitespace prefix, which equals the
 *         length of the string when it consists of whitespace only
 */
private static int getLeftSpacesPaddingCount(final String str) {
    int prefix = 0;
    while (prefix < str.length() && Character.isWhitespace(str.charAt(prefix))) {
        prefix++;
    }
    return prefix;
}

From source file:edu.jhu.hlt.concrete.ingesters.bolt.BoltForumPostIngester.java

/**
 * Counts the trailing whitespace characters of a string, as determined by
 * {@link Character#isWhitespace(char)}.
 *
 * @param str the string to inspect; must not be {@code null}
 * @return the number of whitespace characters that follow the last
 *         non-whitespace character; 0 if the string is empty or consists
 *         of whitespace only
 */
private static int getRightSpacesPaddingCount(final String str) {
    final int len = str.length();
    // Walk backwards over every index, including 0, so that a string whose only
    // non-whitespace character is its first one is still measured correctly.
    for (int i = len - 1; i >= 0; i--) {
        if (!Character.isWhitespace(str.charAt(i))) {
            return len - 1 - i;
        }
    }
    return 0;
}

From source file:lucee.commons.lang.StringUtil.java

/**
 * Tells whether a character counts as white space.
 *
 * @param c character to check
 * @param checkSpecialWhiteSpace if set to true, the character is also compared
 *        against the entries of {@code SPECIAL_WHITE_SPACE_CHARS} (uncommon white spaces)
 * @return {@code true} if {@link Character#isWhitespace(char)} accepts the character or,
 *         with {@code checkSpecialWhiteSpace} set, it equals one of the special entries;
 *         {@code false} otherwise
 */
public static boolean isWhiteSpace(char c, boolean checkSpecialWhiteSpace) {
    boolean match = Character.isWhitespace(c);
    if (!match && checkSpecialWhiteSpace) {
        int idx = 0;
        while (!match && idx < SPECIAL_WHITE_SPACE_CHARS.length) {
            match = (c == SPECIAL_WHITE_SPACE_CHARS[idx]);
            idx++;
        }
    }
    return match;
}