List of usage examples for java.lang.Character.isWhitespace
public static boolean isWhitespace(int codePoint)
From source file:henplus.commands.SQLCommand.java
/** * parses 'tablename ((AS)? alias)? [,...]' and returns a map, that maps the names (or aliases) to the tablenames. *//* w w w . j a va 2s . c o m*/ private Map<String, String> tableDeclParser(final String tableDecl) { final StringTokenizer tokenizer = new StringTokenizer(tableDecl, " \t\n\r\f,", true); final Map<String, String> result = new HashMap<String, String>(); String tok; String table = null; String alias = null; int state = 0; while (tokenizer.hasMoreElements()) { tok = tokenizer.nextToken(); if (tok.length() == 1 && Character.isWhitespace(tok.charAt(0))) { continue; } switch (state) { case 0: { // initial/endstate table = tok; alias = tok; state = 1; break; } case 1: { // table seen, waiting for potential alias. if ("AS".equals(tok.toUpperCase())) { state = 2; } else if (",".equals(tok)) { state = 0; // we are done. } else { alias = tok; state = 3; } break; } case 2: { // 'AS' seen, waiting definitly for alias. if (",".equals(tok)) { // error: alias missing for $table. state = 0; } else { alias = tok; state = 3; } break; } case 3: { // waiting for ',' at end of 'table (as)? alias' if (!",".equals(tok)) { // error: ',' expected. } state = 0; break; } } if (state == 0) { result.put(alias, table); } } // store any unfinished state.. if (state == 1 || state == 3) { result.put(alias, table); } else if (state == 2) { // error: alias expected for $table. } return result; }
From source file:egovframework.oe1.utl.fcc.service.EgovStringUtil.java
License:asdf
/**
 * <p>
 * Removes every character for which {@link Character#isWhitespace(char)}
 * returns {@code true}.
 * </p>
 *
 * <pre>
 * StringUtil.removeWhitespace(null)      = null
 * StringUtil.removeWhitespace("")        = ""
 * StringUtil.removeWhitespace("abc")     = "abc"
 * StringUtil.removeWhitespace(" ab c ")  = "abc"
 * </pre>
 *
 * @param str the String to delete whitespace from, may be null
 * @return the String without whitespace; the input itself is returned when
 *         {@code isEmpty(str)} holds or when it contains no whitespace
 */
public static String removeWhitespace(String str) {
    if (isEmpty(str)) {
        return str;
    }
    final int length = str.length();
    final StringBuilder kept = new StringBuilder(length);
    for (int pos = 0; pos < length; pos++) {
        final char current = str.charAt(pos);
        if (Character.isWhitespace(current)) {
            continue;
        }
        kept.append(current);
    }
    // Nothing was dropped: hand back the original instance.
    return kept.length() == length ? str : kept.toString();
}
From source file:de.knowwe.diaflux.utils.DOTImporter.java
/**
 * Returns the index of the first character at or after {@code start} that is
 * neither whitespace (per {@link Character#isWhitespace(char)}) nor {@code '='}.
 *
 * @param section the text being scanned
 * @param start index at which scanning begins
 * @return the index of the first character that is not skipped
 * @throws ImportException if the end of {@code section} is reached before such
 *         a character is found, including when {@code start} is already at or
 *         past the end
 */
private int skipWhiteSpace(String section, int start) throws ImportException {
    int pos = start;
    while (true) {
        // Check the bound before reading, so running off the end is always
        // reported as an ImportException rather than an index error.
        if (pos >= section.length()) {
            throw new ImportException("Invalid attributes");
        }
        final char current = section.charAt(pos);
        if (!Character.isWhitespace(current) && current != '=') {
            return pos;
        }
        pos++;
    }
}
From source file:ths.commons.util.StringUtils.java
/**
 * <p>Removes leading characters from a String for as long as they belong to
 * a given set.</p>
 *
 * <p>A {@code null} input returns {@code null}; an empty input ("") returns
 * the empty String.</p>
 *
 * <p>When {@code stripChars} is {@code null}, the characters removed are those
 * that {@link Character#isWhitespace(char)} accepts. When it is empty, the
 * input is returned unchanged.</p>
 *
 * <pre>
 * StringUtils.stripStart(null, *)          = null
 * StringUtils.stripStart("", *)            = ""
 * StringUtils.stripStart("abc", "")        = "abc"
 * StringUtils.stripStart("abc", null)      = "abc"
 * StringUtils.stripStart("  abc", null)    = "abc"
 * StringUtils.stripStart("abc  ", null)    = "abc  "
 * StringUtils.stripStart(" abc ", null)    = "abc "
 * StringUtils.stripStart("yxabc  ", "xyz") = "abc  "
 * </pre>
 *
 * @param str  the String to remove characters from, may be null
 * @param stripChars  the characters to remove, null treated as whitespace
 * @return the stripped String, {@code null} if null String input
 */
public static String stripStart(String str, String stripChars) {
    if (str == null) {
        return str;
    }
    final int length = str.length();
    if (length == 0) {
        return str;
    }
    if (stripChars != null && stripChars.length() == 0) {
        return str;
    }
    int first = 0;
    while (first < length) {
        final char candidate = str.charAt(first);
        final boolean strippable = (stripChars == null)
                ? Character.isWhitespace(candidate)
                : stripChars.indexOf(candidate) != INDEX_NOT_FOUND;
        if (!strippable) {
            break;
        }
        first++;
    }
    return str.substring(first);
}
From source file:ca.mcgill.cs.swevo.qualyzer.editors.RTFDocumentProvider2.java
private ParserPair getUnicode(InputStream contentStream, Map<String, Integer> state) throws IOException { StringBuilder control = new StringBuilder(); int c = contentStream.read(); if (c != -1) { char ch = (char) c; if (ch == UNICODE_COUNT) { ParserPair number = getNumber(contentStream); control.append(UNICODE_COUNT_FULL); control.append(number.fString); c = number.fChar;/* w w w. java2s .c om*/ // This is a control so a space is a delimiter. if (Character.isWhitespace((char) c)) { c = contentStream.read(); } } else if (!Character.isDigit(ch)) { ParserPair result = handleControl(contentStream, c, String.valueOf(UNICODE), state); c = result.fChar; control = new StringBuilder(result.fString); } else { ParserPair number = getNumber(contentStream, Integer.parseInt(String.valueOf(ch))); int replacement = number.fChar; ParserPair replPair = getUnicodeReplacement(contentStream, replacement, state); c = replPair.fChar; control.append(UNICODE); control.append(number.fString); control.append(replPair.fString); } } return new ParserPair(c, control.toString()); }
From source file:com.openerp.addons.messages.Message.java
public static String capitalizeString(String string) { char[] chars = string.toLowerCase().toCharArray(); boolean found = false; for (int i = 0; i < chars.length; i++) { if (!found && Character.isLetter(chars[i])) { chars[i] = Character.toUpperCase(chars[i]); found = true;/*from w w w .j ava 2s . c om*/ } else if (Character.isWhitespace(chars[i]) || chars[i] == '.' || chars[i] == '\'') { // You can add other chars here found = false; } } return String.valueOf(chars); }
From source file:HtmlEncoder.java
/** * Do "smart" encodging on a string. This means that valid HTML entities and tags, * Helma macros and HTML comments are passed through unescaped, while * other occurrences of '<', '>' and '&' are encoded to HTML entities. * * @param str the string to encode/* ww w.ja v a 2 s . c o m*/ * @param ret the string buffer to encode to * @param paragraphs if true use p tags for paragraphs, otherwise just use br's * @param allowedTags a set containing the names of allowed tags as strings. All other * tags will be escaped */ public final static void encode(String str, StringBuffer ret, boolean paragraphs, Set allowedTags) { if (str == null) { return; } int l = str.length(); // where to insert the <p> tag in case we want to create a paragraph later on int paragraphStart = ret.length(); // what kind of element/text are we leaving and entering? // this is one of TEXT|SEMIBLOCK|BLOCK|INTERNAL // depending on this information, we decide whether and how to insert // paragraphs and line breaks. "entering" a tag means we're at the '<' // and exiting means we're at the '>', not that it's a start or close tag. byte entering = TEXT; byte exiting = TEXT; Stack openTags = new Stack(); // are we currently within a < and a > that consitute some kind of tag? // we use tag balancing to know whether we are inside a tag (and should // pass things through unchanged) or outside (and should encode stuff). boolean insideTag = false; // are we inside an HTML tag? boolean insideHtmlTag = false; boolean insideCloseTag = false; byte htmlTagMode = TAG_NAME; // if we are inside a <code> tag, we encode everything to make // documentation work easier boolean insideCodeTag = false; boolean insidePreTag = false; // are we within a Helma <% macro %> tag? We treat macro tags and // comments specially, since we can't rely on tag balancing // to know when we leave a macro tag or comment. boolean insideMacroTag = false; // are we inside an HTML comment? 
boolean insideComment = false; // the quotation mark we are in within an HTML or Macro tag, if any char htmlQuoteChar = '\u0000'; char macroQuoteChar = '\u0000'; // number of newlines met since the last non-whitespace character int linebreaks = 0; // did we meet a backslash escape? boolean escape = false; boolean triggerBreak = false; for (int i = 0; i < l; i++) { char c = str.charAt(i); // step one: check if this is the beginning of an HTML tag, comment or // Helma macro. if (c == '<') { if (i < (l - 2)) { if (!insideMacroTag && ('%' == str.charAt(i + 1))) { // this is the beginning of a Helma macro tag if (!insideCodeTag) { insideMacroTag = insideTag = true; macroQuoteChar = '\u0000'; } } else if (('!' == str.charAt(i + 1)) && ('-' == str.charAt(i + 2))) { // the beginning of an HTML comment? if (!insideCodeTag) { insideComment = insideTag = ((i < (l - 3)) && ('-' == str.charAt(i + 3))); } } else if (!insideTag) { // check if this is a HTML tag. insideCloseTag = ('/' == str.charAt(i + 1)); int tagStart = insideCloseTag ? (i + 2) : (i + 1); int j = tagStart; while ((j < l) && Character.isLetterOrDigit(str.charAt(j))) j++; if ((j > tagStart) && (j < l)) { String tagName = str.substring(tagStart, j).toLowerCase(); if ("code".equals(tagName) && insideCloseTag && insideCodeTag) { insideCodeTag = false; } if (((allowedTags == null) || allowedTags.contains(tagName)) && allTags.contains(tagName) && !insideCodeTag) { insideHtmlTag = insideTag = true; htmlQuoteChar = '\u0000'; htmlTagMode = TAG_NAME; exiting = entering; entering = TEXT; if (internalTags.contains(tagName)) { entering = INTERNAL; } else if (blockTags.contains(tagName)) { entering = BLOCK; } else if (semiBlockTags.contains(tagName)) { entering = paragraphs ? 
BLOCK : SEMIBLOCK; } if (entering > 0) { triggerBreak = !insidePreTag; } if (insideCloseTag) { int t = openTags.search(tagName); if (t == -1) { i = j; insideHtmlTag = insideTag = false; continue; } else if (t > 1) { for (int k = 1; k < t; k++) { Object tag = openTags.pop(); if (!emptyTags.contains(tag)) { ret.append("</"); ret.append(tag); ret.append(">"); } } } openTags.pop(); } else { openTags.push(tagName); } if ("code".equals(tagName) && !insideCloseTag) { insideCodeTag = true; } if ("pre".equals(tagName)) { insidePreTag = !insideCloseTag; } } } } } // if (i < l-2) } if ((triggerBreak || linebreaks > 0) && !Character.isWhitespace(c)) { if (!insideTag) { exiting = entering; entering = TEXT; if (exiting >= SEMIBLOCK) { paragraphStart = ret.length(); } } if (entering != INTERNAL && exiting != INTERNAL) { int swallowBreaks = 0; if (paragraphs && (entering != BLOCK || exiting != BLOCK) && (exiting < BLOCK) && (linebreaks > 1) && paragraphStart < ret.length()) { ret.insert(paragraphStart, "<p>"); ret.append("</p>"); swallowBreaks = 2; } // treat entering a SEMIBLOCK as entering a TEXT int _entering = entering == SEMIBLOCK ? TEXT : entering; for (int k = linebreaks - 1; k >= 0; k--) { if (k >= swallowBreaks && k >= _entering && k >= exiting) { ret.append("<br />"); } ret.append(newLine); } if (exiting >= SEMIBLOCK || linebreaks > 1) { paragraphStart = ret.length(); } } linebreaks = 0; triggerBreak = false; } switch (c) { case '<': if (insideTag) { ret.append('<'); } else { ret.append("<"); } break; case '&': // check if this is an HTML entity already, // in which case we pass it though unchanged if ((i < (l - 3)) && !insideCodeTag) { // is this a numeric entity? 
if (str.charAt(i + 1) == '#') { int j = i + 2; while ((j < l) && Character.isDigit(str.charAt(j))) j++; if ((j < l) && (str.charAt(j) == ';')) { ret.append("&"); break; } } else { int j = i + 1; while ((j < l) && Character.isLetterOrDigit(str.charAt(j))) j++; if ((j < l) && (str.charAt(j) == ';')) { ret.append("&"); break; } } } // we didn't reach a break, so encode the ampersand as HTML entity ret.append("&"); break; case '\\': ret.append(c); if (insideTag && !insideComment) { escape = !escape; } break; case '"': case '\'': ret.append(c); if (!insideComment) { // check if the quote is escaped if (insideMacroTag) { if (escape) { escape = false; } else if (macroQuoteChar == c) { macroQuoteChar = '\u0000'; } else if (macroQuoteChar == '\u0000') { macroQuoteChar = c; } } else if (insideHtmlTag) { if (escape) { escape = false; } else if (htmlQuoteChar == c) { htmlQuoteChar = '\u0000'; htmlTagMode = TAG_SPACE; } else if (htmlQuoteChar == '\u0000') { htmlQuoteChar = c; } } } break; case '\n': if (insideTag || insidePreTag) { ret.append('\n'); } else { linebreaks++; } break; case '\r': if (insideTag || insidePreTag) { ret.append('\r'); } break; case '>': // For Helma macro tags and comments, we overrule tag balancing, // i.e. we don't require that '<' and '>' be balanced within // macros and comments. Rather, we check for the matching closing tag. if (insideComment) { ret.append('>'); insideComment = !((str.charAt(i - 2) == '-') && (str.charAt(i - 1) == '-')); } else if (insideMacroTag) { ret.append('>'); insideMacroTag = !((str.charAt(i - 1) == '%') && (macroQuoteChar == '\u0000')); } else if (insideHtmlTag) { ret.append('>'); // only leave HTML tag if quotation marks are balanced // within that tag. insideHtmlTag = htmlQuoteChar != '\u0000'; // Check if this is an empty tag so we don't generate an // additional </close> tag. 
if (str.charAt(i - 1) == '/') { // this is to avoid misinterpreting tags like // <a href=http://foo/> as empty if (htmlTagMode != TAG_ATT_VAL && htmlTagMode != TAG_ATT_NAME) { openTags.pop(); } } exiting = entering; if (exiting > 0) { triggerBreak = !insidePreTag; } } else { ret.append(">"); } // check if we still are inside any kind of tag insideTag = insideComment || insideMacroTag || insideHtmlTag; insideCloseTag = insideTag; break; default: if (insideHtmlTag && !insideCloseTag) { switch (htmlTagMode) { case TAG_NAME: if (!Character.isLetterOrDigit(c)) { htmlTagMode = TAG_SPACE; } break; case TAG_SPACE: if (Character.isLetterOrDigit(c)) { htmlTagMode = TAG_ATT_NAME; } break; case TAG_ATT_NAME: if (c == '=') { htmlTagMode = TAG_ATT_VAL; } else if (c == ' ') { htmlTagMode = TAG_SPACE; } break; case TAG_ATT_VAL: if (Character.isWhitespace(c) && htmlQuoteChar == '\u0000') { htmlTagMode = TAG_SPACE; } break; } } if (c < 128) { ret.append(c); } else if ((c >= 128) && (c < 256)) { ret.append(transform[c - 128]); } else { ret.append("&#"); ret.append((int) c); ret.append(";"); } escape = false; } } // if tags were opened but not closed, close them. int o = openTags.size(); if (o > 0) { for (int k = 0; k < o; k++) { Object tag = openTags.pop(); if (!emptyTags.contains(tag)) { ret.append("</"); ret.append(tag); ret.append(">"); } } } // add remaining newlines we may have collected int swallowBreaks = 0; if (paragraphs && entering < BLOCK) { ret.insert(paragraphStart, "<p>"); ret.append("</p>"); swallowBreaks = 2; } if (linebreaks > 0) { for (int i = linebreaks - 1; i >= 0; i--) { if (i >= swallowBreaks && i > exiting) { ret.append("<br />"); } ret.append(newLine); } } }
From source file:edu.jhu.hlt.concrete.ingesters.bolt.BoltForumPostIngester.java
/**
 * Counts the whitespace characters at the start of a string, i.e. the length
 * of its longest all-whitespace prefix.
 *
 * @param str the string to inspect; must not be null
 * @return the number of leading whitespace characters, which equals
 *         {@code str.length()} when the string is entirely whitespace
 */
private static int getLeftSpacesPaddingCount(final String str) {
    int count = 0;
    while (count < str.length() && Character.isWhitespace(str.charAt(count))) {
        count++;
    }
    return count;
}
From source file:edu.jhu.hlt.concrete.ingesters.bolt.BoltForumPostIngester.java
/** * Number of whitespace characters that follow a non-whitespace charachter * (if the given string is all whitespace, this returns 0). *///w w w .ja va 2 s . c o m private static int getRightSpacesPaddingCount(final String str) { final int lenIdx = str.length() - 1; for (int i = 0; i < lenIdx; i++) { char c = str.charAt(lenIdx - i); if (!Character.isWhitespace(c)) return i; } return 0; }
From source file:lucee.commons.lang.StringUtil.java
/**
 * Tells whether a character counts as white space.
 *
 * @param c character to check
 * @param checkSpecialWhiteSpace if set to true, the characters listed in
 *        {@code SPECIAL_WHITE_SPACE_CHARS} are accepted in addition to those
 *        matched by {@link Character#isWhitespace(char)}
 * @return {@code true} if the character is considered white space
 */
public static boolean isWhiteSpace(char c, boolean checkSpecialWhiteSpace) {
    if (Character.isWhitespace(c)) {
        return true;
    }
    if (!checkSpecialWhiteSpace) {
        return false;
    }
    int idx = 0;
    while (idx < SPECIAL_WHITE_SPACE_CHARS.length) {
        if (SPECIAL_WHITE_SPACE_CHARS[idx] == c) {
            return true;
        }
        idx++;
    }
    return false;
}