List of usage examples for java.lang.Character.isLetter
public static boolean isLetter(int codePoint)
From source file:banner.tagging.dictionary.DictionaryTagger.java
public void add(String text, Collection<EntityType> types) { // TODO Make configurable // if (text.length() == 1) // return;// www. ja va 2s. co m // TODO Add ability to not add items over N (eg 10) tokens long List<String> tokens = process(text); add(tokens, types); if (generate2PartVariations) { if (tokens.size() == 1 && tokens.get(0).matches("[A-Za-z]+[0-9]+")) { int split = 0; String token = tokens.get(0); while (Character.isLetter(token.charAt(split))) split++; add2Part(token.substring(0, split), token.substring(split, token.length()), types); } if (tokens.size() == 2) { add2Part(tokens.get(0), tokens.get(1), types); } if (tokens.size() == 3 && (tokens.get(1).equals("-") || tokens.get(1).equals("/"))) { add2Part(tokens.get(0), tokens.get(2), types); } } // TODO These lines add GENE recall but drop precision // if (tokens.size() > 1 && tokens.get(tokens.size() - // 1).equals("homolog")) // add(tokens.subList(0, tokens.size() - 1), types); }
From source file:com.google.dart.engine.services.refactoring.NamingConventions.java
private static RefactoringStatus validateIdentifier0(String identifier, String identifierName) { // has leading/trailing spaces String trimmed = identifier.trim(); if (!identifier.equals(trimmed)) { String message = MessageFormat.format("{0} must not start or end with a blank.", identifierName); return RefactoringStatus.createErrorStatus(message); }/*from w ww . j ava 2 s . co m*/ // empty int length = identifier.length(); if (length == 0) { String message = MessageFormat.format("{0} must not be empty.", identifierName); return RefactoringStatus.createErrorStatus(message); } char currentChar = identifier.charAt(0); if (!Character.isLetter(currentChar) && currentChar != '_') { String message = MessageFormat.format("{0} must not start with ''{1}''.", identifierName, currentChar); return RefactoringStatus.createErrorStatus(message); } for (int i = 1; i < length; i++) { currentChar = identifier.charAt(i); if (!Character.isLetterOrDigit(currentChar) && currentChar != '_') { String message = MessageFormat.format("{0} must not contain ''{1}''.", identifierName, currentChar); return RefactoringStatus.createErrorStatus(message); } } return new RefactoringStatus(); }
From source file:net.sf.jabref.bst.BibtexWidth.java
/** * * @param toMeasure//from w ww . j a v a2 s. c o m * @param warn * may-be-null * @return */ public static int width(String toMeasure) { /* * From Bibtex: We use the natural width for all but special characters, * and we complain if the string isn't brace-balanced. */ int i = 0; int n = toMeasure.length(); int braceLevel = 0; char[] c = toMeasure.toCharArray(); int result = 0; /* * From Bibtex: * * We use the natural widths of all characters except that some * characters have no width: braces, control sequences (except for the * usual 13 accented and foreign characters, whose widths are given in * the next module), and |white_space| following control sequences (even * a null control sequence). * */ while (i < n) { if (c[i] == '{') { braceLevel++; if ((braceLevel == 1) && ((i + 1) < n) && (c[i + 1] == '\\')) { i++; // skip brace while ((i < n) && (braceLevel > 0)) { i++; // skip backslash int afterBackslash = i; while ((i < n) && Character.isLetter(c[i])) { i++; } if ((i < n) && (i == afterBackslash)) { i++; // Skip non-alpha control seq } else { if (BibtexCaseChanger.findSpecialChar(c, afterBackslash).isPresent()) { result += BibtexWidth.getSpecialCharWidth(c, afterBackslash); } } while ((i < n) && Character.isWhitespace(c[i])) { i++; } while ((i < n) && (braceLevel > 0) && (c[i] != '\\')) { if (c[i] == '}') { braceLevel--; } else if (c[i] == '{') { braceLevel++; } else { result += BibtexWidth.getCharWidth(c[i]); } i++; } } continue; } } else if (c[i] == '}') { if (braceLevel > 0) { braceLevel--; } else { LOGGER.warn("Too many closing braces in string: " + toMeasure); } } result += BibtexWidth.getCharWidth(c[i]); i++; } if (braceLevel > 0) { LOGGER.warn("No enough closing braces in string: " + toMeasure); } return result; }
From source file:com.anysoftkeyboard.dictionaries.content.ContactsDictionary.java
@Override protected void addWordFromStorageToMemory(String name, int frequency) { //the word in Contacts is actually the full name, //so, let's break it to individual words. int len = name.length(); // TODO: Better tokenization for non-Latin writing systems String previousNamePart = null; for (int i = 0; i < len; i++) { if (Character.isLetter(name.charAt(i))) { int j; for (j = i + 1; j < len; j++) { char c = name.charAt(j); if (c != '-' && c != '\'' && !Character.isLetter(c)) { break; }/*from ww w . j a va 2s . c o m*/ } String namePart = name.substring(i, j); i = j - 1; // Safeguard against adding really long // words. Stack // may overflow due to recursion // Also don't add single letter words, // possibly confuses // capitalization of i. final int namePartLength = namePart.length(); if (namePartLength < MAX_WORD_LENGTH && namePartLength > 1) { //adding to next-namePart dictionary if (previousNamePart != null) { Map<CharSequence, NextWord> nextWords; if (mLoadingPhaseNextNames.containsKey(previousNamePart)) { nextWords = mLoadingPhaseNextNames.get(previousNamePart); } else { nextWords = new ArrayMap<>(); mLoadingPhaseNextNames.put(previousNamePart, nextWords); } if (nextWords.containsKey(namePart)) nextWords.get(namePart).markAsUsed(); else nextWords.put(namePart, new NextWord(namePart)); } int oldFrequency = getWordFrequency(namePart); //ensuring that frequencies do not go lower if (oldFrequency < frequency) { super.addWordFromStorageToMemory(namePart, frequency); } } //remembering this for the next loop previousNamePart = namePart; } } }
From source file:com.prowidesoftware.swift.model.IBAN.java
/** * * @param iban/* w ww. j av a 2s . c o m*/ * @return the resulting IBAN */ public String removeNonAlpha(final String iban) { final StringBuilder result = new StringBuilder(); for (int i = 0; i < iban.length(); i++) { char c = iban.charAt(i); if (Character.isLetter(c) || Character.isDigit(c)) { result.append((char) c); } } return result.toString(); }
From source file:eu.crisis_economics.configuration.FromFileConfigurationContext.java
/**
 * Replaces every whole-word occurrence of {@code name} in {@code expression} with the
 * text of {@code value}.
 * <p>
 * An occurrence counts as a whole word only when the characters directly before and
 * after it are not name characters (letter, digit or {@code '_'}). This keeps
 * {@code x} from being replaced inside {@code max}, {@code x1} or {@code x_scale}.
 * Text inserted by a replacement is never scanned again.
 *
 * @param expression the expression to scan
 * @param name       the name to look for; an empty name leaves the expression unchanged
 * @param value      the number whose {@code toString()} replaces the name
 * @return the expression with all whole-word occurrences replaced
 */
private static String scanForNamesAndReplace(String expression, String name, Number value) {
    if (name.isEmpty()) {
        // An empty name matches at every position; there is nothing sensible to replace.
        return expression;
    }
    final String replacement = value.toString();
    final int nameLength = name.length();
    String result = expression;
    int cursor = 0;
    while (true) {
        cursor = result.indexOf(name, cursor);
        if (cursor == -1) {
            break;
        }
        final int end = cursor + nameLength;
        final boolean boundedLeft = cursor == 0 || !isNameCharacter(result.charAt(cursor - 1));
        final boolean boundedRight = end == result.length() || !isNameCharacter(result.charAt(end));
        if (boundedLeft && boundedRight) {
            result = result.substring(0, cursor) + replacement + result.substring(end);
            // Continue after the inserted text so that it is never rescanned.
            cursor += replacement.length();
        } else {
            // The match is part of a longer name; keep looking one character further.
            ++cursor;
        }
    }
    return result;
}

/** Tells whether {@code c} can be part of a name: a letter, a digit or an underscore. */
private static boolean isNameCharacter(char c) {
    return c == '_' || Character.isLetterOrDigit(c);
}
From source file:com.joliciel.talismane.tokeniser.filters.TokenRegexFilterImpl.java
Pattern getPattern() { if (pattern == null) { // we may need to replace WordLists by the list contents String myRegex = this.regex; if (LOG.isTraceEnabled()) { LOG.trace("Regex: " + myRegex); }/* w w w . java 2 s . c o m*/ if (this.autoWordBoundaries) { Boolean startsWithLetter = null; for (int i = 0; i < myRegex.length() && startsWithLetter == null; i++) { char c = myRegex.charAt(i); if (c == '\\') { i++; c = myRegex.charAt(i); if (c == 'd' || c == 'w') { startsWithLetter = true; } else if (c == 's' || c == 'W' || c == 'b' || c == 'B') { startsWithLetter = false; } else if (c == 'p') { i += 2; // skip the open curly brackets int closeCurlyBrackets = myRegex.indexOf('}', i); int openParentheses = myRegex.indexOf('(', i); int endIndex = closeCurlyBrackets; if (openParentheses > 0 && openParentheses < closeCurlyBrackets) endIndex = openParentheses; if (endIndex > 0) { String specialClass = myRegex.substring(i, endIndex); if (specialClass.equals("WordList")) { startsWithLetter = true; } } } break; } else if (c == '[' || c == '(') { // do nothing } else if (Character.isLetter(c) || Character.isDigit(c)) { startsWithLetter = true; } else { startsWithLetter = false; } } Boolean endsWithLetter = null; for (int i = myRegex.length() - 1; i >= 0 && endsWithLetter == null; i--) { char c = myRegex.charAt(i); char prevC = ' '; if (i >= 1) prevC = myRegex.charAt(i - 1); if (prevC == '\\') { if (c == 'd' || c == 'w') { endsWithLetter = true; } else if (c == 's' || c == 'W' || c == 'b' || c == 'B') { endsWithLetter = false; } else if (c == 'p') { i += 2; // skip the open curly brackets int closeCurlyBrackets = myRegex.indexOf('}', i); int openParentheses = myRegex.indexOf('(', i); int endIndex = closeCurlyBrackets; if (openParentheses < closeCurlyBrackets) endIndex = openParentheses; if (endIndex > 0) { String specialClass = myRegex.substring(i, endIndex); if (specialClass.equals("WordList") || specialClass.equals("Alpha") || specialClass.equals("Lower") || 
specialClass.equals("Upper") || specialClass.equals("ASCII") || specialClass.equals("Digit")) { startsWithLetter = true; } } } break; } else if (c == ']' || c == ')' || c == '+') { // do nothing } else if (c == '}') { int startIndex = myRegex.lastIndexOf('{') + 1; int closeCurlyBrackets = myRegex.indexOf('}', startIndex); int openParentheses = myRegex.indexOf('(', startIndex); int endIndex = closeCurlyBrackets; if (openParentheses > 0 && openParentheses < closeCurlyBrackets) endIndex = openParentheses; if (endIndex > 0) { String specialClass = myRegex.substring(startIndex, endIndex); if (specialClass.equals("WordList") || specialClass.equals("Alpha") || specialClass.equals("Lower") || specialClass.equals("Upper") || specialClass.equals("ASCII") || specialClass.equals("Digit")) { endsWithLetter = true; } } break; } else if (Character.isLetter(c) || Character.isDigit(c)) { endsWithLetter = true; } else { endsWithLetter = false; } } if (startsWithLetter != null && startsWithLetter) { myRegex = "\\b" + myRegex; } if (endsWithLetter != null && endsWithLetter) { myRegex = myRegex + "\\b"; } if (LOG.isTraceEnabled()) { LOG.trace("After autoWordBoundaries: " + myRegex); } } if (!this.caseSensitive || !this.diacriticSensitive) { StringBuilder regexBuilder = new StringBuilder(); for (int i = 0; i < myRegex.length(); i++) { char c = myRegex.charAt(i); if (c == '\\') { // escape - skip next regexBuilder.append(c); i++; c = myRegex.charAt(i); regexBuilder.append(c); } else if (c == '[') { // character group, don't change it regexBuilder.append(c); while (c != ']' && i < myRegex.length()) { i++; c = myRegex.charAt(i); regexBuilder.append(c); } } else if (c == '{') { // command, don't change it regexBuilder.append(c); while (c != '}' && i < myRegex.length()) { i++; c = myRegex.charAt(i); regexBuilder.append(c); } } else if (Character.isLetter(c)) { Set<String> chars = new TreeSet<String>(); chars.add("" + c); char noAccent = diacriticPattern.matcher(Normalizer.normalize("" + c, 
Form.NFD)) .replaceAll("").charAt(0); if (!this.caseSensitive) { chars.add("" + Character.toUpperCase(c)); chars.add("" + Character.toLowerCase(c)); chars.add("" + Character.toUpperCase(noAccent)); } if (!this.diacriticSensitive) { chars.add("" + noAccent); if (!this.caseSensitive) { chars.add("" + Character.toLowerCase(noAccent)); } } if (chars.size() == 1) { regexBuilder.append(c); } else { regexBuilder.append('['); for (String oneChar : chars) { regexBuilder.append(oneChar); } regexBuilder.append(']'); } } else { regexBuilder.append(c); } } myRegex = regexBuilder.toString(); if (LOG.isTraceEnabled()) { LOG.trace("After caseSensitive: " + myRegex); } } Matcher matcher = wordListPattern.matcher(myRegex); StringBuilder regexBuilder = new StringBuilder(); int lastIndex = 0; while (matcher.find()) { String[] params = matcher.group(1).split(","); int start = matcher.start(); int end = matcher.end(); regexBuilder.append(myRegex.substring(lastIndex, start)); String wordListName = params[0]; boolean uppercaseOptional = false; boolean diacriticsOptional = false; boolean lowercaseOptional = false; boolean firstParam = true; for (String param : params) { if (firstParam) { /* word list name */ } else if (param.equals("diacriticsOptional")) diacriticsOptional = true; else if (param.equals("uppercaseOptional")) uppercaseOptional = true; else if (param.equals("lowercaseOptional")) lowercaseOptional = true; else throw new TalismaneException( "Unknown parameter in word list " + matcher.group(1) + ": " + param); firstParam = false; } ExternalWordList wordList = externalResourceFinder.getExternalWordList(wordListName); if (wordList == null) throw new TalismaneException("Unknown word list: " + wordListName); StringBuilder sb = new StringBuilder(); boolean firstWord = true; for (String word : wordList.getWordList()) { if (!firstWord) sb.append("|"); word = Normalizer.normalize(word, Form.NFC); if (uppercaseOptional || diacriticsOptional) { String wordNoDiacritics = 
Normalizer.normalize(word, Form.NFD) .replaceAll("\\p{InCombiningDiacriticalMarks}+", ""); String wordLowercase = word.toLowerCase(Locale.ENGLISH); String wordLowercaseNoDiacritics = Normalizer.normalize(wordLowercase, Form.NFD) .replaceAll("\\p{InCombiningDiacriticalMarks}+", ""); String wordUppercase = wordNoDiacritics.toUpperCase(Locale.ENGLISH); boolean needsGrouping = false; if (uppercaseOptional && !word.equals(wordLowercase)) needsGrouping = true; if (diacriticsOptional && !word.equals(wordNoDiacritics)) needsGrouping = true; if (lowercaseOptional && !word.equals(wordUppercase)) needsGrouping = true; if (needsGrouping) { for (int i = 0; i < word.length(); i++) { char c = word.charAt(i); boolean grouped = false; if (uppercaseOptional && c != wordLowercase.charAt(i)) grouped = true; if (diacriticsOptional && c != wordNoDiacritics.charAt(i)) grouped = true; if (lowercaseOptional && c != wordUppercase.charAt(i)) grouped = true; if (!grouped) sb.append(c); else { sb.append("["); String group = "" + c; if (uppercaseOptional && group.indexOf(wordLowercase.charAt(i)) < 0) group += (wordLowercase.charAt(i)); if (lowercaseOptional && group.indexOf(wordUppercase.charAt(i)) < 0) group += (wordUppercase.charAt(i)); if (diacriticsOptional && group.indexOf(wordNoDiacritics.charAt(i)) < 0) group += (wordNoDiacritics.charAt(i)); if (uppercaseOptional && diacriticsOptional && group.indexOf(wordLowercaseNoDiacritics.charAt(i)) < 0) group += (wordLowercaseNoDiacritics.charAt(i)); sb.append(group); sb.append("]"); } // does this letter need grouping? } // next letter } else { sb.append(word); } // any options activated? } else { sb.append(word); } firstWord = false; } // next word in list regexBuilder.append(sb.toString()); lastIndex = end; } // next match regexBuilder.append(myRegex.substring(lastIndex)); myRegex = regexBuilder.toString(); this.pattern = Pattern.compile(myRegex, Pattern.UNICODE_CHARACTER_CLASS); } return pattern; }
From source file:org.lightjason.agentspeak.common.CCommon.java
/** * checks if an action is usable/*from ww w. ja v a 2s. c om*/ * * @param p_action action object * @return boolean usable flag */ private static boolean actionusable(final IAction p_action) { if ((p_action.name() == null) || (p_action.name().isEmpty()) || (p_action.name().get(0).trim().isEmpty())) { LOGGER.warning(CCommon.languagestring(CCommon.class, "actionnameempty")); return false; } if (!Character.isLetter(p_action.name().get(0).charAt(0))) { LOGGER.warning(CCommon.languagestring(CCommon.class, "actionletter", p_action)); return false; } if (!Character.isLowerCase(p_action.name().get(0).charAt(0))) { LOGGER.warning(CCommon.languagestring(CCommon.class, "actionlowercase", p_action)); return false; } if (p_action.minimalArgumentNumber() < 0) { LOGGER.warning(CCommon.languagestring(CCommon.class, "actionargumentsnumber", p_action)); return false; } return true; }
From source file:org.bd2kccc.bd2kcccpubmed.Crawler.java
int getCase(String word) { if (word.isEmpty()) return 0; boolean uppercase = true; boolean lowercase = true; boolean titlecase = true; boolean mixedcase = true; char[] letters = word.toCharArray(); if (Character.isLetter(letters[0])) { if (Character.isLowerCase(letters[0])) { //titlecase = false; //let's not require every word to be titlecase }// ww w. j av a2 s . com } else { uppercase = false; lowercase = false; titlecase = false; } for (int i = 1; i < letters.length; i++) { char letter = letters[i]; if (!Character.isLetter(letter)) { uppercase = false; lowercase = false; titlecase = false; break; } if (Character.isLowerCase(letter)) uppercase = false; else { lowercase = false; titlecase = false; } } if (uppercase && word.length() > 1) return UPPERCASE; if (lowercase) return LOWERCASE; if (titlecase) return TITLECASE; if (mixedcase) return MIXEDCASE; return 0; }
From source file:org.pentaho.di.jdbc.SQLParser.java
String[] parse(boolean extractTable) throws SQLException { boolean isSelect = false; boolean isModified = false; boolean isSlowScan = true; try {//from ww w . ja va 2s .c om while (s < len) { final char c = in[s]; switch (c) { case '{': escape(); isModified = true; break; case '[': case '"': case '\'': copyString(); break; case '?': copyParam(null, d); break; case '/': if (s + 1 < len && in[s + 1] == '*') { skipMultiComments(); } else { out[d++] = c; s++; } break; case '-': if (s + 1 < len && in[s + 1] == '-') { skipSingleComments(); } else { out[d++] = c; s++; } break; default: if (isSlowScan && Character.isLetter(c)) { if (keyWord == null) { keyWord = copyKeyWord(); if ("select".equals(keyWord)) { isSelect = true; } isSlowScan = extractTable && isSelect; break; } if (extractTable && isSelect) { String sqlWord = copyKeyWord(); if ("from".equals(sqlWord)) { // Ensure only first 'from' is processed isSlowScan = false; tableName = getTableName(); } break; } } out[d++] = c; s++; break; } } String result[] = new String[4]; // return sql and procname result[0] = (isModified) ? new String(out, 0, d) : sql; result[1] = procName; result[2] = (keyWord == null) ? "" : keyWord; result[3] = tableName; return result; } catch (IndexOutOfBoundsException e) { // Should only come here if string is invalid in some way. throw new SQLException( BaseMessages.getString(PKG, "error.parsesql.missing", String.valueOf(terminator)), "22025"); } }