Example usage for java.lang Character isLetter

List of usage examples for java.lang Character isLetter

Introduction

On this page you can find example usages of java.lang.Character.isLetter.

Prototype

public static boolean isLetter(int codePoint) 

Source Link

Document

Determines if the specified character (Unicode code point) is a letter.

Usage

From source file:banner.tagging.dictionary.DictionaryTagger.java

/**
 * Adds {@code text} to the dictionary for the given entity types.
 * <p>
 * The text is tokenized with {@code process} and the token list is added as-is. When
 * {@code generate2PartVariations} is enabled, a two-part variation is additionally
 * registered through {@code add2Part} in each of these cases:
 * <ul>
 * <li>a single token made of ASCII letters followed by digits, split where the digits start;</li>
 * <li>exactly two tokens;</li>
 * <li>exactly three tokens whose middle token is {@code "-"} or {@code "/"} (the separator is dropped).</li>
 * </ul>
 *
 * @param text  the raw text to add
 * @param types the entity types to associate with the text
 */
public void add(String text, Collection<EntityType> types) {
    // TODO Make configurable
    // if (text.length() == 1)
    // return;
    // TODO Add ability to not add items over N (eg 10) tokens long
    final List<String> parts = process(text);
    add(parts, types);

    if (!generate2PartVariations) {
        return;
    }

    if (parts.size() == 1 && parts.get(0).matches("[A-Za-z]+[0-9]+")) {
        final String single = parts.get(0);
        // The regex guarantees at least one trailing digit, so this scan stops in bounds.
        int boundary = 0;
        while (Character.isLetter(single.charAt(boundary))) {
            boundary++;
        }
        final String letters = single.substring(0, boundary);
        final String digits = single.substring(boundary);
        add2Part(letters, digits, types);
    }

    if (parts.size() == 2) {
        add2Part(parts.get(0), parts.get(1), types);
    }

    if (parts.size() == 3) {
        final String middle = parts.get(1);
        if (middle.equals("-") || middle.equals("/")) {
            add2Part(parts.get(0), parts.get(2), types);
        }
    }
    // TODO These lines add GENE recall but drop precision
    // if (tokens.size() > 1 && tokens.get(tokens.size() -
    // 1).equals("homolog"))
    // add(tokens.subList(0, tokens.size() - 1), types);
}

From source file:com.google.dart.engine.services.refactoring.NamingConventions.java

/**
 * Validates the lexical shape of an identifier.
 * <p>
 * An error status is produced when the identifier has leading or trailing blanks, is empty,
 * starts with something other than a letter or underscore, or contains a character that is
 * neither a letter, a digit nor an underscore. Otherwise a fresh (empty) status is returned.
 *
 * @param identifier     the identifier text to check
 * @param identifierName the human-readable name used as the subject of error messages
 * @return an error status describing the first problem found, or a new empty status
 */
private static RefactoringStatus validateIdentifier0(String identifier, String identifierName) {
    // surrounding whitespace is rejected outright
    if (!identifier.trim().equals(identifier)) {
        return RefactoringStatus.createErrorStatus(
                MessageFormat.format("{0} must not start or end with a blank.", identifierName));
    }

    final int size = identifier.length();
    if (size == 0) {
        return RefactoringStatus.createErrorStatus(
                MessageFormat.format("{0} must not be empty.", identifierName));
    }

    // first character: letter or underscore only
    final char first = identifier.charAt(0);
    final boolean validStart = first == '_' || Character.isLetter(first);
    if (!validStart) {
        return RefactoringStatus.createErrorStatus(
                MessageFormat.format("{0} must not start with ''{1}''.", identifierName, first));
    }

    // remaining characters: letters, digits or underscores
    for (int index = 1; index < size; index++) {
        final char ch = identifier.charAt(index);
        if (ch == '_' || Character.isLetterOrDigit(ch)) {
            continue;
        }
        return RefactoringStatus.createErrorStatus(
                MessageFormat.format("{0} must not contain ''{1}''.", identifierName, ch));
    }
    return new RefactoringStatus();
}

From source file:net.sf.jabref.bst.BibtexWidth.java

/**
 * Computes the width of a string by summing per-character widths, following the
 * BibTeX rules quoted in the comments below.
 * <p>
 * A top-level group that opens with a brace immediately followed by a backslash is
 * treated as a "special character" group: for each control sequence in it, the width
 * from {@code getSpecialCharWidth} is added when {@code BibtexCaseChanger.findSpecialChar}
 * recognises it, whitespace after the control sequence is skipped, and the remaining
 * non-brace characters of the group are measured with {@code getCharWidth}. Every other
 * character of the input (braces included) is passed to {@code getCharWidth}.
 * <p>
 * A warning is logged when a closing brace has no matching opening brace, or when
 * braces are still open at the end of the string.
 *
 * @param toMeasure the string to measure
 * @return the accumulated width
 */
public static int width(String toMeasure) {

    /*
     * From Bibtex: We use the natural width for all but special characters,
     * and we complain if the string isn't brace-balanced.
     */

    int i = 0;
    int n = toMeasure.length();
    int braceLevel = 0;
    char[] c = toMeasure.toCharArray();
    int result = 0;

    /*
     * From Bibtex:
     *
     * We use the natural widths of all characters except that some
     * characters have no width: braces, control sequences (except for the
     * usual 13 accented and foreign characters, whose widths are given in
     * the next module), and |white_space| following control sequences (even
     * a null control sequence).
     *
     */
    while (i < n) {
        if (c[i] == '{') {
            braceLevel++;
            // A "{\" at the outermost brace level starts a special-character group
            if ((braceLevel == 1) && ((i + 1) < n) && (c[i + 1] == '\\')) {
                i++; // skip brace
                // Each pass handles one control sequence plus the text up to the next backslash
                while ((i < n) && (braceLevel > 0)) {
                    i++; // skip backslash

                    int afterBackslash = i;
                    // Consume the alphabetic name of the control sequence
                    while ((i < n) && Character.isLetter(c[i])) {
                        i++;
                    }
                    if ((i < n) && (i == afterBackslash)) {
                        i++; // Skip non-alpha control seq
                    } else {
                        // Only recognised special characters contribute a width
                        if (BibtexCaseChanger.findSpecialChar(c, afterBackslash).isPresent()) {
                            result += BibtexWidth.getSpecialCharWidth(c, afterBackslash);
                        }
                    }
                    // Whitespace directly after a control sequence has no width
                    while ((i < n) && Character.isWhitespace(c[i])) {
                        i++;
                    }
                    // Measure ordinary characters until the group closes or another backslash starts
                    while ((i < n) && (braceLevel > 0) && (c[i] != '\\')) {
                        if (c[i] == '}') {
                            braceLevel--;
                        } else if (c[i] == '{') {
                            braceLevel++;
                        } else {
                            result += BibtexWidth.getCharWidth(c[i]);
                        }
                        i++;
                    }
                }
                // The whole group has been consumed; do not measure c[i] again below
                continue;
            }
        } else if (c[i] == '}') {
            if (braceLevel > 0) {
                braceLevel--;
            } else {
                LOGGER.warn("Too many closing braces in string: " + toMeasure);
            }
        }
        // NOTE(review): braces reach getCharWidth here too; presumably it returns 0 for them — confirm
        result += BibtexWidth.getCharWidth(c[i]);
        i++;
    }
    if (braceLevel > 0) {
        LOGGER.warn("No enough closing braces in string: " + toMeasure);
    }
    return result;
}

From source file:com.anysoftkeyboard.dictionaries.content.ContactsDictionary.java

/**
 * Splits a contact's full name into name parts and stores each part as a word.
 * <p>
 * A name part starts at a letter and continues over letters, hyphens and apostrophes.
 * Parts of length 1, or of length {@code MAX_WORD_LENGTH} or more, are not stored. For each
 * stored part that follows an earlier part of the same name, the pair is recorded in
 * {@code mLoadingPhaseNextNames}. A part is only handed to the superclass when the given
 * frequency is higher than the frequency already known for it.
 *
 * @param name      the full contact name
 * @param frequency the frequency to assign to each extracted name part
 */
@Override
protected void addWordFromStorageToMemory(String name, int frequency) {
    final int total = name.length();

    // TODO: Better tokenization for non-Latin writing systems
    String previousNamePart = null;
    int pos = 0;
    while (pos < total) {
        // skip everything until the first letter of a name part
        if (!Character.isLetter(name.charAt(pos))) {
            pos++;
            continue;
        }

        // extend the part over letters, '-' and '\''
        int end = pos + 1;
        while (end < total) {
            final char ch = name.charAt(end);
            final boolean partOfName = ch == '-' || ch == '\'' || Character.isLetter(ch);
            if (!partOfName) {
                break;
            }
            end++;
        }

        final String namePart = name.substring(pos, end);
        pos = end;

        // Overly long words are skipped (the stack may overflow due to recursion), and so are
        // single-letter words (they possibly confuse the capitalization of "i").
        final int partLength = namePart.length();
        if (partLength > 1 && partLength < MAX_WORD_LENGTH) {
            // record this part as a follower of the previous part
            if (previousNamePart != null) {
                Map<CharSequence, NextWord> nextWords;
                if (!mLoadingPhaseNextNames.containsKey(previousNamePart)) {
                    nextWords = new ArrayMap<>();
                    mLoadingPhaseNextNames.put(previousNamePart, nextWords);
                } else {
                    nextWords = mLoadingPhaseNextNames.get(previousNamePart);
                }

                if (!nextWords.containsKey(namePart)) {
                    nextWords.put(namePart, new NextWord(namePart));
                } else {
                    nextWords.get(namePart).markAsUsed();
                }
            }

            // frequencies must never be lowered
            final int knownFrequency = getWordFrequency(namePart);
            if (frequency > knownFrequency) {
                super.addWordFromStorageToMemory(namePart, frequency);
            }
        }
        // remembered even when the part itself was not stored
        previousNamePart = namePart;
    }
}

From source file:com.prowidesoftware.swift.model.IBAN.java

/**
 *
 * @param iban/* w ww.  j av  a  2s . c o  m*/
 * @return the resulting IBAN
 */
public String removeNonAlpha(final String iban) {
    final StringBuilder result = new StringBuilder();
    for (int i = 0; i < iban.length(); i++) {
        char c = iban.charAt(i);
        if (Character.isLetter(c) || Character.isDigit(c)) {
            result.append((char) c);
        }
    }
    return result.toString();
}

From source file:eu.crisis_economics.configuration.FromFileConfigurationContext.java

/**
 * Replaces every standalone occurrence of {@code name} in {@code expression} with the
 * string form of {@code value}.
 * <p>
 * An occurrence is standalone when it is neither directly preceded nor directly followed by
 * a letter or an underscore, so {@code x} is replaced in {@code "2*x"} but not in
 * {@code "xy"}, {@code "x_1"} or {@code "max"}. Text inserted as a replacement is never
 * scanned again, which keeps the method from looping when the replacement itself contains
 * {@code name}.
 *
 * @param expression the expression to scan
 * @param name       the name to look for; an empty name leaves the expression unchanged
 * @param value      the number whose {@code toString()} form replaces the name
 * @return the expression with all standalone occurrences of the name replaced
 */
private static String scanForNamesAndReplace(String expression, String name, Number value) {
    final String replacement = value.toString();
    if (name.isEmpty()) {
        // An empty name matches at every index; there is nothing meaningful to substitute.
        return expression;
    }

    final int nameLength = name.length();
    final StringBuilder result = new StringBuilder(expression.length());
    int copiedUpTo = 0;
    int cursor = expression.indexOf(name);
    while (cursor != -1) {
        final int end = cursor + nameLength;
        final boolean gluedBefore = cursor > 0 && isNamePart(expression.charAt(cursor - 1));
        final boolean gluedAfter = end < expression.length() && isNamePart(expression.charAt(end));
        if (gluedBefore || gluedAfter) {
            // Part of a longer identifier: leave it alone and retry one character further on.
            cursor = expression.indexOf(name, cursor + 1);
        } else {
            result.append(expression, copiedUpTo, cursor).append(replacement);
            copiedUpTo = end;
            cursor = expression.indexOf(name, end);
        }
    }
    return result.append(expression, copiedUpTo, expression.length()).toString();
}

/** Tells whether {@code c} glues a neighbouring match into a longer identifier. */
private static boolean isNamePart(char c) {
    return Character.isLetter(c) || c == '_';
}

From source file:com.joliciel.talismane.tokeniser.filters.TokenRegexFilterImpl.java

/**
 * Lazily builds and caches the compiled pattern for this filter's regex.
 * <p>
 * On the first call the raw regex goes through up to three rewrites before it is compiled:
 * <ol>
 * <li>when {@code autoWordBoundaries} is set, a word boundary is prepended and/or appended if
 * the regex is judged to start and/or end with a letter or digit;</li>
 * <li>when the filter is not case-sensitive or not diacritic-sensitive, each literal letter
 * outside escapes, square-bracket groups and curly-bracket commands is expanded into a
 * character class of its case/diacritic variants;</li>
 * <li>each match of {@code wordListPattern} is replaced by a {@code |}-separated alternation
 * of the words in the named external word list.</li>
 * </ol>
 *
 * @return the compiled pattern, compiled with {@link Pattern#UNICODE_CHARACTER_CLASS}
 * @throws TalismaneException if a word list reference has an unknown parameter or names an
 *         unknown word list
 */
Pattern getPattern() {
    if (pattern == null) {
        // we may need to replace WordLists by the list contents
        String myRegex = this.regex;

        if (LOG.isTraceEnabled()) {
            LOG.trace("Regex: " + myRegex);
        }

        if (this.autoWordBoundaries) {
            // scan forwards to decide whether the regex starts with a word character
            Boolean startsWithLetter = null;
            for (int i = 0; i < myRegex.length() && startsWithLetter == null; i++) {
                char c = myRegex.charAt(i);
                if (c == '\\') {
                    i++;
                    c = myRegex.charAt(i);
                    if (c == 'd' || c == 'w') {
                        startsWithLetter = true;
                    } else if (c == 's' || c == 'W' || c == 'b' || c == 'B') {
                        startsWithLetter = false;
                    } else if (c == 'p') {
                        i += 2; // skip the open curly brackets
                        int closeCurlyBrackets = myRegex.indexOf('}', i);
                        int openParentheses = myRegex.indexOf('(', i);
                        int endIndex = closeCurlyBrackets;
                        if (openParentheses > 0 && openParentheses < closeCurlyBrackets)
                            endIndex = openParentheses;
                        if (endIndex > 0) {
                            String specialClass = myRegex.substring(i, endIndex);
                            if (specialClass.equals("WordList")) {
                                startsWithLetter = true;
                            }
                        }
                    }
                    break;
                } else if (c == '[' || c == '(') {
                    // do nothing
                } else if (Character.isLetter(c) || Character.isDigit(c)) {
                    startsWithLetter = true;
                } else {
                    startsWithLetter = false;
                }
            }

            // scan backwards to decide whether the regex ends with a word character
            Boolean endsWithLetter = null;
            for (int i = myRegex.length() - 1; i >= 0 && endsWithLetter == null; i--) {
                char c = myRegex.charAt(i);
                char prevC = ' ';
                if (i >= 1)
                    prevC = myRegex.charAt(i - 1);
                if (prevC == '\\') {
                    if (c == 'd' || c == 'w') {
                        endsWithLetter = true;
                    } else if (c == 's' || c == 'W' || c == 'b' || c == 'B') {
                        endsWithLetter = false;
                    } else if (c == 'p') {
                        i += 2; // skip the open curly brackets
                        int closeCurlyBrackets = myRegex.indexOf('}', i);
                        int openParentheses = myRegex.indexOf('(', i);
                        int endIndex = closeCurlyBrackets;
                        // a missing parenthesis (-1) must not override the closing bracket,
                        // same guard as in the two sibling branches
                        if (openParentheses > 0 && openParentheses < closeCurlyBrackets)
                            endIndex = openParentheses;
                        if (endIndex > 0) {
                            String specialClass = myRegex.substring(i, endIndex);
                            if (specialClass.equals("WordList") || specialClass.equals("Alpha")
                                    || specialClass.equals("Lower") || specialClass.equals("Upper")
                                    || specialClass.equals("ASCII") || specialClass.equals("Digit")) {
                                // this is the trailing scan, so it decides the trailing boundary
                                endsWithLetter = true;
                            }
                        }
                    }
                    break;
                } else if (c == ']' || c == ')' || c == '+') {
                    // do nothing
                } else if (c == '}') {
                    int startIndex = myRegex.lastIndexOf('{') + 1;
                    int closeCurlyBrackets = myRegex.indexOf('}', startIndex);
                    int openParentheses = myRegex.indexOf('(', startIndex);
                    int endIndex = closeCurlyBrackets;
                    if (openParentheses > 0 && openParentheses < closeCurlyBrackets)
                        endIndex = openParentheses;
                    if (endIndex > 0) {
                        String specialClass = myRegex.substring(startIndex, endIndex);
                        if (specialClass.equals("WordList") || specialClass.equals("Alpha")
                                || specialClass.equals("Lower") || specialClass.equals("Upper")
                                || specialClass.equals("ASCII") || specialClass.equals("Digit")) {
                            endsWithLetter = true;
                        }
                    }
                    break;
                } else if (Character.isLetter(c) || Character.isDigit(c)) {
                    endsWithLetter = true;
                } else {
                    endsWithLetter = false;
                }
            }

            if (startsWithLetter != null && startsWithLetter) {
                myRegex = "\\b" + myRegex;
            }
            if (endsWithLetter != null && endsWithLetter) {
                myRegex = myRegex + "\\b";
            }
            if (LOG.isTraceEnabled()) {
                LOG.trace("After autoWordBoundaries: " + myRegex);
            }
        }

        if (!this.caseSensitive || !this.diacriticSensitive) {
            StringBuilder regexBuilder = new StringBuilder();
            for (int i = 0; i < myRegex.length(); i++) {
                char c = myRegex.charAt(i);
                if (c == '\\') {
                    // escape - skip next
                    regexBuilder.append(c);
                    i++;
                    c = myRegex.charAt(i);
                    regexBuilder.append(c);
                } else if (c == '[') {
                    // character group, don't change it
                    regexBuilder.append(c);
                    // stop at the last character so an unterminated group cannot read past the end
                    while (c != ']' && i + 1 < myRegex.length()) {
                        i++;
                        c = myRegex.charAt(i);
                        regexBuilder.append(c);
                    }
                } else if (c == '{') {
                    // command, don't change it
                    regexBuilder.append(c);
                    // stop at the last character so an unterminated command cannot read past the end
                    while (c != '}' && i + 1 < myRegex.length()) {
                        i++;
                        c = myRegex.charAt(i);
                        regexBuilder.append(c);
                    }
                } else if (Character.isLetter(c)) {
                    // collect the variants of this letter; the TreeSet removes duplicates
                    Set<String> chars = new TreeSet<String>();
                    chars.add("" + c);
                    char noAccent = diacriticPattern.matcher(Normalizer.normalize("" + c, Form.NFD))
                            .replaceAll("").charAt(0);

                    if (!this.caseSensitive) {
                        chars.add("" + Character.toUpperCase(c));
                        chars.add("" + Character.toLowerCase(c));
                        chars.add("" + Character.toUpperCase(noAccent));
                    }
                    if (!this.diacriticSensitive) {
                        chars.add("" + noAccent);
                        if (!this.caseSensitive) {
                            chars.add("" + Character.toLowerCase(noAccent));
                        }
                    }
                    if (chars.size() == 1) {
                        regexBuilder.append(c);
                    } else {
                        regexBuilder.append('[');
                        for (String oneChar : chars) {
                            regexBuilder.append(oneChar);
                        }
                        regexBuilder.append(']');
                    }
                } else {
                    regexBuilder.append(c);
                }
            }
            myRegex = regexBuilder.toString();
            if (LOG.isTraceEnabled()) {
                LOG.trace("After caseSensitive: " + myRegex);
            }
        }

        // replace each word list reference by an alternation of the list's words
        Matcher matcher = wordListPattern.matcher(myRegex);
        StringBuilder regexBuilder = new StringBuilder();

        int lastIndex = 0;
        while (matcher.find()) {
            String[] params = matcher.group(1).split(",");
            int start = matcher.start();
            int end = matcher.end();
            regexBuilder.append(myRegex.substring(lastIndex, start));

            String wordListName = params[0];
            boolean uppercaseOptional = false;
            boolean diacriticsOptional = false;
            boolean lowercaseOptional = false;
            boolean firstParam = true;
            for (String param : params) {
                if (firstParam) {
                    /* word list name */
                } else if (param.equals("diacriticsOptional"))
                    diacriticsOptional = true;
                else if (param.equals("uppercaseOptional"))
                    uppercaseOptional = true;
                else if (param.equals("lowercaseOptional"))
                    lowercaseOptional = true;
                else
                    throw new TalismaneException(
                            "Unknown parameter in word list " + matcher.group(1) + ": " + param);
                firstParam = false;
            }

            ExternalWordList wordList = externalResourceFinder.getExternalWordList(wordListName);
            if (wordList == null)
                throw new TalismaneException("Unknown word list: " + wordListName);

            StringBuilder sb = new StringBuilder();

            boolean firstWord = true;
            for (String word : wordList.getWordList()) {
                if (!firstWord)
                    sb.append("|");
                word = Normalizer.normalize(word, Form.NFC);
                // NOTE(review): lowercaseOptional on its own never enters this branch — confirm intended
                if (uppercaseOptional || diacriticsOptional) {
                    String wordNoDiacritics = Normalizer.normalize(word, Form.NFD)
                            .replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
                    String wordLowercase = word.toLowerCase(Locale.ENGLISH);
                    String wordLowercaseNoDiacritics = Normalizer.normalize(wordLowercase, Form.NFD)
                            .replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
                    String wordUppercase = wordNoDiacritics.toUpperCase(Locale.ENGLISH);

                    boolean needsGrouping = false;
                    if (uppercaseOptional && !word.equals(wordLowercase))
                        needsGrouping = true;
                    if (diacriticsOptional && !word.equals(wordNoDiacritics))
                        needsGrouping = true;
                    if (lowercaseOptional && !word.equals(wordUppercase))
                        needsGrouping = true;
                    if (needsGrouping) {
                        for (int i = 0; i < word.length(); i++) {
                            char c = word.charAt(i);

                            boolean grouped = false;
                            if (uppercaseOptional && c != wordLowercase.charAt(i))
                                grouped = true;
                            if (diacriticsOptional && c != wordNoDiacritics.charAt(i))
                                grouped = true;
                            if (lowercaseOptional && c != wordUppercase.charAt(i))
                                grouped = true;

                            if (!grouped)
                                sb.append(c);
                            else {
                                sb.append("[");
                                String group = "" + c;
                                if (uppercaseOptional && group.indexOf(wordLowercase.charAt(i)) < 0)
                                    group += (wordLowercase.charAt(i));
                                if (lowercaseOptional && group.indexOf(wordUppercase.charAt(i)) < 0)
                                    group += (wordUppercase.charAt(i));
                                if (diacriticsOptional && group.indexOf(wordNoDiacritics.charAt(i)) < 0)
                                    group += (wordNoDiacritics.charAt(i));
                                if (uppercaseOptional && diacriticsOptional
                                        && group.indexOf(wordLowercaseNoDiacritics.charAt(i)) < 0)
                                    group += (wordLowercaseNoDiacritics.charAt(i));

                                sb.append(group);
                                sb.append("]");
                            } // does this letter need grouping?
                        } // next letter
                    } else {
                        sb.append(word);
                    } // any options activated?
                } else {
                    sb.append(word);
                }
                firstWord = false;
            } // next word in list

            regexBuilder.append(sb.toString());
            lastIndex = end;
        } // next match
        regexBuilder.append(myRegex.substring(lastIndex));
        myRegex = regexBuilder.toString();
        this.pattern = Pattern.compile(myRegex, Pattern.UNICODE_CHARACTER_CLASS);
    }
    return pattern;
}

From source file:org.lightjason.agentspeak.common.CCommon.java

/**
 * checks if an action is usable/*from  ww w. ja  v  a 2s.  c om*/
 *
 * @param p_action action object
 * @return boolean usable flag
 */
private static boolean actionusable(final IAction p_action) {
    if ((p_action.name() == null) || (p_action.name().isEmpty()) || (p_action.name().get(0).trim().isEmpty())) {
        LOGGER.warning(CCommon.languagestring(CCommon.class, "actionnameempty"));
        return false;
    }

    if (!Character.isLetter(p_action.name().get(0).charAt(0))) {
        LOGGER.warning(CCommon.languagestring(CCommon.class, "actionletter", p_action));
        return false;
    }

    if (!Character.isLowerCase(p_action.name().get(0).charAt(0))) {
        LOGGER.warning(CCommon.languagestring(CCommon.class, "actionlowercase", p_action));
        return false;
    }

    if (p_action.minimalArgumentNumber() < 0) {
        LOGGER.warning(CCommon.languagestring(CCommon.class, "actionargumentsnumber", p_action));
        return false;
    }

    return true;
}

From source file:org.bd2kccc.bd2kcccpubmed.Crawler.java

/**
 * Classifies the letter case of a word.
 * <p>
 * Only whether the first character is a letter is checked; its case is deliberately ignored.
 * The remaining characters decide the result:
 * <ul>
 * <li>{@code 0} for an empty word;</li>
 * <li>{@code UPPERCASE} when the word has more than one character, starts with a letter, and
 * every later character is a letter that is not lower case;</li>
 * <li>{@code LOWERCASE} when the word starts with a letter and every later character (if any)
 * is a lower-case letter;</li>
 * <li>{@code MIXEDCASE} otherwise, including any word containing a non-letter.</li>
 * </ul>
 * NOTE(review): the title-case flag always equals the lower-case flag and is tested after it,
 * so {@code TITLECASE} is never returned as written — confirm whether that is intended.
 *
 * @param word the word to classify
 * @return one of the case constants described above
 */
int getCase(String word) {

    if (word.isEmpty()) {
        return 0;
    }

    final char[] chars = word.toCharArray();

    // A non-letter first character rules out every "pure" case right away.
    // Let's not require every word to be titlecase: the case of the first letter is ignored.
    final boolean leadingLetter = Character.isLetter(chars[0]);
    boolean allUpper = leadingLetter;
    boolean allLower = leadingLetter;
    boolean title = leadingLetter;
    final boolean mixed = true;

    int pos = 1;
    while (pos < chars.length) {
        final char current = chars[pos++];

        if (!Character.isLetter(current)) {
            allUpper = false;
            allLower = false;
            title = false;
            break;
        }

        if (Character.isLowerCase(current)) {
            allUpper = false;
        } else {
            allLower = false;
            title = false;
        }
    }

    if (allUpper && chars.length > 1) {
        return UPPERCASE;
    }
    if (allLower) {
        return LOWERCASE;
    }
    if (title) {
        return TITLECASE;
    }
    if (mixed) {
        return MIXEDCASE;
    }

    return 0;
}

From source file:org.pentaho.di.jdbc.SQLParser.java

/**
 * Scans the SQL held in {@code in} from position {@code s} up to {@code len}, copying it to
 * {@code out} and dispatching on each character to the matching helper.
 * <p>
 * The first word starting with a letter is captured as the statement keyword. When
 * {@code extractTable} is set and that keyword is {@code "select"}, scanning continues word
 * by word until the first {@code "from"}, after which the table name is read.
 *
 * @param extractTable whether to look for the table name of a select statement
 * @return a four-element array: the SQL text (the rewritten buffer if an escape was
 *         processed, otherwise the original {@code sql}), the procedure name, the keyword
 *         (empty string if none was found) and the table name
 * @throws SQLException with SQL state {@code 22025} if scanning runs past the end of the input
 */
String[] parse(boolean extractTable) throws SQLException {

    boolean isSelect = false;
    boolean isModified = false;
    boolean isSlowScan = true;
    try {
        while (s < len) {
            final char c = in[s];

            switch (c) {
            case '{':
                escape();
                isModified = true;
                break;
            case '[':
            case '"':
            case '\'':
                copyString();
                break;
            case '?':
                copyParam(null, d);
                break;
            case '/':
                // only "/*" opens a comment; a lone slash is copied through
                if (s + 1 < len && in[s + 1] == '*') {
                    skipMultiComments();
                } else {
                    out[d++] = c;
                    s++;
                }
                break;
            case '-':
                // only "--" opens a comment; a lone dash is copied through
                if (s + 1 < len && in[s + 1] == '-') {
                    skipSingleComments();
                } else {
                    out[d++] = c;
                    s++;
                }
                break;
            default:
                if (isSlowScan && Character.isLetter(c)) {
                    if (keyWord == null) {
                        keyWord = copyKeyWord();
                        if ("select".equals(keyWord)) {
                            isSelect = true;
                        }
                        // keep scanning word by word only while a table name is still wanted
                        isSlowScan = extractTable && isSelect;
                        break;
                    }
                    if (extractTable && isSelect) {
                        String sqlWord = copyKeyWord();
                        if ("from".equals(sqlWord)) {
                            // Ensure only first 'from' is processed
                            isSlowScan = false;
                            tableName = getTableName();
                        }
                        break;
                    }
                }

                out[d++] = c;
                s++;
                break;
            }
        }

        String[] result = new String[4];

        // return sql and procname
        result[0] = (isModified) ? new String(out, 0, d) : sql;
        result[1] = procName;
        result[2] = (keyWord == null) ? "" : keyWord;
        result[3] = tableName;
        return result;
    } catch (IndexOutOfBoundsException e) {
        // Should only come here if string is invalid in some way.
        // The original exception is chained so the failing position is not lost.
        throw new SQLException(
                BaseMessages.getString(PKG, "error.parsesql.missing", String.valueOf(terminator)), "22025", e);
    }

}