Example usage for java.lang Character isAlphabetic

List of usage examples for java.lang Character isAlphabetic

Introduction

On this page you can find example usages of java.lang.Character.isAlphabetic.

Prototype

public static boolean isAlphabetic(int codePoint) 

Source Link

Document

Determines whether the specified character (Unicode code point) is alphabetic.

Usage

From source file:org.apache.brooklyn.util.net.Urls.java

/**
 * Variant of {@link #isUrlWithProtocol(String)} whose strictness is configurable: pass
 * {@code (false, false)} for the strict check, or relax it to tolerate spaces/tabs and/or
 * newline characters (e.g. when the value sits in an unescaped argument).
 *
 * @param x the candidate string; {@code null} is never accepted
 * @param allowSpacesAfterCharAfterColon when {@code false}, a space or tab anywhere in {@code x} rejects it
 * @param allowMultiline when {@code false}, a {@code \n} or {@code \r} anywhere in {@code x} rejects it
 * @return {@code true} if {@code x} begins with a scheme (a letter followed by letters, digits,
 *         {@code +}, {@code .} or {@code -}), then a colon, then a character that is neither
 *         whitespace nor a quote mark, and the configured whitespace rules are satisfied
 */
public static boolean isUrlWithProtocol(String x, boolean allowSpacesAfterCharAfterColon,
        boolean allowMultiline) {
    if (x == null) {
        return false;
    }

    final int colon = x.indexOf(':');
    // need a non-empty scheme in front of the colon and at least one character behind it
    if (colon <= 0 || colon + 1 >= x.length()) {
        return false;
    }

    // protocol schema as per https://en.wikipedia.org/wiki/Uniform_Resource_Locator
    if (!Character.isLetter(x.charAt(0))) {
        return false;
    }
    for (int pos = 1; pos < colon; pos++) {
        final char ch = x.charAt(pos);
        final boolean schemeChar = Character.isAlphabetic(ch) || Character.isDigit(ch)
                || ch == '+' || ch == '.' || ch == '-';
        if (!schemeChar) {
            return false;
        }
    }

    // never allow whitespace or a quote mark right after the ':', that is too similar to json/yaml
    final char afterColon = x.charAt(colon + 1);
    if (Character.isWhitespace(afterColon) || afterColon == '\'' || afterColon == '"') {
        return false;
    }

    if (!allowMultiline && (x.indexOf('\n') >= 0 || x.indexOf('\r') >= 0)) {
        return false;
    }
    if (!allowSpacesAfterCharAfterColon && (x.indexOf(' ') >= 0 || x.indexOf('\t') >= 0)) {
        return false;
    }
    return true;
}

From source file:org.apache.openmeetings.screenshare.job.OmKeyEvent.java

/**
 * Builds a key event from the raw attributes in {@code obj}.
 * <p>
 * Reads the {@code "alt"}, {@code "ctrl"} and {@code "shift"} flags and the {@code "char"} and
 * {@code "key"} integers, then resolves the key code stored in {@code this.key}: printable ASCII
 * characters are resolved through {@code getKeyStroke} with {@code CHAR_MAP}/{@code KEY_MAP} as
 * fallbacks, everything else through {@code KEY_MAP} only.
 *
 * @param obj map holding the event attributes listed above
 */
public OmKeyEvent(Map<String, Object> obj) {
    alt = TRUE.equals(obj.get("alt"));
    ctrl = TRUE.equals(obj.get("ctrl"));
    // The character has to be read BEFORE it is inspected; previously "shift" was derived from
    // "ch" ahead of its assignment, so the upper-case check never saw the incoming character.
    ch = (char) getInt(obj, "char");
    shift = TRUE.equals(obj.get("shift")) || isUpperCase(ch);
    key = inKey = getInt(obj, "key");
    Integer mapped = null;
    if (CharUtils.isAsciiPrintable(ch)) {
        boolean alpha = Character.isAlphabetic(ch);
        if (alpha) { // can't be combined due to different types
            key = getKeyStroke(toUpperCase(ch), 0).getKeyCode();
        } else {
            key = getKeyStroke(Character.valueOf(ch), 0).getKeyCode();
        }
        if (key == 0) {
            // no key code for this character: try the character table first
            mapped = CHAR_MAP.get(ch);
            if (mapped == null) {
                // fallback to the key code that was sent with the event
                key = inKey;
            }
        }
        if (!alpha && mapped == null) {
            mapped = KEY_MAP.get(key);
        }
    } else {
        mapped = KEY_MAP.get(key);
    }
    // a mapped value, when present, wins over the computed key code
    this.key = mapped == null ? key : mapped;
    log.debug("sequence:: shift {}, ch {}, orig {} -> key {}({}), map {}", shift, ch == 0 ? ' ' : ch, inKey,
            key, Integer.toHexString(key), mapped);
}

From source file:org.apdplat.superword.tools.Definition.java

/**
 * Extracts definition strings from {@code html} using fixed CSS selectors.
 * <p>
 * For every matched definition box the part of speech is read once, and each definition item
 * of at least three characters yields one entry of the form
 * {@code "<partOfSpeech> : <definition>"}, where the definition text starts at its first
 * alphabetic character. Items that contain no alphabetic character are skipped; previously
 * such an item ran the scan past the end of the string and the resulting exception aborted
 * parsing of all remaining definitions.
 * <p>
 * Any exception raised while parsing is logged and the entries collected so far are returned.
 *
 * @param html the page markup to parse
 * @param cssPath not used by this method
 * @return the extracted definitions, possibly empty, never {@code null}
 */
public static List<String> parseDefinitionForWebster(String html, String cssPath) {
    List<String> list = new ArrayList<>();
    try {
        for (Element element : Jsoup.parse(html)
                .select("div.tense-box.quick-def-box.simple-def-box.card-box.def-text div.inner-box-wrapper")) {
            String partOfSpeech = element.select("div.word-attributes span.main-attr em").text().trim();
            for (Element defElement : element.select(
                    "div.definition-block.def-text ul.definition-list.no-count li p.definition-inner-item span")) {
                String def = defElement.text().trim();
                if (def.length() < 3) {
                    continue;
                }
                // locate the first alphabetic character without running off the end
                int start = 0;
                while (start < def.length() && !Character.isAlphabetic(def.charAt(start))) {
                    start++;
                }
                if (start == def.length()) {
                    // nothing alphabetic in this item: skip it instead of failing the whole parse
                    continue;
                }
                list.add(partOfSpeech + " : " + def.substring(start));
            }
        }
    } catch (Exception e) {
        LOGGER.error("?", e);
    }
    return list;
}

From source file:org.apdplat.superword.tools.PdfParser.java

/**
 * Appends {@code line} plus a trailing space to {@code paragraph}, unless the line is blank or
 * is dropped by the rule below.
 * <p>
 * A line is dropped (and logged at debug level) when a next line exists, the line starts with a
 * digit and ends with an alphabetic character, and the next line is blank or starts with a digit
 * or an upper-case character.
 *
 * @param line the current line
 * @param lastLine the previous line; not used by this method
 * @param nextLine the following line, or {@code null} if there is none
 * @param paragraph the buffer that accumulates the paragraph text
 */
private static void addLineToParagraph(String line, String lastLine, String nextLine, StringBuilder paragraph) {
    if (StringUtils.isBlank(line)) {
        return;
    }
    boolean drop = false;
    if (nextLine != null) {
        final char first = line.charAt(0);
        final char last = line.charAt(line.length() - 1);
        // presumably a numbered heading rather than body text - confirm against sample input
        if (Character.isDigit(first) && Character.isAlphabetic(last)) {
            drop = StringUtils.isBlank(nextLine)
                    || Character.isDigit(nextLine.charAt(0))
                    || Character.isUpperCase(nextLine.charAt(0));
        }
    }
    if (drop) {
        LOGGER.debug("???" + line);
        return;
    }
    paragraph.append(line).append(" ");
}

From source file:org.apdplat.superword.tools.PdfParser.java

/**
 * Splits a paragraph into sentences.
 * <p>
 * The paragraph is passed through {@code prepareSeg}, split on literal dots, and each non-blank
 * piece is passed through {@code processSentence}; pieces for which that returns {@code null}
 * are dropped. A sentence ending in an alphabetic character gets a trailing dot appended.
 * When debug logging is enabled, the word count of every sentence is tallied in
 * {@code SENTENCE_LENGTH_INFO}.
 *
 * @param paragraph the paragraph text
 * @return the sentences found, possibly empty
 */
private static List<String> segSentence(String paragraph) {
    List<String> sentences = new ArrayList<>();
    // pre-process the paragraph before splitting
    String prepared = prepareSeg(paragraph);
    if (StringUtils.isBlank(prepared)) {
        return sentences;
    }
    // split on literal dots
    String[] fragments = prepared.split("[.]");
    for (int n = 0; n < fragments.length; n++) {
        String fragment = fragments[n];
        if (StringUtils.isBlank(fragment)) {
            continue;
        }
        LOGGER.debug("???" + fragment);
        String sentence = processSentence(fragment);
        if (sentence == null) {
            continue;
        }
        // restore the terminating dot that the split removed
        char last = sentence.charAt(sentence.length() - 1);
        if (Character.isAlphabetic(last)) {
            sentence = sentence + ".";
        }
        // swap the placeholder sequence back to a dot
        sentence = sentence.replace("??", ".");
        sentences.add(sentence);
        LOGGER.debug("??" + sentence);
        if (!LOGGER.isDebugEnabled()) {
            continue;
        }
        // tally sentence lengths, measured in whitespace-separated words
        int words = sentence.split("\\s+").length;
        SENTENCE_LENGTH_INFO.putIfAbsent(words, new AtomicInteger());
        SENTENCE_LENGTH_INFO.get(words).incrementAndGet();
    }
    return sentences;
}

From source file:org.languagetool.rules.spelling.hunspell.HunspellRule.java

/**
 * Reports whether {@code word} counts as misspelled.
 * <p>
 * A word is misspelled when the dictionary rejects it and it is not ignored, or when it (with a
 * trailing dot removed) is prohibited. Single non-alphabetic characters and the token
 * {@code "--"} are never sent to the dictionary. The rule is initialised on first use.
 *
 * @param word the word to check
 * @return {@code true} if the word is considered misspelled
 * @throws RuntimeException wrapping any {@link IOException} raised during the check
 * @since public since 4.1
 */
@Experimental
public boolean isMisspelled(String word) {
    try {
        if (needsInit) {
            init();
        }
        // hunspell dictionaries usually do not contain punctuation, so a lone
        // non-alphabetic character is not looked up
        final boolean lookupAllowed = word.length() != 1 || Character.isAlphabetic(word.charAt(0));
        if (lookupAllowed && !"--".equals(word) && hunspellDict.misspelled(word) && !ignoreWord(word)) {
            return true;
        }
        return isProhibited(removeTrailingDot(word));
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:org.mongojx.fluent.core.MongoJxParser.java

/**
 * Parses a JSON-like filter string into {@code Document}s, filling in positional parameters.
 * <p>
 * The filter is scanned one character at a time. A {@code ParsingEvent} state together with the
 * key/value start and stop indexes tracks where the current key and value sit inside
 * {@code filter}. Whenever the object depth returns to zero, the current top-level document is
 * appended to the result list.
 * <p>
 * The method calls itself on the remainder of the filter when {@code isArrayEnding} matches, and
 * returns early when {@code isArrayStarting} brings the internal {@code arrays} counter to zero.
 * NOTE(review): those two helper names look swapped relative to how they are used here - confirm
 * against their definitions. The recursive call also restarts parameter consumption at index 0 -
 * confirm that this is intended.
 *
 * @param filter the filter text to parse
 * @param parameters values consumed in order, one for each character accepted by {@code isParameter}
 * @return a {@code QueryTuple} built from the position at which scanning stopped and the
 *         documents collected so far
 */
public static QueryTuple bind(String filter, Object... parameters) {
    ParsingEvent state = null;
    int keyStartIndex = 0;
    int keyStopIndex = 0;
    int valueStartIndex = 0;
    int parameterIndex = 0;
    Document finalDocument = new Document();
    Document document = finalDocument;
    int depth = 0;
    int arrays = 1;
    List<Document> documents = new ArrayList<>();

    for (int i = 0; i < filter.length(); i++) {
        char character = filter.charAt(i);
        if (isJsonObjectStarting(character)) {
            if (state == KEY_VALUE_DELIMITER) {
                // object used as a value: nest it under the current key and descend into it
                Document nestedDocument = new Document();
                document.put(filter.substring(keyStartIndex, keyStopIndex), nestedDocument);
                document = nestedDocument;
            } else if (state == IN_ARRAY_AND_SEPARATOR) {
                // next object after a separator that followed a completed object: start afresh
                document = new Document();
                finalDocument = document;
            }
            state = START_OBJECT;
            keyStartIndex = i + 1;
            depth++;
        } else if (isDoubleStringCharacter(character)) {
            if (isDoubleStringEnding(state)) {
                // closing quote: store the quoted text as the value of the current key
                state = END_DOUBLE_QUOTE_STRING;
                document.put(filter.substring(keyStartIndex, keyStopIndex),
                        filter.substring(valueStartIndex, i));
            } else {
                valueStartIndex = i + 1;
                state = START_DOUBLE_QUOTE_STRING;
            }
        } else if (isSingleStringCharacter(character)) {
            if (isSingleStringEnding(state)) {
                state = END_SINGLE_QUOTE_STRING;
            } else if (isSingleStringStarting(state)) {
                keyStartIndex = i;
                state = START_SINGLE_QUOTE_STRING;
            }
        } else if (isJsonObjectEnding(character)) {
            depth--;
            if (depth == 0) {
                // a top-level object is complete
                documents.add(finalDocument);
                state = END_OF_IN_ARRAY_OBJECT;
            }
        } else if (isArrayStarting(character)) {
            arrays--;
            if (arrays == 0) {
                // hand back the documents gathered so far plus the position just past this character
                return new QueryTuple(i + 1, documents);
            }
        } else if (isArrayEnding(character)) {
            // parse the rest of the filter recursively and store its documents under the current key
            QueryTuple queryTuple = bind(filter.substring(i + 1), parameters);
            document.put(filter.substring(keyStartIndex, keyStopIndex), queryTuple.getDocuments());
            // skip what the recursive call reported via getIndex()
            i = i + queryTuple.getIndex();
        } else if (isKeyValueSeparator(character)) {
            if (state != KEY_END) {
                keyStopIndex = i;
            }
            // register the key with an empty placeholder; later branches overwrite it with the value
            document.put(filter.substring(keyStartIndex, keyStopIndex), "");
            state = KEY_VALUE_DELIMITER;
        } else if (isParameter(character)) {
            state = PARAMETER;
            document.put(filter.substring(keyStartIndex, keyStopIndex), parameters[parameterIndex]);
            parameterIndex++;
        } else if (isSeparator(character)) {
            keyStartIndex = i + 1;
            if (state == END_OF_IN_ARRAY_OBJECT) {
                state = IN_ARRAY_AND_SEPARATOR;
            } else {
                state = AND_SEPARATOR;
            }
        } else if (Character.isWhitespace(character)) {
            if (state == AND_SEPARATOR) {
                // whitespace ahead of a key: the key starts after it
                keyStartIndex = i + 1;
            } else if (state == START_OBJECT) {
                keyStartIndex = i + 1;
            } else if (state == START_INLINE_NUMERIC_VALUE) {
                // whitespace terminates an unquoted number: convert it and store it
                state = END_INLINE_NUMERIC_VALUE;
                String value = filter.substring(valueStartIndex, i);
                document.put(filter.substring(keyStartIndex, keyStopIndex), NumberUtils.createNumber(value));
            } else if (state == KEY_START) {
                // whitespace terminates an unquoted key
                keyStopIndex = i;
                state = KEY_END;
            }
        } else if (Character.isDigit(character) && state == KEY_VALUE_DELIMITER) {
            state = START_INLINE_NUMERIC_VALUE;
            valueStartIndex = i;
        } else if (Character.isAlphabetic(character) && (state == AND_SEPARATOR || state == START_OBJECT)) {
            state = KEY_START;
        }

    }

    return new QueryTuple(filter.length(), documents);
}

From source file:org.ovirt.api.metamodel.analyzer.ModelAnalyzer.java

/**
 * Creates a document with the given name and, populates it with the content read from the given input stream, and
 * adds it to the model./*from   w  ww .ja  v  a2s  .  c o m*/
 *
 * @param file the name of file containing the document, including the extension
 * @param in the input stream that will be used to populate the document
 * @throws IOException if something fails while reading the content of the document
 */
private void analyzeDocument(String file, InputStream in) throws IOException {
    // Create the document:
    Document document = new Document();

    // Remove the extension from the file name:
    file = FilenameUtils.getBaseName(file);

    // The name of the document can contain a prefix to explicitly indicate the order of the document relative to
    // the other documents of the model. This prefix should be separated from the rest of the name using a dash, and
    // that dash should be ignored.
    String prefix = null;
    int index = file.indexOf('-');
    if (index > 0) {
        prefix = file.substring(0, index);
        file = file.substring(index + 1);
    }
    Name name = NameParser.parseUsingCase(file);
    if (prefix != null && !prefix.isEmpty()) {
        List<String> words = name.getWords();
        words.add(0, prefix);
        name.setWords(words);
        if (Character.isAlphabetic(prefix.charAt(0))) {
            document.setAppendix(true);
        }
    }
    document.setName(name);

    // Read the source of the document:
    String source = IOUtils.toString(in, StandardCharsets.UTF_8);
    document.setSource(source);

    // Add the document to the model:
    model.addDocument(document);
}

From source file:org.structr.files.text.FulltextTokenizer.java

/**
 * Feeds a slice of characters into the tokenizer.
 * <p>
 * Characters that are alphabetic, digits or contained in {@code SpecialChars} are collected in
 * the word buffer and copied to the raw text. Any other character flushes the word buffer and is
 * copied to the raw text as itself when it is whitespace, or as a single space otherwise. From
 * the twelfth identical character in a row onwards, repeats are dropped. Nothing is processed
 * once {@code wordCount} has reached {@code wordCountLimit} at the time of the call.
 *
 * @param cbuf the character buffer
 * @param off index of the first character to read
 * @param len number of characters to read; clamped to the end of {@code cbuf}
 * @throws IOException declared by the overridden method
 */
@Override
public void write(final char[] cbuf, final int off, final int len) throws IOException {

    if (wordCount >= wordCountLimit) {
        return;
    }

    // never read past the end of the buffer, even if off + len claims more
    final int end = Math.min(off + len, cbuf.length);

    for (int pos = off; pos < end; pos++) {

        final char current = cbuf[pos];

        // drop long runs of one repeated character
        if (current != lastCharacter) {

            consecutiveCharCount = 0;

        } else if (consecutiveCharCount++ >= 10) {

            continue;
        }

        final boolean partOfWord = Character.isAlphabetic(current) || Character.isDigit(current)
                || SpecialChars.contains(current);

        if (partOfWord) {

            wordBuffer.append(current);
            rawText.append(current);

        } else {

            // word boundary reached
            flush();

            if (Character.isWhitespace(current)) {

                rawText.append(current);

            } else {

                rawText.append(" ");
            }
        }

        lastCharacter = current;
    }
}

From source file:org.structr.text.FulltextIndexerModule.java

@Override
public GraphObjectMap getContextObject(final String searchTerm, final String text, final int contextLength) {

    final GraphObjectMap contextObject = new GraphObjectMap();
    final Set<String> contextValues = new LinkedHashSet<>();
    final String[] searchParts = searchTerm.split("[\\s,;]+");
    final GenericProperty contextKey = new GenericProperty("context");

    for (final String searchString : searchParts) {

        final String lowerCaseSearchString = searchString.toLowerCase();
        final String lowerCaseText = text.toLowerCase();
        final StringBuilder wordBuffer = new StringBuilder();
        final StringBuilder lineBuffer = new StringBuilder();
        final int textLength = text.length();

        /*/*from w  ww .ja  va 2 s  . co m*/
            * we take an average word length of 8 characters, multiply
            * it by the desired prefix and suffix word count, add 20%
            * and try to extract up to prefixLength words.
         */
        // modify these parameters to tune prefix and suffix word extraction
        // loop variables
        int newlineCount = 0;
        int wordCount = 0; // wordCount starts at 1 because we include the matching word
        int pos = -1;

        do {

            // find next occurrence
            pos = lowerCaseText.indexOf(lowerCaseSearchString, pos + 1);
            if (pos > 0) {

                lineBuffer.setLength(0);
                wordBuffer.setLength(0);

                wordCount = 0;
                newlineCount = 0;

                // fetch context words before search hit
                for (int i = pos; i >= 0; i--) {

                    final char c = text.charAt(i);

                    if (!Character.isAlphabetic(c) && !Character.isDigit(c)
                            && !FulltextTokenizer.SpecialChars.contains(c)) {

                        wordCount += flushWordBuffer(lineBuffer, wordBuffer, true);

                        // store character in buffer
                        wordBuffer.insert(0, c);

                        if (c == '\n') {

                            // increase newline count
                            newlineCount++;

                        } else {

                            // reset newline count
                            newlineCount = 0;
                        }

                        // paragraph boundary reached
                        if (newlineCount > 1) {
                            break;
                        }

                        // stop if we collected half of the desired word count
                        if (wordCount > contextLength / 2) {
                            break;
                        }

                    } else {

                        // store character in buffer
                        wordBuffer.insert(0, c);

                        // reset newline count
                        newlineCount = 0;
                    }
                }

                wordCount += flushWordBuffer(lineBuffer, wordBuffer, true);

                wordBuffer.setLength(0);

                // fetch context words after search hit
                for (int i = pos + 1; i < textLength; i++) {

                    final char c = text.charAt(i);

                    if (!Character.isAlphabetic(c) && !Character.isDigit(c)
                            && !FulltextTokenizer.SpecialChars.contains(c)) {

                        wordCount += flushWordBuffer(lineBuffer, wordBuffer, false);

                        // store character in buffer
                        wordBuffer.append(c);

                        if (c == '\n') {

                            // increase newline count
                            newlineCount++;

                        } else {

                            // reset newline count
                            newlineCount = 0;
                        }

                        // paragraph boundary reached
                        if (newlineCount > 1) {
                            break;
                        }

                        // stop if we collected enough words
                        if (wordCount > contextLength) {
                            break;
                        }

                    } else {

                        // store character in buffer
                        wordBuffer.append(c);

                        // reset newline count
                        newlineCount = 0;
                    }
                }

                wordCount += flushWordBuffer(lineBuffer, wordBuffer, false);

                // replace single newlines with space
                contextValues.add(lineBuffer.toString().trim());
            }

        } while (pos >= 0);
    }

    contextObject.put(contextKey, contextValues);

    return contextObject;

}