Example usage for java.lang Character isAlphabetic

List of usage examples for java.lang Character isAlphabetic

Introduction

This page lists usage examples for java.lang.Character.isAlphabetic.

Prototype

public static boolean isAlphabetic(int codePoint) 

Source Link

Document

Determines whether the specified character (Unicode code point) is alphabetic.

Usage

From source file:org.structr.web.common.DownloadHelper.java

/**
 * Builds a map holding, under the key {@code "context"}, the text snippets
 * that surround every occurrence of each search token in {@code text}.
 *
 * <p>{@code searchTerm} is split on whitespace, commas and semicolons; each
 * resulting token is matched case-insensitively (via {@code toLowerCase()}).
 * For every hit the text is scanned backwards and then forwards from the hit
 * position, and words are moved into the snippet through
 * {@code flushWordBuffer}. A scan stops at a paragraph boundary (two
 * consecutive newlines), at the end of the text, or when the word budget is
 * used up.
 *
 * @param searchTerm    one or more search tokens separated by whitespace,
 *                      {@code ,} or {@code ;}; empty tokens are ignored
 * @param text          the text to search
 * @param contextLength word budget per snippet: the backward scan stops once
 *                      more than {@code contextLength / 2} words were counted,
 *                      the forward scan once the running total exceeds
 *                      {@code contextLength}
 * @return a map whose {@code "context"} entry is the insertion-ordered,
 *         duplicate-free set of trimmed snippets
 */
public static GraphObjectMap getContextObject(final String searchTerm, final String text,
        final int contextLength) {

    final GraphObjectMap contextObject = new GraphObjectMap();
    final Set<String> contextValues = new LinkedHashSet<>();
    final String[] searchParts = searchTerm.split("[\\s,;]+");
    final GenericProperty contextKey = new GenericProperty("context");

    // loop-invariant: the lower-cased haystack does not depend on the search token
    final String lowerCaseText = text.toLowerCase();
    final int textLength = text.length();

    for (final String searchString : searchParts) {

        // split() yields an empty token when the term is empty or starts with a
        // delimiter; indexOf("") matches at every position and never returns -1,
        // so such a token must be skipped to guarantee termination
        if (searchString.isEmpty()) {
            continue;
        }

        final String lowerCaseSearchString = searchString.toLowerCase();
        final StringBuilder wordBuffer = new StringBuilder();
        final StringBuilder lineBuffer = new StringBuilder();

        // loop variables
        int newlineCount = 0;
        int wordCount = 0;
        int pos = -1;

        do {

            // find next occurrence
            pos = lowerCaseText.indexOf(lowerCaseSearchString, pos + 1);

            // pos == 0 is a valid hit (match at the very beginning of the text)
            if (pos >= 0) {

                lineBuffer.setLength(0);
                wordBuffer.setLength(0);

                wordCount = 0;
                newlineCount = 0;

                // fetch context words before search hit (the character at pos is included)
                for (int i = pos; i >= 0; i--) {

                    final char c = text.charAt(i);

                    if (!Character.isAlphabetic(c) && !Character.isDigit(c)
                            && !FulltextTokenizer.SpecialChars.contains(c)) {

                        // separator reached: hand the collected word over to the line buffer
                        wordCount += flushWordBuffer(lineBuffer, wordBuffer, true);

                        // store separator in buffer
                        wordBuffer.insert(0, c);

                        if (c == '\n') {

                            // increase newline count
                            newlineCount++;

                        } else {

                            // reset newline count
                            newlineCount = 0;
                        }

                        // paragraph boundary reached
                        if (newlineCount > 1) {
                            break;
                        }

                        // stop if we collected half of the desired word count
                        if (wordCount > contextLength / 2) {
                            break;
                        }

                    } else {

                        // store word character in buffer
                        wordBuffer.insert(0, c);

                        // reset newline count
                        newlineCount = 0;
                    }
                }

                wordCount += flushWordBuffer(lineBuffer, wordBuffer, true);

                wordBuffer.setLength(0);

                // fetch context words after search hit
                for (int i = pos + 1; i < textLength; i++) {

                    final char c = text.charAt(i);

                    if (!Character.isAlphabetic(c) && !Character.isDigit(c)
                            && !FulltextTokenizer.SpecialChars.contains(c)) {

                        // separator reached: hand the collected word over to the line buffer
                        wordCount += flushWordBuffer(lineBuffer, wordBuffer, false);

                        // store separator in buffer
                        wordBuffer.append(c);

                        if (c == '\n') {

                            // increase newline count
                            newlineCount++;

                        } else {

                            // reset newline count
                            newlineCount = 0;
                        }

                        // paragraph boundary reached
                        if (newlineCount > 1) {
                            break;
                        }

                        // stop if we collected enough words
                        if (wordCount > contextLength) {
                            break;
                        }

                    } else {

                        // store word character in buffer
                        wordBuffer.append(c);

                        // reset newline count
                        newlineCount = 0;
                    }
                }

                wordCount += flushWordBuffer(lineBuffer, wordBuffer, false);

                // the set silently drops snippets that were already collected
                contextValues.add(lineBuffer.toString().trim());
            }

        } while (pos >= 0);
    }

    contextObject.put(contextKey, contextValues);

    return contextObject;

}

From source file:org.wikipedia.nirvana.archive.ArchiveWithHeadersWithItemsCount.java

/**
 * Walks {@code header} along the items of {@code patternOfHeader} and
 * replaces the run of digits found at the position of the {@code template}
 * item with the {@code template} string itself.
 *
 * <p>Items with a period other than {@code Period.NONE} are skipped over in
 * the header: a run of digits for numeric periods, a run of alphabetic
 * characters otherwise. Literal items advance the position by their length.
 * An empty literal item ends processing and returns the header unchanged.
 *
 * @param header the concrete header text
 * @param data   optional out-parameter; when non-null, {@code val} receives
 *               the parsed number that was replaced and {@code str} the
 *               resulting header
 * @return the header with the number replaced by {@code template}, or the
 *         unchanged header when the pattern contains no template item
 * @throws NumberFormatException if {@code data} is non-null and no digits are
 *                               present at the template position
 */
public String headerVariableToConstant(String header, IntAndString data) {
    String str = header;
    final int length = str.length();
    int pos = 0;
    for (HeaderFormatItem item : patternOfHeader) {
        if (item.period == Period.NONE) {
            if (item.string.equals(template)) {
                int start = pos;
                // bounds check: the number may be the last thing in the header
                while (pos < length && Character.isDigit(str.charAt(pos))) {
                    pos++;
                }

                str = header.substring(0, start) + template + str.substring(pos);
                if (data != null) {
                    data.val = Integer.parseInt(header.substring(start, pos));
                    data.str = str;
                }
                return str;
            } else if (item.string.length() > 0) {
                pos += item.string.length();
            } else {
                return str;
            }
        } else {
            if (item.period.isNumeric()) {
                while (pos < length && Character.isDigit(str.charAt(pos))) {
                    pos++;
                }
            } else {
                while (pos < length && Character.isAlphabetic(str.charAt(pos))) {
                    pos++;
                }
            }
        }
    }
    return str;
}

From source file:org.yasmin.core.config.parser.KeyValueToken.java

/**
 * Checks if the given line is a valid keypair line.
 * /*from  w ww .  j  a  va  2  s.  c om*/
 * @param line
 *            The line to be verified
 * @return <code>true</code> if the line is a valid keypair line.
 */
private boolean isValid(String line) {
    if (!Character.isAlphabetic(line.charAt(0))) {
        return false;
    }

    // Line must not end with {
    if (StringUtils.strip(line).endsWith("{")) {
        return false;
    }

    int equalIndex = line.indexOf("=");
    // Starts with a character... it must have an equal sign...
    if (equalIndex == -1) {
        return false;
    }

    // Equal sign found... Let's validate the key...
    String keyName = line.substring(0, equalIndex);
    if (!keyName.matches(KEY_REGEX)) {
        return false;
    }

    return true;
}

From source file:qa.ProcessFrameProcessor.java

/**
 * Writes every process frame returned by {@code getProcArr()} to
 * {@code clearParserFileName}, one entry per frame followed by a blank line,
 * using {@code ClearParserUtil.toClearParserFormat}.
 *
 * <p>Before parsing, the raw text of each frame is normalized: periods are
 * replaced by spaces, double quotes are removed, and everything after the
 * last alphabetic character is cut off and replaced by a single period. Text
 * that contains no alphabetic character is left as trimmed. The tokenized
 * text of each frame is updated as a side effect.
 *
 * <p>A frame whose parsing or formatting fails is reported on standard error
 * and skipped; the remaining frames are still written.
 *
 * @param clearParserFileName path of the output file (created or overwritten)
 * @throws FileNotFoundException if the output file cannot be opened
 * @throws IOException           declared for callers; not thrown by the visible code
 */
public void toClearParserFormat(String clearParserFileName) throws FileNotFoundException, IOException {

    ArrayList<ProcessFrame> processFrames = getProcArr();

    // try-with-resources: the writer is closed even if a frame throws outside the inner try
    try (PrintWriter writer = new PrintWriter(clearParserFileName)) {
        for (ProcessFrame p : processFrames) {
            String rawText = p.getRawText();

            rawText = rawText.replace(".", " ");
            rawText = rawText.replace("\"", "");
            rawText = rawText.trim();

            // Terminate the sentence right after its last letter. The scan is
            // bounded so that text without any letter no longer runs off the
            // front of the string.
            for (int j = rawText.length() - 1; j >= 0; j--) {
                if (Character.isAlphabetic(rawText.charAt(j))) {
                    rawText = rawText.substring(0, j + 1) + ".";
                    break;
                }
            }

            // update tokenized text here
            List<String> tokenized = slem.tokenize(rawText);
            p.setTokenizedText(tokenized.toArray(new String[tokenized.size()]));
            try {
                DependencyTree tree = depParser.parse(rawText);
                String conLLStr = ClearParserUtil.toClearParserFormat(tree, p);
                writer.println(conLLStr);
                writer.println();
            } catch (Exception e) {
                // best effort: report the failing frame and continue with the next one
                e.printStackTrace();
            }
        }
    }
}

From source file:qa.ProcessFrameProcessor.java

/**
 * Writes every process frame returned by {@code getProcArr()} to
 * {@code conll2009FileName}, one entry per frame, using
 * {@code ClearParserUtil.toCONLL2009Format}.
 *
 * <p>Before parsing, the raw text of each frame is normalized: periods are
 * replaced by spaces, double quotes are removed, and everything after the
 * last alphabetic character is cut off and replaced by a single period. Text
 * that contains no alphabetic character is left as trimmed. The tokenized
 * text of each frame is updated as a side effect.
 *
 * <p>A frame whose parsing or formatting fails is reported on standard error
 * and skipped; the remaining frames are still written.
 *
 * @param conll2009FileName path of the output file (created or overwritten)
 * @throws FileNotFoundException if the output file cannot be opened
 * @throws IOException           declared for callers; not thrown by the visible code
 */
public void toConLL2009Format(String conll2009FileName) throws FileNotFoundException, IOException {

    ArrayList<ProcessFrame> processFrames = getProcArr();

    // try-with-resources: the writer is closed even if a frame throws outside the inner try
    try (PrintWriter writer = new PrintWriter(conll2009FileName)) {
        for (ProcessFrame p : processFrames) {
            String rawText = p.getRawText();

            rawText = rawText.replace(".", " ");
            rawText = rawText.replace("\"", "");
            rawText = rawText.trim();

            // Terminate the sentence right after its last letter. The scan is
            // bounded so that text without any letter no longer runs off the
            // front of the string.
            for (int j = rawText.length() - 1; j >= 0; j--) {
                if (Character.isAlphabetic(rawText.charAt(j))) {
                    rawText = rawText.substring(0, j + 1) + ".";
                    break;
                }
            }

            // update tokenized text here
            List<String> tokenized = slem.tokenize(rawText);
            p.setTokenizedText(tokenized.toArray(new String[tokenized.size()]));
            try {
                DependencyTree tree = depParser.parse(rawText);
                String conLLStr = ClearParserUtil.toCONLL2009Format(tree, p);
                writer.println(conLLStr);
            } catch (Exception e) {
                // best effort: report the failing frame and continue with the next one
                e.printStackTrace();
            }
        }
    }
}