Example usage for java.lang Character isSupplementaryCodePoint

List of usage examples for java.lang Character isSupplementaryCodePoint

Introduction

On this page you can find example usages of java.lang.Character.isSupplementaryCodePoint(int).

Prototype

public static boolean isSupplementaryCodePoint(int codePoint) 

Source Link

Document

Determines whether the specified character (Unicode code point) is in the supplementary character range.

Usage

From source file:Main.java

public static void main(String[] args) {
    int cp1 = 0x0065, cp2 = 0x0abcd;

    boolean b1 = Character.isSupplementaryCodePoint(cp1);
    boolean b2 = Character.isSupplementaryCodePoint(cp2);

    System.out.println(b1);/*from   w w w . ja  va2  s.co  m*/
    System.out.println(b2);
}

From source file:Main.java

public static final String filterUCS4(String str) {
    if (TextUtils.isEmpty(str)) {
        return str;
    }/*from ww w  .  j  av a  2  s.  c o m*/

    if (str.codePointCount(0, str.length()) == str.length()) {
        return str;
    }

    StringBuilder sb = new StringBuilder();

    int index = 0;
    while (index < str.length()) {
        int codePoint = str.codePointAt(index);
        index += Character.charCount(codePoint);
        if (Character.isSupplementaryCodePoint(codePoint)) {
            continue;
        }

        sb.appendCodePoint(codePoint);
    }

    return sb.toString();
}

From source file:SpinnerTest.java

private static int[] toCodePointArray(String str) {
    int[] codePoints = new int[str.codePointCount(0, str.length())];
    for (int i = 0, j = 0; i < str.length(); i++, j++) {
        int cp = str.codePointAt(i);
        if (Character.isSupplementaryCodePoint(cp))
            i++;/*from  w ww.  ja  v  a 2 s .  com*/
        codePoints[j] = cp;
    }
    return codePoints;
}

From source file:FormatTest.java

public void insertString(FilterBypass fb, int offset, String string, AttributeSet attr)
        throws BadLocationException {
    StringBuilder builder = new StringBuilder(string);
    for (int i = builder.length() - 1; i >= 0; i--) {
        int cp = builder.codePointAt(i);
        if (!Character.isDigit(cp) && cp != '-') {
            builder.deleteCharAt(i);/* w w  w. j av  a  2 s.c o m*/
            if (Character.isSupplementaryCodePoint(cp)) {
                i--;
                builder.deleteCharAt(i);
            }
        }
    }
    super.insertString(fb, offset, builder.toString(), attr);
}

From source file:FormatTest.java

public void replace(FilterBypass fb, int offset, int length, String string, AttributeSet attr)
        throws BadLocationException {
    if (string != null) {
        StringBuilder builder = new StringBuilder(string);
        for (int i = builder.length() - 1; i >= 0; i--) {
            int cp = builder.codePointAt(i);
            if (!Character.isDigit(cp) && cp != '-') {
                builder.deleteCharAt(i);
                if (Character.isSupplementaryCodePoint(cp)) {
                    i--;// w ww.  jav  a 2  s.co  m
                    builder.deleteCharAt(i);
                }
            }
        }
        string = builder.toString();
    }
    super.replace(fb, offset, length, string, attr);
}

From source file:gate.creole.tokeniser.SimpleTokeniser.java

/**
 * The method that does the actual tokenisation.
 *///from  w  ww  .j a v a2s .c  o  m
@Override
public void execute() throws ExecutionException {
    interrupted = false;
    AnnotationSet annotationSet;
    //check the input
    if (document == null) {
        throw new ExecutionException("No document to tokenise!");
    }

    if (annotationSetName == null || annotationSetName.equals(""))
        annotationSet = document.getAnnotations();
    else
        annotationSet = document.getAnnotations(annotationSetName);

    fireStatusChanged("Tokenising " + document.getName() + "...");

    String content = document.getContent().toString();
    int length = content.length();
    int currentChar;
    int charsInCurrentCP = 1;

    DFSMState graphPosition = dInitialState;

    //the index of the first character of the token trying to be recognised
    int tokenStart = 0;

    DFSMState lastMatchingState = null;
    DFSMState nextState;
    String tokenString;
    int charIdx = 0;
    int oldCharIdx = 0;
    FeatureMap newTokenFm;

    while (charIdx < length) {
        currentChar = content.codePointAt(charIdx);
        // number of chars we have to advance after processing this code point.
        // 1 in the vast majority of cases, but 2 where the code point is a
        // supplementary character represented as a surrogate pair.
        charsInCurrentCP = Character.isSupplementaryCodePoint(currentChar) ? 2 : 1;

        //      Out.println(
        //      currentChar + typesMnemonics[Character.getType(currentChar)+128]);
        nextState = graphPosition.next(typeIds.get(new Integer(Character.getType(currentChar))).intValue());

        if (null != nextState) {
            graphPosition = nextState;
            if (graphPosition.isFinal()) {
                lastMatchingState = graphPosition;
            }
            charIdx += charsInCurrentCP;
        } else {//we have a match!
            newTokenFm = Factory.newFeatureMap();

            if (null == lastMatchingState) {
                // no rule matches this character, so create a single-char
                // DEFAULT_TOKEN annotation covering it and start again after it
                charIdx = tokenStart + charsInCurrentCP;
                tokenString = content.substring(tokenStart, charIdx);
                newTokenFm.put("type", "UNKNOWN");
                newTokenFm.put(TOKEN_STRING_FEATURE_NAME, tokenString);
                newTokenFm.put(TOKEN_LENGTH_FEATURE_NAME, Integer.toString(tokenString.length()));

                try {
                    annotationSet.add(new Long(tokenStart), new Long(charIdx), "DEFAULT_TOKEN", newTokenFm);
                } catch (InvalidOffsetException ioe) {
                    //This REALLY shouldn't happen!
                    ioe.printStackTrace(Err.getPrintWriter());
                }
                // Out.println("Default token: " + tokenStart +
                //             "->" + tokenStart + " :" + tokenString + ";");
            } else {
                // we've reached the end of a string that the FSM recognised
                tokenString = content.substring(tokenStart, charIdx);
                newTokenFm.put(TOKEN_STRING_FEATURE_NAME, tokenString);
                newTokenFm.put(TOKEN_LENGTH_FEATURE_NAME, Integer.toString(tokenString.length()));

                for (int i = 1; i < lastMatchingState.getTokenDesc().length; i++) {
                    newTokenFm.put(lastMatchingState.getTokenDesc()[i][0],
                            lastMatchingState.getTokenDesc()[i][1]);
                    //Out.println(lastMatchingState.getTokenDesc()[i][0] + "=" +
                    //                       lastMatchingState.getTokenDesc()[i][1]);
                }

                try {
                    annotationSet.add(new Long(tokenStart), new Long(charIdx),
                            lastMatchingState.getTokenDesc()[0][0], newTokenFm);
                } catch (InvalidOffsetException ioe) {
                    //This REALLY shouldn't happen!
                    throw new GateRuntimeException(ioe.toString());
                }

                // Out.println(lastMatchingState.getTokenDesc()[0][0] +
                //              ": " + tokenStart + "->" + lastMatch +
                //              " :" + tokenString + ";");
                //charIdx = lastMatch + 1;
            }

            // reset to initial state and start looking again from here
            lastMatchingState = null;
            graphPosition = dInitialState;
            tokenStart = charIdx;
        }

        if ((charIdx - oldCharIdx > 256)) {
            fireProgressChanged((100 * charIdx) / length);
            oldCharIdx = charIdx;
            if (isInterrupted())
                throw new ExecutionInterruptedException();
        }

    } // while(charIdx < length)

    if (null != lastMatchingState) {
        // we dropped off the end having found a match, annotate it
        tokenString = content.substring(tokenStart, charIdx);
        newTokenFm = Factory.newFeatureMap();
        newTokenFm.put(TOKEN_STRING_FEATURE_NAME, tokenString);
        newTokenFm.put(TOKEN_LENGTH_FEATURE_NAME, Integer.toString(tokenString.length()));

        for (int i = 1; i < lastMatchingState.getTokenDesc().length; i++) {
            newTokenFm.put(lastMatchingState.getTokenDesc()[i][0], lastMatchingState.getTokenDesc()[i][1]);
        }

        try {
            annotationSet.add(new Long(tokenStart), new Long(charIdx), lastMatchingState.getTokenDesc()[0][0],
                    newTokenFm);
        } catch (InvalidOffsetException ioe) {
            //This REALLY shouldn't happen!
            throw new GateRuntimeException(ioe.toString());
        }

    }

    reset();
    fireProcessFinished();
    fireStatusChanged("Tokenisation complete!");
}

From source file:nl.strohalm.cyclos.utils.StringHelper.java

/**
 * Replaces supplementary characters with a ? character
 * @param text/* w w  w  .j  a  v  a  2 s.c o  m*/
 * @return
 */
public static String replaceSupplementaryCharacters(final String text) {
    if (text == null) {
        return null;
    }
    final int len = text.length();
    boolean isSupplementary = false;
    final StringBuilder result = new StringBuilder();
    for (int i = 0; i < len; i++) {
        final int cp = Character.codePointAt(text, i);
        isSupplementary = Character.isSupplementaryCodePoint(cp);
        if (isSupplementary) {
            result.append("?");
            i++;
        } else {
            result.append(text.charAt(i));
        }
    }
    return result.toString();
}

From source file:org.eclipse.rdf4j.rio.turtle.TurtleParser.java

/**
 * Pushes back a single code point by copying it to the front of the buffer.
 * After this method returns, a call to {@link #readCodePoint()} will return
 * the same code point c again.//  w w  w  . ja  va2  s.co m
 *
 * @param codePoint
 *            a single Unicode code point.
 * @throws IOException
 */
protected void unread(int codePoint) throws IOException {
    if (codePoint != -1) {
        if (Character.isSupplementaryCodePoint(codePoint)) {
            final char[] surrogatePair = Character.toChars(codePoint);
            reader.unread(surrogatePair);
        } else {
            reader.unread(codePoint);
        }
    }
}

From source file:org.eclipse.rdf4j.rio.turtle.TurtleParser.java

/**
 * Pushes back the supplied string by copying it to the front of the buffer.
 * After this method returns, successive calls to {@link #readCodePoint()}
 * will return the code points in the supplied string again, starting at the
 * first in the String../*from   w ww  .  j a va 2s  .  co  m*/
 *
 * @param string
 *            the string to un-read.
 * @throws IOException
 */
protected void unread(String string) throws IOException {
    for (int i = string.codePointCount(0, string.length()); i >= 1; i--) {
        final int codePoint = string.codePointBefore(i);
        if (Character.isSupplementaryCodePoint(codePoint)) {
            final char[] surrogatePair = Character.toChars(codePoint);
            reader.unread(surrogatePair);
        } else {
            reader.unread(codePoint);
        }
    }
}

From source file:org.nuclos.common2.StringUtils.java

private static void makeSQLIdentifierFrom(StringBuilder result, String s, int maxLen) {
    if (maxLen < 1)
        throw new IllegalArgumentException();
    final int len = s.length();
    final int max = result.length() + maxLen;
    for (int i = 0; i < len; ++i) {
        final boolean accept;
        int c = s.codePointAt(i);
        if (Character.isSupplementaryCodePoint(c)) {
            ++i;//from  w  ww.  j  av  a  2s .  c  om
        }
        if (c >= 'A' && c <= 'Z') {
            accept = true;
        } else if (c >= 'a' && c <= 'z') {
            accept = true;
        } else if (c >= '0' && c <= '9') {
            accept = true;
        } else {
            switch (c) {
            case '_':
                accept = true;
                break;
            case ' ':
                c = '_';
                accept = true;
                break;
            // german umlaut support
            case '\u00e4':
                c = 'a';
                accept = true;
                break;
            case '\u00f6':
                c = 'o';
                accept = true;
                break;
            case '\u00fc':
                c = 'u';
                accept = true;
                break;
            case '\u00df':
                c = 's';
                accept = true;
                break;
            case '\u00c4':
                c = 'A';
                accept = true;
                break;
            case '\u00d6':
                c = 'O';
                accept = true;
                break;
            case '\u00dc':
                c = 'U';
                accept = true;
                break;
            default:
                accept = false;
            }
        }
        if (accept) {
            result.append((char) c);
        }
    }
    if (result.length() > max)
        result.setLength(max);
}