Example usage for java.lang Character charCount

Introduction

On this page you can find example usages of java.lang.Character.charCount.

Prototype

public static int charCount(int codePoint)

Source Link

Document

Determines the number of char values needed to represent the specified character (Unicode code point).

Usage

From source file:bfile.util.StringUtils.java

/**
 * <p>Swaps the case of a String changing upper and title case to
 * lower case, and lower case to upper case.</p>
 *
 * <ul>
 *  <li>Upper case character converts to Lower case</li>
 *  <li>Title case character converts to Lower case</li>
 *  <li>Lower case character converts to Upper case</li>
 * </ul>
 *
 * <p>For a word based algorithm, see {@link org.apache.commons.lang3.text.WordUtils#swapCase(String)}.
 * A {@code null} input String returns {@code null}.</p>
 *
 * <pre>
 * StringUtils.swapCase(null)                 = null
 * StringUtils.swapCase("")                   = ""
 * StringUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
 * </pre>
 *
 * <p>The conversion is performed per Unicode code point, so a supplementary
 * character (encoded as a surrogate pair) is converted as a single unit.</p>
 *
 * <p>NOTE: This method changed in Lang version 2.0.
 * It no longer performs a word based algorithm.
 * If you only use ASCII, you will notice no change.
 * That functionality is available in org.apache.commons.lang3.text.WordUtils.</p>
 *
 * @param str  the String to swap case, may be null
 * @return the changed String, {@code null} if null String input
 */
public static String swapCase(final String str) {
    // null and "" are returned unchanged (same instance), as documented above.
    if (str == null || str.length() == 0) {
        return str;
    }

    final int strLen = str.length();
    // One slot per UTF-16 unit is always sufficient: a string never holds
    // more code points than chars.
    final int[] newCodePoints = new int[strLen];
    int outOffset = 0;
    for (int i = 0; i < strLen;) {
        final int oldCodepoint = str.codePointAt(i);
        final int newCodePoint;
        if (Character.isUpperCase(oldCodepoint) || Character.isTitleCase(oldCodepoint)) {
            newCodePoint = Character.toLowerCase(oldCodepoint);
        } else if (Character.isLowerCase(oldCodepoint)) {
            newCodePoint = Character.toUpperCase(oldCodepoint);
        } else {
            newCodePoint = oldCodepoint;
        }
        newCodePoints[outOffset++] = newCodePoint;
        // Advance by the width of the code point that was READ from the input;
        // the width of the converted code point is irrelevant to the input position.
        i += Character.charCount(oldCodepoint);
    }
    return new String(newCodePoints, 0, outOffset);
}

From source file:StreamFlusher.java

/**
 * Executes a {@code testTokensTextFile} statement.
 *
 * <p>Evaluates the eleven child nodes of {@code node} (the Fst to test,
 * followed by ten single-string arguments), reads the input file one token
 * per line, applies the test Fst to each token, and writes the results to
 * the output file as XML.</p>
 *
 * <p>Any exception raised while reading, applying or writing is reported on
 * standard output and otherwise swallowed; both files are closed on every
 * path.</p>
 *
 * @param node the statement node whose children 0..10 supply the arguments
 * @param data visitor payload, passed to each child and returned unchanged
 * @return {@code data}
 * @throws KleeneArgException if any of the ten string arguments is the empty string
 */
public Object visit(ASTtestTokensTextFile_statement node, Object data) {
    // Total: 11 regexp arguments, syntactically constrained

    // 0.  the Fst to test

    node.jjtGetChild(0).jjtAccept(this, data);
    Fst testFst = (Fst) (stack.pop());

    // 1.  path of the input file

    node.jjtGetChild(1).jjtAccept(this, data);
    Fst tempFst = (Fst) (stack.pop());

    String inputFilePath = lib.GetSingleString(tempFst,
            "Second arg to testTokensTextFile must denote a language of exactly one string.");

    if (inputFilePath.length() == 0) {
        throw new KleeneArgException(
                "Second arg to testTokensTextFile must denote a language of exactly one non-empty string");
    }

    // 2.  encoding of the input file

    node.jjtGetChild(2).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String inputFileEncoding = lib.GetSingleString(tempFst,
            "Third arg to testTokensTextFile must denote a language of exactly one string.");

    if (inputFileEncoding.length() == 0) {
        throw new KleeneArgException("Third arg to testTokensTextFile must denote one non-empty string");
    }

    // 3.  path of the output file

    node.jjtGetChild(3).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String outputFilePath = lib.GetSingleString(tempFst,
            "Fourth arg to testTokensTextFile must denote a language of exactly one string.");

    if (outputFilePath.length() == 0) {
        throw new KleeneArgException("Fourth arg to testTokensTextFile must denote one non-empty string");
    }

    // 4.  encoding of the output file

    node.jjtGetChild(4).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String outputFileEncoding = lib.GetSingleString(tempFst,
            "Fifth arg to testTokensTextFile must denote a language of exactly one string.");

    if (outputFileEncoding.length() == 0) {
        throw new KleeneArgException("Fifth arg to testTokensTextFile must denote one non-empty string");
    }

    //          And for the XML output

    // 5.  name of the root element

    node.jjtGetChild(5).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String rootElmtName = lib.GetSingleString(tempFst,
            "Sixth arg to testTokensTextFile must denote a language of exactly one string.");

    if (rootElmtName.length() == 0) {
        throw new KleeneArgException("Sixth arg to testTokensTextFile must denote one non-empty string");
    }

    // 6.  name of the token element

    node.jjtGetChild(6).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String tokenElmtName = lib.GetSingleString(tempFst,
            "Seventh arg to testTokensTextFile must denote a language of exactly one string.");

    if (tokenElmtName.length() == 0) {
        throw new KleeneArgException("Seventh arg to testTokensTextFile must denote one non-empty string");
    }

    // 7.  name of the input element

    node.jjtGetChild(7).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String inputElmtName = lib.GetSingleString(tempFst,
            "Eighth arg to testTokensTextFile must denote a language of exactly one string.");

    if (inputElmtName.length() == 0) {
        throw new KleeneArgException("Eighth arg to testTokensTextFile must denote one non-empty string");
    }

    // 8.  name of the outputs element (N.B. plural)

    node.jjtGetChild(8).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String outputsElmtName = lib.GetSingleString(tempFst,
            "Ninth arg to testTokensTextFile must denote a language of exactly one string.");

    if (outputsElmtName.length() == 0) {
        throw new KleeneArgException("Ninth arg to testTokensTextFile must denote one non-empty string");
    }

    // 9.  name of the output element  (N.B. singular)

    node.jjtGetChild(9).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String outputElmtName = lib.GetSingleString(tempFst,
            "Tenth arg to testTokensTextFile must denote a language of exactly one string.");

    if (outputElmtName.length() == 0) {
        throw new KleeneArgException("Tenth arg to testTokensTextFile must denote one non-empty string");
    }

    // 10.  name of the weight attr in the output elmt

    node.jjtGetChild(10).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String weightAttrName = lib.GetSingleString(tempFst,
            "Eleventh arg to testTokensTextFile must denote a language of exactly one string.");

    if (weightAttrName.length() == 0) {
        throw new KleeneArgException("Eleventh arg to testTokensTextFile must denote one non-empty string");
    }

    String fullpath = getFullpath(inputFilePath);

    TranslitTokenizerBuilder ttb = new TranslitTokenizerBuilder(symmap, testFst.getSigma(), lib);
    lib.Iterate4mcs(testFst, ttb, symmap.getStartPuaCpv());
    Transliterator trInput = ttb.getTranslitTokenizer(true); // true for input side

    // Declared outside the try block so the finally clause can release them.
    // The raw file streams are tracked separately because a wrapper
    // constructor can fail (e.g. unsupported encoding name) after the file
    // has already been opened.
    FileInputStream rawIn = null;
    BufferedReader in = null;
    FileOutputStream rawOut = null;
    BufferedWriter out = null;

    try {
        if (inputFileEncoding.equals("default") || inputFileEncoding.equals("-")) {
            // get the current default encoding of the operating system
            inputFileEncoding = System.getProperty("file.encoding");
        }
        rawIn = new FileInputStream(fullpath);
        if (inputFileEncoding.equals("UTF-8")) {
            in = new BufferedReader(
                    new InputStreamReader(new UTF8BOMStripperInputStream(rawIn), inputFileEncoding));
        } else {
            in = new BufferedReader(new InputStreamReader(rawIn, inputFileEncoding));
        }

        // now try to open the output file
        fullpath = getFullpath(outputFilePath);

        if (outputFileEncoding.equals("default") || outputFileEncoding.equals("-")) {
            // get the current default encoding of the operating system
            outputFileEncoding = System.getProperty("file.encoding");
        }
        rawOut = new FileOutputStream(fullpath);
        out = new BufferedWriter(new OutputStreamWriter(rawOut, outputFileEncoding));

        out.write("<?xml version=\"1.0\" encoding=\"" + outputFileEncoding + "\"?>");
        out.newLine();
        out.write("<" + rootElmtName + ">");
        out.newLine();

        // read the input string/words, one per line, from the input file, write output to the output file

        XMLOutputLister xmlOutputLister = new XMLOutputLister(symmap, out, outputElmtName, weightAttrName);

        String token; // one per line in the input file

        while ((token = in.readLine()) != null) {
            String cpvstr = trInput.transliterate(token);
            // converts cpvstr to a sequence of code pt values, and
            // each one could fill one or two 16-bit code units;
            // this is where multichar symbols are reduced to their
            // code point values

            // get length in Unicode characters (not code units)
            int inputlen = cpvstr.codePointCount(0, cpvstr.length());
            // allocate an int array to hold those code-point values,
            //    one int per code point value
            int[] cpvArray = new int[inputlen];

            // UCharacterIterator knows how to iterate over a String and
            // return the Unicode-Character code point values
            UCharacterIterator iter = UCharacterIterator.getInstance(cpvstr);

            // we need to build each input string into a one-path Fst

            // store the codepoints in the int array, which is then passed to
            // lib.ApplyToOneString()
            int codepoint;
            int index = 0;
            while ((codepoint = iter.nextCodePoint()) != UCharacterIterator.DONE) {
                // any multichar symbols will already be in the
                // symmap, or they wouldn't have been identified;
                // but BMP characters may not yet be in the symmap
                if (Character.charCount(codepoint) == 1) {
                    symmap.putsym(String.valueOf((char) codepoint));
                }
                cpvArray[index++] = codepoint;
            }

            // 0 arg means generate
            Fst compFst = lib.ApplyToOneString(testFst, cpvArray, 0);

            // prepare to list the output strings (and their weights)
            long stringCount = lib.NumPaths(compFst);

            // XML output for this input token

            out.write("  <" + tokenElmtName + ">");
            out.newLine();

            // be careful to escape XML special chars in line;
            // EscapeXML.escapeXML is used rather than StringEscapeUtils.escapeXml
            // because (per the original author's note) the latter also escapes
            // non-ASCII Unicode letters
            out.write("    <" + inputElmtName + ">" + EscapeXML.escapeXML(token) + "</" + inputElmtName + ">");
            out.newLine();

            out.write("    <" + outputsElmtName + ">");
            out.newLine();

            if (stringCount == 0) {
                // output nothing
            } else if (stringCount == -1) {
                // means that the composed Fst has loops,
                //   denotes an infinite language
                out.write("      <infinite/>");
                out.newLine();
            } else {
                // lib.ListAllStrings will find all strings in the Fst
                // and make callbacks to xmlOutputLister,
                //      which knows how to output them as XML elements
                lib.ListAllStrings(compFst, 1, xmlOutputLister);
            }

            out.write("    </" + outputsElmtName + ">");
            out.newLine();

            out.write("  </" + tokenElmtName + ">");
            out.newLine();
        }

        out.write("</" + rootElmtName + ">");
        out.newLine();
        // flush explicitly so that a write failure is reported by the catch below
        out.flush();
    } catch (Exception e) {
        System.out.println("Exception found while testing input from file.");
        e.printStackTrace();
    } finally {
        // Release both files on every path, including failures part-way
        // through. The wrapper is closed first, then the raw stream (closing
        // an already-closed file stream has no effect).
        try {
            try {
                if (in != null) {
                    in.close();
                }
            } finally {
                if (rawIn != null) {
                    rawIn.close();
                }
            }
        } catch (Exception closeEx) {
            System.out.println("Exception found while testing input from file.");
            closeEx.printStackTrace();
        }
        try {
            try {
                if (out != null) {
                    out.close();
                }
            } finally {
                if (rawOut != null) {
                    rawOut.close();
                }
            }
        } catch (Exception closeEx) {
            System.out.println("Exception found while testing input from file.");
            closeEx.printStackTrace();
        }
    }
    return data;
}

From source file:StreamFlusher.java

/**
 * Executes a {@code testTokensXMLFile} statement.
 *
 * <p>Evaluates the eleven child nodes of {@code node} (the Fst to test,
 * followed by ten single-string arguments), parses the XML input file,
 * selects every element named by the third argument, applies the test Fst
 * to the text of each such element, and writes the results to the output
 * file as XML.</p>
 *
 * <p>Any exception raised while parsing, applying or writing is reported on
 * standard output and otherwise swallowed; the output file is closed on
 * every path.</p>
 *
 * @param node the statement node whose children 0..10 supply the arguments
 * @param data visitor payload, passed to each child and returned unchanged
 * @return {@code data}
 * @throws KleeneArgException if any of the ten string arguments is the empty string
 */
public Object visit(ASTtestTokensXMLFile_statement node, Object data) {
    // Total: 11 regexp arguments, syntactically constrained

    // 0.  the Fst to test

    node.jjtGetChild(0).jjtAccept(this, data);
    Fst testFst = (Fst) (stack.pop());

    // 1.  path of the input file

    node.jjtGetChild(1).jjtAccept(this, data);
    Fst tempFst = (Fst) (stack.pop());

    String inputFilePath = lib.GetSingleString(tempFst,
            "Second arg to testTokensXMLFile must denote a language of exactly one string.");

    if (inputFilePath.length() == 0) {
        throw new KleeneArgException(
                "Second arg to testTokensXMLFile must denote exactly one non-empty string");
    }

    // 2. argument supplying the name of the element holding
    //      the input strings, by default, "input", i.e.
    //      <input>...</input>
    // N.B. in testTokensTextFile, this argument specifies the
    // encoding of the input file, which is not needed for XML,
    // which either has an explicit "encoding" specification, or
    // is UTF-8 by default

    node.jjtGetChild(2).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String srcInputElmtName = lib.GetSingleString(tempFst,
            "Third arg to testTokensXMLFile must denote a language of exactly one string.");

    if (srcInputElmtName.length() == 0) {
        throw new KleeneArgException("Third arg to testTokensXMLFile must denote one non-empty string");
    }

    // 3.  path of the output file

    node.jjtGetChild(3).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String outputFilePath = lib.GetSingleString(tempFst,
            "Fourth arg to testTokensXMLFile must denote a language of exactly one string.");

    if (outputFilePath.length() == 0) {
        throw new KleeneArgException("Fourth arg to testTokensXMLFile must denote one non-empty string");
    }

    // 4.  encoding of the output file

    node.jjtGetChild(4).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String outputFileEncoding = lib.GetSingleString(tempFst,
            "Fifth arg to testTokensXMLFile must denote a language of exactly one string.");

    if (outputFileEncoding.length() == 0) {
        throw new KleeneArgException("Fifth arg to testTokensXMLFile must denote one non-empty string");
    }

    //          And for the XML output

    // 5.  name of the root element

    node.jjtGetChild(5).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String rootElmtName = lib.GetSingleString(tempFst,
            "Sixth arg to testTokensXMLFile must denote a language of exactly one string.");

    if (rootElmtName.length() == 0) {
        throw new KleeneArgException("Sixth arg to testTokensXMLFile must denote one non-empty string");
    }

    // 6.  name of the token element

    node.jjtGetChild(6).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String tokenElmtName = lib.GetSingleString(tempFst,
            "Seventh arg to testTokensXMLFile must denote a language of exactly one string.");

    if (tokenElmtName.length() == 0) {
        throw new KleeneArgException("Seventh arg to testTokensXMLFile must denote one non-empty string");
    }

    // 7.  name of the input element

    node.jjtGetChild(7).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String inputElmtName = lib.GetSingleString(tempFst,
            "Eighth arg to testTokensXMLFile must denote a language of exactly one string.");

    if (inputElmtName.length() == 0) {
        throw new KleeneArgException("Eighth arg to testTokensXMLFile must denote one non-empty string");
    }

    // 8.  name of the outputs element (N.B. plural)

    node.jjtGetChild(8).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String outputsElmtName = lib.GetSingleString(tempFst,
            "Ninth arg to testTokensXMLFile must denote a language of exactly one string.");

    if (outputsElmtName.length() == 0) {
        throw new KleeneArgException("Ninth arg to testTokensXMLFile must denote one non-empty string");
    }

    // 9.  name of the output element  (N.B. singular)

    node.jjtGetChild(9).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String outputElmtName = lib.GetSingleString(tempFst,
            "Tenth arg to testTokensXMLFile must denote a language of exactly one string.");

    if (outputElmtName.length() == 0) {
        throw new KleeneArgException("Tenth arg to testTokensXMLFile must denote one non-empty string");
    }

    // 10.  name of the weight attr in the output elmt

    node.jjtGetChild(10).jjtAccept(this, data);
    tempFst = (Fst) (stack.pop());

    String weightAttrName = lib.GetSingleString(tempFst,
            "Eleventh arg to testTokensXMLFile must denote a language of exactly one string.");

    if (weightAttrName.length() == 0) {
        throw new KleeneArgException("Eleventh arg to testTokensXMLFile must denote one non-empty string");
    }

    String fullpath = getFullpath(inputFilePath);

    TranslitTokenizerBuilder ttb = new TranslitTokenizerBuilder(symmap, testFst.getSigma(), lib);
    lib.Iterate4mcs(testFst, ttb, symmap.getStartPuaCpv());
    Transliterator trInput = ttb.getTranslitTokenizer(true); // true for input side

    // Declared outside the try block so the finally clause can release them.
    // The raw file stream is tracked separately because the writer
    // constructor can fail (e.g. unsupported encoding name) after the file
    // has already been opened.
    FileOutputStream rawOut = null;
    BufferedWriter out = null;

    try {
        // try to read/parse the XML input file

        Document doc = parseXML(fullpath); // dom4j

        // Read all the <input></input> elements into a list
        // N.B. by default, the name of the element is "input",
        // but in general it is specified in arg srcInputElmtName
        List list = doc.selectNodes("//" + srcInputElmtName);

        // now try to open the output file

        fullpath = getFullpath(outputFilePath);

        if (outputFileEncoding.equals("default") || outputFileEncoding.equals("-")) {
            // get the current default encoding of the operating system
            outputFileEncoding = System.getProperty("file.encoding");
        }
        rawOut = new FileOutputStream(fullpath);
        out = new BufferedWriter(new OutputStreamWriter(rawOut, outputFileEncoding));

        out.write("<?xml version=\"1.0\" encoding=\"" + outputFileEncoding + "\"?>");
        out.newLine();
        out.write("<" + rootElmtName + ">");
        out.newLine();

        XMLOutputLister xmlOutputLister = new XMLOutputLister(symmap, out, outputElmtName, weightAttrName);

        // Loop through the <input></input> elements, extracting and
        //   running the text string from each one; write output to
        //   the output file

        for (Iterator it = list.iterator(); it.hasNext();) {
            Element inputElmt = (Element) it.next();
            String token = inputElmt.getText();

            String cpvstr = trInput.transliterate(token);
            // converts cpvstr to a sequence of code pt values, and
            // each one could fill one or two 16-bit code units;
            // this is where multichar symbols are reduced to their
            // code point values

            // get length in Unicode characters (not code units)
            int inputlen = cpvstr.codePointCount(0, cpvstr.length());
            // allocate an int array to hold those code-point values,
            //    one int per code point value
            int[] cpvArray = new int[inputlen];

            // UCharacterIterator knows how to iterate over a
            //   String and
            // return the Unicode-Character code point values
            UCharacterIterator iter = UCharacterIterator.getInstance(cpvstr);

            // we need to build each input string into a one-path Fst

            // store the codepoints in the int array, which is then passed to
            // lib.ApplyToOneString()
            int codepoint;
            int index = 0;
            while ((codepoint = iter.nextCodePoint()) != UCharacterIterator.DONE) {
                // any multichar symbols will already be in the
                // symmap, or they wouldn't have been identified;
                // but BMP characters may not yet be in the symmap
                if (Character.charCount(codepoint) == 1) {
                    symmap.putsym(String.valueOf((char) codepoint));
                }
                cpvArray[index++] = codepoint;
            }

            // 0 arg for generation, apply the inputFst to the "input"
            // side of testFst
            Fst compFst = lib.ApplyToOneString(testFst, cpvArray, 0);

            // prepare to list the output strings (and their weights)
            long stringCount = lib.NumPaths(compFst);

            // XML output for this input token

            out.write("  <" + tokenElmtName + ">");
            out.newLine();

            // be careful to escape XML special chars in line;
            // EscapeXML.escapeXML is used rather than StringEscapeUtils.escapeXml
            // because (per the original author's note) the latter also escapes
            // non-ASCII Unicode letters
            out.write("    <" + inputElmtName + ">" + EscapeXML.escapeXML(token) + "</" + inputElmtName + ">");
            out.newLine();

            out.write("    <" + outputsElmtName + ">");
            out.newLine();

            if (stringCount == 0) {
                // output nothing
            } else if (stringCount == -1) {
                // means that the composed Fst has loops,
                //      denotes an infinite language
                out.write("      <infinite/>");
                out.newLine();
            } else {
                // lib.ListAllStrings will find all strings in the Fst
                // and make callbacks to xmlOutputLister,
                //      which knows how to output
                // them as XML elements
                lib.ListAllStrings(compFst, 1, xmlOutputLister);
            }

            out.write("    </" + outputsElmtName + ">");
            out.newLine();

            out.write("  </" + tokenElmtName + ">");
            out.newLine();
        }

        out.write("</" + rootElmtName + ">");
        out.newLine();
        // flush explicitly so that a write failure is reported by the catch below
        out.flush();

    } catch (Exception e) {
        // KRB:  review this
        System.out.println("Exception found while testing input from file.");
        e.printStackTrace();
    } finally {
        // Release the output file on every path, including failures part-way
        // through. The writer is closed first, then the raw stream (closing
        // an already-closed file stream has no effect).
        try {
            try {
                if (out != null) {
                    out.close();
                }
            } finally {
                if (rawOut != null) {
                    rawOut.close();
                }
            }
        } catch (Exception closeEx) {
            System.out.println("Exception found while testing input from file.");
            closeEx.printStackTrace();
        }
    }
    return data;
}