List of usage examples for java.lang Character charCount
public static int charCount(int codePoint)
From source file:bfile.util.StringUtils.java
/** * <p>Swaps the case of a String changing upper and title case to * lower case, and lower case to upper case.</p> * * <ul>// www. j a v a2 s . c om * <li>Upper case character converts to Lower case</li> * <li>Title case character converts to Lower case</li> * <li>Lower case character converts to Upper case</li> * </ul> * * <p>For a word based algorithm, see {@link org.apache.commons.lang3.text.WordUtils#swapCase(String)}. * A {@code null} input String returns {@code null}.</p> * * <pre> * StringUtils.swapCase(null) = null * StringUtils.swapCase("") = "" * StringUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone" * </pre> * * <p>NOTE: This method changed in Lang version 2.0. * It no longer performs a word based algorithm. * If you only use ASCII, you will notice no change. * That functionality is available in org.apache.commons.lang3.text.WordUtils.</p> * * @param str the String to swap case, may be null * @return the changed String, {@code null} if null String input */ public static String swapCase(final String str) { if (StringUtils.isEmpty(str)) { return str; } final int strLen = str.length(); int newCodePoints[] = new int[strLen]; // cannot be longer than the char array int outOffset = 0; for (int i = 0; i < strLen;) { final int oldCodepoint = str.codePointAt(i); final int newCodePoint; if (Character.isUpperCase(oldCodepoint)) { newCodePoint = Character.toLowerCase(oldCodepoint); } else if (Character.isTitleCase(oldCodepoint)) { newCodePoint = Character.toLowerCase(oldCodepoint); } else if (Character.isLowerCase(oldCodepoint)) { newCodePoint = Character.toUpperCase(oldCodepoint); } else { newCodePoint = oldCodepoint; } newCodePoints[outOffset++] = newCodePoint; i += Character.charCount(newCodePoint); } return new String(newCodePoints, 0, outOffset); }
From source file:StreamFlusher.java
public Object visit(ASTtestTokensTextFile_statement node, Object data) { // Total: 11 regexp arguments, syntactically constrained // //from w w w . ja v a 2s . com // 0. the Fst to test node.jjtGetChild(0).jjtAccept(this, data); Fst testFst = (Fst) (stack.pop()); // 1. path of the input file node.jjtGetChild(1).jjtAccept(this, data); Fst tempFst = (Fst) (stack.pop()); String inputFilePath = lib.GetSingleString(tempFst, "Second arg to testTokensTextFile must denote a language of exactly one string."); if (inputFilePath.length() == 0) { throw new KleeneArgException( "Second arg to testTokensTextFile must denote a language of exactly one non-empty string"); } // 2. encoding of the input file node.jjtGetChild(2).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String inputFileEncoding = lib.GetSingleString(tempFst, "Third arg to testTokensTextFile must denote a language of exactly one string."); if (inputFileEncoding.length() == 0) { throw new KleeneArgException("Third arg to testTokensTextFile must denote one non-empty string"); } // 3. path of the output file node.jjtGetChild(3).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String outputFilePath = lib.GetSingleString(tempFst, "Fourth arg to testTokensTextFile must denote a language of exactly one string."); if (outputFilePath.length() == 0) { throw new KleeneArgException("Fourth arg to testTokensTextFile must denote one non-empty string"); } // 4. encoding of the output file node.jjtGetChild(4).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String outputFileEncoding = lib.GetSingleString(tempFst, "Fifth arg to testTokensTextFile must denote a language of exactly one string."); if (outputFileEncoding.length() == 0) { throw new KleeneArgException("Fifth arg to testTokensTextFile must denote one non-empty string"); } // And for the XML output // 5. name of the root element node.jjtGetChild(5).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String rootElmtName = lib.GetSingleString(tempFst, "Sixth arg to testTokensTextFile must denote a language of exactly one string."); if (rootElmtName.length() == 0) { throw new KleeneArgException("Sixth arg to testTokensTextFile must denote one non-empty string"); } // 6. name of the token element node.jjtGetChild(6).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String tokenElmtName = lib.GetSingleString(tempFst, "Seventh arg to testTokensTextFile must denote a language of exactly one string."); if (tokenElmtName.length() == 0) { throw new KleeneArgException("Seventh arg to testTokensTextFile must denote one non-empty string"); } // 7. name of the input element node.jjtGetChild(7).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String inputElmtName = lib.GetSingleString(tempFst, "Eighth arg to testTokensTextFile must denote a language of exactly one string."); if (inputElmtName.length() == 0) { throw new KleeneArgException("Eighth arg to testTokensTextFile must denote one non-empty string"); } // 8. name of the outputs element (N.B. plural) node.jjtGetChild(8).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String outputsElmtName = lib.GetSingleString(tempFst, "Ninth arg to testTokensTextFile must denote a language of exactly one string."); if (outputsElmtName.length() == 0) { throw new KleeneArgException("Ninth arg to testTokensTextFile must denote one non-empty string"); } // 9. name of the output element (N.B. singular) node.jjtGetChild(9).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String outputElmtName = lib.GetSingleString(tempFst, "Tenth arg to testTokensTextFile must denote a language of exactly one string."); if (outputElmtName.length() == 0) { throw new KleeneArgException("Tenth arg to testTokensTextFile must denote one non-empty string"); } // 10. name of the weight attr in the output elmt node.jjtGetChild(10).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String weightAttrName = lib.GetSingleString(tempFst, "Eleventh arg to testTokensTextFile must denote a language of exactly one string."); if (weightAttrName.length() == 0) { throw new KleeneArgException("Eleventh arg to testTokensTextFile must denote one non-empty string"); } String fullpath = getFullpath(inputFilePath); TranslitTokenizerBuilder ttb = new TranslitTokenizerBuilder(symmap, testFst.getSigma(), lib); lib.Iterate4mcs(testFst, ttb, symmap.getStartPuaCpv()); Transliterator trInput = ttb.getTranslitTokenizer(true); // true for input side try { BufferedReader in = null; if (inputFileEncoding.equals("default") || inputFileEncoding.equals("-")) { // get the current default encoding of the operating system inputFileEncoding = System.getProperty("file.encoding"); } if (inputFileEncoding.equals("UTF-8")) { in = new BufferedReader(new InputStreamReader( new UTF8BOMStripperInputStream(new FileInputStream(fullpath)), inputFileEncoding)); } else { in = new BufferedReader(new InputStreamReader(new FileInputStream(fullpath), inputFileEncoding)); } // now try to open the output file fullpath = getFullpath(outputFilePath); BufferedWriter out = null; if (outputFileEncoding.equals("default") || outputFileEncoding.equals("-")) { // get the current default encoding of the operating system outputFileEncoding = System.getProperty("file.encoding"); } out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fullpath), outputFileEncoding)); out.write("<?xml version=\"1.0\" encoding=\"" + outputFileEncoding + "\"?>"); out.newLine(); out.write("<" + rootElmtName + ">"); out.newLine(); // read the input string/words, one per line, from the input file, write output to the output file XMLOutputLister xmlOutputLister = new XMLOutputLister(symmap, out, outputElmtName, weightAttrName); String token; // one per line in the input file Fst modifiedTestFst; while ((token = in.readLine()) != null) { String cpvstr = trInput.transliterate(token); // converts cpvstr to a sequence of code pt values, and // each one could fill one or two 16-bit code units; // this is where multichar symbols are reduced to their // code point values // get length in Unicode characters (not code units) int inputlen = cpvstr.codePointCount(0, cpvstr.length()); // allocate an int array to hold those code-point values, // one int per code point value int[] cpvArray = new int[inputlen]; // UCharacterIterator knows how to iterate over a String and // return the Unicode-Character code point values UCharacterIterator iter = UCharacterIterator.getInstance(cpvstr); // we need to build each input string into a one-path Fst // store the codepoints in the int array (which will be passed to // oneStringNativeFst(), a native method int codepoint; int index = 0; while ((codepoint = iter.nextCodePoint()) != UCharacterIterator.DONE) { // any multichar symbols will already be in the // symmap, or they wouldn't have been identified; // but BMP characters may not yet be in the symmap if (Character.charCount(codepoint) == 1) { symmap.putsym(String.valueOf((char) codepoint)); } cpvArray[index++] = codepoint; } // 0 arg means generate Fst compFst = lib.ApplyToOneString(testFst, cpvArray, 0); // prepare to list the output strings (and their weights) long stringCount = lib.NumPaths(compFst); // XML output for this input token out.write(" <" + tokenElmtName + ">"); out.newLine(); // be careful to escape XML special chars in line; // N.B. escapeXml also escapes non-ASCII Unicode letters //out.write(" <" + inputElmtName + ">" + // StringEscapeUtils.escapeXml(token) + "</" + // inputElmtName + ">") ; out.write(" <" + inputElmtName + ">" + EscapeXML.escapeXML(token) + "</" + inputElmtName + ">"); out.newLine(); out.write(" <" + outputsElmtName + ">"); out.newLine(); if (stringCount == 0) { // output nothing } else if (stringCount == -1) { // means that the composedFstPtr has loops, // denotes an infinite language out.write(" <infinite/>"); out.newLine(); } else { // native function listAllStrings will find all // strings in the Fst // and make callbacks to xmlOutputLister, // which knows how to output them as XML elements lib.ListAllStrings(compFst, 1, xmlOutputLister); } out.write(" </" + outputsElmtName + ">"); out.newLine(); out.write(" </" + tokenElmtName + ">"); out.newLine(); } in.close(); out.write("</" + rootElmtName + ">"); out.newLine(); out.flush(); out.close(); } catch (Exception e) { System.out.println("Exception found while testing input from file."); e.printStackTrace(); } return data; }
From source file:StreamFlusher.java
public Object visit(ASTtestTokensXMLFile_statement node, Object data) { // Total: 11 regexp arguments, syntactically constrained // //from w w w . j a va 2 s . c om // 0. the Fst to test node.jjtGetChild(0).jjtAccept(this, data); Fst testFst = (Fst) (stack.pop()); // 1. path of the input file node.jjtGetChild(1).jjtAccept(this, data); Fst tempFst = (Fst) (stack.pop()); String inputFilePath = lib.GetSingleString(tempFst, "Second arg to testTokensXMLFile must denote a language of exactly one string."); if (inputFilePath.length() == 0) { throw new KleeneArgException( "Second arg to testTokensXMLFile must denote exactly one non-empty string"); } // 2. argument supplying the name of the element holding // the input strings, by default, "input", i.e. // <input>...</input> // N.B. in testTokensTextFile, this argument specifies the // encoding of the input file, which is not needed for XML, // which either has an explicit "encoding" specification, or // is UTF-8 by default node.jjtGetChild(2).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String srcInputElmtName = lib.GetSingleString(tempFst, "Third arg to testTokensXMLFile must denote a language of exactly one string."); if (srcInputElmtName.length() == 0) { throw new KleeneArgException("Third arg to testTokensXMLFile must denote one non-empty string"); } // 3. path of the output file node.jjtGetChild(3).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String outputFilePath = lib.GetSingleString(tempFst, "Fourth arg to testTokensXMLFile must denote a language of exactly one string."); if (outputFilePath.length() == 0) { throw new KleeneArgException("Fourth arg to testTokensXMLFile must denote one non-empty string"); } // 4. encoding of the output file node.jjtGetChild(4).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String outputFileEncoding = lib.GetSingleString(tempFst, "Fifth arg to testTokensXMLFile must denote a language of exactly one string."); if (outputFileEncoding.length() == 0) { throw new KleeneArgException("Fifth arg to testTokensXMLFile must denote one non-empty string"); } // And for the XML output // 5. name of the root element node.jjtGetChild(5).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String rootElmtName = lib.GetSingleString(tempFst, "Sixth arg to testTokensXMLFile must denote a language of exactly one string."); if (rootElmtName.length() == 0) { throw new KleeneArgException("Sixth arg to testTokensXMLFile must denote one non-empty string"); } // 6. name of the token element node.jjtGetChild(6).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String tokenElmtName = lib.GetSingleString(tempFst, "Seventh arg to testTokensXMLFile must denote a language of exactly one string."); if (tokenElmtName.length() == 0) { throw new KleeneArgException("Seventh arg to testTokensXMLFile must denote one non-empty string"); } // 7. name of the input element node.jjtGetChild(7).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String inputElmtName = lib.GetSingleString(tempFst, "Eighth arg to testTokensXMLFile must denote a language of exactly one string."); if (inputElmtName.length() == 0) { throw new KleeneArgException("Eighth arg to testTokensXMLFile must denote one non-empty string"); } // 8. name of the outputs element (N.B. plural) node.jjtGetChild(8).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String outputsElmtName = lib.GetSingleString(tempFst, "Ninth arg to testTokensXMLFile must denote a language of exactly one string."); if (outputsElmtName.length() == 0) { throw new KleeneArgException("Ninth arg to testTokensXMLFile must denote one non-empty string"); } // 9. name of the output element (N.B. singular) node.jjtGetChild(9).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String outputElmtName = lib.GetSingleString(tempFst, "Tenth arg to testTokensXMLFile must denote a language of exactly one string."); if (outputElmtName.length() == 0) { throw new KleeneArgException("Tenth arg to testTokensXMLFile must denote one non-empty string"); } // 10. name of the weight attr in the output elmt node.jjtGetChild(10).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String weightAttrName = lib.GetSingleString(tempFst, "Eleventh arg to testTokensXMLFile must denote a language of exactly one string."); if (weightAttrName.length() == 0) { throw new KleeneArgException("Eleventh arg to testTokensXMLFile must denote one non-empty string"); } String fullpath = getFullpath(inputFilePath); TranslitTokenizerBuilder ttb = new TranslitTokenizerBuilder(symmap, testFst.getSigma(), lib); lib.Iterate4mcs(testFst, ttb, symmap.getStartPuaCpv()); Transliterator trInput = ttb.getTranslitTokenizer(true); // true for input side try { // try to read/parse the XML input file Document doc = null; doc = parseXML(fullpath); // dom4j // Read all the <input></input> elements into a list // N.B. by default, the name of the element is "input", // but in general it is specified in arg srcInputElmtName List list = doc.selectNodes("//" + srcInputElmtName); // now try to open the output file fullpath = getFullpath(outputFilePath); BufferedWriter out = null; if (outputFileEncoding.equals("default") || outputFileEncoding.equals("-")) { // get the current default encoding of the operating system outputFileEncoding = System.getProperty("file.encoding"); } out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fullpath), outputFileEncoding)); out.write("<?xml version=\"1.0\" encoding=\"" + outputFileEncoding + "\"?>"); out.newLine(); out.write("<" + rootElmtName + ">"); out.newLine(); XMLOutputLister xmlOutputLister = new XMLOutputLister(symmap, out, outputElmtName, weightAttrName); // Loop through the <input></input> elements, extracting and // running the text string from each one; write output to // the output file String token; Fst modifiedTestFst; for (Iterator it = list.iterator(); it.hasNext();) { Element inputElmt = (Element) it.next(); token = inputElmt.getText(); String cpvstr = trInput.transliterate(token); // converts cpvstr to a sequence of code pt values, and // each one could fill one or two 16-bit code units; // this is where multichar symbols are reduced to their // code point values // get length in Unicode characters (not code units) int inputlen = cpvstr.codePointCount(0, cpvstr.length()); // allocate an int array to hold those code-point values, // one int per code point value int[] cpvArray = new int[inputlen]; // UCharacterIterator knows how to iterate over a // String and // return the Unicode-Character code point values UCharacterIterator iter = UCharacterIterator.getInstance(cpvstr); // we need to build each input string into a one-path Fst // store the codepoints in the int array // (which will be passed to // oneStringNativeFst(), a native method int codepoint; int index = 0; while ((codepoint = iter.nextCodePoint()) != UCharacterIterator.DONE) { // any multichar symbols will already be in the // symmap, or they wouldn't have been identified; // but BMP characters may not yet be in the symmap if (Character.charCount(codepoint) == 1) { symmap.putsym(String.valueOf((char) codepoint)); } cpvArray[index++] = codepoint; } // 0 arg for generation, apply the inputFst to the "input" // side of testFst Fst compFst = lib.ApplyToOneString(testFst, cpvArray, 0); // prepare to list the output strings (and their weights) long stringCount = lib.NumPaths(compFst); // XML output for this input token out.write(" <" + tokenElmtName + ">"); out.newLine(); // be careful to escape XML special chars in line; // N.B. escapeXml also escapes non-ASCII Unicode letters //out.write(" <" + inputElmtName + ">" + // StringEscapeUtils.escapeXml(token) + // "</" + inputElmtName + ">") ; out.write(" <" + inputElmtName + ">" + EscapeXML.escapeXML(token) + "</" + inputElmtName + ">"); out.newLine(); out.write(" <" + outputsElmtName + ">"); out.newLine(); if (stringCount == 0) { // output nothing } else if (stringCount == -1) { // means that the compFstPtr has loops, // denotes an infinite language out.write(" <infinite/>"); out.newLine(); } else { // native function listAllStrings will find all // strings in the Fst // and make callbacks to xmlOutputLister, // which knows how to output // them as XML elements lib.ListAllStrings(compFst, 1, xmlOutputLister); } out.write(" </" + outputsElmtName + ">"); out.newLine(); out.write(" </" + tokenElmtName + ">"); out.newLine(); } out.write("</" + rootElmtName + ">"); out.newLine(); out.flush(); out.close(); } catch (Exception e) { // KRB: review this System.out.println("Exception found while testing input from file."); e.printStackTrace(); } return data; }