List of usage examples for java.lang.String codePointCount
public int codePointCount(int beginIndex, int endIndex)
From source file:Main.java
public static void main(String[] args) { String str = "java2s.com"; System.out.println("String = " + str); // codepoint from index 1 to index 8 int retval = str.codePointCount(1, 8); // prints character from index 1 to index 8 System.out.println("Codepoint count = " + retval); }
From source file:Main.java
/**
 * Returns {@code str} with every supplementary code point (one that needs a
 * surrogate pair in UTF-16) removed.
 *
 * @param str the text to filter
 * @return {@code str} itself when {@code TextUtils.isEmpty(str)} holds or when it
 *         contains no surrogate pairs; otherwise a new string without the
 *         supplementary code points
 */
public static final String filterUCS4(String str) {
    if (TextUtils.isEmpty(str)) {
        return str;
    }

    final int length = str.length();
    // Equal char and code point counts mean there is no surrogate pair to strip.
    if (str.codePointCount(0, length) == length) {
        return str;
    }

    final StringBuilder kept = new StringBuilder();
    for (int pos = 0; pos < length;) {
        final int cp = str.codePointAt(pos);
        // Step over one or two chars depending on the code point's width.
        pos += Character.charCount(cp);
        if (!Character.isSupplementaryCodePoint(cp)) {
            kept.appendCodePoint(cp);
        }
    }
    return kept.toString();
}
From source file:SpinnerTest.java
private static int[] toCodePointArray(String str) { int[] codePoints = new int[str.codePointCount(0, str.length())]; for (int i = 0, j = 0; i < str.length(); i++, j++) { int cp = str.codePointAt(i); if (Character.isSupplementaryCodePoint(cp)) i++;//from w ww .j a v a2 s .c o m codePoints[j] = cp; } return codePoints; }
From source file:com.careerly.utils.TextUtils.java
/**
 * Removes every character that needs two UTF-16 code units (a surrogate pair)
 * from the given text, keeping all single-unit characters in order.
 *
 * @param text the text to filter
 * @return the filtered text, or {@code StringUtils.EMPTY} when
 *         {@code StringUtils.isBlank(text)} holds
 */
public static String removeDoubleByte(String text) {
    if (StringUtils.isBlank(text)) {
        return StringUtils.EMPTY;
    }

    final int length = text.length();
    final StringBuilder stringBuilder = new StringBuilder(length);
    // codePointAt takes a CHAR index, so advance by the width of each code point.
    // Stepping by one up to the code point count would revisit the low surrogate
    // of a pair and stop before the end of the string.
    for (int i = 0; i < length;) {
        final int codePoint = text.codePointAt(i);
        final int width = Character.charCount(codePoint);
        if (width == 1) {
            stringBuilder.append((char) codePoint);
        }
        i += width;
    }
    return stringBuilder.toString();
}
From source file:cherry.foundation.validator.CharTypeValidator.java
/**
 * Expands the given string into an array of its Unicode code points.
 *
 * <p>Every code point counted by {@link String#codePointCount(int, int)} gets an
 * entry, including unpaired surrogates, so the array never contains unfilled
 * (zero) slots.
 *
 * @param acceptable the characters to expand; must not be {@code null}
 * @return the code points of {@code acceptable}, in order
 */
private int[] createAcceptable(String acceptable) {
    final int length = acceptable.length();
    final int[] result = new int[acceptable.codePointCount(0, length)];

    int j = 0;
    for (int i = 0; i < length;) {
        final int codePoint = acceptable.codePointAt(i);
        result[j++] = codePoint;
        // Skip the second half of a surrogate pair; a lone surrogate has width 1.
        i += Character.charCount(codePoint);
    }
    return result;
}
From source file:com.github.fge.jsonschema.keyword.validator.common.MaxLengthValidator.java
/**
 * Reports an error when the text value of the instance has more Unicode code
 * points than {@code intValue}.
 *
 * <p>Length is measured in code points, not UTF-16 code units.
 *
 * @param processor not used by this method
 * @param report the report receiving the error, if any
 * @param bundle the message bundle used to build the error message
 * @param data the data whose instance node supplies the text value
 * @throws ProcessingException declared by the overridden method
 */
@Override
public void validate(final Processor<FullData, FullData> processor, final ProcessingReport report,
        final MessageBundle bundle, final FullData data) throws ProcessingException {
    final String text = data.getInstance().getNode().textValue();
    final int codePoints = text.codePointCount(0, text.length());
    if (codePoints <= intValue) {
        return;
    }

    report.error(newMsg(data, bundle, "err.common.maxLength.tooLong")
            .putArgument("value", text)
            .putArgument("found", codePoints)
            .putArgument(keyword, intValue));
}
From source file:com.github.fge.jsonschema.keyword.validator.common.MinLengthValidator.java
/**
 * Reports an error when the text value of the instance has fewer Unicode code
 * points than {@code intValue}.
 *
 * <p>Length is measured in code points, not UTF-16 code units.
 *
 * @param processor not used by this method
 * @param report the report receiving the error, if any
 * @param bundle the message bundle used to build the error message
 * @param data the data whose instance node supplies the text value
 * @throws ProcessingException declared by the overridden method
 */
@Override
public void validate(final Processor<FullData, FullData> processor, final ProcessingReport report,
        final MessageBundle bundle, final FullData data) throws ProcessingException {
    final String text = data.getInstance().getNode().textValue();
    final int codePoints = text.codePointCount(0, text.length());
    if (codePoints >= intValue) {
        return;
    }

    report.error(newMsg(data, bundle, "err.common.minLength.tooShort")
            .putArgument("value", text)
            .putArgument("found", codePoints)
            .putArgument(keyword, intValue));
}
From source file:com.ebuddy.cassandra.cql.dao.CqlStructuredDataSupport.java
/**
 * Builds the string formed by appending the code point {@code MAX_CODE_POINT}
 * to {@code start}.
 *
 * <p>The prefix is copied verbatim. Reading it through
 * {@code start.codePointAt(i)} with a code point counter as the index would
 * corrupt any prefix containing surrogate pairs, because that method expects a
 * char index.
 *
 * @param start the prefix; must not be {@code null}
 * @return {@code start} followed by {@code MAX_CODE_POINT}
 */
private String getFinishString(String start) {
    return new StringBuilder(start).appendCodePoint(MAX_CODE_POINT).toString();
}
From source file:StreamFlusher.java
public Object visit(ASTtestTokensTextFile_statement node, Object data) { // Total: 11 regexp arguments, syntactically constrained // //from w w w .j a v a 2s .c om // 0. the Fst to test node.jjtGetChild(0).jjtAccept(this, data); Fst testFst = (Fst) (stack.pop()); // 1. path of the input file node.jjtGetChild(1).jjtAccept(this, data); Fst tempFst = (Fst) (stack.pop()); String inputFilePath = lib.GetSingleString(tempFst, "Second arg to testTokensTextFile must denote a language of exactly one string."); if (inputFilePath.length() == 0) { throw new KleeneArgException( "Second arg to testTokensTextFile must denote a language of exactly one non-empty string"); } // 2. encoding of the input file node.jjtGetChild(2).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String inputFileEncoding = lib.GetSingleString(tempFst, "Third arg to testTokensTextFile must denote a language of exactly one string."); if (inputFileEncoding.length() == 0) { throw new KleeneArgException("Third arg to testTokensTextFile must denote one non-empty string"); } // 3. path of the output file node.jjtGetChild(3).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String outputFilePath = lib.GetSingleString(tempFst, "Fourth arg to testTokensTextFile must denote a language of exactly one string."); if (outputFilePath.length() == 0) { throw new KleeneArgException("Fourth arg to testTokensTextFile must denote one non-empty string"); } // 4. encoding of the output file node.jjtGetChild(4).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String outputFileEncoding = lib.GetSingleString(tempFst, "Fifth arg to testTokensTextFile must denote a language of exactly one string."); if (outputFileEncoding.length() == 0) { throw new KleeneArgException("Fifth arg to testTokensTextFile must denote one non-empty string"); } // And for the XML output // 5. 
name of the root element node.jjtGetChild(5).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String rootElmtName = lib.GetSingleString(tempFst, "Sixth arg to testTokensTextFile must denote a language of exactly one string."); if (rootElmtName.length() == 0) { throw new KleeneArgException("Sixth arg to testTokensTextFile must denote one non-empty string"); } // 6. name of the token element node.jjtGetChild(6).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String tokenElmtName = lib.GetSingleString(tempFst, "Seventh arg to testTokensTextFile must denote a language of exactly one string."); if (tokenElmtName.length() == 0) { throw new KleeneArgException("Seventh arg to testTokensTextFile must denote one non-empty string"); } // 7. name of the input element node.jjtGetChild(7).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String inputElmtName = lib.GetSingleString(tempFst, "Eighth arg to testTokensTextFile must denote a language of exactly one string."); if (inputElmtName.length() == 0) { throw new KleeneArgException("Eighth arg to testTokensTextFile must denote one non-empty string"); } // 8. name of the outputs element (N.B. plural) node.jjtGetChild(8).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String outputsElmtName = lib.GetSingleString(tempFst, "Ninth arg to testTokensTextFile must denote a language of exactly one string."); if (outputsElmtName.length() == 0) { throw new KleeneArgException("Ninth arg to testTokensTextFile must denote one non-empty string"); } // 9. name of the output element (N.B. singular) node.jjtGetChild(9).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String outputElmtName = lib.GetSingleString(tempFst, "Tenth arg to testTokensTextFile must denote a language of exactly one string."); if (outputElmtName.length() == 0) { throw new KleeneArgException("Tenth arg to testTokensTextFile must denote one non-empty string"); } // 10. 
name of the weight attr in the output elmt node.jjtGetChild(10).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String weightAttrName = lib.GetSingleString(tempFst, "Eleventh arg to testTokensTextFile must denote a language of exactly one string."); if (weightAttrName.length() == 0) { throw new KleeneArgException("Eleventh arg to testTokensTextFile must denote one non-empty string"); } String fullpath = getFullpath(inputFilePath); TranslitTokenizerBuilder ttb = new TranslitTokenizerBuilder(symmap, testFst.getSigma(), lib); lib.Iterate4mcs(testFst, ttb, symmap.getStartPuaCpv()); Transliterator trInput = ttb.getTranslitTokenizer(true); // true for input side try { BufferedReader in = null; if (inputFileEncoding.equals("default") || inputFileEncoding.equals("-")) { // get the current default encoding of the operating system inputFileEncoding = System.getProperty("file.encoding"); } if (inputFileEncoding.equals("UTF-8")) { in = new BufferedReader(new InputStreamReader( new UTF8BOMStripperInputStream(new FileInputStream(fullpath)), inputFileEncoding)); } else { in = new BufferedReader(new InputStreamReader(new FileInputStream(fullpath), inputFileEncoding)); } // now try to open the output file fullpath = getFullpath(outputFilePath); BufferedWriter out = null; if (outputFileEncoding.equals("default") || outputFileEncoding.equals("-")) { // get the current default encoding of the operating system outputFileEncoding = System.getProperty("file.encoding"); } out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fullpath), outputFileEncoding)); out.write("<?xml version=\"1.0\" encoding=\"" + outputFileEncoding + "\"?>"); out.newLine(); out.write("<" + rootElmtName + ">"); out.newLine(); // read the input string/words, one per line, from the input file, write output to the output file XMLOutputLister xmlOutputLister = new XMLOutputLister(symmap, out, outputElmtName, weightAttrName); String token; // one per line in the input file Fst modifiedTestFst; while 
((token = in.readLine()) != null) { String cpvstr = trInput.transliterate(token); // converts cpvstr to a sequence of code pt values, and // each one could fill one or two 16-bit code units; // this is where multichar symbols are reduced to their // code point values // get length in Unicode characters (not code units) int inputlen = cpvstr.codePointCount(0, cpvstr.length()); // allocate an int array to hold those code-point values, // one int per code point value int[] cpvArray = new int[inputlen]; // UCharacterIterator knows how to iterate over a String and // return the Unicode-Character code point values UCharacterIterator iter = UCharacterIterator.getInstance(cpvstr); // we need to build each input string into a one-path Fst // store the codepoints in the int array (which will be passed to // oneStringNativeFst(), a native method int codepoint; int index = 0; while ((codepoint = iter.nextCodePoint()) != UCharacterIterator.DONE) { // any multichar symbols will already be in the // symmap, or they wouldn't have been identified; // but BMP characters may not yet be in the symmap if (Character.charCount(codepoint) == 1) { symmap.putsym(String.valueOf((char) codepoint)); } cpvArray[index++] = codepoint; } // 0 arg means generate Fst compFst = lib.ApplyToOneString(testFst, cpvArray, 0); // prepare to list the output strings (and their weights) long stringCount = lib.NumPaths(compFst); // XML output for this input token out.write(" <" + tokenElmtName + ">"); out.newLine(); // be careful to escape XML special chars in line; // N.B. 
escapeXml also escapes non-ASCII Unicode letters //out.write(" <" + inputElmtName + ">" + // StringEscapeUtils.escapeXml(token) + "</" + // inputElmtName + ">") ; out.write(" <" + inputElmtName + ">" + EscapeXML.escapeXML(token) + "</" + inputElmtName + ">"); out.newLine(); out.write(" <" + outputsElmtName + ">"); out.newLine(); if (stringCount == 0) { // output nothing } else if (stringCount == -1) { // means that the composedFstPtr has loops, // denotes an infinite language out.write(" <infinite/>"); out.newLine(); } else { // native function listAllStrings will find all // strings in the Fst // and make callbacks to xmlOutputLister, // which knows how to output them as XML elements lib.ListAllStrings(compFst, 1, xmlOutputLister); } out.write(" </" + outputsElmtName + ">"); out.newLine(); out.write(" </" + tokenElmtName + ">"); out.newLine(); } in.close(); out.write("</" + rootElmtName + ">"); out.newLine(); out.flush(); out.close(); } catch (Exception e) { System.out.println("Exception found while testing input from file."); e.printStackTrace(); } return data; }
From source file:StreamFlusher.java
public Object visit(ASTtestTokensXMLFile_statement node, Object data) { // Total: 11 regexp arguments, syntactically constrained // // w w w.j ava 2 s . c om // 0. the Fst to test node.jjtGetChild(0).jjtAccept(this, data); Fst testFst = (Fst) (stack.pop()); // 1. path of the input file node.jjtGetChild(1).jjtAccept(this, data); Fst tempFst = (Fst) (stack.pop()); String inputFilePath = lib.GetSingleString(tempFst, "Second arg to testTokensXMLFile must denote a language of exactly one string."); if (inputFilePath.length() == 0) { throw new KleeneArgException( "Second arg to testTokensXMLFile must denote exactly one non-empty string"); } // 2. argument supplying the name of the element holding // the input strings, by default, "input", i.e. // <input>...</input> // N.B. in testTokensTextFile, this argument specifies the // encoding of the input file, which is not needed for XML, // which either has an explicit "encoding" specification, or // is UTF-8 by default node.jjtGetChild(2).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String srcInputElmtName = lib.GetSingleString(tempFst, "Third arg to testTokensXMLFile must denote a language of exactly one string."); if (srcInputElmtName.length() == 0) { throw new KleeneArgException("Third arg to testTokensXMLFile must denote one non-empty string"); } // 3. path of the output file node.jjtGetChild(3).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String outputFilePath = lib.GetSingleString(tempFst, "Fourth arg to testTokensXMLFile must denote a language of exactly one string."); if (outputFilePath.length() == 0) { throw new KleeneArgException("Fourth arg to testTokensXMLFile must denote one non-empty string"); } // 4. 
encoding of the output file node.jjtGetChild(4).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String outputFileEncoding = lib.GetSingleString(tempFst, "Fifth arg to testTokensXMLFile must denote a language of exactly one string."); if (outputFileEncoding.length() == 0) { throw new KleeneArgException("Fifth arg to testTokensXMLFile must denote one non-empty string"); } // And for the XML output // 5. name of the root element node.jjtGetChild(5).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String rootElmtName = lib.GetSingleString(tempFst, "Sixth arg to testTokensXMLFile must denote a language of exactly one string."); if (rootElmtName.length() == 0) { throw new KleeneArgException("Sixth arg to testTokensXMLFile must denote one non-empty string"); } // 6. name of the token element node.jjtGetChild(6).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String tokenElmtName = lib.GetSingleString(tempFst, "Seventh arg to testTokensXMLFile must denote a language of exactly one string."); if (tokenElmtName.length() == 0) { throw new KleeneArgException("Seventh arg to testTokensXMLFile must denote one non-empty string"); } // 7. name of the input element node.jjtGetChild(7).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String inputElmtName = lib.GetSingleString(tempFst, "Eighth arg to testTokensXMLFile must denote a language of exactly one string."); if (inputElmtName.length() == 0) { throw new KleeneArgException("Eighth arg to testTokensXMLFile must denote one non-empty string"); } // 8. name of the outputs element (N.B. plural) node.jjtGetChild(8).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String outputsElmtName = lib.GetSingleString(tempFst, "Ninth arg to testTokensXMLFile must denote a language of exactly one string."); if (outputsElmtName.length() == 0) { throw new KleeneArgException("Ninth arg to testTokensXMLFile must denote one non-empty string"); } // 9. name of the output element (N.B. 
singular) node.jjtGetChild(9).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String outputElmtName = lib.GetSingleString(tempFst, "Tenth arg to testTokensXMLFile must denote a language of exactly one string."); if (outputElmtName.length() == 0) { throw new KleeneArgException("Tenth arg to testTokensXMLFile must denote one non-empty string"); } // 10. name of the weight attr in the output elmt node.jjtGetChild(10).jjtAccept(this, data); tempFst = (Fst) (stack.pop()); String weightAttrName = lib.GetSingleString(tempFst, "Eleventh arg to testTokensXMLFile must denote a language of exactly one string."); if (weightAttrName.length() == 0) { throw new KleeneArgException("Eleventh arg to testTokensXMLFile must denote one non-empty string"); } String fullpath = getFullpath(inputFilePath); TranslitTokenizerBuilder ttb = new TranslitTokenizerBuilder(symmap, testFst.getSigma(), lib); lib.Iterate4mcs(testFst, ttb, symmap.getStartPuaCpv()); Transliterator trInput = ttb.getTranslitTokenizer(true); // true for input side try { // try to read/parse the XML input file Document doc = null; doc = parseXML(fullpath); // dom4j // Read all the <input></input> elements into a list // N.B. 
by default, the name of the element is "input", // but in general it is specified in arg srcInputElmtName List list = doc.selectNodes("//" + srcInputElmtName); // now try to open the output file fullpath = getFullpath(outputFilePath); BufferedWriter out = null; if (outputFileEncoding.equals("default") || outputFileEncoding.equals("-")) { // get the current default encoding of the operating system outputFileEncoding = System.getProperty("file.encoding"); } out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fullpath), outputFileEncoding)); out.write("<?xml version=\"1.0\" encoding=\"" + outputFileEncoding + "\"?>"); out.newLine(); out.write("<" + rootElmtName + ">"); out.newLine(); XMLOutputLister xmlOutputLister = new XMLOutputLister(symmap, out, outputElmtName, weightAttrName); // Loop through the <input></input> elements, extracting and // running the text string from each one; write output to // the output file String token; Fst modifiedTestFst; for (Iterator it = list.iterator(); it.hasNext();) { Element inputElmt = (Element) it.next(); token = inputElmt.getText(); String cpvstr = trInput.transliterate(token); // converts cpvstr to a sequence of code pt values, and // each one could fill one or two 16-bit code units; // this is where multichar symbols are reduced to their // code point values // get length in Unicode characters (not code units) int inputlen = cpvstr.codePointCount(0, cpvstr.length()); // allocate an int array to hold those code-point values, // one int per code point value int[] cpvArray = new int[inputlen]; // UCharacterIterator knows how to iterate over a // String and // return the Unicode-Character code point values UCharacterIterator iter = UCharacterIterator.getInstance(cpvstr); // we need to build each input string into a one-path Fst // store the codepoints in the int array // (which will be passed to // oneStringNativeFst(), a native method int codepoint; int index = 0; while ((codepoint = iter.nextCodePoint()) != 
UCharacterIterator.DONE) { // any multichar symbols will already be in the // symmap, or they wouldn't have been identified; // but BMP characters may not yet be in the symmap if (Character.charCount(codepoint) == 1) { symmap.putsym(String.valueOf((char) codepoint)); } cpvArray[index++] = codepoint; } // 0 arg for generation, apply the inputFst to the "input" // side of testFst Fst compFst = lib.ApplyToOneString(testFst, cpvArray, 0); // prepare to list the output strings (and their weights) long stringCount = lib.NumPaths(compFst); // XML output for this input token out.write(" <" + tokenElmtName + ">"); out.newLine(); // be careful to escape XML special chars in line; // N.B. escapeXml also escapes non-ASCII Unicode letters //out.write(" <" + inputElmtName + ">" + // StringEscapeUtils.escapeXml(token) + // "</" + inputElmtName + ">") ; out.write(" <" + inputElmtName + ">" + EscapeXML.escapeXML(token) + "</" + inputElmtName + ">"); out.newLine(); out.write(" <" + outputsElmtName + ">"); out.newLine(); if (stringCount == 0) { // output nothing } else if (stringCount == -1) { // means that the compFstPtr has loops, // denotes an infinite language out.write(" <infinite/>"); out.newLine(); } else { // native function listAllStrings will find all // strings in the Fst // and make callbacks to xmlOutputLister, // which knows how to output // them as XML elements lib.ListAllStrings(compFst, 1, xmlOutputLister); } out.write(" </" + outputsElmtName + ">"); out.newLine(); out.write(" </" + tokenElmtName + ">"); out.newLine(); } out.write("</" + rootElmtName + ">"); out.newLine(); out.flush(); out.close(); } catch (Exception e) { // KRB: review this System.out.println("Exception found while testing input from file."); e.printStackTrace(); } return data; }