List of usage examples for the java.nio.CharBuffer.length() method
public final int length()
From source file:Main.java
/**
 * Demonstrates {@link CharBuffer#length()}: allocates a five-character
 * buffer, stores one character by absolute index, rewinds the buffer and
 * prints how many characters lie between its position and its limit.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    final int capacity = 5;
    CharBuffer buffer = CharBuffer.allocate(capacity);

    // Absolute put: writes at index 2 without moving the position.
    buffer.put(2, 'j');

    // Sets the position back to zero and discards any mark.
    buffer.rewind();

    int remaining = buffer.length();
    System.out.println(remaining);
}
From source file:MainClass.java
public static void main(String[] args) { String[] phrases = { "A", "B 1", "C 1.3" }; String dirname = "C:/test"; String filename = "Phrases.txt"; File dir = new File(dirname); File aFile = new File(dir, filename); FileOutputStream outputFile = null; try {//from www . j a v a2 s .co m outputFile = new FileOutputStream(aFile, true); } catch (FileNotFoundException e) { e.printStackTrace(System.err); } FileChannel outChannel = outputFile.getChannel(); ByteBuffer buf = ByteBuffer.allocate(1024); System.out.println(buf.position()); System.out.println(buf.limit()); System.out.println(buf.capacity()); CharBuffer charBuf = buf.asCharBuffer(); System.out.println(charBuf.position()); System.out.println(charBuf.limit()); System.out.println(charBuf.capacity()); Formatter formatter = new Formatter(charBuf); int number = 0; for (String phrase : phrases) { formatter.format("%n %s", ++number, phrase); System.out.println(charBuf.position()); System.out.println(charBuf.limit()); System.out.println(charBuf.capacity()); charBuf.flip(); System.out.println(charBuf.position()); System.out.println(charBuf.limit()); System.out.println(charBuf.length()); buf.limit(2 * charBuf.length()); // Set byte buffer limit System.out.println(buf.position()); System.out.println(buf.limit()); System.out.println(buf.remaining()); try { outChannel.write(buf); buf.clear(); charBuf.clear(); } catch (IOException e) { e.printStackTrace(System.err); } } try { outputFile.close(); } catch (IOException e) { e.printStackTrace(System.err); } }
From source file:Main.java
private static String decode(Charset charset, byte[] b) { if (b == null) { return null; }// w w w .java 2 s . c o m final CharBuffer cb = charset.decode(ByteBuffer.wrap(b)); return new String(cb.array(), 0, cb.length()); }
From source file:eu.stratosphere.types.StringValue.java
/**
 * Replaces the contents of this string with the characters remaining in the
 * given {@code CharBuffer}, i.e. those from the buffer's current position
 * (inclusive) up to its limit (exclusive). The characters are read with a
 * relative bulk get.
 *
 * @param buffer The character buffer to read the characters from.
 */
public void setValue(CharBuffer buffer) {
    Validate.notNull(buffer);

    // For a CharBuffer, remaining() and length() are the same quantity.
    final int count = buffer.remaining();

    // Make room first, then copy into the character array.
    ensureSize(count);
    buffer.get(this.value, 0, count);

    this.len = count;
    // Reset the stored hash code now that the contents have changed.
    this.hashCode = 0;
}
From source file:com.cloudera.sqoop.lib.RecordParser.java
/**
 * Return a list of strings representing the fields of the input line.
 * This list is backed by an internal buffer which is cleared by the
 * next call to parseRecord().
 *
 * <p>Characters are consumed from the buffer with relative reads, starting
 * at its current position; at most {@code input.length()} characters are
 * read, and reading stops early at a record delimiter found outside an
 * enclosed field.
 *
 * @param input the characters of the record to split into fields
 * @return the internal list holding the parsed field values
 * @throws ParseError if {@code input} is null, if a field starts without
 *     the enclosing character while enclosing is required, or if a closing
 *     encloser is followed by anything other than a field or record delimiter
 */
public List<String> parseRecord(CharBuffer input) throws ParseError {
  if (null == input) {
    throw new ParseError("null input string");
  }

  /*
    This method implements the following state machine to perform
    parsing.

    Note that there are no restrictions on whether particular characters
    (e.g., field-sep, record-sep, etc) are distinct or the same. The
    state transitions are processed in the order seen in this comment.

    Starting state is FIELD_START
      encloser -> ENCLOSED_FIELD
      escape char -> UNENCLOSED_ESCAPE
      field delim -> FIELD_START (for a new field)
      record delim -> stops processing
      all other letters get added to current field, -> UNENCLOSED FIELD

    ENCLOSED_FIELD state:
      escape char goes to ENCLOSED_ESCAPE
      encloser goes to ENCLOSED_EXPECT_DELIMITER
      field sep or record sep gets added to the current string
      normal letters get added to the current string

    ENCLOSED_ESCAPE state:
      any character seen here is added literally, back to ENCLOSED_FIELD

    ENCLOSED_EXPECT_DELIMITER state:
      field sep goes to FIELD_START
      record sep halts processing.
      all other characters are errors.

    UNENCLOSED_FIELD state:
      ESCAPE char goes to UNENCLOSED_ESCAPE
      FIELD_SEP char goes to FIELD_START
      RECORD_SEP char halts processing
      normal chars or the enclosing char get added to the current string

    UNENCLOSED_ESCAPE:
      add character literal to current string, return to UNENCLOSED_FIELD
  */

  char curChar = DelimiterSet.NULL_CHAR;
  ParseState state = ParseState.FIELD_START;
  // Number of characters available between the buffer's position and limit.
  int len = input.length();
  // Accumulates the field currently being read; null until the first field.
  StringBuilder sb = null;

  outputs.clear();

  char enclosingChar = delimiters.getEnclosedBy();
  char fieldDelim = delimiters.getFieldsTerminatedBy();
  char recordDelim = delimiters.getLinesTerminatedBy();
  char escapeChar = delimiters.getEscapedBy();
  boolean enclosingRequired = delimiters.isEncloseRequired();

  // 'pos' counts characters consumed so far; the branches below set it to
  // 'len' to leave the loop once a record delimiter ends the record.
  for (int pos = 0; pos < len; pos++) {
    curChar = input.get();
    switch (state) {
    case FIELD_START:
      // ready to start processing a new field.
      if (null != sb) {
        // We finished processing a previous field. Add to the list.
        outputs.add(sb.toString());
      }

      sb = new StringBuilder();
      if (enclosingChar == curChar) {
        // got an opening encloser.
        state = ParseState.ENCLOSED_FIELD;
      } else if (escapeChar == curChar) {
        state = ParseState.UNENCLOSED_ESCAPE;
      } else if (fieldDelim == curChar) {
        // we have a zero-length field. This is a no-op: the empty builder
        // is recorded when the next character is handled in FIELD_START,
        // or after the loop if this was the last character.
        continue;
      } else if (recordDelim == curChar) {
        // we have a zero-length field, that ends processing.
        pos = len;
      } else {
        // current char is part of the field.
        state = ParseState.UNENCLOSED_FIELD;
        sb.append(curChar);

        if (enclosingRequired) {
          throw new ParseError("Opening field-encloser expected at position " + pos);
        }
      }

      break;

    case ENCLOSED_FIELD:
      if (escapeChar == curChar) {
        // the next character is escaped. Treat it literally.
        state = ParseState.ENCLOSED_ESCAPE;
      } else if (enclosingChar == curChar) {
        // we're at the end of the enclosing field. Expect an EOF or EOR char.
        state = ParseState.ENCLOSED_EXPECT_DELIMITER;
      } else {
        // this is a regular char, or an EOF / EOR inside an encloser. Add to
        // the current field string, and remain in this state.
        sb.append(curChar);
      }

      break;

    case UNENCLOSED_FIELD:
      if (escapeChar == curChar) {
        // the next character is escaped. Treat it literally.
        state = ParseState.UNENCLOSED_ESCAPE;
      } else if (fieldDelim == curChar) {
        // we're at the end of this field; may be the start of another one.
        state = ParseState.FIELD_START;
      } else if (recordDelim == curChar) {
        pos = len; // terminate processing immediately.
      } else {
        // this is a regular char. Add to the current field string,
        // and remain in this state.
        sb.append(curChar);
      }

      break;

    case ENCLOSED_ESCAPE:
      // Treat this character literally, whatever it is, and return to
      // enclosed field processing.
      sb.append(curChar);
      state = ParseState.ENCLOSED_FIELD;
      break;

    case ENCLOSED_EXPECT_DELIMITER:
      // We were in an enclosed field, but got the final encloser. Now we
      // expect either an end-of-field or an end-of-record.
      if (fieldDelim == curChar) {
        // end of one field is the beginning of the next.
        state = ParseState.FIELD_START;
      } else if (recordDelim == curChar) {
        // stop processing.
        pos = len;
      } else {
        // Don't know what to do with this character.
        throw new ParseError("Expected delimiter at position " + pos);
      }

      break;

    case UNENCLOSED_ESCAPE:
      // Treat this character literally, whatever it is, and return to
      // non-enclosed field processing.
      sb.append(curChar);
      state = ParseState.UNENCLOSED_FIELD;
      break;

    default:
      throw new ParseError("Unexpected parser state: " + state);
    }
  }

  if (state == ParseState.FIELD_START && curChar == fieldDelim) {
    // we hit an EOF/EOR as the last legal character and we need to mark
    // that string as recorded. This if block is outside the for-loop since
    // we don't have a physical 'epsilon' token in our string.
    if (null != sb) {
      outputs.add(sb.toString());
      // Fresh builder for the empty field after the trailing delimiter;
      // the block below adds it to the list.
      sb = new StringBuilder();
    }
  }

  if (null != sb) {
    // There was a field that terminated by running out of chars or an EOR
    // character. Add to the list.
    outputs.add(sb.toString());
  }

  return outputs;
}
From source file:org.apache.sqoop.lib.RecordParser.java
/**
 * Return a list of strings representing the fields of the input line.
 * This list is backed by an internal buffer which is cleared by the
 * next call to parseRecord().
 *
 * <p>Characters are consumed from the buffer with relative reads, starting
 * at its current position; at most {@code input.length()} characters are
 * read, and reading stops early at a record delimiter found outside an
 * enclosed field.
 *
 * @param input the characters of the record to split into fields
 * @return the internal list holding the parsed field values
 * @throws com.cloudera.sqoop.lib.RecordParser.ParseError if {@code input}
 *     is null, if a field starts without the enclosing character while
 *     enclosing is required, or if a closing encloser is followed by
 *     anything other than a field or record delimiter
 */
public List<String> parseRecord(CharBuffer input)
    throws com.cloudera.sqoop.lib.RecordParser.ParseError {
  if (null == input) {
    throw new com.cloudera.sqoop.lib.RecordParser.ParseError("null input string");
  }

  /*
    This method implements the following state machine to perform
    parsing.

    Note that there are no restrictions on whether particular characters
    (e.g., field-sep, record-sep, etc) are distinct or the same. The
    state transitions are processed in the order seen in this comment.

    Starting state is FIELD_START
      encloser -> ENCLOSED_FIELD
      escape char -> UNENCLOSED_ESCAPE
      field delim -> FIELD_START (for a new field)
      record delim -> stops processing
      all other letters get added to current field, -> UNENCLOSED FIELD

    ENCLOSED_FIELD state:
      escape char goes to ENCLOSED_ESCAPE
      encloser goes to ENCLOSED_EXPECT_DELIMITER
      field sep or record sep gets added to the current string
      normal letters get added to the current string

    ENCLOSED_ESCAPE state:
      any character seen here is added literally, back to ENCLOSED_FIELD

    ENCLOSED_EXPECT_DELIMITER state:
      field sep goes to FIELD_START
      record sep halts processing.
      all other characters are errors.

    UNENCLOSED_FIELD state:
      ESCAPE char goes to UNENCLOSED_ESCAPE
      FIELD_SEP char goes to FIELD_START
      RECORD_SEP char halts processing
      normal chars or the enclosing char get added to the current string

    UNENCLOSED_ESCAPE:
      add character literal to current string, return to UNENCLOSED_FIELD
  */

  char curChar = com.cloudera.sqoop.lib.DelimiterSet.NULL_CHAR;
  ParseState state = ParseState.FIELD_START;
  // Number of characters available between the buffer's position and limit.
  int len = input.length();
  // Accumulates the field currently being read; null until the first field.
  StringBuilder sb = null;

  outputs.clear();

  char enclosingChar = delimiters.getEnclosedBy();
  char fieldDelim = delimiters.getFieldsTerminatedBy();
  char recordDelim = delimiters.getLinesTerminatedBy();
  char escapeChar = delimiters.getEscapedBy();
  boolean enclosingRequired = delimiters.isEncloseRequired();

  // 'pos' counts characters consumed so far; the branches below set it to
  // 'len' to leave the loop once a record delimiter ends the record.
  for (int pos = 0; pos < len; pos++) {
    curChar = input.get();
    switch (state) {
    case FIELD_START:
      // ready to start processing a new field.
      if (null != sb) {
        // We finished processing a previous field. Add to the list.
        outputs.add(sb.toString());
      }

      sb = new StringBuilder();
      if (enclosingChar == curChar) {
        // got an opening encloser.
        state = ParseState.ENCLOSED_FIELD;
      } else if (escapeChar == curChar) {
        state = ParseState.UNENCLOSED_ESCAPE;
      } else if (fieldDelim == curChar) {
        // we have a zero-length field. This is a no-op: the empty builder
        // is recorded when the next character is handled in FIELD_START,
        // or after the loop if this was the last character.
        continue;
      } else if (recordDelim == curChar) {
        // we have a zero-length field, that ends processing.
        pos = len;
      } else {
        // current char is part of the field.
        state = ParseState.UNENCLOSED_FIELD;
        sb.append(curChar);

        if (enclosingRequired) {
          throw new com.cloudera.sqoop.lib.RecordParser.ParseError(
              "Opening field-encloser expected at position " + pos);
        }
      }

      break;

    case ENCLOSED_FIELD:
      if (escapeChar == curChar) {
        // the next character is escaped. Treat it literally.
        state = ParseState.ENCLOSED_ESCAPE;
      } else if (enclosingChar == curChar) {
        // we're at the end of the enclosing field. Expect an EOF or EOR char.
        state = ParseState.ENCLOSED_EXPECT_DELIMITER;
      } else {
        // this is a regular char, or an EOF / EOR inside an encloser. Add to
        // the current field string, and remain in this state.
        sb.append(curChar);
      }

      break;

    case UNENCLOSED_FIELD:
      if (escapeChar == curChar) {
        // the next character is escaped. Treat it literally.
        state = ParseState.UNENCLOSED_ESCAPE;
      } else if (fieldDelim == curChar) {
        // we're at the end of this field; may be the start of another one.
        state = ParseState.FIELD_START;
      } else if (recordDelim == curChar) {
        pos = len; // terminate processing immediately.
      } else {
        // this is a regular char. Add to the current field string,
        // and remain in this state.
        sb.append(curChar);
      }

      break;

    case ENCLOSED_ESCAPE:
      // Treat this character literally, whatever it is, and return to
      // enclosed field processing.
      sb.append(curChar);
      state = ParseState.ENCLOSED_FIELD;
      break;

    case ENCLOSED_EXPECT_DELIMITER:
      // We were in an enclosed field, but got the final encloser. Now we
      // expect either an end-of-field or an end-of-record.
      if (fieldDelim == curChar) {
        // end of one field is the beginning of the next.
        state = ParseState.FIELD_START;
      } else if (recordDelim == curChar) {
        // stop processing.
        pos = len;
      } else {
        // Don't know what to do with this character.
        throw new com.cloudera.sqoop.lib.RecordParser.ParseError(
            "Expected delimiter at position " + pos);
      }

      break;

    case UNENCLOSED_ESCAPE:
      // Treat this character literally, whatever it is, and return to
      // non-enclosed field processing.
      sb.append(curChar);
      state = ParseState.UNENCLOSED_FIELD;
      break;

    default:
      throw new com.cloudera.sqoop.lib.RecordParser.ParseError("Unexpected parser state: " + state);
    }
  }

  if (state == ParseState.FIELD_START && curChar == fieldDelim) {
    // we hit an EOF/EOR as the last legal character and we need to mark
    // that string as recorded. This if block is outside the for-loop since
    // we don't have a physical 'epsilon' token in our string.
    if (null != sb) {
      outputs.add(sb.toString());
      // Fresh builder for the empty field after the trailing delimiter;
      // the block below adds it to the list.
      sb = new StringBuilder();
    }
  }

  if (null != sb) {
    // There was a field that terminated by running out of chars or an EOR
    // character. Add to the list.
    outputs.add(sb.toString());
  }

  return outputs;
}
From source file:org.nuxeo.ecm.platform.filemanager.service.extension.NoteImporter.java
protected static String guessEncoding(Blob blob) throws IOException { // encoding already known? if (blob.getEncoding() != null) { return null; }/* ww w . ja va 2s . c om*/ // bad mime type? String mimeType = blob.getMimeType(); if (mimeType == null) { return null; } if (!mimeType.startsWith("text/") && !mimeType.startsWith("application/xhtml")) { // not a text file, we shouldn't be in the Note importer return null; } byte[] bytes = blob.getByteArray(); List<String> charsets = new ArrayList<>(Arrays.asList("utf-8", "iso-8859-1")); String CSEQ = "charset="; int i = mimeType.indexOf(CSEQ); if (i > 0) { // charset specified in MIME type String onlyMimeType = mimeType.substring(0, i).replace(";", "").trim(); blob.setMimeType(onlyMimeType); String charset = mimeType.substring(i + CSEQ.length()); i = charset.indexOf(";"); if (i > 0) { charset = charset.substring(0, i); } charset = charset.trim().replace("\"", ""); charsets.add(0, charset); } else { // charset detected from the actual bytes CharsetMatch charsetMatch = new CharsetDetector().setText(bytes).detect(); if (charsetMatch != null) { String charset = charsetMatch.getName(); charsets.add(0, charset); } } // now convert the string according to the charset, and fallback on others if not possible for (String charset : charsets) { try { Charset cs = Charset.forName(charset); CharsetDecoder d = cs.newDecoder().onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); CharBuffer cb = d.decode(ByteBuffer.wrap(bytes)); if (cb.length() != 0 && cb.charAt(0) == '\ufeff') { // remove BOM cb = cb.subSequence(1, cb.length()); } return cb.toString(); } catch (IllegalArgumentException e) { // illegal charset } catch (CharacterCodingException e) { // could not decode } } // nothing worked, use platform return null; }
From source file:org.opencb.hpg.bigdata.core.io.VcfBlockIterator.java
public List<CharBuffer> next(long blockSize) { long cnt = 0L; List<CharBuffer> next = new LinkedList<>(); // linked list faster at creation time while (iter.hasNext() && cnt < blockSize) { String line = iter.next(); CharBuffer buff = CharBuffer.wrap(line.toCharArray()); //FIXME! Avoid char array copy next.add(buff);//from w ww . ja v a 2 s .co m cnt += buff.length(); } return next; }
From source file:org.opencms.i18n.CmsEncoder.java
/** * Encodes all characters that are contained in the String which can not displayed * in the given encodings charset with HTML entity references * like <code>&#8364;</code>.<p> * /* w ww. j a va2 s . c om*/ * This is required since a Java String is * internally always stored as Unicode, meaning it can contain almost every character, but * the HTML charset used might not support all such characters.<p> * * @param input the input to encode for HTML * @param encoding the charset to encode the result with * * @return the input with the encoded HTML entities * * @see #decodeHtmlEntities(String, String) */ public static String encodeHtmlEntities(String input, String encoding) { StringBuffer result = new StringBuffer(input.length() * 2); CharBuffer buffer = CharBuffer.wrap(input.toCharArray()); Charset charset = Charset.forName(encoding); CharsetEncoder encoder = charset.newEncoder(); for (int i = 0; i < buffer.length(); i++) { int c = buffer.get(i); if (c < 128) { // first 128 chars are contained in almost every charset result.append((char) c); // this is intended as performance improvement since // the canEncode() operation appears quite CPU heavy } else if (encoder.canEncode((char) c)) { // encoder can encode this char result.append((char) c); } else { // append HTML entity reference result.append(ENTITY_PREFIX); result.append(c); result.append(";"); } } return result.toString(); }
From source file:org.opencms.i18n.CmsEncoder.java
/** * Encodes all characters that are contained in the String which can not displayed * in the given encodings charset with Java escaping like <code>\u20ac</code>.<p> * /* ww w . j a v a 2s . co m*/ * This can be used to escape values used in Java property files.<p> * * @param input the input to encode for Java * @param encoding the charset to encode the result with * * @return the input with the encoded Java entities */ public static String encodeJavaEntities(String input, String encoding) { StringBuffer result = new StringBuffer(input.length() * 2); CharBuffer buffer = CharBuffer.wrap(input.toCharArray()); Charset charset = Charset.forName(encoding); CharsetEncoder encoder = charset.newEncoder(); for (int i = 0; i < buffer.length(); i++) { int c = buffer.get(i); if (c < 128) { // first 128 chars are contained in almost every charset result.append((char) c); // this is intended as performance improvement since // the canEncode() operation appears quite CPU heavy } else if (encoder.canEncode((char) c)) { // encoder can encode this char result.append((char) c); } else { // append Java entity reference result.append("\\u"); String hex = Integer.toHexString(c); int pad = 4 - hex.length(); for (int p = 0; p < pad; p++) { result.append('0'); } result.append(hex); } } return result.toString(); }