Example usage for java.nio CharBuffer length

Introduction

On this page you can find example usages of the java.nio.CharBuffer length() method.

Prototype

public final int length()

Source Link

Document

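Returns the length of this character buffer, that is, the number of chars remaining between the buffer's current position (inclusive) and its limit (exclusive).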

Usage

From source file:Main.java

/**
 * Allocates a five-char buffer, stores one char by absolute index, and prints
 * the number of chars remaining between the buffer's position and its limit.
 */
public static void main(String[] args) {
    final CharBuffer buffer = CharBuffer.allocate(5);
    // Absolute put: writes index 2 without moving the position.
    buffer.put(2, 'j');
    buffer.rewind();

    final int remaining = buffer.length();
    System.out.println(remaining);
}

From source file:MainClass.java

/**
 * Appends each demo phrase, prefixed by its 1-based number, to
 * {@code C:/test/Phrases.txt}, printing the position, limit and capacity (or
 * length/remaining) of the byte buffer and of its char view along the way.
 *
 * <p>Text is formatted into a {@link CharBuffer} view of a {@link ByteBuffer};
 * the byte buffer is then written to the file's channel. I/O failures are
 * reported on {@code System.err}.
 */
public static void main(String[] args) {
    String[] phrases = { "A", "B 1", "C 1.3" };
    String dirname = "C:/test";
    String filename = "Phrases.txt";
    File dir = new File(dirname);
    File aFile = new File(dir, filename);

    // try-with-resources closes the stream (and its channel) on every path, and
    // nothing below runs on a null stream when the file cannot be opened.
    try (FileOutputStream outputFile = new FileOutputStream(aFile, true)) {
        FileChannel outChannel = outputFile.getChannel();
        ByteBuffer buf = ByteBuffer.allocate(1024);
        System.out.println(buf.position());
        System.out.println(buf.limit());
        System.out.println(buf.capacity());
        CharBuffer charBuf = buf.asCharBuffer();
        System.out.println(charBuf.position());
        System.out.println(charBuf.limit());
        System.out.println(charBuf.capacity());
        Formatter formatter = new Formatter(charBuf);
        int number = 0;
        for (String phrase : phrases) {
            // One specifier per argument: with only "%s" the phrase was an
            // extra argument and was silently ignored.
            formatter.format("%n %d %s", ++number, phrase);
            System.out.println(charBuf.position());
            System.out.println(charBuf.limit());
            System.out.println(charBuf.capacity());
            charBuf.flip();
            System.out.println(charBuf.position());
            System.out.println(charBuf.limit());
            System.out.println(charBuf.length());
            buf.limit(2 * charBuf.length()); // Set byte buffer limit: two bytes per char
            System.out.println(buf.position());
            System.out.println(buf.limit());
            System.out.println(buf.remaining());
            try {
                // A single write() is not guaranteed to drain the buffer.
                while (buf.hasRemaining()) {
                    outChannel.write(buf);
                }
            } catch (IOException e) {
                e.printStackTrace(System.err);
            } finally {
                // Always reset both views so a failed write cannot corrupt the
                // next iteration's formatting.
                buf.clear();
                charBuf.clear();
            }
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace(System.err);
    } catch (IOException e) {
        // Raised when closing the stream fails.
        e.printStackTrace(System.err);
    }
}

From source file:Main.java

/**
 * Decodes the given bytes into a string using the given charset.
 *
 * @param charset the charset used to decode the bytes
 * @param b the encoded bytes, may be {@code null}
 * @return the decoded text, or {@code null} when {@code b} is {@code null}
 */
private static String decode(Charset charset, byte[] b) {
    if (b == null) {
        return null;
    }
    // toString() yields exactly the chars between position and limit, so the
    // result does not depend on the buffer being array-backed or on its
    // array offset (which cb.array() with a fixed offset of 0 silently assumed).
    return charset.decode(ByteBuffer.wrap(b)).toString();
}

From source file:eu.stratosphere.types.StringValue.java

/**
 * Replaces the contents of this string with the remaining characters of the
 * given <tt>CharBuffer</tt>, i.e. the characters from the buffer's current
 * position (inclusive) up to the buffer's limit (exclusive).
 *
 * @param buffer The character buffer to read the characters from.
 */
public void setValue(CharBuffer buffer) {
    Validate.notNull(buffer);

    final int remaining = buffer.length();
    ensureSize(remaining);
    // Relative bulk get: copies the remaining chars into the backing array.
    buffer.get(this.value, 0, remaining);

    // Reset to 0 -- presumably invalidates a cached hash; confirm against hashCode().
    this.hashCode = 0;
    this.len = remaining;
}

From source file:com.cloudera.sqoop.lib.RecordParser.java

/**
 * Return a list of strings representing the fields of the input line.
 * This list is backed by an internal buffer which is cleared by the
 * next call to parseRecord().
 *
 * @param input the characters to parse; they are consumed with relative
 *     reads starting at the buffer's current position
 * @return the parsed fields, held in the internal <tt>outputs</tt> list
 * @throws ParseError if input is null, if an opening field-encloser is
 *     required but the field does not start with one, if an enclosed field
 *     is followed by something other than a delimiter, or if the parser
 *     reaches an unknown state
 */
public List<String> parseRecord(CharBuffer input) throws ParseError {
    if (null == input) {
        throw new ParseError("null input string");
    }

    /*
      This method implements the following state machine to perform
      parsing.

      Note that there are no restrictions on whether particular characters
      (e.g., field-sep, record-sep, etc) are distinct or the same. The
      state transitions are processed in the order seen in this comment.

      Starting state is FIELD_START
        encloser -> ENCLOSED_FIELD
        escape char -> UNENCLOSED_ESCAPE
        field delim -> FIELD_START (for a new field)
        record delim -> stops processing
        all other letters get added to current field, -> UNENCLOSED FIELD

      ENCLOSED_FIELD state:
        escape char goes to ENCLOSED_ESCAPE
        encloser goes to ENCLOSED_EXPECT_DELIMITER
        field sep or record sep gets added to the current string
        normal letters get added to the current string

      ENCLOSED_ESCAPE state:
        any character seen here is added literally, back to ENCLOSED_FIELD

      ENCLOSED_EXPECT_DELIMITER state:
        field sep goes to FIELD_START
        record sep halts processing.
        all other characters are errors.

      UNENCLOSED_FIELD state:
        ESCAPE char goes to UNENCLOSED_ESCAPE
        FIELD_SEP char goes to FIELD_START
        RECORD_SEP char halts processing
        normal chars or the enclosing char get added to the current string

      UNENCLOSED_ESCAPE:
        add character literal to current string, return to UNENCLOSED_FIELD
    */

    char curChar = DelimiterSet.NULL_CHAR;
    ParseState state = ParseState.FIELD_START;
    // Number of chars remaining in the buffer on entry; bounds the loop below.
    int len = input.length();
    // Accumulates the field currently being read; null until the first char is seen.
    StringBuilder sb = null;

    outputs.clear();

    char enclosingChar = delimiters.getEnclosedBy();
    char fieldDelim = delimiters.getFieldsTerminatedBy();
    char recordDelim = delimiters.getLinesTerminatedBy();
    char escapeChar = delimiters.getEscapedBy();
    boolean enclosingRequired = delimiters.isEncloseRequired();

    for (int pos = 0; pos < len; pos++) {
        // Relative read: advances the buffer's position by one char.
        curChar = input.get();
        switch (state) {
        case FIELD_START:
            // ready to start processing a new field.
            if (null != sb) {
                // We finished processing a previous field. Add to the list.
                outputs.add(sb.toString());
            }

            sb = new StringBuilder();
            if (enclosingChar == curChar) {
                // got an opening encloser.
                state = ParseState.ENCLOSED_FIELD;
            } else if (escapeChar == curChar) {
                state = ParseState.UNENCLOSED_ESCAPE;
            } else if (fieldDelim == curChar) {
                // we have a zero-length field. Nothing to do here: the empty
                // builder is added to the list on the next pass through FIELD_START.
                continue;
            } else if (recordDelim == curChar) {
                // we have a zero-length field, that ends processing.
                // Setting pos to len makes the loop condition fail after this iteration.
                pos = len;
            } else {
                // current char is part of the field.
                state = ParseState.UNENCLOSED_FIELD;
                sb.append(curChar);

                if (enclosingRequired) {
                    throw new ParseError("Opening field-encloser expected at position " + pos);
                }
            }

            break;

        case ENCLOSED_FIELD:
            if (escapeChar == curChar) {
                // the next character is escaped. Treat it literally.
                state = ParseState.ENCLOSED_ESCAPE;
            } else if (enclosingChar == curChar) {
                // we're at the end of the enclosing field. Expect an
                // end-of-field or end-of-record char.
                state = ParseState.ENCLOSED_EXPECT_DELIMITER;
            } else {
                // this is a regular char, or a field / record delimiter inside
                // an encloser. Add to the current field string, and remain in
                // this state.
                sb.append(curChar);
            }

            break;

        case UNENCLOSED_FIELD:
            if (escapeChar == curChar) {
                // the next character is escaped. Treat it literally.
                state = ParseState.UNENCLOSED_ESCAPE;
            } else if (fieldDelim == curChar) {
                // we're at the end of this field; may be the start of another one.
                state = ParseState.FIELD_START;
            } else if (recordDelim == curChar) {
                pos = len; // terminate processing immediately.
            } else {
                // this is a regular char. Add to the current field string,
                // and remain in this state.
                sb.append(curChar);
            }

            break;

        case ENCLOSED_ESCAPE:
            // Treat this character literally, whatever it is, and return to
            // enclosed field processing.
            sb.append(curChar);
            state = ParseState.ENCLOSED_FIELD;
            break;

        case ENCLOSED_EXPECT_DELIMITER:
            // We were in an enclosed field, but got the final encloser. Now we
            // expect either an end-of-field or an end-of-record.
            if (fieldDelim == curChar) {
                // end of one field is the beginning of the next.
                state = ParseState.FIELD_START;
            } else if (recordDelim == curChar) {
                // stop processing.
                pos = len;
            } else {
                // Don't know what to do with this character.
                throw new ParseError("Expected delimiter at position " + pos);
            }

            break;

        case UNENCLOSED_ESCAPE:
            // Treat this character literally, whatever it is, and return to
            // non-enclosed field processing.
            sb.append(curChar);
            state = ParseState.UNENCLOSED_FIELD;
            break;

        default:
            throw new ParseError("Unexpected parser state: " + state);
        }
    }

    if (state == ParseState.FIELD_START && curChar == fieldDelim) {
        // The last character read was a field delimiter, so the field before
        // it has not been recorded yet. Record it and start a fresh, empty
        // builder; the block below then adds that as the trailing zero-length
        // field. This if block is outside the for-loop since we don't have a
        // physical 'epsilon' token in our string.
        if (null != sb) {
            outputs.add(sb.toString());
            sb = new StringBuilder();
        }
    }

    if (null != sb) {
        // There was a field that terminated by running out of chars or an
        // end-of-record character. Add to the list.
        outputs.add(sb.toString());
    }

    return outputs;
}

From source file:org.apache.sqoop.lib.RecordParser.java

/**
 * Return a list of strings representing the fields of the input line.
 * This list is backed by an internal buffer which is cleared by the
 * next call to parseRecord().
 *
 * @param input the characters to parse; they are consumed with relative
 *     reads starting at the buffer's current position
 * @return the parsed fields, held in the internal <tt>outputs</tt> list
 * @throws com.cloudera.sqoop.lib.RecordParser.ParseError if input is null,
 *     if an opening field-encloser is required but the field does not start
 *     with one, if an enclosed field is followed by something other than a
 *     delimiter, or if the parser reaches an unknown state
 */
public List<String> parseRecord(CharBuffer input) throws com.cloudera.sqoop.lib.RecordParser.ParseError {
    if (null == input) {
        throw new com.cloudera.sqoop.lib.RecordParser.ParseError("null input string");
    }

    /*
      This method implements the following state machine to perform
      parsing.

      Note that there are no restrictions on whether particular characters
      (e.g., field-sep, record-sep, etc) are distinct or the same. The
      state transitions are processed in the order seen in this comment.

      Starting state is FIELD_START
        encloser -> ENCLOSED_FIELD
        escape char -> UNENCLOSED_ESCAPE
        field delim -> FIELD_START (for a new field)
        record delim -> stops processing
        all other letters get added to current field, -> UNENCLOSED FIELD

      ENCLOSED_FIELD state:
        escape char goes to ENCLOSED_ESCAPE
        encloser goes to ENCLOSED_EXPECT_DELIMITER
        field sep or record sep gets added to the current string
        normal letters get added to the current string

      ENCLOSED_ESCAPE state:
        any character seen here is added literally, back to ENCLOSED_FIELD

      ENCLOSED_EXPECT_DELIMITER state:
        field sep goes to FIELD_START
        record sep halts processing.
        all other characters are errors.

      UNENCLOSED_FIELD state:
        ESCAPE char goes to UNENCLOSED_ESCAPE
        FIELD_SEP char goes to FIELD_START
        RECORD_SEP char halts processing
        normal chars or the enclosing char get added to the current string

      UNENCLOSED_ESCAPE:
        add character literal to current string, return to UNENCLOSED_FIELD
    */

    char curChar = com.cloudera.sqoop.lib.DelimiterSet.NULL_CHAR;
    ParseState state = ParseState.FIELD_START;
    // Number of chars remaining in the buffer on entry; bounds the loop below.
    int len = input.length();
    // Accumulates the field currently being read; null until the first char is seen.
    StringBuilder sb = null;

    outputs.clear();

    char enclosingChar = delimiters.getEnclosedBy();
    char fieldDelim = delimiters.getFieldsTerminatedBy();
    char recordDelim = delimiters.getLinesTerminatedBy();
    char escapeChar = delimiters.getEscapedBy();
    boolean enclosingRequired = delimiters.isEncloseRequired();

    for (int pos = 0; pos < len; pos++) {
        // Relative read: advances the buffer's position by one char.
        curChar = input.get();
        switch (state) {
        case FIELD_START:
            // ready to start processing a new field.
            if (null != sb) {
                // We finished processing a previous field. Add to the list.
                outputs.add(sb.toString());
            }

            sb = new StringBuilder();
            if (enclosingChar == curChar) {
                // got an opening encloser.
                state = ParseState.ENCLOSED_FIELD;
            } else if (escapeChar == curChar) {
                state = ParseState.UNENCLOSED_ESCAPE;
            } else if (fieldDelim == curChar) {
                // we have a zero-length field. Nothing to do here: the empty
                // builder is added to the list on the next pass through FIELD_START.
                continue;
            } else if (recordDelim == curChar) {
                // we have a zero-length field, that ends processing.
                // Setting pos to len makes the loop condition fail after this iteration.
                pos = len;
            } else {
                // current char is part of the field.
                state = ParseState.UNENCLOSED_FIELD;
                sb.append(curChar);

                if (enclosingRequired) {
                    throw new com.cloudera.sqoop.lib.RecordParser.ParseError(
                            "Opening field-encloser expected at position " + pos);
                }
            }

            break;

        case ENCLOSED_FIELD:
            if (escapeChar == curChar) {
                // the next character is escaped. Treat it literally.
                state = ParseState.ENCLOSED_ESCAPE;
            } else if (enclosingChar == curChar) {
                // we're at the end of the enclosing field. Expect an
                // end-of-field or end-of-record char.
                state = ParseState.ENCLOSED_EXPECT_DELIMITER;
            } else {
                // this is a regular char, or a field / record delimiter inside
                // an encloser. Add to the current field string, and remain in
                // this state.
                sb.append(curChar);
            }

            break;

        case UNENCLOSED_FIELD:
            if (escapeChar == curChar) {
                // the next character is escaped. Treat it literally.
                state = ParseState.UNENCLOSED_ESCAPE;
            } else if (fieldDelim == curChar) {
                // we're at the end of this field; may be the start of another one.
                state = ParseState.FIELD_START;
            } else if (recordDelim == curChar) {
                pos = len; // terminate processing immediately.
            } else {
                // this is a regular char. Add to the current field string,
                // and remain in this state.
                sb.append(curChar);
            }

            break;

        case ENCLOSED_ESCAPE:
            // Treat this character literally, whatever it is, and return to
            // enclosed field processing.
            sb.append(curChar);
            state = ParseState.ENCLOSED_FIELD;
            break;

        case ENCLOSED_EXPECT_DELIMITER:
            // We were in an enclosed field, but got the final encloser. Now we
            // expect either an end-of-field or an end-of-record.
            if (fieldDelim == curChar) {
                // end of one field is the beginning of the next.
                state = ParseState.FIELD_START;
            } else if (recordDelim == curChar) {
                // stop processing.
                pos = len;
            } else {
                // Don't know what to do with this character.
                throw new com.cloudera.sqoop.lib.RecordParser.ParseError(
                        "Expected delimiter at position " + pos);
            }

            break;

        case UNENCLOSED_ESCAPE:
            // Treat this character literally, whatever it is, and return to
            // non-enclosed field processing.
            sb.append(curChar);
            state = ParseState.UNENCLOSED_FIELD;
            break;

        default:
            throw new com.cloudera.sqoop.lib.RecordParser.ParseError("Unexpected parser state: " + state);
        }
    }

    if (state == ParseState.FIELD_START && curChar == fieldDelim) {
        // The last character read was a field delimiter, so the field before
        // it has not been recorded yet. Record it and start a fresh, empty
        // builder; the block below then adds that as the trailing zero-length
        // field. This if block is outside the for-loop since we don't have a
        // physical 'epsilon' token in our string.
        if (null != sb) {
            outputs.add(sb.toString());
            sb = new StringBuilder();
        }
    }

    if (null != sb) {
        // There was a field that terminated by running out of chars or an
        // end-of-record character. Add to the list.
        outputs.add(sb.toString());
    }

    return outputs;
}

From source file:org.nuxeo.ecm.platform.filemanager.service.extension.NoteImporter.java

/**
 * Decodes the blob's bytes to text by trying candidate charsets in order and
 * returning the first result that decodes without errors.
 * <p>
 * Note that, despite the method name, a successful call returns the decoded
 * content rather than a charset name. When the MIME type carries a
 * {@code charset=} parameter, the blob's MIME type is rewritten without it.
 *
 * @param blob the blob whose content should be decoded
 * @return the decoded text with a leading byte-order mark removed, or
 *         {@code null} when the blob already has an encoding, its MIME type is
 *         missing or not textual, or no candidate charset decodes the bytes
 */
protected static String guessEncoding(Blob blob) throws IOException {
    if (blob.getEncoding() != null) {
        // encoding already known
        return null;
    }

    String mimeType = blob.getMimeType();
    boolean textual = mimeType != null
            && (mimeType.startsWith("text/") || mimeType.startsWith("application/xhtml"));
    if (!textual) {
        // bad mime type, or not a text file: we shouldn't be in the Note importer
        return null;
    }

    byte[] bytes = blob.getByteArray();

    // Candidates are tried in list order: the declared or detected charset
    // first (when there is one), then the generic fallbacks.
    List<String> candidates = new ArrayList<>();
    final String charsetKey = "charset=";
    int keyIndex = mimeType.indexOf(charsetKey);
    if (keyIndex > 0) {
        // charset specified in MIME type: strip it from the blob's MIME type
        blob.setMimeType(mimeType.substring(0, keyIndex).replace(";", "").trim());
        String declared = mimeType.substring(keyIndex + charsetKey.length());
        int paramEnd = declared.indexOf(";");
        if (paramEnd > 0) {
            declared = declared.substring(0, paramEnd);
        }
        candidates.add(declared.trim().replace("\"", ""));
    } else {
        // charset detected from the actual bytes
        CharsetMatch detected = new CharsetDetector().setText(bytes).detect();
        if (detected != null) {
            candidates.add(detected.getName());
        }
    }
    candidates.add("utf-8");
    candidates.add("iso-8859-1");

    for (String candidate : candidates) {
        try {
            // REPORT makes the decoder throw instead of substituting replacement chars
            CharsetDecoder strictDecoder = Charset.forName(candidate).newDecoder()
                    .onMalformedInput(CodingErrorAction.REPORT)
                    .onUnmappableCharacter(CodingErrorAction.REPORT);
            CharBuffer text = strictDecoder.decode(ByteBuffer.wrap(bytes));
            boolean startsWithBom = text.length() != 0 && text.charAt(0) == '\ufeff';
            return (startsWithBom ? text.subSequence(1, text.length()) : text).toString();
        } catch (IllegalArgumentException | CharacterCodingException e) {
            // illegal charset, or the bytes could not be decoded with it: try the next one
        }
    }
    // nothing worked
    return null;
}

From source file:org.opencb.hpg.bigdata.core.io.VcfBlockIterator.java

/**
 * Pulls lines from the underlying iterator, wrapping each one in a
 * {@link CharBuffer}, until the iterator is exhausted or the number of
 * collected characters reaches {@code blockSize}.
 *
 * @param blockSize character count at which collection stops; the line that
 *        crosses this threshold is still included
 * @return the collected lines, possibly empty
 */
public List<CharBuffer> next(long blockSize) {
    List<CharBuffer> block = new LinkedList<>(); // linked list faster at creation time
    for (long collected = 0L; iter.hasNext() && collected < blockSize;) {
        char[] chars = iter.next().toCharArray(); //FIXME! Avoid char array copy
        block.add(CharBuffer.wrap(chars));
        collected += chars.length;
    }
    return block;
}

From source file:org.opencms.i18n.CmsEncoder.java

/**
 * Encodes all characters that are contained in the String which can not displayed 
 * in the given encodings charset with HTML entity references
 * like <code>&amp;#8364;</code>.<p>
 * /*  w  ww.  j  a va2  s  . c  om*/
 * This is required since a Java String is 
 * internally always stored as Unicode, meaning it can contain almost every character, but 
 * the HTML charset used might not support all such characters.<p>
 * 
 * @param input the input to encode for HTML
 * @param encoding the charset to encode the result with
 * 
 * @return the input with the encoded HTML entities
 * 
 * @see #decodeHtmlEntities(String, String)
 */
public static String encodeHtmlEntities(String input, String encoding) {

    StringBuffer result = new StringBuffer(input.length() * 2);
    CharBuffer buffer = CharBuffer.wrap(input.toCharArray());
    Charset charset = Charset.forName(encoding);
    CharsetEncoder encoder = charset.newEncoder();
    for (int i = 0; i < buffer.length(); i++) {
        int c = buffer.get(i);
        if (c < 128) {
            // first 128 chars are contained in almost every charset
            result.append((char) c);
            // this is intended as performance improvement since 
            // the canEncode() operation appears quite CPU heavy
        } else if (encoder.canEncode((char) c)) {
            // encoder can encode this char
            result.append((char) c);
        } else {
            // append HTML entity reference
            result.append(ENTITY_PREFIX);
            result.append(c);
            result.append(";");
        }
    }
    return result.toString();
}

From source file:org.opencms.i18n.CmsEncoder.java

/**
 * Encodes all characters that are contained in the String which can not displayed 
 * in the given encodings charset with Java escaping like <code>\u20ac</code>.<p>
 * /*  ww w  .  j  a  v  a 2s .  co m*/
 * This can be used to escape values used in Java property files.<p>
 * 
 * @param input the input to encode for Java
 * @param encoding the charset to encode the result with
 * 
 * @return the input with the encoded Java entities
 */
public static String encodeJavaEntities(String input, String encoding) {

    StringBuffer result = new StringBuffer(input.length() * 2);
    CharBuffer buffer = CharBuffer.wrap(input.toCharArray());
    Charset charset = Charset.forName(encoding);
    CharsetEncoder encoder = charset.newEncoder();
    for (int i = 0; i < buffer.length(); i++) {
        int c = buffer.get(i);
        if (c < 128) {
            // first 128 chars are contained in almost every charset
            result.append((char) c);
            // this is intended as performance improvement since 
            // the canEncode() operation appears quite CPU heavy
        } else if (encoder.canEncode((char) c)) {
            // encoder can encode this char
            result.append((char) c);
        } else {
            // append Java entity reference
            result.append("\\u");
            String hex = Integer.toHexString(c);
            int pad = 4 - hex.length();
            for (int p = 0; p < pad; p++) {
                result.append('0');
            }
            result.append(hex);
        }
    }
    return result.toString();
}