Example usage for java.nio.charset CodingErrorAction REPORT

Introduction

On this page you can find example usage of java.nio.charset CodingErrorAction REPORT.

Prototype

CodingErrorAction REPORT

To view the source code for java.nio.charset CodingErrorAction REPORT, use the link below.

Click Source Link

Document

Action indicating that a coding error is to be reported, either by returning a CoderResult object or by throwing a CharacterCodingException, whichever is appropriate for the method implementing the coding process.

Usage

From source file:com.github.rwitzel.streamflyer.experimental.bytestream.ByteStreamTest.java

/**
 * Writes the bytes returned by {@code createBytes()} through a chain of
 * byte stream -> modifying writer -> encoding writer -> byte array and hands
 * the original and resulting bytes to {@code assertBytes}.
 * <p>
 * A {@link MalformedInputException} raised anywhere in the chain counts as a
 * conversion error; the final assertion checks that one occurred exactly when
 * the caller expected it.
 *
 * @param charsetName name of the charset used for both directions of the conversion
 * @param conversionErrorsExpected whether a conversion error is expected for this charset
 * @throws Exception if anything other than a {@link MalformedInputException} goes wrong
 */
private void assertOutputConversion_viaCharsetEncoder(String charsetName, boolean conversionErrorsExpected)
        throws Exception {

    Charset charset = Charset.forName(charsetName);

    // The encoder is told to report unmappable characters.
    CharsetEncoder reportingEncoder = charset.newEncoder();
    reportingEncoder.onUnmappableCharacter(CodingErrorAction.REPORT);

    byte[] inputBytes = createBytes();

    // Stays false unless the chain below raises a MalformedInputException.
    boolean conversionErrorsFound = false;
    try {
        // Innermost sink: collects the bytes produced by the encoding writer.
        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        // Characters written here are encoded with the reporting encoder.
        Writer encodingWriter = new OutputStreamWriter(sink, reportingEncoder);
        // Modifying writer; per the original author's note it does not modify anything here.
        Writer passThroughWriter = new ModifyingWriter(encodingWriter, new RegexModifier("a", 0, "a"));
        // Byte-oriented facade over the writer. Only the charset is passed:
        // per the original author's note, an encoder is not supported here.
        OutputStream byteFacade = new WriterOutputStream(passThroughWriter, charset);

        IOUtils.write(inputBytes, byteFacade);
        byteFacade.close();

        assertBytes(inputBytes, sink.toByteArray(), conversionErrorsExpected);
    } catch (MalformedInputException e) {
        conversionErrorsFound = true;
    }
    assertEquals(conversionErrorsExpected, conversionErrorsFound);
}

From source file:FileUtil.java

/**
 *  Reads in file contents.
 *  <P>
 *  The whole stream is first copied into memory and decoded strictly with the
 *  requested encoding. If that decoding reports malformed or unmappable input,
 *  the same bytes are decoded again as ISO-8859-1.
 *
 *  @param input The InputStream to read from.
 *  @param encoding The encoding to assume at first.
 *  @return A String, interpreted in the "encoding", or, if it fails, in Latin1.
 *  @throws IOException If the stream cannot be read or the stream cannot be
 *          decoded (even) in Latin1
 */
public static String readContents(InputStream input, String encoding) throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    FileUtil.copyContents(input, out);

    ByteBuffer bbuf = ByteBuffer.wrap(out.toByteArray());

    CharsetDecoder csetdecoder = Charset.forName(encoding).newDecoder()
            .onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT);

    try {
        return csetdecoder.decode(bbuf).toString();
    } catch (CharacterCodingException e) {
        // The failed attempt consumed part of the buffer; rewind it so the
        // fallback sees all bytes again without copying the content a second time.
        bbuf.rewind();

        CharsetDecoder l1decoder = Charset.forName("ISO-8859-1").newDecoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);

        // A failure here propagates as-is (CharacterCodingException is an
        // IOException), keeping the stack trace of the actual decoding failure.
        return l1decoder.decode(bbuf).toString();
    }
}

From source file:com.zimbra.cs.util.ZipUtil.java

/**
 * Creates a new decoder for the given charset that reports both malformed
 * input and unmappable characters instead of handling them silently.
 *
 * @param cset the charset to create the decoder for
 * @return a fresh decoder with both error actions set to {@link CodingErrorAction#REPORT}
 */
private static CharsetDecoder reportingDecoder(Charset cset) {
    CharsetDecoder decoder = cset.newDecoder();
    decoder.onMalformedInput(CodingErrorAction.REPORT);
    decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
    return decoder;
}

From source file:com.bah.culvert.data.index.Index.java

/**
 * Stores {@code data} under a configuration key, together with a flag that
 * records whether the stored string is base64 encoded.
 * <p>
 * If the bytes decode cleanly with the {@code UTF_8} decoder, the decoded text
 * is stored and the flag is set to {@code false}. Otherwise the bytes are
 * base64 encoded, that text is stored, and the flag is set to {@code true}.
 *
 * @param isValueBinaryEncodedSetting The key telling whether or not the other
 *        key (setting) is base64.
 * @param potentiallyEncodedSetting The actual key that might be base64
 *        encoded.
 * @param data The data to store, as text or as base64.
 * @param conf The configuration to do the setting on.
 */
private static void setBinaryConfSetting(String isValueBinaryEncodedSetting, String potentiallyEncodedSetting,
        byte[] data, Configuration conf) {
    CharsetDecoder strictDecoder = UTF_8.newDecoder().onMalformedInput(CodingErrorAction.REPORT);
    try {
        String plainText = strictDecoder.decode(ByteBuffer.wrap(data)).toString();
        conf.setBoolean(isValueBinaryEncodedSetting, false);
        conf.set(potentiallyEncodedSetting, plainText);
    } catch (CharacterCodingException notDecodable) {
        // The bytes could not be decoded as text, so store them base64 encoded.
        conf.setBoolean(isValueBinaryEncodedSetting, true);
        String base64Text = new String(Base64.encodeBase64(data), UTF_8);
        conf.set(potentiallyEncodedSetting, base64Text);
    }
}

From source file:com.github.rwitzel.streamflyer.experimental.bytestream.ByteStreamTest.java

/**
 * Reads the bytes returned by {@code createBytes()} through a chain of
 * byte array -> decoding reader -> modifying reader -> byte stream and hands
 * the original and resulting bytes to {@code assertBytes}.
 * <p>
 * A {@link MalformedInputException} raised anywhere in the chain counts as a
 * conversion error; the final assertion checks that one occurred exactly when
 * the caller expected it.
 *
 * @param charsetName name of the charset used for both directions of the conversion
 * @param conversionErrorsExpected whether a conversion error is expected for this charset
 * @throws Exception if anything other than a {@link MalformedInputException} goes wrong
 */
private void assertInputConversion_viaCharsetDecoder(String charsetName, boolean conversionErrorsExpected)
        throws Exception {

    Charset charset = Charset.forName(charsetName);

    // The decoder is told to report unmappable characters.
    CharsetDecoder reportingDecoder = charset.newDecoder();
    reportingDecoder.onUnmappableCharacter(CodingErrorAction.REPORT);

    byte[] inputBytes = createBytes();

    // Stays false unless the chain below raises a MalformedInputException.
    boolean conversionErrorsFound = false;
    try {
        // Source of the raw bytes.
        InputStream source = new ByteArrayInputStream(inputBytes);
        // Bytes read here are decoded with the reporting decoder.
        Reader decodingReader = new InputStreamReader(source, reportingDecoder);
        // Modifying reader; per the original author's note it does not modify anything here.
        Reader passThroughReader = new ModifyingReader(decodingReader, new RegexModifier("a", 0, "a"));
        // Byte-oriented facade over the reader. Only the charset is passed:
        // per the original author's note, an encoder is not supported here.
        InputStream byteFacade = new ReaderInputStream(passThroughReader, charset);

        byte[] roundTrippedBytes = IOUtils.toByteArray(byteFacade);

        assertBytes(inputBytes, roundTrippedBytes, conversionErrorsExpected);
    } catch (MalformedInputException e) {
        conversionErrorsFound = true;
    }
    assertEquals(conversionErrorsExpected, conversionErrorsFound);
}

From source file:com.microsoft.tfs.core.util.FileEncodingDetector.java

/**
 * Tests whether the given byte array looks like an ANSI text file with the
 * default text encoding, i.e. can be decoded with the current ANSI
 * character set. In multi-byte character sets (like Japanese, for example)
 * the byte array might not be converted entirely, because the end of the
 * array might contain a broken multi-byte character. Such input is still
 * accepted as ANSI if the unconverted remainder of the array is short
 * enough.
 *
 * @param bytes
 *        the bytes to check for ANSI-ness (must not be <code>null</code>)
 * @param limit
 *        the maximum number of bytes to read.
 * @return true if the first <code>limit</code> bytes decode without a
 *         reported error, or if the decoder stopped with fewer than five
 *         of those bytes left unconsumed; false otherwise.
 */
protected static boolean looksLikeANSI(final byte[] bytes, final int limit) {
    final Charset ansiCharset = CodePageMapping.getCharset(FileEncoding.getDefaultTextEncoding().getCodePage());

    final ByteBuffer input = ByteBuffer.wrap(bytes, 0, limit);
    final CharBuffer output = CharBuffer.allocate(limit);

    // Strict decoder: both kinds of coding errors are reported, not replaced.
    final CharsetDecoder strictDecoder = ansiCharset.newDecoder()
            .onUnmappableCharacter(CodingErrorAction.REPORT)
            .onMalformedInput(CodingErrorAction.REPORT);

    final CoderResult outcome = strictDecoder.decode(input, output, true);

    // Either decoding went through cleanly, or it stopped so close to the
    // limit that the leftover is treated as a cut-off trailing character.
    return !outcome.isError() || input.position() > limit - 5;
}

From source file:org.apache.arrow.vector.util.Text.java

/**
 * Decodes the given bytes into a String using the decoder supplied by
 * {@code DECODER_FACTORY}.
 *
 * @param utf8 the bytes to decode
 * @param replace if true, malformed input and unmappable characters are
 *        replaced instead of being reported; if false the decoder's
 *        current error actions are left untouched
 * @return the decoded text
 * @throws CharacterCodingException if the decoder reports a coding error
 */
private static String decode(ByteBuffer utf8, boolean replace) throws CharacterCodingException {
    CharsetDecoder decoder = DECODER_FACTORY.get();
    if (replace) {
        decoder.onMalformedInput(CodingErrorAction.REPLACE);
        decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    }
    try {
        return decoder.decode(utf8).toString();
    } finally {
        // Set the decoder back to REPORT even when decoding fails, so a decoder
        // handed out again by the factory is never left in REPLACE mode.
        if (replace) {
            decoder.onMalformedInput(CodingErrorAction.REPORT);
            decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
        }
    }
}

From source file:org.apache.arrow.vector.util.Text.java

/**
 * Converts the provided String to bytes using the encoder supplied by
 * {@code ENCODER_FACTORY}. If <code>replace</code> is true, then malformed
 * input and unmappable characters are replaced by the encoder instead of
 * being reported. Otherwise the encoder's current error actions apply and a
 * coding error surfaces as a CharacterCodingException (for example a
 * MalformedInputException).
 *
 * @param string the text to encode
 * @param replace whether coding errors are replaced rather than reported
 * @return ByteBuffer: the bytes are stored at ByteBuffer.array() and the length is ByteBuffer.limit()
 * @throws CharacterCodingException if the encoder reports a coding error
 */
public static ByteBuffer encode(String string, boolean replace) throws CharacterCodingException {
    CharsetEncoder encoder = ENCODER_FACTORY.get();
    if (replace) {
        encoder.onMalformedInput(CodingErrorAction.REPLACE);
        encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    }
    try {
        return encoder.encode(CharBuffer.wrap(string.toCharArray()));
    } finally {
        // Set the encoder back to REPORT even when encoding fails, so an encoder
        // handed out again by the factory is never left in REPLACE mode.
        if (replace) {
            encoder.onMalformedInput(CodingErrorAction.REPORT);
            encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
        }
    }
}

From source file:com.albert.util.StringUtilCommon.java

/**
 * Checks whether the given text can be encoded in ISO-8859-3.
 *
 * @param v the text to check; {@code null} and the empty string are accepted
 * @return {@code true} if {@code v} is {@code null}, empty, or consists only of
 *         characters that an ISO-8859-3 encoder can encode; {@code false} if
 *         the encoder rejects any part of it
 */
public static boolean isISO88593(String v) {
    if (v == null || v.isEmpty()) {
        return true;
    }
    // canEncode performs a strict trial encoding and reports failure as false,
    // which replaces the hand-written encode/try/catch and its discarded result.
    CharsetEncoder encoder = Charset.forName("ISO-8859-3").newEncoder();
    return encoder.canEncode(v);
}