List of usage examples for java.nio.charset CodingErrorAction REPORT
CodingErrorAction REPORT
To view the source code for java.nio.charset CodingErrorAction REPORT, click the Source Link.
From source file:com.github.rwitzel.streamflyer.experimental.bytestream.ByteStreamTest.java
private void assertOutputConversion_viaCharsetEncoder(String charsetName, boolean conversionErrorsExpected) throws Exception { // find charset Charset charset = Charset.forName(charsetName); // // configure decoder // CharsetDecoder decoder = charset.newDecoder(); // decoder.onUnmappableCharacter(CodingErrorAction.REPORT); // configure encoder CharsetEncoder encoder = charset.newEncoder(); encoder.onUnmappableCharacter(CodingErrorAction.REPORT); byte[] originalBytes = createBytes(); boolean conversionErrorsFound; try {//from w ww .jav a2s. com // byte array as byte stream ByteArrayOutputStream targetByteStream = new ByteArrayOutputStream(); // byte stream as character stream Writer targetWriter = new OutputStreamWriter(targetByteStream, encoder); // modifying writer (we don't modify here) Writer modifyingWriter = new ModifyingWriter(targetWriter, new RegexModifier("a", 0, "a")); // character stream as byte stream OutputStream modifyingByteStream = new WriterOutputStream(modifyingWriter, charset); // encoder // not // supported // here!!! // byte stream as byte array IOUtils.write(originalBytes, modifyingByteStream); modifyingByteStream.close(); assertBytes(originalBytes, targetByteStream.toByteArray(), conversionErrorsExpected); conversionErrorsFound = false; } catch (MalformedInputException e) { conversionErrorsFound = true; } assertEquals(conversionErrorsExpected, conversionErrorsFound); }
From source file:FileUtil.java
/**
 * Reads the complete contents of a stream and returns them as a string.
 * <P>
 * The bytes are first decoded strictly with the requested encoding. If that decoding
 * reports an error, the same bytes are decoded as ISO-8859-1 instead.
 *
 * @param input The InputStream to read from.
 * @param encoding The name of the encoding to try first.
 * @return The stream contents decoded with "encoding", or with Latin1 if that failed.
 * @throws IOException If the stream cannot be read or the bytes cannot be decoded
 *         (even) in Latin1
 */
public static String readContents(InputStream input, String encoding) throws IOException {
    ByteArrayOutputStream collected = new ByteArrayOutputStream();
    FileUtil.copyContents(input, collected);

    ByteBuffer rawBytes = ByteBuffer.wrap(collected.toByteArray());
    CharsetDecoder requestedDecoder = Charset.forName(encoding).newDecoder()
            .onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT);

    try {
        return requestedDecoder.decode(rawBytes).toString();
    } catch (CharacterCodingException requestedFailure) {
        // The requested encoding did not fit the data; retry from the start as Latin1.
        CharsetDecoder latin1Decoder = Charset.forName("ISO-8859-1").newDecoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);
        try {
            rawBytes = ByteBuffer.wrap(collected.toByteArray());
            return latin1Decoder.decode(rawBytes).toString();
        } catch (CharacterCodingException latin1Failure) {
            throw (CharacterCodingException) latin1Failure.fillInStackTrace();
        }
    }
}
From source file:com.zimbra.cs.util.ZipUtil.java
/**
 * Creates a new decoder for the given charset that reports both malformed input and
 * unmappable characters.
 *
 * @param cset the charset to create the decoder from
 * @return a freshly created decoder with both error actions set to REPORT
 */
private static CharsetDecoder reportingDecoder(Charset cset) {
    CharsetDecoder strict = cset.newDecoder();
    strict.onMalformedInput(CodingErrorAction.REPORT);
    strict.onUnmappableCharacter(CodingErrorAction.REPORT);
    return strict;
}
From source file:com.bah.culvert.data.index.Index.java
/**
 * Stores binary data under a configuration key, together with a flag telling whether the
 * stored string is base64 encoded.
 * <p>
 * If the data decodes with the UTF_8 decoder without malformed input, the decoded text is
 * stored and the flag is set to false. Otherwise the data is stored base64 encoded and the
 * flag is set to true.
 *
 * @param isValueBinaryEncodedSetting The key telling whether or not the other
 *        key (setting) is base64.
 * @param potentiallyEncodedSetting The actual key that might be base64
 *        encoded.
 * @param data The data to store, as text if possible and as base64 otherwise.
 * @param conf The configuration to do the setting on.
 */
private static void setBinaryConfSetting(String isValueBinaryEncodedSetting, String potentiallyEncodedSetting,
        byte[] data, Configuration conf) {
    CharsetDecoder strictDecoder = UTF_8.newDecoder();
    strictDecoder.onMalformedInput(CodingErrorAction.REPORT);

    // A null result marks data that could not be decoded as text.
    CharBuffer decodedText;
    try {
        decodedText = strictDecoder.decode(ByteBuffer.wrap(data));
    } catch (CharacterCodingException notDecodable) {
        decodedText = null;
    }

    boolean storeAsBase64 = decodedText == null;
    conf.setBoolean(isValueBinaryEncodedSetting, storeAsBase64);

    String storedValue = storeAsBase64
            ? new String(Base64.encodeBase64(data), UTF_8)
            : decodedText.toString();
    conf.set(potentiallyEncodedSetting, storedValue);
}
From source file:com.github.rwitzel.streamflyer.experimental.bytestream.ByteStreamTest.java
private void assertInputConversion_viaCharsetDecoder(String charsetName, boolean conversionErrorsExpected) throws Exception { // find charset Charset charset = Charset.forName(charsetName); // configure decoder CharsetDecoder decoder = charset.newDecoder(); decoder.onUnmappableCharacter(CodingErrorAction.REPORT); // // configure encoder // CharsetEncoder encoder = charset.newEncoder(); // encoder.onUnmappableCharacter(CodingErrorAction.REPORT); byte[] originalBytes = createBytes(); boolean conversionErrorsFound; try {/*from w ww .j a v a 2s .com*/ // byte array as byte stream InputStream originalByteStream = new ByteArrayInputStream(originalBytes); // byte stream as character stream Reader originalReader = new InputStreamReader(originalByteStream, decoder); // modifying reader (we don't modify anything here) Reader modifyingReader = new ModifyingReader(originalReader, new RegexModifier("a", 0, "a")); // character stream as byte stream InputStream modifyingByteStream = new ReaderInputStream(modifyingReader, charset); // encoder // not // supported // byte stream as byte array byte[] modifiedBytes = IOUtils.toByteArray(modifyingByteStream); assertBytes(originalBytes, modifiedBytes, conversionErrorsExpected); conversionErrorsFound = false; } catch (MalformedInputException e) { conversionErrorsFound = true; } assertEquals(conversionErrorsExpected, conversionErrorsFound); }
From source file:com.microsoft.tfs.core.util.FileEncodingDetector.java
/**
 * Tests whether the given byte array looks like an ANSI text file with the
 * default text encoding, i.e. can be decoded with the current ANSI
 * character set. In multi-byte character sets (like Japanese, for example)
 * the byte array might not be converted entirely, because at the end
 * of the array it might contain a broken multi-byte character. Such input is
 * still accepted as ANSI if the unconverted remainder of the array is
 * short enough (fewer than 5 bytes).
 *
 * @param bytes
 *        the bytes to check for ANSI-ness (must not be <code>null</code>)
 * @param limit
 *        the maximum number of bytes to read; values larger than
 *        <code>bytes.length</code> are treated as <code>bytes.length</code>
 * @return true if the examined bytes decode without error, or if the first
 *         decoding error occurs within the last few examined bytes; false
 *         otherwise
 */
protected static boolean looksLikeANSI(final byte[] bytes, final int limit) {
    final Charset charSet = CodePageMapping.getCharset(FileEncoding.getDefaultTextEncoding().getCodePage());

    // "limit" is documented as a maximum, so never look past the end of the array;
    // ByteBuffer.wrap would otherwise throw IndexOutOfBoundsException.
    final int length = Math.min(limit, bytes.length);

    final ByteBuffer byteBuffer = ByteBuffer.wrap(bytes, 0, length);
    final CharBuffer charBuffer = CharBuffer.allocate(length);

    final CharsetDecoder decoder = charSet.newDecoder();
    decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
    decoder.onMalformedInput(CodingErrorAction.REPORT);

    final CoderResult rc = decoder.decode(byteBuffer, charBuffer, true);
    if (!rc.isError()) {
        return true;
    }

    // Tolerate an error only when fewer than 5 bytes are left unconverted,
    // e.g. a multi-byte character cut off at the end of the examined range.
    return byteBuffer.position() > length - 5;
}
From source file:org.apache.arrow.vector.util.Text.java
private static String decode(ByteBuffer utf8, boolean replace) throws CharacterCodingException { CharsetDecoder decoder = DECODER_FACTORY.get(); if (replace) { decoder.onMalformedInput(java.nio.charset.CodingErrorAction.REPLACE); decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); }// w w w . j a va 2 s. co m String str = decoder.decode(utf8).toString(); // set decoder back to its default value: REPORT if (replace) { decoder.onMalformedInput(CodingErrorAction.REPORT); decoder.onUnmappableCharacter(CodingErrorAction.REPORT); } return str; }
From source file:org.apache.arrow.vector.util.Text.java
/**
 * Converts the provided String to bytes using the encoder supplied by ENCODER_FACTORY
 * (presumably a UTF-8 encoder). If <code>replace</code> is true, malformed input and
 * unmappable characters are replaced with the encoder's replacement value instead of being
 * reported. Otherwise the method throws a CharacterCodingException such as
 * MalformedInputException.
 * <p>
 * The encoder is always switched back to REPORT afterwards, even if encoding throws, so the
 * encoder handed out by ENCODER_FACTORY is never left in REPLACE mode for a later caller.
 *
 * @param string the text to encode
 * @param replace true to replace unencodable input, false to report it
 * @return ByteBuffer: bytes stored at ByteBuffer.array() and length is ByteBuffer.limit()
 * @throws CharacterCodingException if the input cannot be encoded and <code>replace</code> is false
 */
public static ByteBuffer encode(String string, boolean replace) throws CharacterCodingException {
    CharsetEncoder encoder = ENCODER_FACTORY.get();
    if (replace) {
        encoder.onMalformedInput(CodingErrorAction.REPLACE);
        encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    }
    try {
        return encoder.encode(CharBuffer.wrap(string.toCharArray()));
    } finally {
        // set encoder back to its default value: REPORT
        if (replace) {
            encoder.onMalformedInput(CodingErrorAction.REPORT);
            encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
        }
    }
}
From source file:com.albert.util.StringUtilCommon.java
public static boolean isISO88593(String v) { if (v == null || v.length() == 0) { return true; }//w w w .jav a2 s. co m CharsetEncoder d = Charset.forName("ISO-8859-3").newEncoder(); d.onMalformedInput(CodingErrorAction.REPORT); d.onUnmappableCharacter(CodingErrorAction.REPORT); try { ByteBuffer bb = d.encode(CharBuffer.wrap(v.toCharArray())); bb.toString(); } catch (CharacterCodingException e) { return false; } return true; }