List of usage examples for javax.mail.internet MimeUtility getEncoding
public static String getEncoding(DataHandler dh)
Same as getEncoding(DataSource),
except that instead of reading the data from an InputStream
it uses the writeTo
method of the DataHandler to examine the data.

From source file: org.nuxeo.ecm.platform.mail.listener.action.ExtractMessageInformationAction.java
/** * Interprets the body accordingly to the charset used. It relies on the content type being * ****;charset={charset};******//from w w w .j a v a2 s . c o m * * @return the decoded String */ protected static String decodeMailBody(Part part) throws MessagingException, IOException { String encoding = null; // try to get encoding from header rather than from Stream ! // unfortunately, this does not seem to be reliable ... /* * String[] cteHeader = part.getHeader("Content-Transfer-Encoding"); if (cteHeader!=null && cteHeader.length>0) * { encoding = cteHeader[0].toLowerCase(); } */ // fall back to default sniffing // that will actually read the stream from server if (encoding == null) { encoding = MimeUtility.getEncoding(part.getDataHandler()); } InputStream is = null; try { is = MimeUtility.decode(part.getInputStream(), encoding); } catch (IOException ex) { log.error("Unable to read content", ex); return ""; } String contType = part.getContentType(); final String charsetIdentifier = "charset="; final String ISO88591 = "iso-8859-1"; final String WINDOWS1252 = "windows-1252"; int offset = contType.indexOf(charsetIdentifier); String charset = ""; if (offset >= 0) { charset = contType.substring(offset + charsetIdentifier.length()); offset = charset.indexOf(";"); if (offset > 0) { charset = charset.substring(0, offset); } } // Charset could be like "utf-8" or utf-8 if (!"".equals(charset)) { charset = charset.replaceAll("\"", ""); } log.debug("Content type: " + contType + "; charset: " + charset); if (charset.equalsIgnoreCase(ISO88591)) { // see // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#character1 // for more details see http://en.wikipedia.org/wiki/ISO_8859-1 // section "ISO-8859-1 and Windows-1252 confusion" charset = WINDOWS1252; log.debug("Using replacing charset: " + charset); } String ret; byte[] streamContent = FileUtils.readBytes(is); if ("".equals(charset)) { ret = new String(streamContent); } else { try { ret = new 
String(streamContent, charset); } catch (UnsupportedEncodingException e) { // try without encoding ret = new String(streamContent); } } return ret; }