Java tutorial
/* Copyright (C) 2005-2007 Jamie Angus Band * MailArchiva Open Source Edition Copyright (c) 2005-2007 Jamie Angus Band * This program is free software; you can redistribute it and/or modify it under the terms of * the GNU General Public License as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with this program; * if not, see http://www.gnu.org/licenses or write to the Free Software Foundation,Inc., 51 * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ package com.stimulus.archiva.extraction; import net.freeutils.tnef.mime.TNEFMime; import java.io.*; import java.text.DecimalFormat; import java.util.*; import javax.mail.MessagingException; import javax.mail.Multipart; import javax.mail.Part; import javax.mail.internet.MimeBodyPart; import javax.mail.internet.MimeUtility; import org.apache.commons.logging.*; import com.stimulus.archiva.domain.Config; import com.stimulus.archiva.domain.Email; import com.stimulus.archiva.domain.EmailID; import com.stimulus.archiva.exception.MessageExtractionException; import com.stimulus.util.DecodingUtil; import java.util.regex.Matcher; import java.util.regex.Pattern; import com.stimulus.util.*; public class MessageExtraction implements Serializable { /** * */ private static final long serialVersionUID = 5021107690811863057L; protected static final Log logger = LogFactory.getLog(MessageExtraction.class.getName()); protected static final String serverEncoding = "UTF-8"; protected HashMap<String, AttachmentInfo> attachments = new HashMap<String, AttachmentInfo>(); protected String fileName; protected String baseURL; protected String viewFileName; protected String messageId; public MessageExtraction(Email message, InputStream originalMessageStream, String baseURL) throws MessageExtractionException { this.baseURL = baseURL; if (originalMessageStream != null) setFileName(writeOriginalMessage(message.getEmailID(), originalMessageStream)); setViewFileName(extractMessage(message)); } protected String extractMessage(Email message) throws MessageExtractionException { if (message == null) throw new MessageExtractionException("assertion failure: null message", logger); logger.debug("extracted message {" + message + "}"); Hashtable<String, Part> att = new Hashtable<String, Part>(); Hashtable<String, String> inl = new Hashtable<String, String>(); Hashtable<String, String> imgs = new Hashtable<String, String>(); Hashtable<String, String> nonImgs = new Hashtable<String, String>(); Hashtable<String, String> ready = new Hashtable<String, String>(); ArrayList<String> mimeTypes = new ArrayList<String>(); String viewFileName; try { dumpPart(message, att, inl, imgs, nonImgs, mimeTypes, message.getSubject()); for (String attachFileName : att.keySet()) { Part p = (Part) att.get(attachFileName); writeAttachment(p, attachFileName); ready.put(attachFileName, attachFileName); //if (!imgs.containsKey(attachFileName) && !nonImgs.containsKey(attachFileName)) addAttachment(attachFileName, p); } if (inl.containsKey("text/html")) { viewFileName = prepareHTMLMessage(baseURL, inl, imgs, nonImgs, ready, mimeTypes); } else if (inl.containsKey("text/plain")) { viewFileName = preparePlaintextMessage(inl, imgs, nonImgs, ready, mimeTypes); } else { logger.debug( "unable to extract message since the content type is unsupported. returning empty message body."); return writeTempMessage("<html><head><META http-equiv=Content-Type content=\"text/html; charset=" + serverEncoding + "\"></head><body></body></html>", ".html"); } } catch (Exception ex) { throw new MessageExtractionException(ex.getMessage(), ex, logger); } logger.debug("message successfully extracted {filename='" + fileName + "' " + message + "}"); return viewFileName; } private static void dumpPart(Part p, Hashtable<String, Part> attachments, Hashtable<String, String> inlines, Hashtable<String, String> images, Hashtable<String, String> nonImages, ArrayList<String> mimeTypes, String subject) throws Exception { mimeTypes.add(p.getContentType()); if (!p.isMimeType("multipart/*")) { String disp = p.getDisposition(); String fname = p.getFileName(); if (fname != null) { try { fname = MimeUtility.decodeText(fname); } catch (Exception e) { logger.debug("cannot decode filename:" + e.getMessage()); } } if ((disp != null && Compare.equalsIgnoreCase(disp, Part.ATTACHMENT)) || fname != null) { String filename = getFilename(subject, p); if (!filename.equalsIgnoreCase("winmail.dat")) { attachments.put(filename, p); } /*if (p.isMimeType("image/*")) { String str[] = p.getHeader("Content-ID"); if (str != null) images.put(filename,str[0]); else images.put(filename, filename); } return;*/ } } if (p.isMimeType("text/plain")) { String str = ""; if (inlines.containsKey("text/plain")) str = (String) inlines.get("text/plain") + "\n\n-------------------------------\n"; inlines.put("text/plain", str + getTextContent(p)); } else if (p.isMimeType("text/html")) { inlines.put("text/html", getTextContent(p)); } else if (p.isMimeType("text/xml")) { attachments.put(getFilename(subject, p), p); } else if (p.isMimeType("multipart/*")) { Multipart mp = (Multipart) p.getContent(); for (int i = 0; i < mp.getCount(); i++) dumpPart(mp.getBodyPart(i), attachments, inlines, images, nonImages, mimeTypes, subject); } else if (p.isMimeType("message/rfc822")) { dumpPart((Part) p.getContent(), attachments, inlines, images, nonImages, mimeTypes, subject); } else if (p.isMimeType("application/ms-tnef")) { Part tnefpart = TNEFMime.convert(null, p, false); if (tnefpart != null) { dumpPart((Part) tnefpart, attachments, inlines, images, nonImages, mimeTypes, subject); } } else if (p.isMimeType("application/*")) { String filename = getFilename("application", p); attachments.put(filename, p); String str[] = p.getHeader("Content-ID"); if (str != null) nonImages.put(filename, str[0]); } else if (p.isMimeType("image/*")) { String fileName = getFilename("image", p); attachments.put(fileName, p); String str[] = p.getHeader("Content-ID"); if (str != null) images.put(fileName, str[0]); else images.put(fileName, fileName); } else { String contentType = p.getContentType(); Object o = p.getContent(); if (o instanceof String) { String str = ""; if (inlines.containsKey("text/plain")) str = (String) inlines.get("text/plain") + "\n\n-------------------------------\n"; inlines.put(contentType, str + (String) o); } else { String fileName = getFilenameFromContentType("attach", contentType); attachments.put(fileName, p); } } } private static String getTextContent(Part p) throws IOException, MessagingException { try { return (String) p.getContent(); } catch (UnsupportedEncodingException e) { OutputStream os = new ByteArrayOutputStream(); p.writeTo(os); String raw = os.toString(); os.close(); //cp932 -> Windows-31J raw = raw.replaceAll("cp932", "Windows-31J"); InputStream is = new ByteArrayInputStream(raw.getBytes()); Part newPart = new MimeBodyPart(is); is.close(); return (String) newPart.getContent(); } } // Find the next position of a URI in input text. private int getNextURIPos(String input, int pos) { String protocols[] = { "http://", "https://", "mailto:" }; int retVal = -1; for (int i = 0; i < protocols.length; i++) { int index = input.indexOf(protocols[i], pos); if (index > -1 && (index < retVal || retVal == -1)) retVal = index; } return retVal; } // encode HTML characters so they do not get interpreted as markup. // Double spaces get changed to use so the spaces are preserved. public static String encode(String src) { src = src.replaceAll("&", "&"); src = src.replaceAll("\"", """); src = src.replaceAll("<", "<"); src = src.replaceAll(">", ">"); StringBuffer converted = new StringBuffer(""); int idx = src.indexOf(" "); int oldIdx = 0; while (idx >= oldIdx) { converted.append(src.substring(oldIdx, idx) + " "); oldIdx = idx + 1; idx = src.indexOf(" ", oldIdx); } converted.append(src.substring(oldIdx)); src = converted.toString(); return src; } private String activateURLs(String input) { if (input.length() == 0) return ""; String output = ""; String lowerInput = input.toLowerCase(Locale.ENGLISH); int index = getNextURIPos(lowerInput, 0); int endIndex = 0; while (index > -1) { if (index > endIndex) output = output + encode(input.substring(endIndex, index)); endIndex = input.indexOf("\n", index); if (endIndex == -1) endIndex = input.length(); String lastGood = ""; while (endIndex-- > index + 7) { String slice = input.substring(index, endIndex); if (slice.endsWith(".") || slice.endsWith("(") || slice.endsWith(")") || slice.endsWith("[") || slice.endsWith("]") || slice.endsWith(",") || slice.endsWith(";") || slice.endsWith(":") || slice.endsWith("{") || slice.endsWith("}")) continue; try { java.net.URI testIt = new java.net.URI(slice); if (Compare.equalsIgnoreCase(testIt.toString(), slice)) { lastGood = input.substring(index, endIndex); break; } } catch (java.net.URISyntaxException ex) { } } if (lastGood.equals("")) output = output + input.substring(index, endIndex); else output = output + "<a href=\"" + lastGood + "\" target=\"_blank\">" + encode(lastGood) + "</a>"; index = getNextURIPos(lowerInput, endIndex); } if (endIndex < input.length()) output = output + encode(input.substring(endIndex)); return output; } private void writeAttachment(Part p, String filename) { filename = getFilename(filename, "attachment.att"); File attachFile = new File(Config.getFileSystem().getViewPath() + File.separatorChar + filename); OutputStream os = null; InputStream is = null; try { logger.debug("writing attachment {filename='" + attachFile.getAbsolutePath() + "'}"); os = new BufferedOutputStream(new FileOutputStream(attachFile)); is = p.getInputStream(); BufferedInputStream bis = new BufferedInputStream(is); int c = 0; while ((c = bis.read()) != -1) { os.write(c); } os.close(); bis.close(); Config.getFileSystem().getTempFiles().markForDeletion(attachFile); } catch (Exception ex) { logger.error("failed to write attachment {filename='" + attachFile + "'}", ex); } finally { try { if (os != null) os.close(); if (is != null) is.close(); } catch (Exception e) { } } } private String writeTempMessage(String toWrite, String ext) { BufferedWriter bw = null; try { File attachFile = File.createTempFile("temp", ext, new File(Config.getFileSystem().getViewPath())); logger.debug("writing temporary message {fileName='" + attachFile + "'}"); bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(attachFile), "UTF8")); bw.write(toWrite); Config.getFileSystem().getTempFiles().markForDeletion(attachFile); return attachFile.getName(); } catch (IOException ex) { logger.error("failed to write temporary message {fileName='" + fileName + "'}", ex); } finally { if (bw != null) { try { bw.close(); } catch (Exception e) { } } } return ""; } private String writeOriginalMessage(EmailID emailId, InputStream originalMessageStream) { File file = new File( Config.getFileSystem().getViewPath() + File.separatorChar + emailId.getUniqueID() + ".eml"); logger.debug("writeOriginalMessage() {fileName='" + file.getAbsolutePath() + "'}"); OutputStream os = null; try { os = new BufferedOutputStream(new FileOutputStream(file)); BufferedInputStream bis = new BufferedInputStream(originalMessageStream); int c = 0; while ((c = bis.read()) != -1) { os.write(c); } Config.getFileSystem().getTempFiles().markForDeletion(file); return emailId.getUniqueID() + ".eml"; } catch (Exception ex) { logger.error("failed to write original message, {fileName='" + file.getAbsolutePath() + "'}", ex); } finally { try { if (os != null) { os.close(); } if (originalMessageStream != null) { originalMessageStream.close(); } } catch (Exception e) { } } return ""; } private String preparePlaintextMessage(Hashtable<String, String> inl, Hashtable<String, String> imgs, Hashtable<String, String> nonImgs, Hashtable<String, String> ready, ArrayList<String> mimeTypes) { String str = (String) inl.get("text/plain"); str = activateURLs(str); Enumeration enuma = imgs.keys(); StringBuffer buff = new StringBuffer(); while (enuma.hasMoreElements()) { String fl = (String) enuma.nextElement(); fl = (String) ready.get(fl); if (fl.endsWith(".tif") || fl.endsWith(".tiff")) { buff.append("<BR><BR><EMBED SRC=\"" + fl + "\" TYPE=\"image/tiff\" >" + System.getProperty("line.separator")); } else { buff.append("<BR><BR><IMG SRC=\"" + fl + "\">" + System.getProperty("line.separator")); } } str = str.replaceAll("\r", "").replaceAll("\n", "<br>" + System.getProperty("line.separator")); return writeTempMessage("<html><head><META http-equiv=Content-Type content=\"text/html; charset=" + serverEncoding + "\"></head><body>" + str + System.getProperty("line.separator") + buff.toString() + "</body></html>", ".html"); } private String prepareHTMLMessage(String baseURL, Hashtable<String, String> inl, Hashtable<String, String> imgs, Hashtable<String, String> nonImgs, Hashtable<String, String> ready, ArrayList<String> mimeTypes) { String str = (String) inl.get("text/html"); boolean alternative = false; for (int i = 0; i < mimeTypes.size(); i++) { if (((String) mimeTypes.get(i)).toLowerCase(Locale.ENGLISH).indexOf("multipart/alternative") > -1) { alternative = true; break; } } if (!alternative && inl.containsKey("text/plain")) { String plain = activateURLs((String) inl.get("text/plain")).replaceAll("\r", "").replaceAll("\n", "<br>" + System.getProperty("line.separator")) + "<br><br>" + System.getProperty("line.separator") + "<hr><br>"; int bestStart = 0; int next = str.toLowerCase(Locale.ENGLISH).indexOf("<body"); if (next > 0) next = str.indexOf(">", next) + 1; if (next > 0 && next < str.length()) bestStart = next; if (bestStart > 0) str = str.substring(0, bestStart) + plain + str.substring(bestStart); else str = plain + str; } HashSet<String> alreadyUsed = new HashSet<String>(); Enumeration enuma = imgs.keys(); while (enuma.hasMoreElements()) { String repl = (String) enuma.nextElement(); String cidTag = (String) imgs.get(repl); if (cidTag.startsWith("<") && cidTag.endsWith(">")) { cidTag = cidTag.substring(1, cidTag.length() - 1); } if (str.indexOf("cid:" + cidTag) > -1) { alreadyUsed.add(repl); } String st = (String) ready.get(repl); str = Pattern.compile("cid:" + cidTag, Pattern.CASE_INSENSITIVE).matcher(str) .replaceAll(ready.get(repl)); } enuma = nonImgs.keys(); while (enuma.hasMoreElements()) { String repl = (String) enuma.nextElement(); String cidTag = (String) nonImgs.get(repl); if (cidTag.startsWith("<") && cidTag.endsWith(">")) cidTag = cidTag.substring(1, cidTag.length() - 1); if (str.indexOf("cid:" + cidTag) > -1) alreadyUsed.add(repl); String st = (String) ready.get(repl); str = Pattern.compile("cid:" + cidTag, Pattern.CASE_INSENSITIVE).matcher(str) .replaceAll(ready.get(repl)); } StringBuffer buff = new StringBuffer(); enuma = imgs.keys(); while (enuma.hasMoreElements()) { String fl = (String) enuma.nextElement(); if (!alreadyUsed.contains(fl)) { fl = (String) ready.get(fl); if (fl.endsWith(".tif") || fl.endsWith(".tiff")) { buff.append(System.getProperty("line.separator") + "<BR><BR><EMBED SRC=\"" + baseURL.replaceAll("\\\\", "/") + "/temp/" + fl + "\" TYPE=\"image/tiff\">"); } else { buff.append(System.getProperty("line.separator") + "<BR><BR><IMG SRC=\"" + baseURL.replaceAll("\\\\", "/") + "/temp/" + fl + "\">"); } } } String output = ""; int bestStart = 0; int next = str.toLowerCase(Locale.ENGLISH).indexOf("</body>"); if (next > 0 && next < str.length()) bestStart = next; if (bestStart > 0) output = str.substring(0, bestStart) + buff.toString() + str.substring(bestStart); else output = str + buff.toString(); if (output.indexOf("charset=") < 0) { next = output.toLowerCase(Locale.ENGLISH).indexOf("</head>"); if (next > 0) output = output.substring(0, next) + "<META http-equiv=Content-Type content=\"text/html; charset=" + serverEncoding + "\">" + output.substring(next); } else output = output.replaceFirst("charset=.*\"", "charset=" + serverEncoding + "\""); output = output.replaceAll("FONT SIZE=\\d", "FONT"); output = output.replaceAll("font size=\\d", "font"); return writeTempMessage(output, ".html"); } private static String getFilenameFromContentType(String defaultFileName, String contentType) { if (contentType == null) return defaultFileName; Pattern pattern = Pattern.compile("name=\"(.*)\""); Matcher matcher = pattern.matcher(contentType); if (matcher.find()) return matcher.group(1); else return defaultFileName; } private static String getFilename(String defaultFileName, Part p) throws Exception { if (defaultFileName == "" || defaultFileName == null) defaultFileName = "attachment.att"; String fName = p.getFileName(); try { fName = MimeUtility.decodeText(fName); } catch (Exception e) { logger.debug("cannot decode filename:" + e.getMessage()); fName = p.getFileName(); } return getFilename(fName, defaultFileName); } private static String getFilename(String fileName, String defaultFileName) { try { fileName = new File(fileName).getName(); char illegalChars[] = { '\'', '\\', ':', '*', '?', '<', '>', '|', '\n', '\r', '\t', ';', ':', '=', ':', '+', '<', '>', '|', '[', ']', '\"', '?', '*', '#' }; for (int i = 0; i < illegalChars.length; i++) { fileName = fileName.replace(illegalChars[i], '_'); } if (fileName.length() < 3) fileName = defaultFileName; } catch (Exception ex) { fileName = defaultFileName; } return fileName; } public void addAttachment(String fileName, Part part) { attachments.put(fileName, new AttachmentInfo(fileName, part)); } public AttachmentInfo getAttachment(String fileName) { return attachments.get(fileName); } public List<AttachmentInfo> getAttachments() { return (List<AttachmentInfo>) new ArrayList<AttachmentInfo>(attachments.values()); } public String getFilePath() { String path = Config.getFileSystem().getViewPath() + File.separatorChar + fileName; logger.debug("getOriginalMessageFilePath() {fileName='" + path + "'}"); return path; } public String getFileSize() { double size = new File(getFilePath()).length() / 1024.0; DecimalFormat df = new DecimalFormat("0.##"); return df.format(size) + "k"; } public String getFileName() { logger.debug("getOriginalMessageFileName() {fileName='" + fileName + "'}"); return fileName; } public String getBaseExtractionURL() { return baseURL + "/temp"; } public String getViewFileName() { return viewFileName; } public String getFileURL() { logger.debug("getOriginalMessageURL() {URL='" + getBaseExtractionURL() + "/" + getFileName() + "'}"); return getBaseExtractionURL() + "/" + getFileName(); } public void setViewFileName(String fileName) { this.viewFileName = fileName; } public void setFileName(String fileName) { this.fileName = fileName; } public String getViewURL() { return getBaseExtractionURL() + "/" + getViewFileName(); } public class AttachmentInfo { protected String name; protected Part part; public AttachmentInfo(String name, Part part) { this.name = name; this.part = part; } public Part getPart() { return part; } public String getName() { return name; } public String getContentType() { try { return part.getContentType(); } catch (Exception e) { return ""; } } public boolean getIsEmail() { return getContentType().toLowerCase(Locale.ENGLISH).contains("message/rfc822"); } public String getURL() { return getBaseExtractionURL() + "/" + name; } public String getFilePath() { return Config.getFileSystem().getViewPath() + File.separatorChar + name; } public String getFileSize() { double size = new File(getFilePath()).length() / 1024.0; DecimalFormat df = new DecimalFormat("0.##"); return df.format(size) + "k"; } public InputStream getInputStream() throws FileNotFoundException { return new FileInputStream(getFilePath()); } } }