List of usage examples for javax.mail.Multipart.getBodyPart
public synchronized BodyPart getBodyPart(int index) throws MessagingException
From source file:org.sakaiproject.james.SakaiMailet.java
/** * Breaks email messages into parts which can be saved as files (saves as attachments) or viewed as plain text (added to body of message). * /*from w ww . j av a 2s. c o m*/ * @param siteId * Site associated with attachments, if any * @param p * The message-part embedded in a message.. * @param id * The string containing the message's id. * @param bodyBuf * The string-buffers in which the plain/text and/or html/text message body is being built. * @param bodyContentType * The value of the Content-Type header for the mesage body. * @param attachments * The ReferenceVector in which references to attachments are collected. * @param embedCount * An Integer that counts embedded messages (outer message is zero). * @return Value of embedCount (updated if this call processed any embedded messages). */ protected Integer parseParts(String siteId, Part p, String id, StringBuilder bodyBuf[], StringBuilder bodyContentType, List attachments, Integer embedCount) throws MessagingException, IOException { // increment embedded message counter if (p instanceof Message) { embedCount = Integer.valueOf(embedCount.intValue() + 1); } String type = p.getContentType(); // discard if content-type is unknown if (type == null || "".equals(type)) { M_log.warn(this + " message with unknown content-type discarded"); } // add plain text to bodyBuf[0] else if (p.isMimeType("text/plain") && p.getFileName() == null) { Object o = null; // let them convert to text if possible // but if bad encaps get the stream and do it ourselves try { o = p.getContent(); } catch (java.io.UnsupportedEncodingException ignore) { o = p.getInputStream(); } String txt = null; String innerContentType = p.getContentType(); if (o instanceof String) { txt = (String) p.getContent(); if (bodyContentType != null && bodyContentType.length() == 0) bodyContentType.append(innerContentType); } else if (o instanceof InputStream) { InputStream in = (InputStream) o; ByteArrayOutputStream out = new ByteArrayOutputStream(); byte[] buf = 
new byte[in.available()]; for (int len = in.read(buf); len != -1; len = in.read(buf)) out.write(buf, 0, len); String charset = (new ContentType(innerContentType)).getParameter("charset"); // RFC 2045 says if no char set specified use US-ASCII. // If specified but illegal that's less clear. The common case is X-UNKNOWN. // my sense is that UTF-8 is most likely these days but the sample we got // was actually ISO 8859-1. Could also justify using US-ASCII. Duh... if (charset == null) charset = "us-ascii"; try { txt = out.toString(MimeUtility.javaCharset(charset)); } catch (java.io.UnsupportedEncodingException ignore) { txt = out.toString("UTF-8"); } if (bodyContentType != null && bodyContentType.length() == 0) bodyContentType.append(innerContentType); } // remove extra line breaks added by mac Mail, perhaps others // characterized by a space followed by a line break if (txt != null) { txt = txt.replaceAll(" \n", " "); } // make sure previous message parts ended with newline if (bodyBuf[0].length() > 0 && bodyBuf[0].charAt(bodyBuf[0].length() - 1) != '\n') bodyBuf[0].append("\n"); bodyBuf[0].append(txt); } // add html text to bodyBuf[1] else if (p.isMimeType("text/html") && p.getFileName() == null) { Object o = null; // let them convert to text if possible // but if bad encaps get the stream and do it ourselves try { o = p.getContent(); } catch (java.io.UnsupportedEncodingException ignore) { o = p.getInputStream(); } String txt = null; String innerContentType = p.getContentType(); if (o instanceof String) { txt = (String) p.getContent(); if (bodyContentType != null && bodyContentType.length() == 0) bodyContentType.append(innerContentType); } else if (o instanceof InputStream) { InputStream in = (InputStream) o; ByteArrayOutputStream out = new ByteArrayOutputStream(); byte[] buf = new byte[in.available()]; for (int len = in.read(buf); len != -1; len = in.read(buf)) out.write(buf, 0, len); String charset = (new ContentType(innerContentType)).getParameter("charset"); if 
(charset == null) charset = "us-ascii"; try { txt = out.toString(MimeUtility.javaCharset(charset)); } catch (java.io.UnsupportedEncodingException ignore) { txt = out.toString("UTF-8"); } if (bodyContentType != null && bodyContentType.length() == 0) bodyContentType.append(innerContentType); } // remove bad image tags and naughty javascript if (txt != null) { txt = Web.cleanHtml(txt); } bodyBuf[1].append(txt); } // process subparts of multiparts else if (p.isMimeType("multipart/*")) { Multipart mp = (Multipart) p.getContent(); int count = mp.getCount(); for (int i = 0; i < count; i++) { embedCount = parseParts(siteId, mp.getBodyPart(i), id, bodyBuf, bodyContentType, attachments, embedCount); } } // Discard parts with mime-type application/applefile. If an e-mail message contains an attachment is sent from // a macintosh, you may get two parts, one for the data fork and one for the resource fork. The part that // corresponds to the resource fork confuses users, this has mime-type application/applefile. The best thing // is to discard it. else if (p.isMimeType("application/applefile")) { M_log.warn(this + " message with application/applefile discarded"); } // discard enriched text version of the message. // Sakai only uses the plain/text or html/text version of the message. 
else if (p.isMimeType("text/enriched") && p.getFileName() == null) { M_log.warn(this + " message with text/enriched discarded"); } // everything else gets treated as an attachment else { String name = p.getFileName(); // look for filenames not parsed by getFileName() if (name == null && type.indexOf(NAME_PREFIX) != -1) { name = type.substring(type.indexOf(NAME_PREFIX) + NAME_PREFIX.length()); } // ContentType can't handle filenames with spaces or UTF8 characters if (name != null) { String decodedName = MimeUtility.decodeText(name); // first decode RFC 2047 type = type.replace(name, URLEncoder.encode(decodedName, "UTF-8")); name = decodedName; } ContentType cType = new ContentType(type); String disposition = p.getDisposition(); int approxSize = p.getSize(); if (name == null) { name = "unknown"; // if file's parent is multipart/alternative, // provide a better name for the file if (p instanceof BodyPart) { Multipart parent = ((BodyPart) p).getParent(); if (parent != null) { String pType = parent.getContentType(); ContentType pcType = new ContentType(pType); if (pcType.getBaseType().equalsIgnoreCase("multipart/alternative")) { name = "message" + embedCount; } } } if (p.isMimeType("text/html")) { name += ".html"; } else if (p.isMimeType("text/richtext")) { name += ".rtx"; } else if (p.isMimeType("text/rtf")) { name += ".rtf"; } else if (p.isMimeType("text/enriched")) { name += ".etf"; } else if (p.isMimeType("text/plain")) { name += ".txt"; } else if (p.isMimeType("text/xml")) { name += ".xml"; } else if (p.isMimeType("message/rfc822")) { name += ".txt"; } } // read the attachments bytes, and create it as an attachment in content hosting byte[] bodyBytes = readBody(approxSize, p.getInputStream()); if ((bodyBytes != null) && (bodyBytes.length > 0)) { // can we ignore the attachment it it's just whitespace chars?? 
Reference attachment = createAttachment(siteId, attachments, cType.getBaseType(), name, bodyBytes, id); // add plain/text attachment reference (if plain/text message) if (attachment != null && bodyBuf[0].length() > 0) bodyBuf[0] .append("[see attachment: \"" + name + "\", size: " + bodyBytes.length + " bytes]\n\n"); // add html/text attachment reference (if html/text message) if (attachment != null && bodyBuf[1].length() > 0) bodyBuf[1].append( "<p>[see attachment: \"" + name + "\", size: " + bodyBytes.length + " bytes]</p>"); // add plain/text attachment reference (if no plain/text and no html/text) if (attachment != null && bodyBuf[0].length() == 0 && bodyBuf[1].length() == 0) bodyBuf[0] .append("[see attachment: \"" + name + "\", size: " + bodyBytes.length + " bytes]\n\n"); } } return embedCount; }
From source file:de.mendelson.comm.as2.message.AS2MessageParser.java
/**Returns a compressed part of this container if it exists, else null. If the container itself *is compressed it is returned./*from w w w .ja v a 2 s. c om*/ */ public Part getCompressedEmbeddedPart(Part part) throws MessagingException, IOException { if (this.contentTypeIndicatesCompression(part.getContentType())) { return (part); } if (part.isMimeType("multipart/*")) { Multipart multiPart = (Multipart) part.getContent(); int count = multiPart.getCount(); for (int i = 0; i < count; i++) { BodyPart bodyPart = multiPart.getBodyPart(i); Part compressedEmbeddedPart = this.getCompressedEmbeddedPart(bodyPart); if (compressedEmbeddedPart != null) { return (compressedEmbeddedPart); } } } return (null); }
From source file:mitm.application.djigzo.james.mailets.PDFEncrypt.java
private void addEncryptedPDF(MimeMessage message, byte[] pdf) throws MessagingException { /*/*from w w w . j a v a 2 s .c o m*/ * Find the existing PDF. The expect that the message is a multipart/mixed. */ if (!message.isMimeType("multipart/mixed")) { throw new MessagingException("Content-type should have been multipart/mixed."); } Multipart mp; try { mp = (Multipart) message.getContent(); } catch (IOException e) { throw new MessagingException("Error getting message content.", e); } BodyPart pdfPart = null; /* * Fallback in case the template does not contain a DjigzoHeader.MARKER */ BodyPart fallbackPart = null; for (int i = 0; i < mp.getCount(); i++) { BodyPart part = mp.getBodyPart(i); if (ArrayUtils.contains(part.getHeader(DjigzoHeader.MARKER), DjigzoHeader.ATTACHMENT_MARKER_VALUE)) { pdfPart = part; break; } /* * Fallback scanning for application/pdf in case the template does not contain a DjigzoHeader.MARKER */ if (part.isMimeType("application/pdf")) { fallbackPart = part; } } if (pdfPart == null) { if (fallbackPart != null) { getLogger().info("Marker not found. Using ocet-stream instead."); /* * Use the octet-stream part */ pdfPart = fallbackPart; } else { throw new MessagingException("Unable to find the attachment part in the template."); } } pdfPart.setDataHandler(new DataHandler(new ByteArrayDataSource(pdf, "application/pdf"))); }
From source file:com.duroty.utils.mail.MessageUtilities.java
/** * Given a message that we are replying to, or forwarding, * * @param part The part to decode./*from w w w. j a v a 2 s . com*/ * @param buffer The new message body text buffer. * @param dmailParts Vector for new message's attachments. * * @return The buffer being filled in with the body. * * @throws MessagingException DOCUMENT ME! * @throws IOException */ protected static StringBuffer subDecodeContent(Part part, StringBuffer buffer, Vector dmailParts, boolean chooseHtml, String breakLine) throws MessagingException, IOException { boolean attachIt = true; // decode based on content type and disposition ContentType xctype = MessageUtilities.getContentType(part); ContentDisposition xcdisposition = MessageUtilities.getContentDisposition(part); if (xctype.match("multipart/*")) { attachIt = false; Multipart xmulti = (Multipart) MessageUtilities.getPartContent(part); int xparts = 0; try { xparts = xmulti.getCount(); } catch (MessagingException e) { attachIt = true; xparts = 0; } for (int xindex = 0; xindex < xparts; xindex++) { MessageUtilities.subDecodeContent(xmulti.getBodyPart(xindex), buffer, dmailParts, chooseHtml, breakLine); } } else if (xctype.match("message/rfc822")) { MimeMessage newMessage = new MimeMessage((Session) null, part.getInputStream()); decodeContent(newMessage, buffer, dmailParts, chooseHtml, breakLine); } else if (xctype.match("text/plain") && !chooseHtml) { if (xcdisposition.match("inline")) { attachIt = false; String xjcharset = xctype.getParameter("charset"); if (xjcharset == null) { // not present, assume ASCII character encoding try { Header xheader; Enumeration xe = part.getAllHeaders(); for (; xe.hasMoreElements();) { xheader = (Header) xe.nextElement(); String aux = xheader.getName().toLowerCase().trim(); if (aux.indexOf("subject") > -1) { int pos1 = aux.indexOf("=?"); int pos2 = aux.indexOf("?q?"); if ((pos1 > -1) && (pos2 > -1)) { xjcharset = aux.substring(pos1, pos2); } break; } } } catch (Exception ex) { 
System.out.print(ex.getMessage()); } if (xjcharset == null) { xjcharset = Charset.defaultCharset().displayName(); // US-ASCII in JAVA terms } } MessageUtilities.decodeTextPlain(buffer, part, breakLine, xjcharset); } } else if (xctype.match("text/html") && chooseHtml) { if (xcdisposition.match("inline")) { attachIt = false; String xjcharset = xctype.getParameter("charset"); if (xjcharset == null) { // not present, assume ASCII character encoding try { Header xheader; Enumeration xe = part.getAllHeaders(); for (; xe.hasMoreElements();) { xheader = (Header) xe.nextElement(); String aux = xheader.getName().toLowerCase().trim(); if (aux.indexOf("subject") > -1) { int pos1 = aux.indexOf("=?"); int pos2 = aux.indexOf("?q?"); if ((pos1 > -1) && (pos2 > -1)) { xjcharset = aux.substring(pos1, pos2); } break; } } } catch (Exception ex) { } if (xjcharset == null) { xjcharset = Charset.defaultCharset().displayName(); // US-ASCII in JAVA terms } } MessageUtilities.decodeTextHtml(buffer, part, xjcharset); } } if (attachIt) { // UNDONE should simple single line entries be // created for other types and attachments? // // UNDONE should attachements only be created for "attachments" or all // unknown content types? if (dmailParts != null) { MailPart aux = new MailPart(); aux.setPart(part); aux.setId(dmailParts.size()); aux.setName(MessageUtilities.encodeStringToXml(MessageUtilities.getPartName(part))); aux.setContentType(xctype.getBaseType()); aux.setSize(part.getSize()); dmailParts.addElement(aux); } } return buffer; }
From source file:org.pentaho.di.job.entries.getpop.MailConnection.java
public int getAttachedFilesCount(Message message, Pattern pattern) throws KettleException { Object content = null;/*from w w w. j av a2 s .c om*/ int retval = 0; try { content = message.getContent(); if (content instanceof Multipart) { Multipart multipart = (Multipart) content; for (int i = 0, n = multipart.getCount(); i < n; i++) { Part part = multipart.getBodyPart(i); String disposition = part.getDisposition(); if ((disposition != null) && (disposition.equalsIgnoreCase(Part.ATTACHMENT) || disposition.equalsIgnoreCase(Part.INLINE))) { String MimeText = null; try { MimeText = MimeUtility.decodeText(part.getFileName()); } catch (Exception e) { // Ignore errors } if (MimeText != null) { String filename = MimeUtility.decodeText(part.getFileName()); if (isWildcardMatch(filename, pattern)) { retval++; } } } } } } catch (Exception e) { throw new KettleException(BaseMessages.getString(PKG, "MailConnection.Error.CountingAttachedFiles", "" + this.message.getMessageNumber()), e); } finally { if (content != null) { content = null; } } return retval; }
From source file:org.pentaho.di.job.entries.getpop.MailConnection.java
private String getMessageBodyOrContentType(Part p, final boolean returnContentType) throws MessagingException, IOException { if (p.isMimeType("text/*")) { String s = (String) p.getContent(); return returnContentType ? p.getContentType() : s; }//ww w. j a v a2 s . c o m if (p.isMimeType("multipart/alternative")) { // prefer html text over plain text Multipart mp = (Multipart) p.getContent(); String text = null; for (int i = 0; i < mp.getCount(); i++) { Part bp = mp.getBodyPart(i); if (bp.isMimeType("text/plain")) { if (text == null) { text = getMessageBodyOrContentType(bp, returnContentType); } } } return text; } else if (p.isMimeType("multipart/*")) { Multipart mp = (Multipart) p.getContent(); for (int i = 0; i < mp.getCount(); i++) { String s = getMessageBodyOrContentType(mp.getBodyPart(i), returnContentType); if (s != null) { return s; } } } return null; }
From source file:org.pentaho.di.job.entries.getpop.MailConnection.java
/**
 * Hands every body part of the multipart, in order, to handlePart.
 *
 * @param foldername passed through to handlePart
 * @param multipart the multipart whose parts are processed
 * @param pattern passed through to handlePart
 * @throws KettleException wrapping any exception raised while reading or handling a part
 */
private void handleMultipart(String foldername, Multipart multipart, Pattern pattern) throws KettleException {
    try {
        // the part count is read once, before the first part is handled
        final int partCount = multipart.getCount();
        int index = 0;
        while (index < partCount) {
            handlePart(foldername, multipart.getBodyPart(index), pattern);
            index++;
        }
    } catch (Exception e) {
        throw new KettleException(e);
    }
}
From source file:mitm.application.djigzo.james.mailets.PDFEncryptTest.java
/**
 * Asserts that the message is a multipart/mixed with exactly two parts: a readable text/plain part
 * followed by an application/pdf part that can be opened with the given password.
 *
 * @param message the message produced by the mailet
 * @param password the password the PDF is expected to be encrypted with
 * @param hasReplyLink whether the PDF and the text part are expected to contain a reply link
 */
private void checkEncryption(MimeMessage message, String password, boolean hasReplyLink) throws Exception {
    assertTrue(message.isMimeType("multipart/mixed"));

    Multipart parts = (Multipart) message.getContent();

    assertEquals(2, parts.getCount());

    BodyPart bodyText = parts.getBodyPart(0);

    assertTrue(bodyText.isMimeType("text/plain"));

    BodyPart encryptedPdf = parts.getBodyPart(1);

    assertTrue(encryptedPdf.isMimeType("application/pdf"));

    byte[] ownerPassword = password.getBytes(CharacterEncoding.US_ASCII);
    PdfReader pdf = new PdfReader(encryptedPdf.getInputStream(), ownerPassword);

    byte[] rawFirstPage = pdf.getPageContent(1);
    String firstPage = new String(rawFirstPage, CharacterEncoding.US_ASCII);

    /*
     * Only the raw page content is inspected for the (Reply) marker.
     */
    boolean pdfMentionsReply = firstPage.contains("(Reply)");

    if (!hasReplyLink) {
        assertFalse(pdfMentionsReply);
        return;
    }

    assertTrue(pdfMentionsReply);

    String plainBody = (String) bodyText.getContent();

    assertTrue(plainBody.contains("reply URL: http://127.0.0.1?env="));
}
From source file:mitm.application.djigzo.james.mailets.PDFEncryptTest.java
@Test public void testEncryptPDFFromPersonalUTF8() throws Exception { MockMailetConfig mailetConfig = new MockMailetConfig("test"); SendMailEventListenerImpl listener = new SendMailEventListenerImpl(); mailetConfig.getMailetContext().setSendMailEventListener(listener); PDFEncrypt mailet = new PDFEncrypt(); mailetConfig.setInitParameter("template", "encrypted-pdf.ftl"); mailetConfig.setInitParameter("encryptedProcessor", "encryptedProcessor"); mailetConfig.setInitParameter("notEncryptedProcessor", "notEncryptedProcessor"); mailetConfig.setInitParameter("passwordMode", "single"); mailetConfig.setInitParameter("passThrough", "false"); mailet.init(mailetConfig);/*from w w w .j a v a2 s .co m*/ MockMail mail = new MockMail(); MimeMessage message = MailUtils.loadMessage(new File(testBase, "mail/normal-message-with-attach.eml")); message.setFrom(new InternetAddress("test@example.com", "=?UTF-8?B?w6TDtsO8IMOEw5bDnA==?=")); mail.setMessage(message); Set<MailAddress> recipients = new HashSet<MailAddress>(); recipients.add(new MailAddress("m.brinkers@pobox.com")); recipients.add(new MailAddress("123@example.com")); mail.setRecipients(recipients); mail.setSender(new MailAddress("sender@example.com")); // password is test when encrypted with password 'djigzo' new DjigzoMailAttributesImpl(mail).setEncryptedPassword(Base64.decodeBase64(MiscStringUtils .toAsciiBytes("lklfx6SWxIkAAAAQ1VTbMJjznNZjVvdggckSPQAACAAAAAAQKAxcw630UmyVhyZPiW9xhg=="))); mailet.service(mail); MailUtils.validateMessage(mail.getMessage()); TestUtils.saveMessages(tempDir, "testEncryptPDFFromPersonalUTF8", listener.getMessages()); assertEquals(1, listener.getMessages().size()); assertEquals("encryptedProcessor", listener.getStates().get(0)); assertEquals(2, listener.getRecipients().get(0).size()); assertTrue(listener.getRecipients().get(0).contains(new MailAddress("123@example.com"))); assertTrue(listener.getRecipients().get(0).contains(new MailAddress("m.brinkers@pobox.com"))); 
assertEquals("sender@example.com", listener.getSenders().get(0).toString()); assertEquals(Mail.GHOST, mail.getState()); assertNotNull(listener.getMessages().get(0)); assertTrue(message != listener.getMessages().get(0)); MimeMessage encrypted = listener.getMessages().get(0); assertTrue(encrypted.isMimeType("multipart/mixed")); Multipart mp = (Multipart) encrypted.getContent(); assertEquals(2, mp.getCount()); BodyPart messagePart = mp.getBodyPart(0); BodyPart pdfPart = mp.getBodyPart(1); assertTrue(messagePart.isMimeType("text/plain")); assertTrue(pdfPart.isMimeType("application/pdf")); // check if the body contains (which is the decoded from personal name) String text = (String) messagePart.getContent(); assertTrue(text.contains(" ")); MailUtils.validateMessage(listener.getMessages().get(0)); }
From source file:edu.stanford.muse.email.EmailFetcherStats.java
/** * fetch given message idx's in given folder -- @performance critical * * @param offset - the original offset of the first message in the messages array, important to initialize * for proper assignment of unique id or doc Id *///from w w w .j a va 2 s. c o m //private void fetchUncachedMessages(String sanitizedFName, Folder folder, DocCache cache, List<Integer> msgIdxs) throws MessagingException, FileNotFoundException, IOException, GeneralSecurityException { private void fetchAndIndexMessages(Folder folder, Message[] messages, int offset, int totalMessages) throws MessagingException, IOException, GeneralSecurityException { //mark the processing of new batch if (offset == 0) fetchStartTime = System.currentTimeMillis(); currentStatus = JSONUtils.getStatusJSON( (emailStore instanceof MboxEmailStore) ? "Parsing " + folder.getName() + " (can take a while)..." : "Reading " + folder.getName() + "..."); // bulk fetch of all message headers int n = messages.length; // eliminate any messages the archive already has messages = removeMessagesAlreadyInArchive(archive, messages); log.info(n - messages.length + " message(s) already in the archive"); ArrayList<EmailDocument> emails = new ArrayList<EmailDocument>(); // for performance, we need to do bulk prefetches, instead of fetching 1 message at a time // prefetchedMessages will be a temp cache of prefetched messages int first_i_prefetched = -1, last_i_prefetched = -1; List<?> prefetchedMessages = null; // the type of this can be either list<string> if text only, otherwise list<mimemmessage> long highestUID = archive.getLastUIDForFolder(fetchedFolderInfo.accountKey, fetchedFolderInfo.longName); long lastAssignedUID = highestUID; boolean bodyTextOnly = !fetchConfig.downloadAttachments; try { archive.openForWrite(); for (int i = 0; i < messages.length; i++) { // critical step: (thanks, yourkit!) 
// null out the ref to the previous message, otherwise it stays in memory, and the heap effectively needs to be as big as the size of all messages if (i > 0) messages[i - 1] = null; if (isCancelled) break; Message m = messages[i]; MimeMessage mm = (MimeMessage) m; if (i >= last_i_prefetched) { // critical perf. step: do a bulk imap prefetch // the prefetch will fetch as many messages as possible up to a max buffer size, and return the messages prefetched // last_i_prefetched tracks what is the last index into idxs that we have prefetched. // when we run out of prefetched messages, we do another bulk prefetch prefetchedMessages = do_imap_prefetch(messages, i, folder, bodyTextOnly); if (prefetchedMessages != null) { first_i_prefetched = i; last_i_prefetched = i + prefetchedMessages.size(); } } int pctDone = ((i + offset) * 100) / totalMessages; long elapsedMillis = System.currentTimeMillis() - fetchStartTime; long unprocessedSecs = Util.getUnprocessedMessage(i + offset, totalMessages, elapsedMillis); int N_TEASERS = 50; // 50 ok here, because it takes a long time to fetch and process messages, so teaser computation is relatively not expensive int nTriesForThisMessage = 0; currentStatus = getStatusJSONWithTeasers( "Reading " + Util.commatize(totalMessages) + " messages from " + folder.getName() + "...", pctDone, elapsedMillis / 1000, unprocessedSecs, emails, N_TEASERS); int messageNum = mm.getMessageNumber(); try { long unique_id; // if we have uid, that's even better // don't use uid's for mbox, it has a bug and always gives -1 // see http://james.apache.org/server/rfclist/imap4/rfc2060.txt for uid spec if (folder instanceof UIDFolder && !(emailStore instanceof MboxEmailStore)) { long uid = ((UIDFolder) folder).getUID(m); unique_id = uid; } else unique_id = lastAssignedUID + 1 + i + offset; // +1 since i starts from 0 (but lastAssignedUID can be -1 -- is that safe? 
-sgh) if (unique_id > highestUID) highestUID = unique_id; String unique_id_as_string = Long.toString(unique_id); // well, we already converted to emaildoc above during removeMessagesAlreadyInArchive // not a serious perf. concern now, but revisit if needed EmailDocument ed = convertToEmailDocument(mm, unique_id_as_string); // this messageNum is mostly for debugging, it should not be used for equals etc. // need to check this again, because there might be duplicates such within the set we are currently processing. if (archive.containsDoc(ed)) { stats.nMessagesAlreadyPresent++; dataErrors.add("Duplicate message: " + ed); // note: report.jsp depends on this specific string continue; } MimeMessage originalMessage = mm; // this is the mm that has all the headers etc. List<Blob> attachmentsList = new ArrayList<Blob>(); // if we already have it prefetched, use the prefetched version List<String> contents = null; if (first_i_prefetched >= 0 && prefetchedMessages != null) { if (!fetchConfig.downloadAttachments) { // text only means the prefetchedMessages are stored directly as a list of strings String content = (String) prefetchedMessages.get(i - first_i_prefetched); // note: this_mm only has the prefetched content, but not the headers contents = new ArrayList<String>(); try { // a special for yahoo which routinely uses quoted-printable. content looks like =0A0D.... = etc. 
if (mm.isMimeType("multipart/alternative")) { Multipart mm_mp = (Multipart) mm.getContent(); Part p0 = mm_mp.getBodyPart(0); if (p0 instanceof com.sun.mail.imap.IMAPBodyPart) { String encoding = ((com.sun.mail.imap.IMAPBodyPart) p0).getEncoding(); if ("quoted-printable".equals(encoding)) { content = new String( Util.getBytesFromStream(javax.mail.internet.MimeUtility.decode( new java.io.ByteArrayInputStream(content.getBytes()), "quoted-printable"))); } } } } catch (Exception e) { Util.print_exception("Error trying to parse encoding of multipart", e, log); } contents.add(content); } else { // subtle issue here: the contentType of the prefetchedMessage needs to be be set to the original_mm's content-type. // this was found for cases where the original message is multipart-alternative with a text and html part. // if we don't set prefetchedMessage's content type, it gets a mime type of text/plain and a body = the entire multipart including both parts. // found on sgh's sent mail w/subject: "text to add in help" from Fri, 7 Jun 2013 MimeMessage prefetchedMessage = (MimeMessage) prefetchedMessages .get(i - first_i_prefetched); String contentTypeHeaders[] = originalMessage.getHeader("Content-Type"); String contentTypeHeader = null; if (contentTypeHeaders != null && contentTypeHeaders.length == 1) contentTypeHeader = contentTypeHeaders[0]; if (!Util.nullOrEmpty(contentTypeHeader)) // we do care about body structure, hang on to it prefetchedMessage.setHeader("Content-Type", contentTypeHeader); mm = prefetchedMessage; } prefetchedMessages.set(i - first_i_prefetched, null); // null out to save memory } if (contents == null) contents = processMessagePart(messageNum, originalMessage, mm, attachmentsList); // if mm is not prefetched, it is the same as original_mm // will also work, but will be slow as javamail accesses and fetches each mm separately, instead of using the bulk prefetched version // even when prefetched, the processMessagePart is somewhat expensive because the 
attachments have to be extracted etc. // we could overlap processMessagePart with do_imap_prefetch by prefetching in a separate thread, since prefetch is network limited. // but profiling shows processMessagePart takes only 1/4th the time of do_imap_prefetch so overlapping would be a relatively small gain. // not worth the effort right now. ed.attachments = attachmentsList; if (fetchConfig.downloadAttachments) ed.attachmentsYetToBeDownloaded = false; // we've already downloaded our attachments // concat all the contents parts StringBuilder sb = new StringBuilder(); for (String s : contents) { sb.append(s); sb.append("\n"); } String contentStr = sb.toString(); if (!messageLooksOk(contentStr)) { dataErrors.add("Skipping message as it seems to have very long words: " + ed); continue; } if (contentStr.length() > Config.MAX_TEXT_SIZE_TO_ANNOTATE) { dataErrors.add("Skipping message as it seems to be very long: " + contentStr.length() + " chars, while the max size message that will be annotated for display is " + Config.MAX_TEXT_SIZE_TO_ANNOTATE + " chars. Message = " + ed); // but we continue, don't skip the message entirely. See issue #111 } contentStr = IndexUtils.normalizeNewlines(contentStr); // just get rid of \r's archive.addDoc(ed, contentStr); List<LinkInfo> linkList = new ArrayList<LinkInfo>(); // linkList might be used only for slant IndexUtils.populateDocLinks(ed, contentStr, linkList, true); ed.links = linkList; stats.nMessagesAdded++; } catch (Exception ex) { // sometimes we get unexpected folder closed, so try again boolean retry = false; if (ex instanceof javax.mail.FolderClosedException) { log.warn("Oops, thread " + threadID + " got the folder closed in its face! 
" + ex.getMessage()); // sometimes we get this exception about folder closed // retry up to 3 times, then give up if (nTriesForThisMessage < 3) { retry = true; log.info("Re-opening email store; attempt #" + (nTriesForThisMessage + 1) + " for message " + i); nTriesForThisMessage++; messages = openFolderAndGetMessages(); fetchHeaders(messages); --i; // adjust the message index n try again } } if (!retry) { // we sometimes see UnsupportedEncodingException with x-utf8utf8 mime type and ParseException // nothing much can be done, just create a dummy doc and add it to the cache nErrors++; stats.nErrors++; EmailDocument ed = new EmailDocument(Integer.toString(messageNum)); log.warn("Exception reading message from " + folder_name() + " Message #" + messageNum + " " + ex.getMessage() + "\n" + Util.stackTrace(ex)); ed.setErrorString(Util.stackTrace(ex)); } } } } catch (Throwable t) { Util.print_exception(t, log); } finally { // if (cancelled && false) // TODO: disable for now as currently only indexes are rolled back and allDocs/blobs are not rolled back in sync yet // archive.rollbackIndexWrites(); // else currentStatus = JSONUtils.getStatusJSON("Saving archive..."); archive.close(); } fetchedFolderInfo.lastSeenUID = highestUID; log.info("at end of fetch, folder info is " + fetchedFolderInfo); log.info("emailfetcher thread completed, archive has " + archive.getAllDocs().size() + " docs"); }