List of usage examples for org.apache.poi.hsmf.MAPIMessage#getConversationTopic
public String getConversationTopic() throws ChunkNotFoundException
From source file:com.jaeksoft.searchlib.parser.MapiMsgParser.java
License:Open Source License
/**
 * Reads an Outlook message from the stream limiter and copies its fields
 * into a new parser result item.
 *
 * The message is configured to return {@code null} for missing chunks. The
 * display names, subject, HTML and text bodies, message date, conversation
 * topic and every recipient name/address are added as fields. Language
 * detection then runs on the text content when the HTML body is empty, and
 * on the HTML source otherwise. A {@link ChunkNotFoundException} is logged
 * as a warning and ends the extraction.
 *
 * @param streamLimiter supplies the input stream of the message
 * @param lang          not used by this method
 * @throws IOException        declared by the signature
 * @throws SearchLibException declared by the signature
 */
@Override
protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang)
        throws IOException, SearchLibException {
    MAPIMessage message = new MAPIMessage(streamLimiter.getNewInputStream());
    message.setReturnNullOnMissingChunk(true);
    ParserResultItem item = getNewParserResultItem();
    try {
        item.addField(ParserFieldEnum.email_display_from, message.getDisplayFrom());
        item.addField(ParserFieldEnum.email_display_to, message.getDisplayTo());
        item.addField(ParserFieldEnum.email_display_cc, message.getDisplayCC());
        item.addField(ParserFieldEnum.email_display_bcc, message.getDisplayBCC());
        item.addField(ParserFieldEnum.subject, message.getSubject());
        item.addField(ParserFieldEnum.htmlSource, message.getHtmlBody());
        item.addField(ParserFieldEnum.content, message.getTextBody());
        item.addField(ParserFieldEnum.creation_date, message.getMessageDate());
        item.addField(ParserFieldEnum.email_conversation_topic, message.getConversationTopic());

        RecipientChunks[] recipients = message.getRecipientDetailsChunks();
        if (recipients != null) {
            for (int i = 0; i < recipients.length; i++) {
                RecipientChunks recipient = recipients[i];
                item.addField(ParserFieldEnum.email_recipient_name, recipient.getRecipientName());
                item.addField(ParserFieldEnum.email_recipient_address,
                        recipient.getRecipientEmailAddress());
            }
        }

        // Detect the language on whichever body representation is available.
        boolean htmlMissing = StringUtils.isEmpty(message.getHtmlBody());
        item.langDetection(10000, htmlMissing ? ParserFieldEnum.content : ParserFieldEnum.htmlSource);
    } catch (ChunkNotFoundException e) {
        Logging.warn(e);
    }
}
From source file:com.opensearchserver.extractor.parser.MapiMsg.java
License:Apache License
@Override protected void parseContent(InputStream inputStream, String extension, String mimeType) throws Exception { MAPIMessage msg = new MAPIMessage(inputStream); msg.setReturnNullOnMissingChunk(true); ParserDocument document = getNewParserDocument(); document.add(FROM, msg.getDisplayFrom()); document.add(RECIPIENT_TO, msg.getDisplayTo()); document.add(RECIPIENT_CC, msg.getDisplayCC()); document.add(RECIPIENT_BCC, msg.getDisplayBCC()); document.add(SUBJECT, msg.getSubject()); document.add(HTML_CONTENT, msg.getHtmlBody()); document.add(PLAIN_CONTENT, msg.getTextBody()); document.add(MESSAGE_DATE, msg.getMessageDate()); document.add(CONVERSATION_TOPIC, msg.getConversationTopic()); if (StringUtils.isEmpty(msg.getHtmlBody())) document.add(LANG_DETECTION, languageDetection(document, PLAIN_CONTENT, 10000)); else//from ww w. j a v a 2 s . co m document.add(LANG_DETECTION, languageDetection(document, HTML_CONTENT, 10000)); // TODO manage attachments }
From source file:com.qwazr.library.poi.MapiMsgParser.java
License:Apache License
@Override public void parseContent(final MultivaluedMap<String, String> parameters, final InputStream inputStream, final String extension, final String mimeType, final ParserResultBuilder resultBuilder) throws Exception { final MAPIMessage msg = new MAPIMessage(inputStream); msg.setReturnNullOnMissingChunk(true); final ParserFieldsBuilder metas = resultBuilder.metas(); metas.set(MIME_TYPE, DEFAULT_MIMETYPES[0]); final ParserFieldsBuilder document = resultBuilder.newDocument(); document.add(FROM, msg.getDisplayFrom()); document.add(RECIPIENT_TO, msg.getDisplayTo()); document.add(RECIPIENT_CC, msg.getDisplayCC()); document.add(RECIPIENT_BCC, msg.getDisplayBCC()); document.add(SUBJECT, msg.getSubject()); document.add(HTML_CONTENT, msg.getHtmlBody()); document.add(PLAIN_CONTENT, msg.getTextBody()); document.add(MESSAGE_DATE, msg.getMessageDate()); document.add(CONVERSATION_TOPIC, msg.getConversationTopic()); if (StringUtils.isEmpty(msg.getHtmlBody())) document.add(LANG_DETECTION, languageDetection(document, PLAIN_CONTENT, 10000)); else/*ww w . j a v a 2 s . c om*/ document.add(LANG_DETECTION, languageDetection(document, HTML_CONTENT, 10000)); // TODO manage attachments }
From source file:fr.gouv.culture.vitam.eml.MsgExtract2.java
License:Open Source License
private static String extractInfoSubEmail(MAPIMessage msg, File curDir, Element root, VitamArgument argument, ConfigLoader config) {/*from w w w. j a va 2 s . co m*/ File curPath = null; Element keywords = XmlDom.factory.createElement(EMAIL_FIELDS.keywords.name); Element metadata = XmlDom.factory.createElement(EMAIL_FIELDS.metadata.name); String id = config.addRankId(root); curPath = new File(curDir, "MSG_" + id); //System.out.println("start of "+id); String[] values = new String[Keywords.values().length]; for (int i = 0; i < Keywords.values().length; i++) { values[i] = null; } String[] test = null; try { test = msg.getHeaders(); } catch (ChunkNotFoundException e4) { // TODO Auto-generated catch block e4.printStackTrace(); } int lastRank = -1; for (String string : test) { if (string.startsWith(Keywords.NextOne.name) && lastRank >= 0) { String recv = string.substring(Keywords.NextOne.name.length()); if (values[lastRank] == null) { values[lastRank] = recv; } else { values[lastRank] += (Keywords.Received.ordinal() == lastRank ? 
"\n" : " ") + recv; } } else { if (string.startsWith(Keywords.Date.name)) { values[Keywords.Date.ordinal()] = string.substring(Keywords.Date.name.length()); lastRank = -1; } else if (string.startsWith(Keywords.XOriginalArrivalTime.name)) { values[Keywords.XOriginalArrivalTime.ordinal()] = string .substring(Keywords.XOriginalArrivalTime.name.length()); int pos = values[Keywords.XOriginalArrivalTime.ordinal()].indexOf(" FILETIME="); if (pos > 0) { values[Keywords.XOriginalArrivalTime .ordinal()] = values[Keywords.XOriginalArrivalTime.ordinal()].substring(0, pos); } lastRank = -1; } else if (string.startsWith(Keywords.MessageId.name)) { values[Keywords.MessageId.ordinal()] = string.substring(Keywords.MessageId.name.length()); values[Keywords.MessageId.ordinal()] = StringUtils .removeChevron( StringUtils.unescapeHTML(values[Keywords.MessageId.ordinal()], true, false)) .trim(); lastRank = -1; } else if (string.startsWith(Keywords.InReplyTo.name)) { String reply = StringUtils.removeChevron(StringUtils .unescapeHTML(string.substring(Keywords.InReplyTo.name.length()), true, false)); if (values[Keywords.InReplyTo.ordinal()] == null) { values[Keywords.InReplyTo.ordinal()] = reply; } else { values[Keywords.InReplyTo.ordinal()] += " " + reply; } lastRank = Keywords.InReplyTo.ordinal(); } else if (string.startsWith(Keywords.Received.name)) { String recv = string.substring(Keywords.Received.name.length()); if (values[Keywords.Received.ordinal()] == null) { values[Keywords.Received.ordinal()] = recv; } else { values[Keywords.Received.ordinal()] += "\n" + recv; } lastRank = Keywords.Received.ordinal(); } else if (string.startsWith(Keywords.From.name)) { values[Keywords.From.ordinal()] = string.substring(Keywords.From.name.length()); lastRank = -1; } else if (string.startsWith(Keywords.To.name)) { if (values[Keywords.To.ordinal()] == null) { values[Keywords.To.ordinal()] = string.substring(Keywords.To.name.length()); } else { values[Keywords.To.ordinal()] += " " + 
string.substring(Keywords.To.name.length()); } lastRank = Keywords.To.ordinal(); } else if (string.startsWith(Keywords.Cc.name)) { if (values[Keywords.Cc.ordinal()] == null) { values[Keywords.Cc.ordinal()] = string.substring(Keywords.Cc.name.length()); } else { values[Keywords.Cc.ordinal()] += " " + string.substring(Keywords.Cc.name.length()); } lastRank = Keywords.Cc.ordinal(); } else if (string.startsWith(Keywords.Bcc.name)) { if (values[Keywords.Bcc.ordinal()] == null) { values[Keywords.Bcc.ordinal()] = string.substring(Keywords.Bcc.name.length()); } else { values[Keywords.Bcc.ordinal()] += " " + string.substring(Keywords.Bcc.name.length()); } lastRank = Keywords.Bcc.ordinal(); } else if (string.startsWith(Keywords.ReturnPath.name)) { if (values[Keywords.ReturnPath.ordinal()] == null) { values[Keywords.ReturnPath.ordinal()] = string.substring(Keywords.ReturnPath.name.length()); } else { values[Keywords.ReturnPath.ordinal()] += " " + string.substring(Keywords.ReturnPath.name.length()); } lastRank = Keywords.ReturnPath.ordinal(); } else if (string.startsWith(Keywords.Importance.name)) { values[Keywords.Importance.ordinal()] = string.substring(Keywords.Importance.name.length()); lastRank = -1; } else if (string.startsWith(Keywords.Priority.name)) { values[Keywords.Priority.ordinal()] = string.substring(Keywords.Priority.name.length()); lastRank = -1; } else if (string.startsWith(Keywords.XFolder.name)) { values[Keywords.XFolder.ordinal()] = string.substring(Keywords.XFolder.name.length()); lastRank = -1; } else if (string.startsWith(Keywords.XSDOC.name)) { values[Keywords.XSDOC.ordinal()] = string.substring(Keywords.XSDOC.name.length()); lastRank = -1; } else if (string.startsWith(Keywords.Sensitivity.name)) { values[Keywords.Sensitivity.ordinal()] = string.substring(Keywords.Sensitivity.name.length()); lastRank = -1; } else { lastRank = -1; } } } /*for (int i = 0; i < Keywords.values().length; i++) { System.out.println(Keywords.values()[i].name()+": "+values[i]); 
}*/ if (values[Keywords.XFolder.ordinal()] != null) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.folder.name); sub.addAttribute(EMAIL_FIELDS.folderName.name, values[Keywords.XFolder.ordinal()]); metadata.add(sub); } String fromEmail = values[Keywords.From.ordinal()]; if (fromEmail != null) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.from.name); addAddress(sub, EMAIL_FIELDS.fromUnit.name, fromEmail); String fromEmail2 = values[Keywords.ReturnPath.ordinal()]; if (fromEmail2 != null && !fromEmail.contains(fromEmail2)) { addAddress(sub, EMAIL_FIELDS.fromUnit.name, fromEmail2); } metadata.add(sub); } else { String fromEmail2 = values[Keywords.ReturnPath.ordinal()]; if (fromEmail2 != null) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.from.name); addAddress(sub, EMAIL_FIELDS.fromUnit.name, fromEmail2); metadata.add(sub); } } fromEmail = values[Keywords.To.ordinal()]; if (fromEmail != null) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.toRecipients.name); String[] to = fromEmail.split(","); for (String string2 : to) { addAddress(sub, EMAIL_FIELDS.toUnit.name, string2); } metadata.add(sub); } fromEmail = values[Keywords.Cc.ordinal()]; if (fromEmail != null) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.ccRecipients.name); String[] to = fromEmail.split(","); for (String string2 : to) { addAddress(sub, EMAIL_FIELDS.ccUnit.name, string2); } metadata.add(sub); } fromEmail = values[Keywords.Bcc.ordinal()]; if (fromEmail != null) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.bccRecipients.name); String[] to = fromEmail.split(","); for (String string2 : to) { addAddress(sub, EMAIL_FIELDS.bccUnit.name, string2); } metadata.add(sub); } String subject = null; try { subject = msg.getSubject(); } catch (ChunkNotFoundException e3) { // TODO Auto-generated catch block e3.printStackTrace(); } if (subject != null) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.subject.name); 
sub.setText(StringUtils.unescapeHTML(subject, true, false)); metadata.add(sub); } subject = null; try { subject = msg.getConversationTopic(); } catch (ChunkNotFoundException e3) { //System.err.println(e3.getMessage()); } if (subject != null) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.conversationTopic.name); sub.setText(StringUtils.unescapeHTML(subject, true, false)); metadata.add(sub); } if (values[Keywords.Date.ordinal()] != null) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.sentDate.name); sub.setText(values[Keywords.Date.ordinal()]); metadata.add(sub); } if (values[Keywords.XOriginalArrivalTime.ordinal()] != null) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.receivedDate.name); sub.setText(values[Keywords.XOriginalArrivalTime.ordinal()]); metadata.add(sub); } if (values[Keywords.Received.ordinal()] != null) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.receptionTrace.name); String[] traces = values[Keywords.Received.ordinal()].split("\n"); for (String string : traces) { Element sub3 = XmlDom.factory.createElement(EMAIL_FIELDS.trace.name); sub3.setText(string); sub.add(sub3); } metadata.add(sub); } if (values[Keywords.XSDOC.ordinal()] != null) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.emailSize.name); sub.setText(values[Keywords.XSDOC.ordinal()]); metadata.add(sub); } String messageId = values[Keywords.MessageId.ordinal()]; if (messageId != null) { messageId = StringUtils.removeChevron(StringUtils.unescapeHTML(messageId, true, false)).trim(); if (messageId.length() > 1) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.messageId.name); sub.setText(messageId); metadata.add(sub); } } String InReplyToId = values[Keywords.InReplyTo.ordinal()]; if (InReplyToId != null) { InReplyToId = StringUtils.removeChevron(StringUtils.unescapeHTML(InReplyToId, true, false)).trim(); if (InReplyToId.length() > 1) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.inReplyTo.name); 
sub.setText(InReplyToId); if (messageId != null && messageId.length() > 1) { String old = EmlExtract.filEmls.get(InReplyToId); if (old == null) { old = messageId; } else { old += "," + messageId; } EmlExtract.filEmls.put(InReplyToId, old); } metadata.add(sub); } InReplyToId = null; } Element prop = XmlDom.factory.createElement(EMAIL_FIELDS.properties.name); String imp = values[Keywords.Importance.ordinal()]; if (imp != null && imp.length() > 0) { try { int Priority = Integer.parseInt(imp); switch (Priority) { case 5: imp = "LOWEST"; break; case 4: imp = "LOW"; break; case 3: imp = "NORMAL"; break; case 2: imp = "HIGH"; break; case 1: imp = "HIGHEST"; break; default: imp = "LEV" + Priority; } } catch (NumberFormatException e) { // ignore since imp will be used as returned } prop.addAttribute(EMAIL_FIELDS.importance.name, imp); } imp = values[Keywords.Priority.ordinal()]; if (imp != null && imp.length() > 0) { try { int Priority = Integer.parseInt(imp); switch (Priority) { case 5: imp = "LOWEST"; break; case 4: imp = "LOW"; break; case 3: imp = "NORMAL"; break; case 2: imp = "HIGH"; break; case 1: imp = "HIGHEST"; break; default: imp = "LEV" + Priority; } } catch (NumberFormatException e) { // ignore since imp will be used as returned } prop.addAttribute(EMAIL_FIELDS.priority.name, imp); } if (values[Keywords.Sensitivity.ordinal()] != null) { prop.addAttribute(EMAIL_FIELDS.sensitivity.name, values[Keywords.Sensitivity.ordinal()]); } AttachmentChunks[] files = msg.getAttachmentFiles(); boolean Attachments = (files != null && files.length > 0); prop.addAttribute(EMAIL_FIELDS.hasAttachment.name, Boolean.toString(Attachments)); metadata.add(prop); String result = ""; Element identification = null; if (Attachments) { File oldPath = curPath; if (config.extractFile) { File newDir = new File(curPath, id); newDir.mkdir(); curPath = newDir; } identification = XmlDom.factory.createElement(EMAIL_FIELDS.attachments.name); // get the number of attachments for this message int 
NumberOfAttachments = files.length; identification.addAttribute(EMAIL_FIELDS.attNumber.name, Integer.toString(NumberOfAttachments)); // get a specific attachment from this email. for (int attachmentNumber = 0; attachmentNumber < NumberOfAttachments; attachmentNumber++) { AttachmentChunks attachment = files[attachmentNumber]; if (argument.extractKeyword) { result += " " + extractInfoAttachment(attachment, identification, argument, config, curPath); } else { extractInfoAttachment(attachment, identification, argument, config, curPath); } } curPath = oldPath; } // Plain text e-mail body String body = ""; if (argument.extractKeyword || config.extractFile) { try { body = msg.getTextBody(); } catch (ChunkNotFoundException e2) { // TODO Auto-generated catch block e2.printStackTrace(); } boolean isTxt = true; boolean isHttp = false; if (body == null || body.isEmpty()) { isTxt = false; try { body = msg.getHtmlBody(); } catch (ChunkNotFoundException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } isHttp = true; if (body == null || body.isEmpty()) { isHttp = false; try { body = msg.getRtfBody(); } catch (ChunkNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } if (body != null && !body.isEmpty()) { if (config.extractFile) { // XXX FIXME could saved email from HTML Body (clearer) if possible // use curRank in name, and attachment will be under directory named // add currank in field File newDir = new File(curPath, id); newDir.mkdir(); String filenamebody = messageId; if (filenamebody == null || !filenamebody.isEmpty()) { filenamebody = id; } String html = null; if (isHttp) { html = body; } String rtf = null; if (!isTxt && !isHttp) { rtf = body; } if (isTxt) { FileOutputStream output = null; try { output = new FileOutputStream(new File(newDir, filenamebody + ".txt")); byte[] bb = body.getBytes(); output.write(bb, 0, bb.length); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); 
} finally { if (output != null) { try { output.close(); } catch (IOException e) { } } } try { html = msg.getHtmlBody(); } catch (ChunkNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } } if (html != null && !html.isEmpty()) { FileOutputStream output = null; try { output = new FileOutputStream(new File(newDir, filenamebody + ".html")); byte[] bb = html.getBytes(); output.write(bb, 0, bb.length); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { if (output != null) { try { output.close(); } catch (IOException e) { } } } html = null; } if (isTxt || isHttp) { try { rtf = msg.getRtfBody(); } catch (ChunkNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } } if (rtf != null && !rtf.isEmpty()) { FileOutputStream output = null; try { output = new FileOutputStream(new File(newDir, filenamebody + ".rtf")); byte[] bb = rtf.getBytes(); output.write(bb, 0, bb.length); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { if (output != null) { try { output.close(); } catch (IOException e) { } } } rtf = null; } } } } if (metadata.hasContent()) { root.add(metadata); } if (identification != null && identification.hasContent()) { root.add(identification); } if (argument.extractKeyword) { result = body + " " + result; body = null; ExtractInfo.exportMetadata(keywords, result, "", config, null); if (keywords.hasContent()) { root.add(keywords); } } root.addAttribute(EMAIL_FIELDS.status.name, "ok"); //System.out.println("end of "+id); return result; }
From source file:org.alfresco.repo.content.transform.MSGParser.java
License:Apache License
/** * Process header./*from w ww . ja v a 2s .c o m*/ * * @param msg * the msg * @param metadata * the metadata * @param xhtml * the xhtml * @throws Exception * the exception */ private void processHeader(MAPIMessage msg, Metadata metadata, XHTMLContentHandler xhtml) throws Exception { StringChunk subjectChunk = msg.getMainChunks().subjectChunk; if (msg.has7BitEncodingStrings()) { CharsetDetector detector = new CharsetDetector(); detector.setText(subjectChunk.getRawValue()); CharsetMatch detect = detector.detect(); if (detect.getConfidence() >= 20) { subjectChunk.set7BitEncoding(detect.getName()); } } String subject = subjectChunk.getValue(); String from = msg.getDisplayFrom(); metadata.set(DublinCore.CREATOR, from); metadata.set(Metadata.MESSAGE_FROM, from); metadata.set(Metadata.MESSAGE_TO, msg.getDisplayTo()); metadata.set(Metadata.MESSAGE_CC, msg.getDisplayCC()); metadata.set(Metadata.MESSAGE_BCC, msg.getDisplayBCC()); metadata.set(DublinCore.TITLE, subject); metadata.set(DublinCore.SUBJECT, msg.getConversationTopic()); try { for (String recipientAddress : msg.getRecipientEmailAddressList()) { if (recipientAddress != null) metadata.add(Metadata.MESSAGE_RECIPIENT_ADDRESS, recipientAddress); } } catch (ChunkNotFoundException he) { } // Will be fixed in POI 3.7 Final // Date - try two ways to find it // First try via the proper chunk if (msg.getMessageDate() != null) { metadata.set(DublinCore.DATE, msg.getMessageDate().getTime()); metadata.set(Office.CREATION_DATE, msg.getMessageDate().getTime()); metadata.set(Office.SAVE_DATE, msg.getMessageDate().getTime()); } else { try { // Failing that try via the raw headers String[] headers = msg.getHeaders(); if (headers != null && headers.length > 0) { for (String header : headers) { if (header.toLowerCase().startsWith("date:")) { String date = header.substring(header.indexOf(':') + 1).trim(); // See if we can parse it as a normal mail date try { Date d = MboxParser.parseDate(date); metadata.set(DublinCore.DATE, d); 
metadata.set(Office.CREATION_DATE, d); metadata.set(Office.SAVE_DATE, d); } catch (ParseException e) { // Store it as-is, and hope for the best... metadata.set(DublinCore.DATE, date); metadata.set(Office.CREATION_DATE, date); metadata.set(Office.SAVE_DATE, date); } break; } } } } catch (ChunkNotFoundException he) { // We can't find the date, sorry... } } xhtml.element("h1", subject); // Output the from and to details in text, as you // often want them in text form for searching xhtml.startElement("dl"); if (from != null) { header(xhtml, "From", from); } header(xhtml, "To", msg.getDisplayTo()); header(xhtml, "Cc", msg.getDisplayCC()); header(xhtml, "Bcc", msg.getDisplayBCC()); try { header(xhtml, "Recipients", msg.getRecipientEmailAddress()); } catch (ChunkNotFoundException e) { } List<String> attachmentList = new ArrayList<String>(); // // prepare attachments prepareExtractMultipart(xhtml, message, attachmentList); if (attachmentList.size() > 0) { header(xhtml, "Attachments", attachmentList.toString()); } xhtml.endElement("dl"); }