List of usage examples for org.apache.poi.hsmf MAPIMessage MAPIMessage
public MAPIMessage(DirectoryNode poifsDir) throws IOException
From source file:com.github.n_i_e.dirtreedb.MsgLister.java
License:Apache License
@Override protected PathEntry getNext() throws IOException { try {// www.j ava2 s .c om if (content == null) { MAPIMessage msg; msg = new MAPIMessage(inf); date = msg.getMessageDate() == null ? 0L : msg.getMessageDate().getTimeInMillis(); subject = msg.getSubject(); String s = subjectToFilename(subject, "text/plain", 1); s = s.replace("\\", "/"); Assertion.assertAssertionError(!s.equals("")); PathEntry next_entry = new PathEntry(getBasePath().getPath() + "/" + s, PathEntry.COMPRESSEDFILE); next_entry.setDateLastModified(date); next_entry.setStatus(PathEntry.DIRTY); byte[] body = getByteArrayWithBom(msg.getTextBody()); next_entry.setCompressedSize(body.length); next_entry.setSize(next_entry.getSize()); instream = new ByteArrayInputStreamWithCascadingClose(body); if (isCsumRequested()) { next_entry.setCsum(instream); } content = msg.getAttachmentFiles(); return next_entry; } else { AttachmentChunks part = null; byte[] data = null; while (data == null) { if (count >= content.length) { return null; } part = content[count]; try { data = part.attachData.getValue(); } catch (NullPointerException e) { data = null; count++; } } String filename; try { filename = part.attachFileName.getValue(); } catch (NullPointerException e) { filename = String.valueOf(count); } filename = filename.replace("\\", "/"); if (filename.equals("")) { filename = String.valueOf(count); } PathEntry next_entry = new PathEntry(getBasePath().getPath() + "/" + filename, PathEntry.COMPRESSEDFILE); next_entry.setDateLastModified(date); next_entry.setStatus(PathEntry.DIRTY); next_entry.setCompressedSize(data.length); next_entry.setSize(data.length); instream = new ByteArrayInputStreamWithCascadingClose(data); if (isCsumRequested()) { next_entry.setCsumAndClose(instream); } count++; return next_entry; } } catch (ChunkNotFoundException e) { return null; } }
From source file:com.jaeksoft.searchlib.parser.MapiMsgParser.java
License:Open Source License
@Override protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang) throws IOException, SearchLibException { MAPIMessage msg = new MAPIMessage(streamLimiter.getNewInputStream()); msg.setReturnNullOnMissingChunk(true); ParserResultItem result = getNewParserResultItem(); try {//www . ja v a2s. c o m result.addField(ParserFieldEnum.email_display_from, msg.getDisplayFrom()); result.addField(ParserFieldEnum.email_display_to, msg.getDisplayTo()); result.addField(ParserFieldEnum.email_display_cc, msg.getDisplayCC()); result.addField(ParserFieldEnum.email_display_bcc, msg.getDisplayBCC()); result.addField(ParserFieldEnum.subject, msg.getSubject()); result.addField(ParserFieldEnum.htmlSource, msg.getHtmlBody()); result.addField(ParserFieldEnum.content, msg.getTextBody()); result.addField(ParserFieldEnum.creation_date, msg.getMessageDate()); result.addField(ParserFieldEnum.email_conversation_topic, msg.getConversationTopic()); RecipientChunks[] recipientChuncksList = msg.getRecipientDetailsChunks(); if (recipientChuncksList != null) { for (RecipientChunks recipientChunks : recipientChuncksList) { result.addField(ParserFieldEnum.email_recipient_name, recipientChunks.getRecipientName()); result.addField(ParserFieldEnum.email_recipient_address, recipientChunks.getRecipientEmailAddress()); } } if (StringUtils.isEmpty(msg.getHtmlBody())) result.langDetection(10000, ParserFieldEnum.content); else result.langDetection(10000, ParserFieldEnum.htmlSource); } catch (ChunkNotFoundException e) { Logging.warn(e); } }
From source file:com.lp.client.util.OutlookToJCR.java
License:Open Source License
@Override protected List<JCRDocDto> createJCRImpl(File file, JCRDocDto jcr) throws IOException { List<JCRDocDto> jcrs = new ArrayList<JCRDocDto>(); MAPIMessage msg = new MAPIMessage(file.getPath()); jcr.setbData(Helper.getBytesFromFile(file)); try {//from ww w . j a v a 2s . co m jcr.setsName(msg.getSubject() == null || msg.getSubject().isEmpty() ? "Email" : msg.getSubject()); StringBuffer schlagworte = new StringBuffer(); schlagworte.append(msg.getDisplayFrom()); schlagworte.append(" TO "); schlagworte.append(msg.getDisplayTo()); jcr.setsSchlagworte(schlagworte.toString()); } catch (ChunkNotFoundException e) { e.printStackTrace(); return null; } jcr.setDocPath(new DocPath().add(new DocNodeMail(jcr.getsName()))); jcrs.addAll(getAttachments(msg, jcr)); jcr.getDocPath().add(new DocNodeFile("original")); jcrs.add(jcr); return jcrs; }
From source file:com.openkm.extractor.MsOutlookTextExtractor.java
License:Open Source License
/** * {@inheritDoc} Returns an empty reader if an error occured extracting text from * the outlook message./* www . j ava 2 s . c o m*/ */ public String extractText(InputStream stream, String type, String encoding) throws IOException { try { MAPIMessage message = new MAPIMessage(stream); StringBuffer buffer = new StringBuffer(); buffer.append(message.getDisplayFrom()).append('\n'); buffer.append(message.getDisplayTo()).append('\n'); buffer.append(message.getSubject()).append('\n'); buffer.append(message.getTextBody()); return buffer.toString(); } catch (Exception e) { logger.warn("Failed to extract Message content", e); throw new IOException(e.getMessage(), e); } finally { stream.close(); } }
From source file:com.opensearchserver.extractor.parser.MapiMsg.java
License:Apache License
@Override protected void parseContent(InputStream inputStream, String extension, String mimeType) throws Exception { MAPIMessage msg = new MAPIMessage(inputStream); msg.setReturnNullOnMissingChunk(true); ParserDocument document = getNewParserDocument(); document.add(FROM, msg.getDisplayFrom()); document.add(RECIPIENT_TO, msg.getDisplayTo()); document.add(RECIPIENT_CC, msg.getDisplayCC()); document.add(RECIPIENT_BCC, msg.getDisplayBCC()); document.add(SUBJECT, msg.getSubject()); document.add(HTML_CONTENT, msg.getHtmlBody()); document.add(PLAIN_CONTENT, msg.getTextBody()); document.add(MESSAGE_DATE, msg.getMessageDate()); document.add(CONVERSATION_TOPIC, msg.getConversationTopic()); if (StringUtils.isEmpty(msg.getHtmlBody())) document.add(LANG_DETECTION, languageDetection(document, PLAIN_CONTENT, 10000)); else/* w w w . j a va2 s .c o m*/ document.add(LANG_DETECTION, languageDetection(document, HTML_CONTENT, 10000)); // TODO manage attachments }
From source file:com.qwazr.library.poi.MapiMsgParser.java
License:Apache License
@Override public void parseContent(final MultivaluedMap<String, String> parameters, final InputStream inputStream, final String extension, final String mimeType, final ParserResultBuilder resultBuilder) throws Exception { final MAPIMessage msg = new MAPIMessage(inputStream); msg.setReturnNullOnMissingChunk(true); final ParserFieldsBuilder metas = resultBuilder.metas(); metas.set(MIME_TYPE, DEFAULT_MIMETYPES[0]); final ParserFieldsBuilder document = resultBuilder.newDocument(); document.add(FROM, msg.getDisplayFrom()); document.add(RECIPIENT_TO, msg.getDisplayTo()); document.add(RECIPIENT_CC, msg.getDisplayCC()); document.add(RECIPIENT_BCC, msg.getDisplayBCC()); document.add(SUBJECT, msg.getSubject()); document.add(HTML_CONTENT, msg.getHtmlBody()); document.add(PLAIN_CONTENT, msg.getTextBody()); document.add(MESSAGE_DATE, msg.getMessageDate()); document.add(CONVERSATION_TOPIC, msg.getConversationTopic()); if (StringUtils.isEmpty(msg.getHtmlBody())) document.add(LANG_DETECTION, languageDetection(document, PLAIN_CONTENT, 10000)); else//from ww w. j a va 2s.c om document.add(LANG_DETECTION, languageDetection(document, HTML_CONTENT, 10000)); // TODO manage attachments }
From source file:fr.gouv.culture.vitam.eml.MsgExtract2.java
License:Open Source License
/** * Try to extract the following :/* w w w . java2 s .co m*/ * * Taken from : http://www.significantproperties.org.uk/email-testingreport.html * * message-id (Message-ID), References (References), In-Reply-To (In-Reply-To), Attachment * subject (Subject), keywords sent-date (Date), Received-date (in Received last date), * Trace-field (Received?) * * * From (From), To (To), CC (Cc), BCC (Bcc), Content-Type, Content-Transfer-Encoding * * ? DomainKey-Signature, Sender, X-Original-Sender, X-Forwarded-Message-Id, * * 1) Core property set * * The core property set indicates the minimum amount of information that is considered * necessary to establish the authenticity and integrity of the email message * * Local-part, Domain-part, Relationship, Subject, Trace-field , Message body with no mark-up, * Attachments * * 2) Message thread scenario * * Email is frequently used as a communication method between two or more people. To understand * the context in which a message was created it may be necessary to refer to earlier messages. * To identify the thread of a discussion, the following fields should be provided, in addition * to the core property set: * * Local-part, Domain-part, Relationship, Subject, Trace-field, Message body with no mark-up, * Attachments, Message-ID, References * * 3) Recommended property set * * The recommended property set indicates additional information that should be provided in an * ideal scenario, if it is present within the email. The list * * Local-part, Domain-part, Domain-literal (if present), Relationship, Subject, Trace-field, * Attachments, Message-ID, References, Sent-date, Received date, Display name, In-reply-to, * Keywords, Message body & associated mark-up (see table 6 for scenarios) * * * * @param msgFile * @param filename * @param argument * @param config * @return */ public static Element extractInfoEmail(File msgFile, String filename, VitamArgument argument, ConfigLoader config) { File oldDir = argument.currentOutputDir; if (argument.currentOutputDir == null) { if (config.outputDir != null) { argument.currentOutputDir = new File(config.outputDir); } else { argument.currentOutputDir = new File(msgFile.getParentFile().getAbsolutePath()); } } Element root = XmlDom.factory.createElement(EMAIL_FIELDS.formatMSG.name); try { //System.out.println("msg: "+msgFile.getAbsolutePath()); MAPIMessage msg = new MAPIMessage(msgFile.getAbsolutePath()); extractInfoSubEmail(msg, argument.currentOutputDir, root, argument, config); } catch (UnsupportedOperationException e) { System.err.println(StaticValues.LBL.error_error.get() + e.toString()); e.printStackTrace(); String status = "Error during identification"; root.addAttribute(EMAIL_FIELDS.status.name, status); } catch (IOException e) { System.err.println(StaticValues.LBL.error_error.get() + e.toString()); e.printStackTrace(); String status = "Error during identification"; root.addAttribute(EMAIL_FIELDS.status.name, status); } argument.currentOutputDir = oldDir; return root; }
From source file:mj.ocraptor.extraction.tika.parser.microsoft.OutlookExtractor.java
License:Apache License
public OutlookExtractor(DirectoryNode root, ParseContext context) throws TikaException { super(context); try {//from w w w. j a v a 2 s . c o m this.msg = new MAPIMessage(root); } catch (IOException e) { throw new TikaException("Failed to parse Outlook message", e); } }
From source file:net.lifove.research.utils.Msg2txt.java
License:Apache License
public Msg2txt(String fileName) throws IOException { fileNameStem = fileName;//from w ww. j av a 2s . c o m if (fileNameStem.endsWith(".msg") || fileNameStem.endsWith(".MSG")) { fileNameStem = fileNameStem.substring(0, fileNameStem.length() - 4); } msg = new MAPIMessage(fileName); }
From source file:org.alfresco.repo.content.transform.MSGParser.java
License:Apache License
/** * Extracts properties and text from an Msg Document input stream. * * @param stream/*from ww w .j a va 2s . c o m*/ * the stream * @param handler * the handler * @param metadata * the metadata * @param context * the context * @throws IOException * Signals that an I/O exception has occurred. * @throws SAXException * the sAX exception * @throws TikaException * the tika exception */ @Override public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument(); try { this.message = new MAPIMessage(new NPOIFSFileSystem(stream)); message.setReturnNullOnMissingChunk(true); // // If the message contains strings that aren't stored // // as Unicode, try to sort out an encoding for them if (message.has7BitEncodingStrings()) { if (message.getHeaders() != null) { // There's normally something in the headers message.guess7BitEncoding(); encoding = "utf-7"; } else { // Nothing in the header, try encoding detection // on the message body StringChunk text = message.getMainChunks().textBodyChunk; if (text != null) { CharsetDetector detector = new CharsetDetector(); detector.setText(text.getRawValue()); CharsetMatch match = detector.detect(); if (match.getConfidence() > 35) { message.set7BitEncoding(match.getName()); encoding = match.getName(); } } } } else { encoding = UTF_8; } processHeader(message, metadata, xhtml); // real work. adaptedExtractMultipart(xhtml, message, context); xhtml.endDocument(); } catch (Exception e) { throw new TikaException("Error while processing message", e); } }