Example usage for org.apache.poi.hsmf MAPIMessage MAPIMessage

List of usage examples for the org.apache.poi.hsmf.MAPIMessage constructor

Introduction

On this page you can find example usages of the org.apache.poi.hsmf.MAPIMessage constructor, taken from open-source projects.

Prototype

public MAPIMessage(DirectoryNode poifsDir) throws IOException 

Source Link

Document

Constructor for reading MSG Files from a certain point within a POIFS filesystem

Usage

From source file:com.github.n_i_e.dirtreedb.MsgLister.java

License:Apache License

/**
 * Returns the next entry of the Outlook message being listed.
 *
 * <p>The first call (while {@code content} is still {@code null}) parses the message from
 * {@code inf}, remembers its date and subject, and returns an entry for the text body. Every
 * later call returns an entry for the next attachment that carries data; attachments without
 * data are skipped. {@code content}, {@code count}, {@code date}, {@code subject},
 * {@code instream} and {@code inf} are state declared outside this method.
 *
 * @return the next entry, or {@code null} when all attachments have been consumed or a
 *         required chunk is missing from the message
 * @throws IOException if the message cannot be read
 */
@Override
protected PathEntry getNext() throws IOException {
    try {
        if (content == null) {
            MAPIMessage msg = new MAPIMessage(inf);

            date = msg.getMessageDate() == null ? 0L : msg.getMessageDate().getTimeInMillis();
            subject = msg.getSubject();

            String s = subjectToFilename(subject, "text/plain", 1);
            s = s.replace("\\", "/");
            Assertion.assertAssertionError(!s.equals(""));
            PathEntry next_entry = new PathEntry(getBasePath().getPath() + "/" + s, PathEntry.COMPRESSEDFILE);
            next_entry.setDateLastModified(date);
            next_entry.setStatus(PathEntry.DIRTY);
            byte[] body = getByteArrayWithBom(msg.getTextBody());
            next_entry.setCompressedSize(body.length);
            // The body is kept as-is, so both sizes are the byte length (mirrors the attachment branch).
            next_entry.setSize(body.length);

            instream = new ByteArrayInputStreamWithCascadingClose(body);
            if (isCsumRequested()) {
                // NOTE(review): the attachment branch uses setCsumAndClose here - confirm the difference is intended.
                next_entry.setCsum(instream);
            }
            content = msg.getAttachmentFiles();
            return next_entry;
        } else {
            AttachmentChunks part = null;
            byte[] data = null;
            while (data == null) {
                if (count >= content.length) {
                    return null;
                }
                part = content[count];
                if (part != null && part.attachData != null) {
                    data = part.attachData.getValue();
                }
                if (data == null) {
                    // Nothing usable at this index: advance, otherwise the loop would never terminate.
                    count++;
                }
            }

            // Fall back to the attachment index when the message carries no usable file name.
            String filename = part.attachFileName == null ? null : part.attachFileName.getValue();
            if (filename == null) {
                filename = String.valueOf(count);
            }
            filename = filename.replace("\\", "/");
            if (filename.equals("")) {
                filename = String.valueOf(count);
            }

            PathEntry next_entry = new PathEntry(getBasePath().getPath() + "/" + filename,
                    PathEntry.COMPRESSEDFILE);
            next_entry.setDateLastModified(date);
            next_entry.setStatus(PathEntry.DIRTY);
            next_entry.setCompressedSize(data.length);
            next_entry.setSize(data.length);

            instream = new ByteArrayInputStreamWithCascadingClose(data);
            if (isCsumRequested()) {
                next_entry.setCsumAndClose(instream);
            }
            count++;
            return next_entry;
        }
    } catch (ChunkNotFoundException e) {
        // A message lacking a required chunk is treated as having no further entries.
        return null;
    }
}

From source file:com.jaeksoft.searchlib.parser.MapiMsgParser.java

License:Open Source License

/**
 * Reads an Outlook message from the stream limiter and copies its sender, recipient, subject,
 * body, date and conversation-topic values into a new parser result item, then runs language
 * detection on the HTML body if there is one and on the plain-text body otherwise.
 *
 * <p>A {@link ChunkNotFoundException} raised while reading the message is logged as a warning
 * and otherwise ignored.
 *
 * @param streamLimiter source of the message input stream
 * @param lang not read by this method
 * @throws IOException if the message cannot be read
 * @throws SearchLibException declared by the overridden method
 */
@Override
protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang)
        throws IOException, SearchLibException {
    final MAPIMessage message = new MAPIMessage(streamLimiter.getNewInputStream());
    message.setReturnNullOnMissingChunk(true);
    final ParserResultItem item = getNewParserResultItem();
    try {
        item.addField(ParserFieldEnum.email_display_from, message.getDisplayFrom());
        item.addField(ParserFieldEnum.email_display_to, message.getDisplayTo());
        item.addField(ParserFieldEnum.email_display_cc, message.getDisplayCC());
        item.addField(ParserFieldEnum.email_display_bcc, message.getDisplayBCC());
        item.addField(ParserFieldEnum.subject, message.getSubject());
        item.addField(ParserFieldEnum.htmlSource, message.getHtmlBody());
        item.addField(ParserFieldEnum.content, message.getTextBody());
        item.addField(ParserFieldEnum.creation_date, message.getMessageDate());
        item.addField(ParserFieldEnum.email_conversation_topic, message.getConversationTopic());

        final RecipientChunks[] recipients = message.getRecipientDetailsChunks();
        if (recipients != null) {
            for (int i = 0; i < recipients.length; i++) {
                final RecipientChunks recipient = recipients[i];
                item.addField(ParserFieldEnum.email_recipient_name, recipient.getRecipientName());
                item.addField(ParserFieldEnum.email_recipient_address,
                        recipient.getRecipientEmailAddress());
            }
        }

        if (!StringUtils.isEmpty(message.getHtmlBody())) {
            item.langDetection(10000, ParserFieldEnum.htmlSource);
        } else {
            item.langDetection(10000, ParserFieldEnum.content);
        }
    } catch (ChunkNotFoundException missingChunk) {
        Logging.warn(missingChunk);
    }
}

From source file:com.lp.client.util.OutlookToJCR.java

License:Open Source License

/**
 * Builds the document list for an Outlook message file: the documents returned by
 * {@code getAttachments} followed by the message itself, stored under the node "original".
 *
 * <p>The given {@code jcr} is filled with the raw file bytes, a name taken from the subject
 * ("Email" when the subject is null or empty) and keywords of the form
 * {@code <from> TO <to>}.
 *
 * @param file the message file
 * @param jcr the document describing the message; modified in place
 * @return the attachment documents followed by {@code jcr}, or {@code null} when a required
 *         chunk is missing from the message
 * @throws IOException if the file cannot be read
 */
@Override
protected List<JCRDocDto> createJCRImpl(File file, JCRDocDto jcr) throws IOException {
    final List<JCRDocDto> documents = new ArrayList<JCRDocDto>();
    final MAPIMessage message = new MAPIMessage(file.getPath());

    jcr.setbData(Helper.getBytesFromFile(file));
    try {
        final String name;
        if (message.getSubject() == null || message.getSubject().isEmpty()) {
            name = "Email";
        } else {
            name = message.getSubject();
        }
        jcr.setsName(name);

        final String keywords = message.getDisplayFrom() + " TO " + message.getDisplayTo();
        jcr.setsSchlagworte(keywords);
    } catch (ChunkNotFoundException missingChunk) {
        missingChunk.printStackTrace();
        return null;
    }

    final DocPath mailPath = new DocPath().add(new DocNodeMail(jcr.getsName()));
    jcr.setDocPath(mailPath);

    // Attachments are collected before the "original" node is appended to the message path.
    documents.addAll(getAttachments(message, jcr));

    jcr.getDocPath().add(new DocNodeFile("original"));
    documents.add(jcr);

    return documents;
}

From source file:com.openkm.extractor.MsOutlookTextExtractor.java

License:Open Source License

/**
 * {@inheritDoc} Returns an empty reader if an error occured extracting text from
 * the outlook message./*  www . j  ava  2  s  .  c  o m*/
 */
public String extractText(InputStream stream, String type, String encoding) throws IOException {
    try {
        MAPIMessage message = new MAPIMessage(stream);
        StringBuffer buffer = new StringBuffer();
        buffer.append(message.getDisplayFrom()).append('\n');
        buffer.append(message.getDisplayTo()).append('\n');
        buffer.append(message.getSubject()).append('\n');
        buffer.append(message.getTextBody());
        return buffer.toString();
    } catch (Exception e) {
        logger.warn("Failed to extract Message content", e);
        throw new IOException(e.getMessage(), e);
    } finally {
        stream.close();
    }
}

From source file:com.opensearchserver.extractor.parser.MapiMsg.java

License:Apache License

/**
 * Reads an Outlook message from the stream and stores its sender, recipients, subject, HTML and
 * plain-text bodies, date and conversation topic in a new parser document, followed by a
 * language-detection value computed from the HTML body if present, else from the plain text.
 *
 * @param inputStream stream containing the message
 * @param extension not read by this method
 * @param mimeType not read by this method
 * @throws Exception if the message cannot be read or processed
 */
@Override
protected void parseContent(InputStream inputStream, String extension, String mimeType) throws Exception {
    final MAPIMessage message = new MAPIMessage(inputStream);
    // Missing chunks yield null values instead of exceptions.
    message.setReturnNullOnMissingChunk(true);

    final ParserDocument parsed = getNewParserDocument();

    parsed.add(FROM, message.getDisplayFrom());
    parsed.add(RECIPIENT_TO, message.getDisplayTo());
    parsed.add(RECIPIENT_CC, message.getDisplayCC());
    parsed.add(RECIPIENT_BCC, message.getDisplayBCC());
    parsed.add(SUBJECT, message.getSubject());
    parsed.add(HTML_CONTENT, message.getHtmlBody());
    parsed.add(PLAIN_CONTENT, message.getTextBody());
    parsed.add(MESSAGE_DATE, message.getMessageDate());
    parsed.add(CONVERSATION_TOPIC, message.getConversationTopic());

    if (!StringUtils.isEmpty(message.getHtmlBody())) {
        parsed.add(LANG_DETECTION, languageDetection(parsed, HTML_CONTENT, 10000));
    } else {
        parsed.add(LANG_DETECTION, languageDetection(parsed, PLAIN_CONTENT, 10000));
    }

    // TODO manage attachments
}

From source file:com.qwazr.library.poi.MapiMsgParser.java

License:Apache License

/**
 * Reads an Outlook message from the stream, sets the MIME type meta to the first default MIME
 * type, and stores the sender, recipients, subject, HTML and plain-text bodies, date and
 * conversation topic in a new document, followed by a language-detection value computed from
 * the HTML body if present, else from the plain text.
 *
 * @param parameters not read by this method
 * @param inputStream stream containing the message
 * @param extension not read by this method
 * @param mimeType not read by this method
 * @param resultBuilder receives the metas and the new document
 * @throws Exception if the message cannot be read or processed
 */
@Override
public void parseContent(final MultivaluedMap<String, String> parameters, final InputStream inputStream,
        final String extension, final String mimeType, final ParserResultBuilder resultBuilder)
        throws Exception {

    final MAPIMessage message = new MAPIMessage(inputStream);
    // Missing chunks yield null values instead of exceptions.
    message.setReturnNullOnMissingChunk(true);

    final ParserFieldsBuilder metaFields = resultBuilder.metas();
    metaFields.set(MIME_TYPE, DEFAULT_MIMETYPES[0]);

    final ParserFieldsBuilder fields = resultBuilder.newDocument();

    fields.add(FROM, message.getDisplayFrom());
    fields.add(RECIPIENT_TO, message.getDisplayTo());
    fields.add(RECIPIENT_CC, message.getDisplayCC());
    fields.add(RECIPIENT_BCC, message.getDisplayBCC());
    fields.add(SUBJECT, message.getSubject());
    fields.add(HTML_CONTENT, message.getHtmlBody());
    fields.add(PLAIN_CONTENT, message.getTextBody());
    fields.add(MESSAGE_DATE, message.getMessageDate());
    fields.add(CONVERSATION_TOPIC, message.getConversationTopic());

    if (!StringUtils.isEmpty(message.getHtmlBody())) {
        fields.add(LANG_DETECTION, languageDetection(fields, HTML_CONTENT, 10000));
    } else {
        fields.add(LANG_DETECTION, languageDetection(fields, PLAIN_CONTENT, 10000));
    }

    // TODO manage attachments
}

From source file:fr.gouv.culture.vitam.eml.MsgExtract2.java

License:Open Source License

/**
 * Try to extract the following :/* w w  w  .  java2 s  .co  m*/
 * 
 * Taken from : http://www.significantproperties.org.uk/email-testingreport.html
 * 
 * message-id (Message-ID), References (References), In-Reply-To (In-Reply-To), Attachment
 * subject (Subject), keywords sent-date (Date), Received-date (in Received last date),
 * Trace-field (Received?)
 * 
 * 
 * From (From), To (To), CC (Cc), BCC (Bcc), Content-Type, Content-Transfer-Encoding
 * 
 * ? DomainKey-Signature, Sender, X-Original-Sender, X-Forwarded-Message-Id,
 * 
 * 1) Core property set
 * 
 * The core property set indicates the minimum amount of information that is considered
 * necessary to establish the authenticity and integrity of the email message
 * 
 * Local-part, Domain-part, Relationship, Subject, Trace-field , Message body with no mark-up,
 * Attachments
 * 
 * 2) Message thread scenario
 * 
 * Email is frequently used as a communication method between two or more people. To understand
 * the context in which a message was created it may be necessary to refer to earlier messages.
 * To identify the thread of a discussion, the following fields should be provided, in addition
 * to the core property set:
 * 
 * Local-part, Domain-part, Relationship, Subject, Trace-field, Message body with no mark-up,
 * Attachments, Message-ID, References
 * 
 * 3) Recommended property set
 * 
 * The recommended property set indicates additional information that should be provided in an
 * ideal scenario, if it is present within the email. The list
 * 
 * Local-part, Domain-part, Domain-literal (if present), Relationship, Subject, Trace-field,
 * Attachments, Message-ID, References, Sent-date, Received date, Display name, In-reply-to,
 * Keywords, Message body & associated mark-up (see table 6 for scenarios)
 * 
 * 
 * 
 * @param msgFile
 * @param filename
 * @param argument
 * @param config
 * @return
 */
public static Element extractInfoEmail(File msgFile, String filename, VitamArgument argument,
        ConfigLoader config) {
    File oldDir = argument.currentOutputDir;
    if (argument.currentOutputDir == null) {
        if (config.outputDir != null) {
            argument.currentOutputDir = new File(config.outputDir);
        } else {
            argument.currentOutputDir = new File(msgFile.getParentFile().getAbsolutePath());
        }
    }
    Element root = XmlDom.factory.createElement(EMAIL_FIELDS.formatMSG.name);
    try {
        //System.out.println("msg: "+msgFile.getAbsolutePath());
        MAPIMessage msg = new MAPIMessage(msgFile.getAbsolutePath());
        extractInfoSubEmail(msg, argument.currentOutputDir, root, argument, config);
    } catch (UnsupportedOperationException e) {
        System.err.println(StaticValues.LBL.error_error.get() + e.toString());
        e.printStackTrace();
        String status = "Error during identification";
        root.addAttribute(EMAIL_FIELDS.status.name, status);
    } catch (IOException e) {
        System.err.println(StaticValues.LBL.error_error.get() + e.toString());
        e.printStackTrace();
        String status = "Error during identification";
        root.addAttribute(EMAIL_FIELDS.status.name, status);
    }
    argument.currentOutputDir = oldDir;
    return root;
}

From source file:mj.ocraptor.extraction.tika.parser.microsoft.OutlookExtractor.java

License:Apache License

public OutlookExtractor(DirectoryNode root, ParseContext context) throws TikaException {
    super(context);

    try {//from w  w w.  j a v  a 2  s  .  c o m
        this.msg = new MAPIMessage(root);
    } catch (IOException e) {
        throw new TikaException("Failed to parse Outlook message", e);
    }
}

From source file:net.lifove.research.utils.Msg2txt.java

License:Apache License

public Msg2txt(String fileName) throws IOException {
    fileNameStem = fileName;//from w  ww.  j  av a  2s  .  c  o m
    if (fileNameStem.endsWith(".msg") || fileNameStem.endsWith(".MSG")) {
        fileNameStem = fileNameStem.substring(0, fileNameStem.length() - 4);
    }
    msg = new MAPIMessage(fileName);
}

From source file:org.alfresco.repo.content.transform.MSGParser.java

License:Apache License

/**
 * Extracts properties and text from an Msg Document input stream.
 *
 * @param stream/*from  ww  w  .j  a  va 2s . c o  m*/
 *            the stream
 * @param handler
 *            the handler
 * @param metadata
 *            the metadata
 * @param context
 *            the context
 * @throws IOException
 *             Signals that an I/O exception has occurred.
 * @throws SAXException
 *             the sAX exception
 * @throws TikaException
 *             the tika exception
 */
@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {

    XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
    xhtml.startDocument();

    try {
        this.message = new MAPIMessage(new NPOIFSFileSystem(stream));
        message.setReturnNullOnMissingChunk(true);
        // // If the message contains strings that aren't stored
        // // as Unicode, try to sort out an encoding for them
        if (message.has7BitEncodingStrings()) {
            if (message.getHeaders() != null) {
                // There's normally something in the headers
                message.guess7BitEncoding();
                encoding = "utf-7";
            } else {
                // Nothing in the header, try encoding detection
                // on the message body
                StringChunk text = message.getMainChunks().textBodyChunk;
                if (text != null) {
                    CharsetDetector detector = new CharsetDetector();
                    detector.setText(text.getRawValue());
                    CharsetMatch match = detector.detect();
                    if (match.getConfidence() > 35) {
                        message.set7BitEncoding(match.getName());
                        encoding = match.getName();
                    }
                }
            }
        } else {
            encoding = UTF_8;
        }

        processHeader(message, metadata, xhtml);

        // real work.
        adaptedExtractMultipart(xhtml, message, context);

        xhtml.endDocument();

    } catch (Exception e) {
        throw new TikaException("Error while processing message", e);
    }
}