org.xmlactions.email.EMailParser.java Source code

Java tutorial

Introduction

Here is the source code for org.xmlactions.email.EMailParser.java

Source

/*
 * Copyright (C) Mike Murphy 2003-2015 <mike.murphy@xmlactions.org><mwjmurphy@gmail.com>
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

package org.xmlactions.email;

import java.io.IOException;
import java.io.InputStream;
import java.util.Enumeration;
import java.util.Locale;

import javax.activation.DataHandler;
import javax.mail.BodyPart;
import javax.mail.Message;
import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.Part;
import javax.mail.internet.MimeBodyPart;

import org.apache.commons.io.IOUtils;
import org.dom4j.DocumentException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Walks the MIME structure of an email - body parts, attachments and nested
 * (forwarded) messages - and classifies each leaf part as an attachment,
 * plain text, HTML or ignorable content.
 * <p>
 * The parser keeps its state ({@link #getLastMessage()},
 * {@link #isFirstMessageProcessed()}, {@link #isIgnoreHTML()}) in plain,
 * unsynchronised instance fields.
 * <p>
 * NOTE(review): the historical description says the email is parsed "to an
 * EMailExportMessage class"; nothing in this class references that type, so
 * confirm where the export actually happens.
 * 
 * @author MichaelMurphy
 */
public class EMailParser {

    private static final Logger log = LoggerFactory.getLogger(EMailParser.class);

    // the message most recently handed to mapMessage
    private Message lastMessage;

    // set to true once a nested (forwarded) message has been encountered
    private boolean firstMessageProcessed;

    // emails can have both plain text and html content, text is what we want as
    // it's easier to work with. We set this true if we find a text/plain body
    // content, which will instruct the code to ignore any text/html parts
    private boolean ignoreHTML = false;

    /**
     * Parses a complete email message.
     * <p>
     * Records the message as the last message, clears the
     * "first message processed" flag and then walks the message's part tree.
     * <p>
     * NOTE(review): {@code ignoreHTML} is not reset here, so a text/plain part
     * seen in one message keeps suppressing text/html parts in later messages
     * parsed by the same instance - confirm this is intended.
     * 
     * @param message
     *            the email to parse; must not be null.
     * @throws IOException
     *             if the content of a part cannot be read.
     * @throws MessagingException
     *             if the mail API fails to access the message.
     * @throws DocumentException
     *             declared for compatibility with existing callers.
     */
    public void mapMessage(Message message) throws IOException, MessagingException, DocumentException {

        log.info("reading email:{}", message.getSubject());
        setLastMessage(message);
        setFirstMessageProcessed(false);

        // A Message is itself a Part; handlePart unwraps multipart and nested
        // message content recursively, so no further type dispatch is needed.
        handlePart(message);
    }

    /**
     * Handles every body part of a multipart container in order.
     * 
     * @param multiPart
     *            the container whose body parts are processed.
     */
    private void mapMultiPart(Multipart multiPart) throws MessagingException, IOException, DocumentException {

        for (int i = 0, n = multiPart.getCount(); i < n; i++) {
            BodyPart bodyPart = multiPart.getBodyPart(i);
            handlePart(bodyPart);
        }
    }

    /**
     * Recursively processes a part: multipart content is expanded, nested
     * parts/messages are descended into, and leaf parts are passed to
     * {@link #addPart(Part)}.
     * 
     * @param part
     *            the part to process.
     */
    private void handlePart(Part part) throws MessagingException, IOException, DocumentException {

        // fetch the content once; it drives both the logging and the dispatch
        Object content = part.getContent();

        if (log.isDebugEnabled()) {
            String fileName = part.getFileName();
            log.debug("\n\n\nhandlePart ==>>");
            log.debug("part.toString():{}", part);
            log.debug("part.getContent():{}", fileName == null ? String.valueOf(content) : "Attachment");
            log.debug("part.getContentType():{}", part.getContentType());
            log.debug("part.getFilename():{}", fileName);
            log.debug("part.isAttachment:{}", fileName != null);
            log.debug("part.isMessage:{}", content instanceof Message);
        }

        if (content instanceof Multipart) {
            Multipart multipart = (Multipart) content;
            // Body parts are never themselves Messages; a forwarded message is
            // detected in the branch below when a part's content is a Message.
            for (int i = 0, n = multipart.getCount(); i < n; i++) {
                handlePart(multipart.getBodyPart(i));
            }
        } else if (content instanceof Part) {
            if (content instanceof Message) {
                // need to mark this when we get a forwarded message so we
                // don't look for case numbers in forwarded emails.
                setFirstMessageProcessed(true);
            }
            handlePart((Part) content);
        } else {
            addPart(part);
            if (log.isDebugEnabled()) {
                log.debug("=== Add Part ===");
                // the content of a leaf part is not necessarily a String
                log.debug("{}", part.getFileName() != null ? "isAttachment" : String.valueOf(content));
            }
        }
        log.debug("<<== handlePart");
    }

    /**
     * @return true once a nested (forwarded) message has been encountered
     *         since the last call to {@link #mapMessage(Message)}, or the
     *         value last passed to {@link #setFirstMessageProcessed(boolean)}.
     */
    public boolean isFirstMessageProcessed() {

        return firstMessageProcessed;
    }

    /**
     * @param firstMessageProcessed
     *            the new value of the "first message processed" flag.
     */
    public void setFirstMessageProcessed(boolean firstMessageProcessed) {

        this.firstMessageProcessed = firstMessageProcessed;
    }

    /**
     * Diagnostic helper that logs the structure of a part, and recursively of
     * everything nested inside it, at info level. It does not change any
     * parser state.
     * 
     * @param part
     *            the part to describe.
     */
    private void showPart(Part part) throws IOException, MessagingException {

        Object content = part.getContent();
        String fileName = part.getFileName();

        log.info("\n\n\nshowPart ==>>");
        log.info("part.toString():{}", part);
        log.info("part.getContent():{}", fileName == null ? String.valueOf(content) : "Attachment");
        log.info("part.getContentType():{}", part.getContentType());
        log.info("part.getFilename():{}", fileName);
        log.info("part.isAttachment:{}", fileName != null);
        log.info("part.isMessage:{}", content instanceof Message);

        if (content instanceof Multipart) {
            log.info("MultiPart");

            Multipart multipart = (Multipart) content;
            for (int i = 0, n = multipart.getCount(); i < n; i++) {
                showPart(multipart.getBodyPart(i));
            }
        } else if (content instanceof Part) {
            showPart((Part) content);
        } else {
            log.info("=== Add Part ===");
            // the content of a leaf part is not necessarily a String
            log.info("{}", fileName != null ? "isAttachment" : String.valueOf(content));
        }
        log.info("<<== showPart");
    }

    /**
     * Classifies a leaf part.
     * <ul>
     * <li>Parts with a file name are treated as attachments and skipped.</li>
     * <li>A text/plain part switches on {@code ignoreHTML}.</li>
     * <li>A text/html part is converted to plain text unless
     * {@code ignoreHTML} is set; the converted text is currently discarded.</li>
     * <li>Anything else is ignored.</li>
     * </ul>
     * 
     * @param part
     *            the leaf part to classify.
     */
    private void addPart(Part part) throws IOException, MessagingException, DocumentException {

        String contentType = part.getContentType();
        boolean isAttachment = part.getFileName() != null;

        log.debug("isAttachment:{} contentType:{}", isAttachment, contentType);

        if (isAttachment) {
            return;
        }

        // getContentType() may return null; Locale.ROOT keeps the comparison
        // independent of the default locale (e.g. Turkish dotless i).
        String normalisedType = contentType == null ? "" : contentType.toLowerCase(Locale.ROOT);

        if (normalisedType.contains("text/plain")) {
            log.debug("process text/plain");
            setIgnoreHTML(true); // if we get any text/plain for body
        } else if (!isIgnoreHTML() && normalisedType.contains("text/html")) {
            log.debug("process text/html");
            InputStream html = part.getInputStream();
            InputStream text = null;
            try {
                text = convertHTMLToText(html);
            } finally {
                // release both the converted stream and the part's own stream
                IOUtils.closeQuietly(text);
                IOUtils.closeQuietly(html);
            }
        } else {
            log.debug("ignoring part [{}]", contentType);
        }
    }

    /**
     * Adds the body content of a message; currently delegates straight to
     * {@link #addContent(InputStream)} and does not use {@code message}.
     * 
     * @param message
     *            the message the content belongs to.
     * @param inputStream
     *            the body content.
     */
    private void addNewEMailMessage(Message message, InputStream inputStream)
            throws MessagingException, DocumentException, IOException {

        addContent(inputStream);
    }

    /**
     * Reads the whole stream as text and, when the stream supports marking,
     * rewinds it to where it started so it can be read again.
     * 
     * @param inputStream
     *            the body content; it is not closed by this method.
     */
    private void addContent(InputStream inputStream) throws DocumentException, IOException {

        boolean rewindable = inputStream.markSupported();
        if (rewindable) {
            // available() is only an estimate of the bytes readable without
            // blocking; use the largest limit so reset() stays valid after
            // the whole stream has been consumed.
            inputStream.mark(Integer.MAX_VALUE);
        }

        // NOTE(review): decoded with the platform default charset.
        String bodyContent = IOUtils.toString(inputStream);
        log.debug("read {} characters of body content", Integer.valueOf(bodyContent.length()));

        if (rewindable) {
            inputStream.reset();
        }
    }

    /**
     * Converts an HTML stream to plain text using {@code HtmlToText}.
     * <p>
     * NOTE(review): both directions use the platform default charset rather
     * than the charset declared by the mail part - confirm this is acceptable.
     * 
     * @param inputStream
     *            the HTML content; it is read fully but not closed here.
     * @return a new stream over the converted plain text.
     */
    private InputStream convertHTMLToText(InputStream inputStream) throws IOException {

        HtmlToText htmlToText = new HtmlToText();
        String plainText = htmlToText.map(IOUtils.toString(inputStream));
        return IOUtils.toInputStream(plainText);
    }

    /**
     * @param message
     *            the message to remember as the last message.
     */
    public void setLastMessage(Message message) {

        this.lastMessage = message;
    }

    /**
     * @return the message last passed to {@link #mapMessage(Message)} or
     *         {@link #setLastMessage(Message)}; null if none was set.
     */
    public Message getLastMessage() {

        return lastMessage;
    }

    /**
     * Gets the subject from the email if one exists.
     * 
     * @param email
     *            that we want to get the subject from.
     * @return the subject or "" if no subject found.
     */
    public static String getSubject(Message email) {

        if (email == null) {
            log.warn("Error getting Subject from email:no email supplied. Subject ignored");
            return "";
        }
        try {
            // must call email.getSubject() here; calling getSubject(email)
            // would recurse until the stack overflows.
            String subject = email.getSubject();
            // getSubject() yields null when the header is absent
            return subject == null ? "" : subject;
        } catch (Exception e) {
            // deliberately only a warning: a missing subject is not fatal
            log.warn("Error getting Subject from email:" + e.getMessage() + ". Subject ignored");
            return "";
        }
    }

    /**
     * @param ignoreHTML
     *            true to make the parser skip text/html parts.
     */
    public void setIgnoreHTML(boolean ignoreHTML) {

        this.ignoreHTML = ignoreHTML;
    }

    /**
     * @return true if text/html parts are being skipped.
     */
    public boolean isIgnoreHTML() {

        return ignoreHTML;
    }

}