fr.gouv.culture.vitam.eml.EmlExtract.java Source code

Java tutorial

Introduction

Here is the source code for fr.gouv.culture.vitam.eml.EmlExtract.java

Source

/**
 * This file is part of Vitam Project.
 * 
 * Copyright 2010, Frederic Bregier, and individual contributors by the @author
 * tags. See the COPYRIGHT.txt in the distribution for a full listing of individual contributors.
 * 
 * All Vitam Project is free software: you can redistribute it and/or modify it under the terms of
 * the GNU General Public License as published by the Free Software Foundation, either version 3 of
 * the License, or (at your option) any later version.
 * 
 * Vitam is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
 * Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License along with Vitam. If not, see
 * <http://www.gnu.org/licenses/>.
 */
package fr.gouv.culture.vitam.eml;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.text.ParseException;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Properties;

import javax.mail.Address;
import javax.mail.BodyPart;
import javax.mail.Message;
import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.Session;
import javax.mail.internet.MailDateFormat;
import javax.mail.internet.MimeMessage;

import org.dom4j.Document;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;

import fr.gouv.culture.vitam.digest.Base64;
import fr.gouv.culture.vitam.eml.StringUtils.EMAIL_FIELDS;
import fr.gouv.culture.vitam.extract.ExtractInfo;
import fr.gouv.culture.vitam.utils.Commands;
import fr.gouv.culture.vitam.utils.ConfigLoader;
import fr.gouv.culture.vitam.utils.StaticValues;
import fr.gouv.culture.vitam.utils.VitamArgument;
import fr.gouv.culture.vitam.utils.XmlDom;

/**
 * Class to try to handle EML files (email)
 * 
 * @author "Frederic Bregier"
 * 
 */
public class EmlExtract {

    /**
     * Reply relations collected while extracting messages: the key is the In-Reply-To value of
     * a message (chevrons removed) and the value is the comma-separated list of the Message-IDs
     * of the messages that declared this In-Reply-To. The map is static and filled by
     * extractInfoMessage, so its content accumulates across calls.
     */
    public static HashMap<String, String> filEmls = new HashMap<String, String>();

    /**
     * Appends to {@code root} one {@code entry} element describing {@code address}, made of an
     * emailName child and an emailAddress child.
     * 
     * @param root
     *            element receiving the new {@code entry} element
     * @param entry
     *            name of the element to create
     * @param address
     *            address to describe
     * @param except
     *            bare address to skip (compared ignoring case), or null to skip nothing
     * @return the full textual form of the address, or null when nothing was added (no address
     *         could be selected between chevrons, or the address equals {@code except})
     */
    private static String addAddress(Element root, String entry, Address address, String except) {
        final String full = address.toString();
        final String bare = StringUtils.selectChevron(full);
        if (bare == null) {
            return null;
        }
        if (except != null && bare.equalsIgnoreCase(except)) {
            return null;
        }
        // What remains once the "<address>" part is removed is used as the display name
        final String display = full.replace("<" + bare + ">", "");
        final Element holder = XmlDom.factory.createElement(entry);
        final Element nameElt = XmlDom.factory.createElement(EMAIL_FIELDS.emailName.name);
        final Element addressElt = XmlDom.factory.createElement(EMAIL_FIELDS.emailAddress.name);
        nameElt.setText(StringUtils.unescapeHTML(display, true, false));
        addressElt.setText(StringUtils.unescapeHTML(bare, true, false));
        holder.add(nameElt);
        holder.add(addressElt);
        root.add(holder);
        return full;
    }

    /**
     * Same as the {@code Address} variant but working on raw header values: each value may
     * hold several addresses separated by commas, and one {@code entry} element (emailName +
     * emailAddress) is appended to {@code root} for each of them.
     * 
     * @param root
     *            element receiving the new {@code entry} elements
     * @param entry
     *            name of the elements to create
     * @param addresses
     *            raw header values
     * @param except
     *            bare address to skip (compared ignoring case), or null to skip nothing
     */
    private static void addAddress(Element root, String entry, String[] addresses, String except) {
        for (String headerValue : addresses) {
            // split(",") returns the value itself when it holds no comma, so single and
            // multiple addresses go through the same path
            String[] candidates = headerValue.split(",");
            for (int i = 0; i < candidates.length; i++) {
                String candidate = candidates[i];
                String bare = StringUtils.selectChevron(candidate);
                boolean excluded = except != null && bare != null && bare.equalsIgnoreCase(except);
                if (bare == null || excluded) {
                    continue;
                }
                String display = candidate.replace("<" + bare + ">", "");
                Element holder = XmlDom.factory.createElement(entry);
                Element nameElt = XmlDom.factory.createElement(EMAIL_FIELDS.emailName.name);
                Element addressElt = XmlDom.factory.createElement(EMAIL_FIELDS.emailAddress.name);
                nameElt.setText(StringUtils.unescapeHTML(display, true, false));
                addressElt.setText(StringUtils.unescapeHTML(bare, true, false));
                holder.add(nameElt);
                holder.add(addressElt);
                root.add(holder);
            }
        }
    }

    /**
     * Parses an EML file and returns the XML element describing the email it contains.
     * 
     * The extraction tries to cover the following properties, taken from
     * http://www.significantproperties.org.uk/email-testingreport.html :
     * 
     * message-id (Message-ID), References (References), In-Reply-To (In-Reply-To), Attachment
     * subject (Subject), keywords 
     * sent-date (Date), Received-date (in Received last date), Trace-field (Received?)
     * 
     * From (From), To (To), CC (Cc), BCC (Bcc), Content-Type, Content-Transfer-Encoding
     * 
     * ? DomainKey-Signature, Sender, X-Original-Sender, X-Forwarded-Message-Id, 
     * 
     * 1) Core property set
     * 
     * The core property set indicates the minimum amount of information that is considered
     * necessary to establish the authenticity and integrity of the email message:
     * 
     * Local-part, Domain-part, Relationship, Subject, Trace-field, Message body with no mark-up,
     * Attachments
     * 
     * 2) Message thread scenario
     * 
     * Email is frequently used as a communication method between two or more people. To
     * understand the context in which a message was created it may be necessary to refer to
     * earlier messages. To identify the thread of a discussion, the following fields should be
     * provided, in addition to the core property set:
     * 
     * Local-part, Domain-part, Relationship, Subject, Trace-field, Message body with no mark-up,
     * Attachments, Message-ID, References
     * 
     * 3) Recommended property set
     * 
     * The recommended property set indicates additional information that should be provided in
     * an ideal scenario, if it is present within the email:
     * 
     * Local-part, Domain-part, Domain-literal (if present), Relationship, Subject, Trace-field,
     * Attachments, Message-ID, References, Sent-date, Received date, Display name, In-reply-to,
     * Keywords, Message body & associated mark-up (see table 6 for scenarios)
     * 
     * While the message is processed, {@code argument.currentOutputDir} is set (when it was
     * null) to {@code config.outputDir} or, failing that, to the directory holding the EML file.
     * Its previous value is restored before returning, including on the error paths.
     * 
     * @param emlFile
     *            the EML file to parse
     * @param filename
     *            not used by this method
     * @param argument
     *            current arguments; its currentOutputDir is temporarily changed
     * @param config
     *            current configuration
     * @return the formatEML element; when the file cannot be opened or parsed, an element
     *         carrying only the status attribute "Error during identification"
     */
    public static Element extractInfoEmail(File emlFile, String filename, VitamArgument argument,
            ConfigLoader config) {
        File oldDir = argument.currentOutputDir;
        try {
            if (argument.currentOutputDir == null) {
                if (config.outputDir != null) {
                    argument.currentOutputDir = new File(config.outputDir);
                } else {
                    // getAbsoluteFile() first: getParentFile() is null for a bare relative name
                    argument.currentOutputDir = emlFile.getAbsoluteFile().getParentFile();
                }
            }

            MimeMessage message = null;
            Exception failure = null;
            try {
                message = createOneMessageFromFile(emlFile);
            } catch (FileNotFoundException e) {
                failure = e;
            } catch (MessagingException e) {
                failure = e;
            }
            if (failure != null) {
                failure.printStackTrace();
                Element newElt = XmlDom.factory.createElement(EMAIL_FIELDS.formatEML.name);
                String status = "Error during identification";
                newElt.addAttribute(EMAIL_FIELDS.status.name, status);
                return newElt;
            }
            Element root = XmlDom.factory.createElement(EMAIL_FIELDS.formatEML.name);
            extractInfoMessage(message, root, argument, config);
            return root;
        } finally {
            // Always give the caller back the output directory it had, even on error
            argument.currentOutputDir = oldDir;
        }
    }

    /**
     * Extracts the headers and content information of one message and attaches them to
     * {@code root} as a metadata element (and a keywords element when keyword extraction is
     * requested). The status attribute of {@code root} is set to "ok" on success and to
     * "Error during identification" when an exception interrupts the extraction.
     * 
     * Addresses (From/Sender, Reply-To, To, Cc, Bcc) are first read through the parsed JavaMail
     * accessors; when parsing fails, the raw header value is used as a fallback. Reply relations
     * (In-Reply-To to Message-ID) are recorded in {@link #filEmls}.
     * 
     * {@code argument.currentOutputDir} is temporarily changed during the call (to
     * {@code config.outputDir} when it was null, then to a sub directory named after the rank
     * id when {@code config.extractFile} is set) and restored before returning.
     * 
     * @param message
     *            the message to analyze
     * @param root
     *            element receiving the rank id, the status attribute and the extracted elements
     * @param argument
     *            current arguments (extractKeyword, currentOutputDir)
     * @param config
     *            current configuration (outputDir, extractFile, rank id)
     * @return the value returned by handleMessage when {@code argument.extractKeyword} is set,
     *         otherwise an empty string
     */
    public static String extractInfoMessage(MimeMessage message, Element root, VitamArgument argument,
            ConfigLoader config) {
        File oldDir = argument.currentOutputDir;
        if (argument.currentOutputDir == null) {
            if (config.outputDir != null) {
                argument.currentOutputDir = new File(config.outputDir);
            }
        }
        Element keywords = XmlDom.factory.createElement(EMAIL_FIELDS.keywords.name);
        Element metadata = XmlDom.factory.createElement(EMAIL_FIELDS.metadata.name);
        String skey = "";
        String id = config.addRankId(root);
        Address[] from = null;
        Element sub2 = null;
        try {
            from = message.getFrom();
        } catch (MessagingException e1) {
            // Unparsable From: keep the raw header value
            String[] partialResult;
            try {
                partialResult = message.getHeader("From");
                if (partialResult != null && partialResult.length > 0) {
                    sub2 = XmlDom.factory.createElement(EMAIL_FIELDS.from.name);
                    Element add = XmlDom.factory.createElement(EMAIL_FIELDS.fromUnit.name);
                    add.setText(partialResult[0]);
                    sub2.add(add);
                }
            } catch (MessagingException e) {
                // best effort: the raw header is not available either
            }
        }
        Address sender = null;
        try {
            sender = message.getSender();
        } catch (MessagingException e1) {
            // Unparsable Sender: keep the raw header value, unless From already provided one
            String[] partialResult;
            try {
                partialResult = message.getHeader("Sender");
                if (partialResult != null && partialResult.length > 0) {
                    if (sub2 == null) {
                        sub2 = XmlDom.factory.createElement(EMAIL_FIELDS.from.name);
                        Element add = XmlDom.factory.createElement(EMAIL_FIELDS.fromUnit.name);
                        add.setText(partialResult[0]);
                        sub2.add(add);
                    }
                }
            } catch (MessagingException e) {
                // best effort: the raw header is not available either
            }
        }
        if (from != null && from.length > 0) {
            String value0 = null;
            Element sub = (sub2 != null ? sub2 : XmlDom.factory.createElement(EMAIL_FIELDS.from.name));
            if (sender != null) {
                String senderValue = addAddress(sub, EMAIL_FIELDS.fromUnit.name, sender, null);
                if (senderValue != null) {
                    // addAddress returns the full "Name <address>" text but compares its
                    // "except" argument with the bare address, so reduce the sender to that
                    // same form; otherwise the sender is never recognized among the From
                    // addresses and is listed twice.
                    value0 = StringUtils.selectChevron(senderValue);
                }
            }
            for (Address address : from) {
                addAddress(sub, EMAIL_FIELDS.fromUnit.name, address, value0);
            }
            metadata.add(sub);
        } else if (sender != null) {
            Element sub = (sub2 != null ? sub2 : XmlDom.factory.createElement(EMAIL_FIELDS.from.name));
            addAddress(sub, EMAIL_FIELDS.fromUnit.name, sender, null);
            metadata.add(sub);
        } else {
            if (sub2 != null) {
                metadata.add(sub2);
            }
        }
        Address[] replyTo = null;
        try {
            replyTo = message.getReplyTo();
            if (replyTo != null && replyTo.length > 0) {
                Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.replyTo.name);
                for (Address address : replyTo) {
                    addAddress(sub, EMAIL_FIELDS.fromUnit.name, address, null);
                }
                metadata.add(sub);
            }
        } catch (MessagingException e1) {
            String[] partialResult;
            try {
                // The header field is named "Reply-To" (RFC 5322), not "ReplyTo"
                partialResult = message.getHeader("Reply-To");
                if (partialResult != null && partialResult.length > 0) {
                    sub2 = XmlDom.factory.createElement(EMAIL_FIELDS.replyTo.name);
                    addAddress(sub2, EMAIL_FIELDS.fromUnit.name, partialResult, null);
                    metadata.add(sub2);
                }
            } catch (MessagingException e) {
                // best effort: the raw header is not available either
            }
        }
        Address[] toRecipients = null;
        try {
            toRecipients = message.getRecipients(Message.RecipientType.TO);
            if (toRecipients != null && toRecipients.length > 0) {
                Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.toRecipients.name);
                for (Address address : toRecipients) {
                    addAddress(sub, EMAIL_FIELDS.toUnit.name, address, null);
                }
                metadata.add(sub);
            }
        } catch (MessagingException e1) {
            String[] partialResult;
            try {
                partialResult = message.getHeader("To");
                if (partialResult != null && partialResult.length > 0) {
                    sub2 = XmlDom.factory.createElement(EMAIL_FIELDS.toRecipients.name);
                    addAddress(sub2, EMAIL_FIELDS.toUnit.name, partialResult, null);
                    metadata.add(sub2);
                }
            } catch (MessagingException e) {
                // best effort: the raw header is not available either
            }
        }
        Address[] ccRecipients;
        try {
            ccRecipients = message.getRecipients(Message.RecipientType.CC);
            if (ccRecipients != null && ccRecipients.length > 0) {
                Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.ccRecipients.name);
                for (Address address : ccRecipients) {
                    addAddress(sub, EMAIL_FIELDS.ccUnit.name, address, null);
                }
                metadata.add(sub);
            }
        } catch (MessagingException e1) {
            String[] partialResult;
            try {
                partialResult = message.getHeader("Cc");
                if (partialResult != null && partialResult.length > 0) {
                    sub2 = XmlDom.factory.createElement(EMAIL_FIELDS.ccRecipients.name);
                    addAddress(sub2, EMAIL_FIELDS.ccUnit.name, partialResult, null);
                    metadata.add(sub2);
                }
            } catch (MessagingException e) {
                // best effort: the raw header is not available either
            }
        }
        Address[] bccRecipients;
        try {
            bccRecipients = message.getRecipients(Message.RecipientType.BCC);
            if (bccRecipients != null && bccRecipients.length > 0) {
                Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.bccRecipients.name);
                for (Address address : bccRecipients) {
                    addAddress(sub, EMAIL_FIELDS.bccUnit.name, address, null);
                }
                metadata.add(sub);
            }
        } catch (MessagingException e1) {
            String[] partialResult;
            try {
                // Fallback must read the Bcc header (reading Cc here would report the Cc
                // recipients as blind copies)
                partialResult = message.getHeader("Bcc");
                if (partialResult != null && partialResult.length > 0) {
                    sub2 = XmlDom.factory.createElement(EMAIL_FIELDS.bccRecipients.name);
                    addAddress(sub2, EMAIL_FIELDS.bccUnit.name, partialResult, null);
                    metadata.add(sub2);
                }
            } catch (MessagingException e) {
                // best effort: the raw header is not available either
            }
        }
        try {
            String subject = message.getSubject();
            if (subject != null) {
                Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.subject.name);
                sub.setText(StringUtils.unescapeHTML(subject, true, false));
                metadata.add(sub);
            }
            Date sentDate = message.getSentDate();
            if (sentDate != null) {
                Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.sentDate.name);
                sub.setText(sentDate.toString());
                metadata.add(sub);
            }
            Date receivedDate = message.getReceivedDate();
            if (receivedDate != null) {
                Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.receivedDate.name);
                sub.setText(receivedDate.toString());
                metadata.add(sub);
            }
            String[] headers = message.getHeader("Received");
            if (headers != null) {
                Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.receptionTrace.name);
                MailDateFormat mailDateFormat = null;
                long maxTime = 0;
                if (receivedDate == null) {
                    mailDateFormat = new MailDateFormat();
                }
                for (String string : headers) {
                    Element sub3 = XmlDom.factory.createElement(EMAIL_FIELDS.trace.name);
                    sub3.setText(StringUtils.unescapeHTML(string, true, false));
                    sub.add(sub3);
                    if (receivedDate == null) {
                        // No received date on the message: use the latest date found after
                        // the last ';' of the Received headers
                        int pos = string.lastIndexOf(';');
                        if (pos > 0) {
                            // substring(pos + 1) is always in range, even when ';' ends the
                            // header; folding characters are turned into spaces
                            String recvdate = string.substring(pos + 1).replaceAll("[\t\n\r\f]+", " ").trim();
                            if (recvdate.length() > 0) {
                                try {
                                    Date date = mailDateFormat.parse(recvdate);
                                    if (date.getTime() > maxTime) {
                                        maxTime = date.getTime();
                                    }
                                } catch (ParseException e) {
                                    // not a date: this trace does not contribute
                                }
                            }
                        }
                    }
                }
                if (receivedDate == null && maxTime > 0) {
                    // Only emitted when a date was really found (avoids a 1970 date)
                    Element subdate = XmlDom.factory.createElement(EMAIL_FIELDS.receivedDate.name);
                    Date date = new Date(maxTime);
                    subdate.setText(date.toString());
                    metadata.add(subdate);
                }
                metadata.add(sub);
            }
            int internalSize = message.getSize();
            if (internalSize > 0) {
                Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.emailSize.name);
                sub.setText(Integer.toString(internalSize));
                metadata.add(sub);
            }
            String encoding = message.getEncoding();
            if (encoding != null) {
                Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.encoding.name);
                sub.setText(StringUtils.unescapeHTML(encoding, true, false));
                metadata.add(sub);
            }
            String description = message.getDescription();
            if (description != null) {
                Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.description.name);
                sub.setText(StringUtils.unescapeHTML(description, true, false));
                metadata.add(sub);
            }
            String contentType = message.getContentType();
            if (contentType != null) {
                Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.contentType.name);
                sub.setText(StringUtils.unescapeHTML(contentType, true, false));
                metadata.add(sub);
            }
            headers = message.getHeader("Content-Transfer-Encoding");
            if (headers != null) {
                Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.contentTransferEncoding.name);
                StringBuilder builder = new StringBuilder();
                for (String string : headers) {
                    builder.append(StringUtils.unescapeHTML(string, true, false));
                    builder.append(' ');
                }
                sub.setText(builder.toString());
                metadata.add(sub);
            }
            String[] contentLanguage = message.getContentLanguage();
            if (contentLanguage != null) {
                Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.contentLanguage.name);
                StringBuilder builder = new StringBuilder();
                for (String string : contentLanguage) {
                    builder.append(StringUtils.unescapeHTML(string, true, false));
                    builder.append(' ');
                }
                sub.setText(builder.toString());
                metadata.add(sub);
            }
            String contentId = message.getContentID();
            if (contentId != null) {
                Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.contentId.name);
                sub.setText(StringUtils.removeChevron(StringUtils.unescapeHTML(contentId, true, false)));
                metadata.add(sub);
            }
            String disposition = message.getDisposition();
            if (disposition != null) {
                Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.disposition.name);
                sub.setText(StringUtils.removeChevron(StringUtils.unescapeHTML(disposition, true, false)));
                metadata.add(sub);
            }
            headers = message.getHeader("Keywords");
            if (headers != null) {
                Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.msgKeywords.name);
                StringBuilder builder = new StringBuilder();
                for (String string : headers) {
                    builder.append(StringUtils.unescapeHTML(string, true, false));
                    builder.append(' ');
                }
                sub.setText(builder.toString());
                metadata.add(sub);
            }
            String messageId = message.getMessageID();
            if (messageId != null) {
                messageId = StringUtils.removeChevron(StringUtils.unescapeHTML(messageId, true, false)).trim();
                if (messageId.length() > 1) {
                    Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.messageId.name);
                    sub.setText(messageId);
                    metadata.add(sub);
                }
            }
            headers = message.getHeader("In-Reply-To");
            String inreplyto = null;
            if (headers != null) {
                StringBuilder builder = new StringBuilder();
                for (String string : headers) {
                    builder.append(StringUtils.removeChevron(StringUtils.unescapeHTML(string, true, false)));
                    builder.append(' ');
                }
                inreplyto = builder.toString().trim();
                if (inreplyto.length() > 0) {
                    Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.inReplyTo.name);
                    sub.setText(inreplyto);
                    if (messageId != null && messageId.length() > 1) {
                        // Record this message as one of the replies to "inreplyto"
                        String old = filEmls.get(inreplyto);
                        if (old == null) {
                            old = messageId;
                        } else {
                            old += "," + messageId;
                        }
                        filEmls.put(inreplyto, old);
                    }
                    metadata.add(sub);
                }
            }
            headers = message.getHeader("References");
            if (headers != null) {
                Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.references.name);
                StringBuilder builder = new StringBuilder();
                for (String string : headers) {
                    builder.append(StringUtils.removeChevron(StringUtils.unescapeHTML(string, true, false)));
                    builder.append(' ');
                }
                String[] refs = builder.toString().trim().split(" ");
                for (String string : refs) {
                    if (string.length() > 0) {
                        Element ref = XmlDom.factory.createElement(EMAIL_FIELDS.reference.name);
                        ref.setText(string);
                        sub.add(ref);
                    }
                }
                metadata.add(sub);
            }
            Element prop = XmlDom.factory.createElement(EMAIL_FIELDS.properties.name);
            headers = message.getHeader("X-Priority");
            if (headers == null) {
                headers = message.getHeader("Priority");
                if (headers != null && headers.length > 0) {
                    prop.addAttribute(EMAIL_FIELDS.priority.name, headers[0]);
                }
            } else if (headers.length > 0) {
                // Numeric X-Priority levels are turned into labels; any other value is kept
                String imp = headers[0];
                try {
                    int priority = Integer.parseInt(imp);
                    switch (priority) {
                    case 5:
                        imp = "LOWEST";
                        break;
                    case 4:
                        imp = "LOW";
                        break;
                    case 3:
                        imp = "NORMAL";
                        break;
                    case 2:
                        imp = "HIGH";
                        break;
                    case 1:
                        imp = "HIGHEST";
                        break;
                    default:
                        imp = "LEV" + priority;
                    }
                } catch (NumberFormatException e) {
                    // ignore since imp will be used as returned
                }
                prop.addAttribute(EMAIL_FIELDS.priority.name, imp);
            }
            headers = message.getHeader("Sensitivity");
            if (headers != null && headers.length > 0) {
                prop.addAttribute(EMAIL_FIELDS.sensitivity.name, headers[0]);
            }
            headers = message.getHeader("X-RDF");
            if (headers != null && headers.length > 0) {
                System.err.println("Found X-RDF");
                StringBuilder builder = new StringBuilder();
                for (String string : headers) {
                    builder.append(string);
                    builder.append("\n");
                }
                try {
                    // The header is Base64 decoded then parsed as XML.
                    // NOTE(review): the decoded bytes are read with the platform charset
                    byte[] decoded = org.apache.commons.codec.binary.Base64.decodeBase64(builder.toString());
                    String rdf = new String(decoded);
                    Document tempDocument = DocumentHelper.parseText(rdf);
                    Element xrdf = prop.addElement("x-rdf");
                    xrdf.add(tempDocument.getRootElement());
                } catch (Exception e) {
                    System.err.println("Cannot decode X-RDF: " + e.getMessage());
                }
            }
            try {
                File old = argument.currentOutputDir;
                if (config.extractFile) {
                    // Extracted files of this message go to a sub directory named after its id
                    File newOutDir = new File(argument.currentOutputDir, id);
                    newOutDir.mkdirs();
                    argument.currentOutputDir = newOutDir;
                }
                if (argument.extractKeyword) {
                    skey = handleMessage(message, metadata, prop, id, argument, config);
                    // should have hasAttachment
                    if (prop.hasContent()) {
                        metadata.add(prop);
                    }
                    if (metadata.hasContent()) {
                        root.add(metadata);
                    }
                    ExtractInfo.exportMetadata(keywords, skey, "", config, null);
                    if (keywords.hasContent()) {
                        root.add(keywords);
                    }
                } else {
                    handleMessage(message, metadata, prop, id, argument, config);
                    // should have hasAttachment
                    if (prop.hasContent()) {
                        metadata.add(prop);
                    }
                    if (metadata.hasContent()) {
                        root.add(metadata);
                    }
                }
                argument.currentOutputDir = old;
            } catch (IOException e) {
                System.err.println(StaticValues.LBL.error_error.get() + e.toString());
            }
            try {
                message.getInputStream().close();
            } catch (IOException e) {
                System.err.println(StaticValues.LBL.error_error.get() + e.toString());
            }
            root.addAttribute(EMAIL_FIELDS.status.name, "ok");
        } catch (MessagingException e) {
            System.err.println(StaticValues.LBL.error_error.get() + e.toString());
            e.printStackTrace();
            String status = "Error during identification";
            root.addAttribute(EMAIL_FIELDS.status.name, status);
        } catch (Exception e) {
            System.err.println(StaticValues.LBL.error_error.get() + e.toString());
            e.printStackTrace();
            String status = "Error during identification";
            root.addAttribute(EMAIL_FIELDS.status.name, status);
        }
        // Restores the caller's output directory whatever happened above
        argument.currentOutputDir = oldDir;
        return skey;
    }

    /**
     * Builds a MimeMessage from the content of an EML file, using a default mail session
     * based on the system properties.
     * 
     * The MimeMessage constructor reads and parses the stream itself, so the file stream is
     * closed before returning instead of being left open.
     * 
     * @param emlFile
     *            the EML file to read
     * @return the parsed message
     * @throws FileNotFoundException
     *             if the file cannot be opened
     * @throws MessagingException
     *             if the content cannot be parsed as a MIME message
     */
    private static final MimeMessage createOneMessageFromFile(File emlFile)
            throws FileNotFoundException, MessagingException {
        Properties props = System.getProperties();
        Session session = Session.getDefaultInstance(props);
        InputStream source = new FileInputStream(emlFile);
        try {
            return new MimeMessage(session, source);
        } finally {
            try {
                source.close();
            } catch (IOException ignored) {
                // best effort: the message is already parsed (or its failure is propagating)
            }
        }
    }

    /**
     * Splits a Content-Type header value into its media type and charset, and selects the
     * file extension used to save the corresponding body.
     * 
     * The "charset=" parameter name and the media type are matched ignoring case, and a
     * charset given between double quotes is unquoted. An empty charset is reported as null.
     * 
     * @param contentType
     *            the Content-Type value, e.g. {@code text/plain; charset="UTF-8"}; may be null
     * @param contentTypeEncoding
     *            the Content-Transfer-Encoding value, copied as is into the result
     * @return an array of 4 items: [0] the media type without its parameters (case unchanged),
     *         [1] the charset or null when none is declared, [2] {@code contentTypeEncoding},
     *         [3] ".txt" for text/plain, ".html" for text/html, ".unknown" otherwise
     */
    private static final String[] extractContentType(String contentType, String contentTypeEncoding) {
        String charset = null;
        if (contentType != null) {
            int pos = contentType.indexOf(';');
            if (pos > 0) {
                String parameters = contentType.substring(pos + 1).trim();
                contentType = contentType.substring(0, pos).trim();
                // Case-insensitive search of the parameter name; regionMatches is used rather
                // than toLowerCase() so that the result does not depend on the default locale
                final String key = "charset=";
                int start = -1;
                for (int i = 0; i + key.length() <= parameters.length(); i++) {
                    if (parameters.regionMatches(true, i, key, 0, key.length())) {
                        start = i;
                        break;
                    }
                }
                if (start >= 0) {
                    charset = parameters.substring(start + key.length()).trim();
                    // Drop the parameters that follow the charset
                    pos = charset.indexOf(';');
                    if (pos > 0) {
                        charset = charset.substring(0, pos).trim();
                    }
                    if (charset.startsWith("\"")) {
                        // The closing quote is searched right after the opening one
                        pos = charset.indexOf('"', 1);
                        if (pos > 0) {
                            charset = charset.substring(1, pos).trim();
                        }
                    }
                    if (charset.length() == 0) {
                        charset = null;
                    }
                }
            }
        }
        String[] result = new String[4];
        result[0] = contentType;
        result[1] = charset;
        result[2] = contentTypeEncoding;
        if ("text/plain".equalsIgnoreCase(contentType)) {
            result[3] = ".txt";
        } else if ("text/html".equalsIgnoreCase(contentType)) {
            result[3] = ".html";
        } else {
            result[3] = ".unknown";
        }
        return result;
    }

    /**
     * Decodes a message body and, when file extraction is enabled, writes it to the output
     * directory.
     * <p>
     * The body is decoded only when {@code config.extractFile} or {@code argument.extractKeyword}
     * is set; otherwise the stream is not read and {@code null} is returned. A missing
     * Content-Transfer-Encoding ({@code aresult[2] == null}) is treated as "not quoted-printable"
     * in both modes.
     *
     * @param stream the body content to decode
     * @param aresult the array produced by {@code extractContentType}: [1] charset,
     *            [2] transfer encoding, [3] file extension
     * @param id identifier used as the prefix of the written file name ({@code id + "_body" + ext})
     * @param argument run arguments; supplies the output directory and the keyword flag
     * @param config configuration; supplies the file extraction flag
     * @return the decoded body text, or {@code null} when neither extraction mode is enabled
     * @throws MessagingException declared for the decoding helpers
     * @throws IOException if decoding or writing the output file fails
     */
    private static final String saveBody(InputStream stream, String[] aresult, String id, VitamArgument argument,
            ConfigLoader config) throws MessagingException, IOException {
        if (!config.extractFile && !argument.extractKeyword) {
            return null;
        }
        String tosave;
        // Constant-first comparison: aresult[2] is null when the part has no transfer encoding.
        if ("quoted-printable".equals(aresult[2])) {
            tosave = StringUtils.unescapeQuotedPrintable(stream, aresult[1]);
        } else {
            tosave = StringUtils.undecodeString(stream, aresult[1]);
        }
        if (config.extractFile) {
            FileOutputStream outputStream = new FileOutputStream(
                    new File(argument.currentOutputDir, id + "_body" + aresult[3]));
            try {
                outputStream.write(tosave.getBytes(StaticValues.CURRENT_OUTPUT_ENCODING));
                outputStream.flush();
            } finally {
                // Always release the file handle, even when the write fails.
                outputStream.close();
            }
        }
        return tosave;
    }

    /**
     * Handles the content of a top-level message.
     * <p>
     * A String content is described by a {@code body} element added to {@code metadata} and is
     * then passed to {@code saveBody}. A Multipart content marks {@code prop} as having
     * attachments and is delegated to {@code handleMultipart}. Any other content type is ignored.
     *
     * @param message the message to inspect
     * @param metadata element receiving the {@code body} or attachments description
     * @param prop element receiving the has-attachment attribute for multipart messages
     * @param id identifier forwarded to the body/attachment handlers
     * @param argument run arguments
     * @param config configuration
     * @return the value of {@code saveBody} for a String content; for a Multipart content the
     *         value of {@code handleMultipart} when keyword extraction is on, else an empty
     *         string; an empty string for any other content
     * @throws IOException if reading the content fails
     * @throws MessagingException if the message cannot be accessed
     */
    private static final String handleMessage(Message message, Element metadata, Element prop, String id,
            VitamArgument argument, ConfigLoader config) throws IOException, MessagingException {
        final Object content = message.getContent();
        final String[] encodings = message.getHeader("Content-Transfer-Encoding");
        final String encoding = (encodings != null && encodings.length > 0) ? encodings[0] : null;
        final String[] aresult = extractContentType(message.getContentType(), encoding);

        if (content instanceof String) {
            Element body = XmlDom.factory.createElement("body");
            body.addAttribute("mime", aresult[0]);
            if (aresult[1] != null) {
                body.addAttribute("charset", aresult[1]);
            }
            metadata.add(body);
            return saveBody(message.getInputStream(), aresult, id, argument, config);
        }

        if (content instanceof Multipart) {
            prop.addAttribute(EMAIL_FIELDS.hasAttachment.name, "true");
            Element attachments = XmlDom.factory.createElement(EMAIL_FIELDS.attachments.name);
            String keywords = handleMultipart((Multipart) content, attachments, id, argument, config);
            // Only attach the attachments element when something was recorded in it.
            if (attachments.hasContent()) {
                metadata.add(attachments);
            }
            return argument.extractKeyword ? keywords : "";
        }

        return "";
    }

    /**
     * Walks the parts of a top-level multipart and records each one under {@code identification}.
     * <p>
     * The attachment-number attribute is set to the part count minus one. String parts are saved
     * through {@code saveBody}; InputStream parts go to {@code addSubIdentities} and are closed
     * afterwards; nested messages go to {@code extractInfoMessage}; nested multiparts go to
     * {@code handleMultipartRecur} with {@code id + "_" + index} as identifier.
     *
     * @param mp the multipart to walk
     * @param identification element receiving the description of every part
     * @param id identifier forwarded to the handlers
     * @param argument run arguments
     * @param config configuration
     * @return the space-prefixed concatenation of the text returned for each part; results of
     *         non-String parts are included only when keyword extraction is on
     * @throws MessagingException if a part cannot be accessed
     * @throws IOException if reading a part fails
     */
    private static final String handleMultipart(Multipart mp, Element identification, String id,
            VitamArgument argument, ConfigLoader config) throws MessagingException, IOException {
        final int partCount = mp.getCount();
        String result = "";
        identification.addAttribute(EMAIL_FIELDS.attNumber.name, Integer.toString(partCount - 1));
        for (int index = 0; index < partCount; index++) {
            final BodyPart part = mp.getBodyPart(index);
            final Object content = part.getContent();

            if (content instanceof String) {
                final String[] encodings = part.getHeader("Content-Transfer-Encoding");
                final String encoding = (encodings != null && encodings.length > 0) ? encodings[0] : null;
                final String[] aresult = extractContentType(part.getContentType(), encoding);

                final String mime = (aresult[0] != null) ? aresult[0] : "unknown";
                final String extension = (aresult[3] != null) ? aresult[3].substring(1) : "unknown";

                Element bodyRoot = XmlDom.factory.createElement("body");
                Element nestedIdentification = XmlDom.factory.createElement("identification");
                Element identity = XmlDom.factory.createElement("identity");
                identity.addAttribute("format", "Internet Message Body Format");
                identity.addAttribute("mime", mime);
                identity.addAttribute("extensions", extension);
                if (aresult[1] != null) {
                    identity.addAttribute("charset", aresult[1]);
                }
                identification.add(identity);
                bodyRoot.add(nestedIdentification);
                identification.add(bodyRoot);

                result += " " + saveBody(part.getInputStream(), aresult, id, argument, config);
                continue;
            }

            if (content instanceof InputStream) {
                final InputStream attachment = (InputStream) content;
                final String keywords = addSubIdentities(identification, part, attachment, argument, config);
                if (argument.extractKeyword) {
                    result += " " + keywords;
                }
                attachment.close();
                continue;
            }

            if (content instanceof Message) {
                final Message nested = (Message) content;
                // Same identity as: <identity format="Internet Message Format" mime="message/rfc822"
                // puid="fmt/278" extensions="eml"/>
                Element emlRoot = XmlDom.factory.createElement(EMAIL_FIELDS.formatEML.name);
                Element nestedIdentification = XmlDom.factory.createElement("identification");
                Element identity = XmlDom.factory.createElement("identity");
                identity.addAttribute("format", "Internet Message Format");
                identity.addAttribute("mime", "message/rfc822");
                identity.addAttribute("puid", "fmt/278");
                identity.addAttribute("extensions", "eml");
                identification.add(identity);
                emlRoot.add(nestedIdentification);
                identification.add(emlRoot);
                if (!argument.extractKeyword) {
                    extractInfoMessage((MimeMessage) nested, emlRoot, argument, config);
                } else {
                    result += " " + extractInfoMessage((MimeMessage) nested, emlRoot, argument, config);
                }
                continue;
            }

            if (content instanceof Multipart) {
                final String keywords = handleMultipartRecur((Multipart) content, identification,
                        id + "_" + index, argument, config);
                if (argument.extractKeyword) {
                    result += " " + keywords;
                }
            }
        }
        return result;
    }

    /**
     * Describes one attachment as a sub-identity element and adds it to {@code identification}.
     * <p>
     * File name, size, description and disposition are read on a best-effort basis: a
     * {@link MessagingException} on any of them only skips that field. The attachment content is
     * then copied to a file (in the output directory when {@code config.extractFile} is set,
     * otherwise to a temporary file that is deleted before returning) and passed to
     * {@code Commands.addFormatIdentification}.
     * <p>
     * This method never propagates a checked exception; failures are reported through the status
     * attribute of the created element.
     *
     * @param identification parent element receiving the new sub-identity element
     * @param bp the body part holding the attachment metadata
     * @param inputStream the attachment content; it is read here but not closed
     * @param argument run arguments; supplies the output directory and the keyword flag
     * @param config configuration; supplies the file extraction flag
     * @return when keyword extraction is on and keywords were found, the keyword values repeated
     *         according to their occurrence rank and separated by spaces; otherwise an empty string
     */
    private static final String addSubIdentities(Element identification, BodyPart bp, InputStream inputStream,
            VitamArgument argument, ConfigLoader config) {
        Element newElt = XmlDom.factory.createElement(EMAIL_FIELDS.subidentity.name);
        String filename = null;
        String result = "";
        try {
            filename = bp.getFileName();
            // NOTE(review): the name comes from the message itself; presumably toFileName strips
            // path separators before it is used to build a path below -- confirm.
            filename = StringUtils.toFileName(filename);
            if (filename != null) {
                Element elt = XmlDom.factory.createElement(EMAIL_FIELDS.filename.name);
                elt.setText(filename);
                newElt.add(elt);
            }
        } catch (MessagingException e) {
            // Best effort: the file name is optional metadata.
        }
        if (filename == null) {
            // Also reached when getFileName() failed: a null name would make
            // new File(parent, null) throw a NullPointerException further down.
            filename = "eml.eml";
        }
        try {
            int size = bp.getSize();
            if (size > 0) {
                Element elt = XmlDom.factory.createElement(EMAIL_FIELDS.attSize.name);
                elt.setText(Integer.toString(size));
                newElt.add(elt);
            }
        } catch (MessagingException e) {
            // Best effort: the size is optional metadata.
        }
        try {
            String description = bp.getDescription();
            if (description != null) {
                Element elt = XmlDom.factory.createElement(EMAIL_FIELDS.description.name);
                elt.setText(description);
                newElt.add(elt);
            }
        } catch (MessagingException e) {
            // Best effort: the description is optional metadata.
        }
        try {
            String disposition = bp.getDisposition();
            if (disposition != null) {
                Element elt = XmlDom.factory.createElement(EMAIL_FIELDS.disposition.name);
                elt.setText(disposition);
                newElt.add(elt);
            }
        } catch (MessagingException e) {
            // Best effort: the disposition is optional metadata.
        }
        File filetemp = null;
        FileOutputStream outputStream = null;
        try {
            // Force out to analysis
            if (config.extractFile) {
                filetemp = new File(argument.currentOutputDir, filename);
            } else {
                filetemp = File.createTempFile(StaticValues.PREFIX_TEMPFILE, filename);
            }
            byte[] buffer = new byte[8192];
            int read = 0;
            outputStream = new FileOutputStream(filetemp);
            while ((read = inputStream.read(buffer)) >= 0) {
                outputStream.write(buffer, 0, read);
            }
            outputStream.close();
            outputStream = null;
        } catch (IOException e1) {
            // Close first: deleting a file that still has an open handle can fail.
            if (outputStream != null) {
                try {
                    outputStream.close();
                } catch (IOException e) {
                    // Nothing more can be done; the copy failure is reported below.
                }
            }
            if (filetemp != null && !config.extractFile) {
                filetemp.delete();
            }
            String status = "Error during access to attachment";
            newElt.addAttribute(EMAIL_FIELDS.status.name, status);
            identification.add(newElt);
            return "";
        }
        try {
            Commands.addFormatIdentification(newElt, filename, filetemp, config, argument);
            if (argument.extractKeyword) {
                // get back keyword in the main list
                Element keyw = (Element) newElt.selectSingleNode(EMAIL_FIELDS.keywords.name);
                if (keyw != null) {
                    StringBuilder builder = new StringBuilder();
                    @SuppressWarnings("unchecked")
                    List<Element> elts = (List<Element>) keyw.selectNodes(EMAIL_FIELDS.keywordRank.name);
                    for (Element elt : elts) {
                        String value = elt.attributeValue(EMAIL_FIELDS.keywordOccur.name);
                        // Each word is repeated (occurrences / 2 + 1) times in the returned text.
                        int occur = Integer.parseInt(value) / 2 + 1;
                        @SuppressWarnings("unchecked")
                        List<Element> words = (List<Element>) elt.selectNodes(EMAIL_FIELDS.keywordWord.name);
                        for (Element eword : words) {
                            String word = eword.attributeValue(EMAIL_FIELDS.keywordValue.name) + " ";
                            for (int i = 0; i < occur; i++) {
                                builder.append(word);
                            }
                        }
                    }
                    result = builder.toString().trim();
                }
            }

        } catch (Exception e) {
            String status = "Error during identification";
            e.printStackTrace();
            config.addRankId(newElt);
            newElt.addAttribute(EMAIL_FIELDS.status.name, status);
        }
        if (filetemp != null && !config.extractFile) {
            filetemp.delete();
        }
        identification.add(newElt);
        return result;
    }

    /**
     * Handles the content of a message nested inside a multipart.
     * <p>
     * A String content is described by an {@code identity} element and a {@code body} element
     * added to {@code identification}, then passed to {@code saveBody}. A Multipart content is
     * delegated to {@code handleMultipartRecur}. Any other content type is ignored.
     *
     * @param message the nested message
     * @param identification element receiving the description of the content
     * @param id identifier forwarded to the handlers
     * @param argument run arguments
     * @param config configuration
     * @return for a String content, a space followed by the value of {@code saveBody}; for a
     *         Multipart content, the value of {@code handleMultipartRecur} when keyword
     *         extraction is on, else an empty string; an empty string for any other content
     * @throws IOException if reading the content fails
     * @throws MessagingException if the message cannot be accessed
     */
    private static final String handleMessageRecur(Message message, Element identification, String id,
            VitamArgument argument, ConfigLoader config) throws IOException, MessagingException {
        final Object content = message.getContent();

        if (content instanceof String) {
            final String[] encodings = message.getHeader("Content-Transfer-Encoding");
            final String encoding = (encodings != null && encodings.length > 0) ? encodings[0] : null;
            final String[] aresult = extractContentType(message.getContentType(), encoding);

            final String mime = (aresult[0] != null) ? aresult[0] : "unknown";
            final String extension = (aresult[3] != null) ? aresult[3].substring(1) : "unknown";

            Element bodyRoot = XmlDom.factory.createElement("body");
            Element nestedIdentification = XmlDom.factory.createElement("identification");
            Element identity = XmlDom.factory.createElement("identity");
            identity.addAttribute("format", "Internet Message Body Format");
            identity.addAttribute("mime", mime);
            identity.addAttribute("extensions", extension);
            if (aresult[1] != null) {
                identity.addAttribute("charset", aresult[1]);
            }
            identification.add(identity);
            bodyRoot.add(nestedIdentification);
            identification.add(bodyRoot);

            return " " + saveBody(message.getInputStream(), aresult, id, argument, config);
        }

        if (content instanceof Multipart) {
            final String keywords = handleMultipartRecur((Multipart) content, identification, id, argument,
                    config);
            return argument.extractKeyword ? keywords : "";
        }

        return "";
    }

    /**
     * Walks the parts of a nested multipart and records each one under {@code identification}.
     * <p>
     * String parts are saved through {@code saveBody}; InputStream parts go to
     * {@code addSubIdentities} and their stream is closed afterwards; nested messages go to
     * {@code handleMessageRecur} and nested multiparts recurse into this method, both with
     * {@code id + "_" + i} as identifier.
     *
     * @param mp the multipart to walk
     * @param identification element receiving the description of every part
     * @param id identifier forwarded to the handlers
     * @param argument run arguments
     * @param config configuration
     * @return the space-prefixed concatenation of the text returned for each part; results of
     *         non-String parts are included only when keyword extraction is on
     * @throws MessagingException if a part cannot be accessed
     * @throws IOException if reading a part or closing an attachment stream fails
     */
    private static final String handleMultipartRecur(Multipart mp, Element identification, String id,
            VitamArgument argument, ConfigLoader config) throws MessagingException, IOException {
        int count = mp.getCount();
        String result = "";
        for (int i = 0; i < count; i++) {
            BodyPart bp = mp.getBodyPart(i);
            Object content = bp.getContent();
            if (content instanceof String) {
                String[] cte = bp.getHeader("Content-Transfer-Encoding");
                String[] aresult = null;
                if (cte != null && cte.length > 0) {
                    aresult = extractContentType(bp.getContentType(), cte[0]);
                } else {
                    aresult = extractContentType(bp.getContentType(), null);
                }
                Element emlroot = XmlDom.factory.createElement("body");
                // NOTE(review): the identity is attached to the parent identification while the
                // nested "identification" element stays empty -- confirm this layout is intended.
                Element subidenti = XmlDom.factory.createElement("identification");
                Element identity = XmlDom.factory.createElement("identity");
                identity.addAttribute("format", "Internet Message Body Format");
                identity.addAttribute("mime", aresult[0] != null ? aresult[0] : "unknown");
                identity.addAttribute("extensions", aresult[3] != null ? aresult[3].substring(1) : "unknown");
                if (aresult[1] != null) {
                    identity.addAttribute("charset", aresult[1]);
                }
                identification.add(identity);
                emlroot.add(subidenti);
                identification.add(emlroot);
                result += " " + saveBody(bp.getInputStream(), aresult, id, argument, config);
            } else if (content instanceof InputStream) {
                // handle input stream
                InputStream attachment = (InputStream) content;
                try {
                    if (argument.extractKeyword) {
                        result += " " + addSubIdentities(identification, bp, attachment, argument, config);
                    } else {
                        addSubIdentities(identification, bp, attachment, argument, config);
                    }
                } finally {
                    // Release the attachment stream, as handleMultipart does for top-level parts.
                    attachment.close();
                }
            } else if (content instanceof Message) {
                Message message = (Message) content;
                if (argument.extractKeyword) {
                    result += " " + handleMessageRecur(message, identification, id + "_" + i, argument, config);
                } else {
                    handleMessageRecur(message, identification, id + "_" + i, argument, config);
                }
            } else if (content instanceof Multipart) {
                Multipart mp2 = (Multipart) content;
                if (argument.extractKeyword) {
                    result += " " + handleMultipartRecur(mp2, identification, id + "_" + i, argument, config);
                } else {
                    handleMultipartRecur(mp2, identification, id + "_" + i, argument, config);
                }
            }
        }
        return result;
    }
}