Example usage for org.apache.poi.hsmf MAPIMessage getHeaders

List of usage examples for org.apache.poi.hsmf MAPIMessage getHeaders

Introduction

This page collects usage examples of the org.apache.poi.hsmf MAPIMessage getHeaders method.

Prototype

public String[] getHeaders() throws ChunkNotFoundException 

Source Link

Document

Returns all the headers, one entry per line

Usage

From source file:com.openkm.util.MailUtils.java

License:Open Source License

/**
 * Convert Outlook Message to Mail/*from   w ww  . ja  va2  s.  c  om*/
 */
public static Mail messageToMail(MAPIMessage msg) throws MessagingException, IOException {
    com.openkm.bean.Mail mail = new com.openkm.bean.Mail();
    Calendar receivedDate = Calendar.getInstance();
    Calendar sentDate = Calendar.getInstance();

    try {
        // Can be void
        if (msg.getMessageDate() != null) {
            receivedDate.setTime(msg.getMessageDate().getTime());
        }

        // Can be void
        if (msg.getMessageDate() != null) {
            sentDate.setTime(msg.getMessageDate().getTime());
        }

        if (msg.getRtfBody() != null) {
            try {
                // JEditorPaneRTF2HTMLConverter converter = new JEditorPaneRTF2HTMLConverter();
                // mail.setContent(converter.rtf2html(msg.getBodyRTF()));
                ByteArrayInputStream bais = new ByteArrayInputStream(msg.getRtfBody().getBytes());
                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                DocConverter.getInstance().rtf2html(bais, baos);
                mail.setMimeType(MimeTypeConfig.MIME_HTML);
                mail.setContent(baos.toString().replace("<BR>", ""));
                IOUtils.closeQuietly(bais);
                IOUtils.closeQuietly(baos);
            } catch (Exception e) {
                throw new MessagingException(e.getMessage(), e);
            }
        } else if (msg.getHtmlBody() != null) {
            mail.setMimeType(MimeTypeConfig.MIME_HTML);
            mail.setContent(msg.getHtmlBody());
        } else if (msg.getTextBody() != null) {
            mail.setMimeType(MimeTypeConfig.MIME_TEXT);
            mail.setContent(msg.getTextBody());
        } else {
            mail.setMimeType(MimeTypeConfig.MIME_UNDEFINED);
        }

        if (msg.getDisplayTo() != null) {
            mail.setTo(recipientToString(msg.getDisplayTo()));
        } else {
            mail.setTo(new String[] {});
        }

        StringBuilder sb = new StringBuilder();
        for (String header : msg.getHeaders()) {
            sb.append(header).append("\n");
        }

        // Need to replace 0x00 because PostgreSQL does not accept string containing 0x00
        // Need to remove Unicode surrogate because of MySQL => SQL Error: 1366, SQLState: HY000
        String subject = FormatUtil.trimUnicodeSurrogates(FormatUtil.fixUTF8(msg.getSubject()));

        mail.setSize(mail.getContent().length());
        mail.setSubject((subject == null || subject.isEmpty()) ? NO_SUBJECT : subject);
        mail.setFrom(msg.getDisplayFrom());
        mail.setCc(recipientToString(msg.getDisplayCC()));
        mail.setBcc(recipientToString(msg.getDisplayBCC()));
        mail.setReceivedDate(receivedDate);
        mail.setSentDate(sentDate);
    } catch (ChunkNotFoundException e) {
        throw new MessagingException(e.getMessage(), e);
    }

    return mail;
}

From source file:fr.gouv.culture.vitam.eml.MsgExtract2.java

License:Open Source License

/**
 * Extracts the headers, recipients, properties, attachments and (optionally) the body of an
 * Outlook message and records them as XML elements under {@code root}.
 *
 * <p>The raw header lines are parsed into one slot per {@code Keywords} entry, then turned into
 * a metadata element. Attachments are delegated to {@code extractInfoAttachment}. When
 * {@code config.extractFile} is set, the available body representations (.txt, .html, .rtf)
 * are written below the message directory. Finally {@code root} receives a status attribute
 * with the value "ok".</p>
 *
 * @param msg the message to analyse
 * @param curDir parent directory; the message path is {@code curDir/MSG_<rank id>}
 * @param root element that receives the metadata, attachments and keywords children
 * @param argument extraction options ({@code extractKeyword} is read here)
 * @param config configuration ({@code addRankId} and {@code extractFile} are used here)
 * @return when {@code argument.extractKeyword} is set, the message body followed by the text
 *         returned for each attachment; otherwise an empty string
 */
private static String extractInfoSubEmail(MAPIMessage msg, File curDir, Element root, VitamArgument argument,
        ConfigLoader config) {
    Element keywords = XmlDom.factory.createElement(EMAIL_FIELDS.keywords.name);
    Element metadata = XmlDom.factory.createElement(EMAIL_FIELDS.metadata.name);

    String id = config.addRankId(root);
    File curPath = new File(curDir, "MSG_" + id);

    // A message without readable headers is handled as one with no header lines at all
    // instead of failing on a null array.
    String[] values = collectHeaderValues(headerLinesOrEmpty(msg));

    if (values[Keywords.XFolder.ordinal()] != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.folder.name);
        sub.addAttribute(EMAIL_FIELDS.folderName.name, values[Keywords.XFolder.ordinal()]);
        metadata.add(sub);
    }

    // Sender: the From header, completed by Return-Path when that adds a new address.
    String fromEmail = values[Keywords.From.ordinal()];
    String returnPath = values[Keywords.ReturnPath.ordinal()];
    if (fromEmail != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.from.name);
        addAddress(sub, EMAIL_FIELDS.fromUnit.name, fromEmail);
        if (returnPath != null && !fromEmail.contains(returnPath)) {
            addAddress(sub, EMAIL_FIELDS.fromUnit.name, returnPath);
        }
        metadata.add(sub);
    } else if (returnPath != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.from.name);
        addAddress(sub, EMAIL_FIELDS.fromUnit.name, returnPath);
        metadata.add(sub);
    }

    // Recipients: each header value is a comma separated list of addresses.
    String toList = values[Keywords.To.ordinal()];
    if (toList != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.toRecipients.name);
        for (String address : toList.split(",")) {
            addAddress(sub, EMAIL_FIELDS.toUnit.name, address);
        }
        metadata.add(sub);
    }
    String ccList = values[Keywords.Cc.ordinal()];
    if (ccList != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.ccRecipients.name);
        for (String address : ccList.split(",")) {
            addAddress(sub, EMAIL_FIELDS.ccUnit.name, address);
        }
        metadata.add(sub);
    }
    String bccList = values[Keywords.Bcc.ordinal()];
    if (bccList != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.bccRecipients.name);
        for (String address : bccList.split(",")) {
            addAddress(sub, EMAIL_FIELDS.bccUnit.name, address);
        }
        metadata.add(sub);
    }

    String subject = null;
    try {
        subject = msg.getSubject();
    } catch (ChunkNotFoundException e) {
        e.printStackTrace();
    }
    if (subject != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.subject.name);
        sub.setText(StringUtils.unescapeHTML(subject, true, false));
        metadata.add(sub);
    }
    String topic = null;
    try {
        topic = msg.getConversationTopic();
    } catch (ChunkNotFoundException ignored) {
        // A missing conversation topic is silently skipped; the element is just not written.
    }
    if (topic != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.conversationTopic.name);
        sub.setText(StringUtils.unescapeHTML(topic, true, false));
        metadata.add(sub);
    }
    if (values[Keywords.Date.ordinal()] != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.sentDate.name);
        sub.setText(values[Keywords.Date.ordinal()]);
        metadata.add(sub);
    }
    if (values[Keywords.XOriginalArrivalTime.ordinal()] != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.receivedDate.name);
        sub.setText(values[Keywords.XOriginalArrivalTime.ordinal()]);
        metadata.add(sub);
    }
    if (values[Keywords.Received.ordinal()] != null) {
        // Received values were joined with '\n' while parsing: one trace element per line.
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.receptionTrace.name);
        for (String trace : values[Keywords.Received.ordinal()].split("\n")) {
            Element traceElement = XmlDom.factory.createElement(EMAIL_FIELDS.trace.name);
            traceElement.setText(trace);
            sub.add(traceElement);
        }
        metadata.add(sub);
    }
    if (values[Keywords.XSDOC.ordinal()] != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.emailSize.name);
        sub.setText(values[Keywords.XSDOC.ordinal()]);
        metadata.add(sub);
    }

    String messageId = values[Keywords.MessageId.ordinal()];
    if (messageId != null) {
        messageId = StringUtils.removeChevron(StringUtils.unescapeHTML(messageId, true, false)).trim();
        if (messageId.length() > 1) {
            Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.messageId.name);
            sub.setText(messageId);
            metadata.add(sub);
        }
    }
    String inReplyToId = values[Keywords.InReplyTo.ordinal()];
    if (inReplyToId != null) {
        inReplyToId = StringUtils.removeChevron(StringUtils.unescapeHTML(inReplyToId, true, false)).trim();
        if (inReplyToId.length() > 1) {
            Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.inReplyTo.name);
            sub.setText(inReplyToId);
            if (messageId != null && messageId.length() > 1) {
                // Register this message id in the comma separated list kept for the message
                // it replies to.
                String replies = EmlExtract.filEmls.get(inReplyToId);
                if (replies == null) {
                    replies = messageId;
                } else {
                    replies += "," + messageId;
                }
                EmlExtract.filEmls.put(inReplyToId, replies);
            }
            metadata.add(sub);
        }
    }

    Element prop = XmlDom.factory.createElement(EMAIL_FIELDS.properties.name);
    String importance = values[Keywords.Importance.ordinal()];
    if (importance != null && importance.length() > 0) {
        prop.addAttribute(EMAIL_FIELDS.importance.name, toPriorityLabel(importance));
    }
    String priority = values[Keywords.Priority.ordinal()];
    if (priority != null && priority.length() > 0) {
        prop.addAttribute(EMAIL_FIELDS.priority.name, toPriorityLabel(priority));
    }
    if (values[Keywords.Sensitivity.ordinal()] != null) {
        prop.addAttribute(EMAIL_FIELDS.sensitivity.name, values[Keywords.Sensitivity.ordinal()]);
    }
    AttachmentChunks[] files = msg.getAttachmentFiles();
    boolean hasAttachments = (files != null && files.length > 0);
    prop.addAttribute(EMAIL_FIELDS.hasAttachment.name, Boolean.toString(hasAttachments));
    metadata.add(prop);

    String result = "";
    Element identification = null;
    if (hasAttachments) {
        File attachmentPath = curPath;
        if (config.extractFile) {
            // NOTE(review): mkdir() does not create missing parents and its result is ignored;
            // confirm that the message directory exists at this point.
            File newDir = new File(curPath, id);
            newDir.mkdir();
            attachmentPath = newDir;
        }
        identification = XmlDom.factory.createElement(EMAIL_FIELDS.attachments.name);
        identification.addAttribute(EMAIL_FIELDS.attNumber.name, Integer.toString(files.length));
        for (AttachmentChunks attachment : files) {
            if (argument.extractKeyword) {
                result += " " + extractInfoAttachment(attachment, identification, argument, config,
                        attachmentPath);
            } else {
                extractInfoAttachment(attachment, identification, argument, config, attachmentPath);
            }
        }
    }

    // Body: plain text first, then HTML, then RTF.
    String body = "";
    if (argument.extractKeyword || config.extractFile) {
        body = textBodyOrNull(msg);
        boolean isTxt = true;
        boolean isHttp = false;
        if (body == null || body.isEmpty()) {
            isTxt = false;
            body = htmlBodyOrNull(msg);
            isHttp = true;
            if (body == null || body.isEmpty()) {
                isHttp = false;
                body = rtfBodyOrNull(msg);
            }
        }
        if (body == null) {
            // Keeps the literal text "null" out of the keyword text built below.
            body = "";
        }
        if (!body.isEmpty() && config.extractFile) {
            // XXX FIXME could saved email from HTML Body (clearer) if possible
            // use curRank in name, and attachment will be under directory named
            // add currank in field
            File newDir = new File(curPath, id);
            newDir.mkdir();
            // The rank id names the files. The former test
            // (filenamebody == null || !filenamebody.isEmpty()) already selected it for every
            // non-empty message id and left an empty name otherwise; using it unconditionally
            // avoids the empty name and keeps untrusted header text out of file names.
            String filenamebody = id;
            String html = isHttp ? body : null;
            String rtf = (!isTxt && !isHttp) ? body : null;
            if (isTxt) {
                writeBodyFile(new File(newDir, filenamebody + ".txt"), body);
                html = htmlBodyOrNull(msg);
            }
            if (html != null && !html.isEmpty()) {
                writeBodyFile(new File(newDir, filenamebody + ".html"), html);
            }
            if (isTxt || isHttp) {
                rtf = rtfBodyOrNull(msg);
            }
            if (rtf != null && !rtf.isEmpty()) {
                writeBodyFile(new File(newDir, filenamebody + ".rtf"), rtf);
            }
        }
    }

    if (metadata.hasContent()) {
        root.add(metadata);
    }
    if (identification != null && identification.hasContent()) {
        root.add(identification);
    }
    if (argument.extractKeyword) {
        result = body + " " + result;
        ExtractInfo.exportMetadata(keywords, result, "", config, null);
        if (keywords.hasContent()) {
            root.add(keywords);
        }
    }
    root.addAttribute(EMAIL_FIELDS.status.name, "ok");
    return result;
}

/** Returns the raw header lines of the message, or an empty array when they cannot be read. */
private static String[] headerLinesOrEmpty(MAPIMessage msg) {
    String[] lines = null;
    try {
        lines = msg.getHeaders();
    } catch (ChunkNotFoundException e) {
        e.printStackTrace();
    }
    return lines == null ? new String[0] : lines;
}

/**
 * Parses header lines into an array indexed by {@code Keywords} ordinal. Lines starting with
 * the {@code Keywords.NextOne} prefix are appended to the last multi-valued header seen.
 */
private static String[] collectHeaderValues(String[] headerLines) {
    String[] values = new String[Keywords.values().length];
    // Slot of the header that continuation lines belong to, or -1 when none is open.
    int lastRank = -1;
    for (String line : headerLines) {
        if (line.startsWith(Keywords.NextOne.name) && lastRank >= 0) {
            String separator = Keywords.Received.ordinal() == lastRank ? "\n" : " ";
            appendHeaderValue(values, lastRank, separator, headerValue(line, Keywords.NextOne));
        } else if (line.startsWith(Keywords.Date.name)) {
            values[Keywords.Date.ordinal()] = headerValue(line, Keywords.Date);
            lastRank = -1;
        } else if (line.startsWith(Keywords.XOriginalArrivalTime.name)) {
            String arrival = headerValue(line, Keywords.XOriginalArrivalTime);
            // Drop the trailing " FILETIME=..." part when present.
            int pos = arrival.indexOf(" FILETIME=");
            if (pos > 0) {
                arrival = arrival.substring(0, pos);
            }
            values[Keywords.XOriginalArrivalTime.ordinal()] = arrival;
            lastRank = -1;
        } else if (line.startsWith(Keywords.MessageId.name)) {
            values[Keywords.MessageId.ordinal()] = StringUtils
                    .removeChevron(StringUtils.unescapeHTML(headerValue(line, Keywords.MessageId), true, false))
                    .trim();
            lastRank = -1;
        } else if (line.startsWith(Keywords.InReplyTo.name)) {
            String reply = StringUtils.removeChevron(
                    StringUtils.unescapeHTML(headerValue(line, Keywords.InReplyTo), true, false));
            appendHeaderValue(values, Keywords.InReplyTo.ordinal(), " ", reply);
            lastRank = Keywords.InReplyTo.ordinal();
        } else if (line.startsWith(Keywords.Received.name)) {
            appendHeaderValue(values, Keywords.Received.ordinal(), "\n", headerValue(line, Keywords.Received));
            lastRank = Keywords.Received.ordinal();
        } else if (line.startsWith(Keywords.From.name)) {
            values[Keywords.From.ordinal()] = headerValue(line, Keywords.From);
            lastRank = -1;
        } else if (line.startsWith(Keywords.To.name)) {
            appendHeaderValue(values, Keywords.To.ordinal(), " ", headerValue(line, Keywords.To));
            lastRank = Keywords.To.ordinal();
        } else if (line.startsWith(Keywords.Cc.name)) {
            appendHeaderValue(values, Keywords.Cc.ordinal(), " ", headerValue(line, Keywords.Cc));
            lastRank = Keywords.Cc.ordinal();
        } else if (line.startsWith(Keywords.Bcc.name)) {
            appendHeaderValue(values, Keywords.Bcc.ordinal(), " ", headerValue(line, Keywords.Bcc));
            lastRank = Keywords.Bcc.ordinal();
        } else if (line.startsWith(Keywords.ReturnPath.name)) {
            appendHeaderValue(values, Keywords.ReturnPath.ordinal(), " ",
                    headerValue(line, Keywords.ReturnPath));
            lastRank = Keywords.ReturnPath.ordinal();
        } else if (line.startsWith(Keywords.Importance.name)) {
            values[Keywords.Importance.ordinal()] = headerValue(line, Keywords.Importance);
            lastRank = -1;
        } else if (line.startsWith(Keywords.Priority.name)) {
            values[Keywords.Priority.ordinal()] = headerValue(line, Keywords.Priority);
            lastRank = -1;
        } else if (line.startsWith(Keywords.XFolder.name)) {
            values[Keywords.XFolder.ordinal()] = headerValue(line, Keywords.XFolder);
            lastRank = -1;
        } else if (line.startsWith(Keywords.XSDOC.name)) {
            values[Keywords.XSDOC.ordinal()] = headerValue(line, Keywords.XSDOC);
            lastRank = -1;
        } else if (line.startsWith(Keywords.Sensitivity.name)) {
            values[Keywords.Sensitivity.ordinal()] = headerValue(line, Keywords.Sensitivity);
            lastRank = -1;
        } else {
            lastRank = -1;
        }
    }
    return values;
}

/** Returns the part of a header line that follows the keyword prefix. */
private static String headerValue(String line, Keywords key) {
    return line.substring(key.name.length());
}

/** Stores {@code addition} in the slot, or appends it after {@code separator} when the slot is filled. */
private static void appendHeaderValue(String[] values, int rank, String separator, String addition) {
    if (values[rank] == null) {
        values[rank] = addition;
    } else {
        values[rank] += separator + addition;
    }
}

/** Maps a numeric priority (1 to 5) to its label; non numeric input is returned unchanged. */
private static String toPriorityLabel(String raw) {
    try {
        int level = Integer.parseInt(raw);
        switch (level) {
        case 5:
            return "LOWEST";
        case 4:
            return "LOW";
        case 3:
            return "NORMAL";
        case 2:
            return "HIGH";
        case 1:
            return "HIGHEST";
        default:
            return "LEV" + level;
        }
    } catch (NumberFormatException e) {
        // Not a number: the header text is used as the label.
        return raw;
    }
}

/** Returns the plain text body, or null when the chunk is missing (the error is printed). */
private static String textBodyOrNull(MAPIMessage msg) {
    try {
        return msg.getTextBody();
    } catch (ChunkNotFoundException e) {
        e.printStackTrace();
        return null;
    }
}

/** Returns the HTML body, or null when the chunk is missing (the error is printed). */
private static String htmlBodyOrNull(MAPIMessage msg) {
    try {
        return msg.getHtmlBody();
    } catch (ChunkNotFoundException e) {
        e.printStackTrace();
        return null;
    }
}

/** Returns the RTF body, or null when the chunk is missing (the error is printed). */
private static String rtfBodyOrNull(MAPIMessage msg) {
    try {
        return msg.getRtfBody();
    } catch (ChunkNotFoundException e) {
        e.printStackTrace();
        return null;
    }
}

/** Writes {@code content} to {@code target}; I/O errors are printed and otherwise ignored. */
private static void writeBodyFile(File target, String content) {
    FileOutputStream output = null;
    try {
        output = new FileOutputStream(target);
        byte[] bytes = content.getBytes();
        output.write(bytes, 0, bytes.length);
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if (output != null) {
            try {
                output.close();
            } catch (IOException ignored) {
                // Nothing useful can be done when closing fails.
            }
        }
    }
}

From source file:org.alfresco.repo.content.transform.MSGParser.java

License:Apache License

/**
 * Process header./*from  w w w . j  a v  a2s. c  o  m*/
 *
 * @param msg
 *            the msg
 * @param metadata
 *            the metadata
 * @param xhtml
 *            the xhtml
 * @throws Exception
 *             the exception
 */
private void processHeader(MAPIMessage msg, Metadata metadata, XHTMLContentHandler xhtml) throws Exception {
    StringChunk subjectChunk = msg.getMainChunks().subjectChunk;
    if (msg.has7BitEncodingStrings()) {
        CharsetDetector detector = new CharsetDetector();
        detector.setText(subjectChunk.getRawValue());
        CharsetMatch detect = detector.detect();
        if (detect.getConfidence() >= 20) {
            subjectChunk.set7BitEncoding(detect.getName());
        }
    }
    String subject = subjectChunk.getValue();
    String from = msg.getDisplayFrom();

    metadata.set(DublinCore.CREATOR, from);
    metadata.set(Metadata.MESSAGE_FROM, from);
    metadata.set(Metadata.MESSAGE_TO, msg.getDisplayTo());
    metadata.set(Metadata.MESSAGE_CC, msg.getDisplayCC());
    metadata.set(Metadata.MESSAGE_BCC, msg.getDisplayBCC());

    metadata.set(DublinCore.TITLE, subject);
    metadata.set(DublinCore.SUBJECT, msg.getConversationTopic());

    try {
        for (String recipientAddress : msg.getRecipientEmailAddressList()) {
            if (recipientAddress != null)
                metadata.add(Metadata.MESSAGE_RECIPIENT_ADDRESS, recipientAddress);
        }
    } catch (ChunkNotFoundException he) {
    } // Will be fixed in POI 3.7 Final

    // Date - try two ways to find it
    // First try via the proper chunk
    if (msg.getMessageDate() != null) {
        metadata.set(DublinCore.DATE, msg.getMessageDate().getTime());
        metadata.set(Office.CREATION_DATE, msg.getMessageDate().getTime());
        metadata.set(Office.SAVE_DATE, msg.getMessageDate().getTime());
    } else {
        try {
            // Failing that try via the raw headers
            String[] headers = msg.getHeaders();
            if (headers != null && headers.length > 0) {
                for (String header : headers) {
                    if (header.toLowerCase().startsWith("date:")) {
                        String date = header.substring(header.indexOf(':') + 1).trim();

                        // See if we can parse it as a normal mail date
                        try {
                            Date d = MboxParser.parseDate(date);
                            metadata.set(DublinCore.DATE, d);
                            metadata.set(Office.CREATION_DATE, d);
                            metadata.set(Office.SAVE_DATE, d);
                        } catch (ParseException e) {
                            // Store it as-is, and hope for the best...
                            metadata.set(DublinCore.DATE, date);
                            metadata.set(Office.CREATION_DATE, date);
                            metadata.set(Office.SAVE_DATE, date);
                        }
                        break;
                    }
                }
            }
        } catch (ChunkNotFoundException he) {
            // We can't find the date, sorry...
        }
    }

    xhtml.element("h1", subject);

    // Output the from and to details in text, as you
    // often want them in text form for searching
    xhtml.startElement("dl");
    if (from != null) {
        header(xhtml, "From", from);
    }
    header(xhtml, "To", msg.getDisplayTo());
    header(xhtml, "Cc", msg.getDisplayCC());
    header(xhtml, "Bcc", msg.getDisplayBCC());
    try {
        header(xhtml, "Recipients", msg.getRecipientEmailAddress());
    } catch (ChunkNotFoundException e) {
    }
    List<String> attachmentList = new ArrayList<String>();
    // // prepare attachments
    prepareExtractMultipart(xhtml, message, attachmentList);
    if (attachmentList.size() > 0) {
        header(xhtml, "Attachments", attachmentList.toString());
    }
    xhtml.endElement("dl");

}

From source file:org.apache.tika.parser.microsoft.OutlookExtractor.java

License:Apache License

/**
 * Works out which encoding the 7-bit (non-unicode) strings of the message use and applies it.
 * <p>Unicode messages need no help, but messages with one-byte strings do not always record
 *  their encoding anywhere convenient.</p>
 * <p>The candidates are tried in this order, stopping at the first one that
 *  {@code tryToSet7BitEncoding} accepts:</p>
 * <ol>
 *  <li>the codepage properties of the message,</li>
 *  <li>a charset given on a Content-Type header,</li>
 *  <li>the encoding detected on the HTML body,</li>
 *  <li>a charset detector run on the raw plain text body.</li>
 * </ol>
 * <p>Bug #49441 has more on why this is needed</p>
 * <p>This is taken verbatim from POI (TIKA-1238)
 * as a temporary workaround to prevent unsupported encoding exceptions</p>
 */
private void guess7BitEncoding(MAPIMessage msg) {
    Chunks mainChunks = msg.getMainChunks();
    if (mainChunks == null) {
        // Nothing to inspect
        return;
    }

    // 1. Codepage properties
    Map<MAPIProperty, List<PropertyValue>> properties = mainChunks.getProperties();
    if (properties != null) {
        MAPIProperty[] codepageProperties = { MAPIProperty.MESSAGE_CODEPAGE, MAPIProperty.INTERNET_CPID };
        for (MAPIProperty candidate : codepageProperties) {
            List<PropertyValue> stored = properties.get(candidate);
            if (stored == null || stored.isEmpty()) {
                continue;
            }
            int codepage = ((PropertyValue.LongPropertyValue) stored.get(0)).getValue();
            String encodingName;
            try {
                encodingName = CodePageUtil.codepageToEncoding(codepage, true);
            } catch (UnsupportedEncodingException e) {
                // Unknown codepage: fall through with no encoding name
                encodingName = null;
            }
            if (tryToSet7BitEncoding(msg, encodingName)) {
                return;
            }
        }
    }

    // 2. Charset announced on a Content-Type header
    try {
        String[] headerLines = msg.getHeaders();
        if (headerLines != null && headerLines.length > 0) {
            Pattern charsetPattern = Pattern.compile("Content-Type:.*?charset=[\"']?([^;'\"]+)[\"']?",
                    Pattern.CASE_INSENSITIVE);

            for (String line : headerLines) {
                if (!line.startsWith("Content-Type")) {
                    continue;
                }
                Matcher matcher = charsetPattern.matcher(line);
                // On a match, hand the captured charset to every string chunk
                if (matcher.matches() && tryToSet7BitEncoding(msg, matcher.group(1))) {
                    return;
                }
            }
        }
    } catch (ChunkNotFoundException e) {
        // No headers: move on to the next strategy
    }

    // 3. Encoding detected on the HTML body
    // TODO: do we need to replicate this in Tika? If we wind up
    // parsing the html version of the email, this is duplicative??
    // Or do we need to reset the header strings based on the html
    // meta header if there is no other information?
    try {
        String htmlBody = msg.getHtmlBody();
        if (htmlBody != null && htmlBody.length() > 0) {
            Charset detected = null;
            try {
                detected = detector.detect(new ByteArrayInputStream(htmlBody.getBytes(UTF_8)), EMPTY_METADATA);
            } catch (IOException e) {
                // Detection failed: leave the charset unset
            }
            if (detected != null && tryToSet7BitEncoding(msg, detected.name())) {
                return;
            }
        }
    } catch (ChunkNotFoundException e) {
        // No HTML body: move on to the last resort
    }

    // 4. Last resort: charset detector on the raw plain text body
    StringChunk textBody = mainChunks.textBodyChunk;
    if (textBody == null) {
        return;
    }
    CharsetDetector rawTextDetector = new CharsetDetector();
    rawTextDetector.setText(textBody.getRawValue());
    CharsetMatch best = rawTextDetector.detect();
    if (best != null && best.getConfidence() > 35) {
        tryToSet7BitEncoding(msg, best.getName());
    }
}