Example usage for org.apache.poi.hsmf MAPIMessage getHeaders

Introduction

On this page you can find example usages of org.apache.poi.hsmf.MAPIMessage.getHeaders().

Prototype

public String[] getHeaders() throws ChunkNotFoundException

Source Link

Document

Returns all the headers, one entry per line

Usage

From source file: com.openkm.util.MailUtils.java

License:Open Source License

/**
 * Convert Outlook Message to Mail/*from   w ww  . ja  va2  s.  c  om*/
 */
public static Mail messageToMail(MAPIMessage msg) throws MessagingException, IOException {
    com.openkm.bean.Mail mail = new com.openkm.bean.Mail();
    Calendar receivedDate = Calendar.getInstance();
    Calendar sentDate = Calendar.getInstance();

    try {
        // Can be void
        if (msg.getMessageDate() != null) {
            receivedDate.setTime(msg.getMessageDate().getTime());
        }

        // Can be void
        if (msg.getMessageDate() != null) {
            sentDate.setTime(msg.getMessageDate().getTime());
        }

        if (msg.getRtfBody() != null) {
            try {
                // JEditorPaneRTF2HTMLConverter converter = new JEditorPaneRTF2HTMLConverter();
                // mail.setContent(converter.rtf2html(msg.getBodyRTF()));
                ByteArrayInputStream bais = new ByteArrayInputStream(msg.getRtfBody().getBytes());
                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                DocConverter.getInstance().rtf2html(bais, baos);
                mail.setMimeType(MimeTypeConfig.MIME_HTML);
                mail.setContent(baos.toString().replace("<BR>", ""));
                IOUtils.closeQuietly(bais);
                IOUtils.closeQuietly(baos);
            } catch (Exception e) {
                throw new MessagingException(e.getMessage(), e);
            }
        } else if (msg.getHtmlBody() != null) {
            mail.setMimeType(MimeTypeConfig.MIME_HTML);
            mail.setContent(msg.getHtmlBody());
        } else if (msg.getTextBody() != null) {
            mail.setMimeType(MimeTypeConfig.MIME_TEXT);
            mail.setContent(msg.getTextBody());
        } else {
            mail.setMimeType(MimeTypeConfig.MIME_UNDEFINED);
        }

        if (msg.getDisplayTo() != null) {
            mail.setTo(recipientToString(msg.getDisplayTo()));
        } else {
            mail.setTo(new String[] {});
        }

        StringBuilder sb = new StringBuilder();
        for (String header : msg.getHeaders()) {
            sb.append(header).append("\n");
        }

        // Need to replace 0x00 because PostgreSQL does not accept string containing 0x00
        // Need to remove Unicode surrogate because of MySQL => SQL Error: 1366, SQLState: HY000
        String subject = FormatUtil.trimUnicodeSurrogates(FormatUtil.fixUTF8(msg.getSubject()));

        mail.setSize(mail.getContent().length());
        mail.setSubject((subject == null || subject.isEmpty()) ? NO_SUBJECT : subject);
        mail.setFrom(msg.getDisplayFrom());
        mail.setCc(recipientToString(msg.getDisplayCC()));
        mail.setBcc(recipientToString(msg.getDisplayBCC()));
        mail.setReceivedDate(receivedDate);
        mail.setSentDate(sentDate);
    } catch (ChunkNotFoundException e) {
        throw new MessagingException(e.getMessage(), e);
    }

    return mail;
}

From source file: fr.gouv.culture.vitam.eml.MsgExtract2.java

License:Open Source License

/**
 * Extracts the metadata, the attachments and (optionally) the body files of one Outlook message
 * and attaches the resulting XML elements to {@code root}.
 *
 * <p>The raw headers are scanned line by line and the recognised ones are stored in an array
 * indexed by {@code Keywords} ordinal. From these values a metadata element is built (folder,
 * from, recipients, subject, dates, reception trace, ids, properties). Attachments are handed to
 * {@code extractInfoAttachment}. When {@code config.extractFile} is set, the available bodies
 * (text, HTML, RTF) are written to files. When {@code argument.extractKeyword} is set, keywords
 * are exported from the body and attachment text.</p>
 *
 * @param msg the Outlook message to analyse
 * @param curDir directory under which the {@code "MSG_" + id} working path is resolved
 * @param root XML element receiving the metadata, attachments and keywords elements, and the
 *        final status attribute
 * @param argument run-time options; only {@code extractKeyword} is read here
 * @param config configuration; supplies the rank id and the {@code extractFile} flag
 * @return the text used for keyword extraction (body followed by attachment text) when
 *         {@code argument.extractKeyword} is set, otherwise an empty string
 */
private static String extractInfoSubEmail(MAPIMessage msg, File curDir, Element root, VitamArgument argument,
        ConfigLoader config) {
    Element keywords = XmlDom.factory.createElement(EMAIL_FIELDS.keywords.name);
    Element metadata = XmlDom.factory.createElement(EMAIL_FIELDS.metadata.name);

    String id = config.addRankId(root);
    File curPath = new File(curDir, "MSG_" + id);
    // One slot per keyword; slots stay null until the matching header is seen.
    String[] values = new String[Keywords.values().length];
    String[] headers = null;
    try {
        headers = msg.getHeaders();
    } catch (ChunkNotFoundException e4) {
        e4.printStackTrace();
    }
    if (headers == null) {
        // No header chunk available: continue with the remaining message properties
        // instead of failing on a null array.
        headers = new String[0];
    }
    // Rank of the last header that accepts additional lines, or -1 when there is none.
    int lastRank = -1;
    for (String header : headers) {
        if (header.startsWith(Keywords.NextOne.name) && lastRank >= 0) {
            // Line starting with the NextOne prefix (presumably a folded continuation line):
            // append it to the last multi-line header, one trace per line for Received.
            String recv = header.substring(Keywords.NextOne.name.length());
            appendHeaderValue(values, lastRank, Keywords.Received.ordinal() == lastRank ? "\n" : " ", recv);
        } else if (header.startsWith(Keywords.Date.name)) {
            values[Keywords.Date.ordinal()] = header.substring(Keywords.Date.name.length());
            lastRank = -1;
        } else if (header.startsWith(Keywords.XOriginalArrivalTime.name)) {
            String arrival = header.substring(Keywords.XOriginalArrivalTime.name.length());
            // Drop the trailing FILETIME part when present.
            int pos = arrival.indexOf(" FILETIME=");
            if (pos > 0) {
                arrival = arrival.substring(0, pos);
            }
            values[Keywords.XOriginalArrivalTime.ordinal()] = arrival;
            lastRank = -1;
        } else if (header.startsWith(Keywords.MessageId.name)) {
            String rawId = header.substring(Keywords.MessageId.name.length());
            values[Keywords.MessageId.ordinal()] = StringUtils
                    .removeChevron(StringUtils.unescapeHTML(rawId, true, false)).trim();
            lastRank = -1;
        } else if (header.startsWith(Keywords.InReplyTo.name)) {
            String reply = StringUtils.removeChevron(StringUtils
                    .unescapeHTML(header.substring(Keywords.InReplyTo.name.length()), true, false));
            appendHeaderValue(values, Keywords.InReplyTo.ordinal(), " ", reply);
            lastRank = Keywords.InReplyTo.ordinal();
        } else if (header.startsWith(Keywords.Received.name)) {
            appendHeaderValue(values, Keywords.Received.ordinal(), "\n",
                    header.substring(Keywords.Received.name.length()));
            lastRank = Keywords.Received.ordinal();
        } else if (header.startsWith(Keywords.From.name)) {
            values[Keywords.From.ordinal()] = header.substring(Keywords.From.name.length());
            lastRank = -1;
        } else if (header.startsWith(Keywords.To.name)) {
            appendHeaderValue(values, Keywords.To.ordinal(), " ", header.substring(Keywords.To.name.length()));
            lastRank = Keywords.To.ordinal();
        } else if (header.startsWith(Keywords.Cc.name)) {
            appendHeaderValue(values, Keywords.Cc.ordinal(), " ", header.substring(Keywords.Cc.name.length()));
            lastRank = Keywords.Cc.ordinal();
        } else if (header.startsWith(Keywords.Bcc.name)) {
            appendHeaderValue(values, Keywords.Bcc.ordinal(), " ",
                    header.substring(Keywords.Bcc.name.length()));
            lastRank = Keywords.Bcc.ordinal();
        } else if (header.startsWith(Keywords.ReturnPath.name)) {
            appendHeaderValue(values, Keywords.ReturnPath.ordinal(), " ",
                    header.substring(Keywords.ReturnPath.name.length()));
            lastRank = Keywords.ReturnPath.ordinal();
        } else if (header.startsWith(Keywords.Importance.name)) {
            values[Keywords.Importance.ordinal()] = header.substring(Keywords.Importance.name.length());
            lastRank = -1;
        } else if (header.startsWith(Keywords.Priority.name)) {
            values[Keywords.Priority.ordinal()] = header.substring(Keywords.Priority.name.length());
            lastRank = -1;
        } else if (header.startsWith(Keywords.XFolder.name)) {
            values[Keywords.XFolder.ordinal()] = header.substring(Keywords.XFolder.name.length());
            lastRank = -1;
        } else if (header.startsWith(Keywords.XSDOC.name)) {
            values[Keywords.XSDOC.ordinal()] = header.substring(Keywords.XSDOC.name.length());
            lastRank = -1;
        } else if (header.startsWith(Keywords.Sensitivity.name)) {
            values[Keywords.Sensitivity.ordinal()] = header.substring(Keywords.Sensitivity.name.length());
            lastRank = -1;
        } else {
            // Unrecognised header: following continuation lines must not be attached anywhere.
            lastRank = -1;
        }
    }

    if (values[Keywords.XFolder.ordinal()] != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.folder.name);
        sub.addAttribute(EMAIL_FIELDS.folderName.name, values[Keywords.XFolder.ordinal()]);
        metadata.add(sub);
    }
    // Sender: the From header, completed by Return-Path when it is not already contained in it.
    String fromEmail = values[Keywords.From.ordinal()];
    String returnPath = values[Keywords.ReturnPath.ordinal()];
    if (fromEmail != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.from.name);
        addAddress(sub, EMAIL_FIELDS.fromUnit.name, fromEmail);
        if (returnPath != null && !fromEmail.contains(returnPath)) {
            addAddress(sub, EMAIL_FIELDS.fromUnit.name, returnPath);
        }
        metadata.add(sub);
    } else if (returnPath != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.from.name);
        addAddress(sub, EMAIL_FIELDS.fromUnit.name, returnPath);
        metadata.add(sub);
    }
    // Recipients: each list is a comma-separated string, one address element per entry.
    String recipients = values[Keywords.To.ordinal()];
    if (recipients != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.toRecipients.name);
        for (String address : recipients.split(",")) {
            addAddress(sub, EMAIL_FIELDS.toUnit.name, address);
        }
        metadata.add(sub);
    }
    recipients = values[Keywords.Cc.ordinal()];
    if (recipients != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.ccRecipients.name);
        for (String address : recipients.split(",")) {
            addAddress(sub, EMAIL_FIELDS.ccUnit.name, address);
        }
        metadata.add(sub);
    }
    recipients = values[Keywords.Bcc.ordinal()];
    if (recipients != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.bccRecipients.name);
        for (String address : recipients.split(",")) {
            addAddress(sub, EMAIL_FIELDS.bccUnit.name, address);
        }
        metadata.add(sub);
    }

    String subject = null;
    try {
        subject = msg.getSubject();
    } catch (ChunkNotFoundException e3) {
        e3.printStackTrace();
    }
    if (subject != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.subject.name);
        sub.setText(StringUtils.unescapeHTML(subject, true, false));
        metadata.add(sub);
    }
    String topic = null;
    try {
        topic = msg.getConversationTopic();
    } catch (ChunkNotFoundException e3) {
        // A missing conversation topic is silently skipped.
    }
    if (topic != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.conversationTopic.name);
        sub.setText(StringUtils.unescapeHTML(topic, true, false));
        metadata.add(sub);
    }
    if (values[Keywords.Date.ordinal()] != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.sentDate.name);
        sub.setText(values[Keywords.Date.ordinal()]);
        metadata.add(sub);
    }
    if (values[Keywords.XOriginalArrivalTime.ordinal()] != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.receivedDate.name);
        sub.setText(values[Keywords.XOriginalArrivalTime.ordinal()]);
        metadata.add(sub);
    }
    if (values[Keywords.Received.ordinal()] != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.receptionTrace.name);
        for (String trace : values[Keywords.Received.ordinal()].split("\n")) {
            Element sub3 = XmlDom.factory.createElement(EMAIL_FIELDS.trace.name);
            sub3.setText(trace);
            sub.add(sub3);
        }
        metadata.add(sub);
    }
    if (values[Keywords.XSDOC.ordinal()] != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.emailSize.name);
        sub.setText(values[Keywords.XSDOC.ordinal()]);
        metadata.add(sub);
    }
    String messageId = values[Keywords.MessageId.ordinal()];
    if (messageId != null) {
        messageId = StringUtils.removeChevron(StringUtils.unescapeHTML(messageId, true, false)).trim();
        if (messageId.length() > 1) {
            Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.messageId.name);
            sub.setText(messageId);
            metadata.add(sub);
        }
    }
    String inReplyToId = values[Keywords.InReplyTo.ordinal()];
    if (inReplyToId != null) {
        inReplyToId = StringUtils.removeChevron(StringUtils.unescapeHTML(inReplyToId, true, false)).trim();
        if (inReplyToId.length() > 1) {
            Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.inReplyTo.name);
            sub.setText(inReplyToId);
            if (messageId != null && messageId.length() > 1) {
                // Record this message among the replies to the parent id (comma-separated list).
                String old = EmlExtract.filEmls.get(inReplyToId);
                if (old == null) {
                    old = messageId;
                } else {
                    old += "," + messageId;
                }
                EmlExtract.filEmls.put(inReplyToId, old);
            }
            metadata.add(sub);
        }
    }
    Element prop = XmlDom.factory.createElement(EMAIL_FIELDS.properties.name);
    String imp = values[Keywords.Importance.ordinal()];
    if (imp != null && imp.length() > 0) {
        prop.addAttribute(EMAIL_FIELDS.importance.name, toPriorityLabel(imp));
    }
    imp = values[Keywords.Priority.ordinal()];
    if (imp != null && imp.length() > 0) {
        prop.addAttribute(EMAIL_FIELDS.priority.name, toPriorityLabel(imp));
    }
    if (values[Keywords.Sensitivity.ordinal()] != null) {
        prop.addAttribute(EMAIL_FIELDS.sensitivity.name, values[Keywords.Sensitivity.ordinal()]);
    }
    AttachmentChunks[] files = msg.getAttachmentFiles();
    boolean hasAttachments = (files != null && files.length > 0);
    prop.addAttribute(EMAIL_FIELDS.hasAttachment.name, Boolean.toString(hasAttachments));
    metadata.add(prop);

    String result = "";
    Element identification = null;
    if (hasAttachments) {
        // Attachments go to a sub-directory named after the id only when files are extracted.
        File attachmentDir = curPath;
        if (config.extractFile) {
            attachmentDir = new File(curPath, id);
            attachmentDir.mkdir();
        }
        identification = XmlDom.factory.createElement(EMAIL_FIELDS.attachments.name);
        identification.addAttribute(EMAIL_FIELDS.attNumber.name, Integer.toString(files.length));
        for (AttachmentChunks attachment : files) {
            if (argument.extractKeyword) {
                result += " " + extractInfoAttachment(attachment, identification, argument, config,
                        attachmentDir);
            } else {
                extractInfoAttachment(attachment, identification, argument, config, attachmentDir);
            }
        }
    }
    // Body: prefer plain text, then HTML, then RTF.
    String body = "";
    if (argument.extractKeyword || config.extractFile) {
        try {
            body = msg.getTextBody();
        } catch (ChunkNotFoundException e2) {
            e2.printStackTrace();
        }
        boolean isTxt = true;
        boolean isHttp = false;
        if (body == null || body.isEmpty()) {
            isTxt = false;
            try {
                body = msg.getHtmlBody();
            } catch (ChunkNotFoundException e1) {
                e1.printStackTrace();
            }
            isHttp = true;
            if (body == null || body.isEmpty()) {
                isHttp = false;
                try {
                    body = msg.getRtfBody();
                } catch (ChunkNotFoundException e) {
                    e.printStackTrace();
                }
            }
        }
        if (body != null && !body.isEmpty() && config.extractFile) {
            // XXX FIXME could saved email from HTML Body (clearer) if possible
            // use curRank in name, and attachment will be under directory named
            // add currank in field
            File newDir = new File(curPath, id);
            newDir.mkdir();
            String filenamebody = messageId;
            // NOTE(review): this condition replaces every non-empty message id by the rank id
            // and keeps an empty one; it looks inverted, but changing it would alter the
            // names of the produced files - confirm the intent before fixing.
            if (filenamebody == null || !filenamebody.isEmpty()) {
                filenamebody = id;
            }
            String html = isHttp ? body : null;
            String rtf = (!isTxt && !isHttp) ? body : null;
            if (isTxt) {
                writeBodyFile(new File(newDir, filenamebody + ".txt"), body);
                // Also save the HTML variant when the message has one.
                try {
                    html = msg.getHtmlBody();
                } catch (ChunkNotFoundException e) {
                    e.printStackTrace();
                }
            }
            if (html != null && !html.isEmpty()) {
                writeBodyFile(new File(newDir, filenamebody + ".html"), html);
            }
            if (isTxt || isHttp) {
                // Also save the RTF variant when the message has one.
                try {
                    rtf = msg.getRtfBody();
                } catch (ChunkNotFoundException e) {
                    e.printStackTrace();
                }
            }
            if (rtf != null && !rtf.isEmpty()) {
                writeBodyFile(new File(newDir, filenamebody + ".rtf"), rtf);
            }
        }
    }
    if (metadata.hasContent()) {
        root.add(metadata);
    }
    if (identification != null && identification.hasContent()) {
        root.add(identification);
    }
    if (argument.extractKeyword) {
        // A missing body must not contribute the literal text "null" to the keyword input.
        result = (body == null ? "" : body) + " " + result;
        ExtractInfo.exportMetadata(keywords, result, "", config, null);
        if (keywords.hasContent()) {
            root.add(keywords);
        }
    }
    root.addAttribute(EMAIL_FIELDS.status.name, "ok");
    return result;
}

/**
 * Stores {@code addition} in {@code values[rank]}, appending it after {@code separator} when the
 * slot already holds a value.
 */
private static void appendHeaderValue(String[] values, int rank, String separator, String addition) {
    values[rank] = (values[rank] == null) ? addition : values[rank] + separator + addition;
}

/**
 * Maps a numeric level (1 to 5) to its label, any other number to {@code "LEV" + number}, and
 * returns a non-numeric value unchanged.
 */
private static String toPriorityLabel(String level) {
    try {
        int priority = Integer.parseInt(level);
        switch (priority) {
        case 5:
            return "LOWEST";
        case 4:
            return "LOW";
        case 3:
            return "NORMAL";
        case 2:
            return "HIGH";
        case 1:
            return "HIGHEST";
        default:
            return "LEV" + priority;
        }
    } catch (NumberFormatException e) {
        // Not a number: the header value is used as is.
        return level;
    }
}

/**
 * Writes {@code content} to {@code target} using the platform default charset. Failures are
 * printed and otherwise ignored, so one unwritable file does not stop the extraction.
 */
private static void writeBodyFile(File target, String content) {
    FileOutputStream output = null;
    try {
        output = new FileOutputStream(target);
        byte[] bytes = content.getBytes();
        output.write(bytes, 0, bytes.length);
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if (output != null) {
            try {
                output.close();
            } catch (IOException e) {
                // Nothing useful can be done when closing fails.
            }
        }
    }
}

From source file: org.alfresco.repo.content.transform.MSGParser.java

License:Apache License

/**
 * Process header./*from  w w w . j  a v  a2s. c  o  m*/
 *
 * @param msg
 *            the msg
 * @param metadata
 *            the metadata
 * @param xhtml
 *            the xhtml
 * @throws Exception
 *             the exception
 */
private void processHeader(MAPIMessage msg, Metadata metadata, XHTMLContentHandler xhtml) throws Exception {
    StringChunk subjectChunk = msg.getMainChunks().subjectChunk;
    if (msg.has7BitEncodingStrings()) {
        CharsetDetector detector = new CharsetDetector();
        detector.setText(subjectChunk.getRawValue());
        CharsetMatch detect = detector.detect();
        if (detect.getConfidence() >= 20) {
            subjectChunk.set7BitEncoding(detect.getName());
        }
    }
    String subject = subjectChunk.getValue();
    String from = msg.getDisplayFrom();

    metadata.set(DublinCore.CREATOR, from);
    metadata.set(Metadata.MESSAGE_FROM, from);
    metadata.set(Metadata.MESSAGE_TO, msg.getDisplayTo());
    metadata.set(Metadata.MESSAGE_CC, msg.getDisplayCC());
    metadata.set(Metadata.MESSAGE_BCC, msg.getDisplayBCC());

    metadata.set(DublinCore.TITLE, subject);
    metadata.set(DublinCore.SUBJECT, msg.getConversationTopic());

    try {
        for (String recipientAddress : msg.getRecipientEmailAddressList()) {
            if (recipientAddress != null)
                metadata.add(Metadata.MESSAGE_RECIPIENT_ADDRESS, recipientAddress);
        }
    } catch (ChunkNotFoundException he) {
    } // Will be fixed in POI 3.7 Final

    // Date - try two ways to find it
    // First try via the proper chunk
    if (msg.getMessageDate() != null) {
        metadata.set(DublinCore.DATE, msg.getMessageDate().getTime());
        metadata.set(Office.CREATION_DATE, msg.getMessageDate().getTime());
        metadata.set(Office.SAVE_DATE, msg.getMessageDate().getTime());
    } else {
        try {
            // Failing that try via the raw headers
            String[] headers = msg.getHeaders();
            if (headers != null && headers.length > 0) {
                for (String header : headers) {
                    if (header.toLowerCase().startsWith("date:")) {
                        String date = header.substring(header.indexOf(':') + 1).trim();

                        // See if we can parse it as a normal mail date
                        try {
                            Date d = MboxParser.parseDate(date);
                            metadata.set(DublinCore.DATE, d);
                            metadata.set(Office.CREATION_DATE, d);
                            metadata.set(Office.SAVE_DATE, d);
                        } catch (ParseException e) {
                            // Store it as-is, and hope for the best...
                            metadata.set(DublinCore.DATE, date);
                            metadata.set(Office.CREATION_DATE, date);
                            metadata.set(Office.SAVE_DATE, date);
                        }
                        break;
                    }
                }
            }
        } catch (ChunkNotFoundException he) {
            // We can't find the date, sorry...
        }
    }

    xhtml.element("h1", subject);

    // Output the from and to details in text, as you
    // often want them in text form for searching
    xhtml.startElement("dl");
    if (from != null) {
        header(xhtml, "From", from);
    }
    header(xhtml, "To", msg.getDisplayTo());
    header(xhtml, "Cc", msg.getDisplayCC());
    header(xhtml, "Bcc", msg.getDisplayBCC());
    try {
        header(xhtml, "Recipients", msg.getRecipientEmailAddress());
    } catch (ChunkNotFoundException e) {
    }
    List<String> attachmentList = new ArrayList<String>();
    // // prepare attachments
    prepareExtractMultipart(xhtml, message, attachmentList);
    if (attachmentList.size() > 0) {
        header(xhtml, "Attachments", attachmentList.toString());
    }
    xhtml.endElement("dl");

}

From source file: org.apache.tika.parser.microsoft.OutlookExtractor.java

License:Apache License

/**
 * Tries to identify the correct encoding for 7-bit (non-unicode)
 *  strings in the file.
 * <p>Many messages store their strings as unicode, which is
 *  nice and easy. Some use one-byte encodings for their
 *  strings, but don't always store the encoding anywhere
 *  helpful in the file.</p>
 * <p>This method checks for codepage properties, and failing that
 *  looks at the headers for the message, and uses these to
 *  guess the correct encoding for your file.</p>
 * <p>The candidates are tried in this order, stopping at the first one
 *  accepted by {@code tryToSet7BitEncoding}: a codepage property, the
 *  charset of a Content-Type header, the charset detected from the
 *  HTML body, and finally a charset detector run on the text body.</p>
 * <p>Bug #49441 has more on why this is needed</p>
 * <p>This is taken verbatim from POI (TIKA-1238)
 * as a temporary workaround to prevent unsupported encoding exceptions</p>
 *
 * @param msg the message whose 7-bit string encoding should be set
 */
private void guess7BitEncoding(MAPIMessage msg) {
    Chunks mainChunks = msg.getMainChunks();
    //sanity check
    if (mainChunks == null) {
        return;
    }

    Map<MAPIProperty, List<PropertyValue>> props = mainChunks.getProperties();
    if (props != null) {
        // First choice is a codepage property
        for (MAPIProperty prop : new MAPIProperty[] { MAPIProperty.MESSAGE_CODEPAGE,
                MAPIProperty.INTERNET_CPID }) {
            List<PropertyValue> val = props.get(prop);
            if (val != null && val.size() > 0) {
                int codepage = ((PropertyValue.LongPropertyValue) val.get(0)).getValue();
                String encoding = null;
                try {
                    encoding = CodePageUtil.codepageToEncoding(codepage, true);
                } catch (UnsupportedEncodingException e) {
                    // Unknown codepage: encoding stays null and the next candidate is tried.
                }
                if (tryToSet7BitEncoding(msg, encoding)) {
                    return;
                }
            }
        }
    }

    // Second choice is a charset on a content type header
    try {
        String[] headers = msg.getHeaders();
        if (headers != null && headers.length > 0) {
            // Look for a content type with a charset
            Pattern p = Pattern.compile("Content-Type:.*?charset=[\"']?([^;'\"]+)[\"']?",
                    Pattern.CASE_INSENSITIVE);

            for (String header : headers) {
                Matcher m = p.matcher(header);
                // lookingAt() anchors the pattern at the start of the header, so the header
                // name is matched case-insensitively like the rest of the pattern, and
                // parameters following the charset (e.g. "; format=flowed") are tolerated.
                if (m.lookingAt()) {
                    // Found it! Tell all the string chunks
                    String charset = m.group(1);
                    if (tryToSet7BitEncoding(msg, charset)) {
                        return;
                    }
                }
            }
        }
    } catch (ChunkNotFoundException e) {
        // No headers available: fall through to the next candidate.
    }

    // Nothing suitable in the headers, try HTML
    // TODO: do we need to replicate this in Tika? If we wind up
    // parsing the html version of the email, this is duplicative??
    // Or do we need to reset the header strings based on the html
    // meta header if there is no other information?
    try {
        String html = msg.getHtmlBody();
        if (html != null && html.length() > 0) {
            Charset charset = null;
            try {
                charset = detector.detect(new ByteArrayInputStream(html.getBytes(UTF_8)), EMPTY_METADATA);
            } catch (IOException e) {
                // Detection failed: charset stays null and the last resort is tried.
            }
            if (charset != null && tryToSet7BitEncoding(msg, charset.name())) {
                return;
            }
        }
    } catch (ChunkNotFoundException e) {
        // No HTML body: fall through to the last resort.
    }

    //absolute last resort, try charset detector
    StringChunk text = mainChunks.textBodyChunk;
    if (text != null) {
        // This local detector shadows the field of the same name used for the HTML body above.
        CharsetDetector detector = new CharsetDetector();
        detector.setText(text.getRawValue());
        CharsetMatch match = detector.detect();
        if (match != null && match.getConfidence() > 35) {
            tryToSet7BitEncoding(msg, match.getName());
        }
    }
}