Example usage for org.dom4j Element add

List of usage examples for org.dom4j Element add

Introduction

On this page you can find example usages of the org.dom4j Element add method.

Prototype

void add(Namespace namespace);

Source Link

Document

Adds the given Namespace to this element.

Usage

From source file:fr.gouv.culture.vitam.eml.EmlExtract.java

License:Open Source License

/**
 * Describes one attachment body part as a sub-identity element and appends it to
 * {@code identification}.
 * <p>
 * The part's file name, size, description and disposition are recorded when they can be read
 * (each lookup is best effort). The attachment content is then copied from {@code inputStream}
 * either into {@code argument.currentOutputDir} (when {@code config.extractFile} is set) or into a
 * temporary file, and handed to {@code Commands.addFormatIdentification}. A temporary file is
 * always removed before returning.
 *
 * @param identification parent element that receives the new sub-identity element
 * @param bp body part describing the attachment
 * @param inputStream content of the attachment; it is read to the end but not closed here
 * @param argument run-time options ({@code currentOutputDir}, {@code extractKeyword})
 * @param config configuration ({@code extractFile}, rank id generation)
 * @return when {@code argument.extractKeyword} is set and keywords were found, the keywords
 *         separated by spaces, each repeated {@code occurrences / 2 + 1} times; otherwise an empty
 *         string (also on any error)
 */
private static final String addSubIdentities(Element identification, BodyPart bp, InputStream inputStream,
        VitamArgument argument, ConfigLoader config) {
    Element newElt = XmlDom.factory.createElement(EMAIL_FIELDS.subidentity.name);
    String filename = null;
    String result = "";
    try {
        filename = bp.getFileName();
        filename = StringUtils.toFileName(filename);
        if (filename != null) {
            Element elt = XmlDom.factory.createElement(EMAIL_FIELDS.filename.name);
            elt.setText(filename);
            newElt.add(elt);
        }
    } catch (MessagingException ignored) {
        // Best effort: an unreadable file name must not prevent the attachment analysis.
    }
    if (filename == null) {
        // Applied after the try block so the fallback also covers a failed getFileName();
        // otherwise a null name would reach new File(parent, null) and throw a NullPointerException.
        filename = "eml.eml";
    }
    try {
        int size = bp.getSize();
        if (size > 0) {
            Element elt = XmlDom.factory.createElement(EMAIL_FIELDS.attSize.name);
            elt.setText(Integer.toString(size));
            newElt.add(elt);
        }
    } catch (MessagingException ignored) {
        // Best effort: the size is optional metadata.
    }
    try {
        String description = bp.getDescription();
        if (description != null) {
            Element elt = XmlDom.factory.createElement(EMAIL_FIELDS.description.name);
            elt.setText(description);
            newElt.add(elt);
        }
    } catch (MessagingException ignored) {
        // Best effort: the description is optional metadata.
    }
    try {
        String disposition = bp.getDisposition();
        if (disposition != null) {
            Element elt = XmlDom.factory.createElement(EMAIL_FIELDS.disposition.name);
            elt.setText(disposition);
            newElt.add(elt);
        }
    } catch (MessagingException ignored) {
        // Best effort: the disposition is optional metadata.
    }
    File filetemp = null;
    FileOutputStream outputStream = null;
    try {
        // Force out to analysis
        if (config.extractFile) {
            filetemp = new File(argument.currentOutputDir, filename);
        } else {
            filetemp = File.createTempFile(StaticValues.PREFIX_TEMPFILE, filename);
        }
        byte[] buffer = new byte[8192];
        int read = 0;
        outputStream = new FileOutputStream(filetemp);
        while ((read = inputStream.read(buffer)) >= 0) {
            outputStream.write(buffer, 0, read);
        }
        outputStream.close();
        outputStream = null;
    } catch (IOException e1) {
        // Close the stream first: deleting a file that is still open fails on some platforms.
        if (outputStream != null) {
            try {
                outputStream.close();
            } catch (IOException ignored) {
                // Nothing more can be done for a stream that fails to close.
            }
        }
        if (filetemp != null && !config.extractFile) {
            filetemp.delete();
        }
        String status = "Error during access to attachment";
        newElt.addAttribute(EMAIL_FIELDS.status.name, status);
        identification.add(newElt);
        return "";
    }
    try {
        Commands.addFormatIdentification(newElt, filename, filetemp, config, argument);
        if (argument.extractKeyword) {
            // get back keyword in the main list
            Element keyw = (Element) newElt.selectSingleNode(EMAIL_FIELDS.keywords.name);
            if (keyw != null) {
                StringBuilder builder = new StringBuilder();
                @SuppressWarnings("unchecked")
                List<Element> elts = (List<Element>) keyw.selectNodes(EMAIL_FIELDS.keywordRank.name);
                for (Element elt : elts) {
                    String value = elt.attributeValue(EMAIL_FIELDS.keywordOccur.name);
                    // Attachment keywords are weighted down: each word is repeated occur / 2 + 1 times.
                    int occur = Integer.parseInt(value) / 2 + 1;
                    @SuppressWarnings("unchecked")
                    List<Element> words = (List<Element>) elt.selectNodes(EMAIL_FIELDS.keywordWord.name);
                    for (Element eword : words) {
                        String word = eword.attributeValue(EMAIL_FIELDS.keywordValue.name) + " ";
                        for (int i = 0; i < occur; i++) {
                            builder.append(word);
                        }
                    }
                }
                result = builder.toString().trim();
            }
        }
    } catch (Exception e) {
        // Identification is a boundary: any failure is reported on the element instead of aborting.
        String status = "Error during identification";
        e.printStackTrace();
        config.addRankId(newElt);
        newElt.addAttribute(EMAIL_FIELDS.status.name, status);
    } finally {
        // The temporary copy is only needed for identification; extracted files are kept.
        if (filetemp != null && !config.extractFile) {
            filetemp.delete();
        }
    }
    identification.add(newElt);
    return result;
}

From source file:fr.gouv.culture.vitam.eml.EmlExtract.java

License:Open Source License

/**
 * Handles the content of one message: a textual body is described and saved, a multipart
 * content is delegated to {@code handleMultipartRecur}; any other content type is ignored.
 *
 * @param message message whose content is inspected
 * @param identification element receiving the description of the body
 * @param id identifier passed on to {@code saveBody} and to the multipart handling
 * @param argument run-time options; {@code extractKeyword} decides whether the multipart result
 *        is returned
 * @param config configuration passed on unchanged
 * @return for a textual body, a space followed by the value returned by {@code saveBody}; for a
 *         multipart content, the multipart result when {@code argument.extractKeyword} is set;
 *         otherwise an empty string
 * @throws IOException if the content or its stream cannot be read
 * @throws MessagingException if the message cannot be accessed
 */
private static final String handleMessageRecur(Message message, Element identification, String id,
        VitamArgument argument, ConfigLoader config) throws IOException, MessagingException {
    Object content = message.getContent();

    if (content instanceof Multipart) {
        // The parts are always processed; their keywords are only reported on request.
        Multipart parts = (Multipart) content;
        boolean wantKeywords = argument.extractKeyword;
        String nested = handleMultipartRecur(parts, identification, id, argument, config);
        return wantKeywords ? nested : "";
    }
    if (!(content instanceof String)) {
        return "";
    }

    String[] encodings = message.getHeader("Content-Transfer-Encoding");
    String encoding = (encodings != null && encodings.length > 0) ? encodings[0] : null;
    String[] typeInfo = extractContentType(message.getContentType(), encoding);

    Element bodyElt = XmlDom.factory.createElement("body");
    // Modelled on: <identity format="Internet Message Format" mime="message/rfc822" puid="fmt/278" extensions="eml"/>
    Element innerIdentification = XmlDom.factory.createElement("identification");
    Element identity = XmlDom.factory.createElement("identity");
    identity.addAttribute("format", "Internet Message Body Format");
    String mime = typeInfo[0];
    identity.addAttribute("mime", mime == null ? "unknown" : mime);
    String extension = typeInfo[3];
    // substring(1) drops the first character of the extension entry.
    identity.addAttribute("extensions", extension == null ? "unknown" : extension.substring(1));
    String charsetName = typeInfo[1];
    if (charsetName != null) {
        identity.addAttribute("charset", charsetName);
    }

    // NOTE(review): identity is attached to the parent identification while the nested
    // "identification" element stays empty - confirm whether this layout is intended.
    identification.add(identity);
    bodyElt.add(innerIdentification);
    identification.add(bodyElt);

    return " " + saveBody(message.getInputStream(), typeInfo, id, argument, config);
}

From source file:fr.gouv.culture.vitam.eml.EmlExtract.java

License:Open Source License

/**
 * Walks through every body part of a multipart content and handles it according to its type:
 * textual parts are described and saved, stream parts are treated as attachments, nested
 * messages and nested multiparts are processed recursively with {@code id + "_" + index} as
 * identifier. Parts of any other type are ignored.
 *
 * @param mp multipart content to walk through
 * @param identification element receiving the descriptions
 * @param id identifier of the enclosing message or part
 * @param argument run-time options; {@code extractKeyword} decides whether results of
 *        attachments and nested contents are accumulated
 * @param config configuration passed on unchanged
 * @return the accumulated results, each one preceded by a space (textual parts are always
 *         included), or an empty string
 * @throws MessagingException if a part cannot be accessed
 * @throws IOException if a part content cannot be read
 */
private static final String handleMultipartRecur(Multipart mp, Element identification, String id,
        VitamArgument argument, ConfigLoader config) throws MessagingException, IOException {
    int partCount = mp.getCount();
    StringBuilder collected = new StringBuilder();
    for (int index = 0; index < partCount; index++) {
        BodyPart part = mp.getBodyPart(index);
        Object content = part.getContent();

        if (content instanceof String) {
            String[] encodings = part.getHeader("Content-Transfer-Encoding");
            String encoding = (encodings != null && encodings.length > 0) ? encodings[0] : null;
            String[] typeInfo = extractContentType(part.getContentType(), encoding);

            Element bodyElt = XmlDom.factory.createElement("body");
            // Modelled on: <identity format="Internet Message Format" mime="message/rfc822" puid="fmt/278" extensions="eml"/>
            Element innerIdentification = XmlDom.factory.createElement("identification");
            Element identity = XmlDom.factory.createElement("identity");
            identity.addAttribute("format", "Internet Message Body Format");
            String mime = typeInfo[0];
            identity.addAttribute("mime", mime == null ? "unknown" : mime);
            String extension = typeInfo[3];
            // substring(1) drops the first character of the extension entry.
            identity.addAttribute("extensions", extension == null ? "unknown" : extension.substring(1));
            String charsetName = typeInfo[1];
            if (charsetName != null) {
                identity.addAttribute("charset", charsetName);
            }

            // NOTE(review): identity is attached to the parent identification while the nested
            // "identification" element stays empty - confirm whether this layout is intended.
            identification.add(identity);
            bodyElt.add(innerIdentification);
            identification.add(bodyElt);

            // Textual parts always contribute, whatever the keyword option says.
            collected.append(" ").append(saveBody(part.getInputStream(), typeInfo, id, argument, config));
            continue;
        }

        if (content instanceof InputStream) {
            // Attachment delivered as a raw stream.
            boolean wantKeywords = argument.extractKeyword;
            String found = addSubIdentities(identification, part, (InputStream) content, argument, config);
            if (wantKeywords) {
                collected.append(" ").append(found);
            }
            continue;
        }

        if (content instanceof Message) {
            // Embedded message.
            boolean wantKeywords = argument.extractKeyword;
            String found = handleMessageRecur((Message) content, identification, id + "_" + index, argument,
                    config);
            if (wantKeywords) {
                collected.append(" ").append(found);
            }
            continue;
        }

        if (content instanceof Multipart) {
            // Nested multipart.
            boolean wantKeywords = argument.extractKeyword;
            String found = handleMultipartRecur((Multipart) content, identification, id + "_" + index, argument,
                    config);
            if (wantKeywords) {
                collected.append(" ").append(found);
            }
        }
    }
    return collected.toString();
}

From source file:fr.gouv.culture.vitam.eml.MailboxParser.java

License:Open Source License

/**
 * Splits an mbox file into its messages and appends one element per message to {@code root}.
 * <p>
 * The file is read entirely with the configured {@code charset}. A line matching the
 * "From ..." separator pattern starts a new message only when at least
 * {@code HEADERTHRESHOLD} header-like lines follow it before the next separator; other
 * separator candidates are escaped with a leading {@code >} and stay part of the current
 * message. Each message is converted with {@code convertTextToMimeMessage} and analysed by
 * {@code EmlExtract.extractInfoMessage}. When {@code IGNOREDUPLICATES} is set, a message whose
 * text has the same hash code as an already handled one is skipped and counted in
 * {@code collisions}.
 *
 * @param mboxFile mbox file to read
 * @param root element receiving one child per message; on success it also receives the
 *        {@code nbEml} attribute taken from {@code config.nbDoc}
 * @return {@code root}, or {@code null} when the file cannot be read or when
 *         {@code numberEmails} is still 0 after the scan (the file is then not considered an mbox)
 */
private Element extractInfoMbox(File mboxFile, Element root) {
    collisions = 0;
    // NOTE(review): numberEmails is not reset here although it drives the "not a MBOX" test
    // below - confirm it is reset elsewhere when this parser is reused.
    HashSet<Integer> seenMessages = new HashSet<Integer>();
    final String ssep = System.getProperty("line.separator");

    // Open the file for reading
    FileInputStream inputStream = null;
    BufferedReader reader = null;
    try {
        inputStream = new FileInputStream(mboxFile);
        reader = new BufferedReader(new InputStreamReader(inputStream, charset));
        StringBuilder inputBuilder = new StringBuilder();
        String line;
        // Read the mbox file line by line
        while ((line = reader.readLine()) != null) {
            inputBuilder.append(line);
            inputBuilder.append(ssep);
        }

        String text = inputBuilder.toString();
        inputBuilder = null; // release the buffer before the text is split

        String[] rawlines = text.split("\r?\n|\r");
        if (debug) {
            System.err.println("Split file into " + rawlines.length + " lines");
        }

        // Separator format as written by the Apache JAMES server
        Pattern separatorPattern = Pattern.compile("^From (.*?) (.*?):(.*?):(.*?)$");
        // Header field name as defined by RFC 5322 (Oct 2008)
        Pattern headerPattern = Pattern.compile("^[\\x21-\\x39\\x3B-\\x7E]+:(.*)$");

        // Maps the line number of each separator candidate to the number of header lines seen after it
        int lastFoundSepLine = -1;
        Map<Integer, Integer> separatorsMap = new HashMap<Integer, Integer>();

        for (int line_num = 0; line_num < rawlines.length; line_num++) {
            String currentLine = rawlines[line_num];

            // If we found a header name line, credit it to the last separator candidate
            if (lastFoundSepLine != -1 && headerPattern.matcher(currentLine).matches()) {
                Integer numHeaders = separatorsMap.get(lastFoundSepLine);
                if (numHeaders != null) {
                    separatorsMap.put(lastFoundSepLine, numHeaders + 1);
                }
            }

            // If we found a separator line
            if (separatorPattern.matcher(currentLine).matches()) {
                lastFoundSepLine = line_num;
                separatorsMap.put(lastFoundSepLine, 0);
            }
        }
        // Treat the end of the file as potential separator ;-)
        separatorsMap.put(rawlines.length, HEADERTHRESHOLD);

        // Compose the messages
        // If we read at least HEADERTHRESHOLD many headers after the separator
        List<Integer> separators = new ArrayList<Integer>();
        for (Map.Entry<Integer, Integer> candidate : separatorsMap.entrySet()) {
            int candidateLine = candidate.getKey();
            if (candidate.getValue() >= HEADERTHRESHOLD) {
                separators.add(candidateLine);
            } else {
                // This line is a bogus separator line and should be escaped!!
                rawlines[candidateLine] = ">" + rawlines[candidateLine];
            }
        }

        Collections.sort(separators);

        for (int i = 0; i < separators.size() - 1; i++) {
            int startLine = separators.get(i);
            int endLine = separators.get(i + 1);
            if (debug) {
                System.err.println("Message from lines " + startLine + " - " + endLine + " ("
                        + (endLine - startLine + 1) + ")");
            } else {
                System.out.print('.');
            }
            // compose a raw message from the lines between two separators
            StringBuilder rawMsgBuilder = new StringBuilder();
            for (int l = startLine + 1; l < endLine; l++) {
                rawMsgBuilder.append(rawlines[l]).append(ssep);
            }
            String rawMessageText = rawMsgBuilder.toString().trim();
            int hashKey = rawMessageText.hashCode();
            if (!seenMessages.contains(hashKey)) {
                MimeMessage message = convertTextToMimeMessage(rawMessageText);
                if (message == null) {
                    Element newElt = XmlDom.factory.createElement(EMAIL_FIELDS.formatEML.name);
                    String status = "Error during identification";
                    newElt.addAttribute(EMAIL_FIELDS.status.name, status);
                    root.add(newElt);
                } else {
                    numberEmails++;
                    if (IGNOREDUPLICATES) {
                        seenMessages.add(hashKey);
                    }
                    Element emlroot = XmlDom.factory.createElement(EMAIL_FIELDS.formatEML.name);
                    // <identity format="Internet Message Format" mime="message/rfc822" puid="fmt/278" extensions="eml"/>
                    Element identification = XmlDom.factory.createElement("identification");
                    Element identity = XmlDom.factory.createElement("identity");
                    identity.addAttribute("format", "Internet Message Format");
                    identity.addAttribute("mime", "message/rfc822");
                    identity.addAttribute("puid", "fmt/278");
                    identity.addAttribute("extensions", "eml");
                    identification.add(identity);
                    emlroot.add(identification);
                    EmlExtract.extractInfoMessage(message, emlroot, argument, config);
                    root.add(emlroot);
                    // TODO (from an earlier FIXME): when config.extractFile is set, rawMessageText
                    // should also be written to an .eml file in a sub directory named after the rank id.
                }
            } else {
                if (debug) {
                    System.err.println("Duplicated message found");
                }
                collisions++;
            }
        }
        // end compose the last message if that one was valid
        if (numberEmails == 0) {
            // not a MBOX
            root = null;
        } else {
            System.err.println("Split into " + numberEmails + " messages!");
        }

    } catch (IOException e) {
        System.err.println(
                "Error while trying to read file: " + mboxFile.getAbsolutePath() + " " + e.getMessage());
        if (debug) {
            System.err.println("-------- Stacktrace ----------");
            e.printStackTrace();
        }
        return null;
    } finally {
        if (reader != null) {
            // Closing the reader also closes the wrapped file stream.
            try {
                reader.close();
            } catch (IOException ignored) {
                // Nothing more can be done for a reader that fails to close.
            }
        } else if (inputStream != null) {
            // The reader could not be created: close the already opened file stream directly.
            try {
                inputStream.close();
            } catch (IOException ignored) {
                // Nothing more can be done for a stream that fails to close.
            }
        }
    }
    if (root != null) {
        root.addAttribute("nbEml", config.nbDoc.toString());
    }

    return root;
}

From source file:fr.gouv.culture.vitam.eml.MsgExtract2.java

License:Open Source License

/**
 * Appends to {@code root} an element named {@code entry} describing one mail address.
 * <p>
 * The part selected by {@code StringUtils.selectChevron} becomes the address child; what
 * remains of {@code address} once {@code <address>} is removed becomes the name child. The
 * element is only attached to {@code root} when it received at least one child.
 *
 * @param root element receiving the new address element
 * @param entry name of the element to create
 * @param address raw address text, for instance a display name followed by {@code <address>}
 */
private static void addAddress(Element root, String entry, String address) {
    Element holder = XmlDom.factory.createElement(entry);

    String selected = StringUtils.selectChevron(address);
    String mail = (selected == null) ? "" : selected;

    // Whatever surrounds the bracketed address is kept as the display name.
    String displayName = address.replace("<" + mail + ">", "");
    if (!displayName.isEmpty()) {
        Element nameElt = XmlDom.factory.createElement(EMAIL_FIELDS.emailName.name);
        nameElt.setText(StringUtils.unescapeHTML(displayName, true, false));
        holder.add(nameElt);
    }

    if (!mail.isEmpty()) {
        Element mailElt = XmlDom.factory.createElement(EMAIL_FIELDS.emailAddress.name);
        mailElt.setText(StringUtils.unescapeHTML(mail, true, false));
        holder.add(mailElt);
    }

    if (holder.hasContent()) {
        root.add(holder);
    }
}

From source file:fr.gouv.culture.vitam.eml.MsgExtract2.java

License:Open Source License

private static String extractInfoSubEmail(MAPIMessage msg, File curDir, Element root, VitamArgument argument,
        ConfigLoader config) {//from  w  ww.j a  v a  2  s. c  o m
    File curPath = null;
    Element keywords = XmlDom.factory.createElement(EMAIL_FIELDS.keywords.name);
    Element metadata = XmlDom.factory.createElement(EMAIL_FIELDS.metadata.name);

    String id = config.addRankId(root);
    curPath = new File(curDir, "MSG_" + id);
    //System.out.println("start of "+id);
    String[] values = new String[Keywords.values().length];
    for (int i = 0; i < Keywords.values().length; i++) {
        values[i] = null;
    }
    String[] test = null;
    try {
        test = msg.getHeaders();
    } catch (ChunkNotFoundException e4) {
        // TODO Auto-generated catch block
        e4.printStackTrace();
    }
    int lastRank = -1;
    for (String string : test) {
        if (string.startsWith(Keywords.NextOne.name) && lastRank >= 0) {
            String recv = string.substring(Keywords.NextOne.name.length());
            if (values[lastRank] == null) {
                values[lastRank] = recv;
            } else {
                values[lastRank] += (Keywords.Received.ordinal() == lastRank ? "\n" : " ") + recv;
            }
        } else {
            if (string.startsWith(Keywords.Date.name)) {
                values[Keywords.Date.ordinal()] = string.substring(Keywords.Date.name.length());
                lastRank = -1;
            } else if (string.startsWith(Keywords.XOriginalArrivalTime.name)) {
                values[Keywords.XOriginalArrivalTime.ordinal()] = string
                        .substring(Keywords.XOriginalArrivalTime.name.length());
                int pos = values[Keywords.XOriginalArrivalTime.ordinal()].indexOf(" FILETIME=");
                if (pos > 0) {
                    values[Keywords.XOriginalArrivalTime
                            .ordinal()] = values[Keywords.XOriginalArrivalTime.ordinal()].substring(0, pos);
                }
                lastRank = -1;
            } else if (string.startsWith(Keywords.MessageId.name)) {
                values[Keywords.MessageId.ordinal()] = string.substring(Keywords.MessageId.name.length());
                values[Keywords.MessageId.ordinal()] = StringUtils
                        .removeChevron(
                                StringUtils.unescapeHTML(values[Keywords.MessageId.ordinal()], true, false))
                        .trim();
                lastRank = -1;
            } else if (string.startsWith(Keywords.InReplyTo.name)) {
                String reply = StringUtils.removeChevron(StringUtils
                        .unescapeHTML(string.substring(Keywords.InReplyTo.name.length()), true, false));
                if (values[Keywords.InReplyTo.ordinal()] == null) {
                    values[Keywords.InReplyTo.ordinal()] = reply;
                } else {
                    values[Keywords.InReplyTo.ordinal()] += " " + reply;
                }
                lastRank = Keywords.InReplyTo.ordinal();
            } else if (string.startsWith(Keywords.Received.name)) {
                String recv = string.substring(Keywords.Received.name.length());
                if (values[Keywords.Received.ordinal()] == null) {
                    values[Keywords.Received.ordinal()] = recv;
                } else {
                    values[Keywords.Received.ordinal()] += "\n" + recv;
                }
                lastRank = Keywords.Received.ordinal();
            } else if (string.startsWith(Keywords.From.name)) {
                values[Keywords.From.ordinal()] = string.substring(Keywords.From.name.length());
                lastRank = -1;
            } else if (string.startsWith(Keywords.To.name)) {
                if (values[Keywords.To.ordinal()] == null) {
                    values[Keywords.To.ordinal()] = string.substring(Keywords.To.name.length());
                } else {
                    values[Keywords.To.ordinal()] += " " + string.substring(Keywords.To.name.length());
                }
                lastRank = Keywords.To.ordinal();
            } else if (string.startsWith(Keywords.Cc.name)) {
                if (values[Keywords.Cc.ordinal()] == null) {
                    values[Keywords.Cc.ordinal()] = string.substring(Keywords.Cc.name.length());
                } else {
                    values[Keywords.Cc.ordinal()] += " " + string.substring(Keywords.Cc.name.length());
                }
                lastRank = Keywords.Cc.ordinal();
            } else if (string.startsWith(Keywords.Bcc.name)) {
                if (values[Keywords.Bcc.ordinal()] == null) {
                    values[Keywords.Bcc.ordinal()] = string.substring(Keywords.Bcc.name.length());
                } else {
                    values[Keywords.Bcc.ordinal()] += " " + string.substring(Keywords.Bcc.name.length());
                }
                lastRank = Keywords.Bcc.ordinal();
            } else if (string.startsWith(Keywords.ReturnPath.name)) {
                if (values[Keywords.ReturnPath.ordinal()] == null) {
                    values[Keywords.ReturnPath.ordinal()] = string.substring(Keywords.ReturnPath.name.length());
                } else {
                    values[Keywords.ReturnPath.ordinal()] += " "
                            + string.substring(Keywords.ReturnPath.name.length());
                }
                lastRank = Keywords.ReturnPath.ordinal();
            } else if (string.startsWith(Keywords.Importance.name)) {
                values[Keywords.Importance.ordinal()] = string.substring(Keywords.Importance.name.length());
                lastRank = -1;
            } else if (string.startsWith(Keywords.Priority.name)) {
                values[Keywords.Priority.ordinal()] = string.substring(Keywords.Priority.name.length());
                lastRank = -1;
            } else if (string.startsWith(Keywords.XFolder.name)) {
                values[Keywords.XFolder.ordinal()] = string.substring(Keywords.XFolder.name.length());
                lastRank = -1;
            } else if (string.startsWith(Keywords.XSDOC.name)) {
                values[Keywords.XSDOC.ordinal()] = string.substring(Keywords.XSDOC.name.length());
                lastRank = -1;
            } else if (string.startsWith(Keywords.Sensitivity.name)) {
                values[Keywords.Sensitivity.ordinal()] = string.substring(Keywords.Sensitivity.name.length());
                lastRank = -1;
            } else {
                lastRank = -1;
            }
        }
    }
    /*for (int i = 0; i < Keywords.values().length; i++) {
       System.out.println(Keywords.values()[i].name()+": "+values[i]);
    }*/

    if (values[Keywords.XFolder.ordinal()] != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.folder.name);
        sub.addAttribute(EMAIL_FIELDS.folderName.name, values[Keywords.XFolder.ordinal()]);
        metadata.add(sub);
    }
    String fromEmail = values[Keywords.From.ordinal()];
    if (fromEmail != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.from.name);
        addAddress(sub, EMAIL_FIELDS.fromUnit.name, fromEmail);
        String fromEmail2 = values[Keywords.ReturnPath.ordinal()];
        if (fromEmail2 != null && !fromEmail.contains(fromEmail2)) {
            addAddress(sub, EMAIL_FIELDS.fromUnit.name, fromEmail2);
        }
        metadata.add(sub);
    } else {
        String fromEmail2 = values[Keywords.ReturnPath.ordinal()];
        if (fromEmail2 != null) {
            Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.from.name);
            addAddress(sub, EMAIL_FIELDS.fromUnit.name, fromEmail2);
            metadata.add(sub);
        }
    }
    fromEmail = values[Keywords.To.ordinal()];
    if (fromEmail != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.toRecipients.name);
        String[] to = fromEmail.split(",");
        for (String string2 : to) {
            addAddress(sub, EMAIL_FIELDS.toUnit.name, string2);
        }
        metadata.add(sub);
    }
    fromEmail = values[Keywords.Cc.ordinal()];
    if (fromEmail != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.ccRecipients.name);
        String[] to = fromEmail.split(",");
        for (String string2 : to) {
            addAddress(sub, EMAIL_FIELDS.ccUnit.name, string2);
        }
        metadata.add(sub);
    }
    fromEmail = values[Keywords.Bcc.ordinal()];
    if (fromEmail != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.bccRecipients.name);
        String[] to = fromEmail.split(",");
        for (String string2 : to) {
            addAddress(sub, EMAIL_FIELDS.bccUnit.name, string2);
        }
        metadata.add(sub);
    }

    String subject = null;
    try {
        subject = msg.getSubject();
    } catch (ChunkNotFoundException e3) {
        // TODO Auto-generated catch block
        e3.printStackTrace();
    }
    if (subject != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.subject.name);
        sub.setText(StringUtils.unescapeHTML(subject, true, false));
        metadata.add(sub);
    }
    subject = null;
    try {
        subject = msg.getConversationTopic();
    } catch (ChunkNotFoundException e3) {
        //System.err.println(e3.getMessage());
    }
    if (subject != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.conversationTopic.name);
        sub.setText(StringUtils.unescapeHTML(subject, true, false));
        metadata.add(sub);
    }
    if (values[Keywords.Date.ordinal()] != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.sentDate.name);
        sub.setText(values[Keywords.Date.ordinal()]);
        metadata.add(sub);
    }
    if (values[Keywords.XOriginalArrivalTime.ordinal()] != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.receivedDate.name);
        sub.setText(values[Keywords.XOriginalArrivalTime.ordinal()]);
        metadata.add(sub);
    }
    if (values[Keywords.Received.ordinal()] != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.receptionTrace.name);
        String[] traces = values[Keywords.Received.ordinal()].split("\n");
        for (String string : traces) {
            Element sub3 = XmlDom.factory.createElement(EMAIL_FIELDS.trace.name);
            sub3.setText(string);
            sub.add(sub3);
        }
        metadata.add(sub);
    }
    if (values[Keywords.XSDOC.ordinal()] != null) {
        Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.emailSize.name);
        sub.setText(values[Keywords.XSDOC.ordinal()]);
        metadata.add(sub);
    }
    String messageId = values[Keywords.MessageId.ordinal()];
    if (messageId != null) {
        messageId = StringUtils.removeChevron(StringUtils.unescapeHTML(messageId, true, false)).trim();
        if (messageId.length() > 1) {
            Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.messageId.name);
            sub.setText(messageId);
            metadata.add(sub);
        }
    }
    String InReplyToId = values[Keywords.InReplyTo.ordinal()];
    if (InReplyToId != null) {
        InReplyToId = StringUtils.removeChevron(StringUtils.unescapeHTML(InReplyToId, true, false)).trim();
        if (InReplyToId.length() > 1) {
            Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.inReplyTo.name);
            sub.setText(InReplyToId);
            if (messageId != null && messageId.length() > 1) {
                String old = EmlExtract.filEmls.get(InReplyToId);
                if (old == null) {
                    old = messageId;
                } else {
                    old += "," + messageId;
                }
                EmlExtract.filEmls.put(InReplyToId, old);
            }
            metadata.add(sub);
        }
        InReplyToId = null;
    }
    Element prop = XmlDom.factory.createElement(EMAIL_FIELDS.properties.name);
    String imp = values[Keywords.Importance.ordinal()];
    if (imp != null && imp.length() > 0) {
        try {
            int Priority = Integer.parseInt(imp);
            switch (Priority) {
            case 5:
                imp = "LOWEST";
                break;
            case 4:
                imp = "LOW";
                break;
            case 3:
                imp = "NORMAL";
                break;
            case 2:
                imp = "HIGH";
                break;
            case 1:
                imp = "HIGHEST";
                break;
            default:
                imp = "LEV" + Priority;
            }
        } catch (NumberFormatException e) {
            // ignore since imp will be used as returned
        }
        prop.addAttribute(EMAIL_FIELDS.importance.name, imp);
    }
    imp = values[Keywords.Priority.ordinal()];
    if (imp != null && imp.length() > 0) {
        try {
            int Priority = Integer.parseInt(imp);
            switch (Priority) {
            case 5:
                imp = "LOWEST";
                break;
            case 4:
                imp = "LOW";
                break;
            case 3:
                imp = "NORMAL";
                break;
            case 2:
                imp = "HIGH";
                break;
            case 1:
                imp = "HIGHEST";
                break;
            default:
                imp = "LEV" + Priority;
            }
        } catch (NumberFormatException e) {
            // ignore since imp will be used as returned
        }
        prop.addAttribute(EMAIL_FIELDS.priority.name, imp);
    }
    if (values[Keywords.Sensitivity.ordinal()] != null) {
        prop.addAttribute(EMAIL_FIELDS.sensitivity.name, values[Keywords.Sensitivity.ordinal()]);
    }
    AttachmentChunks[] files = msg.getAttachmentFiles();
    boolean Attachments = (files != null && files.length > 0);
    prop.addAttribute(EMAIL_FIELDS.hasAttachment.name, Boolean.toString(Attachments));
    metadata.add(prop);

    String result = "";
    Element identification = null;
    if (Attachments) {
        File oldPath = curPath;
        if (config.extractFile) {
            File newDir = new File(curPath, id);
            newDir.mkdir();
            curPath = newDir;
        }
        identification = XmlDom.factory.createElement(EMAIL_FIELDS.attachments.name);
        // get the number of attachments for this message
        int NumberOfAttachments = files.length;
        identification.addAttribute(EMAIL_FIELDS.attNumber.name, Integer.toString(NumberOfAttachments));
        // get a specific attachment from this email.
        for (int attachmentNumber = 0; attachmentNumber < NumberOfAttachments; attachmentNumber++) {
            AttachmentChunks attachment = files[attachmentNumber];
            if (argument.extractKeyword) {
                result += " " + extractInfoAttachment(attachment, identification, argument, config, curPath);
            } else {
                extractInfoAttachment(attachment, identification, argument, config, curPath);
            }
        }
        curPath = oldPath;
    }
    // Plain text e-mail body
    String body = "";
    if (argument.extractKeyword || config.extractFile) {
        try {
            body = msg.getTextBody();
        } catch (ChunkNotFoundException e2) {
            // TODO Auto-generated catch block
            e2.printStackTrace();
        }
        boolean isTxt = true;
        boolean isHttp = false;
        if (body == null || body.isEmpty()) {
            isTxt = false;
            try {
                body = msg.getHtmlBody();
            } catch (ChunkNotFoundException e1) {
                // TODO Auto-generated catch block
                e1.printStackTrace();
            }
            isHttp = true;
            if (body == null || body.isEmpty()) {
                isHttp = false;
                try {
                    body = msg.getRtfBody();
                } catch (ChunkNotFoundException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
            }
        }
        if (body != null && !body.isEmpty()) {
            if (config.extractFile) {
                // XXX FIXME could saved email from HTML Body (clearer) if possible
                // use curRank in name, and attachment will be under directory named
                // add currank in field
                File newDir = new File(curPath, id);
                newDir.mkdir();
                String filenamebody = messageId;
                if (filenamebody == null || !filenamebody.isEmpty()) {
                    filenamebody = id;
                }
                String html = null;
                if (isHttp) {
                    html = body;
                }
                String rtf = null;
                if (!isTxt && !isHttp) {
                    rtf = body;
                }
                if (isTxt) {
                    FileOutputStream output = null;
                    try {
                        output = new FileOutputStream(new File(newDir, filenamebody + ".txt"));
                        byte[] bb = body.getBytes();
                        output.write(bb, 0, bb.length);
                    } catch (FileNotFoundException e) {
                        e.printStackTrace();
                    } catch (IOException e) {
                        e.printStackTrace();
                    } finally {
                        if (output != null) {
                            try {
                                output.close();
                            } catch (IOException e) {
                            }
                        }
                    }
                    try {
                        html = msg.getHtmlBody();
                    } catch (ChunkNotFoundException e) {
                        // TODO Auto-generated catch block
                        e.printStackTrace();
                    }
                }
                if (html != null && !html.isEmpty()) {
                    FileOutputStream output = null;
                    try {
                        output = new FileOutputStream(new File(newDir, filenamebody + ".html"));
                        byte[] bb = html.getBytes();
                        output.write(bb, 0, bb.length);
                    } catch (FileNotFoundException e) {
                        e.printStackTrace();
                    } catch (IOException e) {
                        e.printStackTrace();
                    } finally {
                        if (output != null) {
                            try {
                                output.close();
                            } catch (IOException e) {
                            }
                        }
                    }
                    html = null;
                }
                if (isTxt || isHttp) {
                    try {
                        rtf = msg.getRtfBody();
                    } catch (ChunkNotFoundException e) {
                        // TODO Auto-generated catch block
                        e.printStackTrace();
                    }
                }
                if (rtf != null && !rtf.isEmpty()) {
                    FileOutputStream output = null;
                    try {
                        output = new FileOutputStream(new File(newDir, filenamebody + ".rtf"));
                        byte[] bb = rtf.getBytes();
                        output.write(bb, 0, bb.length);
                    } catch (FileNotFoundException e) {
                        e.printStackTrace();
                    } catch (IOException e) {
                        e.printStackTrace();
                    } finally {
                        if (output != null) {
                            try {
                                output.close();
                            } catch (IOException e) {
                            }
                        }
                    }
                    rtf = null;
                }
            }
        }
    }
    if (metadata.hasContent()) {
        root.add(metadata);
    }
    if (identification != null && identification.hasContent()) {
        root.add(identification);
    }
    if (argument.extractKeyword) {
        result = body + " " + result;
        body = null;
        ExtractInfo.exportMetadata(keywords, result, "", config, null);
        if (keywords.hasContent()) {
            root.add(keywords);
        }
    }
    root.addAttribute(EMAIL_FIELDS.status.name, "ok");
    //System.out.println("end of "+id);
    return result;
}

From source file:fr.gouv.culture.vitam.eml.MsgExtract2.java

License:Open Source License

/**
 * Describes one attachment of an MSG message as a {@code subidentity} element that is
 * appended to {@code identification}.
 * <p>
 * The attachment bytes are written to a file (inside {@code curPath} when
 * {@code config.extractFile} is set, otherwise to a temporary file that is deleted before
 * returning) and that file is handed to {@code Commands.addFormatIdentification}. The file
 * name, the size (when positive) and the MIME tag (when present) are then added as child
 * elements. On any failure a {@code status} attribute describing the error is set on the
 * element, which is still attached to {@code identification}.
 *
 * @param fatt           attachment chunks to describe
 * @param identification parent element receiving the new sub-identity element
 * @param argument       run options; {@code extractKeyword} enables keyword collection
 * @param config         configuration ({@code extractFile} flag, {@code nbDoc} counter)
 * @param curPath        directory receiving the file when {@code config.extractFile} is set
 * @return the keyword text rebuilt from the identification result when
 *         {@code argument.extractKeyword} is set; an empty string otherwise or on error
 */
private static String extractInfoAttachment(AttachmentChunks fatt, Element identification,
        VitamArgument argument, ConfigLoader config, File curPath) {
    Element newElt = XmlDom.factory.createElement(EMAIL_FIELDS.subidentity.name);
    String result = "";

    // Guard against an attachment without a data chunk instead of failing with a
    // NullPointerException that would abort the extraction of the whole message.
    // NOTE(review): presumably this happens for embedded messages - confirm against POI.
    byte[] bytes = (fatt.attachData != null) ? fatt.attachData.getValue() : null;
    if (bytes == null) {
        config.addRankId(newElt);
        newElt.addAttribute(EMAIL_FIELDS.status.name, "Error during access to attachment");
        identification.add(newElt);
        return "";
    }
    long size = bytes.length;

    // Prefer the long file name, fall back to the short one; either chunk may be missing.
    String filename = (fatt.attachLongFileName != null) ? fatt.attachLongFileName.toString() : null;
    if (filename == null || filename.isEmpty()) {
        filename = (fatt.attachFileName != null) ? fatt.attachFileName.toString() : null;
    }
    if (filename == null) {
        filename = "";
    }
    filename = StringUtils.toFileName(filename);
    if (filename == null) {
        // Defensive: keeps the isEmpty() test below safe whatever toFileName returns.
        filename = "";
    }

    FileOutputStream out = null;
    File filetemp = null;
    try {
        String tempfilename = filename.isEmpty() ? (config.nbDoc.get() + 1) + "_unknownAttachment.msg"
                : filename;
        if (config.extractFile) {
            filetemp = new File(curPath, tempfilename);
        } else {
            filetemp = File.createTempFile(StaticValues.PREFIX_TEMPFILE, tempfilename);
        }
        out = new FileOutputStream(filetemp);
        out.write(bytes);
        // Close before identification so the complete content is on disk when it is read.
        out.close();
        // Now check file against Droid or more
        try {
            Commands.addFormatIdentification(newElt, filename, filetemp, config, argument);
            if (argument.extractKeyword) {
                // get back keyword in the main list
                Element keyw = (Element) newElt.selectSingleNode(EMAIL_FIELDS.keywords.name);
                if (keyw != null) {
                    StringBuilder builder = new StringBuilder();
                    @SuppressWarnings("unchecked")
                    List<Element> elts = (List<Element>) keyw.selectNodes(EMAIL_FIELDS.keywordRank.name);
                    for (Element elt : elts) {
                        String value = elt.attributeValue(EMAIL_FIELDS.keywordOccur.name);
                        // Each word of a rank is repeated (occurrences / 2 + 1) times.
                        int occur = Integer.parseInt(value) / 2 + 1;
                        @SuppressWarnings("unchecked")
                        List<Element> words = (List<Element>) elt.selectNodes(EMAIL_FIELDS.keywordWord.name);
                        for (Element eword : words) {
                            String word = eword.attributeValue(EMAIL_FIELDS.keywordValue.name) + " ";
                            for (int i = 0; i < occur; i++) {
                                builder.append(word);
                            }
                        }
                    }
                    result = builder.toString().trim();
                }
            }
        } catch (Exception e) {
            config.addRankId(newElt);
            String status = "Error during identification";
            e.printStackTrace();
            newElt.addAttribute(EMAIL_FIELDS.status.name, status);
            // Attach the element so the error status is reported, as the I/O error path does.
            identification.add(newElt);
            return "";
        }
    } catch (IOException e) {
        config.addRankId(newElt);
        e.printStackTrace();
        String status = "Error during access to attachment";
        newElt.addAttribute(EMAIL_FIELDS.status.name, status);
        identification.add(newElt);
        return "";
    } finally {
        // Close first: a file that is still open cannot be deleted on every platform.
        if (out != null) {
            try {
                out.close();
            } catch (IOException e2) {
                // nothing more can be done for a stream that fails to close
            }
        }
        if (filetemp != null && !config.extractFile) {
            filetemp.delete();
        }
    }

    Element nameElt = XmlDom.factory.createElement(EMAIL_FIELDS.filename.name);
    nameElt.setText(filename);
    newElt.add(nameElt);
    if (size > 0) {
        Element elt = XmlDom.factory.createElement(EMAIL_FIELDS.attSize.name);
        elt.setText(Long.toString(size));
        newElt.add(elt);
    }
    String mimetag = (fatt.attachMimeTag != null) ? fatt.attachMimeTag.toString() : null;
    if (mimetag != null) {
        Element elt = XmlDom.factory.createElement("attchmentMimeType");
        elt.setText(mimetag);
        newElt.add(elt);
    }
    identification.add(newElt);
    return result;
}

From source file:fr.gouv.culture.vitam.eml.PstExtract.java

License:Open Source License

/**
 * Walks a PST folder recursively: first its sub-folders, then its own messages.
 * <p>
 * For every sub-folder a {@code folder} element is created, filled by the recursive call
 * (through {@code currentRoot}) and added to the element that was current on entry. When
 * separate XML output is enabled and a writer is available, the filled element is written
 * to an {@code info_<name>.xml} file and a short summary element is added instead.
 * {@code curPath}, {@code argument.currentOutputDir} and {@code currentRoot} are put back
 * to their previous values after each sub-folder.
 *
 * @param folder the folder to explore
 */
private void extractInfoFolder(PSTFolder folder) {
    depth++;
    final Element parentElement = currentRoot;
    // The root folder has no display name, so nothing is printed for it.
    if (depth > 0) {
        printDepth();
        System.out.println(folder.getDisplayName());
    }

    // Sub-folders first.
    if (folder.hasSubfolders()) {
        try {
            Vector<PSTFolder> subFolders = folder.getSubFolders();
            for (PSTFolder sub : subFolders) {
                Element folderElement = XmlDom.factory.createElement(EMAIL_FIELDS.folder.name);
                folderElement.addAttribute(EMAIL_FIELDS.folderName.name, sub.getDisplayName());
                final File savedPath = curPath;
                if (config.extractFile || (extractSeparateXmlFolder && writer != null)) {
                    // XXX FIXME multiple output
                    curPath = new File(curPath, sub.getDisplayName());
                    curPath.mkdirs();
                    argument.currentOutputDir = curPath;
                    folderElement.addAttribute(EMAIL_FIELDS.folderFile.name, curPath.getPath());
                }
                currentRoot = folderElement;
                // The document counter difference gives the messages found below this folder.
                final long countBefore = config.nbDoc.get();
                extractInfoFolder(sub);
                final long countAfter = config.nbDoc.get();
                final String nbSubMsg = Long.toString(countAfter - countBefore);
                currentRoot.addAttribute("nbSubMsg", nbSubMsg);
                currentRoot.addAttribute(EMAIL_FIELDS.status.name, "ok");
                if (extractSeparateXmlFolder && writer != null) {
                    File separate = new File(curPath, "info_" + sub.getDisplayName() + ".xml");
                    FileOutputStream xmlOut = null;
                    try {
                        xmlOut = new FileOutputStream(separate);
                        writer.setOutputStream(xmlOut);
                        writer.write(folderElement);
                        writer.flush();
                        // The details now live in the separate file: keep only a summary here.
                        Element summary = XmlDom.factory.createElement(EMAIL_FIELDS.folder.name);
                        summary.addAttribute(EMAIL_FIELDS.folderName.name, sub.getDisplayName());
                        summary.addAttribute(EMAIL_FIELDS.filename.name, separate.getPath());
                        summary.addAttribute("nbSubMsg", nbSubMsg);
                        summary.addAttribute(EMAIL_FIELDS.status.name, "ok");
                        folderElement = summary;
                    } catch (UnsupportedEncodingException e) {
                        // ignored: the full element is added to the parent instead
                    } catch (IOException e) {
                        // ignored: the full element is added to the parent instead
                    } finally {
                        if (xmlOut != null) {
                            try {
                                xmlOut.close();
                            } catch (IOException e) {
                                // ignored
                            }
                        }
                    }
                }
                parentElement.add(folderElement);

                // XXX FIXME if multiple files as output => curPath + metadata.xml =
                // currentRoot to duplicate (single node) and detach to save
                curPath = savedPath;
                argument.currentOutputDir = curPath;
                currentRoot = parentElement;
            }
        } catch (PSTException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    // Then the messages stored directly in this folder.
    if (folder.getContentCount() > 0) {
        depth++;
        currentRoot = parentElement;
        try {
            for (PSTMessage email = (PSTMessage) folder.getNextChild(); email != null;
                    email = (PSTMessage) folder.getNextChild()) {
                // one dot per message as a progress indicator
                System.out.print('.');
                extractInfoMessage(email);
            }
        } catch (PSTException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        depth--;
        System.out.println();
    }
    depth--;
}

From source file:fr.gouv.culture.vitam.eml.PstExtract.java

License:Open Source License

/**
 * Describes one PST attachment as a {@code subidentity} element that is appended to
 * {@code identification}.
 * <p>
 * When the attachment holds an embedded message, that message is extracted with
 * {@code extractInfoMessage} using the new element as {@code currentRoot}. Otherwise the
 * attachment content is copied to a file (inside {@code curPath} when
 * {@code config.extractFile} is set, otherwise to a temporary file that is deleted before
 * returning), handed to {@code Commands.addFormatIdentification}, and the file name and
 * size are added as child elements. On failure a {@code status} attribute describing the
 * error is set on the element, which is still attached to {@code identification}.
 *
 * @param attachment     the attachment to describe
 * @param identification parent element receiving the new sub-identity element
 * @return the keyword text collected for the attachment when
 *         {@code argument.extractKeyword} is set; an empty string otherwise or on error
 */
private String extractInfoAttachment(PSTAttachment attachment, Element identification) {
    Element newElt = XmlDom.factory.createElement(EMAIL_FIELDS.subidentity.name);
    String filename = null;
    String result = "";
    Date creationTime = attachment.getCreationTime();
    if (creationTime != null) {
        Element elt = XmlDom.factory.createElement(EMAIL_FIELDS.creationTime.name);
        elt.setText(creationTime.toString());
        newElt.add(elt);
    }
    boolean isMsg = false;
    try {
        PSTMessage msg = attachment.getEmbeddedPSTMessage();
        if (msg != null) {
            Element cur = currentRoot;
            currentRoot = newElt;
            try {
                if (argument.extractKeyword) {
                    result = extractInfoMessage(msg);
                } else {
                    extractInfoMessage(msg);
                }
            } finally {
                // Always restore the current root, even when the nested extraction fails.
                currentRoot = cur;
            }
            isMsg = true;
        }
    } catch (IOException e) {
        // best effort: fall through and handle the attachment as a regular file
    } catch (PSTException e) {
        // best effort: fall through and handle the attachment as a regular file
    }
    if (!isMsg) {
        // Check file
        filename = attachment.getLongFilename();
        if (filename.isEmpty()) {
            filename = attachment.getFilename();
        }
        filename = StringUtils.toFileName(filename);
        long size = 0;
        InputStream attachmentStream = null;
        FileOutputStream out = null;
        File filetemp = null;
        try {
            attachmentStream = attachment.getFileInputStream();
            String tempfilename = filename.isEmpty() ? (config.nbDoc.get() + 1) + "_unknownAttachment.eml"
                    : filename;
            if (config.extractFile) {
                filetemp = new File(curPath, tempfilename);
            } else {
                filetemp = File.createTempFile(StaticValues.PREFIX_TEMPFILE, tempfilename);
            }
            out = new FileOutputStream(filetemp);
            // 8176 is the block size used internally and should give the best performance
            byte[] buffer = new byte[8176];
            int count;
            // Copy until end of stream: a read may legally return fewer bytes than the
            // buffer size before the end, so a short read must not stop the copy.
            while ((count = attachmentStream.read(buffer)) > 0) {
                out.write(buffer, 0, count);
                size += count;
            }
            // Close before identification so the complete content is on disk when it is read.
            out.close();
            attachmentStream.close();
            // Now check file against Droid or more
            try {
                Commands.addFormatIdentification(newElt, filename, filetemp, config, argument);
                if (argument.extractKeyword) {
                    // get back keyword in the main list
                    Element keyw = (Element) newElt.selectSingleNode(EMAIL_FIELDS.keywords.name);
                    if (keyw != null) {
                        StringBuilder builder = new StringBuilder();
                        @SuppressWarnings("unchecked")
                        List<Element> elts = (List<Element>) keyw
                                .selectNodes(EMAIL_FIELDS.keywordRank.name);
                        for (Element elt : elts) {
                            String value = elt.attributeValue(EMAIL_FIELDS.keywordOccur.name);
                            // Each word of a rank is repeated (occurrences / 2 + 1) times.
                            int occur = Integer.parseInt(value) / 2 + 1;
                            @SuppressWarnings("unchecked")
                            List<Element> words = (List<Element>) elt
                                    .selectNodes(EMAIL_FIELDS.keywordWord.name);
                            for (Element eword : words) {
                                String word = eword.attributeValue(EMAIL_FIELDS.keywordValue.name) + " ";
                                for (int i = 0; i < occur; i++) {
                                    builder.append(word);
                                }
                            }
                        }
                        result = builder.toString().trim();
                    }
                }
            } catch (Exception e) {
                config.addRankId(newElt);
                String status = "Error during identification";
                e.printStackTrace();
                newElt.addAttribute(EMAIL_FIELDS.status.name, status);
                // Attach the element so the error status is reported, as the access
                // error paths below do.
                identification.add(newElt);
                return "";
            }
        } catch (IOException e) {
            config.addRankId(newElt);
            e.printStackTrace();
            String status = "Error during access to attachment";
            newElt.addAttribute(EMAIL_FIELDS.status.name, status);
            identification.add(newElt);
            return "";
        } catch (PSTException e) {
            config.addRankId(newElt);
            e.printStackTrace();
            String status = "Error during access to attachment";
            newElt.addAttribute(EMAIL_FIELDS.status.name, status);
            identification.add(newElt);
            return "";
        } finally {
            // Close first: a file that is still open cannot be deleted on every platform.
            if (out != null) {
                try {
                    out.close();
                } catch (IOException e2) {
                    // nothing more can be done for a stream that fails to close
                }
            }
            if (attachmentStream != null) {
                try {
                    attachmentStream.close();
                } catch (IOException e2) {
                    // nothing more can be done for a stream that fails to close
                }
            }
            if (filetemp != null && !config.extractFile) {
                filetemp.delete();
            }
        }
        if (filename != null) {
            Element elt = XmlDom.factory.createElement(EMAIL_FIELDS.filename.name);
            elt.setText(filename);
            newElt.add(elt);
        }
        // Nothing was copied: fall back to the size declared by the attachment itself.
        if (size == 0) {
            size = attachment.getAttachSize();
        }
        if (size > 0) {
            Element elt = XmlDom.factory.createElement(EMAIL_FIELDS.attSize.name);
            elt.setText(Long.toString(size));
            newElt.add(elt);
        }
    }
    identification.add(newElt);
    return result;
}

From source file:fr.gouv.culture.vitam.eml.PstExtract.java

License:Open Source License

/**
 * Appends to {@code root} a new element named {@code entry} describing one e-mail address.
 * <p>
 * The display name and the address are each added as a child element only when they are
 * neither {@code null} nor empty; both texts go through {@code StringUtils.unescapeHTML}
 * first. The {@code entry} element is added to {@code root} even when it ends up empty.
 *
 * @param root    parent element receiving the new entry
 * @param entry   name of the element to create
 * @param name    display name, may be {@code null} or empty
 * @param address e-mail address, may be {@code null} or empty
 */
private static void addAddress(Element root, String entry, String name, String address) {
    final Element holder = XmlDom.factory.createElement(entry);

    final boolean hasName = name != null && !name.isEmpty();
    if (hasName) {
        final Element nameElt = XmlDom.factory.createElement(EMAIL_FIELDS.emailName.name);
        nameElt.setText(StringUtils.unescapeHTML(name, true, false));
        holder.add(nameElt);
    }

    final boolean hasAddress = address != null && !address.isEmpty();
    if (hasAddress) {
        final Element addressElt = XmlDom.factory.createElement(EMAIL_FIELDS.emailAddress.name);
        addressElt.setText(StringUtils.unescapeHTML(address, true, false));
        holder.add(addressElt);
    }

    root.add(holder);
}