List of usage examples for org.dom4j DocumentHelper parseText
public static Document parseText(String text) throws DocumentException
parseText
Parses the given text as an XML document and returns the newly created Document.
From source file:edu.ucsd.library.dams.api.DAMSAPIServlet.java
private Map mintDOI(String objid, TripleStore ts, TripleStore es, FileStore fs, HttpServletResponse res) throws Exception { // make sure ezid is configured if (ezid == null) { return error(SC_BAD_REQUEST, "DOI minting is not configured", null); }// w ww .j av a2 s. c o m // load object XML Map m = objectShow(objid, ts, es); String xml = null; // path for Object, Collection? String targetPath = ""; if (m.get("obj") != null) { DAMSObject obj = (DAMSObject) m.get("obj"); xml = obj.getRDFXML(true); targetPath = findTargetPath(objid, obj.asModel(false)); } String targetUrl = ezidTargetUrl + targetPath + "/" + objid; // pre-validate try { Ezid.validate(xml); } catch (EzidException ex) { return error(SC_BAD_REQUEST, targetUrl + ": " + ex.getMessage(), null); } // transform to datacite format with XSL String datacite = xslt(xml, "datacite.xsl", null, null); // mint doi String doi = ezid.mintDOI(targetUrl, datacite); String doiURL = doi.replaceAll("doi:", "http://doi.org/"); log.info("Minted DOI: " + doiURL + " for " + targetUrl); // add doi to object Document doc = DocumentHelper.parseText(xml); Element obj = (Element) doc.getRootElement().elements().get(0); Element doiNote = obj.addElement("dams:note").addElement("dams:Note"); doiNote.addElement("dams:type").setText("identifier"); doiNote.addElement("dams:displayLabel").setText("DOI"); doiNote.addElement("rdf:value").setText(doiURL); // update preferred citation note List cites = doc.selectNodes("/rdf:RDF/*/dams:note/dams:Note[dams:type='preferred citation']/rdf:value"); if (cites.size() > 0) { Element cite = (Element) cites.get(0); cite.setText(cite.getText() + " " + doiURL); } // save RDF Map info = objectEdit(objid, false, new ByteArrayInputStream(doc.asXML().getBytes()), "all", null, null, null, ts, es, fs); // queue for reindexing indexQueue(objid, "modifyObject", DEFAULT_PRIORITY); info.put("message", "Minted DOI: " + doiURL); return info; }
From source file:edu.ucsd.library.dams.api.DAMSAPIServlet.java
protected boolean isCollection(TripleStore ts, TripleStore es, String objid) throws TripleStoreException, DocumentException { // Q: check cache & use export if cached? any impact on xsl? Map info = objectShow(objid, ts, es); boolean result = false; if (info.get("obj") != null) { DAMSObject obj = (DAMSObject) info.get("obj"); String rdfxml = obj.getRDFXML(false); if (StringUtils.isNotBlank(rdfxml)) { Document doc = DocumentHelper.parseText(rdfxml); org.dom4j.Node colNode = doc.getRootElement() .selectSingleNode("*[contains(local-name(), 'Collection')]"); if (colNode != null) result = true;// w w w .j ava 2s . c om } } return result; }
From source file:edu.ucsd.library.dams.jhove.MyJhoveBase.java
License:Open Source License
/** * Given INP/*from ww w. j a va 2s . c o m*/ * @param kobj * @throws DocumentException * @throws ParseException */ public void parseXml(JhoveInfo kobj, StringWriter swriter) throws DocumentException, ParseException { StringBuffer xmldata = new StringBuffer(swriter.toString()); kobj.setMetaxml(xmldata); Document jdoc = DocumentHelper.parseText(xmldata.toString()); Element root = jdoc.getRootElement(); removeNS(root); String statusstr = jdoc.valueOf("/jhove/repInfo/status"); //kobj.setStatus(statusstr); if (/*statusstr.indexOf("not valid") != -1 || */statusstr.indexOf("Not well-formed") != -1) { kobj.setValid(false); } else { kobj.setValid(true); } kobj.setCheckSum_CRC32(jdoc.valueOf("/jhove/repInfo/checksums/checksum[@type='CRC32']")); kobj.setChecksum_MD5(jdoc.valueOf("/jhove/repInfo/checksums/checksum[@type='MD5']")); kobj.setChecksum_SHA(jdoc.valueOf("/jhove/repInfo/checksums/checksum[@type='SHA-1']")); kobj.setMIMEtype(jdoc.valueOf("/jhove/repInfo/mimeType")); kobj.setSize(Long.parseLong(jdoc.valueOf("/jhove/repInfo/size"))); //try { SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"); kobj.setDateModified(sdf.parse(jdoc.valueOf("/jhove/repInfo/lastModified"))); String format = jdoc.valueOf("/jhove/repInfo/format"); kobj.setFormat(format); if (format.equalsIgnoreCase("MP3")) { String layer = jdoc.valueOf( "/jhove/repInfo/properties/property/values/property[name='LayerDescription']/values/value"); String version = jdoc.valueOf( "/jhove/repInfo/properties/property/values/property[name='MPEG Audio Version ID']/values/value"); kobj.setVersion(version + ", Layer " + layer); } else { kobj.setVersion(jdoc.valueOf("/jhove/repInfo/version")); } kobj.setReportingModule(jdoc.valueOf("/jhove/repInfo/reportingModule")); String status = kobj.getStatus(); if ((status == null || status.length() == 0) || !"BYTESTREAM".equalsIgnoreCase(format)) kobj.setStatus(statusstr); // image resolution String imageWidth = jdoc.valueOf("//imageWidth"); String imageLength = 
jdoc.valueOf("//imageHeight"); if (imageWidth != null && imageLength != null && imageWidth.length() > 0) { kobj.setQuality(imageWidth + "x" + imageLength); } // WAV bit/sample/channels String abits1 = jdoc.valueOf("//bitDepth"); String afreq1 = jdoc.valueOf("//sampleRate"); String achan1 = jdoc.valueOf("//numChannels"); if (nblank(abits1) || nblank(afreq1) || nblank(achan1)) { String qual = audioQuality(abits1, afreq1, "Hz", achan1); kobj.setQuality(qual); } // MP3 bit/sample/channels String abits2 = valueOf(jdoc, "Bitrate Index"); String afreq2 = valueOf(jdoc, "Sampling rate frequency Index"); String achan2 = valueOf(jdoc, "Channel Mode"); if (nblank(abits2) || nblank(afreq2) || nblank(achan2)) { String qual = audioQuality(abits2, afreq2, "kHz", achan2); kobj.setQuality(qual); } List resultnodes = jdoc.selectNodes("/jhove/repInfo/messages/message[@severity='error']"); for (int r = 0; resultnodes != null && r < resultnodes.size(); r++) { Object noderesult = resultnodes.get(r); if (noderesult instanceof Node) { Node nt = (Node) noderesult; kobj.setMessage(nt.getStringValue()); } } }
From source file:eml.studio.server.graph.OozieGraphXMLParser.java
License:Open Source License
/** * @param xml xml description/*w ww . ja va 2 s. c om*/ * @throws DocumentException */ public static OozieGraph parse(String xml) throws DocumentException { OozieGraph graph = new OozieGraph(); Document doc = DocumentHelper.parseText(xml); Element root = doc.getRootElement(); // parse widgets List<Element> nodes = root.elements("widget"); for (Element node : nodes) { String type = node.attributeValue("type"); if (type.equals("dataset")) { OozieDatasetNode odn = parseDatasetNode(node); graph.addDatasetNode(odn); } else if (type.equals("program")) { OozieProgramNode opn = parseProgramNode(node); graph.addProgramNode(opn); graph.addActiveNode(opn.getId()); } } // parse edges List<Element> enodes = root.elements("edge"); for (Element elem : enodes) { OozieEdge edge = parseOozieEdge(elem); if (edge != null) graph.addEdge(edge); } return graph; }
From source file:eu.planets_project.pp.plato.action.workflow.ValidatePlanAction.java
License:Open Source License
/**
 * Pretty-prints the executable preservation plan.
 *
 * @param executablePlan XML text of the executable plan; may be null or empty
 * @return the re-formatted XML, or an empty string when the input is null or
 *         empty, cannot be parsed, or cannot be written
 */
private String formatExecutablePlan(String executablePlan) {
    if (executablePlan == null || executablePlan.length() == 0) {
        return "";
    }

    String formatted;
    try {
        OutputFormat prettyPrint = OutputFormat.createPrettyPrint();
        prettyPrint.setNewlines(true);
        prettyPrint.setTrimText(true);
        prettyPrint.setIndent(" ");
        prettyPrint.setExpandEmptyElements(false);
        prettyPrint.setNewLineAfterNTags(20);

        StringWriter buffer = new StringWriter();
        XMLWriter xmlWriter = new XMLWriter(buffer, prettyPrint);
        xmlWriter.write(DocumentHelper.parseText(executablePlan));
        xmlWriter.close();

        formatted = buffer.toString();
    } catch (DocumentException e) {
        // best effort: an unparseable plan is shown as empty
        formatted = "";
    } catch (IOException e) {
        // best effort: a plan that cannot be written is shown as empty
        formatted = "";
    }
    return formatted;
}
From source file:eu.planets_project.pp.plato.xml.ProjectExporter.java
License:Open Source License
/** * Adds the XML-representation of the given project to the parent <code>projectNode</code> * * @param p/*from ww w.j ava 2s . c o m*/ * @param projectNode */ public void addProject(Plan p, Document projectsDoc, List<Integer> uploadIDs, List<Integer> recordIDs) { Element projectNode = projectsDoc.getRootElement().addElement(new QName("plan", platoNS)); // Base64 encoder for binary data BASE64Encoder encoder = new BASE64Encoder(); addChangeLog(p.getChangeLog(), projectNode); // Plan state projectNode.addElement("state").addAttribute("value", Integer.toString(p.getState().getValue())); Element properties = projectNode.addElement("properties"); addUpload(p.getPlanProperties().getReportUpload(), properties, "report", encoder, uploadIDs); // Plan properties properties.addAttribute("author", p.getPlanProperties().getAuthor()) .addAttribute("organization", p.getPlanProperties().getOrganization()) .addAttribute("name", p.getPlanProperties().getName()) .addAttribute("privateProject", Boolean.toString(p.getPlanProperties().isPrivateProject())) .addAttribute("reportPublic", Boolean.toString(p.getPlanProperties().isReportPublic())); addStringElement(properties, "description", p.getPlanProperties().getDescription()); addStringElement(properties, "owner", p.getPlanProperties().getOwner()); addChangeLog(p.getPlanProperties().getChangeLog(), properties); // Plan Basis Element basis = projectNode.addElement("basis"); basis.addAttribute("identificationCode", p.getProjectBasis().getIdentificationCode()); addStringElement(basis, "documentTypes", p.getProjectBasis().getDocumentTypes()); addStringElement(basis, "applyingPolicies", p.getProjectBasis().getApplyingPolicies()); addStringElement(basis, "designatedCommunity", p.getProjectBasis().getDesignatedCommunity()); addStringElement(basis, "mandate", p.getProjectBasis().getMandate()); addStringElement(basis, "organisationalProcedures", p.getProjectBasis().getOrganisationalProcedures()); addStringElement(basis, "planningPurpose", 
p.getProjectBasis().getPlanningPurpose()); addStringElement(basis, "planRelations", p.getProjectBasis().getPlanRelations()); addStringElement(basis, "preservationRights", p.getProjectBasis().getPreservationRights()); addStringElement(basis, "referenceToAgreements", p.getProjectBasis().getReferenceToAgreements()); Element triggers = basis.addElement("triggers"); if (p.getProjectBasis().getTriggers() != null) { addTrigger(triggers, p.getProjectBasis().getTriggers().getNewCollection()); addTrigger(triggers, p.getProjectBasis().getTriggers().getPeriodicReview()); addTrigger(triggers, p.getProjectBasis().getTriggers().getChangedEnvironment()); addTrigger(triggers, p.getProjectBasis().getTriggers().getChangedObjective()); addTrigger(triggers, p.getProjectBasis().getTriggers().getChangedCollectionProfile()); } Element policyTree = basis.addElement("policyTree"); addSubPolicyTree(p.getProjectBasis().getPolicyTree().getRoot(), policyTree); addChangeLog(p.getProjectBasis().getChangeLog(), basis); // Sample Records Element samplerecords = projectNode.addElement("sampleRecords"); addStringElement(samplerecords, "samplesDescription", p.getSampleRecordsDefinition().getSamplesDescription()); Element collectionProfile = samplerecords.addElement("collectionProfile"); if (p.getSampleRecordsDefinition().getCollectionProfile() != null) { addStringElement(collectionProfile, "collectionID", p.getSampleRecordsDefinition().getCollectionProfile().getCollectionID()); addStringElement(collectionProfile, "description", p.getSampleRecordsDefinition().getCollectionProfile().getDescription()); addStringElement(collectionProfile, "expectedGrowthRate", p.getSampleRecordsDefinition().getCollectionProfile().getExpectedGrowthRate()); addStringElement(collectionProfile, "numberOfObjects", p.getSampleRecordsDefinition().getCollectionProfile().getNumberOfObjects()); addStringElement(collectionProfile, "typeOfObjects", p.getSampleRecordsDefinition().getCollectionProfile().getTypeOfObjects()); 
addStringElement(collectionProfile, "retentionPeriod", p.getSampleRecordsDefinition().getCollectionProfile().getRetentionPeriod()); for (SampleObject rec : p.getSampleRecordsDefinition().getRecords()) { Element sampleRecord = samplerecords.addElement("record") .addAttribute("shortName", rec.getShortName()).addAttribute("fullname", rec.getFullname()) .addAttribute("contentType", rec.getContentType()); Element data = sampleRecord.addElement("data"); if (rec.isDataExistent()) { data.addAttribute("hasData", "true"); data.addAttribute("encoding", "base64"); if (recordIDs != null) { data.setText("" + rec.getId()); recordIDs.add(rec.getId()); } else { data.setText(encoder.encode(rec.getData().getData())); } addUpload(rec.getXcdlDescription(), sampleRecord, "xcdlDescription", encoder, uploadIDs); addJhoveString(rec, encoder, sampleRecord); addFitsInfo(rec, encoder, sampleRecord); } else { data.addAttribute("hasData", "false"); } Element formatInfo = sampleRecord.addElement("formatInfo") .addAttribute("puid", rec.getFormatInfo().getPuid()) .addAttribute("name", rec.getFormatInfo().getName()) .addAttribute("version", rec.getFormatInfo().getVersion()) .addAttribute("mimeType", rec.getFormatInfo().getMimeType()) .addAttribute("defaultExtension", rec.getFormatInfo().getDefaultExtension()); addChangeLog(rec.getFormatInfo().getChangeLog(), formatInfo); addChangeLog(rec.getChangeLog(), sampleRecord); addStringElement(sampleRecord, "description", rec.getDescription()); addStringElement(sampleRecord, "originalTechnicalEnvironment", rec.getOriginalTechnicalEnvironment()); } addChangeLog(p.getSampleRecordsDefinition().getChangeLog(), samplerecords); // Requirementsdefinition Element rdef = projectNode.addElement("requirementsDefinition"); addStringElement(rdef, "description", p.getRequirementsDefinition().getDescription()); Element uploads = rdef.addElement("uploads"); for (DigitalObject upload : p.getRequirementsDefinition().getUploads()) { addUpload(upload, uploads, "upload", 
encoder, uploadIDs); } addChangeLog(p.getRequirementsDefinition().getChangeLog(), rdef); // Alternatives Element alternatives = projectNode.addElement("alternatives"); addStringElement(alternatives, "description", p.getAlternativesDefinition().getDescription()); for (Alternative a : p.getAlternativesDefinition().getAlternatives()) { /* * Export all alternatives (also discarded ones) * Indices of the result-set reference only the considered alternatives! */ Element alt = alternatives.addElement("alternative") .addAttribute("discarded", Boolean.toString(a.isDiscarded())) .addAttribute("name", a.getName()); addStringElement(alt, "description", a.getDescription()); if (a.getAction() != null) { Element action = alt.addElement("action"); action.addAttribute("shortname", a.getAction().getShortname()) .addAttribute("url", a.getAction().getUrl()) .addAttribute("actionIdentifier", a.getAction().getActionIdentifier()) .addAttribute("info", a.getAction().getInfo()) .addAttribute("targetFormat", a.getAction().getTargetFormat()) .addAttribute("executable", String.valueOf(a.getAction().isExecutable())); addStringElement(action, "descriptor", a.getAction().getDescriptor()); addStringElement(action, "parameterInfo", a.getAction().getParameterInfo()); Element params = action.addElement("params"); if (a.getAction().getParams() != null) { for (Parameter param : a.getAction().getParams()) { params.addElement("param").addAttribute("name", param.getName()).addAttribute("value", param.getValue()); } } addChangeLog(a.getAction().getChangeLog(), action); } Element resourceDescr = alt.addElement("resourceDescription"); addStringElement(resourceDescr, "necessaryResources", a.getResourceDescription().getNecessaryResources()); addStringElement(resourceDescr, "configSettings", a.getResourceDescription().getConfigSettings()); addStringElement(resourceDescr, "reasonForConsidering", a.getResourceDescription().getReasonForConsidering()); addChangeLog(a.getResourceDescription().getChangeLog(), 
resourceDescr); Element experiment = alt.addElement("experiment"); Experiment exp = a.getExperiment(); addStringElement(experiment, "description", exp.getDescription()); //addStringElement(experiment, "runDescription", exp.getRunDescription()); addStringElement(experiment, "settings", exp.getSettings()); uploads = experiment.addElement("results"); for (SampleObject record : exp.getResults().keySet()) { DigitalObject up = exp.getResults().get(record); if (up != null) { // only existing uploads are exported Element upload = addUpload(up, uploads, "result", encoder, uploadIDs); if (upload != null) { upload.addAttribute("key", record.getShortName()); } } } // // */experiment/xcdlDescriptions/xcdlDescription // Element xcdls = experiment.addElement("xcdlDescriptions"); // for (SampleObject record : exp.getResults().keySet()) { // DigitalObject result = exp.getResults().get(record); // if (result != null) { // XcdlDescription x = result.getXcdlDescription(); // if (x != null) { // // only existing xcdls are exported // Element upload = addUpload(x, xcdls, "xcdlDescription", encoder, uploadIDs); // if (upload != null) { // upload.addAttribute("key", record.getShortName()); // } // } // } // } // export detailed experiment info's Element detailedInfos = experiment.addElement("detailedInfos"); for (SampleObject record : exp.getDetailedInfo().keySet()) { DetailedExperimentInfo dinfo = exp.getDetailedInfo().get(record); Element detailedInfo = detailedInfos.addElement("detailedInfo") .addAttribute("key", record.getShortName()) .addAttribute("successful", "" + dinfo.getSuccessful()); addStringElement(detailedInfo, "programOutput", dinfo.getProgramOutput()); addStringElement(detailedInfo, "cpr", dinfo.getCpr()); Element measurements = detailedInfo.addElement("measurements"); for (Measurement m : dinfo.getMeasurements().values()) { Element measurement = measurements.addElement("measurement"); // measurement.value: String typename = deriveElementname(m.getValue().getClass()); 
Element valueElem = measurement.addElement(typename); //.addAttribute("value", m.getValue().toString()); addStringElement(valueElem, "value", m.getValue().toString()); addChangeLog(m.getValue().getChangeLog(), valueElem); // measurement.property: Element property = measurement.addElement("property").addAttribute("name", m.getProperty().getName()); addScale(m.getProperty().getScale(), property); } } addChangeLog(a.getExperiment().getChangeLog(), experiment); addChangeLog(a.getChangeLog(), alt); } addChangeLog(p.getAlternativesDefinition().getChangeLog(), alternatives); // go-nogo - is created in the go-nogo step and need not exist if (p.getDecision() != null) { Element decision = projectNode.addElement("decision"); addStringElement(decision, "reason", p.getDecision().getReason()); addStringElement(decision, "actionNeeded", p.getDecision().getActionNeeded()); decision.addElement("goDecision").addAttribute("value", p.getDecision().getDecision().name()); addChangeLog(p.getDecision().getChangeLog(), decision); } // Evaluation Element evaluation = projectNode.addElement("evaluation"); addStringElement(evaluation, "comment", p.getEvaluation().getComment()); addChangeLog(p.getEvaluation().getChangeLog(), evaluation); // importance weighting Element importanceWeighting = projectNode.addElement("importanceWeighting"); addStringElement(importanceWeighting, "comment", p.getImportanceWeighting().getComment()); addChangeLog(p.getImportanceWeighting().getChangeLog(), importanceWeighting); // Recommendation Element recommendation = projectNode.addElement("recommendation"); if (p.getRecommendation().getAlternative() != null) { recommendation.addAttribute("alternativeName", p.getRecommendation().getAlternative().getName()); } addStringElement(recommendation, "reasoning", p.getRecommendation().getReasoning()); addStringElement(recommendation, "effects", p.getRecommendation().getEffects()); addChangeLog(p.getRecommendation().getChangeLog(), recommendation); // transformation Element 
trafo = projectNode.addElement("transformation"); addStringElement(trafo, "comment", p.getTransformation().getComment()); addChangeLog(p.getTransformation().getChangeLog(), trafo); // Objectivetree (including weights, evaluation values and transformers) Element tree = projectNode.addElement("tree"); tree.addAttribute("weightsInitialized", "" + p.getTree().isWeightsInitialized()); if (p.getTree().getRoot() != null) addSubTree(p.getTree().getRoot(), tree); } Element executablePlan = projectNode.addElement("executablePlan"); try { if (p.getExecutablePlanDefinition().getExecutablePlan() != null) { Document execPlan = DocumentHelper.parseText(p.getExecutablePlanDefinition().getExecutablePlan()); Element execPlanRoot = execPlan.getRootElement(); if (execPlanRoot.hasContent()) { Element planWorkflow = executablePlan.addElement("planWorkflow"); planWorkflow.add(execPlanRoot); } } if (p.getExecutablePlanDefinition().getEprintsExecutablePlan() != null) { Document execPlan = DocumentHelper .parseText(p.getExecutablePlanDefinition().getEprintsExecutablePlan()); Element execPlanRoot = execPlan.getRootElement(); if (execPlanRoot.hasContent()) { //Element planWorkflow = executablePlan.addElement("eprintsPlan"); executablePlan.add(execPlanRoot); } } } catch (DocumentException e) { // if the stored exec. plan is invalid for some reason, we leave the plan out. // TODO: HK this should no happen as we write the xml ourselves, but still, // we need a mechanism here to prevent the export if the xml is invalid. PlatoLogger.getLogger(this.getClass()).error(e.getMessage(), e); } // TODO HK how does this here relate to the upper try-catch block and the exception?? // Smells like a hack! 
ExecutablePlanDefinition plan = p.getExecutablePlanDefinition(); addStringElement(executablePlan, "objectPath", plan.getObjectPath()); addStringElement(executablePlan, "toolParameters", plan.getToolParameters()); addStringElement(executablePlan, "triggersConditions", plan.getTriggersConditions()); addStringElement(executablePlan, "validateQA", plan.getValidateQA()); addChangeLog(plan.getChangeLog(), executablePlan); Element planDef = projectNode.addElement("planDefinition"); PlanDefinition pdef = p.getPlanDefinition(); planDef.addAttribute("currency", pdef.getCurrency()); addStringElement(planDef, "costsIG", pdef.getCostsIG()); addStringElement(planDef, "costsPA", pdef.getCostsPA()); addStringElement(planDef, "costsPE", pdef.getCostsPE()); addStringElement(planDef, "costsQA", pdef.getCostsQA()); addStringElement(planDef, "costsREI", pdef.getCostsREI()); addStringElement(planDef, "costsRemarks", pdef.getCostsRemarks()); addStringElement(planDef, "costsRM", pdef.getCostsRM()); addStringElement(planDef, "costsTCO", pdef.getCostsTCO()); addStringElement(planDef, "responsibleExecution", pdef.getResponsibleExecution()); addStringElement(planDef, "responsibleMonitoring", pdef.getResponsibleMonitoring()); triggers = planDef.addElement("triggers"); if (pdef.getTriggers() != null) { addTrigger(triggers, pdef.getTriggers().getNewCollection()); addTrigger(triggers, pdef.getTriggers().getPeriodicReview()); addTrigger(triggers, pdef.getTriggers().getChangedEnvironment()); addTrigger(triggers, pdef.getTriggers().getChangedObjective()); addTrigger(triggers, pdef.getTriggers().getChangedCollectionProfile()); } addChangeLog(pdef.getChangeLog(), planDef); }
From source file:eu.scape_project.planning.xml.ProjectExporter.java
License:Apache License
/** * Adds a preservation action plan element to the provided parent if the * preservation action plan is defined./* ww w . ja v a2s. co m*/ * * @param preservationActionPlan * the preservation action plan to add * @param parent * the parent element * @param addDigitalObjectData * true if the data should be written, false otherwise * @return the newly created element or null * @throws PlanningException * if an error occurred during creation */ private Element addPreservationActionPlan(DigitalObject preservationActionPlan, Element parent, boolean addDigitalObjectData) throws PlanningException { Element preservationActionPlanElement = null; if (preservationActionPlan != null) { if (preservationActionPlan != null && preservationActionPlan.isDataExistent()) { if (!addDigitalObjectData) { preservationActionPlanElement = parent.addElement("preservationActionPlan"); preservationActionPlanElement.setText(String.valueOf(preservationActionPlan.getId())); } else { Document doc; try { doc = DocumentHelper.parseText( new String(preservationActionPlan.getData().getData(), PlanXMLConstants.ENCODING)); if (doc.getRootElement().hasContent()) { preservationActionPlanElement = doc.getRootElement(); parent.add(preservationActionPlanElement); } } catch (UnsupportedEncodingException e) { log.error("Error parsing preservation action plan {}.", e.getMessage()); throw new PlanningException("Error parsing preservation action plan.", e); } catch (DocumentException e) { log.error("Error parsing preservation action plan {}.", e.getMessage()); throw new PlanningException("Error parsing preservation action plan.", e); } } } } return preservationActionPlanElement; }
From source file:fr.gouv.culture.vitam.eml.EmlExtract.java
License:Open Source License
public static String extractInfoMessage(MimeMessage message, Element root, VitamArgument argument, ConfigLoader config) {/*from w ww. j a v a 2 s. com*/ File oldDir = argument.currentOutputDir; if (argument.currentOutputDir == null) { if (config.outputDir != null) { argument.currentOutputDir = new File(config.outputDir); } } Element keywords = XmlDom.factory.createElement(EMAIL_FIELDS.keywords.name); Element metadata = XmlDom.factory.createElement(EMAIL_FIELDS.metadata.name); String skey = ""; String id = config.addRankId(root); Address[] from = null; Element sub2 = null; try { from = message.getFrom(); } catch (MessagingException e1) { String[] partialResult; try { partialResult = message.getHeader("From"); if (partialResult != null && partialResult.length > 0) { sub2 = XmlDom.factory.createElement(EMAIL_FIELDS.from.name); Element add = XmlDom.factory.createElement(EMAIL_FIELDS.fromUnit.name); add.setText(partialResult[0]); sub2.add(add); } } catch (MessagingException e) { } } Address sender = null; try { sender = message.getSender(); } catch (MessagingException e1) { String[] partialResult; try { partialResult = message.getHeader("Sender"); if (partialResult != null && partialResult.length > 0) { if (sub2 == null) { sub2 = XmlDom.factory.createElement(EMAIL_FIELDS.from.name); Element add = XmlDom.factory.createElement(EMAIL_FIELDS.fromUnit.name); add.setText(partialResult[0]); sub2.add(add); } } } catch (MessagingException e) { } } if (from != null && from.length > 0) { String value0 = null; Element sub = (sub2 != null ? sub2 : XmlDom.factory.createElement(EMAIL_FIELDS.from.name)); if (sender != null) { value0 = addAddress(sub, EMAIL_FIELDS.fromUnit.name, sender, null); } for (Address address : from) { addAddress(sub, EMAIL_FIELDS.fromUnit.name, address, value0); } metadata.add(sub); } else if (sender != null) { Element sub = (sub2 != null ? 
sub2 : XmlDom.factory.createElement(EMAIL_FIELDS.from.name)); addAddress(sub, EMAIL_FIELDS.fromUnit.name, sender, null); metadata.add(sub); } else { if (sub2 != null) { metadata.add(sub2); } } Address[] replyTo = null; try { replyTo = message.getReplyTo(); if (replyTo != null && replyTo.length > 0) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.replyTo.name); for (Address address : replyTo) { addAddress(sub, EMAIL_FIELDS.fromUnit.name, address, null); } metadata.add(sub); } } catch (MessagingException e1) { String[] partialResult; try { partialResult = message.getHeader("ReplyTo"); if (partialResult != null && partialResult.length > 0) { sub2 = XmlDom.factory.createElement(EMAIL_FIELDS.replyTo.name); addAddress(sub2, EMAIL_FIELDS.fromUnit.name, partialResult, null); /*Element add = XmlDom.factory.createElement(EMAIL_FIELDS.fromUnit.name); add.setText(partialResult[0]); sub2.add(add);*/ metadata.add(sub2); } } catch (MessagingException e) { } } Address[] toRecipients = null; try { toRecipients = message.getRecipients(Message.RecipientType.TO); if (toRecipients != null && toRecipients.length > 0) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.toRecipients.name); for (Address address : toRecipients) { addAddress(sub, EMAIL_FIELDS.toUnit.name, address, null); } metadata.add(sub); } } catch (MessagingException e1) { String[] partialResult; try { partialResult = message.getHeader("To"); if (partialResult != null && partialResult.length > 0) { sub2 = XmlDom.factory.createElement(EMAIL_FIELDS.toRecipients.name); addAddress(sub2, EMAIL_FIELDS.toUnit.name, partialResult, null); /*for (String string : partialResult) { Element add = XmlDom.factory.createElement(EMAIL_FIELDS.toUnit.name); add.setText(string); sub2.add(add); }*/ metadata.add(sub2); } } catch (MessagingException e) { } } Address[] ccRecipients; try { ccRecipients = message.getRecipients(Message.RecipientType.CC); if (ccRecipients != null && ccRecipients.length > 0) { Element sub = 
XmlDom.factory.createElement(EMAIL_FIELDS.ccRecipients.name); for (Address address : ccRecipients) { addAddress(sub, EMAIL_FIELDS.ccUnit.name, address, null); } metadata.add(sub); } } catch (MessagingException e1) { String[] partialResult; try { partialResult = message.getHeader("Cc"); if (partialResult != null && partialResult.length > 0) { sub2 = XmlDom.factory.createElement(EMAIL_FIELDS.ccRecipients.name); addAddress(sub2, EMAIL_FIELDS.ccUnit.name, partialResult, null); /*for (String string : partialResult) { Element add = XmlDom.factory.createElement(EMAIL_FIELDS.ccUnit.name); add.setText(string); sub2.add(add); }*/ metadata.add(sub2); } } catch (MessagingException e) { } } Address[] bccRecipients; try { bccRecipients = message.getRecipients(Message.RecipientType.BCC); if (bccRecipients != null && bccRecipients.length > 0) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.bccRecipients.name); for (Address address : bccRecipients) { addAddress(sub, EMAIL_FIELDS.bccUnit.name, address, null); } metadata.add(sub); } } catch (MessagingException e1) { String[] partialResult; try { partialResult = message.getHeader("Cc"); if (partialResult != null && partialResult.length > 0) { sub2 = XmlDom.factory.createElement(EMAIL_FIELDS.bccRecipients.name); addAddress(sub2, EMAIL_FIELDS.bccUnit.name, partialResult, null); /*for (String string : partialResult) { Element add = XmlDom.factory.createElement(EMAIL_FIELDS.bccUnit.name); add.setText(string); sub2.add(add); }*/ metadata.add(sub2); } } catch (MessagingException e) { } } try { String subject = message.getSubject(); if (subject != null) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.subject.name); sub.setText(StringUtils.unescapeHTML(subject, true, false)); metadata.add(sub); } Date sentDate = message.getSentDate(); if (sentDate != null) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.sentDate.name); sub.setText(sentDate.toString()); metadata.add(sub); } Date receivedDate = 
message.getReceivedDate(); if (receivedDate != null) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.receivedDate.name); sub.setText(receivedDate.toString()); metadata.add(sub); } String[] headers = message.getHeader("Received"); if (headers != null) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.receptionTrace.name); MailDateFormat mailDateFormat = null; long maxTime = 0; if (receivedDate == null) { mailDateFormat = new MailDateFormat(); } for (String string : headers) { Element sub3 = XmlDom.factory.createElement(EMAIL_FIELDS.trace.name); sub3.setText(StringUtils.unescapeHTML(string, true, false)); sub.add(sub3); if (receivedDate == null) { int pos = string.lastIndexOf(';'); if (pos > 0) { String recvdate = string.substring(pos + 2).replaceAll("\t\n\r\f", "").trim(); try { Date date = mailDateFormat.parse(recvdate); if (date.getTime() > maxTime) { maxTime = date.getTime(); } } catch (ParseException e) { } } } } if (receivedDate == null) { Element subdate = XmlDom.factory.createElement(EMAIL_FIELDS.receivedDate.name); Date date = new Date(maxTime); subdate.setText(date.toString()); metadata.add(subdate); } metadata.add(sub); } int internalSize = message.getSize(); if (internalSize > 0) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.emailSize.name); sub.setText(Integer.toString(internalSize)); metadata.add(sub); } String encoding = message.getEncoding(); if (encoding != null) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.encoding.name); sub.setText(StringUtils.unescapeHTML(encoding, true, false)); metadata.add(sub); } String description = message.getDescription(); if (description != null) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.description.name); sub.setText(StringUtils.unescapeHTML(description, true, false)); metadata.add(sub); } String contentType = message.getContentType(); if (contentType != null) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.contentType.name); 
sub.setText(StringUtils.unescapeHTML(contentType, true, false)); metadata.add(sub); } headers = message.getHeader("Content-Transfer-Encoding"); if (headers != null) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.contentTransferEncoding.name); StringBuilder builder = new StringBuilder(); for (String string : headers) { builder.append(StringUtils.unescapeHTML(string, true, false)); builder.append(' '); } sub.setText(builder.toString()); metadata.add(sub); } String[] contentLanguage = message.getContentLanguage(); if (contentLanguage != null) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.contentLanguage.name); StringBuilder builder = new StringBuilder(); for (String string : contentLanguage) { builder.append(StringUtils.unescapeHTML(string, true, false)); builder.append(' '); } sub.setText(builder.toString()); metadata.add(sub); } String contentId = message.getContentID(); if (contentId != null) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.contentId.name); sub.setText(StringUtils.removeChevron(StringUtils.unescapeHTML(contentId, true, false))); metadata.add(sub); } String disposition = message.getDisposition(); if (disposition != null) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.disposition.name); sub.setText(StringUtils.removeChevron(StringUtils.unescapeHTML(disposition, true, false))); metadata.add(sub); } headers = message.getHeader("Keywords"); if (headers != null) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.msgKeywords.name); StringBuilder builder = new StringBuilder(); for (String string : headers) { builder.append(StringUtils.unescapeHTML(string, true, false)); builder.append(' '); } sub.setText(builder.toString()); metadata.add(sub); } String messageId = message.getMessageID(); if (messageId != null) { messageId = StringUtils.removeChevron(StringUtils.unescapeHTML(messageId, true, false)).trim(); if (messageId.length() > 1) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.messageId.name); 
sub.setText(messageId); metadata.add(sub); } } headers = message.getHeader("In-Reply-To"); String inreplyto = null; if (headers != null) { StringBuilder builder = new StringBuilder(); for (String string : headers) { builder.append(StringUtils.removeChevron(StringUtils.unescapeHTML(string, true, false))); builder.append(' '); } inreplyto = builder.toString().trim(); if (inreplyto.length() > 0) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.inReplyTo.name); sub.setText(inreplyto); if (messageId != null && messageId.length() > 1) { String old = filEmls.get(inreplyto); if (old == null) { old = messageId; } else { old += "," + messageId; } filEmls.put(inreplyto, old); } metadata.add(sub); } } headers = message.getHeader("References"); if (headers != null) { Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.references.name); StringBuilder builder = new StringBuilder(); for (String string : headers) { builder.append(StringUtils.removeChevron(StringUtils.unescapeHTML(string, true, false))); builder.append(' '); } String[] refs = builder.toString().trim().split(" "); for (String string : refs) { if (string.length() > 0) { Element ref = XmlDom.factory.createElement(EMAIL_FIELDS.reference.name); ref.setText(string); sub.add(ref); } } metadata.add(sub); } Element prop = XmlDom.factory.createElement(EMAIL_FIELDS.properties.name); headers = message.getHeader("X-Priority"); if (headers == null) { headers = message.getHeader("Priority"); if (headers != null && headers.length > 0) { prop.addAttribute(EMAIL_FIELDS.priority.name, headers[0]); } } else if (headers != null && headers.length > 0) { String imp = headers[0]; try { int Priority = Integer.parseInt(imp); switch (Priority) { case 5: imp = "LOWEST"; break; case 4: imp = "LOW"; break; case 3: imp = "NORMAL"; break; case 2: imp = "HIGH"; break; case 1: imp = "HIGHEST"; break; default: imp = "LEV" + Priority; } } catch (NumberFormatException e) { // ignore since imp will be used as returned } 
prop.addAttribute(EMAIL_FIELDS.priority.name, imp); } headers = message.getHeader("Sensitivity"); if (headers != null && headers.length > 0) { prop.addAttribute(EMAIL_FIELDS.sensitivity.name, headers[0]); } headers = message.getHeader("X-RDF"); if (headers != null && headers.length > 0) { System.err.println("Found X-RDF"); StringBuilder builder = new StringBuilder(); for (String string : headers) { builder.append(string); builder.append("\n"); } try { byte[] decoded = org.apache.commons.codec.binary.Base64.decodeBase64(builder.toString()); String rdf = new String(decoded); Document tempDocument = DocumentHelper.parseText(rdf); Element xrdf = prop.addElement("x-rdf"); xrdf.add(tempDocument.getRootElement()); } catch (Exception e) { System.err.println("Cannot decode X-RDF: " + e.getMessage()); } } try { File old = argument.currentOutputDir; if (config.extractFile) { File newOutDir = new File(argument.currentOutputDir, id); newOutDir.mkdirs(); argument.currentOutputDir = newOutDir; } if (argument.extractKeyword) { skey = handleMessage(message, metadata, prop, id, argument, config); // should have hasAttachment if (prop.hasContent()) { metadata.add(prop); } if (metadata.hasContent()) { root.add(metadata); } ExtractInfo.exportMetadata(keywords, skey, "", config, null); if (keywords.hasContent()) { root.add(keywords); } } else { handleMessage(message, metadata, prop, id, argument, config); // should have hasAttachment if (prop.hasContent()) { metadata.add(prop); } if (metadata.hasContent()) { root.add(metadata); } } argument.currentOutputDir = old; } catch (IOException e) { System.err.println(StaticValues.LBL.error_error.get() + e.toString()); } try { message.getInputStream().close(); } catch (IOException e) { System.err.println(StaticValues.LBL.error_error.get() + e.toString()); } root.addAttribute(EMAIL_FIELDS.status.name, "ok"); } catch (MessagingException e) { System.err.println(StaticValues.LBL.error_error.get() + e.toString()); e.printStackTrace(); String status = 
"Error during identification"; root.addAttribute(EMAIL_FIELDS.status.name, status); } catch (Exception e) { System.err.println(StaticValues.LBL.error_error.get() + e.toString()); e.printStackTrace(); String status = "Error during identification"; root.addAttribute(EMAIL_FIELDS.status.name, status); } argument.currentOutputDir = oldDir; return skey; }
From source file:fr.gouv.culture.vitam.eml.PstExtract.java
License:Open Source License
private String extractInfoMessage(PSTMessage email) { if (email instanceof PSTContact) { Element node = extractInfoContact((PSTContact) email); config.addRankId(node);//from ww w . j a va2 s.co m //node.addAttribute(EMAIL_FIELDS.rankId.name, id); Element identifications = XmlDom.factory.createElement("identification"); Element identity = XmlDom.factory.createElement("identity"); identity.addAttribute("format", "Microsoft Outlook Address Book"); identity.addAttribute("mime", "application/vnd.ms-outlook"); identifications.add(identity); node.add(identifications); node.addAttribute(EMAIL_FIELDS.status.name, "ok"); currentRoot.add(node); return ""; } else if (email instanceof PSTTask) { Element node = extractInfoTask((PSTTask) email); config.addRankId(node); //node.addAttribute(EMAIL_FIELDS.rankId.name, id); Element identifications = XmlDom.factory.createElement("identification"); Element identity = XmlDom.factory.createElement("identity"); identity.addAttribute("format", "Microsoft Outlook Task"); identity.addAttribute("mime", "application/vnd.ms-outlook"); identifications.add(identity); node.add(identifications); node.addAttribute(EMAIL_FIELDS.status.name, "ok"); currentRoot.add(node); return ""; } else if (email instanceof PSTActivity) { Element node = extractInfoActivity((PSTActivity) email); config.addRankId(node); //node.addAttribute(EMAIL_FIELDS.rankId.name, id); Element identifications = XmlDom.factory.createElement("identification"); Element identity = XmlDom.factory.createElement("identity"); identity.addAttribute("format", "Microsoft Outlook Activity"); identity.addAttribute("mime", "application/vnd.ms-outlook"); identifications.add(identity); node.add(identifications); node.addAttribute(EMAIL_FIELDS.status.name, "ok"); currentRoot.add(node); return ""; } else if (email instanceof PSTRss) { Element node = extractInfoRss((PSTRss) email); config.addRankId(node); //node.addAttribute(EMAIL_FIELDS.rankId.name, id); Element identifications = 
XmlDom.factory.createElement("identification"); Element identity = XmlDom.factory.createElement("identity"); identity.addAttribute("format", "Microsoft Outlook Rss"); identity.addAttribute("mime", "application/vnd.ms-outlook"); identifications.add(identity); node.add(identifications); node.addAttribute(EMAIL_FIELDS.status.name, "ok"); currentRoot.add(node); return ""; } else if (email instanceof PSTAppointment) { Element node = extractInfoAppointment((PSTAppointment) email); config.addRankId(node); //node.addAttribute(EMAIL_FIELDS.rankId.name, id); Element identifications = XmlDom.factory.createElement("identification"); Element identity = XmlDom.factory.createElement("identity"); identity.addAttribute("format", "Microsoft Outlook Appointment"); identity.addAttribute("mime", "application/vnd.ms-outlook"); identifications.add(identity); node.add(identifications); node.addAttribute(EMAIL_FIELDS.status.name, "ok"); currentRoot.add(node); return ""; } Element root = XmlDom.factory.createElement(EMAIL_FIELDS.formatMSG.name); Element keywords = XmlDom.factory.createElement(EMAIL_FIELDS.keywords.name); Element metadata = XmlDom.factory.createElement(EMAIL_FIELDS.metadata.name); String id = config.addRankId(root); //root.addAttribute(EMAIL_FIELDS.rankId.name, id); Element identifications = XmlDom.factory.createElement("identification"); Element identity = XmlDom.factory.createElement("identity"); identity.addAttribute("format", "Microsoft Outlook Email Message"); identity.addAttribute("mime", "application/vnd.ms-outlook"); identity.addAttribute("puid", "x-fmt/430"); identity.addAttribute("extensions", "msg"); identifications.add(identity); root.add(identifications); Element sub = XmlDom.factory.createElement(EMAIL_FIELDS.from.name); addAddress(sub, EMAIL_FIELDS.fromUnit.name, email.getSenderName(), email.getSenderEmailAddress()); metadata.add(sub); int NumberOfRecipients = 0; Element toRecipients = XmlDom.factory.createElement(EMAIL_FIELDS.toRecipients.name); Element 
ccRecipients = XmlDom.factory.createElement(EMAIL_FIELDS.ccRecipients.name); Element bccRecipients = XmlDom.factory.createElement(EMAIL_FIELDS.bccRecipients.name); try { NumberOfRecipients = email.getNumberOfRecipients(); } catch (PSTException e1) { } catch (IOException e1) { } for (int i = 0; i < NumberOfRecipients; i++) { try { PSTRecipient recipient = email.getRecipient(i); // MAPI_TO = 1; MAPI_CC = 2; MAPI_BCC = 3; Element choose = null; String type = "??"; switch (recipient.getRecipientType()) { case PSTRecipient.MAPI_TO: type = EMAIL_FIELDS.toUnit.name; choose = toRecipients; break; case PSTRecipient.MAPI_CC: type = EMAIL_FIELDS.ccUnit.name; choose = ccRecipients; break; case PSTRecipient.MAPI_BCC: type = EMAIL_FIELDS.bccUnit.name; choose = bccRecipients; break; } if (choose != null) { addAddress(choose, type, recipient.getDisplayName(), recipient.getEmailAddress()); } } catch (PSTException e) { } catch (IOException e) { } } if (toRecipients.hasContent()) { metadata.add(toRecipients); } if (ccRecipients.hasContent()) { metadata.add(ccRecipients); } if (bccRecipients.hasContent()) { metadata.add(bccRecipients); } // get the subject String Subject = email.getSubject(); if (Subject != null) { sub = XmlDom.factory.createElement(EMAIL_FIELDS.subject.name); sub.setText(StringUtils.unescapeHTML(Subject, true, false)); metadata.add(sub); } // Conversation topic This is basically the subject from which Fwd:, Re, etc. Subject = email.getConversationTopic(); if (Subject != null) { sub = XmlDom.factory.createElement(EMAIL_FIELDS.conversationTopic.name); sub.setText(StringUtils.unescapeHTML(Subject, true, false)); metadata.add(sub); } // get the client submit time (sent ?) 
Date ClientSubmitTime = email.getClientSubmitTime(); if (ClientSubmitTime != null) { sub = XmlDom.factory.createElement(EMAIL_FIELDS.sentDate.name); sub.setText(ClientSubmitTime.toString()); metadata.add(sub); } // Message delivery time Date MessageDeliveryTime = email.getMessageDeliveryTime(); if (MessageDeliveryTime != null) { sub = XmlDom.factory.createElement(EMAIL_FIELDS.receivedDate.name); sub.setText(MessageDeliveryTime.toString()); metadata.add(sub); } // Transport message headers ASCII or Unicode string These contain the SMTP e-mail headers. String TransportMessageHeaders = email.getTransportMessageHeaders(); if (TransportMessageHeaders != null) { sub = XmlDom.factory.createElement(EMAIL_FIELDS.receptionTrace.name); sub.add(XmlDom.factory.createElement(EMAIL_FIELDS.trace.name) .addText(StringUtils.unescapeHTML(TransportMessageHeaders, true, false))); metadata.add(sub); if (TransportMessageHeaders.contains("X-RDF:")) { System.err.println("Found a X-RDF"); int pos = TransportMessageHeaders.indexOf("X-RDF:") + "X-RDF:".length(); while (pos < TransportMessageHeaders.length()) { char test = TransportMessageHeaders.charAt(pos); if (test != ' ' && test != '\r' && test != '\n') { pos++; } else { break; } } int pos2 = TransportMessageHeaders.indexOf(":", pos); while (pos2 > pos) { char test = TransportMessageHeaders.charAt(pos2); if (test != ' ' && test != '\r' && test != '\n') { pos2--; } else { break; } } String xrdf = TransportMessageHeaders.substring(pos, pos2); String rdf = null; try { byte[] decoded = org.apache.commons.codec.binary.Base64.decodeBase64(xrdf); //byte [] decoded = Base64.decode(xrdf); rdf = new String(decoded); System.err.println(rdf); try { Document tempDocument = DocumentHelper.parseText(rdf); Element erdf = sub.addElement("x-rdf"); erdf.add(tempDocument.getRootElement()); } catch (Exception e) { System.err.println("Cannot decode X-RDF: " + e.getMessage()); e.printStackTrace(); Element erdf = sub.addElement("x-rdf"); erdf.addText(rdf); } } 
catch (Exception e) { System.err.println("Cannot decode X-RDF: " + e.getMessage()); System.err.println(xrdf); e.printStackTrace(); } } TransportMessageHeaders = null; } long internalSize = email.getMessageSize(); if (internalSize > 0) { sub = XmlDom.factory.createElement(EMAIL_FIELDS.emailSize.name); sub.setText(Long.toString(internalSize)); metadata.add(sub); } // Message ID for this email as allocated per rfc2822 String InternetMessageId = email.getInternetMessageId(); if (InternetMessageId != null) { InternetMessageId = StringUtils.removeChevron(StringUtils.unescapeHTML(InternetMessageId, true, false)) .trim(); if (InternetMessageId.length() > 1) { sub = XmlDom.factory.createElement(EMAIL_FIELDS.messageId.name); sub.setText(InternetMessageId); metadata.add(sub); } } // In-Reply-To String InReplyToId = email.getInReplyToId(); if (InReplyToId != null) { InReplyToId = StringUtils.removeChevron(StringUtils.unescapeHTML(InReplyToId, true, false)).trim(); if (InReplyToId.length() > 1) { sub = XmlDom.factory.createElement(EMAIL_FIELDS.inReplyTo.name); sub.setText(InReplyToId); if (InternetMessageId != null && InternetMessageId.length() > 1) { String old = EmlExtract.filEmls.get(InReplyToId); if (old == null) { old = InternetMessageId; } else { old += "," + InternetMessageId; } EmlExtract.filEmls.put(InReplyToId, old); } metadata.add(sub); } InReplyToId = null; InternetMessageId = null; } sub = XmlDom.factory.createElement(EMAIL_FIELDS.properties.name); // is the action flag for this item "forward"? boolean Forwarded = email.hasForwarded(); sub.addAttribute(EMAIL_FIELDS.propForwarded.name, Boolean.toString(Forwarded)); // is the action flag for this item "replied"? 
boolean Replied = email.hasReplied(); sub.addAttribute(EMAIL_FIELDS.propReplied.name, Boolean.toString(Replied)); // boolean Read = email.isRead(); sub.addAttribute(EMAIL_FIELDS.propRead.name, Boolean.toString(Read)); // boolean Unsent = email.isUnsent(); sub.addAttribute(EMAIL_FIELDS.propUnsent.name, Boolean.toString(Unsent)); // Recipient Reassignment Prohibited Boolean 0 = false 0 != true boolean RecipientReassignmentProhibited = email.getRecipientReassignmentProhibited(); sub.addAttribute(EMAIL_FIELDS.propRecipientReassignmentProhibited.name, Boolean.toString(RecipientReassignmentProhibited)); // get the importance of the email // PSTMessage.IMPORTANCE_LOW + PSTMessage.IMPORTANCE_NORMAL + PSTMessage.IMPORTANCE_HIGH int Importance = email.getImportance(); String imp = "??"; switch (Importance) { case PSTMessage.IMPORTANCE_LOW: imp = "LOW"; break; case PSTMessage.IMPORTANCE_NORMAL: imp = "NORMAL"; break; case PSTMessage.IMPORTANCE_HIGH: imp = "HIGH"; break; } sub.addAttribute(EMAIL_FIELDS.importance.name, imp); // Priority Integer 32-bit signed -1 = NonUrgent 0 = Normal 1 = Urgent int Priority = email.getPriority(); switch (Priority) { case -1: imp = "LOW"; break; case 0: imp = "NORMAL"; break; case 1: imp = "HIGH"; break; default: imp = "LEV" + Priority; } sub.addAttribute(EMAIL_FIELDS.priority.name, imp); // Sensitivity Integer 32-bit signed sender's opinion of the sensitivity of an email 0 = // None 1 = Personal 2 = Private 3 = Company Confidential int Sensitivity = email.getSensitivity(); String sens = "??"; switch (Sensitivity) { case 0: sens = "None"; break; case 1: sens = "Personal"; break; case 2: sens = "Private"; break; case 3: sens = "Confidential"; break; } sub.addAttribute(EMAIL_FIELDS.sensitivity.name, sens); // boolean Attachments = email.hasAttachments(); sub.addAttribute(EMAIL_FIELDS.hasAttachment.name, Boolean.toString(Attachments)); metadata.add(sub); String result = ""; Element identification = null; if (Attachments) { File oldPath = curPath; 
if (config.extractFile) { File newDir = new File(curPath, id); newDir.mkdirs(); curPath = newDir; argument.currentOutputDir = curPath; } identification = XmlDom.factory.createElement(EMAIL_FIELDS.attachments.name); // get the number of attachments for this message int NumberOfAttachments = email.getNumberOfAttachments(); identification.addAttribute(EMAIL_FIELDS.attNumber.name, Integer.toString(NumberOfAttachments)); // get a specific attachment from this email. for (int attachmentNumber = 0; attachmentNumber < NumberOfAttachments; attachmentNumber++) { try { PSTAttachment attachment = email.getAttachment(attachmentNumber); if (argument.extractKeyword) { result += " " + extractInfoAttachment(attachment, identification); } else { extractInfoAttachment(attachment, identification); } } catch (PSTException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } curPath = oldPath; argument.currentOutputDir = curPath; } // Plain text e-mail body String body = ""; if (argument.extractKeyword || config.extractFile) { body = email.getBody(); boolean isTxt = true; boolean isHttp = false; if (body == null || body.isEmpty()) { isTxt = false; body = email.getBodyHTML(); isHttp = true; if (body == null || body.isEmpty()) { isHttp = false; try { body = email.getRTFBody(); } catch (PSTException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } } if (body != null && !body.isEmpty()) { if (config.extractFile) { // XXX FIXME could saved email from HTML Body (clearer) if possible // use curRank in name, and attachment will be under directory named // add currank in field File newDir = new File(curPath, id); newDir.mkdirs(); File oldPath = curPath; curPath = newDir; argument.currentOutputDir = curPath; String filenamebody = InternetMessageId; if (filenamebody == null || !filenamebody.isEmpty()) { filenamebody = id; } String html = null; if (isHttp) { html = body; } String rtf = null; if (!isTxt && !isHttp) { rtf = body; } if (isTxt) { 
FileOutputStream output = null; try { output = new FileOutputStream(new File(newDir, filenamebody + ".txt")); byte[] bb = body.getBytes(); output.write(bb, 0, bb.length); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { if (output != null) { try { output.close(); } catch (IOException e) { } } } html = email.getBodyHTML(); } if (html != null && !html.isEmpty()) { FileOutputStream output = null; try { output = new FileOutputStream(new File(newDir, filenamebody + ".html")); byte[] bb = html.getBytes(); output.write(bb, 0, bb.length); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { if (output != null) { try { output.close(); } catch (IOException e) { } } } html = null; } if (isTxt || isHttp) { try { rtf = email.getRTFBody(); } catch (PSTException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } if (rtf != null && !rtf.isEmpty()) { FileOutputStream output = null; try { output = new FileOutputStream(new File(newDir, filenamebody + ".rtf")); byte[] bb = rtf.getBytes(); output.write(bb, 0, bb.length); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { if (output != null) { try { output.close(); } catch (IOException e) { } } } rtf = null; } curPath = oldPath; argument.currentOutputDir = curPath; } } } if (metadata.hasContent()) { root.add(metadata); } if (identification != null && identification.hasContent()) { root.add(identification); } if (argument.extractKeyword) { result = body + " " + result; body = null; ExtractInfo.exportMetadata(keywords, result, "", config, null); if (keywords.hasContent()) { root.add(keywords); } } root.addAttribute(EMAIL_FIELDS.status.name, "ok"); currentRoot.add(root); return result; }
From source file:fuliao.fuliaozhijia.data.UserData.java
License:Apache License
private static Map<String, String> deailXml(String xml) throws DocumentException { Map<String, String> mesage = Maps.newHashMap(); Document doc = DocumentHelper.parseText(xml); // XML Element root = doc.getRootElement(); // ? @SuppressWarnings("unchecked") List<Element> elementList = root.elements(); for (Element e : elementList) { mesage.put(e.getName(), e.getText()); }//from ww w. j a v a 2 s.c o m return mesage; }