List of usage examples for org.dom4j Element add
void add(Namespace namespace);
Adds the given Namespace to this element.
From source file:eu.sisob.uma.crawler.ResearcherXMLFileSplitter.java
License:Open Source License
/** * Reader folder of one researcher and takes the uri of clean file for to make infoblock. * @param elementResearcher/*www . j a va2s.c om*/ * @param path * @param sInstitutionName * @param sWebAddress * @param sUnitOfAssessment_Description * @param sResearchGroupDescription * @param sResearchName * @param sResearchInitials * @param sStaffIndentifier */ @Override protected boolean actionsInInstitutionNode(org.dom4j.Element elementInstitution, String path, String sInstitutionName, String sWebAddress) { Document split_doc = DocumentHelper.createDocument(); Element split_root = split_doc.addElement("root"); split_root.add((Element) elementInstitution.clone()); File split_file = new File(work_dir, prefix_file + sInstitutionName.replaceAll("\\W+", "").toLowerCase()); try { FileUtils.write(split_file, split_doc.asXML(), "UTF-8"); } catch (IOException ex) { Logger.getLogger("roor").error("Cannot create xml file for " + split_file.getPath()); } return true; }
From source file:eu.sisob.uma.NPL.Researchers.GateResearcherAnnCollector.java
License:Open Source License
/** * * @param doc/*from ww w . j a va 2s . c om*/ * @param aoData */ @Override public void collect(Object doc, MiddleData aoData) { int n_expressions = 0; org.dom4j.Element eOut = org.dom4j.DocumentFactory.getInstance().createElement("blockinfo"); eOut.addAttribute(DataExchangeLiterals.MIDDLE_ELEMENT_XML_ID_ANNOTATIONRECOLLECTING, aoData.getId_annotationrecollecting()); // aoData[MiddleData.I_INDEX_DATA_TYPE].toString()); eOut.addAttribute(DataExchangeLiterals.MIDDLE_ELEMENT_XML_ID_ENTITY_ATT, aoData.getId_entity()); // aoData[MiddleData.I_INDEX_DATA_ID].toString()); gate.Document docGate = (gate.Document) doc; HashMap<String, String> extra_data = null; try { extra_data = (HashMap<String, String>) aoData.getData_extra(); } catch (Exception ex) { extra_data = null; } boolean collect_expressions = false; List<String> expression_to_collect = new ArrayList<String>(); if (extra_data != null) { if (extra_data.containsKey(DataExchangeLiterals.MIDDLE_ELEMENT_XML_EXTRADATA_BLOCK_TYPE)) { String block_type = extra_data.get(DataExchangeLiterals.MIDDLE_ELEMENT_XML_EXTRADATA_BLOCK_TYPE); if (block_type.equals(CVBlocks.CVBLOCK_REST.toString())) { expression_to_collect.add("ProfessionalActivityCurrent"); expression_to_collect.add("ProfessionalActivityNoCurrent"); expression_to_collect.add("AccreditedUniversityStudiesOtherPostGrade"); expression_to_collect.add("AccreditedUniversityStudiesDegree"); expression_to_collect.add("AccreditedUniversityStudiesPhDStudies"); collect_expressions = true; } else if (block_type.equals(CVBlocks.CVBLOCK_PROFESSIONAL_ACTIVITY.toString())) { expression_to_collect.add("ProfessionalActivityCurrent"); expression_to_collect.add("ProfessionalActivityNoCurrent"); collect_expressions = true; } else if (block_type.equals(CVBlocks.CVBLOCK_UNIVERSITY_STUDIES.toString())) { expression_to_collect.add("AccreditedUniversityStudiesOtherPostGrade"); expression_to_collect.add("AccreditedUniversityStudiesDegree"); 
expression_to_collect.add("AccreditedUniversityStudiesPhDStudies"); collect_expressions = true; } } else { expression_to_collect.add("ProfessionalActivityCurrent"); expression_to_collect.add("ProfessionalActivityNoCurrent"); expression_to_collect.add("AccreditedUniversityStudiesOtherPostGrade"); expression_to_collect.add("AccreditedUniversityStudiesDegree"); expression_to_collect.add("AccreditedUniversityStudiesPhDStudies"); collect_expressions = true; } } else { expression_to_collect.add("ProfessionalActivityCurrent"); expression_to_collect.add("ProfessionalActivityNoCurrent"); expression_to_collect.add("AccreditedUniversityStudiesOtherPostGrade"); expression_to_collect.add("AccreditedUniversityStudiesDegree"); expression_to_collect.add("AccreditedUniversityStudiesPhDStudies"); collect_expressions = true; } eOut.addAttribute("URL", docGate.getSourceUrl() != null ? docGate.getSourceUrl().toString() : ""); AnnotationSet annoset = docGate.getAnnotations(); if (collect_expressions) { List<Annotation> anns = new ArrayList<Annotation>(); for (String exp : expression_to_collect) anns.addAll(annoset.get(exp)); //Collections.sort(anns, new OffsetBeginEndComparator()); //need to bee order if (anns.size() > 0) { for (Annotation an : anns) { String cvnItemName = an.getType(); org.dom4j.Element eAux = new org.dom4j.DocumentFactory().createElement(cvnItemName); // eAux.addElement("Domain").addText(gate.Utils.stringFor(docGate, // an.getStartNode().getOffset() > 100 ? an.getStartNode().getOffset() - 100 : an.getStartNode().getOffset(), // an.getEndNode().getOffset() + 100 < docGate.getContent().size() ? 
an.getEndNode().getOffset() + 100 : an.getEndNode().getOffset())); eAux.addAttribute("action_mode", "add"); eAux.addElement("Content").addText(gate.Utils.stringFor(docGate, an)); FeatureMap fmap = an.getFeatures(); for (Object key : fmap.keySet()) { String fieldName = key.toString(); eAux.addElement(fieldName).addText(fmap.get(key).toString()); } eOut.add(eAux); } } n_expressions += eOut.elements().size(); } collect_expressions = false; if (extra_data != null) { if (extra_data.containsKey(DataExchangeLiterals.MIDDLE_ELEMENT_XML_EXTRADATA_BLOCK_TYPE)) { String block_type = extra_data.get(DataExchangeLiterals.MIDDLE_ELEMENT_XML_EXTRADATA_BLOCK_TYPE); if (block_type.equals(CVBlocks.CVBLOCK_REST.toString())) { collect_expressions = true; } else if (block_type.equals(CVBlocks.CVBLOCK_PERSONAL.toString())) { collect_expressions = true; } } else { collect_expressions = true; } } else { collect_expressions = true; } if (collect_expressions) { ArrayList<Annotation> anns = new ArrayList<Annotation>(); anns.addAll(annoset.get("AgentIdentification")); if (anns.size() > 0) { String lastname = ""; String initials = ""; String name = ""; String firstname = ""; if (extra_data != null) { lastname = extra_data.get(DataExchangeLiterals.MIDDLE_ELEMENT_XML_EXTRADATA_LASTNAME); initials = extra_data.get(DataExchangeLiterals.MIDDLE_ELEMENT_XML_EXTRADATA_INITIALS); name = extra_data.get(DataExchangeLiterals.MIDDLE_ELEMENT_XML_EXTRADATA_NAME); firstname = extra_data.get(DataExchangeLiterals.MIDDLE_ELEMENT_XML_EXTRADATA_FIRSTNAME); } if (firstname.equals("")) { firstname = initials; } for (Annotation an : anns) { String cvnItemName = an.getType(); org.dom4j.Element eAux = new org.dom4j.DocumentFactory().createElement(cvnItemName); // eAux.addElement("Domain").addText(gate.Utils.stringFor(docGate, // an.getStartNode().getOffset() > 100 ? an.getStartNode().getOffset() - 100 : an.getStartNode().getOffset(), // an.getEndNode().getOffset() + 100 < docGate.getContent().size() ? 
an.getEndNode().getOffset() + 100 : an.getEndNode().getOffset())); eAux.addAttribute("action_mode", "overwrite"); eAux.addAttribute("extra_gets", "getInformation"); eAux.addElement("Content").addText(gate.Utils.stringFor(docGate, an)); FeatureMap fmap = an.getFeatures(); for (Object key : fmap.keySet()) { String fieldName = key.toString(); eAux.addElement(fieldName).addText(fmap.get(key).toString()); } eAux.addElement(CVItemExtracted.AgentIdentification.GivenName).addText(firstname); eAux.addElement(CVItemExtracted.AgentIdentification.FirstFamilyName).addText(lastname); eOut.add(eAux); } } n_expressions += eOut.elements().size(); } ProjectLogger.LOGGER.info(String.format("%3d expressions in %s : ", n_expressions, docGate.getName())); // + docXML.asXML() if (eOut == null) ProjectLogger.LOGGER.info("Output is null"); // + docXML.asXML() aoData.setData_out(eOut); if (aoData.getVerbose()) { File dest_dir = aoData.getVerboseDir(); File path = null; String fileName = ""; if (extra_data != null && extra_data.containsKey(DataExchangeLiterals.MIDDLE_ELEMENT_XML_EXTRADATA_DOCUMENT_NAME)) { fileName = extra_data.get(DataExchangeLiterals.MIDDLE_ELEMENT_XML_EXTRADATA_DOCUMENT_NAME); } else { URL url = docGate.getSourceUrl(); try { path = new File(url.toURI()); fileName = path.getName(); } catch (Exception e) { String filename; try { filename = URLEncoder.encode(url.toString(), "UTF-8") + ".html"; } catch (Exception ex) { filename = docGate.getName() + ".html"; } path = new File(filename); fileName = path.getName(); } if (!fileName.equals("")) { fileName = fileName.substring(0, fileName.lastIndexOf(".")); } } File file_result = new File(dest_dir, fileName + "_verbose.html"); try { writeResultsInHTMLFile(docGate, file_result); } catch (Exception ex) { Logger.getRootLogger().error("Error writing verbose results. " + ex.toString()); } } }
From source file:eu.sisob.uma.NPL.Researchers.GateResearcherAnnCollector_Deprecated.java
License:Open Source License
/** * * @param doc//from w ww . ja va 2s .co m * @param aoData */ @Override public void collect(Object doc, MiddleData aoData) { int n_expressions = 0; org.dom4j.Element eOut = org.dom4j.DocumentFactory.getInstance().createElement("blockinfo"); eOut.addAttribute(DataExchangeLiterals.MIDDLE_ELEMENT_XML_ID_ANNOTATIONRECOLLECTING, aoData.getId_annotationrecollecting()); // aoData[MiddleData.I_INDEX_DATA_TYPE].toString()); eOut.addAttribute(DataExchangeLiterals.MIDDLE_ELEMENT_XML_ID_ENTITY_ATT, aoData.getId_entity()); // aoData[MiddleData.I_INDEX_DATA_ID].toString()); gate.Document docGate = (gate.Document) doc; HashMap<String, String> extra_data = null; try { extra_data = (HashMap<String, String>) aoData.getData_extra(); } catch (Exception ex) { extra_data = null; } boolean collect_expressions = true; if (extra_data != null) { if (extra_data.containsKey(DataExchangeLiterals.MIDDLE_ELEMENT_XML_EXTRADATA_BLOCK_TYPE)) { String block_type = extra_data.get(DataExchangeLiterals.MIDDLE_ELEMENT_XML_EXTRADATA_BLOCK_TYPE); if (block_type.equals(CVBlocks.CVBLOCK_OTHERS)) { collect_expressions = false; } } else { } } eOut.addAttribute("URL", docGate.getSourceUrl() != null ? 
docGate.getSourceUrl().toString() : ""); if (collect_expressions) { AnnotationSet annoset = docGate.getAnnotations(); List<Annotation> anns = new ArrayList<Annotation>(); //Expressions anns.addAll(annoset.get("ProfessionalActivityCurrent")); anns.addAll(annoset.get("ProfessionalActivityNoCurrent")); anns.addAll(annoset.get("AccreditedUniversityStudiesOtherPostGrade")); anns.addAll(annoset.get("AccreditedUniversityStudiesDegree")); anns.addAll(annoset.get("AccreditedUniversityStudiesPhDStudies")); //Collections.sort(anns, new OffsetBeginEndComparator()); //need to bee order if (anns.size() > 0) { for (Annotation an : anns) { String cvnItemName = an.getType(); org.dom4j.Element eAux = new org.dom4j.DocumentFactory().createElement(cvnItemName); // eAux.addElement("Domain").addText(gate.Utils.stringFor(docGate, // an.getStartNode().getOffset() > 100 ? an.getStartNode().getOffset() - 100 : an.getStartNode().getOffset(), // an.getEndNode().getOffset() + 100 < docGate.getContent().size() ? an.getEndNode().getOffset() + 100 : an.getEndNode().getOffset())); eAux.addAttribute("action_mode", "add"); eAux.addElement("Content").addText(gate.Utils.stringFor(docGate, an)); FeatureMap fmap = an.getFeatures(); for (Object key : fmap.keySet()) { String fieldName = key.toString(); eAux.addElement(fieldName).addText(fmap.get(key).toString()); } eOut.add(eAux); } } n_expressions += eOut.elements().size(); anns = new ArrayList<Annotation>(); anns.addAll(annoset.get("AgentIdentification")); if (anns.size() > 0) { String lastname = ""; String initials = ""; String name = ""; String firstname = ""; if (extra_data != null) { lastname = extra_data.get(DataExchangeLiterals.MIDDLE_ELEMENT_XML_EXTRADATA_LASTNAME); initials = extra_data.get(DataExchangeLiterals.MIDDLE_ELEMENT_XML_EXTRADATA_INITIALS); name = extra_data.get(DataExchangeLiterals.MIDDLE_ELEMENT_XML_EXTRADATA_NAME); firstname = extra_data.get(DataExchangeLiterals.MIDDLE_ELEMENT_XML_EXTRADATA_FIRSTNAME); } if (firstname.equals("")) { 
firstname = initials; } for (Annotation an : anns) { String cvnItemName = an.getType(); org.dom4j.Element eAux = new org.dom4j.DocumentFactory().createElement(cvnItemName); // eAux.addElement("Domain").addText(gate.Utils.stringFor(docGate, // an.getStartNode().getOffset() > 100 ? an.getStartNode().getOffset() - 100 : an.getStartNode().getOffset(), // an.getEndNode().getOffset() + 100 < docGate.getContent().size() ? an.getEndNode().getOffset() + 100 : an.getEndNode().getOffset())); eAux.addAttribute("action_mode", "overwrite"); eAux.addAttribute("extra_gets", "getInformation"); eAux.addElement("Content").addText(gate.Utils.stringFor(docGate, an)); FeatureMap fmap = an.getFeatures(); for (Object key : fmap.keySet()) { String fieldName = key.toString(); eAux.addElement(fieldName).addText(fmap.get(key).toString()); } eAux.addElement(CVItemExtracted.AgentIdentification.GivenName).addText(firstname); eAux.addElement(CVItemExtracted.AgentIdentification.FirstFamilyName).addText(lastname); eOut.add(eAux); } } n_expressions += eOut.elements().size(); ProjectLogger.LOGGER .info(String.format("%3d expressions in %s : ", n_expressions, docGate.getSourceUrl())); // + docXML.asXML() if (eOut == null) ProjectLogger.LOGGER.info("Output is null"); // + docXML.asXML() aoData.setData_out(eOut); } if (aoData.getVerbose()) { File dest_dir = aoData.getVerboseDir(); File path = null; String fileName = ""; if (extra_data != null && extra_data.containsKey(DataExchangeLiterals.MIDDLE_ELEMENT_XML_EXTRADATA_DOCUMENT_NAME)) { fileName = extra_data.get(DataExchangeLiterals.MIDDLE_ELEMENT_XML_EXTRADATA_DOCUMENT_NAME); } else { URL url = docGate.getSourceUrl(); try { path = new File(url.toURI()); fileName = path.getName(); } catch (Exception e) { String filename; try { filename = URLEncoder.encode(url.toString(), "UTF-8") + ".html"; } catch (Exception ex) { filename = docGate.getName() + ".html"; } path = new File(filename); fileName = path.getName(); } if (!fileName.equals("")) { fileName = 
fileName.substring(0, fileName.lastIndexOf(".")); } } File file_result = new File(dest_dir, fileName + "_verbose.html"); try { writeResultsInHTMLFile(docGate, file_result); } catch (Exception ex) { Logger.getRootLogger().error("Error writing verbose results. " + ex.toString()); } } }
From source file:eu.sisob.uma.NPL.Researchers.TextMiningParserGateDetector.java
License:Open Source License
/** * Define annotator collector acoording index * I_TYPE_CONTENT_ENTIRE_WEB_PAGE => Extract info from CV and personal web page of researchers * @param lstAnnColl_ list of annotator collector *///from ww w . ja v a2s.c o m @Override protected void iniAnnotatorCollectors(TreeMap lstAnnColl_) { AnnotatorCollector a = null; a = new AnnotatorCollector(DataExchangeLiterals.ID_TEXTMININGPARSER_GATERESEARCHER_DEFAULTANNREC) { @Override public void collect(Object doc, MiddleData aoData) { org.dom4j.Element eOut = org.dom4j.DocumentFactory.getInstance().createElement("blockinfo"); eOut.addAttribute(DataExchangeLiterals.MIDDLE_ELEMENT_XML_ID_ANNOTATIONRECOLLECTING, aoData.getId_annotationrecollecting()); // aoData[MiddleData.I_INDEX_DATA_TYPE].toString()); eOut.addAttribute(DataExchangeLiterals.MIDDLE_ELEMENT_XML_ID_ENTITY_ATT, aoData.getId_entity()); // aoData[MiddleData.I_INDEX_DATA_ID].toString()); gate.Document docGate = (gate.Document) doc; AnnotationSet annoset = docGate.getAnnotations(); List<Annotation> anns = new ArrayList<Annotation>(); //Expressions //anns.addAll(annoset.get("JobTitleTest")); //anns.addAll(annoset.get("DegreeTest")); anns.addAll(annoset.get("OrgTest")); //Collections.sort(anns, new OffsetBeginEndComparator()); //need to bee order if (anns.size() > 0) { for (Annotation an : anns) { String cvnItemName = an.getType(); org.dom4j.Element eAux = new org.dom4j.DocumentFactory().createElement(cvnItemName); //eAux.addElement("Domain").addText(gate.Utils.stringFor(docGate, // an.getStartNode().getOffset() > 100 ? an.getStartNode().getOffset() - 100 : an.getStartNode().getOffset(), // an.getEndNode().getOffset() + 100 < docGate.getContent().size() ? an.getEndNode().getOffset() + 100 : an.getEndNode().getOffset())); eAux.addText(gate.Utils.stringFor(docGate, an)); eOut.add(eAux); } } Logger.getLogger("MyLog").info(String.format("%3d expressions in %s : ", eOut != null ? 
eOut.elements().size() : 0, docGate.getSourceUrl())); // + docXML.asXML() aoData.setData_out(eOut); } }; lstAnnColl_.put(a.type, a); }
From source file:fr.gouv.culture.vitam.command.VitamCommand.java
License:Open Source License
public static void computeDigest() { XMLWriter writer = null;/*from ww w . j av a 2 s . c o m*/ try { writer = new XMLWriter(outputStream, StaticValues.defaultOutputFormat); } catch (UnsupportedEncodingException e1) { System.err.println(StaticValues.LBL.error_writer.get() + ": " + e1.toString()); return; } File basedir = new File(checkDigest); List<File> files; try { files = DroidHandler.matchedFiled(new File[] { basedir }, extensions, StaticValues.config.argument.recursive); } catch (CommandExecutionException e1) { System.err.println(StaticValues.LBL.error_error.get() + e1.toString()); return; } System.out.println("Digest..."); Element root = null; VitamResult vitamResult = new VitamResult(); if (basedir.isFile()) { basedir = basedir.getParentFile(); } if (StaticValues.config.argument.outputModel == VitamOutputModel.OneXML) { root = XmlDom.factory.createElement("digests"); root.addAttribute("source", basedir.getAbsolutePath()); vitamResult.unique = XmlDom.factory.createDocument(root); } int currank = 0; int error = 0; for (File file : files) { currank++; String shortname; shortname = StaticValues.getSubPath(file, basedir); FileInputStream inputstream; try { inputstream = new FileInputStream(file); } catch (FileNotFoundException e) { System.err.println(StaticValues.LBL.error_computedigest.get() + ": " + shortname); continue; } String[] shas = DigestCompute.computeDigest(inputstream, StaticValues.config.argument); //SEDA type since already configured Element result = XmlDom.factory.createElement(StaticValues.config.DOCUMENT_FIELD); Element attachment = XmlDom.factory.createElement(StaticValues.config.ATTACHMENT_FIELD); attachment.addAttribute(StaticValues.config.FILENAME_ATTRIBUTE.substring(1), shortname); result.add(attachment); if (shas[0] != null) { Element integrity = XmlDom.factory.createElement(StaticValues.config.INTEGRITY_FIELD); integrity.addAttribute(StaticValues.config.ALGORITHME_ATTRIBUTE.substring(1), StaticValues.XML_SHA1); integrity.setText(shas[0]); 
result.add(integrity); } if (shas[1] != null) { Element integrity = XmlDom.factory.createElement(StaticValues.config.INTEGRITY_FIELD); integrity.addAttribute(StaticValues.config.ALGORITHME_ATTRIBUTE.substring(1), StaticValues.XML_SHA256); integrity.setText(shas[1]); result.add(integrity); } if (shas[2] != null) { Element integrity = XmlDom.factory.createElement(StaticValues.config.INTEGRITY_FIELD); integrity.addAttribute(StaticValues.config.ALGORITHME_ATTRIBUTE.substring(1), StaticValues.XML_SHA512); integrity.setText(shas[2]); result.add(integrity); } if ((shas[0] == null && StaticValues.config.argument.sha1) || (shas[1] == null && StaticValues.config.argument.sha256) || (shas[2] == null && StaticValues.config.argument.sha512)) { result.addAttribute("status", "error"); error++; } else { result.addAttribute("status", "ok"); } XmlDom.addDate(StaticValues.config.argument, StaticValues.config, result); if (root != null) { root.add(result); } else { // multiple root = XmlDom.factory.createElement("digests"); root.addAttribute("source", basedir.getAbsolutePath()); root.add(result); try { writer.write(root); } catch (IOException e) { System.err.println(StaticValues.LBL.error_error.get() + e.toString()); } root = null; } } if (root != null) { if (error == 0) { root.addAttribute("status", "ok"); } else { root.addAttribute("status", "error on " + error + " / " + currank + " file checks"); } XmlDom.addDate(StaticValues.config.argument, StaticValues.config, root); try { writer.write(vitamResult.unique); } catch (IOException e) { System.err.println(StaticValues.LBL.error_analysis.get() + e); } } System.out.println(StaticValues.LBL.action_digest.get() + " [ " + currank + (error > 0 ? " (" + StaticValues.LBL.error_error.get() + error + " ) " : "") + " ]"); }
From source file:fr.gouv.culture.vitam.command.VitamCommand.java
License:Open Source License
private static Element addPdfaElement(Element root, Element pdfa, File basedir, File baseoutdir, boolean error, String serror, String puid, VitamResult vitamResult) { XmlDom.addDate(StaticValues.config.argument, StaticValues.config, pdfa); if (puid != null) { pdfa.addAttribute("puid", puid); }/* w w w.j a va2 s .c o m*/ if (root != null) { root.add(pdfa); } else { // multiple root = XmlDom.factory.createElement("transform"); root.addAttribute("source", basedir.getAbsolutePath()); root.addAttribute("target", baseoutdir.getAbsolutePath()); Document document = XmlDom.factory.createDocument(root); root.add(pdfa); if (error) { root.addAttribute("status", serror); } else { root.addAttribute("status", "ok"); } vitamResult.multiples.add(document); } return root; }
From source file:fr.gouv.culture.vitam.command.VitamCommand.java
License:Open Source License
public static void convertPdfa() { XMLWriter writer = null;/*from ww w . j ava 2 s . com*/ try { writer = new XMLWriter(outputStream, StaticValues.defaultOutputFormat); } catch (UnsupportedEncodingException e1) { System.err.println(StaticValues.LBL.error_writer.get() + ": " + e1.toString()); return; } File basedir = new File(fromPdfA); List<File> files; try { files = DroidHandler.matchedFiled(new File[] { basedir }, extensions, StaticValues.config.argument.recursive); } catch (CommandExecutionException e1) { System.err.println(StaticValues.LBL.error_error.get() + e1.toString()); return; } if (basedir.isFile()) { basedir = basedir.getParentFile(); } File baseoutdir = new File(toPdfA); if (!baseoutdir.exists()) { baseoutdir.mkdirs(); } if (baseoutdir.isFile()) { baseoutdir = baseoutdir.getParentFile(); } int errorcpt = 0; boolean checkDroid = false; try { StaticValues.config.initDroid(); checkDroid = true; } catch (CommandExecutionException e) { System.err.println(StaticValues.LBL.error_initdroid.get() + e.toString()); } System.out.println("\nTransform PDF/A-1B\n"); Element root = null; Element temp = null; VitamResult vitamResult = new VitamResult(); if (StaticValues.config.argument.outputModel == VitamOutputModel.OneXML) { root = XmlDom.factory.createElement("transform"); root.addAttribute("source", basedir.getAbsolutePath()); root.addAttribute("target", baseoutdir.getAbsolutePath()); vitamResult.unique = XmlDom.factory.createDocument(root); } else { // force multiple vitamResult.multiples = new ArrayList<Document>(); } for (File file : files) { String basename = file.getName(); File rootdir; String subpath = null; if (file.getParentFile().equals(basedir)) { rootdir = basedir; subpath = File.separator; } else { rootdir = file.getParentFile(); subpath = rootdir.getAbsolutePath().replace(basedir.getAbsolutePath(), "") + File.separator; } String fullname = subpath + basename; String puid = null; if (checkDroid) { try { List<DroidFileFormat> list = 
StaticValues.config.droidHandler.checkFileFormat(file, StaticValues.config.argument); if (list == null || list.isEmpty()) { System.err.println("Ignore: " + fullname); Element pdfa = XmlDom.factory.createElement("convert"); Element newElt = XmlDom.factory.createElement("file"); newElt.addAttribute("filename", fullname); pdfa.add(newElt); addPdfaElement(root, pdfa, basedir, baseoutdir, true, "Error: filetype not found", null, vitamResult); errorcpt++; continue; } DroidFileFormat type = list.get(0); puid = type.getPUID(); if (puid.startsWith(StaticValues.FORMAT_XFMT) || puid.equals("fmt/411")) { // x-fmt or RAR System.err.println("Ignore: " + fullname + " " + puid); Element pdfa = XmlDom.factory.createElement("convert"); Element newElt = XmlDom.factory.createElement("file"); newElt.addAttribute("filename", fullname); pdfa.add(newElt); addPdfaElement(root, pdfa, basedir, baseoutdir, true, "Error: filetype not allowed", puid, vitamResult); errorcpt++; continue; } } catch (CommandExecutionException e) { // ignore } } System.out.println("PDF/A-1B convertion... 
" + fullname); long start = System.currentTimeMillis(); Element pdfa = PdfaConverter.convertPdfA(subpath, basename, basedir, baseoutdir, StaticValues.config); long end = System.currentTimeMillis(); boolean error = false; if (pdfa.selectSingleNode(".[@status='ok']") == null) { error = true; errorcpt++; } if (error) { System.err.println(StaticValues.LBL.error_pdfa.get() + " PDF/A-1B KO: " + fullname + " " + ((end - start) * 1024 / file.length()) + " ms/KB " + (end - start) + " ms " + "\n"); } else { System.out.println("PDF/A-1B OK: " + fullname + " " + ((end - start) * 1024 / file.length()) + " ms/KB " + (end - start) + " ms " + "\n"); } temp = addPdfaElement(root, pdfa, basedir, baseoutdir, error, "error", puid, vitamResult); try { Thread.sleep(500); } catch (InterruptedException e) { } } if (root != null) { XmlDom.addDate(StaticValues.config.argument, StaticValues.config, root); if (errorcpt > 0) { root.addAttribute("status", "error found"); } else { root.addAttribute("status", "ok"); } try { writer.write(vitamResult.unique); } catch (IOException e) { System.err.println(StaticValues.LBL.error_analysis.get() + e); } } else { XmlDom.addDate(StaticValues.config.argument, StaticValues.config, temp); try { writer.write(temp); } catch (IOException e) { } } if (errorcpt < files.size()) { System.out.println(StaticValues.LBL.action_pdfa.get() + " [ " + files.size() + (errorcpt > 0 ? " (" + StaticValues.LBL.error_error.get() + errorcpt + " )" : "") + " ]"); } else { System.err.println(StaticValues.LBL.error_pdfa.get() + " [ " + StaticValues.LBL.error_error.get() + errorcpt + " ]"); } }
From source file:fr.gouv.culture.vitam.command.VitamCommand.java
License:Open Source License
public static void checkFilesType() { File fic = new File(FILEarg); if (!fic.exists()) { System.err.println(StaticValues.LBL.error_filenotfile.get() + ": " + FILEarg); return;//from w w w .j a v a 2s.c o m } else { System.out.println("\n" + StaticValues.LBL.tools_dir_format_output.get() + "\n"); Document global = null; Element root = null; XMLWriter writer = null; try { writer = new XMLWriter(outputStream, StaticValues.defaultOutputFormat); } catch (UnsupportedEncodingException e1) { System.err.println(StaticValues.LBL.error_writer.get() + ": " + e1.toString()); return; } if (StaticValues.config.argument.outputModel == VitamOutputModel.OneXML) { root = XmlDom.factory.createElement("checkfiles"); root.addAttribute("source", FILEarg); global = XmlDom.factory.createDocument(root); EmlExtract.filEmls.clear(); } if (showFormat) { if (StaticValues.config.droidHandler == null && StaticValues.config.exif == null && StaticValues.config.jhove == null) { System.err.println(StaticValues.LBL.error_initfits.get()); return; } try { List<File> files = DroidHandler.matchedFiled(new File[] { fic }, extensions, StaticValues.config.argument.recursive); for (File file : files) { String shortname; if (fic.isDirectory()) { shortname = StaticValues.getSubPath(file, fic); } else { shortname = FILEarg; } Element result = Commands.showFormat(shortname, null, null, file, StaticValues.config, StaticValues.config.argument); XmlDom.addDate(StaticValues.config.argument, StaticValues.config, result); if (root != null) { root.add(result); } else { writer.write(result); System.out.println("\n========================================================"); } } } catch (CommandExecutionException e) { System.err.println(StaticValues.LBL.error_analysis.get() + e); e.printStackTrace(); } catch (IOException e) { System.err.println(StaticValues.LBL.error_analysis.get() + e); } } else { if (StaticValues.config.droidHandler == null) { System.err.println(StaticValues.LBL.error_initdroid.get()); return; } if (root 
!= null) { Element newElt = XmlDom.factory.createElement("toolsversion"); if (StaticValues.config.droidHandler != null) { newElt.addAttribute("pronom", StaticValues.config.droidHandler.getVersionSignature()); } if (StaticValues.config.droidHandler != null) { newElt.addAttribute("droid", "6.1"); } root.add(newElt); } List<DroidFileFormat> list; try { VitamArgument argument = new VitamArgument(StaticValues.config.argument.archive, StaticValues.config.argument.recursive, true, true, true, StaticValues.config.argument.outputModel, StaticValues.config.argument.checkSubFormat, StaticValues.config.argument.extractKeyword); List<File> files = DroidHandler.matchedFiled(new File[] { fic }, extensions, argument.recursive); list = StaticValues.config.droidHandler.checkFilesFormat(files, argument, null); String pathBeforeArg = fic.getCanonicalPath(); pathBeforeArg = pathBeforeArg.substring(0, pathBeforeArg.indexOf(FILEarg)); for (DroidFileFormat droidFileFormat : list) { Element fileformat = droidFileFormat.toElement(true); Attribute filename = fileformat.attribute("filename"); if (filename != null) { String value = filename.getText(); filename.setText(value.replace(pathBeforeArg, "")); } XmlDom.addDate(StaticValues.config.argument, StaticValues.config, fileformat); if (root != null) { root.add(fileformat); } else { writer.write(fileformat); System.out.println("\n========================================================"); } } } catch (CommandExecutionException e) { System.err.println(StaticValues.LBL.error_analysis.get() + e); e.printStackTrace(); } catch (UnsupportedEncodingException e) { System.err.println(StaticValues.LBL.error_analysis.get() + e); } catch (IOException e) { System.err.println(StaticValues.LBL.error_analysis.get() + e); } } if (global != null) { XmlDom.addDate(StaticValues.config.argument, StaticValues.config, root); if (!EmlExtract.filEmls.isEmpty()) { Element sortEml = XmlDom.factory.createElement("emlsort"); for (String parent : 
EmlExtract.filEmls.keySet()) { Element eparent = XmlDom.factory.createElement("parent"); String fil = EmlExtract.filEmls.get(parent); eparent.addAttribute("messageId", parent); String[] fils = fil.split(","); for (String mesg : fils) { if (mesg != null && mesg.length() > 1) { Element elt = XmlDom.factory.createElement("descendant"); elt.addAttribute("messageId", mesg); eparent.add(elt); } } sortEml.add(eparent); } root.add(sortEml); } try { writer.write(global); } catch (IOException e) { System.err.println(StaticValues.LBL.error_analysis.get() + e); } } } }
From source file:fr.gouv.culture.vitam.database.DbSchema.java
License:Open Source License
public Element getElement(boolean recursive, boolean data) { DocumentFactory factory = DocumentFactory.getInstance(); Element schema = factory.createElement(SCHEMA_FIELD); schema.addAttribute(NAME_ATTRIBUTE, name); schema.addAttribute(DESCRIPTION_ATTRIBUTE, description); if (identifier != null) { Element eltidentifier = factory.createElement(IDENTIFIER_FIELD); eltidentifier.addAttribute(TYPE_ATTRIBUTE, identifier.type.name()); eltidentifier.addAttribute(DATAFILE_ATTRIBUTE, identifier.refDataFile); switch (identifier.type) { case CSVTYPE: eltidentifier.addAttribute(SEPARATOR_ATTRIBUTE, identifier.separator); break; case MULTIPLETYPE: { Element poslistm = factory.createElement(MULTIPLEPOSITIONS_FIELD); for (int[] posmul : identifier.positionsMultiple) { Element poslist = factory.createElement(POSITIONS_FIELD); for (int ipos : posmul) { Element pos = factory.createElement(POSITION_FIELD); pos.setText(Integer.toString(ipos)); poslist.add(pos); }/*from w w w.j a v a2 s. co m*/ poslistm.add(poslist); } eltidentifier.add(poslistm); } break; case UNIQUETYPE: { Element poslist = factory.createElement(POSITIONS_FIELD); for (int ipos : identifier.positions) { Element pos = factory.createElement(POSITION_FIELD); pos.setText(Integer.toString(ipos)); poslist.add(pos); } eltidentifier.add(poslist); } break; } schema.add(eltidentifier); } if (constructionOrder != null) { Element order = factory.createElement(ORDER_FIELD); int rank = 0; for (String table : constructionOrder) { Element etable = factory.createElement(TABLE_FIELD); etable.addAttribute(RANK_ATTRIBUTE, Integer.toString(rank)); etable.setText(table); order.add(etable); rank++; } schema.add(order); } if (recursive) { schema.addAttribute(NB_ATTRIBUTE, Integer.toString(tables.size())); for (DbTable table : tables) { if (data && table.getRows() != null && table.getRows().size() > 0) { Element etable = table.getElement(data); schema.add(etable); } else { Element etable = table.getElement(false); if (data) { // fill data using 
file try { XmlWriter.loadDbTableDataFromFile(this, table, etable); } catch (IOException e) { logger.warn(StaticValues.LBL.error_error.get() + e); } catch (WaarpDatabaseSqlException e) { logger.warn(StaticValues.LBL.error_error.get() + e); } } schema.add(etable); } } } return schema; }
From source file:fr.gouv.culture.vitam.database.DbTable.java
License:Open Source License
/**
 * Builds the dom4j element describing this table.
 * <p>
 * The element carries the name, description, rank and type attributes, plus the data file
 * attribute when one is set. Fields, primary keys, indexes and constraints each get their own
 * child element when the corresponding member is not null.
 *
 * @param data when true and the table has rows, a rows element with one child per row is added
 * @return the newly created table element
 */
public Element getElement(boolean data) {
    DocumentFactory xmlFactory = DocumentFactory.getInstance();

    Element tableElement = xmlFactory.createElement(DbSchema.TABLE_FIELD);
    tableElement.addAttribute(DbSchema.NAME_ATTRIBUTE, name);
    tableElement.addAttribute(DbSchema.DESCRIPTION_ATTRIBUTE, description);
    tableElement.addAttribute(DbSchema.RANK_ATTRIBUTE, Integer.toString(rank));
    tableElement.addAttribute(DbSchema.TYPE_ATTRIBUTE, type.toString());
    if (datafile != null) {
        tableElement.addAttribute(DbSchema.DATAFILE_ATTRIBUTE, datafile);
    }

    if (fields != null) {
        Element fieldsElement = xmlFactory.createElement(DbSchema.FIELDS_FIELD);
        fieldsElement.addAttribute(DbSchema.NB_ATTRIBUTE, Integer.toString(fields.size()));
        for (DbField field : fields) {
            fieldsElement.add(field.getElement());
        }
        tableElement.add(fieldsElement);
    }

    if (primaryKeys != null) {
        Element keysElement = xmlFactory.createElement(DbSchema.PRIMARY_KEYS_FIELD);
        for (DbField keyField : primaryKeys) {
            Element keyElement = xmlFactory.createElement(DbSchema.PRIMARY_KEY_FIELD);
            keyElement.addAttribute(DbSchema.NAME_ATTRIBUTE, keyField.name);
            keysElement.add(keyElement);
        }
        tableElement.add(keysElement);
    }

    if (indexes != null) {
        Element indexesElement = xmlFactory.createElement(DbSchema.INDEXES_FIELD);
        indexesElement.addAttribute(DbSchema.NB_ATTRIBUTE, Integer.toString(indexes.size()));
        for (String indexName : indexes.keySet()) {
            Element indexElement = xmlFactory.createElement(DbSchema.INDEX_FIELD);
            indexElement.addAttribute(DbSchema.NAME_ATTRIBUTE, indexName);
            // Each index lists the names of the fields it covers.
            for (DbField indexedField : indexes.get(indexName)) {
                Element fieldElement = xmlFactory.createElement(DbSchema.FIELD_FIELD);
                fieldElement.addAttribute(DbSchema.NAME_ATTRIBUTE, indexedField.name);
                indexElement.add(fieldElement);
            }
            indexesElement.add(indexElement);
        }
        tableElement.add(indexesElement);
    }

    if (constraints != null) {
        Element constraintsElement = xmlFactory.createElement(DbSchema.CONSTRAINTS_FIELD);
        constraintsElement.addAttribute(DbSchema.NB_ATTRIBUTE, Integer.toString(constraints.size()));
        for (String sourceField : constraints.keySet()) {
            DbField target = constraints.get(sourceField);
            Element constraintElement = xmlFactory.createElement(DbSchema.CONSTRAINT_FIELD);
            constraintElement.addAttribute(DbSchema.FIELD_FIELD, sourceField);
            constraintElement.addAttribute(DbSchema.TARGETTABLE_ATTRIBUTE, target.table.name);
            constraintElement.addAttribute(DbSchema.TARGETFIELD_ATTRIBUTE, target.name);
            constraintsElement.add(constraintElement);
        }
        tableElement.add(constraintsElement);
    }

    boolean includeRows = data && rows != null && rows.size() > 0;
    if (includeRows) {
        Element rowsElement = xmlFactory.createElement(DbSchema.ROWS_FIELD);
        rowsElement.addAttribute(DbSchema.NB_ATTRIBUTE, Integer.toString(rows.size()));
        for (DbTableRow row : rows) {
            rowsElement.add(row.getElement());
        }
        tableElement.add(rowsElement);
    }

    return tableElement;
}