List of usage examples for org.jdom2 Element clone
@Override
public Element clone()
This returns a deep clone of this element.
From source file:de.huberlin.german.korpling.laudatioteitool.SplitTEI.java
License:Apache License
private TEIValidator.Errors extractMainCorpusHeader(Document doc) throws LaudatioException, IOException, SAXException { TEIValidator validator = corpusSchemeURL == null ? new TEICorpusValidator() : new FromURLValidator(corpusSchemeURL); Element corpusHeader = doc.getRootElement().getChild("teiHeader", null); if (corpusHeader != null) { File corpusDir = new File(outputDirectory, "CorpusHeader"); if (!corpusDir.exists() && !corpusDir.mkdir()) { throw new LaudatioException( messages.getString("COULD NOT CREATE DIRECTORY") + corpusDir.getAbsolutePath()); }/* w ww. j a va2s . c om*/ // create the subtree for the global corpus header Namespace teiNS = Namespace.getNamespace("http://www.tei-c.org/ns/1.0"); Element newRootForCorpus = new Element("TEI", teiNS); newRootForCorpus.addContent(corpusHeader.clone()); Document corpusDoc = new Document(newRootForCorpus); if (corpusSchemeURL == null) { corpusDoc.addContent(0, new ProcessingInstruction("xml-model", "href=\"" + TEICorpusValidator.DEFAULT_SCHEME_URL + "\"")); } else { corpusDoc.addContent(0, new ProcessingInstruction("xml-model", "href=\"" + corpusSchemeURL + "\"")); } // we need to append an empty "text" element after the header Element text = new Element("text", teiNS); text.setText(""); newRootForCorpus.addContent(text); // we work with the copy from now corpusHeader = newRootForCorpus.getChild("teiHeader", null); Preconditions.checkNotNull(corpusHeader, messages.getString("ERROR NO CORPUS TITLE GIVEN")); Preconditions.checkState("CorpusHeader".equals(corpusHeader.getAttributeValue("type"))); Preconditions.checkNotNull(corpusHeader.getChild("fileDesc", null), messages.getString("ERROR NO CORPUS TITLE GIVEN")); Preconditions.checkNotNull(corpusHeader.getChild("fileDesc", null).getChild("titleStmt", null), messages.getString("ERROR NO CORPUS TITLE GIVEN")); String title = corpusHeader.getChild("fileDesc", null).getChild("titleStmt", null) .getChildTextNormalize("title", null); Preconditions.checkNotNull(title, 
messages.getString("ERROR NO CORPUS TITLE GIVEN")); // save the file with the title as file name File outputFile = new File(corpusDir, title + ".xml"); XMLOutputter xmlOut = new XMLOutputter(Format.getPrettyFormat()); xmlOut.output(corpusDoc, new OutputStreamWriter(new FileOutputStream(outputFile), "UTF-8")); log.info(messages.getString("WRITTEN CORPUS HEADER"), outputFile.getPath()); validator.validate(outputFile); } return validator.getErrors(); }
From source file:de.huberlin.german.korpling.laudatioteitool.SplitTEI.java
License:Apache License
/**
 * Writes every document header found below the first {@code teiCorpus} element into its own
 * TEI file under {@code <outputDirectory>/DocumentHeader} and validates each written file.
 *
 * <p>The file name is the {@code xml:id} of the header's {@code fileDesc} if present,
 * otherwise the text of its title if present, otherwise a random UUID.
 *
 * @param doc the combined document to take the document headers from
 * @return the errors collected by the validator over all written files
 * @throws LaudatioException if the {@code DocumentHeader} directory could not be created
 * @throws IOException if writing an output file fails
 * @throws SAXException declared for the validation step
 */
private TEIValidator.Errors extractDocumentHeaders(Document doc)
    throws LaudatioException, IOException, SAXException {
  TEIValidator validator = documentSchemeURL == null
      ? new TEIDocumentValidator()
      : new FromURLValidator(documentSchemeURL);

  File documentDir = new File(outputDirectory, "DocumentHeader");
  if (!documentDir.exists() && !documentDir.mkdir()) {
    throw new LaudatioException(
        messages.getString("COULD NOT CREATE DIRECTORY") + documentDir.getAbsolutePath());
  }

  Element documentRoot = Preconditions.checkNotNull(doc.getRootElement().getChild("teiCorpus", null));

  for (Element docHeader : documentRoot.getChildren("teiHeader", null)) {
    Preconditions.checkState("DocumentHeader".equals(docHeader.getAttributeValue("type")));

    // create the standalone document for this document header
    Namespace teiNS = Namespace.getNamespace("http://www.tei-c.org/ns/1.0");
    Element tei = new Element("TEI", teiNS);
    tei.addContent(docHeader.clone());
    Document newDoc = new Document(tei);

    if (documentSchemeURL == null) {
      newDoc.addContent(0, new ProcessingInstruction("xml-model",
          "href=\"" + TEIDocumentValidator.DEFAULT_SCHEME_URL + "\""));
    } else {
      newDoc.addContent(0, new ProcessingInstruction("xml-model",
          "href=\"" + documentSchemeURL + "\""));
    }

    // we need to append an empty "text" element after the header
    Element text = new Element("text", teiNS);
    text.setText("");
    tei.addContent(text);

    Element fileDesc = Preconditions
        .checkNotNull(tei.getChild("teiHeader", null).getChild("fileDesc", null));

    // fall back from xml:id to the title to a random name
    String outName = UUID.randomUUID().toString();
    String id = fileDesc.getAttributeValue("id", Namespace.XML_NAMESPACE);
    if (id != null) {
      outName = id;
    } else {
      Element titleStmt = Preconditions.checkNotNull(fileDesc.getChild("titleStmt", null));
      String title = titleStmt.getChildText("title", null);
      if (title != null) {
        outName = title;
      }
    }

    File outputFile = new File(documentDir, outName + ".xml");
    XMLOutputter xmlOut = new XMLOutputter(Format.getPrettyFormat());
    // Close the stream in every iteration: otherwise one file handle per header stays
    // open, and the validator would read a file that is still held open for writing.
    try (FileOutputStream fileOut = new FileOutputStream(outputFile);
        OutputStreamWriter writer = new OutputStreamWriter(fileOut, "UTF-8")) {
      xmlOut.output(newDoc, writer);
    }
    log.info(messages.getString("WRITTEN DOCUMENT HEADER"), outputFile.getPath());

    validator.validate(outputFile);
  }

  return validator.getErrors();
}
From source file:de.huberlin.german.korpling.laudatioteitool.SplitTEI.java
License:Apache License
private TEIValidator.Errors extractPreparationSteps(Document doc) throws LaudatioException, IOException, SAXException { TEIValidator validator = preparationSchemeURL == null ? new TEIPreparationValidator() : new FromURLValidator(preparationSchemeURL); Multiset<String> knownPreparationTitles = HashMultiset.create(); File documentDir = new File(outputDirectory, "PreparationHeader"); if (!documentDir.exists() && !documentDir.mkdir()) { throw new LaudatioException( messages.getString("COULD NOT CREATE DIRECTORY") + documentDir.getAbsolutePath()); }// w ww .j a va 2 s . c om Preconditions.checkNotNull(doc.getRootElement().getChild("teiCorpus", null)); Element preparationRoot = Preconditions .checkNotNull(doc.getRootElement().getChild("teiCorpus", null).getChild("teiCorpus", null)); for (Element preparationHeader : preparationRoot.getChildren("teiHeader", null)) { Preconditions.checkState("PreparationHeader".equals(preparationHeader.getAttributeValue("type"))); // create the subtree for the global corpus header Namespace teiNS = Namespace.getNamespace("http://www.tei-c.org/ns/1.0"); Element tei = new Element("TEI", teiNS); tei.addContent(preparationHeader.clone()); Document newDoc = new Document(tei); if (preparationSchemeURL == null) { newDoc.addContent(0, new ProcessingInstruction("xml-model", "href=\"" + TEIPreparationValidator.DEFAULT_SCHEME_URL + "\"")); } else { newDoc.addContent(0, new ProcessingInstruction("xml-model", "href=\"" + preparationSchemeURL + "\"")); } // we need to append an empty "text" element after the header Element text = new Element("text", teiNS); text.setText(""); tei.addContent(text); Element fileDesc = Preconditions .checkNotNull(tei.getChild("teiHeader", null).getChild("fileDesc", null)); String outName = UUID.randomUUID().toString(); Element titleStmt = Preconditions.checkNotNull(fileDesc.getChild("titleStmt", null)); Element title = Preconditions.checkNotNull(titleStmt.getChild("title", null)); String corresp = 
title.getAttributeValue("corresp"); if (corresp != null) { if (knownPreparationTitles.contains(corresp)) { knownPreparationTitles.add(corresp); outName = corresp + "_" + knownPreparationTitles.count(corresp); log.warn(messages.getString("MORE THAN ONE PREPARATION HEADER"), corresp); } else { outName = corresp; knownPreparationTitles.add(corresp); } } File outputFile = new File(documentDir, outName + ".xml"); XMLOutputter xmlOut = new XMLOutputter(Format.getPrettyFormat()); xmlOut.output(newDoc, new OutputStreamWriter(new FileOutputStream(outputFile), "UTF-8")); log.info(messages.getString("WRITTEN PREPARATION HEADER"), outputFile.getPath()); validator.validate(outputFile); } return validator.getErrors(); }
From source file:de.unirostock.sems.cbarchive.web.dataholder.MetaObjectDataholder.java
License:Open Source License
/** * Generates a temporarily id for the meta entry *//* w ww. j av a2s. com*/ @JsonIgnore public void generateId() { Element xmlElement = metaObject.getXmlDescription(); String xmlString = null; try { Document doc = new Document(); doc.setRootElement(xmlElement.clone()); xmlString = Utils.prettyPrintDocument(doc); } catch (IOException | TransformerException e) { LOGGER.error(e, "Can't generate xml from meta object to generate meta id"); return; } id = Tools.generateHashId(xmlString); // LOGGER.debug("generated ID: ", id); }
From source file:devicemodel.conversions.XmlConversions.java
/**
 * Serializes an element to an XML string without a header.
 *
 * <p>A deep clone of the element is wrapped in a new document, so the passed element itself
 * is not re-parented, and the document is handed to {@code document2XmlStringNoHeader}.
 *
 * @param element the element to serialize
 * @return the string produced by {@code document2XmlStringNoHeader}
 * @throws IOException if serializing fails
 */
public static String element2XmlString(final Element element) throws IOException {
  final Element detachedCopy = element.clone();
  final Document wrapper = new Document(detachedCopy);
  return document2XmlStringNoHeader(wrapper);
}
From source file:eu.himeros.digitaledition.AlignedQuotationParser.java
License:Open Source License
/**
 * Regroups the children of {@code root} under anchor elements.
 *
 * <p>The result is a new {@code text} element that starts with a synthetic anchor
 * {@code <w id="0">}. Each child of {@code root} is cloned; a clone whose original has
 * {@code occ="1"} is appended to the result and becomes the current anchor, every other
 * clone is appended to the current anchor.
 *
 * @param root the element whose children are regrouped (it is only read, not modified)
 * @return the newly built {@code text} element
 */
private Element appendToAnchor(Element root) throws Exception {
  Element result = new Element("text");
  List<Element> children = root.getChildren();

  // initial anchor that collects everything before the first occ="1" word
  Element anchor = new Element("w");
  anchor.setAttribute("id", "0");
  result.addContent(anchor);

  for (Element original : children) {
    Element copy = original.clone();
    boolean startsNewAnchor = "1".equals(original.getAttributeValue("occ"));
    if (!startsNewAnchor) {
      anchor.addContent(copy);
      continue;
    }
    anchor = copy;
    result.addContent(anchor);
  }
  return result;
}
From source file:eu.himeros.hocr.FlatXml.java
License:Open Source License
/**
 * Flattens an hOCR file: reads {@code inFile}, moves head and body into the XHTML namespace,
 * replaces the body content by a single {@code ocr_page} div holding clones of all line
 * elements found below {@code ocr_carea} elements, and writes the result to {@code outFile}.
 *
 * <p>The original area and paragraph wrappers are not kept as elements; their positions are
 * marked by comments in the page div. A missing {@code head} or {@code body} element is
 * tolerated: the corresponding step is skipped.
 *
 * @param inFile the hOCR input file; its name (minus the first character, with
 *        {@code .html} replaced by {@code .png}) is used to build the page id
 * @param outFile the file the flattened document is written to
 * @throws Exception if parsing the input or writing the output fails
 */
private void init(File inFile, File outFile) throws Exception {
  SAXBuilder builder = new SAXBuilder();
  Document doc = builder.build(inFile);
  Element root = doc.getRootElement();
  Namespace oldns = root.getNamespace();
  Element newRoot = new Element("html", "http://www.w3.org/1999/xhtml");
  Namespace xmlns = newRoot.getNamespace();

  // Guard before the first dereference; previously the null check came after
  // head.setNamespace(...) and could never take effect.
  Element head = root.getChild("head", oldns);
  if (head != null) {
    head.setNamespace(xmlns);
    for (Element child : head.getChildren()) {
      child.setNamespace(xmlns);
    }
    Element title = new Element("title", xmlns);
    title.addContent("ocr");
    head.addContent(title);
  }

  // Same for body: everything that needs the body lives inside the guard.
  Element body = root.getChild("body", oldns);
  if (body != null) {
    body.setNamespace(xmlns);

    Element page = new Element("div", xmlns);
    page.setAttribute("class", "ocr_page");
    page.setAttribute("id", "i" + inFile.getName().substring(1).replace(".html", ".png"));

    XPathExpression<Element> xpath = XPathFactory.instance().compile("//*[@class='ocr_carea']",
        Filters.element(), null, Namespace.getNamespace("ns", "http://www.w3.org/1999/xhtml"));
    List<Element> careaElL = xpath.evaluate(body);
    for (Element careaEl : careaElL) {
      // the area and paragraph wrappers survive only as comments
      page.addContent(new Comment("<div class=\"" + careaEl.getAttributeValue("class")
          + "\" title=\"" + careaEl.getAttributeValue("title") + "\">"));
      for (Element pEl : careaEl.getChildren()) {
        page.addContent(new Comment("<p>"));
        for (Element lineEl : pEl.getChildren()) {
          lineEl.removeAttribute("id");
          lineEl.setNamespace(xmlns);
          for (Element child : lineEl.getChildren()) {
            child.removeAttribute("id");
            child.removeAttribute("lang");
            child.removeAttribute("lang", xmlns);
            child.setNamespace(xmlns);
          }
          // clone: the line is still attached to its paragraph at this point
          page.addContent(lineEl.clone());
        }
        page.addContent(new Comment("</p>"));
      }
      page.addContent(new Comment("</div>"));
    }

    body.removeContent();
    body.addContent(page);
  }

  newRoot.addContent(root.removeContent());
  doc.detachRootElement();
  doc.setRootElement(newRoot);

  XMLOutputter xmlOutputter = new XMLOutputter(Format.getPrettyFormat());
  // Close the writer so buffered output reaches the file and the handle is released.
  // NOTE(review): FileWriter encodes with the platform default charset; confirm that
  // this matches the encoding declared in the written document.
  try (BufferedWriter writer = new BufferedWriter(new FileWriter(outFile))) {
    xmlOutputter.output(doc, writer);
  }
}
From source file:jodtemplate.pptx.preprocessor.FormatTagsPreprocessor.java
License:Apache License
private List<Element> processArAndABrElements(final List<Element> apChildrenList, final Parser parser) { final List<Element> arabrElementsListResult = new ArrayList<>(apChildrenList.size()); for (Element child : apChildrenList) { if (PPTXDocument.R_ELEMENT.equals(child.getName())) { final String text = child.getChild(PPTXDocument.T_ELEMENT, getNamespace()).getText(); final List<String> parsed = parser.parse(text); for (String part : parsed) { if (StringUtils.isNotEmpty(part)) { final Element arOut = child.clone(); arOut.getChild(PPTXDocument.T_ELEMENT, getNamespace()).setText(part); arabrElementsListResult.add(arOut); }// w w w . j ava2 s. c om } } else if (PPTXDocument.BR_ELEMENT.equals(child.getName())) { final Element abrOut = child.clone(); arabrElementsListResult.add(abrOut); } } return arabrElementsListResult; }
From source file:jodtemplate.pptx.style.HtmlStylizer.java
License:Apache License
private Element applyListFormatting(final List<org.jsoup.nodes.Element> tags, final org.jsoup.nodes.Element element, final Element apPr) { final Element apPrToAdd; if (apPr == null) { apPrToAdd = new Element(PPTXDocument.PPR_ELEMENT, getDrawingmlNamespace()); } else {//www . jav a 2s. c om apPrToAdd = apPr.clone(); } apPrToAdd.setAttribute(PPTXDocument.INDENT_ATTR, String.valueOf(-DEFAULT_INDENTATION)); final Element abuFont = new Element(PPTXDocument.BUFONT_ELEMENT, getDrawingmlNamespace()); apPrToAdd.addContent(abuFont); if (UL_TAG.equals(element.parent().tagName())) { abuFont.setAttribute(PPTXDocument.CHARSET_ATTR, "0"); abuFont.setAttribute(PPTXDocument.PANOSE_ATTR, "020B0604020202020204"); abuFont.setAttribute(PPTXDocument.PITCH_FAMILY_ATTR, "34"); abuFont.setAttribute(PPTXDocument.TYPEFACE_ATTR, "Arial"); final Element abuChar = new Element(PPTXDocument.BUCHAR_ELEMENT, getDrawingmlNamespace()); abuChar.setAttribute(PPTXDocument.CHAR_ATTR, ""); apPrToAdd.addContent(abuChar); } else if (OL_TAG.equals(element.parent().tagName())) { abuFont.setAttribute(PPTXDocument.TYPEFACE_ATTR, "+mj-lt"); final Element abuAutonum = new Element(PPTXDocument.BUAUTONUM_ELEMENT, getDrawingmlNamespace()); abuAutonum.setAttribute(PPTXDocument.TYPE_ATTR, "arabicPeriod"); apPrToAdd.addContent(abuAutonum); } final Collection<org.jsoup.nodes.Element> listItemTags = CollectionUtils.select(tags, new Predicate<org.jsoup.nodes.Element>() { @Override public boolean evaluate(final org.jsoup.nodes.Element tag) { return LI_TAG.equals(tag.tagName()); } }); final int listLevel = listItemTags.size(); if (listLevel > 1) { apPrToAdd.setAttribute(PPTXDocument.LVL_ATTR, String.valueOf(listLevel - 1)); } apPrToAdd.setAttribute(PPTXDocument.MAR_L_ATTR, String.valueOf(DEFAULT_INDENTATION * listLevel)); return apPrToAdd; }
From source file:jodtemplate.pptx.style.HtmlStylizer.java
License:Apache License
private Element createParagraphElement(final List<Element> elements, final Element apPr) { final Element ap = new Element(PPTXDocument.P_ELEMENT, getDrawingmlNamespace()); final Element apPrToAdd; if (apPr == null) { apPrToAdd = new Element(PPTXDocument.PPR_ELEMENT, getDrawingmlNamespace()); } else {// w w w. j a v a 2 s . c o m apPrToAdd = apPr.clone(); } final Element abuNone = new Element(PPTXDocument.BUNONE_ELEMENT, getDrawingmlNamespace()); apPrToAdd.addContent(abuNone); ap.addContent(apPrToAdd); ap.addContent(elements); return ap; }