List of usage examples for org.w3c.dom Node appendChild
public Node appendChild(Node newChild) throws DOMException;
Adds the node newChild to the end of the list of children of this node.
From source file:Main.java
/**
 * Replaces all existing children of {@code node} with a single text node.
 *
 * @param node the node whose children are discarded; its owner document is
 *             used to create the replacement text node
 * @param text the character data for the new text node
 */
public static void replaceText(Node node, String text) {
    Node child;
    // Drain the child list from the front until nothing is left.
    while ((child = node.getFirstChild()) != null) {
        node.removeChild(child);
    }
    node.appendChild(node.getOwnerDocument().createTextNode(text));
}
From source file:Main.java
/** * based on public Java5 javadoc of org.w3c.dom.Node.setTextContent method *///from w ww . ja v a 2s . c o m public static void setTextContent(Node node, final String text) { while (node.hasChildNodes()) { node.removeChild(node.getFirstChild()); } if (text != null && text.length() > 0) { Node textNode = node.getOwnerDocument().createTextNode(text); node.appendChild(textNode); } }
From source file:Main.java
/**
 * Creates an element with the given name and text content and appends it to
 * {@code parentNode}.
 *
 * @param doc           document used to create the new element
 * @param parentNode    node that receives the new element as its last child
 * @param childName     tag name of the new element
 * @param childContents text content of the new element, must not be {@code null}
 * @throws NullPointerException if {@code childContents} is {@code null}
 */
public static void appendChild(Document doc, Node parentNode, String childName, String childContents) {
    if (childContents == null) {
        throw new NullPointerException("ChildNode is null.");
    }
    final Element created = doc.createElement(childName);
    created.setTextContent(childContents);
    parentNode.appendChild(created);
}
From source file:Main.java
/**
 * Creates a new element and appends it to {@code parent}.
 *
 * @param parent  the node that receives the element; when it is itself a
 *                {@link Document} it is used as the factory, otherwise its
 *                owner document is
 * @param tagName tag name of the element to create
 * @return the newly created and appended element
 */
public static Element createElement(Node parent, String tagName) {
    final Document factory = (parent instanceof Document)
            ? (Document) parent
            : parent.getOwnerDocument();
    final Element created = factory.createElement(tagName);
    parent.appendChild(created);
    return created;
}
From source file:Main.java
/**
 * Creates a comment node and appends it to {@code node}.
 *
 * @param node    the node that receives the comment; a document node acts as
 *                its own factory, any other node uses its owner document
 * @param comment the comment text
 * @return the newly created and appended comment node
 */
public static Comment addComment(Node node, String comment) {
    final boolean isDocument = node.getNodeType() == Node.DOCUMENT_NODE;
    final Document factory = isDocument ? (Document) node : node.getOwnerDocument();
    final Comment created = factory.createComment(comment);
    node.appendChild(created);
    return created;
}
From source file:Main.java
/** * Add a value to the specified node/*from w w w . ja v a2 s. c o m*/ */ public static Node addValue(Node node, String value) { final Node newNode; if (node instanceof Document) newNode = ((Document) node).createTextNode(value); else newNode = node.getOwnerDocument().createTextNode(value); if (newNode != null) node.appendChild(newNode); return newNode; }
From source file:Main.java
/**
 * Creates a CDATA section and appends it to {@code node}.
 *
 * @param node the node that receives the section; a document node acts as its
 *             own factory, any other node uses its owner document
 * @param data the character data to wrap in the CDATA section
 * @return the newly created and appended CDATA section
 */
public static CDATASection addCDataText(Node node, String data) {
    final boolean isDocument = node.getNodeType() == Node.DOCUMENT_NODE;
    final Document factory = isDocument ? (Document) node : node.getOwnerDocument();
    final CDATASection section = factory.createCDATASection(data);
    node.appendChild(section);
    return section;
}
From source file:Main.java
/**
 * Updates the value of every element with the given tag name in a
 * <code>Document</code>.
 *
 * <p>For each matching element the first child is replaced by a new text node
 * containing <code>tagValue</code>; if the element has no children the text
 * node is appended instead. Children after the first are left untouched.
 *
 * @param doc
 *            Document object
 * @param tagName
 *            name of the tag
 * @param tagValue
 *            the updated value of the tag
 */
public static void replaceTagValue(Document doc, String tagName, String tagValue) {
    NodeList nodeList = doc.getElementsByTagName(tagName);
    // The list is live: replacing a first child that is itself a matching
    // element removes it from the list. Re-read the length on every pass so
    // we never index past the (shrunken) end and dereference a null item.
    for (int i = 0; i < nodeList.getLength(); i++) {
        Node node = nodeList.item(i);
        // Each element needs its own text node; a node can have only one parent.
        Node newNode = doc.createTextNode(tagValue);
        Node firstChild = node.getFirstChild();
        if (firstChild != null) {
            node.replaceChild(newNode, firstChild);
        } else {
            node.appendChild(newNode);
        }
    }
}
From source file:Main.java
public static Element addElement(Node parent, String name) { Element node;//from w w w . j a v a 2 s . co m if (parent.getOwnerDocument() != null) node = parent.getOwnerDocument().createElement(name); else if (parent instanceof Document) node = ((Document) parent).createElement(name); else return null; parent.appendChild(node); return node; }
From source file:com.ephesoft.dcma.util.OCREngineUtil.java
/** * To format HOCR for Tesseract./* ww w. j a v a 2s. com*/ * @param outputFilePath {@link String} * @param actualFolderLocation {@link String} * @param pageId {@link String} * @throws XPathExpressionException if error occurs * @throws TransformerException if error occurs * @throws IOException if error occurs */ public static void formatHOCRForTesseract(final String outputFilePath, final String actualFolderLocation, final String pageId) throws XPathExpressionException, TransformerException, IOException { LOGGER.info("Entering format HOCR for tessearct . outputfilepath : " + outputFilePath); InputStream inputStream = new FileInputStream(outputFilePath); XPathFactory xFactory = new org.apache.xpath.jaxp.XPathFactoryImpl(); XPath xpath = xFactory.newXPath(); XPathExpression pageExpr = xpath.compile("//div[@class=\"ocr_page\"]"); XPathExpression wordExpr = xpath.compile("//span[@class=\"ocr_word\"]"); // Output format supported by Tesseract 3.00 XPathExpression xOcrWordExpr = xpath.compile("//span[@class=\"xocr_word\"]"); // Output format supported by Tesseract 3.01 XPathExpression ocrXWordExpr = xpath.compile("//span[@class=\"ocrx_word\"]"); org.w3c.dom.Document doc2 = null; try { doc2 = XMLUtil.createDocumentFrom(inputStream); } catch (Exception e) { LOGGER.info("Premature end of file for " + outputFilePath + e); } finally { IOUtils.closeQuietly(inputStream); } if (doc2 != null) { LOGGER.info("document is not null."); NodeList wordList = (NodeList) wordExpr.evaluate(doc2, XPathConstants.NODESET); for (int wordNodeIndex = 0; wordNodeIndex < wordList.getLength(); wordNodeIndex++) { setWordNodeTextContent(xOcrWordExpr, ocrXWordExpr, wordList, wordNodeIndex); } NodeList pageList = (NodeList) pageExpr.evaluate(doc2, XPathConstants.NODESET); for (int pageNodeIndex = 0; pageNodeIndex < pageList.getLength(); pageNodeIndex++) { Node pageNode = pageList.item(pageNodeIndex); if (pageNode != null && ((Node) pageNode.getAttributes().getNamedItem(UtilConstants.ID_ATTR)) != null) 
{ String pageID = ((Node) pageNode.getAttributes().getNamedItem(UtilConstants.ID_ATTR)) .getTextContent(); wordExpr = xpath.compile("//div[@id='" + pageID + "']//span[@class='ocr_word']"); NodeList wordInPageList = (NodeList) wordExpr.evaluate(pageNode, XPathConstants.NODESET); Node pageNodeClone = pageNode.cloneNode(false); for (int i = 0; i < wordInPageList.getLength(); i++) { pageNodeClone.appendChild(wordInPageList.item(i)); } pageNode.getParentNode().appendChild(pageNodeClone); pageNode.getParentNode().removeChild(pageNode); } } XMLUtil.flushDocumentToFile(doc2.getDocumentElement().getOwnerDocument(), outputFilePath); File tempFile = new File(actualFolderLocation + File.separator + pageId + "_tempFile_hocr.html"); FileUtils.copyFile(new File(outputFilePath), tempFile); XMLUtil.htmlOutputStream(tempFile.getAbsolutePath(), outputFilePath); boolean isTempFileDeleted = tempFile.delete(); if (!isTempFileDeleted) { tempFile.delete(); } } LOGGER.info("Exiting format HOCR for tessearct . outputfilepath : " + outputFilePath); }