List of usage examples for org.w3c.dom Node getParentNode
public Node getParentNode();
From source file:DomUtils.java
/** * Count the DOM nodes between the supplied sibling nodes, not including * the nodes themselves./*from ww w . ja v a 2 s. co m*/ * <p/> * Counts the sibling nodes. * @param node1 First sibling node. * @param node2 Second sibling node. * @return The number of siblings between the supplied sibling nodes. * @throws UnsupportedOperationException if the supplied {@link Node Nodes} * don't have the same parent node i.e. are not sibling nodes. */ public static int countNodesBetween(Node node1, Node node2) { Node parent1 = node1.getParentNode(); if (parent1 == null) { System.out.println("Cannot count nodes between [" + node1 + "] and [" + node2 + "]. [" + node1 + "] has no parent."); return 0; } Node parent2 = node2.getParentNode(); if (parent2 == null) { System.out.println("Cannot count nodes between [" + node1 + "] and [" + node2 + "]. [" + node2 + "] has no parent."); return 0; } if (parent1 != parent2) { System.out.println("Cannot count nodes between [" + node1 + "] and [" + node2 + "]. These nodes do not share the same sparent."); return 0; } int countBeforeNode1 = countNodesBefore(node1); int countBeforeNode2 = countNodesBefore(node2); int count = countBeforeNode2 - countBeforeNode1 - 1; return count; }
From source file:DomUtils.java
/**
 * Insert the supplied nodes before the supplied reference node (refNode).
 * <p>
 * If refNode has no parent a diagnostic is printed and nothing is inserted.
 * <p>
 * When refNode's parent is the {@link Document}, refNode is replaced by the
 * first element found in the list (a diagnostic is printed), and the
 * non-element nodes from the list are inserted before that new element. If
 * the list contains no element, refNode is kept and the non-element nodes
 * are inserted before it.
 *
 * @param newNodes Nodes to be inserted.
 * @param refNode Reference node before which the supplied nodes should
 * be inserted.
 */
public static void insertBefore(NodeList newNodes, Node refNode) {
    Node parentNode = refNode.getParentNode();
    if (parentNode == null) {
        System.out
                .println("Cannot insert a NodeList before [" + refNode + "]. [" + refNode + "] has no parent.");
        return;
    }

    int nodeCount = newNodes.getLength();
    // Work from a copy: inserting a node elsewhere can change the NodeList it came from.
    List<?> nodeList = DomUtils.copyNodeList(newNodes);
    if (nodeCount == 0) {
        return;
    }

    if (!(parentNode instanceof Document)) {
        for (int i = 0; i < nodeCount; i++) {
            parentNode.insertBefore((Node) nodeList.get(i), refNode);
        }
        return;
    }

    List<?> elements = DomUtils.getElements(newNodes, "*", null);
    // Child of the document before which the non-element nodes get inserted.
    Node insertionPoint = refNode;

    if (!elements.isEmpty()) {
        System.out.println(
                "Request to insert a NodeList before the Document root node. Will replace the root element with the 1st element node from the NodeList.");
        Node newRoot = (Node) elements.get(0);
        parentNode.removeChild(refNode);
        parentNode.appendChild(newRoot);
        // refNode is no longer a child of the document, so it cannot be used
        // as the reference child any more; anchor on the new root instead.
        insertionPoint = newRoot;
    } else {
        System.out.println(
                "Cannot insert beforen the document root element from a NodeList that doesn't contain an element node.");
    }

    // Only non-element nodes are added at document level: the element slot
    // was handled above.
    for (int i = 0; i < nodeCount; i++) {
        Node node = (Node) nodeList.get(i);
        if (node.getNodeType() != Node.ELEMENT_NODE) {
            parentNode.insertBefore(node, insertionPoint);
        }
    }
}
From source file:DomUtils.java
public static Node getPreviousSibling(Node node, short nodeType) { Node parent = node.getParentNode(); if (parent == null) { System.out.println("Cannot get node [" + node + "] previous sibling. [" + node + "] has no parent."); return null; }/*w ww .j av a 2s.c om*/ NodeList siblings = parent.getChildNodes(); int siblingCount = siblings.getLength(); int nodeIndex = 0; // Locate the node for (int i = 0; i < siblingCount; i++) { Node sibling = siblings.item(i); if (sibling == node) { nodeIndex = i; break; } } if (nodeIndex == 0) { return null; } // Wind back to sibling for (int i = nodeIndex - 1; i >= 0; i--) { Node sibling = siblings.item(i); if (sibling.getNodeType() == nodeType) { return sibling; } } return null; }
From source file:DomUtils.java
/** * Get all the text DOM sibling nodes before the supplied node and * concatenate them together into a single String. * @param node1 Test node./*www. j a v a 2s .c om*/ * @return String containing the concatentated text. */ public static String getTextBetween(Node node1, Node node2) { Node parent1 = node1.getParentNode(); if (parent1 == null) { System.out.println("Cannot get text between nodes [" + node1 + "] and [" + node2 + "]. [" + node1 + "] has no parent."); return ""; } Node parent2 = node2.getParentNode(); if (parent2 == null) { System.out.println("Cannot get text between nodes [" + node1 + "] and [" + node2 + "]. [" + node2 + "] has no parent."); return ""; } if (parent1 != parent2) { System.out.println("Cannot get text between nodes [" + node1 + "] and [" + node2 + "]. These nodes do not share the same sparent."); return ""; } NodeList siblings = parent1.getChildNodes(); StringBuffer text = new StringBuffer(); boolean append = false; int siblingCount = siblings.getLength(); for (int i = 0; i < siblingCount; i++) { Node sibling = siblings.item(i); if (sibling == node1) { append = true; } if (sibling == node2) { break; } if (append && sibling.getNodeType() == Node.TEXT_NODE) { text.append(((Text) sibling).getData()); } } return text.toString(); }
From source file:DomUtils.java
/** * Count the DOM nodes of the supplied type (nodeType) between the supplied * sibling nodes, not including the nodes themselves. * <p/>/* www.j av a 2 s . c o m*/ * Counts the sibling nodes. * @param node1 First sibling node. * @param node2 Second sibling node. * @param nodeType The DOM {@link Node} type of the siblings to be counted. * @return The number of siblings of the supplied type between the supplied * sibling nodes. * @throws UnsupportedOperationException if the supplied {@link Node Nodes} * don't have the same parent node i.e. are not sibling nodes. */ public static int countNodesBetween(Node node1, Node node2, short nodeType) { Node parent1 = node1.getParentNode(); if (parent1 == null) { System.out.println("Cannot count nodes between [" + node1 + "] and [" + node2 + "]. [" + node1 + "] has no parent."); return 0; } Node parent2 = node2.getParentNode(); if (parent2 == null) { System.out.println("Cannot count nodes between [" + node1 + "] and [" + node2 + "]. [" + node2 + "] has no parent."); return 0; } if (parent1 != parent2) { System.out.println("Cannot count nodes between [" + node1 + "] and [" + node2 + "]. These nodes do not share the same sparent."); return 0; } int countBeforeNode1 = countNodesBefore(node1, nodeType); int countBeforeNode2 = countNodesBefore(node2, nodeType); int count = countBeforeNode2 - countBeforeNode1; if (node1.getNodeType() == nodeType) { count--; } return count; }
From source file:DomUtils.java
/** * Replace one node with a list of nodes. * @param newNodes New nodes - added in same location as oldNode. * @param oldNode Old node - removed./*from w w w. ja v a 2 s. c om*/ * @param clone Clone Nodelist Nodes. */ public static void replaceNode(NodeList newNodes, Node oldNode, boolean clone) { Node parentNode = oldNode.getParentNode(); if (parentNode == null) { System.out .println("Cannot replace [" + oldNode + "] with a NodeList. [" + oldNode + "] has no parent."); return; } int nodeCount = newNodes.getLength(); List nodeList = DomUtils.copyNodeList(newNodes); if (nodeCount == 0) { if (!(parentNode instanceof Document)) { parentNode.removeChild(oldNode); } return; } if (parentNode instanceof Document) { List elements = DomUtils.getElements(newNodes, "*", null); if (!elements.isEmpty()) { System.out.println( "Request to replace the Document root node with a 1+ in length NodeList. Replacing root node with the first element node from the NodeList."); parentNode.removeChild(oldNode); parentNode.appendChild((Node) elements.get(0)); } else { System.out.println( "Cannot replace document root element with a NodeList that doesn't contain an element node."); } } else { for (int i = 0; i < nodeCount; i++) { if (clone) { parentNode.insertBefore(((Node) nodeList.get(i)).cloneNode(true), oldNode); } else { parentNode.insertBefore((Node) nodeList.get(i), oldNode); } } parentNode.removeChild(oldNode); } }
From source file:com.ephesoft.dcma.util.OCREngineUtil.java
/** * To format HOCR for Tesseract.//from ww w . ja v a 2 s .c om * @param outputFilePath {@link String} * @param actualFolderLocation {@link String} * @param pageId {@link String} * @throws XPathExpressionException if error occurs * @throws TransformerException if error occurs * @throws IOException if error occurs */ public static void formatHOCRForTesseract(final String outputFilePath, final String actualFolderLocation, final String pageId) throws XPathExpressionException, TransformerException, IOException { LOGGER.info("Entering format HOCR for tessearct . outputfilepath : " + outputFilePath); InputStream inputStream = new FileInputStream(outputFilePath); XPathFactory xFactory = new org.apache.xpath.jaxp.XPathFactoryImpl(); XPath xpath = xFactory.newXPath(); XPathExpression pageExpr = xpath.compile("//div[@class=\"ocr_page\"]"); XPathExpression wordExpr = xpath.compile("//span[@class=\"ocr_word\"]"); // Output format supported by Tesseract 3.00 XPathExpression xOcrWordExpr = xpath.compile("//span[@class=\"xocr_word\"]"); // Output format supported by Tesseract 3.01 XPathExpression ocrXWordExpr = xpath.compile("//span[@class=\"ocrx_word\"]"); org.w3c.dom.Document doc2 = null; try { doc2 = XMLUtil.createDocumentFrom(inputStream); } catch (Exception e) { LOGGER.info("Premature end of file for " + outputFilePath + e); } finally { IOUtils.closeQuietly(inputStream); } if (doc2 != null) { LOGGER.info("document is not null."); NodeList wordList = (NodeList) wordExpr.evaluate(doc2, XPathConstants.NODESET); for (int wordNodeIndex = 0; wordNodeIndex < wordList.getLength(); wordNodeIndex++) { setWordNodeTextContent(xOcrWordExpr, ocrXWordExpr, wordList, wordNodeIndex); } NodeList pageList = (NodeList) pageExpr.evaluate(doc2, XPathConstants.NODESET); for (int pageNodeIndex = 0; pageNodeIndex < pageList.getLength(); pageNodeIndex++) { Node pageNode = pageList.item(pageNodeIndex); if (pageNode != null && ((Node) pageNode.getAttributes().getNamedItem(UtilConstants.ID_ATTR)) != 
null) { String pageID = ((Node) pageNode.getAttributes().getNamedItem(UtilConstants.ID_ATTR)) .getTextContent(); wordExpr = xpath.compile("//div[@id='" + pageID + "']//span[@class='ocr_word']"); NodeList wordInPageList = (NodeList) wordExpr.evaluate(pageNode, XPathConstants.NODESET); Node pageNodeClone = pageNode.cloneNode(false); for (int i = 0; i < wordInPageList.getLength(); i++) { pageNodeClone.appendChild(wordInPageList.item(i)); } pageNode.getParentNode().appendChild(pageNodeClone); pageNode.getParentNode().removeChild(pageNode); } } XMLUtil.flushDocumentToFile(doc2.getDocumentElement().getOwnerDocument(), outputFilePath); File tempFile = new File(actualFolderLocation + File.separator + pageId + "_tempFile_hocr.html"); FileUtils.copyFile(new File(outputFilePath), tempFile); XMLUtil.htmlOutputStream(tempFile.getAbsolutePath(), outputFilePath); boolean isTempFileDeleted = tempFile.delete(); if (!isTempFileDeleted) { tempFile.delete(); } } LOGGER.info("Exiting format HOCR for tessearct . outputfilepath : " + outputFilePath); }
From source file:Main.java
/** * Convert the given Node to an XML String. * <p>/* w w w . ja v a2 s.c om*/ * This method is a simplified version of... * <p> * <code> * ByteArrayOutputStream out = new ByteArrayOutputStream();<br/> * javax.xml.Transformer transformer = TransformerFactory.newInstance().newTransformer();<br/> * transformer.transform( new DOMSource( node ), new StreamResult( out ));<br/> * return out.toString(); * </code> * <p> * ...but not all platforms (eg. Android) support <code>javax.xml.transform.Transformer</code>. * * @param indent * how much to indent the output. -1 for no indent. */ private static String nodeToString(Node node, int indent) { // Text nodes if (node == null) { return null; } if (!(node instanceof Element)) { String value = node.getNodeValue(); if (value == null) { return null; } return escapeForXml(value.trim()); } // (use StringBuffer for J2SE 1.4 compatibility) StringBuffer buffer = new StringBuffer(); // Open tag indent(buffer, indent); String nodeName = escapeForXml(node.getNodeName()); buffer.append("<"); buffer.append(nodeName); // Changing namespace String namespace = node.getNamespaceURI(); Node parentNode = node.getParentNode(); if (namespace != null && (parentNode == null || !namespace.equals(parentNode.getNamespaceURI()))) { buffer.append(" xmlns=\""); buffer.append(namespace); buffer.append("\""); } // Attributes NamedNodeMap attributes = node.getAttributes(); // Always put name first for easy unit tests Node name = attributes.getNamedItem("name"); if (name != null) { buffer.append(" name=\""); buffer.append(escapeForXml(name.getNodeValue())); buffer.append("\""); } for (int loop = 0; loop < attributes.getLength(); loop++) { Node attribute = attributes.item(loop); String attributeName = attribute.getNodeName(); // (I'm a bit surprised xmlns is an attribute - is that a bug?) 
if ("xmlns".equals(attributeName)) { continue; } // (always put name first for easy unit tests) if ("name".equals(attributeName)) { continue; } buffer.append(" "); buffer.append(escapeForXml(attributeName)); buffer.append("=\""); buffer.append(escapeForXml(attribute.getNodeValue())); buffer.append("\""); } // Children (if any) NodeList children = node.getChildNodes(); int length = children.getLength(); if (length == 0) { buffer.append("/>"); } else { buffer.append(">"); int nextIndent = indent; if (indent != -1) { nextIndent++; } for (int loop = 0; loop < length; loop++) { Node childNode = children.item(loop); if (indent != -1 && childNode instanceof Element) { buffer.append("\n"); } buffer.append(nodeToString(childNode, nextIndent)); } if (indent != -1 && buffer.charAt(buffer.length() - 1) == '>') { buffer.append("\n"); indent(buffer, indent); } // Close tag buffer.append("</"); buffer.append(nodeName); buffer.append(">"); } return buffer.toString(); }
From source file:DomUtils.java
/**
 * Construct the XPath of the supplied DOM Node.
 * <p>
 * Supports element, comment and cdata sections DOM Node types. The path is
 * made of one token per ancestor element followed by a token for the node
 * itself; comment and cdata nodes get a <code>/{COMMENT}[n]</code> or
 * <code>/{CDATA}[n]</code> token, where n is one more than the count
 * returned by <code>countNodesBefore</code> for that node type.
 *
 * @param node DOM node for XPath generation.
 * @return XPath string representation of the supplied DOM Node.
 * @throws UnsupportedOperationException if the node is not an element,
 *         comment or cdata section.
 */
public static String getXPath(Node node) {
    StringBuilder xpath = new StringBuilder();

    switch (node.getNodeType()) {
    case Node.ELEMENT_NODE:
        xpath.append(getXPathToken((Element) node));
        break;
    case Node.COMMENT_NODE:
        int commentNum = DomUtils.countNodesBefore(node, Node.COMMENT_NODE);
        // Parenthesised so that the +1 is arithmetic, not string concatenation.
        xpath.append("/{COMMENT}[" + (commentNum + 1) + "]");
        break;
    case Node.CDATA_SECTION_NODE:
        int cdataNum = DomUtils.countNodesBefore(node, Node.CDATA_SECTION_NODE);
        xpath.append("/{CDATA}[" + (cdataNum + 1) + "]");
        break;
    default:
        throw new UnsupportedOperationException(
                "XPath generation for supplied DOM Node type not supported.  Only supports element, comment and cdata section DOM nodes.");
    }

    // Prefix one token per ancestor element, innermost first; stops at the
    // first non-element ancestor (e.g. the Document).
    Node parent = node.getParentNode();
    while (parent != null && parent.getNodeType() == Node.ELEMENT_NODE) {
        xpath.insert(0, getXPathToken((Element) parent));
        parent = parent.getParentNode();
    }

    return xpath.toString();
}
From source file:Utils.java
/** * Remove this node from its parent./*w ww . j a va 2 s . co m*/ * @param node the node to remove * @return Node removed */ public Node removeNode(Node node) { return node.getParentNode().removeChild(node); }