List of usage examples for org.dom4j Node ELEMENT_NODE
short ELEMENT_NODE
To view the source code for org.dom4j Node ELEMENT_NODE, click Source Link.
From source file:com.finderbots.miner2.pinterest.AnalyzeHtml.java
License:Apache License
/**
 * Returns the trimmed text of the given node when it is an element.
 *
 * @param node the node to read; it is dereferenced unconditionally, so it
 *             must not be {@code null}
 * @return the element's trimmed text, or an empty string when the node is
 *         not an element or the element yields {@code null} text
 */
private String getTextFromNode(Node node) {
    if (node.getNodeType() != Node.ELEMENT_NODE) {
        return "";
    }
    String trimmedText = ((Element) node).getTextTrim();
    return trimmedText != null ? trimmedText : "";
}
From source file:com.flaptor.util.parser.HtmlParser.java
License:Apache License
/**
 * Strips the namespace from every attribute and element in the given list.
 * Attributes are reset to {@code Namespace.NO_NAMESPACE} directly; elements
 * are passed on to the {@code removeNamespace} overload that accepts an
 * {@code Element} (defined outside this snippet). Other node types are
 * left untouched.
 *
 * @param list nodes to process; a {@code null} list is ignored
 */
@SuppressWarnings("unchecked")
private void removeNamespace(List list) {
    if (list == null) {
        return;
    }
    for (Node item : (List<Node>) list) {
        switch (item.getNodeType()) {
        case Node.ATTRIBUTE_NODE:
            ((Attribute) item).setNamespace(Namespace.NO_NAMESPACE);
            break;
        case Node.ELEMENT_NODE:
            removeNamespace((Element) item);
            break;
        default:
            // Neither attribute nor element: nothing to strip.
            break;
        }
    }
}
From source file:com.globalsight.everest.edit.offline.ttx.TTXParser.java
License:Apache License
/** * Parse main contents//from w w w. j a va 2s . com * * @param p_element */ private void domNodehandler(Node p_node, boolean isSource) { // public static final short ANY_NODE 0 // public static final short ATTRIBUTE_NODE 2 // public static final short CDATA_SECTION_NODE 4 // public static final short COMMENT_NODE 8 // public static final short DOCUMENT_NODE 9 // public static final short DOCUMENT_TYPE_NODE 10 // public static final short ELEMENT_NODE 1 // public static final short ENTITY_REFERENCE_NODE 5 // public static final short MAX_NODE_TYPE 14 // public static final short NAMESPACE_NODE 13 // public static final short PROCESSING_INSTRUCTION_NODE 7 // public static final short TEXT_NODE 3 // public static final short UNKNOWN_NODE 14 if (p_node == null) { return; } switch (p_node.getNodeType()) { case Node.ELEMENT_NODE: elementNodeProcessor(p_node, isSource); break; case Node.TEXT_NODE: String nodeValue = p_node.getStringValue(); if (nodeValue.startsWith("#")) { nodeValue = nodeValue.replaceFirst("#", OfflineConstants.PONUD_SIGN); } if (isParsingTTXForGS) { boolean isInTargetTuv = isInTargetTuv(p_node); if (nodeValue != null && isInTargetTuv) { results.append(nodeValue); } else if (nodeValue != null && isLockedSegment(p_node)) { results.append(AmbassadorDwUpConstants.SEGMENT_MATCH_TYPE_KEY).append(" ") .append("DO NOT TRANSLATE OR MODIFY (Locked).").append(TTXConstants.NEW_LINE); results.append(nodeValue); } } else { results.append(nodeValue); } break; default: return; } }
From source file:com.globalsight.everest.tm.util.ttx.TtxClean.java
License:Apache License
/** * Returns the inner text like Element.getText() but for all * embedded text nodes./*from w w w .j av a 2 s. c om*/ */ static public String getInnerText(Element p_node) { StringBuffer result = new StringBuffer(); List content = p_node.content(); for (int i = 0, max = content.size(); i < max; i++) { Node node = (Node) content.get(i); if (node.getNodeType() == Node.TEXT_NODE) { result.append(node.getText()); } else if (node.getNodeType() == Node.ELEMENT_NODE) { result.append(getInnerText((Element) node)); } } return result.toString(); }
From source file:com.globalsight.ling.docproc.DiplomatWordCounter.java
License:Apache License
/** * Returns the string value of an element with tags representing whitespace * replaced by either whitespace or nbsps. */// w w w .j av a 2s. c om static public String getTextWithWhite(Element p_node, boolean... bs) { StringBuffer result = new StringBuffer(); List content = p_node.content(); for (int i = 0, max = content.size(); i < max; i++) { Node node = (Node) content.get(i); if (node.getNodeType() == Node.TEXT_NODE && bs.length == 0) { boolean isInternalText = isInternalText(content, i); if (!isInternalText) { result.append(node.getText()); } else { // add space around internal text result.append(" ").append(node.getText()).append(" "); } } else if (node.getNodeType() == Node.ELEMENT_NODE) { Element elem = (Element) node; String type = elem.attributeValue("type"); int childNodes = elem.content().size(); // For word counting, always treat TMX whitespace tags // as white. if (Text.isTmxWhitespaceNode(type) || Text.isTmxMsoWhitespaceNode(type)) { result.append(" "); } else { if (childNodes > 0) { boolean isExtract = false; for (int j = 0; j < childNodes; j++) { if (((Node) elem.content().get(j)).getNodeType() == Node.ELEMENT_NODE) { String s = ((Element) elem.content().get(j)).attributeValue("isTranslate"); String innerTextNodeIndex = ((Element) elem.content().get(j)) .attributeValue("innerTextNodeIndex"); if (s != null && Boolean.parseBoolean(s)) { isExtract = true; // getTextWithWhite((Element)elem.content().get(j), // true); // ((Element)elem.content().get(j)). // result.append(getTranslateInnerXml((Element) // elem.content().get(j))); } else { isExtract = false; } } else if (((Node) elem.content().get(j)).getNodeType() == Node.TEXT_NODE && isExtract) { result.append(((Node) elem.content().get(j)).getText()); } } } } } else { System.err.println("Please fix the word counter: " + node); } } return result.toString(); }
From source file:com.globalsight.ling.docproc.DiplomatWordCounter.java
License:Apache License
private static boolean isInternalText(List content, int i) { if (i == 0 || i + 1 >= content.size()) { return false; }//w w w . j ava 2 s .c o m Node prenode = (Node) content.get(i - 1); Node nextnode = (Node) content.get(i + 1); if (prenode.getNodeType() != Node.ELEMENT_NODE || nextnode.getNodeType() != Node.ELEMENT_NODE) { return false; } Element preElem = (Element) prenode; Element nextElem = (Element) nextnode; String preelemName = preElem.getName(); String nextelemName = nextElem.getName(); String isInternal = preElem.attributeValue("internal"); if ("bpt".equalsIgnoreCase(preelemName) && "ept".equalsIgnoreCase(nextelemName) && "yes".equalsIgnoreCase(isInternal)) { return true; } else { return false; } }
From source file:com.globalsight.terminology.EntryUtils.java
License:Apache License
/** * <p>Removes insignificant whitespace between elements in groups. * Whitespace inside non-Grps, i.e. the data elements is * significant and is preserved.</p> * * This method is needed for comparing nodes. *//*w w w . ja v a2 s. c om*/ static private boolean removeInsignificantWhitespace(Element p_node) { boolean dirty = false; boolean isGrp = p_node.getName().endsWith("Grp"); for (Iterator it = p_node.content().iterator(); it.hasNext();) { Node temp = (Node) it.next(); if (temp.getNodeType() != Node.ELEMENT_NODE) { if (isGrp) { it.remove(); dirty = true; } continue; } Element node = (Element) temp; // Depth-first recursion. dirty |= removeInsignificantWhitespace(node); } return dirty; }
From source file:com.globalsight.terminology.EntryUtils.java
License:Apache License
/** * <p>Recursively prunes empty fields and groups from the given entry. * The entry is destructively modified.</p> * * <p>A depth-first traversal first removes empty leaf nodes, and * then groups that are empty or not fully filled.</p> * * <p>Example: a <descripGrp> must contain at least one <descrip> * child. A <languageGrp> must contain at least one <language> * and one <termGrp> child (2 children minimum).</p> * * <p>As of 6.2, non-relevant whitespace nodes are also removed.</p> * <p>As of 6.3, admissible empty HTML tags are not pruned: IMG, HR, BR.</p> */// w w w. ja v a 2 s .co m static private boolean pruneEmptyFields(Element p_node) { boolean dirty = false; if (!p_node.hasContent()) { return dirty; } // Cannot iterate child elements with node.elementIterator() // because that doesn't implement the remove() method. for (Iterator it = p_node.content().iterator(); it.hasNext();) { Node temp = (Node) it.next(); // Only work on child elements. if (temp.getNodeType() != Node.ELEMENT_NODE) { continue; } Element node = (Element) temp; // Depth-first recursion. dirty |= pruneEmptyFields(node); // Sat Jan 15 02:17:38 2005 CvdL Need to allow empty HTML tags. String name = node.getName().toLowerCase(); if (name.equals("language") || name.equals("img") || name.equals("hr") || name.equals("br")) { continue; } // Leaf nodes if (node.isTextOnly()) { String value = node.getText(); if (value == null || value.trim().length() == 0) { // prune empty leaf nodes it.remove(); dirty = true; } } else { // Group nodes int childCount = node.elements().size(); if (childCount == 0 || (node.getName().equals("languageGrp") && childCount < 2)) { // prune empty groups it.remove(); dirty = true; } } } return dirty; }
From source file:com.nokia.helium.ant.data.AntObjectMeta.java
License:Open Source License
/**
 * Looks up an attribute on this object's node, falling back to an empty
 * string when the node is not an element or the attribute has no value.
 *
 * @param name Attribute name.
 * @return Attribute value, never {@code null}.
 */
public String getAttr(String name) {
    if (node.getNodeType() != Node.ELEMENT_NODE) {
        return "";
    }
    String attributeValue = ((Element) node).attributeValue(name);
    return attributeValue == null ? "" : attributeValue;
}
From source file:com.nokia.helium.ant.data.MacroMeta.java
License:Open Source License
@SuppressWarnings("unchecked") public List<String> getAttributes() { List<String> attributes = new ArrayList<String>(); if (getNode().getNodeType() == Node.ELEMENT_NODE) { Element element = (Element) getNode(); List<Element> attributeNodes = element.elements("attribute"); for (Element attributeNode : attributeNodes) { attributes.add(attributeNode.attributeValue("name")); }// w ww.ja v a 2 s . c om } return attributes; }