List of usage examples for org.dom4j Element content
List<Node> content();
From source file:com.globalsight.everest.tm.util.ttx.TtxToTmx.java
License:Apache License
/** * Creates new elements for TU and TUV with lowercase names by * constructing new elements and inserting them in place of the * old ones.//ww w .ja v a 2s .c om */ private Element lowercaseElements(Element p_tu) { Element tu = m_factory.createElement("tu"); tu.setAttributes(p_tu.attributes()); List tuvs = p_tu.selectNodes("//Tuv"); for (int i = 0, max = tuvs.size(); i < max; i++) { Element tuv = (Element) tuvs.get(i); // Detach the TUV or else TUVs will accumulate on // subsequent TUs (sic!). tuv.detach(); Element newTuv = m_factory.createElement("tuv"); newTuv.setAttributes(tuv.attributes()); newTuv.setContent(tuv.content()); tu.add(newTuv); } return tu; }
From source file:com.globalsight.everest.tm.util.ttx.TtxToTmx.java
License:Apache License
/**
 * Wraps the content of TTX TUVs inside a TMX {@code <seg>} element.
 *
 * <p>For every "tuv" element found below {@code p_tu}, a new "seg"
 * element is created, given the TUV's content, and then installed as
 * the only child of that TUV.</p>
 *
 * @param p_tu the TU element whose "tuv" descendants are rewritten
 *             in place
 */
private void addSegElement(Element p_tu) {
    List tuvs = p_tu.selectNodes(".//tuv");

    for (int i = 0, max = tuvs.size(); i < max; i++) {
        Element tuv = (Element) tuvs.get(i);
        Element seg = m_factory.createElement("seg");

        // Give the TUV's current content to the new seg first, then
        // empty the TUV so that seg ends up as its single child.
        seg.setContent(tuv.content());
        tuv.content().clear();
        tuv.add(seg);
    }
}
From source file:com.globalsight.ling.docproc.DiplomatWordCounter.java
License:Apache License
static public String getTranslateInnerXml(Element p_node) { StringBuilder result = new StringBuilder(); List content = p_node.content(); for (int i = 0; i < content.size(); i++) { Node node = (Node) content.get(i); if (node.getNodeType() == Node.TEXT_NODE) { result.append(encodeXmlEntities(node.getText())); }//from w w w . j a v a 2 s. com } return result.toString(); }
From source file:com.globalsight.ling.docproc.DiplomatWordCounter.java
License:Apache License
/** * Returns the string value of an element with tags representing whitespace * replaced by either whitespace or nbsps. */// w w w. j ava 2 s . c o m static public String getTextWithWhite(Element p_node, boolean... bs) { StringBuffer result = new StringBuffer(); List content = p_node.content(); for (int i = 0, max = content.size(); i < max; i++) { Node node = (Node) content.get(i); if (node.getNodeType() == Node.TEXT_NODE && bs.length == 0) { boolean isInternalText = isInternalText(content, i); if (!isInternalText) { result.append(node.getText()); } else { // add space around internal text result.append(" ").append(node.getText()).append(" "); } } else if (node.getNodeType() == Node.ELEMENT_NODE) { Element elem = (Element) node; String type = elem.attributeValue("type"); int childNodes = elem.content().size(); // For word counting, always treat TMX whitespace tags // as white. if (Text.isTmxWhitespaceNode(type) || Text.isTmxMsoWhitespaceNode(type)) { result.append(" "); } else { if (childNodes > 0) { boolean isExtract = false; for (int j = 0; j < childNodes; j++) { if (((Node) elem.content().get(j)).getNodeType() == Node.ELEMENT_NODE) { String s = ((Element) elem.content().get(j)).attributeValue("isTranslate"); String innerTextNodeIndex = ((Element) elem.content().get(j)) .attributeValue("innerTextNodeIndex"); if (s != null && Boolean.parseBoolean(s)) { isExtract = true; // getTextWithWhite((Element)elem.content().get(j), // true); // ((Element)elem.content().get(j)). // result.append(getTranslateInnerXml((Element) // elem.content().get(j))); } else { isExtract = false; } } else if (((Node) elem.content().get(j)).getNodeType() == Node.TEXT_NODE && isExtract) { result.append(((Node) elem.content().get(j)).getText()); } } } } } else { System.err.println("Please fix the word counter: " + node); } } return result.toString(); }
From source file:com.globalsight.terminology.EntryUtils.java
License:Apache License
/** * Returns the XML representation like Element.asXML() but without * the top-level tag./* w w w . j a va 2 s. c om*/ */ static public String getInnerXml(Element p_node) { StringBuffer result = new StringBuffer(); List content = p_node.content(); for (int i = 0, max = content.size(); i < max; i++) { Node node = (Node) content.get(i); // Work around a specific behaviour of DOM4J text nodes: // The text node asXML() returns the plain Unicode string, // so we need to encode entities manually. if (node.getNodeType() == Node.TEXT_NODE) { result.append(EditUtil.encodeXmlEntities(node.getText())); } else { // Element nodes write their text nodes correctly. result.append(node.asXML()); } } return result.toString(); }
From source file:com.globalsight.terminology.EntryUtils.java
License:Apache License
/** * Returns the HTML representation of an element's text. This is * like getInnerXml() but doesn't encode apostrophes. *///from w w w. j av a2 s. c om static public String getInnerHtml(Element p_node) { StringBuffer result = new StringBuffer(); List content = p_node.content(); for (int i = 0, max = content.size(); i < max; i++) { Node node = (Node) content.get(i); // Work around a specific behaviour of DOM4J text nodes: // The text node asXML() returns the plain Unicode string, // so we need to encode entities manually. if (node.getNodeType() == Node.TEXT_NODE) { result.append(EditUtil.encodeHtmlEntities(node.getText())); } else { // Element nodes write their text nodes correctly. result.append(node.asXML()); } } return result.toString(); }
From source file:com.globalsight.terminology.EntryUtils.java
License:Apache License
/** * <p>Removes insignificant whitespace between elements in groups. * Whitespace inside non-Grps, i.e. the data elements is * significant and is preserved.</p> * * This method is needed for comparing nodes. *//* ww w.j a v a 2 s. com*/ static private boolean removeInsignificantWhitespace(Element p_node) { boolean dirty = false; boolean isGrp = p_node.getName().endsWith("Grp"); for (Iterator it = p_node.content().iterator(); it.hasNext();) { Node temp = (Node) it.next(); if (temp.getNodeType() != Node.ELEMENT_NODE) { if (isGrp) { it.remove(); dirty = true; } continue; } Element node = (Element) temp; // Depth-first recursion. dirty |= removeInsignificantWhitespace(node); } return dirty; }
From source file:com.globalsight.terminology.EntryUtils.java
License:Apache License
/** * <p>Recursively prunes empty fields and groups from the given entry. * The entry is destructively modified.</p> * * <p>A depth-first traversal first removes empty leaf nodes, and * then groups that are empty or not fully filled.</p> * * <p>Example: a <descripGrp> must contain at least one <descrip> * child. A <languageGrp> must contain at least one <language> * and one <termGrp> child (2 children minimum).</p> * * <p>As of 6.2, non-relevant whitespace nodes are also removed.</p> * <p>As of 6.3, admissible empty HTML tags are not pruned: IMG, HR, BR.</p> *//*from w w w . j a va 2 s. c o m*/ static private boolean pruneEmptyFields(Element p_node) { boolean dirty = false; if (!p_node.hasContent()) { return dirty; } // Cannot iterate child elements with node.elementIterator() // because that doesn't implement the remove() method. for (Iterator it = p_node.content().iterator(); it.hasNext();) { Node temp = (Node) it.next(); // Only work on child elements. if (temp.getNodeType() != Node.ELEMENT_NODE) { continue; } Element node = (Element) temp; // Depth-first recursion. dirty |= pruneEmptyFields(node); // Sat Jan 15 02:17:38 2005 CvdL Need to allow empty HTML tags. String name = node.getName().toLowerCase(); if (name.equals("language") || name.equals("img") || name.equals("hr") || name.equals("br")) { continue; } // Leaf nodes if (node.isTextOnly()) { String value = node.getText(); if (value == null || value.trim().length() == 0) { // prune empty leaf nodes it.remove(); dirty = true; } } else { // Group nodes int childCount = node.elements().size(); if (childCount == 0 || (node.getName().equals("languageGrp") && childCount < 2)) { // prune empty groups it.remove(); dirty = true; } } } return dirty; }
From source file:com.globalsight.terminology.EntryUtils.java
License:Apache License
/**
 * Moves the child groups of {@code p_two} into {@code p_one}.
 *
 * <p>Every child node is removed from {@code p_two}. Child elements
 * named noteGrp, sourceGrp, descripGrp, languageGrp or termGrp are
 * handed to the matching merge method. "transacGrp" elements,
 * elements with any other name, and non-element nodes are dropped
 * without being merged.</p>
 *
 * @param p_one the element that receives the merged groups
 * @param p_two the element whose content is consumed
 * @param p_comp comparator passed through to the merge methods
 */
static private void mergeInnerGroups(Element p_one, Element p_two, NodeComparator p_comp) {
    for (Iterator it = p_two.content().iterator(); it.hasNext();) {
        Node child = (Node) it.next();
        it.remove();

        // Text, whitespace or comment nodes carry nothing to merge.
        // Casting them to Element would throw ClassCastException.
        if (child.getNodeType() != Node.ELEMENT_NODE) {
            continue;
        }

        Element node = (Element) child;
        node.detach();

        String name = node.getName();

        if (name.equals("transacGrp")) {
            continue;
        } else if (name.equals("noteGrp")) {
            mergeNoteGrp(p_one, p_two, node, p_comp);
        } else if (name.equals("sourceGrp")) {
            mergeSourceGrp(p_one, p_two, node, p_comp);
        } else if (name.equals("descripGrp")) {
            mergeDescripGrp(p_one, p_two, node, p_comp);
        } else if (name.equals("languageGrp")) {
            mergeLanguageGrp(p_one, p_two, node, p_comp);
        } else if (name.equals("termGrp")) {
            mergeTermGrp(p_one, p_two, node, p_comp);
        }
    }
}
From source file:com.globalsight.terminology.EntryUtils.java
License:Apache License
static private void mergeNoteGrp(Element p_one, Element p_two, Element p_noteGrp, NodeComparator p_comp) { Element p_note = p_noteGrp.element("note"); if (p_note == null || !p_note.hasContent()) { return;/*from w w w . j a va2 s . c om*/ } // Find all noteGrps in 1. List matches = p_one.selectNodes("noteGrp"); if (matches == null || matches.size() == 0) { // No notes exist, add the new one to the end. p_one.add(p_noteGrp); return; } // Check if one of the matches is the same note. for (int i = 0, max = matches.size(); i < max; i++) { Element noteGrp = (Element) matches.get(i); Element note = noteGrp.element("note"); if (fieldEquals(note, p_note, p_comp)) { // could be a case/formatting-insensitive match note.detach(); noteGrp.content().add(0, p_note); return; } } // Note does not exist, add it to the end. p_one.add(p_noteGrp); }