Example usage for org.dom4j Element content

List of usage examples for org.dom4j Element content

Introduction

On this page you can find usage examples for the org.dom4j Element content() method.

Prototype

List<Node> content();

Source Link

Document

Returns the content nodes of this branch as a backed List so that the content of this branch may be modified directly using the List interface.

Usage

From source file:com.globalsight.everest.tm.util.ttx.TtxToTmx.java

License:Apache License

/**
 * Builds a replacement for a TTX TU that uses lowercase element names.
 *
 * <p>A new {@code tu} element is created and given the attributes of
 * {@code p_tu}. Every {@code Tuv} found below {@code p_tu} is detached
 * and re-created as a {@code tuv} child of the new element, receiving
 * the original's attributes and content.</p>
 *
 * @param p_tu the original TU element; its {@code Tuv} descendants are
 *        detached from it as a side effect
 * @return the newly created {@code tu} element
 */
private Element lowercaseElements(Element p_tu) {
    Element tu = m_factory.createElement("tu");
    tu.setAttributes(p_tu.attributes());

    // Use a path relative to p_tu. The absolute form "//Tuv" is
    // evaluated from the document root and therefore also matches
    // TUVs that belong to other TUs.
    List tuvs = p_tu.selectNodes(".//Tuv");

    for (int i = 0, max = tuvs.size(); i < max; i++) {
        Element tuv = (Element) tuvs.get(i);
        // Keep removing the old TUV from its TU, as before, so the
        // original tree does not retain the uppercase elements.
        tuv.detach();

        Element newTuv = m_factory.createElement("tuv");
        newTuv.setAttributes(tuv.attributes());
        newTuv.setContent(tuv.content());

        tu.add(newTuv);
    }

    return tu;
}

From source file:com.globalsight.everest.tm.util.ttx.TtxToTmx.java

License:Apache License

/**
 * Wraps the content of TTX TUVs inside a TMX &lt;seg&gt; element.
 *
 * <p>For every {@code tuv} descendant of {@code p_tu}, a new
 * {@code seg} element is given the TUV's content, the TUV's content
 * list is cleared, and the {@code seg} is added to the TUV.</p>
 *
 * @param p_tu the TU element whose TUVs are modified in place
 */
private void addSegElement(Element p_tu) {
    List tuvs = p_tu.selectNodes(".//tuv");

    for (int i = 0, max = tuvs.size(); i < max; i++) {
        Element tuv = (Element) tuvs.get(i);

        Element seg = m_factory.createElement("seg");
        // Hand the content to the seg first, then empty the TUV so
        // that the seg ends up as its only content node.
        seg.setContent(tuv.content());
        tuv.content().clear();
        tuv.add(seg);
    }
}

From source file:com.globalsight.ling.docproc.DiplomatWordCounter.java

License:Apache License

/**
 * Concatenates the text of the direct text-node children of an
 * element, passing each piece through {@code encodeXmlEntities}.
 * Children of any other node type are ignored.
 *
 * @param p_node the element whose content list is scanned
 * @return the concatenated, encoded text; empty if there are no
 *         text-node children
 */
static public String getTranslateInnerXml(Element p_node) {
    StringBuilder encoded = new StringBuilder();

    for (Object item : p_node.content()) {
        Node child = (Node) item;
        if (child.getNodeType() != Node.TEXT_NODE) {
            continue;
        }
        encoded.append(encodeXmlEntities(child.getText()));
    }

    return encoded.toString();
}

From source file:com.globalsight.ling.docproc.DiplomatWordCounter.java

License:Apache License

/**
 * Returns the text of an element's direct content, with child elements
 * that represent TMX whitespace replaced by a single space.
 *
 * <p>Handling per direct child:</p>
 * <ul>
 * <li>Text node (only when no {@code bs} value is passed): appended
 * as is, or surrounded by one space on each side when
 * {@code isInternalText} reports it as internal text.</li>
 * <li>Element whose {@code type} attribute is recognised by
 * {@code Text.isTmxWhitespaceNode} or
 * {@code Text.isTmxMsoWhitespaceNode}: a single space is appended.</li>
 * <li>Any other element: only the text nodes that follow a child
 * element carrying {@code isTranslate="true"} are appended.</li>
 * <li>Anything else: a diagnostic is printed to {@code System.err}.</li>
 * </ul>
 *
 * <p>NOTE(review): when one or more {@code bs} values are passed, text
 * nodes are not appended and end up in the diagnostic branch. Confirm
 * with the callers that this is intended.</p>
 *
 * @param p_node the element whose content is read
 * @param bs optional flags; only the number of values is inspected
 * @return the collected text
 */
static public String getTextWithWhite(Element p_node, boolean... bs) {
    StringBuilder result = new StringBuilder();

    List content = p_node.content();

    for (int i = 0, max = content.size(); i < max; i++) {
        Node node = (Node) content.get(i);
        short nodeType = node.getNodeType();

        if (nodeType == Node.TEXT_NODE && bs.length == 0) {
            if (isInternalText(content, i)) {
                // add space around internal text
                result.append(" ").append(node.getText()).append(" ");
            } else {
                result.append(node.getText());
            }
        } else if (nodeType == Node.ELEMENT_NODE) {
            Element elem = (Element) node;
            String type = elem.attributeValue("type");
            // For word counting, always treat TMX whitespace tags
            // as white.
            if (Text.isTmxWhitespaceNode(type) || Text.isTmxMsoWhitespaceNode(type)) {
                result.append(" ");
            } else {
                appendTextAfterTranslatableChildren(result, elem);
            }
        } else {
            System.err.println("Please fix the word counter: " + node);
        }
    }

    return result.toString();
}

/**
 * Appends those text-node children of {@code p_elem} that come after a
 * child element whose {@code isTranslate} attribute parses as true.
 * Each child element switches the extraction on or off for the text
 * nodes that follow it.
 */
static private void appendTextAfterTranslatableChildren(StringBuilder p_result, Element p_elem) {
    // Fetch the content list once instead of on every access.
    List children = p_elem.content();
    boolean isExtract = false;

    for (int j = 0, max = children.size(); j < max; j++) {
        Node child = (Node) children.get(j);
        short childType = child.getNodeType();

        if (childType == Node.ELEMENT_NODE) {
            // parseBoolean(null) is false, so a missing attribute
            // switches extraction off.
            String translate = ((Element) child).attributeValue("isTranslate");
            isExtract = Boolean.parseBoolean(translate);
        } else if (childType == Node.TEXT_NODE && isExtract) {
            p_result.append(child.getText());
        }
    }
}

From source file:com.globalsight.terminology.EntryUtils.java

License:Apache License

/**
 * Returns the XML representation like Element.asXML() but without
 * the top-level tag./* w  w w  . j a  va  2 s.  c  om*/
 */
static public String getInnerXml(Element p_node) {
    StringBuffer result = new StringBuffer();

    List content = p_node.content();

    for (int i = 0, max = content.size(); i < max; i++) {
        Node node = (Node) content.get(i);

        // Work around a specific behaviour of DOM4J text nodes:
        // The text node asXML() returns the plain Unicode string,
        // so we need to encode entities manually.
        if (node.getNodeType() == Node.TEXT_NODE) {
            result.append(EditUtil.encodeXmlEntities(node.getText()));
        } else {
            // Element nodes write their text nodes correctly.
            result.append(node.asXML());
        }
    }

    return result.toString();
}

From source file:com.globalsight.terminology.EntryUtils.java

License:Apache License

/**
 * Returns the HTML representation of an element's content without
 * the top-level tag. Works like getInnerXml(), except that text nodes
 * are encoded with {@code EditUtil.encodeHtmlEntities}; per the
 * original documentation this leaves apostrophes unencoded.
 *
 * @param p_node the element whose content is serialized
 * @return the concatenated serialization of all content nodes
 */
static public String getInnerHtml(Element p_node) {
    StringBuffer html = new StringBuffer();
    Iterator children = p_node.content().iterator();

    while (children.hasNext()) {
        Node child = (Node) children.next();

        if (child.getNodeType() != Node.TEXT_NODE) {
            // Non-text nodes serialize themselves correctly.
            html.append(child.asXML());
            continue;
        }

        // DOM4J text nodes return the plain Unicode string from
        // asXML(), so their entities are encoded here by hand.
        html.append(EditUtil.encodeHtmlEntities(child.getText()));
    }

    return html.toString();
}

From source file:com.globalsight.terminology.EntryUtils.java

License:Apache License

/**
 * <p>Removes insignificant whitespace between elements in groups.
 * Content inside non-groups, i.e. the data elements, is significant
 * and is preserved.</p>
 *
 * <p>An element counts as a group when its name ends with "Grp". In a
 * group, every direct child that is not an element is removed. Child
 * elements are processed recursively whether or not the parent is a
 * group.</p>
 *
 * This method is needed for comparing nodes.
 *
 * @param p_node the element to clean up in place
 * @return true if at least one node was removed anywhere below
 *         {@code p_node}
 */
static private boolean removeInsignificantWhitespace(Element p_node) {
    boolean isGrp = p_node.getName().endsWith("Grp");
    boolean dirty = false;

    Iterator it = p_node.content().iterator();
    while (it.hasNext()) {
        Node child = (Node) it.next();

        if (child.getNodeType() == Node.ELEMENT_NODE) {
            // Depth-first recursion; always descend, even when an
            // earlier sibling already made the tree dirty.
            boolean childDirty = removeInsignificantWhitespace((Element) child);
            dirty = dirty || childDirty;
        } else if (isGrp) {
            it.remove();
            dirty = true;
        }
    }

    return dirty;
}

From source file:com.globalsight.terminology.EntryUtils.java

License:Apache License

/**
 * <p>Recursively prunes empty fields and groups from the given entry.
 * The entry is destructively modified.</p>
 *
 * <p>A depth-first traversal first removes empty leaf nodes, and
 * then groups that are empty or not fully filled.</p>
 *
 * <p>Example: a {@code <descripGrp>} must contain at least one
 * {@code <descrip>} child. A {@code <languageGrp>} must contain at
 * least one {@code <language>} and one {@code <termGrp>} child
 * (2 children minimum).</p>
 *
 * <p>Elements named {@code language}, {@code img}, {@code hr} or
 * {@code br} (compared case-insensitively) are never pruned
 * themselves, although their children are still processed.</p>
 *
 * <p>NOTE(review): the original documentation states that, as of 6.2,
 * non-relevant whitespace nodes are also removed. This method skips
 * non-element children, so that presumably happens elsewhere.</p>
 *
 * @param p_node the element whose descendants are pruned in place
 * @return true if at least one element was removed
 */
static private boolean pruneEmptyFields(Element p_node) {
    boolean dirty = false;

    if (!p_node.hasContent()) {
        return dirty;
    }

    // Cannot iterate child elements with node.elementIterator()
    // because that doesn't implement the remove() method.
    for (Iterator it = p_node.content().iterator(); it.hasNext();) {
        Node temp = (Node) it.next();

        // Only work on child elements.
        if (temp.getNodeType() != Node.ELEMENT_NODE) {
            continue;
        }

        Element node = (Element) temp;

        // Depth-first recursion.
        dirty |= pruneEmptyFields(node);

        // Empty HTML tags (and <language>) are admissible. Lowercase
        // with a fixed locale: under e.g. a Turkish default locale
        // "IMG".toLowerCase() does not yield "img".
        String name = node.getName().toLowerCase(java.util.Locale.ROOT);
        if (name.equals("language") || name.equals("img") || name.equals("hr") || name.equals("br")) {
            continue;
        }

        if (node.isTextOnly()) {
            // Leaf node: prune when it holds no text or only whitespace.
            String value = node.getText();
            if (value == null || value.trim().length() == 0) {
                it.remove();
                dirty = true;
            }
        } else {
            // Group node: prune when it has no child elements, or when
            // a languageGrp has fewer than two of them.
            int childCount = node.elements().size();
            if (childCount == 0 || (node.getName().equals("languageGrp") && childCount < 2)) {
                it.remove();
                dirty = true;
            }
        }
    }

    return dirty;
}

From source file:com.globalsight.terminology.EntryUtils.java

License:Apache License

/**
 * Moves the content of {@code p_two} into {@code p_one}, one group at
 * a time. Every content node of {@code p_two} is removed from it and
 * then dispatched by element name to the matching merge method.
 * Elements named {@code transacGrp}, and elements with any other
 * unhandled name, are removed without being merged.
 *
 * <p>NOTE(review): every content node is cast to {@code Element}, so
 * this assumes {@code p_two} holds no text or other non-element
 * nodes. Verify against the callers.</p>
 *
 * @param p_one the element that receives the merged groups
 * @param p_two the element whose content is consumed
 * @param p_comp comparator handed through to the merge methods
 */
static private void mergeInnerGroups(Element p_one, Element p_two, NodeComparator p_comp) {
    Iterator it = p_two.content().iterator();

    while (it.hasNext()) {
        Element group = (Element) it.next();

        // Unhook the group from p_two before handing it on,
        // presumably so that it can be added to p_one.
        it.remove();
        group.detach();

        String name = group.getName();

        // transacGrp and unknown groups match none of the branches
        // below and are simply dropped.
        if (name.equals("noteGrp")) {
            mergeNoteGrp(p_one, p_two, group, p_comp);
        } else if (name.equals("sourceGrp")) {
            mergeSourceGrp(p_one, p_two, group, p_comp);
        } else if (name.equals("descripGrp")) {
            mergeDescripGrp(p_one, p_two, group, p_comp);
        } else if (name.equals("languageGrp")) {
            mergeLanguageGrp(p_one, p_two, group, p_comp);
        } else if (name.equals("termGrp")) {
            mergeTermGrp(p_one, p_two, group, p_comp);
        }
    }
}

From source file:com.globalsight.terminology.EntryUtils.java

License:Apache License

/**
 * Merges a {@code noteGrp} into {@code p_one}.
 *
 * <p>Nothing happens when the group has no {@code note} child or the
 * note has no content. Otherwise the note is compared, using
 * {@code fieldEquals}, with the note of each {@code noteGrp} child of
 * {@code p_one}. On the first match the existing note is replaced by
 * the incoming one; without a match the whole group is appended to
 * {@code p_one}.</p>
 *
 * @param p_one the element that receives the note
 * @param p_two not used by this method
 * @param p_noteGrp the note group to merge
 * @param p_comp comparator passed to {@code fieldEquals}
 */
static private void mergeNoteGrp(Element p_one, Element p_two, Element p_noteGrp, NodeComparator p_comp) {
    Element incomingNote = p_noteGrp.element("note");

    boolean hasNote = incomingNote != null && incomingNote.hasContent();
    if (!hasNote) {
        return;
    }

    // Look through all noteGrps already present in p_one.
    List existingGrps = p_one.selectNodes("noteGrp");

    if (existingGrps != null) {
        for (Iterator it = existingGrps.iterator(); it.hasNext();) {
            Element existingGrp = (Element) it.next();
            Element existingNote = existingGrp.element("note");

            if (!fieldEquals(existingNote, incomingNote, p_comp)) {
                continue;
            }

            // Could be a case/formatting-insensitive match, so the
            // incoming note takes the place of the existing one.
            existingNote.detach();
            existingGrp.content().add(0, incomingNote);

            return;
        }
    }

    // No notes exist yet, or none of them matched: add to the end.
    p_one.add(p_noteGrp);
}