Example usage for org.w3c.dom Node TEXT_NODE

List of usage examples for org.w3c.dom Node TEXT_NODE

Introduction

In this page you can find the example usage for org.w3c.dom Node TEXT_NODE.

Prototype

short TEXT_NODE

To view the source code for org.w3c.dom Node TEXT_NODE, click the Source Link.

Documentation

The node is a Text node.

Usage

From source file:com.twentyn.patentExtractor.PatentDocumentFeatures.java

/**
 * Collects the text content of every text node at or below {@code n},
 * visiting children in index order.
 *
 * @param textList list that receives the text; it is appended to in place
 * @param n        node to start from
 * @return the list holding the collected text (the value handed back by the
 *         last recursive call, or {@code textList} itself when there are no
 *         children)
 */
private static List<String> appendTextContent(List<String> textList, Node n) {
    // A text node is a leaf: record its content and stop descending.
    if (n.getNodeType() == Node.TEXT_NODE) {
        textList.add(n.getTextContent());
        return textList;
    }

    // Any other node contributes only through its descendants.
    List<String> collected = textList;
    NodeList children = n.getChildNodes();
    int index = 0;
    while (index < children.getLength()) {
        collected = appendTextContent(collected, children.item(index));
        index++;
    }
    return collected;
}

From source file:com.krawler.esp.utils.mime.MimeTypesReader.java

/**
 * Reads an element named {@code ext}: the character data of each text node
 * that is a direct child of {@code element} is added to {@code type} as an
 * extension. Children of any other node type are skipped.
 */
private void readExt(Element element, MimeType type) {
    NodeList children = element.getChildNodes();
    for (int idx = 0; idx < children.getLength(); idx++) {
        Node child = children.item(idx);
        if (child.getNodeType() != Node.TEXT_NODE) {
            continue;
        }
        Text text = (Text) child;
        type.addExtension(text.getData());
    }
}

From source file:com.concursive.connect.web.modules.wiki.utils.HTMLToWikiUtils.java

/**
 * Walks a list of DOM nodes and appends their wiki-markup equivalent to
 * {@code sb}, recursing into child nodes as needed.
 *
 * <p>Text and CDATA nodes are escaped and appended; element nodes are
 * dispatched on their tag name (headings, paragraphs, bold/italic, spans,
 * lists, pre/code, line breaks, tables, forms, links, images, objects). Any
 * other element is descended into without emitting its direct text.
 *
 * @param nodeList       nodes to convert, processed in iteration order;
 *                       {@code null} entries are skipped
 * @param sb             buffer that receives the generated markup
 * @param indentLevel    current list nesting depth; incremented while inside
 *                       ul/ol/dl and used as the number of list markers
 *                       emitted for each li
 * @param doText         when {@code false}, text and CDATA nodes at this
 *                       level are not emitted
 * @param withFormatting when {@code true}, bold/italic/span-style markers are
 *                       written around formatted content
 * @param trim           when {@code true}, the text of the last node in
 *                       {@code nodeList} is trimmed before being appended
 * @param appendToCRLF   string written after each {@code CRLF} this method
 *                       emits (the inline comments indicate it is used when
 *                       the content sits inside a table cell)
 * @param contextPath    passed through to the table, image, link and video
 *                       helpers
 * @param projectId      passed through to the table, image and link helpers
 */
public static void processChildNodes(ArrayList<Node> nodeList, StringBuffer sb, int indentLevel, boolean doText,
        boolean withFormatting, boolean trim, String appendToCRLF, String contextPath, int projectId) {
    Iterator nodeI = nodeList.iterator();
    while (nodeI.hasNext()) {
        Node n = (Node) nodeI.next();
        if (n != null) {
            if (n.getNodeType() == Node.TEXT_NODE || n.getNodeType() == Node.CDATA_SECTION_NODE) {
                // Text and CDATA are only emitted when the caller asked for text
                if (doText) {
                    String value = n.getNodeValue();
                    // Escaped characters: each is prefixed with a backslash;
                    // note that '[' and ']' are written as \{ and \}
                    value = StringUtils.replace(value, "*", "\\*");
                    value = StringUtils.replace(value, "#", "\\#");
                    value = StringUtils.replace(value, "=", "\\=");
                    value = StringUtils.replace(value, "|", "\\|");
                    value = StringUtils.replace(value, "[", "\\{");
                    value = StringUtils.replace(value, "]", "\\}");
                    // Trimming only applies to the last node of the list
                    if (trim && !nodeI.hasNext()) {
                        // If within a cell, make sure returns include the cell value
                        //              String value = (appendToCRLF.length() > 0 ? StringUtils.replace(n.getNodeValue(), CRLF, CRLF + appendToCRLF) : n.getNodeValue());
                        LOG.trace(" <text:trim>");
                        // Output the value, trim is required
                        sb.append(StringUtils.fromHtmlValue(value.trim()));
                    } else {
                        // If within a cell, make sure returns include the cell value
                        if (appendToCRLF.length() > 0
                                && (hasParentNodeType(n, "th") || hasParentNodeType(n, "td"))
                                && value.trim().length() == 0) {
                            // This is an empty value... check to see if the previous line has content or not before appending a new line
                            // (nothing is appended in this case)
                        } else {
                            LOG.trace(" <text>");
                            // When appendToCRLF is set, every CRLF inside the text is followed by it
                            sb.append(StringUtils.fromHtmlValue((appendToCRLF.length() > 0
                                    ? StringUtils.replace(value, CRLF, CRLF + appendToCRLF)
                                    : value)));
                        }
                    }
                }
            } else if (n.getNodeType() == Node.ELEMENT_NODE) {
                Element element = ((Element) n);
                String tag = element.getTagName();
                LOG.trace(tag);
                // Headings h1-h6: trimmed text content wrapped in 1-6 '=' signs on its own line
                if ("h1".equals(tag)) {
                    startOnNewLine(sb, appendToCRLF);
                    sb.append("= ").append(StringUtils.fromHtmlValue(element.getTextContent().trim()))
                            .append(" =").append(CRLF + appendToCRLF);
                } else if ("h2".equals(tag)) {
                    startOnNewLine(sb, appendToCRLF);
                    sb.append("== ").append(StringUtils.fromHtmlValue(element.getTextContent().trim()))
                            .append(" ==").append(CRLF + appendToCRLF);
                } else if ("h3".equals(tag)) {
                    startOnNewLine(sb, appendToCRLF);
                    sb.append("=== ").append(StringUtils.fromHtmlValue(element.getTextContent().trim()))
                            .append(" ===").append(CRLF + appendToCRLF);
                } else if ("h4".equals(tag)) {
                    startOnNewLine(sb, appendToCRLF);
                    sb.append("==== ").append(StringUtils.fromHtmlValue(element.getTextContent().trim()))
                            .append(" ====").append(CRLF + appendToCRLF);
                } else if ("h5".equals(tag)) {
                    startOnNewLine(sb, appendToCRLF);
                    sb.append("===== ").append(StringUtils.fromHtmlValue(element.getTextContent().trim()))
                            .append(" =====").append(CRLF + appendToCRLF);
                } else if ("h6".equals(tag)) {
                    startOnNewLine(sb, appendToCRLF);
                    sb.append("====== ").append(StringUtils.fromHtmlValue(element.getTextContent().trim()))
                            .append(" ======").append(CRLF + appendToCRLF);
                } else if ("p".equals(tag) || "div".equals(tag)) {
                    // Blocks with neither text nor images produce no output
                    if (n.getChildNodes().getLength() > 0
                            && (hasTextContent(n) || hasImageNodes(n.getChildNodes()))) {
                        // If this contains a Table, UL, OL, or object skip everything else to get there
                        ArrayList<Node> subNodes = new ArrayList<Node>();
                        getNodes(n.getChildNodes(), subNodes, new String[] { "table", "ul", "ol", "object" },
                                false);
                        if (subNodes.size() > 0) {
                            LOG.trace("  nonTextNodes - yes");
                            processChildNodes(subNodes, sb, indentLevel, true, true, false, appendToCRLF,
                                    contextPath, projectId);
                        } else {
                            LOG.trace("  nonTextNodes - no");
                            startOnNewLine(sb, appendToCRLF);
                            processChildNodes(getNodeList(n), sb, indentLevel, true, true, false, appendToCRLF,
                                    contextPath, projectId);
                        }
                    }
                } else if ("strong".equals(tag) || "b".equals(tag)) {
                    // Bold: ''' markers are written only around text-only content when formatting is on
                    if (n.getChildNodes().getLength() > 0) {
                        if ("".equals(StringUtils.fromHtmlValue(n.getTextContent()).trim())) {
                            // No visible text: process children with formatting disabled
                            processChildNodes(getNodeList(n), sb, indentLevel, true, false, false, appendToCRLF,
                                    contextPath, projectId);
                        } else {
                            if (hasNonTextNodes(n.getChildNodes())) {
                                processChildNodes(getNodeList(n), sb, indentLevel, true, withFormatting, false,
                                        appendToCRLF, contextPath, projectId);
                            } else {
                                if (withFormatting) {
                                    sb.append("'''");
                                }
                                processChildNodes(getNodeList(n), sb, indentLevel, true, withFormatting, false,
                                        appendToCRLF, contextPath, projectId);
                                if (withFormatting) {
                                    sb.append("'''");
                                }
                            }
                        }
                    }
                } else if ("em".equals(tag) || "i".equals(tag)) {
                    // Italic: same shape as bold, using '' markers and passing trim through
                    if (n.getChildNodes().getLength() > 0) {
                        if ("".equals(StringUtils.fromHtmlValue(n.getTextContent()).trim())) {
                            processChildNodes(getNodeList(n), sb, indentLevel, true, false, trim, appendToCRLF,
                                    contextPath, projectId);
                        } else {
                            if (hasNonTextNodes(n.getChildNodes())) {
                                processChildNodes(getNodeList(n), sb, indentLevel, true, withFormatting, trim,
                                        appendToCRLF, contextPath, projectId);
                            } else {
                                if (withFormatting) {
                                    sb.append("''");
                                }
                                processChildNodes(getNodeList(n), sb, indentLevel, true, withFormatting, trim,
                                        appendToCRLF, contextPath, projectId);
                                if (withFormatting) {
                                    sb.append("''");
                                }
                            }
                        }
                    }
                } else if ("span".equals(tag)) {
                    // Spans with no visible text are dropped entirely
                    if (n.getChildNodes().getLength() > 0
                            && !"".equals(StringUtils.fromHtmlValue(n.getTextContent()).trim())) {
                        if (element.hasAttribute("style")) {
                            String value = element.getAttribute("style");
                            // Opening markers are chosen by substring match on the style attribute
                            if (withFormatting) {
                                if (value.contains("underline")) {
                                    sb.append("__");
                                }
                                if (value.contains("line-through")) {
                                    sb.append("<s>");
                                }
                                if (value.contains("bold")) {
                                    sb.append("'''");
                                }
                                if (value.contains("italic")) {
                                    sb.append("''");
                                }
                            }
                            processChildNodes(getNodeList(n), sb, indentLevel, true, withFormatting, trim,
                                    appendToCRLF, contextPath, projectId);
                            // Closing markers are written in the reverse order of the opening ones
                            if (withFormatting) {
                                if (value.contains("italic")) {
                                    sb.append("''");
                                }
                                if (value.contains("bold")) {
                                    sb.append("'''");
                                }
                                if (value.contains("line-through")) {
                                    sb.append("</s>");
                                }
                                if (value.contains("underline")) {
                                    sb.append("__");
                                }
                            }
                        } else {
                            processChildNodes(getNodeList(n), sb, indentLevel, true, withFormatting, trim,
                                    appendToCRLF, contextPath, projectId);
                        }
                    }
                } else if ("ul".equals(tag) || "ol".equals(tag) || "dl".equals(tag)) {
                    // Lists: raise the nesting depth for the children, restore it afterwards
                    ++indentLevel;
                    if (indentLevel == 1) {
                        if (appendToCRLF.length() == 0) {
                            startOnNewLine(sb, appendToCRLF);
                        } else {
                            // Something\n
                            // !
                            // !* Item 1
                            // !* Item 2
                            if (!sb.toString().endsWith("|") && !sb.toString().endsWith(CRLF + appendToCRLF)) {
                                LOG.trace("ul newline CRLF");
                                sb.append(CRLF + appendToCRLF);
                            }
                        }
                    }
                    if (indentLevel > 1 && !sb.toString().endsWith(CRLF + appendToCRLF)) {
                        LOG.trace("ul indent CRLF");
                        sb.append(CRLF + appendToCRLF);
                    }
                    // doText is false here, so text directly inside the list element is not emitted
                    processChildNodes(getNodeList(n), sb, indentLevel, false, false, trim, appendToCRLF,
                            contextPath, projectId);
                    --indentLevel;
                } else if ("li".equals(tag)) {
                    // NOTE(review): the cast assumes the parent of an li is an Element; confirm for malformed input
                    String parentTag = ((Element) element.getParentNode()).getTagName();
                    // One marker per nesting level: '*' under ul, '#' under ol, nothing for other parents
                    for (int counter = 0; counter < indentLevel; counter++) {
                        if ("ul".equals(parentTag)) {
                            sb.append("*");
                        } else if ("ol".equals(parentTag)) {
                            sb.append("#");
                        }
                    }
                    sb.append(" ");
                    processChildNodes(getNodeList(n), sb, indentLevel, true, false, true, appendToCRLF,
                            contextPath, projectId);
                    // Make sure the item ends with a line break
                    if (!sb.toString().endsWith(CRLF + appendToCRLF)) {
                        LOG.trace("li CRLF");
                        sb.append(CRLF + appendToCRLF);
                    }
                } else if ("dt".equals(tag) || "dd".equals(tag)) {
                    processChildNodes(getNodeList(n), sb, indentLevel, true, false, trim, appendToCRLF,
                            contextPath, projectId);
                    if (!sb.toString().endsWith(CRLF + appendToCRLF)) {
                        LOG.trace("dt CRLF");
                        sb.append(CRLF + appendToCRLF);
                    }
                } else if ("pre".equals(tag)) {
                    // pre is passed through as a literal <pre>...</pre> pair
                    startOnNewLine(sb, appendToCRLF);
                    sb.append("<pre>");
                    processChildNodes(getNodeList(n), sb, indentLevel, true, true, trim, appendToCRLF,
                            contextPath, projectId);
                    sb.append("</pre>");
                    // Two line breaks follow the block unless it is the last node
                    if (nodeI.hasNext()) {
                        sb.append(CRLF + appendToCRLF);
                        sb.append(CRLF + appendToCRLF);
                    }
                } else if ("code".equals(tag)) {
                    // code is handled the same way as pre, with <code>...</code>
                    startOnNewLine(sb, appendToCRLF);
                    sb.append("<code>");
                    processChildNodes(getNodeList(n), sb, indentLevel, true, true, trim, appendToCRLF,
                            contextPath, projectId);
                    sb.append("</code>");
                    if (nodeI.hasNext()) {
                        sb.append(CRLF + appendToCRLF);
                        sb.append(CRLF + appendToCRLF);
                    }
                } else if ("br".equals(tag)) {
                    LOG.trace("br CRLF");
                    sb.append(CRLF + appendToCRLF);
                } else if ("table".equals(tag)) {
                    // Always start a table on a new line
                    startOnNewLine(sb, appendToCRLF);
                    processTable(n.getChildNodes(), sb, 0, false, false, contextPath, projectId, 0);
                    //if (nodeI.hasNext()) {
                    //  sb.append(CRLF);
                    //}
                } else if ("form".equals(tag)) {
                    // Always start a form on a new line
                    startOnNewLine(sb, appendToCRLF);
                    CustomForm form = processForm(n);
                    convertFormToWiki(form, sb);
                } else if ("a".equals(tag)) {
                    // Determine if the link is around text or around an image
                    if (n.getChildNodes().getLength() > 0 && hasImageNodes(n.getChildNodes())) {
                        // The link is around an image
                        LOG.debug("Processing link as an image");
                        // Get the img tag and pass to processImage...
                        ArrayList<Node> subNodes = new ArrayList<Node>();
                        getNodes(n.getChildNodes(), subNodes, new String[] { "img" }, false);
                        // Only the first img found is used
                        processImage(sb, subNodes.get(0), (Element) subNodes.get(0), appendToCRLF, contextPath,
                                projectId);
                    } else {
                        // The link is around text
                        processLink(sb, element, appendToCRLF, contextPath, projectId);
                    }
                } else if ("img".equals(tag)) {
                    processImage(sb, n, element, appendToCRLF, contextPath, projectId);
                } else if ("object".equals(tag)) {
                    startOnNewLine(sb, appendToCRLF);
                    processVideo(sb, n, element, appendToCRLF, contextPath);
                } else {
                    // Any other element: descend into its children, with doText=false so
                    // text directly inside it is not emitted
                    processChildNodes(getNodeList(n), sb, indentLevel, false, true, trim, appendToCRLF,
                            contextPath, projectId);
                }
            }
        }
    }
}

From source file:com.git.original.common.config.XMLFileConfigDocument.java

/**
 * Converts a DOM element into a {@code ConfigNode}.
 *
 * <p>The new node is named after the element's tag. Every attribute of the
 * element is copied onto it, every child element is converted recursively and
 * added as a child, and the value of each child text node is assigned to
 * {@code value} (so the last text node encountered wins).
 *
 * @param elem
 *            XML element to convert; may be {@code null}
 * @return the converted node, or {@code null} when {@code elem} is
 *         {@code null}
 */
static ConfigNode convertElement(Element elem) {
    if (elem == null) {
        return null;
    }

    ConfigNode result = new ConfigNode(elem.getTagName(), null);

    // Copy the element's own attributes.
    NamedNodeMap attributes = elem.getAttributes();
    if (attributes != null) {
        for (int a = 0; a < attributes.getLength(); a++) {
            Node attribute = attributes.item(a);
            result.addAttribute(attribute.getNodeName(), attribute.getNodeValue());
        }
    }

    NodeList children = elem.getChildNodes();
    if (children == null) {
        return result;
    }

    for (int c = 0; c < children.getLength(); c++) {
        Node child = children.item(c);
        short kind = child.getNodeType();

        if (kind == Node.ELEMENT_NODE) {
            result.addChild(convertElement((Element) child));
        } else if (kind == Node.TEXT_NODE) {
            // Each text node overwrites the previously stored value.
            result.value = child.getNodeValue();
        } else if (kind == Node.ATTRIBUTE_NODE) {
            result.addAttribute(child.getNodeName(), child.getNodeValue());
        }
        // All other node types are ignored.
    }

    return result;
}

From source file:com.cellngine.util.FXMLValidator.java

/**
 * Validates {@code node} and, recursively, all of its child nodes.
 *
 * <p>Elements must be accepted by {@code ALLOWED_ELEMENTS} and have each of
 * their attributes checked; {@code import} processing instructions must have
 * a value contained in {@code ALLOWED_IMPORTS}; text, document and comment
 * nodes are accepted as they are. Any other node is rejected.
 *
 * @param node the node to validate
 * @throws InvalidFXMLException if the node or one of its descendants is not
 *                              allowed
 */
private void checkNode(final Node node) throws InvalidFXMLException {
    final String nodeName = node.getNodeName();
    final short nodeType = node.getNodeType();

    switch (nodeType) {
    case Node.ELEMENT_NODE:
        if (!ALLOWED_ELEMENTS.isElementAllowed(nodeName)) {
            throw new InvalidFXMLException("Element type \"" + nodeName + "\" not allowed");
        }
        final NamedNodeMap attributes = node.getAttributes();
        for (int a = 0; a < attributes.getLength(); a++) {
            checkAttributeNode(attributes.item(a), nodeName);
        }
        break;

    case Node.TEXT_NODE:
    case Node.DOCUMENT_NODE:
    case Node.COMMENT_NODE:
        // Nothing to validate on the node itself.
        break;

    default:
        if (nodeType == Node.PROCESSING_INSTRUCTION_NODE && nodeName.equals("import")) {
            final String imported = node.getNodeValue();
            if (!ALLOWED_IMPORTS.contains(imported)) {
                throw new InvalidFXMLException("Import \"" + imported + "\" not allowed.");
            }
        } else {
            throw new InvalidFXMLException("Unrecognized node: type: \"" + nodeType + "\", name: \""
                    + nodeName + "\", value: \"" + node.getNodeValue() + "\"");
        }
        break;
    }

    // Children are validated regardless of the node's own type.
    final NodeList children = node.getChildNodes();
    for (int c = 0; c < children.getLength(); c++) {
        checkNode(children.item(c));
    }
}

From source file:fr.gouv.finances.dgfip.xemelios.utils.TextWriter.java

/**
 * Concatenates the data of the direct text children of {@code node}.
 *
 * @param node the node whose children are inspected
 * @return the concatenated text (empty when there are no children), or
 *         {@code null} as soon as a child that is not a text node is found
 */
private String getChildText(Node node) {
    StringBuilder text = new StringBuilder();
    NodeList children = node.getChildNodes();
    for (int i = 0, count = children.getLength(); i < count; i++) {
        Node child = children.item(i);
        if (child.getNodeType() != Node.TEXT_NODE) {
            // Mixed content: signal it to the caller with null.
            return null;
        }
        text.append(((Text) child).getData());
    }
    return text.toString();
}

From source file:com.crawljax.plugins.adi.Report.java

/**
 * Taken from ErrorReport./*from w  w  w.j a va 2  s.c o  m*/
 */
private Document addMarker(String id, Document doc, String xpath) {
    try {

        String prefixMarker = "###BEGINMARKER" + id + "###";
        String suffixMarker = "###ENDMARKER###";

        NodeList nodeList = XPathHelper.evaluateXpathExpression(doc, xpath);

        if (nodeList.getLength() == 0 || nodeList.item(0) == null) {
            return doc;
        }

        Node element = nodeList.item(0);

        if (element.getNodeType() == Node.ELEMENT_NODE) {
            Node beginNode = doc.createTextNode(prefixMarker);
            Node endNode = doc.createTextNode(suffixMarker);

            element.getParentNode().insertBefore(beginNode, element);
            if (element.getNextSibling() == null) {
                element.getParentNode().appendChild(endNode);
            } else {
                element.getParentNode().insertBefore(endNode, element.getNextSibling());
            }
        } else if (element.getNodeType() == Node.TEXT_NODE && element.getTextContent() != null) {
            element.setTextContent(prefixMarker + element.getTextContent() + suffixMarker);
        } else if (element.getNodeType() == Node.ATTRIBUTE_NODE) {
            element.setNodeValue(prefixMarker + element.getTextContent() + suffixMarker);
        }

        return doc;
    } catch (Exception e) {
        return doc;
    }
}

From source file:de.fau.cs.osr.hddiff.perfsuite.RunFcDiff.java

/**
 * Evaluates one node of the edit script against the two document trees.
 *
 * <p>At level 0 the node must be the {@code diff} root with exactly one
 * child; evaluation continues with that child and the first child of
 * {@code womB}. Below level 0 the node is handled as a reference to a node of
 * the original document, as a copy of a run of siblings, or as a new node.
 *
 * @param diffNode current node of the edit script
 * @param parentA  parent on the original-document side
 * @param womB     current node on the other document's side
 * @param level    depth in the edit script, 0 for the root
 * @return the result of {@code copyRun} for a copy instruction, otherwise 1
 *         (presumably the number of nodes consumed; confirm against callers)
 * @throws ComparisonException declared by the signature; which call raises it
 *                             cannot be seen from this method
 */
private int evaluateDiff(Node diffNode, DiffNode parentA, DiffNode womB, int level) throws ComparisonException {
    if (level == 0) {
        // fcDiffConfusesMe() flags unexpected input (presumably by throwing; confirm).
        if (!cmpTag(diffNode, NS_DIFF, "diff")) {
            fcDiffConfusesMe();
        }

        NodeList rootChildren = diffNode.getChildNodes();
        if (rootChildren.getLength() != 1) {
            fcDiffConfusesMe();
        }

        Element onlyChild = (Element) rootChildren.item(0);
        return evaluateDiff(onlyChild, parentA, womB.getFirstChild(), level + 1);
    }

    if (cmpTag(diffNode, NS_REF, "node")) {
        // The edit script is referencing a node from the original document.
        // The node's content is refined by the child nodes of the ref:node
        // element.
        DiffNode referenced = getWom(parentA, (Element) diffNode);
        processNodeRef(diffNode, referenced, referenced, womB, level);
        return 1;
    }

    if (cmpTag(diffNode, NS_DIFF, "copy")) {
        // The edit script copies a range of sibling elements from the
        // original document.
        DiffNode[] copiedRun = getWomRun(parentA, (Element) diffNode);
        return copyRun(womB, copiedRun);
    }

    if (diffNode.getNodeType() == Node.TEXT_NODE) {
        fcDiffConfusesMe();
        return 1;
    }

    // Anything else is a node that is new in the edit script.
    DiffNode created = womB.createSame(root1);
    processNewNode(diffNode, parentA, created, womB, level);

    // NOTE: a disabled check used to follow here; it compared the namespace URI
    // and local name of diffNode with those of the new node's native
    // Wom3Node and called map() when both matched.

    return 1;
}

From source file:org.dozer.eclipse.plugin.sourcepage.contentassist.DozerContentAssistProcessor.java

/**
 * Computes content-assist proposals at an end-tag-open position.
 *
 * <p>For the class-like mapping elements ({@code class-a}, {@code class-b},
 * {@code a-hint}, {@code b-hint}, {@code a-deep-index-hint},
 * {@code b-deep-index-hint}) class proposals are computed; for {@code a} and
 * {@code b} elements whose parent is {@code field} or {@code field-exclude}
 * property proposals are computed. In both cases an empty text node is first
 * appended to {@code nodeAtOffset} when it has no child, and the proposals
 * are only computed when the first child is a text node. Everything else is
 * delegated to the superclass.
 */
@Override
@SuppressWarnings("restriction")
protected ContentAssistRequest computeEndTagOpenProposals(int documentPosition, String matchString,
        ITextRegion completionRegion, IDOMNode nodeAtOffset, IDOMNode node) {
    Node firstChild = nodeAtOffset.getFirstChild();

    final String name = node.getNodeName();
    final boolean classLike = "class-a".equals(name) || "class-b".equals(name)
            || "a-hint".equals(name) || "b-hint".equals(name)
            || "a-deep-index-hint".equals(name) || "b-deep-index-hint".equals(name);
    final boolean fieldLike = !classLike && ("a".equals(name) || "b".equals(name));

    if (classLike || fieldLike) {
        // Make sure there is a node to attach the proposals to.
        if (firstChild == null) {
            firstChild = nodeAtOffset.appendChild(node.getOwnerDocument().createTextNode(""));
        }

        if (firstChild.getNodeType() == Node.TEXT_NODE) {
            if (classLike) {
                return computeDozerClassContentProposals(documentPosition, firstChild.getNodeValue(),
                        completionRegion, (IDOMNode) firstChild, node);
            }

            final String parentName = node.getParentNode().getNodeName();
            if ("field".equals(parentName) || "field-exclude".equals(parentName)) {
                return computeDozerPropertyContentProposals(documentPosition, firstChild.getNodeValue(),
                        completionRegion, (IDOMNode) firstChild, node);
            }
        }
    }

    return super.computeEndTagOpenProposals(documentPosition, matchString, completionRegion, nodeAtOffset,
            node);
}

From source file:com.duroty.lucene.parser.HtmlParser.java

/**
 * DOCUMENT ME!/*from  w  w  w. java 2  s.c  o m*/
 *
 * @param node DOCUMENT ME!
 *
 * @return DOCUMENT ME!
 */
private String getBodyText(Node node) {
    NodeList nl = node.getChildNodes();
    StringBuffer buffer = new StringBuffer();

    for (int i = 0; i < nl.getLength(); i++) {
        Node child = nl.item(i);

        switch (child.getNodeType()) {
        case Node.ELEMENT_NODE:

            if (!child.getNodeName().toLowerCase().equals("script")) {
                buffer.append(getBodyText(child));
                buffer.append(" \n");
            }

            break;

        case Node.TEXT_NODE:
            buffer.append(((Text) child).getData());

            break;
        }
    }

    return buffer.toString();
}