List of usage examples for org.w3c.dom Node TEXT_NODE
short TEXT_NODE
To view the source code for org.w3c.dom Node TEXT_NODE, click the Source Link.
The node is a Text node.
From source file: com.twentyn.patentExtractor.PatentDocumentFeatures.java
private static List<String> appendTextContent(List<String> textList, Node n) { if (n.getNodeType() == Node.TEXT_NODE) { textList.add(n.getTextContent()); } else {//from w ww . jav a2 s .c o m NodeList childNodes = n.getChildNodes(); for (int j = 0; j < childNodes.getLength(); j++) { Node childNode = childNodes.item(j); textList = appendTextContent(textList, childNode); } } return textList; }
From source file:com.krawler.esp.utils.mime.MimeTypesReader.java
/**
 * Reads an element named ext: the data of each direct text child of
 * {@code element} is passed to {@code type.addExtension}.
 *
 * @param element the element whose direct children are scanned
 * @param type    receives one {@code addExtension} call per text child
 */
private void readExt(Element element, MimeType type) {
    NodeList children = element.getChildNodes();
    for (int idx = 0; idx < children.getLength(); idx++) {
        Node child = children.item(idx);
        if (child.getNodeType() != Node.TEXT_NODE) {
            continue; // only plain text children carry an extension
        }
        Text text = (Text) child;
        type.addExtension(text.getData());
    }
}
From source file:com.concursive.connect.web.modules.wiki.utils.HTMLToWikiUtils.java
/**
 * Walks {@code nodeList} in order and appends a wiki-markup rendering of every
 * non-null node to {@code sb}.
 *
 * <p>Text and CDATA nodes are written only when {@code doText} is true, after the
 * characters "*", "#", "=" and "|" are backslash-escaped and "[" / "]" are replaced
 * by "\{" / "\}". Element nodes are dispatched on their tag name; most branches
 * recurse into the element's children, and tags without a dedicated branch recurse
 * with {@code doText} set to false.
 *
 * @param nodeList       the nodes to render; null entries are skipped
 * @param sb             output buffer that receives the markup
 * @param indentLevel    current list depth; incremented while a ul/ol/dl is processed and
 *                       used as the number of "*" / "#" markers emitted for an li
 * @param doText         whether text and CDATA nodes found directly in {@code nodeList}
 *                       are written
 * @param withFormatting whether bold/italic/underline/strike markers are emitted for
 *                       strong, b, em, i and styled span elements
 * @param trim           when true, the text of the last node in {@code nodeList} is trimmed
 * @param appendToCRLF   text appended after each CRLF this method writes; a non-empty value
 *                       also changes blank-text and list handling (the in-code comments
 *                       describe this as being within a table cell)
 * @param contextPath    passed through to processTable, processImage, processLink and
 *                       processVideo
 * @param projectId      passed through to processTable, processImage and processLink
 */
public static void processChildNodes(ArrayList<Node> nodeList, StringBuffer sb, int indentLevel,
        boolean doText, boolean withFormatting, boolean trim, String appendToCRLF, String contextPath,
        int projectId) {
    // Raw Iterator, hence the cast on next(); hasNext() is also consulted below to
    // detect whether the current node is the last one in the list.
    Iterator nodeI = nodeList.iterator();
    while (nodeI.hasNext()) {
        Node n = (Node) nodeI.next();
        if (n != null) {
            if (n.getNodeType() == Node.TEXT_NODE || n.getNodeType() == Node.CDATA_SECTION_NODE) {
                if (doText) {
                    String value = n.getNodeValue();
                    // Escaped characters
                    value = StringUtils.replace(value, "*", "\\*");
                    value = StringUtils.replace(value, "#", "\\#");
                    value = StringUtils.replace(value, "=", "\\=");
                    value = StringUtils.replace(value, "|", "\\|");
                    value = StringUtils.replace(value, "[", "\\{");
                    value = StringUtils.replace(value, "]", "\\}");
                    if (trim && !nodeI.hasNext()) {
                        // If within a cell, make sure returns include the cell value
                        // String value = (appendToCRLF.length() > 0 ? StringUtils.replace(n.getNodeValue(), CRLF, CRLF + appendToCRLF) : n.getNodeValue());
                        LOG.trace(" <text:trim>");
                        // Output the value, trim is required
                        sb.append(StringUtils.fromHtmlValue(value.trim()));
                    } else {
                        // If within a cell, make sure returns include the cell value
                        if (appendToCRLF.length() > 0 && (hasParentNodeType(n, "th") || hasParentNodeType(n, "td"))
                                && value.trim().length() == 0) {
                            // This is an empty value... check to see if the previous line has content or not before appending a new line
                            // (intentionally empty: blank text inside a th/td is dropped)
                        } else {
                            LOG.trace(" <text>");
                            sb.append(StringUtils.fromHtmlValue((appendToCRLF.length() > 0
                                    ? StringUtils.replace(value, CRLF, CRLF + appendToCRLF)
                                    : value)));
                        }
                    }
                }
            } else if (n.getNodeType() == Node.ELEMENT_NODE) {
                Element element = ((Element) n);
                String tag = element.getTagName();
                LOG.trace(tag);
                // Headings h1..h6: the element's trimmed text wrapped in 1..6 "=" signs on its own line.
                if ("h1".equals(tag)) {
                    startOnNewLine(sb, appendToCRLF);
                    sb.append("= ").append(StringUtils.fromHtmlValue(element.getTextContent().trim()))
                            .append(" =").append(CRLF + appendToCRLF);
                } else if ("h2".equals(tag)) {
                    startOnNewLine(sb, appendToCRLF);
                    sb.append("== ").append(StringUtils.fromHtmlValue(element.getTextContent().trim()))
                            .append(" ==").append(CRLF + appendToCRLF);
                } else if ("h3".equals(tag)) {
                    startOnNewLine(sb, appendToCRLF);
                    sb.append("=== ").append(StringUtils.fromHtmlValue(element.getTextContent().trim()))
                            .append(" ===").append(CRLF + appendToCRLF);
                } else if ("h4".equals(tag)) {
                    startOnNewLine(sb, appendToCRLF);
                    sb.append("==== ").append(StringUtils.fromHtmlValue(element.getTextContent().trim()))
                            .append(" ====").append(CRLF + appendToCRLF);
                } else if ("h5".equals(tag)) {
                    startOnNewLine(sb, appendToCRLF);
                    sb.append("===== ").append(StringUtils.fromHtmlValue(element.getTextContent().trim()))
                            .append(" =====").append(CRLF + appendToCRLF);
                } else if ("h6".equals(tag)) {
                    startOnNewLine(sb, appendToCRLF);
                    sb.append("====== ").append(StringUtils.fromHtmlValue(element.getTextContent().trim()))
                            .append(" ======").append(CRLF + appendToCRLF);
                } else if ("p".equals(tag) || "div".equals(tag)) {
                    // Paragraph-like containers with neither text nor images produce no output.
                    if (n.getChildNodes().getLength() > 0 && (hasTextContent(n) || hasImageNodes(n.getChildNodes()))) {
                        // If this contains a Table, UL, OL, or object skip everything else to get there
                        ArrayList<Node> subNodes = new ArrayList<Node>();
                        getNodes(n.getChildNodes(), subNodes, new String[] { "table", "ul", "ol", "object" }, false);
                        if (subNodes.size() > 0) {
                            LOG.trace(" nonTextNodes - yes");
                            processChildNodes(subNodes, sb, indentLevel, true, true, false, appendToCRLF, contextPath,
                                    projectId);
                        } else {
                            LOG.trace(" nonTextNodes - no");
                            startOnNewLine(sb, appendToCRLF);
                            processChildNodes(getNodeList(n), sb, indentLevel, true, true, false, appendToCRLF,
                                    contextPath, projectId);
                        }
                    }
                } else if ("strong".equals(tag) || "b".equals(tag)) {
                    if (n.getChildNodes().getLength() > 0) {
                        if ("".equals(StringUtils.fromHtmlValue(n.getTextContent()).trim())) {
                            // No visible text: recurse with formatting off so no bold markers are emitted.
                            processChildNodes(getNodeList(n), sb, indentLevel, true, false, false, appendToCRLF,
                                    contextPath, projectId);
                        } else {
                            if (hasNonTextNodes(n.getChildNodes())) {
                                // When hasNonTextNodes reports true, the children are rendered
                                // without bold markers around them.
                                processChildNodes(getNodeList(n), sb, indentLevel, true, withFormatting, false,
                                        appendToCRLF, contextPath, projectId);
                            } else {
                                if (withFormatting) {
                                    sb.append("'''");
                                }
                                processChildNodes(getNodeList(n), sb, indentLevel, true, withFormatting, false,
                                        appendToCRLF, contextPath, projectId);
                                if (withFormatting) {
                                    sb.append("'''");
                                }
                            }
                        }
                    }
                } else if ("em".equals(tag) || "i".equals(tag)) {
                    // Same shape as strong/b, but with italic markers and the caller's trim flag passed on.
                    if (n.getChildNodes().getLength() > 0) {
                        if ("".equals(StringUtils.fromHtmlValue(n.getTextContent()).trim())) {
                            processChildNodes(getNodeList(n), sb, indentLevel, true, false, trim, appendToCRLF,
                                    contextPath, projectId);
                        } else {
                            if (hasNonTextNodes(n.getChildNodes())) {
                                processChildNodes(getNodeList(n), sb, indentLevel, true, withFormatting, trim,
                                        appendToCRLF, contextPath, projectId);
                            } else {
                                if (withFormatting) {
                                    sb.append("''");
                                }
                                processChildNodes(getNodeList(n), sb, indentLevel, true, withFormatting, trim,
                                        appendToCRLF, contextPath, projectId);
                                if (withFormatting) {
                                    sb.append("''");
                                }
                            }
                        }
                    }
                } else if ("span".equals(tag)) {
                    // Spans without visible text are dropped entirely.
                    if (n.getChildNodes().getLength() > 0
                            && !"".equals(StringUtils.fromHtmlValue(n.getTextContent()).trim())) {
                        if (element.hasAttribute("style")) {
                            String value = element.getAttribute("style");
                            // Opening markers are chosen by substring match on the style attribute.
                            if (withFormatting) {
                                if (value.contains("underline")) {
                                    sb.append("__");
                                }
                                if (value.contains("line-through")) {
                                    sb.append("<s>");
                                }
                                if (value.contains("bold")) {
                                    sb.append("'''");
                                }
                                if (value.contains("italic")) {
                                    sb.append("''");
                                }
                            }
                            processChildNodes(getNodeList(n), sb, indentLevel, true, withFormatting, trim,
                                    appendToCRLF, contextPath, projectId);
                            // Closing markers are emitted in the reverse order of the opening ones.
                            if (withFormatting) {
                                if (value.contains("italic")) {
                                    sb.append("''");
                                }
                                if (value.contains("bold")) {
                                    sb.append("'''");
                                }
                                if (value.contains("line-through")) {
                                    sb.append("</s>");
                                }
                                if (value.contains("underline")) {
                                    sb.append("__");
                                }
                            }
                        } else {
                            processChildNodes(getNodeList(n), sb, indentLevel, true, withFormatting, trim,
                                    appendToCRLF, contextPath, projectId);
                        }
                    }
                } else if ("ul".equals(tag) || "ol".equals(tag) || "dl".equals(tag)) {
                    // Entering a list: one level deeper for the duration of this branch.
                    ++indentLevel;
                    if (indentLevel == 1) {
                        if (appendToCRLF.length() == 0) {
                            startOnNewLine(sb, appendToCRLF);
                        } else {
                            // Something\n
                            // !
                            // !* Item 1
                            // !* Item 2
                            if (!sb.toString().endsWith("|") && !sb.toString().endsWith(CRLF + appendToCRLF)) {
                                LOG.trace("ul newline CRLF");
                                sb.append(CRLF + appendToCRLF);
                            }
                        }
                    }
                    if (indentLevel > 1 && !sb.toString().endsWith(CRLF + appendToCRLF)) {
                        LOG.trace("ul indent CRLF");
                        sb.append(CRLF + appendToCRLF);
                    }
                    // doText is false here: text sitting directly inside the list element is not written.
                    processChildNodes(getNodeList(n), sb, indentLevel, false, false, trim, appendToCRLF, contextPath,
                            projectId);
                    --indentLevel;
                } else if ("li".equals(tag)) {
                    // NOTE(review): the cast assumes the li's parent is an Element - confirm for fragments.
                    String parentTag = ((Element) element.getParentNode()).getTagName();
                    // One marker per nesting level; a parent other than ul/ol yields no marker.
                    for (int counter = 0; counter < indentLevel; counter++) {
                        if ("ul".equals(parentTag)) {
                            sb.append("*");
                        } else if ("ol".equals(parentTag)) {
                            sb.append("#");
                        }
                    }
                    sb.append(" ");
                    processChildNodes(getNodeList(n), sb, indentLevel, true, false, true, appendToCRLF, contextPath,
                            projectId);
                    if (!sb.toString().endsWith(CRLF + appendToCRLF)) {
                        LOG.trace("li CRLF");
                        sb.append(CRLF + appendToCRLF);
                    }
                } else if ("dt".equals(tag) || "dd".equals(tag)) {
                    processChildNodes(getNodeList(n), sb, indentLevel, true, false, trim, appendToCRLF, contextPath,
                            projectId);
                    if (!sb.toString().endsWith(CRLF + appendToCRLF)) {
                        LOG.trace("dt CRLF");
                        sb.append(CRLF + appendToCRLF);
                    }
                } else if ("pre".equals(tag)) {
                    startOnNewLine(sb, appendToCRLF);
                    sb.append("<pre>");
                    processChildNodes(getNodeList(n), sb, indentLevel, true, true, trim, appendToCRLF, contextPath,
                            projectId);
                    sb.append("</pre>");
                    // Two line breaks follow the block unless it is the last node in the list.
                    if (nodeI.hasNext()) {
                        sb.append(CRLF + appendToCRLF);
                        sb.append(CRLF + appendToCRLF);
                    }
                } else if ("code".equals(tag)) {
                    startOnNewLine(sb, appendToCRLF);
                    sb.append("<code>");
                    processChildNodes(getNodeList(n), sb, indentLevel, true, true, trim, appendToCRLF, contextPath,
                            projectId);
                    sb.append("</code>");
                    // Two line breaks follow the block unless it is the last node in the list.
                    if (nodeI.hasNext()) {
                        sb.append(CRLF + appendToCRLF);
                        sb.append(CRLF + appendToCRLF);
                    }
                } else if ("br".equals(tag)) {
                    LOG.trace("br CRLF");
                    sb.append(CRLF + appendToCRLF);
                } else if ("table".equals(tag)) {
                    // Always start a table on a new line
                    startOnNewLine(sb, appendToCRLF);
                    processTable(n.getChildNodes(), sb, 0, false, false, contextPath, projectId, 0);
                    //if (nodeI.hasNext()) {
                    //  sb.append(CRLF);
                    //}
                } else if ("form".equals(tag)) {
                    // Always start a form on a new line
                    startOnNewLine(sb, appendToCRLF);
                    CustomForm form = processForm(n);
                    convertFormToWiki(form, sb);
                } else if ("a".equals(tag)) {
                    // Determine if the link is around text or around an image
                    if (n.getChildNodes().getLength() > 0 && hasImageNodes(n.getChildNodes())) {
                        // The link is around an image
                        LOG.debug("Processing link as an image");
                        // Get the img tag and pass to processImage...
                        ArrayList<Node> subNodes = new ArrayList<Node>();
                        getNodes(n.getChildNodes(), subNodes, new String[] { "img" }, false);
                        // NOTE(review): get(0) assumes getNodes found an img whenever hasImageNodes is true - confirm.
                        processImage(sb, subNodes.get(0), (Element) subNodes.get(0), appendToCRLF, contextPath,
                                projectId);
                    } else {
                        // The link is around text
                        processLink(sb, element, appendToCRLF, contextPath, projectId);
                    }
                } else if ("img".equals(tag)) {
                    processImage(sb, n, element, appendToCRLF, contextPath, projectId);
                } else if ("object".equals(tag)) {
                    startOnNewLine(sb, appendToCRLF);
                    processVideo(sb, n, element, appendToCRLF, contextPath);
                } else {
                    // Any other tag: recurse into its children with doText off.
                    processChildNodes(getNodeList(n), sb, indentLevel, false, true, trim, appendToCRLF, contextPath,
                            projectId);
                }
            }
        }
    }
}
From source file:com.git.original.common.config.XMLFileConfigDocument.java
/**
 * Converts an XML element into a {@code ConfigNode} tree.
 *
 * <p>The node is named after the element's tag, receives every attribute of the
 * element, gets one child per child element (converted recursively), and has its
 * {@code value} set from text children; when several text children exist, the
 * last one wins.
 *
 * @param elem
 *            the XML element to convert; may be {@code null}
 * @return the converted node, or {@code null} when {@code elem} is {@code null}
 */
static ConfigNode convertElement(Element elem) {
    if (elem == null) {
        return null;
    }

    ConfigNode result = new ConfigNode(elem.getTagName(), null);

    NamedNodeMap attributes = elem.getAttributes();
    if (attributes != null) {
        for (int a = 0; a < attributes.getLength(); a++) {
            Node attribute = attributes.item(a);
            result.addAttribute(attribute.getNodeName(), attribute.getNodeValue());
        }
    }

    NodeList children = elem.getChildNodes();
    if (children == null || children.getLength() == 0) {
        return result;
    }

    for (int c = 0; c < children.getLength(); c++) {
        Node child = children.item(c);
        short kind = child.getNodeType();
        if (kind == Node.ATTRIBUTE_NODE) {
            // NOTE(review): attribute nodes are not expected among child nodes; branch kept as-is.
            result.addAttribute(child.getNodeName(), child.getNodeValue());
        } else if (kind == Node.ELEMENT_NODE) {
            ConfigNode converted = convertElement((Element) child);
            result.addChild(converted);
        } else if (kind == Node.TEXT_NODE) {
            result.value = child.getNodeValue();
        }
        // every other node type is ignored
    }
    return result;
}
From source file:com.cellngine.util.FXMLValidator.java
private void checkNode(final Node node) throws InvalidFXMLException { final String nodeName = node.getNodeName(); final short nodeType = node.getNodeType(); if (nodeType == Node.ELEMENT_NODE) { if (!ALLOWED_ELEMENTS.isElementAllowed(nodeName)) { throw new InvalidFXMLException("Element type \"" + nodeName + "\" not allowed"); }//www. j a v a2s . c o m final NamedNodeMap nodeAttributes = node.getAttributes(); for (int i = 0; i < nodeAttributes.getLength(); i++) { checkAttributeNode(nodeAttributes.item(i), nodeName); } } else if (nodeType == Node.TEXT_NODE || nodeType == Node.DOCUMENT_NODE) { } else if (nodeType == Node.PROCESSING_INSTRUCTION_NODE && node.getNodeName().equals("import")) { if (!ALLOWED_IMPORTS.contains(node.getNodeValue())) { throw new InvalidFXMLException("Import \"" + node.getNodeValue() + "\" not allowed."); } } else if (nodeType != Node.COMMENT_NODE) { throw new InvalidFXMLException("Unrecognized node: type: \"" + nodeType + "\", name: \"" + node.getNodeName() + "\", value: \"" + node.getNodeValue() + "\""); } final NodeList nodeChildren = node.getChildNodes(); for (int i = 0; i < nodeChildren.getLength(); i++) { checkNode(nodeChildren.item(i)); } }
From source file:fr.gouv.finances.dgfip.xemelios.utils.TextWriter.java
private String getChildText(Node node) { StringBuffer sb = new StringBuffer(); NodeList l = node.getChildNodes(); int size = l.getLength(); for (int i = 0; i < size; i++) { Node n = l.item(i);// ww w . ja v a 2 s .co m if (n.getNodeType() == Node.TEXT_NODE) { Text t = (Text) n; sb.append(t.getData()); } else return null; } return sb.toString(); }
From source file:com.crawljax.plugins.adi.Report.java
/** * Taken from ErrorReport./*from w w w.j a va 2 s.c o m*/ */ private Document addMarker(String id, Document doc, String xpath) { try { String prefixMarker = "###BEGINMARKER" + id + "###"; String suffixMarker = "###ENDMARKER###"; NodeList nodeList = XPathHelper.evaluateXpathExpression(doc, xpath); if (nodeList.getLength() == 0 || nodeList.item(0) == null) { return doc; } Node element = nodeList.item(0); if (element.getNodeType() == Node.ELEMENT_NODE) { Node beginNode = doc.createTextNode(prefixMarker); Node endNode = doc.createTextNode(suffixMarker); element.getParentNode().insertBefore(beginNode, element); if (element.getNextSibling() == null) { element.getParentNode().appendChild(endNode); } else { element.getParentNode().insertBefore(endNode, element.getNextSibling()); } } else if (element.getNodeType() == Node.TEXT_NODE && element.getTextContent() != null) { element.setTextContent(prefixMarker + element.getTextContent() + suffixMarker); } else if (element.getNodeType() == Node.ATTRIBUTE_NODE) { element.setNodeValue(prefixMarker + element.getTextContent() + suffixMarker); } return doc; } catch (Exception e) { return doc; } }
From source file:de.fau.cs.osr.hddiff.perfsuite.RunFcDiff.java
/**
 * Evaluates one node of the diff (edit script) document.
 *
 * <p>At level 0 the node has to be the diff:diff root with exactly one child,
 * which is then evaluated at level 1 against the first child of {@code womB}.
 * Below the root, a ref:node is resolved against {@code parentA}, a diff:copy
 * copies a run of siblings, a text node is reported via
 * {@code fcDiffConfusesMe()}, and anything else is handled as a new node.
 *
 * @param diffNode the edit-script node to evaluate
 * @param parentA  node used to resolve references into the original document
 * @param womB     the corresponding node on the other side of the comparison
 * @param level    depth in the edit script; 0 denotes the root
 * @return the result of {@code copyRun} for a diff:copy, the result of the
 *         recursive call at level 0, and 1 otherwise
 * @throws ComparisonException declared by this method; not thrown directly here
 */
private int evaluateDiff(Node diffNode, DiffNode parentA, DiffNode womB, int level) throws ComparisonException {
    if (level == 0) {
        if (!cmpTag(diffNode, NS_DIFF, "diff")) {
            fcDiffConfusesMe();
        }
        NodeList rootChildren = diffNode.getChildNodes();
        if (rootChildren.getLength() != 1) {
            fcDiffConfusesMe();
        }
        Element onlyChild = (Element) rootChildren.item(0);
        return evaluateDiff(onlyChild, parentA, womB.getFirstChild(), level + 1);
    }

    if (cmpTag(diffNode, NS_REF, "node")) {
        // The edit script references a node from the original document; the
        // children of the ref:node element refine that node's content.
        DiffNode referenced = getWom(parentA, (Element) diffNode);
        processNodeRef(diffNode, referenced, referenced, womB, level);
        return 1;
    }

    if (cmpTag(diffNode, NS_DIFF, "copy")) {
        // The edit script copies a range of sibling elements from the original document.
        DiffNode[] copiedRun = getWomRun(parentA, (Element) diffNode);
        return copyRun(womB, copiedRun);
    }

    if (diffNode.getNodeType() == Node.TEXT_NODE) {
        fcDiffConfusesMe();
        return 1;
    }

    DiffNode created = womB.createSame(root1);
    processNewNode(diffNode, parentA, created, womB, level);
    return 1;
}
From source file:org.dozer.eclipse.plugin.sourcepage.contentassist.DozerContentAssistProcessor.java
@Override @SuppressWarnings("restriction") protected ContentAssistRequest computeEndTagOpenProposals(int documentPosition, String matchString, ITextRegion completionRegion, IDOMNode nodeAtOffset, IDOMNode node) { Node firstChild = nodeAtOffset.getFirstChild(); if ("class-a".equals(node.getNodeName()) || "class-b".equals(node.getNodeName()) || "a-hint".equals(node.getNodeName()) || "b-hint".equals(node.getNodeName()) || "a-deep-index-hint".equals(node.getNodeName()) || "b-deep-index-hint".equals(node.getNodeName())) { if (firstChild == null) firstChild = nodeAtOffset.appendChild(node.getOwnerDocument().createTextNode("")); if (firstChild.getNodeType() == Node.TEXT_NODE) { return computeDozerClassContentProposals(documentPosition, firstChild.getNodeValue(), completionRegion, (IDOMNode) firstChild, node); }// ww w. ja v a 2 s . c o m } else if ("a".equals(node.getNodeName()) || "b".equals(node.getNodeName())) { if (firstChild == null) firstChild = nodeAtOffset.appendChild(node.getOwnerDocument().createTextNode("")); if (firstChild.getNodeType() == Node.TEXT_NODE && ("field".equals(node.getParentNode().getNodeName()) || "field-exclude".equals(node.getParentNode().getNodeName()))) { return computeDozerPropertyContentProposals(documentPosition, firstChild.getNodeValue(), completionRegion, (IDOMNode) firstChild, node); } } return super.computeEndTagOpenProposals(documentPosition, matchString, completionRegion, nodeAtOffset, node); }
From source file:com.duroty.lucene.parser.HtmlParser.java
/** * DOCUMENT ME!/*from w w w. java 2 s.c o m*/ * * @param node DOCUMENT ME! * * @return DOCUMENT ME! */ private String getBodyText(Node node) { NodeList nl = node.getChildNodes(); StringBuffer buffer = new StringBuffer(); for (int i = 0; i < nl.getLength(); i++) { Node child = nl.item(i); switch (child.getNodeType()) { case Node.ELEMENT_NODE: if (!child.getNodeName().toLowerCase().equals("script")) { buffer.append(getBodyText(child)); buffer.append(" \n"); } break; case Node.TEXT_NODE: buffer.append(((Text) child).getData()); break; } } return buffer.toString(); }