// Java tutorial
//package com.java2s;
/*
 * ====================
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2008-2009 Sun Microsystems, Inc. All rights reserved.
 *
 * The contents of this file are subject to the terms of the Common Development
 * and Distribution License("CDDL") (the "License"). You may not use this file
 * except in compliance with the License.
 *
 * You can obtain a copy of the License at
 * http://opensource.org/licenses/cddl1.php
 * See the License for the specific language governing permissions and limitations
 * under the License.
 *
 * When distributing the Covered Code, include this CDDL Header Notice in each file
 * and include the License file at http://opensource.org/licenses/cddl1.php.
 * If applicable, add the following below this CDDL Header, with the fields
 * enclosed by brackets [] replaced by your own identifying information:
 * "Portions Copyrighted [year] [name of copyright owner]"
 * ====================
 */

import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.Text;

/**
 * Static helpers for extracting character data from a DOM element.
 */
public class Main {

    /**
     * Returns the content of the given element.
     * <p>
     * We descend to an arbitrary depth looking for the first text node
     * (document order, depth first).
     * <p>
     * Note that the parser may break what was originally a single string of
     * pcdata into multiple adjacent text nodes. Xerces appears to do this when
     * it encounters a '$' in the text; it is not clear whether that is
     * specified behavior or parser specific.
     * <p>
     * Here, we congeal adjacent text nodes: starting at the first text node
     * found, the data of every immediately following TEXT or CDATA sibling is
     * appended, stopping at the first sibling of any other type.
     * <p>
     * We do NOT ignore text nodes that contain only whitespace.
     *
     * @param e the element to read; may be {@code null}
     * @return the concatenated text, or {@code null} if {@code e} is
     *         {@code null} or no text node exists at or below it
     */
    public static String getContent(Element e) {
        if (e == null) {
            return null;
        }

        // Find the first inner text node; whitespace-only nodes count.
        Text first = findText(e, false);
        if (first == null) {
            return null;
        }

        // We have at least some text: merge the run of adjacent text siblings.
        StringBuilder merged = new StringBuilder();
        for (Node n = first; isTextual(n); n = n.getNextSibling()) {
            merged.append(((Text) n).getData());
        }
        return merged.toString();
    }

    /**
     * Locates the first text node at the given node or at any level below it.
     * If the {@code ignoreEmpty} flag is true, text nodes that contain only
     * whitespace characters are skipped.
     * <p>
     * Note that if you're trying to extract element content, you probably
     * don't want this, since parsers can break up pcdata into multiple
     * adjacent text nodes. See {@link #getContent(Element)} for a more useful
     * method.
     *
     * @param node        the node at which the search starts; may be {@code null}
     * @param ignoreEmpty whether to skip text nodes whose trimmed data is empty
     * @return the first matching text node, or {@code null} if there is none
     */
    private static Text findText(Node node, boolean ignoreEmpty) {
        if (node == null) {
            return null;
        }

        if (isTextual(node)) {
            Text text = (Text) node;
            if (!ignoreEmpty || text.getData().trim().length() > 0) {
                return text;
            }
        }

        // Depth-first search through the children, stopping at the first hit.
        for (Node child = node.getFirstChild(); child != null; child = child.getNextSibling()) {
            Text found = findText(child, ignoreEmpty);
            if (found != null) {
                return found;
            }
        }
        return null;
    }

    /**
     * Tells whether the node is a TEXT or CDATA section node, i.e. one that
     * can safely be cast to {@link Text}.
     *
     * @param node the node to test; may be {@code null}
     * @return {@code true} for a non-null TEXT_NODE or CDATA_SECTION_NODE
     */
    private static boolean isTextual(Node node) {
        if (node == null) {
            return false;
        }
        short type = node.getNodeType();
        return type == Node.TEXT_NODE || type == Node.CDATA_SECTION_NODE;
    }
}