Java tutorial
import java.io.File; import java.io.IOException; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; public class MainClass { public static void main(String[] args) throws IOException, ParserConfigurationException, org.xml.sax.SAXException { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setIgnoringComments(true); factory.setCoalescing(true); // Convert CDATA to Text nodes factory.setNamespaceAware(false); // No namespaces: this is default factory.setValidating(false); // Don't validate DTD: also default DocumentBuilder parser = factory.newDocumentBuilder(); Document document = parser.parse(new File(args[0])); NodeList sections = document.getElementsByTagName("sect1"); int numSections = sections.getLength(); for (int i = 0; i < numSections; i++) { Element section = (Element) sections.item(i); // A <sect1> Node title = section.getFirstChild(); while (title != null && title.getNodeType() != Node.ELEMENT_NODE) title = title.getNextSibling(); if (title != null) System.out.println(title.getFirstChild().getNodeValue()); } } }