Register a SAX2 ContentHandler and receive callbacks to print information about the document. : SAX « XML « Java

Register a SAX2 ContentHandler and receive callbacks to print information about the document.

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


import java.io.PrintWriter;

import org.xml.sax.Attributes;
import org.xml.sax.Parser;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.ParserAdapter;
import org.xml.sax.helpers.ParserFactory;
import org.xml.sax.helpers.XMLReaderFactory;

/**
 * A sample SAX2 counter. This sample program illustrates how to register a SAX2
 * ContentHandler and receive the callbacks in order to print information about
 * the document. The output of this program shows the time and count of
 * elements, attributes, ignorable whitespaces, and characters appearing in the
 * document.
 * <p>
 * This class is useful as a "poor-man's" performance tester to compare the
 * speed and accuracy of various SAX parsers. However, it is important to note
 * that the first parse time of a parser will include both VM class load time
 * and parser initialization that would not be present in subsequent parses with
 * the same file.
 * <p>
 * <strong>Note:</strong> The results produced by this program should never be
 * accepted as true performance measurements.
 *
 * @author Andy Clark, IBM
 * @version $Id: 447686 2006-09-19 02:38:34Z mrglavas $
 */
public class Counter extends DefaultHandler {

  // Constants

  // feature ids

  /** Namespaces feature id ( */
  protected static final String NAMESPACES_FEATURE_ID = "";

   * Namespace prefixes feature id
   * (
  protected static final String NAMESPACE_PREFIXES_FEATURE_ID = "";

  /** Validation feature id ( */
  protected static final String VALIDATION_FEATURE_ID = "";

   * Schema validation feature id
   * (
  protected static final String SCHEMA_VALIDATION_FEATURE_ID = "";

   * Schema full checking feature id
   * (
  protected static final String SCHEMA_FULL_CHECKING_FEATURE_ID = "";

   * Honour all schema locations feature id
   * (
  protected static final String HONOUR_ALL_SCHEMA_LOCATIONS_ID = "";

   * Validate schema annotations feature id
   * (
  protected static final String VALIDATE_ANNOTATIONS_ID = "";

   * Dynamic validation feature id
   * (
  protected static final String DYNAMIC_VALIDATION_FEATURE_ID = "";

  /** XInclude feature id ( */
  protected static final String XINCLUDE_FEATURE_ID = "";

   * XInclude fixup base URIs feature id
   * (
  protected static final String XINCLUDE_FIXUP_BASE_URIS_FEATURE_ID = "";

   * XInclude fixup language feature id
   * (
  protected static final String XINCLUDE_FIXUP_LANGUAGE_FEATURE_ID = "";

  // default settings

  /** Default parser class name; used by {@code main} when no parser was selected with {@code -p}. */
  protected static final String DEFAULT_PARSER_NAME = "org.apache.xerces.parsers.SAXParser";

  /** Default repetition (1): how many times each document is parsed unless {@code -x} is given. */
  protected static final int DEFAULT_REPETITION = 1;

  /** Default namespaces support (true); {@code -n} turns it on, {@code -N} off. */
  protected static final boolean DEFAULT_NAMESPACES = true;

  /** Default namespace prefixes (false); {@code -np} turns it on, {@code -NP} off. */
  protected static final boolean DEFAULT_NAMESPACE_PREFIXES = false;

  /** Default validation support (false); {@code -v} turns it on, {@code -V} off. */
  protected static final boolean DEFAULT_VALIDATION = false;

  /** Default Schema validation support (false); {@code -s} turns it on, {@code -S} off. */
  protected static final boolean DEFAULT_SCHEMA_VALIDATION = false;

  /** Default Schema full checking support (false); {@code -f} turns it on, {@code -F} off. */
  protected static final boolean DEFAULT_SCHEMA_FULL_CHECKING = false;

  /** Default honour all schema locations (false); {@code -hs} turns it on, {@code -HS} off. */
  protected static final boolean DEFAULT_HONOUR_ALL_SCHEMA_LOCATIONS = false;

  /** Default validate schema annotations (false); {@code -va} turns it on, {@code -VA} off. */
  protected static final boolean DEFAULT_VALIDATE_ANNOTATIONS = false;

  /** Default dynamic validation support (false); {@code -dv} turns it on, {@code -DV} off. */
  protected static final boolean DEFAULT_DYNAMIC_VALIDATION = false;

  /** Default XInclude processing support (false); {@code -xi} turns it on, {@code -XI} off. */
  protected static final boolean DEFAULT_XINCLUDE = false;

  /** Default XInclude fixup base URIs support (true); {@code -xb} turns it on, {@code -XB} off. */
  protected static final boolean DEFAULT_XINCLUDE_FIXUP_BASE_URIS = true;

  /** Default XInclude fixup language support (true); {@code -xl} turns it on, {@code -XL} off. */
  protected static final boolean DEFAULT_XINCLUDE_FIXUP_LANGUAGE = true;

  /** Default memory usage report (false); {@code -m} turns it on, {@code -M} off. */
  protected static final boolean DEFAULT_MEMORY_USAGE = false;

  /** Default "tagginess" report (false); {@code -t} turns it on, {@code -T} off. */
  protected static final boolean DEFAULT_TAGGINESS = false;

  // Data

  /** Number of elements; cleared by {@link #startDocument()}. */
  protected long fElements;

  /** Number of attributes; grows by the attribute count of every start tag. */
  protected long fAttributes;

  /** Number of characters reported through {@code characters(char[],int,int)}. */
  protected long fCharacters;

  /** Number of ignorable whitespace characters reported by the parser. */
  protected long fIgnorableWhitespace;

  /**
   * Number of characters of tags: the brackets, names, spaces, '=' and quotes
   * tallied for start tags and processing instructions.
   */
  protected long fTagCharacters;

  /**
   * Number of other content characters for the "tagginess" calculation:
   * attribute values and processing-instruction data.
   */
  protected long fOtherCharacters;

  // Constructors

  /** Creates a counter; every tally starts at its field default of zero. */
  public Counter() {
    super();
  } // <init>()

  // Public methods

  /** Prints the results. */
  public void printResults(PrintWriter out, String uri, long time, long memory, boolean tagginess,
      int repetition) {

    // filename.xml: 631 ms (4 elems, 0 attrs, 78 spaces, 0 chars)
    out.print(": ");
    if (repetition == 1) {
    } else {
      out.print(time / repetition);
    out.print(" ms");
    if (memory != Long.MIN_VALUE) {
      out.print(", ");
      out.print(" bytes");
    out.print(" (");
    out.print(" elems, ");
    out.print(" attrs, ");
    out.print(" spaces, ");
    out.print(" chars)");
    if (tagginess) {
      out.print(' ');
      long totalCharacters = fTagCharacters + fOtherCharacters + fCharacters + fIgnorableWhitespace;
      long tagValue = fTagCharacters * 100 / totalCharacters;
      out.print("% tagginess");

  } // printResults(PrintWriter,String,long)

  // ContentHandler methods

  /**
   * Start document. Clears every tally so that each parse is counted on its
   * own, including when the same handler is reused for repeated parses.
   *
   * @throws SAXException never thrown here; declared by the handler interface
   */
  public void startDocument() throws SAXException {

    fElements = 0;
    fAttributes = 0;
    fCharacters = 0;
    fIgnorableWhitespace = 0;
    fTagCharacters = 0;
    // Must be cleared with the others: it is part of the tagginess
    // denominator and would otherwise keep growing across parses.
    fOtherCharacters = 0;

  } // startDocument()

  /** Start element. */
  public void startElement(String uri, String local, String raw, Attributes attrs)
      throws SAXException {

    fTagCharacters++; // open angle bracket
    fTagCharacters += raw.length();
    if (attrs != null) {
      int attrCount = attrs.getLength();
      fAttributes += attrCount;
      for (int i = 0; i < attrCount; i++) {
        fTagCharacters++; // space
        fTagCharacters += attrs.getQName(i).length();
        fTagCharacters++; // '='
        fTagCharacters++; // open quote
        fOtherCharacters += attrs.getValue(i).length();
        fTagCharacters++; // close quote
    fTagCharacters++; // close angle bracket

  } // startElement(String,String,StringAttributes)

  /**
   * Characters. Adds the size of the reported chunk to the character tally;
   * the text itself is not inspected.
   */
  public void characters(char ch[], int start, int length) throws SAXException {
    fCharacters = fCharacters + length;
  } // characters(char[],int,int);

  /**
   * Ignorable whitespace. Adds the size of the reported chunk to the
   * ignorable-whitespace tally; the text itself is not inspected.
   */
  public void ignorableWhitespace(char ch[], int start, int length) throws SAXException {
    fIgnorableWhitespace = fIgnorableWhitespace + length;
  } // ignorableWhitespace(char[],int,int);

  /** Processing instruction. */
  public void processingInstruction(String target, String data) throws SAXException {
    fTagCharacters += 2; // "<?"
    fTagCharacters += target.length();
    if (data != null && data.length() > 0) {
      fTagCharacters++; // space
      fOtherCharacters += data.length();
    fTagCharacters += 2; // "?>"
  } // processingInstruction(String,String)

  // ErrorHandler methods

  /** Warning. Hands the exception to {@code printError} under the "Warning" label. */
  public void warning(SAXParseException ex) throws SAXException {
    final String label = "Warning";
    printError(label, ex);
  } // warning(SAXParseException)

  /** Error. Hands the exception to {@code printError} under the "Error" label. */
  public void error(SAXParseException ex) throws SAXException {
    final String label = "Error";
    printError(label, ex);
  } // error(SAXParseException)

  /**
   * Fatal error. Hands the exception to {@code printError} under the
   * "Fatal Error" label; the exception is reported only, not rethrown.
   */
  public void fatalError(SAXParseException ex) throws SAXException {
    final String label = "Fatal Error";
    printError(label, ex);
  } // fatalError(SAXParseException)

  // Protected methods

  /** Prints the error message. */
  protected void printError(String type, SAXParseException ex) {

    System.err.print("] ");
    if (ex == null) {
    String systemId = ex.getSystemId();
    if (systemId != null) {
      int index = systemId.lastIndexOf('/');
      if (index != -1)
        systemId = systemId.substring(index + 1);
    System.err.print(": ");

  } // printError(String,SAXParseException)

  // MAIN

  /** Main program entry point. */
  public static void main(String argv[]) {

    // is there anything to do?
    if (argv.length == 0) {

    // variables
    Counter counter = new Counter();
    PrintWriter out = new PrintWriter(System.out);
    XMLReader parser = null;
    int repetition = DEFAULT_REPETITION;
    boolean namespaces = DEFAULT_NAMESPACES;
    boolean namespacePrefixes = DEFAULT_NAMESPACE_PREFIXES;
    boolean validation = DEFAULT_VALIDATION;
    boolean schemaValidation = DEFAULT_SCHEMA_VALIDATION;
    boolean schemaFullChecking = DEFAULT_SCHEMA_FULL_CHECKING;
    boolean honourAllSchemaLocations = DEFAULT_HONOUR_ALL_SCHEMA_LOCATIONS;
    boolean validateAnnotations = DEFAULT_VALIDATE_ANNOTATIONS;
    boolean dynamicValidation = DEFAULT_DYNAMIC_VALIDATION;
    boolean xincludeProcessing = DEFAULT_XINCLUDE;
    boolean xincludeFixupBaseURIs = DEFAULT_XINCLUDE_FIXUP_BASE_URIS;
    boolean xincludeFixupLanguage = DEFAULT_XINCLUDE_FIXUP_LANGUAGE;
    boolean memoryUsage = DEFAULT_MEMORY_USAGE;
    boolean tagginess = DEFAULT_TAGGINESS;

    // process arguments
    for (int i = 0; i < argv.length; i++) {
      String arg = argv[i];
      if (arg.startsWith("-")) {
        String option = arg.substring(1);
        if (option.equals("p")) {
          // get parser name
          if (++i == argv.length) {
            System.err.println("error: Missing argument to -p option.");
          String parserName = argv[i];

          // create parser
          try {
            parser = XMLReaderFactory.createXMLReader(parserName);
          } catch (Exception e) {
            try {
              Parser sax1Parser = ParserFactory.makeParser(parserName);
              parser = new ParserAdapter(sax1Parser);
              System.err.println("warning: Features and properties not supported on SAX1 parsers.");
            } catch (Exception ex) {
              parser = null;
              System.err.println("error: Unable to instantiate parser (" + parserName + ")");
        if (option.equals("x")) {
          if (++i == argv.length) {
            System.err.println("error: Missing argument to -x option.");
          String number = argv[i];
          try {
            int value = Integer.parseInt(number);
            if (value < 1) {
              System.err.println("error: Repetition must be at least 1.");
            repetition = value;
          } catch (NumberFormatException e) {
            System.err.println("error: invalid number (" + number + ").");
        if (option.equalsIgnoreCase("n")) {
          namespaces = option.equals("n");
        if (option.equalsIgnoreCase("np")) {
          namespacePrefixes = option.equals("np");
        if (option.equalsIgnoreCase("v")) {
          validation = option.equals("v");
        if (option.equalsIgnoreCase("s")) {
          schemaValidation = option.equals("s");
        if (option.equalsIgnoreCase("f")) {
          schemaFullChecking = option.equals("f");
        if (option.equalsIgnoreCase("hs")) {
          honourAllSchemaLocations = option.equals("hs");
        if (option.equalsIgnoreCase("va")) {
          validateAnnotations = option.equals("va");
        if (option.equalsIgnoreCase("dv")) {
          dynamicValidation = option.equals("dv");
        if (option.equalsIgnoreCase("xi")) {
          xincludeProcessing = option.equals("xi");
        if (option.equalsIgnoreCase("xb")) {
          xincludeFixupBaseURIs = option.equals("xb");
        if (option.equalsIgnoreCase("xl")) {
          xincludeFixupLanguage = option.equals("xl");
        if (option.equalsIgnoreCase("m")) {
          memoryUsage = option.equals("m");
        if (option.equalsIgnoreCase("t")) {
          tagginess = option.equals("t");
        if (option.equals("-rem")) {
          if (++i == argv.length) {
            System.err.println("error: Missing argument to -# option.");
          System.out.print("# ");
        if (option.equals("h")) {
        System.err.println("error: unknown option (" + option + ").");

      // use default parser?
      if (parser == null) {

        // create parser
        try {
          parser = XMLReaderFactory.createXMLReader(DEFAULT_PARSER_NAME);
        } catch (Exception e) {
          System.err.println("error: Unable to instantiate parser (" + DEFAULT_PARSER_NAME + ")");

      // set parser features
      try {
        parser.setFeature(NAMESPACES_FEATURE_ID, namespaces);
      } catch (SAXException e) {
        System.err.println("warning: Parser does not support feature (" + NAMESPACES_FEATURE_ID
            + ")");
      try {
        parser.setFeature(NAMESPACE_PREFIXES_FEATURE_ID, namespacePrefixes);
      } catch (SAXException e) {
        System.err.println("warning: Parser does not support feature ("
      try {
        parser.setFeature(VALIDATION_FEATURE_ID, validation);
      } catch (SAXException e) {
        System.err.println("warning: Parser does not support feature (" + VALIDATION_FEATURE_ID
            + ")");
      try {
        parser.setFeature(SCHEMA_VALIDATION_FEATURE_ID, schemaValidation);
      } catch (SAXNotRecognizedException e) {
        System.err.println("warning: Parser does not recognize feature ("
            + SCHEMA_VALIDATION_FEATURE_ID + ")");

      } catch (SAXNotSupportedException e) {
        System.err.println("warning: Parser does not support feature ("
            + SCHEMA_VALIDATION_FEATURE_ID + ")");
      try {
        parser.setFeature(SCHEMA_FULL_CHECKING_FEATURE_ID, schemaFullChecking);
      } catch (SAXNotRecognizedException e) {
        System.err.println("warning: Parser does not recognize feature ("

      } catch (SAXNotSupportedException e) {
        System.err.println("warning: Parser does not support feature ("
      try {
        parser.setFeature(HONOUR_ALL_SCHEMA_LOCATIONS_ID, honourAllSchemaLocations);
      } catch (SAXNotRecognizedException e) {
        System.err.println("warning: Parser does not recognize feature ("
            + HONOUR_ALL_SCHEMA_LOCATIONS_ID + ")");
      } catch (SAXNotSupportedException e) {
        System.err.println("warning: Parser does not support feature ("
            + HONOUR_ALL_SCHEMA_LOCATIONS_ID + ")");
      try {
        parser.setFeature(VALIDATE_ANNOTATIONS_ID, validateAnnotations);
      } catch (SAXNotRecognizedException e) {
        System.err.println("warning: Parser does not recognize feature (" + VALIDATE_ANNOTATIONS_ID
            + ")");

      } catch (SAXNotSupportedException e) {
        System.err.println("warning: Parser does not support feature (" + VALIDATE_ANNOTATIONS_ID
            + ")");
      try {
        parser.setFeature(DYNAMIC_VALIDATION_FEATURE_ID, dynamicValidation);
      } catch (SAXNotRecognizedException e) {
        System.err.println("warning: Parser does not recognize feature ("

      } catch (SAXNotSupportedException e) {
        System.err.println("warning: Parser does not support feature ("
      try {
        parser.setFeature(XINCLUDE_FEATURE_ID, xincludeProcessing);
      } catch (SAXNotRecognizedException e) {
        System.err.println("warning: Parser does not recognize feature (" + XINCLUDE_FEATURE_ID
            + ")");

      } catch (SAXNotSupportedException e) {
            .println("warning: Parser does not support feature (" + XINCLUDE_FEATURE_ID + ")");
      try {
        parser.setFeature(XINCLUDE_FIXUP_BASE_URIS_FEATURE_ID, xincludeFixupBaseURIs);
      } catch (SAXNotRecognizedException e) {
        System.err.println("warning: Parser does not recognize feature ("

      } catch (SAXNotSupportedException e) {
        System.err.println("warning: Parser does not support feature ("
      try {
        parser.setFeature(XINCLUDE_FIXUP_LANGUAGE_FEATURE_ID, xincludeFixupLanguage);
      } catch (SAXNotRecognizedException e) {
        System.err.println("warning: Parser does not recognize feature ("

      } catch (SAXNotSupportedException e) {
        System.err.println("warning: Parser does not support feature ("

      // parse file
      try {
        long timeBefore = System.currentTimeMillis();
        long memoryBefore = Runtime.getRuntime().freeMemory();
        for (int j = 0; j < repetition; j++) {
        long memoryAfter = Runtime.getRuntime().freeMemory();
        long timeAfter = System.currentTimeMillis();

        long time = timeAfter - timeBefore;
        long memory = memoryUsage ? memoryBefore - memoryAfter : Long.MIN_VALUE;
        counter.printResults(out, arg, time, memory, tagginess, repetition);
      } catch (SAXParseException e) {
        // ignore
      } catch (Exception e) {
        System.err.println("error: Parse error occurred - " + e.getMessage());
        Exception se = e;
        if (e instanceof SAXException) {
          se = ((SAXException) e).getException();
        if (se != null)


  } // main(String[])

  // Private static methods

  /** Prints the usage. */
  private static void printUsage() {

    System.err.println("usage: java sax.Counter (options) uri ...");

    System.err.println("  -p name     Select parser by name.");
    System.err.println("  -x number   Select number of repetitions.");
    System.err.println("  -n  | -N    Turn on/off namespace processing.");
    System.err.println("  -np | -NP   Turn on/off namespace prefixes.");
    System.err.println("              NOTE: Requires use of -n.");
    System.err.println("  -v  | -V    Turn on/off validation.");
    System.err.println("  -s  | -S    Turn on/off Schema validation support.");
    System.err.println("              NOTE: Not supported by all parsers.");
    System.err.println("  -f  | -F    Turn on/off Schema full checking.");
    System.err.println("              NOTE: Requires use of -s and not supported by all parsers.");
    System.err.println("  -hs | -HS   Turn on/off honouring of all schema locations.");
    System.err.println("              NOTE: Requires use of -s and not supported by all parsers.");
    System.err.println("  -va | -VA   Turn on/off validation of schema annotations.");
    System.err.println("              NOTE: Requires use of -s and not supported by all parsers.");
    System.err.println("  -dv | -DV   Turn on/off dynamic validation.");
    System.err.println("              NOTE: Not supported by all parsers.");
    System.err.println("  -xi | -XI   Turn on/off XInclude processing.");
    System.err.println("              NOTE: Not supported by all parsers.");
    System.err.println("  -xb | -XB   Turn on/off base URI fixup during XInclude processing.");
    System.err.println("              NOTE: Requires use of -xi and not supported by all parsers.");
    System.err.println("  -xl | -XL   Turn on/off language fixup during XInclude processing.");
    System.err.println("              NOTE: Requires use of -xi and not supported by all parsers.");
    System.err.println("  -m  | -M    Turn on/off memory usage report");
    System.err.println("  -t  | -T    Turn on/off \"tagginess\" report.");
    System.err.println("  --rem text  Output user defined comment before next parse.");
    System.err.println("  -h          This help screen.");

    System.err.println("  Parser:     " + DEFAULT_PARSER_NAME);
    System.err.println("  Repetition: " + DEFAULT_REPETITION);
    System.err.print("  Namespaces: ");
    System.err.println(DEFAULT_NAMESPACES ? "on" : "off");
    System.err.print("  Prefixes:   ");
    System.err.println(DEFAULT_NAMESPACE_PREFIXES ? "on" : "off");
    System.err.print("  Validation: ");
    System.err.println(DEFAULT_VALIDATION ? "on" : "off");
    System.err.print("  Schema:     ");
    System.err.println(DEFAULT_SCHEMA_VALIDATION ? "on" : "off");
    System.err.print("  Schema full checking:            ");
    System.err.println(DEFAULT_SCHEMA_FULL_CHECKING ? "on" : "off");
    System.err.print("  Honour all schema locations:     ");
    System.err.println(DEFAULT_HONOUR_ALL_SCHEMA_LOCATIONS ? "on" : "off");
    System.err.print("  Validate annotations:            ");
    System.err.println(DEFAULT_VALIDATE_ANNOTATIONS ? "on" : "off");
    System.err.print("  Dynamic:    ");
    System.err.println(DEFAULT_DYNAMIC_VALIDATION ? "on" : "off");
    System.err.print("  XInclude:   ");
    System.err.println(DEFAULT_XINCLUDE ? "on" : "off");
    System.err.print("  XInclude base URI fixup:  ");
    System.err.println(DEFAULT_XINCLUDE_FIXUP_BASE_URIS ? "on" : "off");
    System.err.print("  XInclude language fixup:  ");
    System.err.println(DEFAULT_XINCLUDE_FIXUP_LANGUAGE ? "on" : "off");
    System.err.print("  Memory:     ");
    System.err.println(DEFAULT_MEMORY_USAGE ? "on" : "off");
    System.err.print("  Tagginess:  ");
    System.err.println(DEFAULT_TAGGINESS ? "on" : "off");

        .println("  The speed and memory results from this program should NOT be used as the");
        .println("  basis of parser performance comparison! Real analytical methods should be");
        .println("  used. For better results, perform multiple document parses within the same");
        .println("  virtual machine to remove class loading from parse time and memory usage.");
        .println("  The \"tagginess\" measurement gives a rough estimate of the percentage of");
    System.err.println("  markup versus content in the XML document. The percent tagginess of a ");
    System.err.println("  document is equal to the minimum amount of tag characters required for ");
    System.err.println("  elements, attributes, and processing instructions divided by the total");
        .println("  amount of characters (characters, ignorable whitespace, and tag characters)");
    System.err.println("  in the document.");
    System.err.println("  Not all features are supported by different parsers.");

  } // printUsage()

} // class Counter


Related examples in the same category

1.Parsing XML Files with SAX
2.SAX Demo
3.Duplicates XML Files
4.A Program to Display the Input from a SAX Parser
5.SAX Checker
6.A Content Handler to Output a Sorted List as HTML
7.A Content Handler to Output a Sorted List
8.Simple lister - extract name and children tags
9.SAX Tree Validator
10.SAX Tree Viewer
11.Accessing character data (CDATA) of XML element
12.Accessing features of the SAX parser implementation
13.Configuring SAX parser factory to produce alternate parser
14.Extracting attribute values from XML elements
15.Handling SAX errors during parsing
16.Using XML locator to indicate current parser position
17.Filter to write an XML document from a SAX event stream
18.XML utility methods that only depend on the JDK
19.Utility class for xml/sax handling
20.Create Xml Reader
21.Sax to DOM converter
22.Produce a SAX stream from a DOM Document
23.SAX2 writer: register a SAX2 ContentHandler and receive the callbacks in order to print a document that is parsed.
24.Provides a complete trace of SAX2 events for files parsed.
25.The program prints all hyperlinks links of an XHTML web page