// Java tutorial
//package com.java2s;
/*
// $Id: //open/mondrian/src/main/mondrian/xom/XMLUtil.java#1 $
// This software is subject to the terms of the Common Public License
// Agreement, available at the following URL:
// http://www.opensource.org/licenses/cpl.html.
// (C) Copyright 2001-2002 Kana Software, Inc. and others.
// All Rights Reserved.
// You must accept the terms of that agreement to use this software.
//
// jhyde, 3 October, 2001
*/

import java.io.Reader;
import java.io.IOException;

public class Main {
    /** Scanner state: not inside any markup; only whitespace or '&lt;' is acceptable. */
    private static final int OUTSIDE = 0;
    /** Scanner state: a '&lt;' has been read and the kind of markup is not yet known. */
    private static final int BRACKET = 1;
    /** Scanner state: inside a comment, looking for the closing "--&gt;". */
    private static final int COMMENT = 2;
    /** Scanner state: inside markup that is skipped up to the next '&gt;'. */
    private static final int IGNORE = 3;
    /** Scanner state: collecting the characters of the tag name. */
    private static final int TAG = 4;

    /** Characters that must directly follow '&lt;' for the markup to be a comment. */
    private static final String COMMENT_OPEN = "!--";

    /**
     * Retrieves the name of the first tag in the XML document supplied by the
     * given Reader, without parsing the whole document. This is useful to
     * identify the kind of document before handing it to a real parser.
     *
     * <p>Characters are read one at a time and reading stops as soon as the
     * answer is known; the reader is not closed. Leading whitespace, comments
     * ({@code <!-- ... -->}), and markup starting with {@code <?} or
     * {@code <!} are skipped.
     *
     * <p>Limitations: markup starting with {@code <?} or {@code <!} (other
     * than a comment) is considered finished at its first {@code '>'}, so a
     * DOCTYPE declaration with an internal subset is not skipped correctly and
     * {@code null} is returned for such documents.
     *
     * @param xml a Reader containing an XML document.
     * @return the first tag name, or {@code null} if the end of input is
     *     reached first, if non-whitespace text precedes the first tag, or if
     *     reading fails with an {@link IOException}.
     */
    public static String getFirstTagName(Reader xml) {
        int state = OUTSIDE;
        // Number of COMMENT_OPEN characters matched so far after '<', or -1
        // once the markup can no longer be a comment.
        int commentOpenMatched = -1;
        // Whether a '!' has been seen since the current '<'.
        boolean sawBang = false;
        // Consecutive '-' characters just read inside a comment, capped at 2.
        int trailingDashes = 0;
        StringBuilder tagName = null;

        try {
            int c;
            // Reaching end of input before a tag name is complete means no tag.
            while ((c = xml.read()) != -1) {
                switch (state) {
                case OUTSIDE:
                    if (c == '<') {
                        state = BRACKET;
                        commentOpenMatched = 0;
                        sawBang = false;
                    } else if (!Character.isWhitespace((char) c)) {
                        // Text outside of any tag: there is no first tag.
                        return null;
                    }
                    break;

                case BRACKET:
                    if (commentOpenMatched >= 0) {
                        if (c == COMMENT_OPEN.charAt(commentOpenMatched)) {
                            commentOpenMatched++;
                            if (commentOpenMatched == COMMENT_OPEN.length()) {
                                // "<!--" is complete. Leave this state right
                                // away so that the comment's first character is
                                // examined by the COMMENT state only and is not
                                // misread as bracket content.
                                state = COMMENT;
                                trailingDashes = 0;
                                break;
                            }
                        } else {
                            commentOpenMatched = -1;
                        }
                    }
                    if (Character.isWhitespace((char) c)) {
                        // Whitespace directly after '<' is tolerated and skipped.
                        break;
                    }
                    switch (c) {
                    case '?':
                        state = IGNORE;
                        break;
                    case '!':
                        sawBang = true;
                        if (commentOpenMatched < 0) {
                            state = IGNORE;
                        }
                        break;
                    case '-':
                        if (commentOpenMatched < 0) {
                            state = IGNORE;
                        }
                        break;
                    case '>':
                        // Empty markup such as "<>" or "<!>".
                        state = OUTSIDE;
                        break;
                    default:
                        if (sawBang) {
                            // A "<!..." declaration that is not a comment.
                            state = IGNORE;
                        } else if (c == '/') {
                            // '/' ends a tag name, and none has been collected
                            // yet, so the name is empty.
                            return "";
                        } else {
                            tagName = new StringBuilder();
                            tagName.append((char) c);
                            state = TAG;
                        }
                        break;
                    }
                    break;

                case COMMENT:
                    if (c == '-') {
                        // Only "at least two" matters, so cap the counter. This
                        // also lets a terminator with extra dashes ("--->")
                        // close the comment.
                        if (trailingDashes < 2) {
                            trailingDashes++;
                        }
                    } else if (c == '>' && trailingDashes == 2) {
                        state = OUTSIDE;
                    } else {
                        trailingDashes = 0;
                    }
                    break;

                case IGNORE:
                    // Everything up to and including the next '>' is dropped.
                    if (c == '>') {
                        state = OUTSIDE;
                    }
                    break;

                case TAG:
                    if (Character.isWhitespace((char) c) || c == '>' || c == '/') {
                        return tagName.toString();
                    }
                    tagName.append((char) c);
                    break;

                default:
                    throw new IllegalStateException("unknown state " + state);
                }
            }
            return null;
        } catch (IOException ex) {
            // Deliberate best effort: if the input cannot be read, the first
            // tag cannot be determined, which is reported the same way as
            // "no tag found".
            return null;
        }
    }
}