Java examples for XML:XML String
Retrieve the name of the first tag in the XML document specified by the given Reader, without parsing the full file/string.
/*
// Licensed to Julian Hyde under one or more contributor license
// agreements. See the NOTICE file distributed with this work for
// additional information regarding copyright ownership.
//
// Julian Hyde licenses this file to you under the Apache License,
// Version 2.0 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
//package com.java2s;

import java.io.IOException;
import java.io.Reader;

/**
 * Utility for peeking at the root element name of an XML document without
 * running a full parser over it.
 */
public class Main {

    /** The characters that must directly follow {@code '<'} to open a comment. */
    private static final String COMMENT_OPEN = "!--";

    /** Scanner states of {@link #getFirstTagName(Reader)}. */
    private enum State {
        /** Between markup; only whitespace or {@code '<'} is acceptable. */
        OUTSIDE,
        /** Just after {@code '<'}; kind of markup not yet known. */
        BRACKET,
        /** Inside {@code <!-- ... -->}. */
        COMMENT,
        /** Inside a declaration or processing instruction; skipped up to {@code '>'}. */
        IGNORE,
        /** Accumulating the characters of the first element name. */
        TAG
    }

    /**
     * Retrieves the name of the first tag in the XML document supplied by the
     * given Reader, without parsing the full file/string. This is useful to
     * identify the document type before parsing, for example to dispatch the
     * document to different pieces of code.
     *
     * <p>The reader is consumed one character at a time and only as far as
     * needed to decide. Leading whitespace, comments, processing instructions
     * ({@code <?...?>}) and {@code <!...>} declarations are skipped.
     *
     * <p>Limitation: declarations and processing instructions are skipped up to
     * the first {@code '>'}; a {@code '>'} nested inside them (for example in a
     * DOCTYPE internal subset) ends the skip early.
     *
     * @param xml a Reader containing an XML document; it is not closed.
     * @return the first tag name (including any namespace prefix), or
     *     {@code null} if non-whitespace text precedes the first tag, if the
     *     input ends before a tag name is complete, or if reading fails.
     */
    public static String getFirstTagName(Reader xml) {
        State state = State.OUTSIDE;
        // Number of COMMENT_OPEN characters matched since '<'; -1 once the
        // markup can no longer be a comment.
        int openMatched = -1;
        // Length of the run of '-' immediately preceding the current
        // character while inside a comment.
        int dashRun = 0;
        boolean sawBang = false;
        StringBuilder tagName = null;

        try {
            int c = xml.read();
            // Hitting EOF in any state means no complete tag name was found.
            while (c != -1) {
                switch (state) {
                case OUTSIDE:
                    if (c == '<') {
                        state = State.BRACKET;
                        openMatched = 0;
                        sawBang = false;
                    } else if (!Character.isWhitespace((char) c)) {
                        // Character data before any tag: not a usable document.
                        return null;
                    }
                    c = xml.read();
                    break;

                case BRACKET:
                    if (openMatched >= 0) {
                        if (c == COMMENT_OPEN.charAt(openMatched)) {
                            openMatched++;
                            if (openMatched == COMMENT_OPEN.length()) {
                                // "<!--" is complete. Leave BRACKET handling
                                // right away so the first character of the
                                // comment body is examined by COMMENT only.
                                state = State.COMMENT;
                                dashRun = 0;
                                c = xml.read();
                                break;
                            }
                        } else {
                            openMatched = -1;
                        }
                    }

                    if (Character.isWhitespace((char) c)) {
                        c = xml.read();
                        break;
                    }

                    switch (c) {
                    case '?':
                        state = State.IGNORE;
                        c = xml.read();
                        break;
                    case '!':
                        sawBang = true;
                        if (openMatched < 0) {
                            state = State.IGNORE;
                        }
                        c = xml.read();
                        break;
                    case '-':
                        if (openMatched < 0) {
                            state = State.IGNORE;
                        }
                        c = xml.read();
                        break;
                    case '>':
                        state = State.OUTSIDE;
                        c = xml.read();
                        break;
                    default:
                        // Do not consume c: the next state looks at it again
                        // (TAG stores it as the first name character).
                        if (sawBang) {
                            state = State.IGNORE;
                        } else {
                            state = State.TAG;
                            tagName = new StringBuilder();
                        }
                        break;
                    }
                    break;

                case COMMENT:
                    // A comment ends at the first '>' preceded by at least
                    // two consecutive dashes.
                    if (c == '>' && dashRun >= 2) {
                        state = State.OUTSIDE;
                    }
                    dashRun = (c == '-') ? dashRun + 1 : 0;
                    c = xml.read();
                    break;

                case IGNORE:
                    if (c == '>') {
                        state = State.OUTSIDE;
                    }
                    c = xml.read();
                    break;

                case TAG:
                    // The name ends at whitespace, '>' or '/'.
                    if (Character.isWhitespace((char) c) || c == '>' || c == '/') {
                        return tagName.toString();
                    }
                    tagName.append((char) c);
                    c = xml.read();
                    break;
                }
            }
            return null;
        } catch (IOException ex) {
            // Best effort by contract: if the input cannot be read, the first
            // tag cannot be determined.
            return null;
        }
    }
}