// Java tutorial
//package com.java2s; /** * Copyright (C) 2006-2013 phloc systems * http://www.phloc.com * office[at]phloc[dot]com * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import javax.annotation.Nullable; public class Main { /** * Contains a boolean mask for all characters from 0x00-0xff which are invalid * (marked as true) and which are valid (marked as false) */ private static final boolean[] ILLEGAL_XML_CHARS = new boolean[] { true, true, true, true, true, true, true, true, true, false, false, true, true, false, true, true, // 16 true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, // 32 false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, // 48 false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, // 64 false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, // 80 false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, // 96 false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, // 112 false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, // 128 true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, // 144 true, true, true, true, true, true, true, true, true, 
true, true, true, true, true, true, true, // 160 false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, // 176 false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, // 192 false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, // 208 false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, // 224 false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, // 240 false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false }; public static boolean containsInvalidXMLCharacter(@Nullable final String s) { return s != null && containsInvalidXMLCharacter(s.toCharArray()); } public static boolean containsInvalidXMLCharacter(@Nullable final char[] aChars) { if (aChars != null) for (final char c : aChars) if (isInvalidXMLCharacter(c)) return true; return false; } /** * Check if the passed character is valid for XML content. Works for XML 1.0 * and XML 1.1.<br> * Note: makes no difference between the runtime JAXP solution and the * explicit Xerces version * * @param c * The character to be checked. * @return <code>true</code> if the character is valid in XML, * <code>false</code> otherwise. */ public static boolean isInvalidXMLCharacter(final char c) { // Based on: http://www.w3.org/TR/2006/REC-xml11-20060816/#charsets // Speed up by separating the most common use cases first if (c < 256) { // Character <= 0x00ff - use precomposed table return ILLEGAL_XML_CHARS[c]; } // Character >= 0x0100 // For completeness, the Unicode line separator character, #x2028, is // also supported. 
// Surrogate blocks (no Java IDs found) // High surrogate: 0xd800-0xdbff // Low surrogate: 0xdc00-0xdfff return c == '\u2028' || (c >= '\ufdd0' && c <= '\ufddf') || c == '\ufffe' || c == '\uffff' || Character.isHighSurrogate(c) || Character.isLowSurrogate(c); } }