Returns true if the argument, a UCS-4 character code, is valid in XML documents.
/*
* $Id: XmlChars.java,v 1.1 2004/08/19 05:30:22 aslom Exp $
*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) 2000 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Crimson" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, Sun Microsystems, Inc.,
* http://www.sun.com. For more information on the Apache Software
* Foundation, please see <http://www.apache.org/>.
*/
/**
 * Static helpers that answer whether a given character is permitted in
 * an XML document. The checks are useful both when reading XML and
 * when generating it.
 *
 * <p>This class only exposes static methods and cannot be instantiated.
 *
 * @version 1.8
 * @author David Brownell
 */
public class XmlChars
{
    // Not instantiable: static utility holder only.
    private XmlChars () { }

    /**
     * Tests whether a UCS-4 character code matches the <code>Char</code>
     * production of the XML specification, i.e. whether it is legal
     * anywhere in an XML document.
     *
     * <p>The accepted values are tab (0x9), line feed (0xA), carriage
     * return (0xD), and the ranges 0x20-0xD7FF, 0xE000-0xFFFD and
     * 0x10000-0x10FFFF. The surrogate block (0xD800-0xDFFF), the
     * code points 0xFFFE and 0xFFFF, all other control codes below
     * 0x20, negative values, and anything above 0x10FFFF are rejected.
     *
     * <p>Character references can name code points outside these
     * ranges, for example <code>&amp;#x12345678;</code>; such values
     * are reported as invalid by this method.
     *
     * @param ucs4char The 32-bit UCS-4 character being tested.
     * @return <code>true</code> if the code is an allowed XML character,
     *         <code>false</code> otherwise.
     */
    static public boolean isChar (int ucs4char)
    {
        // [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF]
        //            | [#xE000-#xFFFD] | [#x10000-#x10FFFF]

        // Below the space character only three whitespace controls are legal.
        if (ucs4char < 0x0020) {
            return ucs4char == 0x0009
                || ucs4char == 0x000A
                || ucs4char == 0x000D;
        }

        // 0x20 through 0xD7FF: everything up to the surrogate block.
        if (ucs4char <= 0xD7FF) {
            return true;
        }

        // 0xD800 through 0xDFFF: surrogate code units are excluded.
        if (ucs4char < 0xE000) {
            return false;
        }

        // 0xE000 through 0xFFFD: remainder of the 16-bit range,
        // stopping short of 0xFFFE and 0xFFFF.
        if (ucs4char <= 0xFFFD) {
            return true;
        }

        // What is left is 0xFFFE and above; only the supplementary
        // range 0x10000 through 0x10FFFF is legal.
        return ucs4char >= 0x10000 && ucs4char <= 0x10ffff;
    }
}
Related examples in the same category