// Java tutorial
//package com.java2s;

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

public class Main {
    /** Minimum number of hexadecimal digits emitted in a mangled escape sequence. */
    private static final int NUM_SLASH_POSITIONS = 4;

    /**
     * Mangle a string so that it can be represented in an XML document.
     *
     * There are three kinds of code points in XML:
     * - Those that can be represented normally,
     * - Those that have to be escaped (for example, {@code &} must be
     *   represented as {@code &amp;})
     * - Those that cannot be represented at all in XML.
     *
     * The built-in SAX functions will handle the first two types for us just
     * fine. However, sometimes we come across a code point of the third type.
     * In this case, we have to mangle the string in order to represent it at
     * all. We also mangle backslash to avoid confusing a backslash in the
     * string with part of our escape sequence.
     *
     * The encoding used here is as follows: an illegal code point is
     * represented as '\ABCD;', where ABCD is the hexadecimal value of
     * the code point (lowercase, zero-padded to at least four digits).
     *
     * @param str The input string. Must not be null.
     * @param createEntityRefs If true, the five XML special characters
     *        ({@code & " ' < >}) are replaced by their predefined entity
     *        references; if false they are copied through unchanged.
     *
     * @return The mangled string.
     */
    public static String mangleXmlString(String str, boolean createEntityRefs) {
        final int length = str.length();
        final StringBuilder bld = new StringBuilder(length);
        // Walk the string by code point, not by char, so that surrogate pairs
        // are examined (and copied) as a single unit.
        for (int offset = 0; offset < length;) {
            final int cp = str.codePointAt(offset);
            if (codePointMustBeMangled(cp)) {
                bld.append(mangleCodePoint(cp));
            } else {
                final String entityRef = createEntityRefs ? codePointToEntityRef(cp) : null;
                if (entityRef != null) {
                    bld.append(entityRef);
                } else {
                    bld.appendCodePoint(cp);
                }
            }
            offset += Character.charCount(cp);
        }
        return bld.toString();
    }

    /**
     * Given a code point, determine if it should be mangled before being
     * represented in an XML document.
     *
     * Any code point that isn't valid in XML must be mangled.
     * See http://en.wikipedia.org/wiki/Valid_characters_in_XML for a
     * quick reference, or the w3 standard for the authoritative reference.
     *
     * @param cp The code point
     * @return True if the code point should be mangled
     */
    private static boolean codePointMustBeMangled(int cp) {
        if (cp < 0x20) {
            // Of the control characters, only tab, line feed and carriage
            // return are allowed through.
            return ((cp != 0x9) && (cp != 0xa) && (cp != 0xd));
        } else if ((0xd7ff < cp) && (cp < 0xe000)) {
            // Surrogate range: only reached here for an unpaired surrogate.
            return true;
        } else if ((cp == 0xfffe) || (cp == 0xffff)) {
            return true;
        } else if (cp == 0x5c) {
            // we mangle backslash to simplify decoding... it's
            // easier if backslashes always begin mangled sequences.
            return true;
        }
        return false;
    }

    /**
     * Render a code point as a mangled escape sequence: a backslash, the
     * code point in lowercase hexadecimal zero-padded to at least
     * NUM_SLASH_POSITIONS digits, and a terminating semicolon.
     *
     * @param cp The code point
     * @return The escape sequence for the code point
     */
    private static String mangleCodePoint(int cp) {
        return String.format("\\%0" + NUM_SLASH_POSITIONS + "x;", cp);
    }

    /**
     * Map one of the five XML special characters to its predefined entity
     * reference.
     *
     * @param cp The code point
     * @return The entity reference, or null if the code point has none
     */
    private static String codePointToEntityRef(int cp) {
        switch (cp) {
        case '&':
            return "&amp;";
        case '\"':
            return "&quot;";
        case '\'':
            return "&apos;";
        case '<':
            return "&lt;";
        case '>':
            return "&gt;";
        default:
            return null;
        }
    }
}