Main.java Source code

Java tutorial

Introduction

Here is the source code for Main.java

Source

//package com.java2s;
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

public class Main {
    /** Minimum number of hexadecimal digits written in a mangled escape sequence. */
    private static final int NUM_SLASH_POSITIONS = 4;

    /**
     * Mangle a string so that it can be represented in an XML document.
     *
     * There are three kinds of code points in XML:
     * - Those that can be represented normally,
     * - Those that have to be escaped (for example, {@code &} must be
     *     represented as {@code &amp;})
     * - Those that cannot be represented at all in XML.
     *
     * Code points of the third kind are mangled so that they can be
     * represented at all.  Backslash is mangled as well, to avoid confusing a
     * backslash in the string with part of our escape sequence.
     *
     * The encoding used here is as follows: an illegal code point is
     * represented as '\abcd;', where abcd is the lowercase hexadecimal value
     * of the code point, zero-padded to at least four digits.
     *
     * @param str               The input string.
     * @param createEntityRefs  If true, the characters {@code & " ' < >} are
     *                          replaced by their predefined XML entity
     *                          references; if false, they are copied through
     *                          unchanged.
     *
     * @return        The mangled string.
     * @throws NullPointerException if {@code str} is null
     */
    public static String mangleXmlString(String str, boolean createEntityRefs) {
        final int length = str.length();
        // The output is at least as long as the input in the common case.
        final StringBuilder bld = new StringBuilder(length);
        int offset = 0;
        while (offset < length) {
            // Walk by code point, not by char, so surrogate pairs stay intact.
            final int cp = str.codePointAt(offset);
            if (codePointMustBeMangled(cp)) {
                bld.append(mangleCodePoint(cp));
            } else {
                final String entityRef =
                        createEntityRefs ? codePointToEntityRef(cp) : null;
                if (entityRef != null) {
                    bld.append(entityRef);
                } else {
                    bld.appendCodePoint(cp);
                }
            }
            offset += Character.charCount(cp);
        }
        return bld.toString();
    }

    /**
     * Given a code point, determine if it should be mangled before being
     * represented in an XML document.
     *
     * Any code point that isn't valid in XML must be mangled.
     * See http://en.wikipedia.org/wiki/Valid_characters_in_XML for a
     * quick reference, or the w3 standard for the authoritative reference.
     * Backslash is also mangled because it introduces our escape sequences.
     *
     * @param cp      The code point
     * @return        True if the code point should be mangled
     */
    private static boolean codePointMustBeMangled(int cp) {
        if (cp < 0x20) {
            // Of the control characters, only tab, LF and CR are allowed.
            return (cp != 0x9) && (cp != 0xa) && (cp != 0xd);
        }
        if ((cp >= 0xd800) && (cp <= 0xdfff)) {
            // An unpaired surrogate: codePointAt only returns one of these
            // when it is not part of a valid surrogate pair.
            return true;
        }
        if ((cp == 0xfffe) || (cp == 0xffff)) {
            return true;
        }
        // we mangle backslash to simplify decoding... it's
        // easier if backslashes always begin mangled sequences.
        return cp == 0x5c;
    }

    /**
     * Build the escape sequence for a code point: a backslash, the code
     * point in lowercase hexadecimal zero-padded to at least
     * {@link #NUM_SLASH_POSITIONS} digits, and a terminating semicolon.
     *
     * @param cp      The code point
     * @return        The escape sequence, e.g. {@code \005c;} for backslash
     */
    private static String mangleCodePoint(int cp) {
        return String.format("\\%0" + NUM_SLASH_POSITIONS + "x;", cp);
    }

    /**
     * Map a code point to its predefined XML entity reference.
     *
     * @param cp      The code point
     * @return        The entity reference for {@code & " ' < >}, or null if
     *                the code point has no predefined entity
     */
    private static String codePointToEntityRef(int cp) {
        switch (cp) {
        case '&':
            return "&amp;";
        case '\"':
            return "&quot;";
        case '\'':
            return "&apos;";
        case '<':
            return "&lt;";
        case '>':
            return "&gt;";
        default:
            return null;
        }
    }
}