Java examples for XML:XML String Escape
Unescapes a String, replacing &#nn;, &lt;, &gt;, &amp;, &quot;, and &apos; with the corresponding characters.
/*
 * $Id: c252e97234599bbc2c9094ce3ee83beaa8e24d93 $
 *
 * This file is part of the iText (R) project.
 * Copyright (c) 1998-2016 iText Group NV
 * Authors: Bruno Lowagie, Paulo Soares, Balder Van Camp, et al.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License version 3
 * as published by the Free Software Foundation with the addition of the
 * following permission added to Section 15 as permitted in Section 7(a):
 * FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
 * ITEXT GROUP. ITEXT GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT
 * OF THIRD PARTY RIGHTS
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Affero General Public License for more details.
 * You should have received a copy of the GNU Affero General Public License
 * along with this program; if not, see http://www.gnu.org/licenses or write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA, 02110-1301 USA, or download the license from the following URL:
 * http://itextpdf.com/terms-of-use/
 *
 * The interactive user interfaces in modified source and object code versions
 * of this program must display Appropriate Legal Notices, as required under
 * Section 5 of the GNU Affero General Public License.
 *
 * In accordance with Section 7(b) of the GNU Affero General Public License,
 * a covered work must retain the producer line in every PDF that is created
 * or manipulated using iText.
 *
 * You can be released from the requirements of the license by purchasing
 * a commercial license. Buying such a license is mandatory as soon as you
 * develop commercial activities involving the iText software without
 * disclosing the source code of your own applications.
 * These activities include: offering paid services to customers as an ASP,
 * serving PDFs on the fly in a web application, shipping iText with a closed
 * source product.
 *
 * For more information, please contact iText Software Corp. at this
 * address: sales@itextpdf.com
 */
//package com.java2s;

/**
 * Demonstrates unescaping of XML character and entity references.
 */
public class Main {

    /**
     * Runs the demo: unescapes a sample string and prints the result.
     *
     * @param argv command-line arguments (unused)
     * @throws Exception declared for the demo; nothing here is expected to throw
     */
    public static void main(String[] argv) throws Exception {
        String s = "java2s.com";
        System.out.println(unescapeXML(s));
    }

    /**
     * Unescapes a String, replacing numeric character references
     * ({@code &#nn;} decimal or {@code &#xhh;} hexadecimal) and the five
     * predefined entities ({@code &lt;}, {@code &gt;}, {@code &amp;},
     * {@code &quot;} and {@code &apos;}) with the corresponding characters.
     *
     * <p>Numeric references whose value is not accepted by
     * {@link #isValidCharacterValue(int)} (or is not a number at all) are
     * removed from the output. Unknown named entities and stray ampersands
     * are copied through unchanged.
     *
     * @param s a String with entities
     * @return the unescaped string
     * @throws NullPointerException if {@code s} is {@code null}
     */
    public static String unescapeXML(final String s) {
        final char[] chars = s.toCharArray();
        final StringBuilder out = new StringBuilder(chars.length);
        for (int i = 0; i < chars.length; i++) {
            final char current = chars[i];
            if (current != '&') {
                out.append(current);
                continue;
            }
            // The shortest reference is "&xx;", so the terminating ';' cannot
            // appear before index i + 3.
            final int end = findInArray(';', chars, i + 3);
            if (end < 0) {
                out.append(current);
                continue;
            }
            final String reference = new String(chars, i + 1, end - i - 1);
            if (reference.startsWith("#")) {
                final int codePoint = parseCharacterReference(reference.substring(1));
                if (codePoint >= 0) {
                    // appendCodePoint emits a surrogate pair for values above
                    // U+FFFF instead of truncating them to a single char.
                    out.appendCodePoint(codePoint);
                }
                // Valid or not, the numeric reference is consumed.
                i = end;
            } else {
                final int named = unescape(reference);
                if (named > 0) {
                    out.append((char) named);
                    i = end;
                } else {
                    // Not a known entity: keep the '&' and continue after it.
                    out.append(current);
                }
            }
        }
        return out.toString();
    }

    /**
     * Parses the digits of a numeric character reference (the text between
     * {@code &#} and {@code ;}), accepting decimal or {@code x}-prefixed hex.
     *
     * @param digits the reference body without the leading {@code #}
     * @return the code point, or -1 if it is not a number or not a valid character value
     */
    private static int parseCharacterReference(String digits) {
        try {
            final int value = digits.startsWith("x")
                    ? Integer.parseInt(digits.substring(1), 16)
                    : Integer.parseInt(digits);
            return isValidCharacterValue(value) ? value : -1;
        } catch (NumberFormatException ignored) {
            // Not a number: the caller treats this as an invalid reference.
            return -1;
        }
    }

    /**
     * Looks for a character in a character array, starting from a certain position.
     *
     * @param needle the character you're looking for
     * @param haystack the character array
     * @param start the start position; negative values are treated as 0
     * @return the position where the character was found, or -1 if it wasn't found.
     */
    public static int findInArray(char needle, char[] haystack, int start) {
        for (int i = Math.max(start, 0); i < haystack.length; i++) {
            if (haystack[i] == needle) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Checks if a character value should be escaped/unescaped.
     *
     * @param s the String representation of a (decimal) integer
     * @return true if it's OK to escape or unescape this value;
     *         false if it is out of range or not a parsable integer
     */
    public static boolean isValidCharacterValue(String s) {
        try {
            return isValidCharacterValue(Integer.parseInt(s));
        } catch (NumberFormatException ignored) {
            // A non-numeric string can never be a valid character value.
            return false;
        }
    }

    /**
     * Checks if a character value should be escaped/unescaped.
     * Accepted values are tab, line feed, carriage return, and the ranges
     * 0x20-0xD7FF, 0xE000-0xFFFD and 0x10000-0x10FFFF.
     *
     * @param c a character value
     * @return true if it's OK to escape or unescape this value
     */
    public static boolean isValidCharacterValue(int c) {
        return c == 0x9 || c == 0xA || c == 0xD
                || (c >= 0x20 && c <= 0xD7FF)
                || (c >= 0xE000 && c <= 0xFFFD)
                || (c >= 0x10000 && c <= 0x10FFFF);
    }

    /**
     * Unescapes 'lt', 'gt', 'apos', 'quot' and 'amp' to the
     * corresponding character values.
     *
     * @param s an entity name, without the leading {@code &} and trailing {@code ;}
     * @return the character value, or -1 if the name is not one of the five above
     */
    public static int unescape(String s) {
        if ("apos".equals(s)) {
            return '\'';
        }
        if ("quot".equals(s)) {
            return '"';
        }
        if ("lt".equals(s)) {
            return '<';
        }
        if ("gt".equals(s)) {
            return '>';
        }
        if ("amp".equals(s)) {
            return '&';
        }
        return -1;
    }
}