// Java tutorial
//package com.java2s;
/* *************************************************************************
 *
 *  TMPotter - Bi-text Aligner/TMX Editor
 *
 *  Copyright (C) 2015 Hiroshi Miura
 *
 *  Part of this come from OmegaT.
 *
 *  Copyright (C) 2000-2006 Keith Godfrey and Maxym Mykhalchuk
 *
 *  This file is part of TMPotter.
 *
 *  TMPotter is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  TMPotter is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with TMPotter.  If not, see http://www.gnu.org/licenses/.
 *
 * *************************************************************************/

/**
 * Static helpers that turn arbitrary plain text into text that is safe to
 * embed in an XML document.
 */
public class Main {

    /**
     * Converts plain text into valid XML character data.
     *
     * <p>Code points that are not allowed in XML are first replaced by a
     * space (see {@link #removeXMLInvalidChars(String)}), then the markup
     * characters are escaped (see {@link #escapeXMLChars(int)}). The output
     * stream must convert the result to UTF-8 when saving to disk.
     *
     * @param plaintext the text to convert; must not be {@code null}
     * @return the escaped text
     * @throws NullPointerException if {@code plaintext} is {@code null}
     */
    public static String makeValidXML(String plaintext) {
        String text = removeXMLInvalidChars(plaintext);
        StringBuilder out = new StringBuilder(text.length());
        for (int i = 0; i < text.length(); ) {
            int cp = text.codePointAt(i);
            out.append(escapeXMLChars(cp));
            i += Character.charCount(cp);
        }
        return out.toString();
    }

    /**
     * Replaces every code point that {@link #isValidXMLChar(int)} rejects
     * with a single space. Valid code points are copied unchanged.
     *
     * @param str the text to clean; must not be {@code null}
     * @return a copy of {@code str} containing only valid XML characters
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static String removeXMLInvalidChars(String str) {
        StringBuilder sb = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); ) {
            int cp = str.codePointAt(i);
            sb.appendCodePoint(isValidXMLChar(cp) ? cp : ' ');
            // Advance by the width of the code point that was read, not of
            // whatever was appended in its place.
            i += Character.charCount(cp);
        }
        return sb.toString();
    }

    /**
     * Converts a single code point into its XML-escaped form. The output
     * stream must convert the result to UTF-8 when saving to disk.
     *
     * <p>{@code &}, {@code >}, {@code <} and {@code "} are replaced by the
     * entity references {@code &amp;}, {@code &gt;}, {@code &lt;} and
     * {@code &quot;}. Every other code point, including the apostrophe, is
     * returned unchanged.
     *
     * @param cp the code point to escape
     * @return the entity reference for a markup character, otherwise the
     *         code point itself as a string
     * @throws IllegalArgumentException if {@code cp} is not a valid Unicode
     *         code point
     */
    public static String escapeXMLChars(int cp) {
        switch (cp) {
        // The apostrophe is intentionally not escaped.
        case '&':
            return "&amp;";
        case '>':
            return "&gt;";
        case '<':
            return "&lt;";
        case '"':
            return "&quot;";
        default:
            return String.valueOf(Character.toChars(cp));
        }
    }

    /**
     * Tells whether a code point may appear in an XML document.
     *
     * <p>Accepted are tab (0x09), line feed (0x0A), carriage return (0x0D)
     * and the ranges 0x20-0xD7FF, 0xE000-0xFFFD and 0x10000-0x10FFFF.
     *
     * @param codePoint the code point to test
     * @return {@code true} if the code point is in one of the accepted ranges
     */
    public static boolean isValidXMLChar(int codePoint) {
        return codePoint == 0x09
                || codePoint == 0x0A
                || codePoint == 0x0D
                || (codePoint >= 0x20 && codePoint <= 0xD7FF)
                || (codePoint >= 0xE000 && codePoint <= 0xFFFD)
                || (codePoint >= 0x10000 && codePoint <= 0x10FFFF);
    }
}