Here you can find the source of replaceHtmlEntities(final String text)
public static String replaceHtmlEntities(final String text)
//package com.java2s; /******************************************************************************* * Copyright 2016/*from w w w .j av a2 s. c om*/ * Ubiquitous Knowledge Processing (UKP) Lab * Technische Universit?t Darmstadt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ import java.util.regex.Matcher; import java.util.regex.Pattern; public class Main { private static final Pattern HTML_ENTITIES = Pattern.compile("^(.*?)&#(\\d+);"); /** Replaces HTML entities such as ̣ with the corresponding * characters. */ public static String replaceHtmlEntities(final String text) { String t = text; StringBuilder result = new StringBuilder(); do { Matcher matcher = HTML_ENTITIES.matcher(t); if (matcher.find()) { result.append(matcher.group(1)); result.append((char) Integer.parseInt(matcher.group(2))); t = matcher.replaceFirst(""); } else break; } while (true); result.append(t); // if (!text.equals(result.toString())) // System.out.println(">" + text + "< to >" + result.toString() + "<"); return result.toString(); } }