Here you can find the source of clean(String html, Whitelist whitelist)
Parameter | Description |
---|---|
html | The HTML code to clean. |
whitelist | The whitelist. |
public static String clean(String html, Whitelist whitelist)
//package com.java2s; /*/*from www . j a v a2s . co m*/ * Copyright (C) 2012 Klaus Reimer <k@ailis.de> * See LICENSE.md for licensing information. */ import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.TextNode; import org.jsoup.safety.Cleaner; import org.jsoup.safety.Whitelist; public class Main { /** * Cleans the specified HTML with the specified white list. * * @param html * The HTML code to clean. * @param whitelist * The whitelist. * @return The cleaned HTML. */ public static String clean(String html, Whitelist whitelist) { Document doc = parse(mask(html)); Cleaner cleaner = new Cleaner(whitelist); Document clean = cleaner.clean(doc); clean.outputSettings().prettyPrint(false); return unmask(normalizeWhitespaces(clean).body().html()); } /** * Cleans the specified HTML code so it only contains valid and allowed * tags. * * @param html * The HTML to clean. * @return The cleaned HTML. */ public static String clean(final String html) { return clean(html, Whitelist.basic()); } /** * Parses the specified html code. * * @param html * The HTML code to parse. * @return The parsed document. */ public static Document parse(final String html) { Document doc = Jsoup.parseBodyFragment(html); doc.outputSettings().prettyPrint(false); return doc; } /** * Masks problematic code which Jsoup doesn't handle as we need it. After * Jsoup did its work the string must be piped through unmask() to restore * the original code. * * @param html * The HTML text to mask. * @return The masked HTML text. */ private static String mask(final String html) { return html.replace("}", "@jasdoc.unicode#125;"); } /** * Unmasks previously masked html text to restore masked code it. * * @param html * The previously masked html text. * @return The unmasked HTML text. */ private static String unmask(final String html) { return html.replace("@jasdoc.unicode", "&"); } /** * Normalizes the whitespaces in text nodes of the specified document. * Normally this is done by pretty printing but I disabled it because * indentation done by Jsoup is pretty buggy. So I have to normalize the * whitespaces manually here. * * @param doc * The document to normalise whitespaces in. * @return The normalized document. */ private static Document normalizeWhitespaces(Document doc) { for (TextNode node : doc.body().textNodes()) { node.text(node.text()); } return doc; } }