Back to project page CATaZine-Live.
The source code is released under:
GNU General Public License
If you think the Android project CATaZine-Live listed on this page is inappropriate (for example, because it contains malicious code or tools, or violates copyright), please email info at java2s dot com. Thanks.
package com.melegy.catazine.utils;

import android.content.Context;
import android.content.Intent;
import android.text.TextUtils;

import org.jsoup.Jsoup;
import org.jsoup.safety.Whitelist;

import com.melegy.catazine.Constants;
import com.melegy.catazine.MainApplication;
import com.melegy.catazine.service.FetcherService;

import java.io.File;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Static helpers for cleaning up HTML content and for finding / rewriting the
 * image URLs referenced by its {@code <img>} tags.
 */
public class HtmlUtils {

    /**
     * jsoup whitelist used by {@link #improveHtmlContent(String, String)}: the
     * "relaxed" preset extended with embedded-media tags and the attributes
     * they need.
     */
    private static final Whitelist JSOUP_WHITELIST = Whitelist.relaxed()
            .addTags("iframe", "video", "audio", "source", "track")
            .addAttributes("iframe", "src", "frameborder", "height", "width")
            .addAttributes("video", "src", "controls", "height", "width", "poster")
            .addAttributes("audio", "src", "controls")
            .addAttributes("source", "src", "type")
            .addAttributes("track", "src", "kind", "srclang", "label");

    /**
     * Matches an {@code <img ...>} tag, case-insensitively. Group 1 is the raw
     * value of its {@code src} attribute; the value may be delimited by single
     * or double quotes and optional whitespace may follow {@code src=}.
     */
    private static final Pattern IMG_PATTERN = Pattern.compile("<img\\s+[^>]*src=\\s*['\"]([^'\"]+)['\"][^>]*>", Pattern.CASE_INSENSITIVE);

    /**
     * Ad block: a {@code mf-viral} div immediately followed by a
     * {@code border=0} table, plus everything after it up to the end of the
     * line ('.' does not match line terminators here).
     */
    private static final Pattern AD_PATTERN = Pattern.compile("(?i)<div class=('|\")mf-viral('|\")><table border=('|\")0('|\")>.*");

    /**
     * Lazy-loading markup: a {@code src} attribute followed, inside the same
     * tag, by an {@code original-src} / {@code originalsrc} attribute. Group 1
     * is the opening quote of the latter.
     */
    private static final Pattern LAZY_LOADING_PATTERN = Pattern.compile("(?i)\\s+src=[^>]+\\s+original[-]*src=(\"|')");

    /**
     * Protocol-relative {@code href} / {@code src} values (starting with
     * {@code //}). Group 1 is the attribute name, group 2 the opening quote.
     */
    private static final Pattern PROTOCOL_RELATIVE_PATTERN = Pattern.compile("(?i)\\s+(href|src)=(\"|')//");

    /** Replacement used for literal spaces found inside image URLs. */
    private static final String URL_SPACE = "%20";

    /**
     * Cleans up a piece of HTML.
     * <p>
     * In order: removes a known ad block, turns lazy-loading image markup into
     * a plain {@code src} attribute, prefixes protocol-relative
     * {@code href}/{@code src} values with {@code http:}, and finally
     * sanitizes the result with jsoup using {@link #JSOUP_WHITELIST}.
     *
     * @param content the HTML to clean; may be {@code null}
     * @param baseUri the base URI handed to {@code Jsoup.clean}
     * @return the cleaned HTML, or {@code null} when {@code content} is {@code null}
     */
    public static String improveHtmlContent(String content, String baseUri) {
        if (content == null) {
            return null;
        }

        // remove some ads
        content = AD_PATTERN.matcher(content).replaceAll("");
        // remove lazy loading images stuff: drop the placeholder src and let
        // the "original-src" value become the real src
        content = LAZY_LOADING_PATTERN.matcher(content).replaceAll(" src=$1");
        // remove bad image paths: "//host/path" -> "http://host/path"
        content = PROTOCOL_RELATIVE_PATTERN.matcher(content).replaceAll(" $1=$2http://");

        // clean by jsoup
        return Jsoup.clean(content, baseUri, JSOUP_WHITELIST);
    }

    /**
     * Collects the {@code src} value of every {@code <img>} tag in the content.
     *
     * @param content the HTML to scan; may be {@code null} or empty
     * @return the image URLs in document order, with spaces replaced by
     *         {@code %20}; an empty list when {@code content} is {@code null}
     *         or empty
     */
    public static ArrayList<String> getImageURLs(String content) {
        ArrayList<String> images = new ArrayList<String>();

        if (!TextUtils.isEmpty(content)) {
            Matcher matcher = IMG_PATTERN.matcher(content);
            while (matcher.find()) {
                images.add(matcher.group(1).replace(" ", URL_SPACE));
            }
        }

        return images;
    }

    /**
     * Rewrites the image URLs of the content so that they point at the path
     * returned by {@code NetworkUtils.getDownloadedImagePath} for the entry
     * (prefixed with {@code Constants.FILE_SCHEME}), and asks
     * {@link FetcherService} to download the images whose file does not exist
     * yet.
     * <p>
     * Every occurrence of an image URL in the content is replaced, not only the
     * one inside the {@code <img>} tag. The download request is issued from a
     * new background thread.
     *
     * @param content the HTML to rewrite; may be {@code null} or empty
     * @param entryId the id of the entry the content belongs to
     * @return the rewritten HTML; {@code content} itself when it is
     *         {@code null} or empty
     */
    public static String replaceImageURLs(String content, final long entryId) {
        if (TextUtils.isEmpty(content)) {
            return content;
        }

        final ArrayList<String> imagesToDl = new ArrayList<String>();

        // The matcher keeps scanning the string it was created with, so
        // reassigning 'content' inside the loop does not disturb the iteration.
        Matcher matcher = IMG_PATTERN.matcher(content);
        while (matcher.find()) {
            String rawUrl = matcher.group(1);
            String url = rawUrl.replace(" ", URL_SPACE);

            if (url.startsWith(Constants.FILE_SCHEME)) { // Just for legacy, could be removed later
                continue;
            }

            String imgPath = NetworkUtils.getDownloadedImagePath(entryId, url);

            // Replace the text exactly as it appears in the content. Searching
            // for the normalized URL would never match a URL that contains
            // spaces, and such images would silently keep their remote address.
            content = content.replace(rawUrl, Constants.FILE_SCHEME + imgPath);

            // The same URL can appear in several <img> tags; queue it only once.
            if (!new File(imgPath).exists() && !imagesToDl.contains(url)) {
                imagesToDl.add(url);
            }
        }

        // Download the images if needed
        if (!imagesToDl.isEmpty()) {
            new Thread(new Runnable() {
                @Override
                public void run() {
                    FetcherService.addImagesToDownload(String.valueOf(entryId), imagesToDl);
                    Context context = MainApplication.getContext();
                    context.startService(new Intent(context, FetcherService.class).setAction(FetcherService.ACTION_DOWNLOAD_IMAGES));
                }
            }).start();
        }

        return content;
    }
}