Back to project page Reader.
The source code is released under:
GNU General Public License
If you think the Android project Reader listed on this page is inappropriate, for example because it contains malicious code/tools or violates copyright, please email info at java2s dot com. Thanks.
/**
 * Flym
 *
 * Copyright (c) 2012-2013 Frederic Julian
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

package com.carlrice.reader.utils;

import android.content.Context;
import android.content.Intent;
import android.text.TextUtils;

import com.carlrice.reader.Application;
import com.carlrice.reader.Constants;
import com.carlrice.reader.service.FetcherService;

import org.jsoup.Jsoup;
import org.jsoup.safety.Whitelist;

import java.io.File;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Static helpers for cleaning feed HTML and for locating / rewriting the image URLs it contains.
 */
public class HtmlUtils {

    /** Tags and attributes kept by {@link #improveHtmlContent}: jsoup's relaxed list plus embedded media. */
    private static final Whitelist JSOUP_WHITELIST = Whitelist.relaxed()
            .addTags("iframe", "video", "audio", "source", "track")
            .addAttributes("iframe", "src", "frameborder", "height", "width")
            .addAttributes("video", "src", "controls", "height", "width", "poster")
            .addAttributes("audio", "src", "controls")
            .addAttributes("source", "src", "type")
            .addAttributes("track", "src", "kind", "srclang", "label");

    /**
     * Matches an {@code <img>} tag, case-insensitively. Group 1 is the value of its {@code src} attribute.
     * The value may be wrapped in single or double quotes, and whitespace is tolerated between
     * {@code src=} and the opening quote.
     */
    private static final Pattern IMG_PATTERN =
            Pattern.compile("<img\\s+[^>]*src=\\s*['\"]([^'\"]+)['\"][^>]*>", Pattern.CASE_INSENSITIVE);

    /** Matches the "mf-viral" ad block, from its opening div/table up to the end of that line. */
    private static final Pattern AD_PATTERN =
            Pattern.compile("(?i)<div class=('|\")mf-viral('|\")><table border=('|\")0('|\")>.*");

    /** Matches a placeholder {@code src} followed by an {@code original-src}/{@code originalsrc} attribute. */
    private static final Pattern LAZY_LOADING_PATTERN =
            Pattern.compile("(?i)\\s+src=[^>]+\\s+original[-]*src=(\"|')");

    /** Matches the start of an {@code href}/{@code src} value that begins with {@code //} (no scheme). */
    private static final Pattern SCHEMELESS_URL_PATTERN =
            Pattern.compile("(?i)\\s+(href|src)=(\"|')//");

    /** URL suffixes (compared case-insensitively) that disqualify an image from being the "main" image. */
    private static final String[] IGNORED_IMAGE_SUFFIXES = {".gif", ".img"};

    private static final String URL_SPACE = "%20";

    /**
     * Cleans up raw HTML content: strips a known ad block, un-does lazy-loading image attributes,
     * prefixes scheme-less {@code //} URLs with {@code http:}, then sanitizes the result with jsoup.
     *
     * @param content the HTML to clean; may be {@code null}
     * @param baseUri base URI handed to {@link Jsoup#clean(String, String, Whitelist)}
     * @return the cleaned HTML, or {@code null} when {@code content} is {@code null}
     */
    public static String improveHtmlContent(String content, String baseUri) {
        if (content == null) {
            return null;
        }

        // remove some ads
        String cleaned = AD_PATTERN.matcher(content).replaceAll("");
        // remove lazy loading images stuff: keep only the "original" source as the src attribute
        cleaned = LAZY_LOADING_PATTERN.matcher(cleaned).replaceAll(" src=$1");
        // remove bad image paths: "//host/path" becomes "http://host/path"
        cleaned = SCHEMELESS_URL_PATTERN.matcher(cleaned).replaceAll(" $1=$2http://");
        // clean by jsoup
        return Jsoup.clean(cleaned, baseUri, JSOUP_WHITELIST);
    }

    /**
     * Collects the {@code src} of every {@code <img>} tag found in the content, in document order.
     *
     * @param content the HTML to scan; may be {@code null} or empty
     * @return the image URLs with spaces encoded as {@code %20}; empty (never {@code null}) when none are found
     */
    public static ArrayList<String> getImageURLs(String content) {
        ArrayList<String> images = new ArrayList<>();
        if (TextUtils.isEmpty(content)) {
            return images;
        }

        Matcher matcher = IMG_PATTERN.matcher(content);
        while (matcher.find()) {
            images.add(encodeSpaces(matcher.group(1)));
        }
        return images;
    }

    /**
     * Rewrites image URLs in the content so that they point at the local file returned by
     * {@code NetworkUtils.getDownloadedImagePath} whenever that file exists. Images whose file does not
     * exist are, if {@code NetworkUtils.needDownloadPictures()} returns {@code true}, handed to
     * {@link FetcherService} from a background thread.
     *
     * @param content the HTML to process; may be {@code null} or empty
     * @param entryId id of the entry the content belongs to
     * @return the content with available images pointing at local files; {@code content} itself when it is
     *         {@code null} or empty
     */
    public static String replaceImageURLs(String content, final long entryId) {
        if (TextUtils.isEmpty(content)) {
            return content;
        }

        boolean needDownloadPictures = NetworkUtils.needDownloadPictures();
        final ArrayList<String> imagesToDl = new ArrayList<>();

        // The matcher keeps scanning the original string; "content" is only reassigned to new copies.
        Matcher matcher = IMG_PATTERN.matcher(content);
        while (matcher.find()) {
            String rawUrl = matcher.group(1);
            String encodedUrl = encodeSpaces(rawUrl);

            String imgPath = NetworkUtils.getDownloadedImagePath(entryId, encodedUrl);
            if (new File(imgPath).exists()) {
                String localUrl = Constants.FILE_SCHEME + imgPath;
                // Replace the URL exactly as it is written in the markup. Searching only for the
                // %20-encoded form would never match a src that still contains literal spaces.
                content = content.replace(rawUrl, localUrl);
                if (!rawUrl.equals(encodedUrl)) {
                    // Also rewrite occurrences that were already encoded elsewhere in the content.
                    content = content.replace(encodedUrl, localUrl);
                }
            } else if (needDownloadPictures) {
                imagesToDl.add(encodedUrl);
            }
        }

        // Download the images if needed
        if (!imagesToDl.isEmpty()) {
            new Thread(new Runnable() {
                @Override
                public void run() {
                    FetcherService.addImagesToDownload(String.valueOf(entryId), imagesToDl);
                    Context context = Application.context();
                    context.startService(new Intent(context, FetcherService.class)
                            .setAction(FetcherService.ACTION_DOWNLOAD_IMAGES));
                }
            }).start();
        }

        return content;
    }

    /**
     * Finds the first image in the content that is acceptable as the main image.
     *
     * @param content the HTML to scan; may be {@code null} or empty
     * @return the image URL with spaces encoded as {@code %20}, or {@code null} if there is none
     */
    public static String getMainImageURL(String content) {
        if (TextUtils.isEmpty(content)) {
            return null;
        }

        Matcher matcher = IMG_PATTERN.matcher(content);
        while (matcher.find()) {
            String imgUrl = encodeSpaces(matcher.group(1));
            if (isCorrectImage(imgUrl)) {
                return imgUrl;
            }
        }
        return null;
    }

    /**
     * Finds the first URL in the list that is acceptable as the main image.
     *
     * @param imgUrls candidate image URLs; may be {@code null}
     * @return the first acceptable URL, or {@code null} if the list is {@code null} or has none
     */
    public static String getMainImageURL(ArrayList<String> imgUrls) {
        if (imgUrls == null) {
            return null;
        }

        for (String imgUrl : imgUrls) {
            if (isCorrectImage(imgUrl)) {
                return imgUrl;
            }
        }
        return null;
    }

    /**
     * Tells whether a URL may be used as a main image: it must be non-null and must not end with one of
     * {@link #IGNORED_IMAGE_SUFFIXES}, compared without regard to case.
     */
    private static boolean isCorrectImage(String imgUrl) {
        if (imgUrl == null) {
            return false;
        }

        for (String suffix : IGNORED_IMAGE_SUFFIXES) {
            // A negative offset (URL shorter than the suffix) makes regionMatches return false.
            int offset = imgUrl.length() - suffix.length();
            if (imgUrl.regionMatches(true, offset, suffix, 0, suffix.length())) {
                return false;
            }
        }
        return true;
    }

    /** Encodes every literal space of a URL as {@code %20}. */
    private static String encodeSpaces(String url) {
        return url.replace(" ", URL_SPACE);
    }
}