Android Open Source - Reader Html Utils






From Project

Back to project page Reader.

License

The source code is released under:

GNU General Public License

If you think the Android project Reader listed in this page is inappropriate, such as containing malicious code/tools or violating the copyright, please email info at java2s dot com, thanks.

Java Source Code

/**
 * Flym
 *
 * Copyright (c) 2012-2013 Frederic Julian
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package com.carlrice.reader.utils;

import android.content.Context;
import android.content.Intent;
import android.text.TextUtils;

import com.carlrice.reader.Application;
import com.carlrice.reader.Constants;
import com.carlrice.reader.service.FetcherService;

import org.jsoup.Jsoup;
import org.jsoup.safety.Whitelist;

import java.io.File;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Static helpers for sanitizing feed entry HTML and for locating or rewriting
 * the image URLs referenced by its {@code <img>} tags.
 */
public class HtmlUtils {

    private static final Whitelist JSOUP_WHITELIST = Whitelist.relaxed().addTags("iframe", "video", "audio", "source", "track")
            .addAttributes("iframe", "src", "frameborder", "height", "width")
            .addAttributes("video", "src", "controls", "height", "width", "poster")
            .addAttributes("audio", "src", "controls")
            .addAttributes("source", "src", "type")
            .addAttributes("track", "src", "kind", "srclang", "label");

    // Matches an <img> tag. Group 1 captures the src value; \s* tolerates whitespace
    // after "src=", and both single and double quotes are accepted around the value.
    private static final Pattern IMG_PATTERN = Pattern.compile("<img\\s+[^>]*src=\\s*['\"]([^'\"]+)['\"][^>]*>", Pattern.CASE_INSENSITIVE);
    private static final String URL_SPACE = "%20";

    // Precompiled once instead of being recompiled by String.replaceAll on every call.
    // Without DOTALL, ".*" stops at the first line terminator, so the ad pattern only
    // consumes the remainder of the line on which the marker div starts.
    private static final Pattern ADS_PATTERN = Pattern.compile("(?i)<div class=('|\")mf-viral('|\")><table border=('|\")0('|\")>.*");
    private static final Pattern LAZY_LOADING_PATTERN = Pattern.compile("(?i)\\s+src=[^>]+\\s+original[-]*src=(\"|')");
    private static final Pattern PROTOCOL_RELATIVE_PATTERN = Pattern.compile("(?i)\\s+(href|src)=(\"|')//");

    /**
     * Cleans raw entry HTML: strips a known ad block, drops the placeholder
     * {@code src} in front of an {@code original-src} attribute, turns
     * protocol-relative {@code href}/{@code src} values into {@code http://} ones,
     * and finally sanitizes the result with jsoup.
     *
     * @param content the HTML to clean, may be {@code null}
     * @param baseUri base URI handed to {@link Jsoup#clean(String, String, Whitelist)}
     * @return the cleaned HTML, or {@code null} when {@code content} is {@code null}
     */
    public static String improveHtmlContent(String content, String baseUri) {
        if (content == null) {
            return null;
        }

        // remove some ads
        content = ADS_PATTERN.matcher(content).replaceAll("");
        // remove lazy loading images stuff
        content = LAZY_LOADING_PATTERN.matcher(content).replaceAll(" src=$1");
        // remove bad image paths
        content = PROTOCOL_RELATIVE_PATTERN.matcher(content).replaceAll(" $1=$2http://");
        // clean by jsoup
        return Jsoup.clean(content, baseUri, JSOUP_WHITELIST);
    }

    /**
     * Collects the {@code src} of every {@code <img>} tag found in the content.
     *
     * @param content the HTML to scan, may be {@code null} or empty
     * @return the image URLs in document order, with spaces encoded as {@code %20};
     *         never {@code null}
     */
    public static ArrayList<String> getImageURLs(String content) {
        ArrayList<String> images = new ArrayList<>();

        if (!TextUtils.isEmpty(content)) {
            Matcher matcher = IMG_PATTERN.matcher(content);

            while (matcher.find()) {
                images.add(matcher.group(1).replace(" ", URL_SPACE));
            }
        }

        return images;
    }

    /**
     * Rewrites image URLs to point at their local copy when the file returned by
     * {@code NetworkUtils.getDownloadedImagePath} exists. Images without a local
     * file are queued on {@link FetcherService} from a background thread, but only
     * when {@code NetworkUtils.needDownloadPictures()} returns {@code true}.
     *
     * @param content the HTML to rewrite, may be {@code null} or empty
     * @param entryId id of the entry the content belongs to
     * @return the rewritten HTML; {@code content} itself when it is {@code null} or empty
     */
    public static String replaceImageURLs(String content, final long entryId) {
        if (TextUtils.isEmpty(content)) {
            return content;
        }

        boolean needDownloadPictures = NetworkUtils.needDownloadPictures();
        final ArrayList<String> imagesToDl = new ArrayList<>();

        // The matcher keeps scanning the original string; reassigning 'content'
        // below does not disturb the iteration because String is immutable.
        Matcher matcher = IMG_PATTERN.matcher(content);
        while (matcher.find()) {
            String rawUrl = matcher.group(1);
            String encodedUrl = rawUrl.replace(" ", URL_SPACE);

            String imgPath = NetworkUtils.getDownloadedImagePath(entryId, encodedUrl);
            if (new File(imgPath).exists()) {
                // Replace the URL exactly as it appears in the document. Searching for the
                // %20-encoded form would never match a src that contains literal spaces.
                content = content.replace(rawUrl, Constants.FILE_SCHEME + imgPath);
            } else if (needDownloadPictures && !imagesToDl.contains(encodedUrl)) {
                // Queue each distinct URL only once, even if the image is repeated.
                imagesToDl.add(encodedUrl);
            }
        }

        // Download the images if needed
        if (!imagesToDl.isEmpty()) {
            new Thread(new Runnable() {
                @Override
                public void run() {
                    FetcherService.addImagesToDownload(String.valueOf(entryId), imagesToDl);
                    Context context = Application.context();
                    context.startService(new Intent(context, FetcherService.class).setAction(FetcherService.ACTION_DOWNLOAD_IMAGES));
                }
            }).start();
        }

        return content;
    }

    /**
     * Finds the first image in the content whose URL is accepted by
     * {@link #isCorrectImage(String)}.
     *
     * @param content the HTML to scan, may be {@code null} or empty
     * @return the first accepted image URL (spaces encoded as {@code %20}),
     *         or {@code null} when there is none
     */
    public static String getMainImageURL(String content) {
        if (!TextUtils.isEmpty(content)) {
            Matcher matcher = IMG_PATTERN.matcher(content);

            while (matcher.find()) {
                String imgUrl = matcher.group(1).replace(" ", URL_SPACE);
                if (isCorrectImage(imgUrl)) {
                    return imgUrl;
                }
            }
        }

        return null;
    }

    /**
     * Finds the first URL in the list that is accepted by
     * {@link #isCorrectImage(String)}.
     *
     * @param imgUrls candidate image URLs, may be {@code null}
     * @return the first accepted URL, or {@code null} when the list is
     *         {@code null} or contains no accepted URL
     */
    public static String getMainImageURL(ArrayList<String> imgUrls) {
        if (imgUrls == null) {
            return null;
        }

        for (String imgUrl : imgUrls) {
            if (isCorrectImage(imgUrl)) {
                return imgUrl;
            }
        }

        return null;
    }

    /**
     * Tells whether the URL is usable as a main image: it must be non-null and must
     * not end with {@code .gif} or {@code .img}, compared case-insensitively.
     */
    private static boolean isCorrectImage(String imgUrl) {
        return imgUrl != null
                && !endsWithIgnoreCase(imgUrl, ".gif")
                && !endsWithIgnoreCase(imgUrl, ".img");
    }

    /**
     * Case-insensitive {@link String#endsWith(String)}. A value shorter than the suffix
     * yields a negative offset, for which {@code regionMatches} returns {@code false}.
     */
    private static boolean endsWithIgnoreCase(String value, String suffix) {
        return value.regionMatches(true, value.length() - suffix.length(), suffix, 0, suffix.length());
    }
}




Java Source Code List

com.carlrice.reader.Application.java
com.carlrice.reader.Constants.java
com.carlrice.reader.activity.AboutActivity.java
com.carlrice.reader.activity.AddGoogleNewsActivity.java
com.carlrice.reader.activity.BaseActivity.java
com.carlrice.reader.activity.EditFeedActivity.java
com.carlrice.reader.activity.EditFeedsListActivity.java
com.carlrice.reader.activity.EntryActivity.java
com.carlrice.reader.activity.GeneralPrefsActivity.java
com.carlrice.reader.activity.HomeActivity.java
com.carlrice.reader.adapter.CursorLoaderExpandableListAdapter.java
com.carlrice.reader.adapter.DrawerAdapter.java
com.carlrice.reader.adapter.EntriesCursorAdapter.java
com.carlrice.reader.adapter.FeedsCursorAdapter.java
com.carlrice.reader.adapter.FiltersCursorAdapter.java
com.carlrice.reader.fragment.EditFeedsListFragment.java
com.carlrice.reader.fragment.EntriesListFragment.java
com.carlrice.reader.fragment.EntryFragment.java
com.carlrice.reader.fragment.GeneralPrefsFragment.java
com.carlrice.reader.fragment.SwipeRefreshFragment.java
com.carlrice.reader.fragment.SwipeRefreshListFragment.java
com.carlrice.reader.loader.BaseLoader.java
com.carlrice.reader.parser.OPML.java
com.carlrice.reader.parser.RssAtomParser.java
com.carlrice.reader.provider.DatabaseHelper.java
com.carlrice.reader.provider.FeedDataContentProvider.java
com.carlrice.reader.provider.FeedData.java
com.carlrice.reader.receiver.BootCompletedBroadcastReceiver.java
com.carlrice.reader.receiver.ConnectionChangeReceiver.java
com.carlrice.reader.service.FetcherService.java
com.carlrice.reader.service.RefreshService.java
com.carlrice.reader.utils.ArticleTextExtractor.java
com.carlrice.reader.utils.CircleTransform.java
com.carlrice.reader.utils.FileUtils.java
com.carlrice.reader.utils.HtmlUtils.java
com.carlrice.reader.utils.NetworkUtils.java
com.carlrice.reader.utils.PrefUtils.java
com.carlrice.reader.utils.StringUtils.java
com.carlrice.reader.utils.ThrottledContentObserver.java
com.carlrice.reader.utils.UiUtils.java
com.carlrice.reader.view.AutoSummaryListPreference.java
com.carlrice.reader.view.BakedBezierInterpolator.java
com.carlrice.reader.view.DragNDropExpandableListView.java
com.carlrice.reader.view.DragNDropListener.java
com.carlrice.reader.view.EntryView.java
com.carlrice.reader.view.SwipeProgressBar.java
com.carlrice.reader.view.SwipeRefreshLayout.java
com.carlrice.reader.widget.ColorPickerDialogPreference.java
com.carlrice.reader.widget.TickerWidgetProvider.java
com.carlrice.reader.widget.TickerWidgetService.java
com.carlrice.reader.widget.WidgetConfigActivity.java
com.carlrice.reader.widget.WidgetConfigFragment.java
com.carlrice.reader.widget.WidgetProvider.java
com.carlrice.reader.widget.WidgetService.java