com.vaushell.superpipes.tools.http.ImageExtractor.java Source code

Java tutorial

Introduction

Here is the source code for com.vaushell.superpipes.tools.http.ImageExtractor.java

Source

/*
 * Copyright (C) 2013 Fabien Vauchelles (fabien_AT_vauchelles_DOT_com).
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3, 29 June 2007, of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 * MA 02110-1301  USA
 */

package com.vaushell.superpipes.tools.http;

import com.vaushell.superpipes.tools.HTTPhelper;
import java.awt.image.BufferedImage;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import org.apache.http.HttpEntity;
import org.apache.http.StatusLine;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Extract biggest image of a webpage.
 *
 * <p>
 * The page is fetched with the HTTP client given at construction, the {@code src} of every {@code <img>} tag is
 * resolved against the page URI, the pictures are downloaded concurrently and the one with the largest pixel
 * area (width x height) is returned.
 * </p>
 *
 * @author Fabien Vauchelles (fabien_AT_vauchelles_DOT_com)
 */
public class ImageExtractor {
    // PUBLIC
    /**
     * Create an extractor.
     *
     * @param client HTTP client used for the page request and for every image download. This class never closes
     * it.
     */
    public ImageExtractor(final CloseableHttpClient client) {
        this.client = client;
    }

    /**
     * Return the biggest image of this webpage.
     *
     * @param rootURI Webpage URI
     * @return Biggest image (largest width x height), or {@code null} if the page references no image or none of
     * them could be loaded
     * @throws IOException if the page cannot be fetched, does not answer with HTTP 200, or if the image downloads
     * are interrupted or do not finish in time
     */
    public BufferedImage extractBiggest(final URI rootURI) throws IOException {
        final List<URI> imagesURIs = findImagesURIs(rootURI);
        if (imagesURIs.isEmpty()) {
            // Nothing to download: avoid creating a thread pool for nothing.
            return null;
        }

        return selectBiggest(downloadAll(imagesURIs));
    }

    // PRIVATE
    /**
     * Upper bound of concurrent image downloads, so that a page with thousands of &lt;img&gt; tags cannot spawn
     * thousands of threads.
     */
    private static final int MAX_DOWNLOAD_THREADS = 8;

    private final CloseableHttpClient client;

    /**
     * Fetch the page and collect the resolved URI of every &lt;img&gt; tag having a non-empty src attribute.
     */
    private List<URI> findImagesURIs(final URI rootURI) throws IOException {
        final List<URI> imagesURIs = new ArrayList<>();

        // Exec request
        final HttpGet get = new HttpGet(rootURI);

        try (final CloseableHttpResponse response = client.execute(get)) {
            final StatusLine sl = response.getStatusLine();
            if (sl.getStatusCode() != 200) {
                throw new IOException("HTTP " + sl.getStatusCode() + " " + sl.getReasonPhrase() + " for " + rootURI);
            }

            final HttpEntity responseEntity = response.getEntity();
            if (responseEntity == null) {
                // A response is allowed to carry no body: then there is no image.
                return imagesURIs;
            }

            // Closing the content stream and then the response releases the connection, so no extra
            // entity consumption is needed afterwards.
            try (final InputStream is = responseEntity.getContent()) {
                final Document doc = Jsoup.parse(is, "UTF-8", rootURI.toString());

                for (final Element elt : doc.select("img")) {
                    final String src = elt.attr("src");
                    if (src != null && !src.isEmpty()) {
                        try {
                            imagesURIs.add(rootURI.resolve(src));
                        } catch (final IllegalArgumentException ignored) {
                            // Ignore wrong encoded URI
                        }
                    }
                }
            }
        }

        return imagesURIs;
    }

    /**
     * Download every image concurrently. The returned array is index-aligned with the given list; a slot stays
     * null when the corresponding download failed.
     */
    private BufferedImage[] downloadAll(final List<URI> imagesURIs) throws IOException {
        final BufferedImage[] images = new BufferedImage[imagesURIs.size()];

        final int threads = Math.max(1, Math.min(imagesURIs.size(), MAX_DOWNLOAD_THREADS));
        final ExecutorService service = Executors.newFixedThreadPool(threads);
        try {
            for (int i = 0; i < imagesURIs.size(); ++i) {
                final int num = i;

                service.execute(new Runnable() {
                    @Override
                    public void run() {
                        try {
                            images[num] = HTTPhelper.loadPicture(client, imagesURIs.get(num));
                        } catch (final IOException ignored) {
                            // Best effort: an image which cannot be loaded is simply skipped.
                            images[num] = null;
                        }
                    }
                });
            }

            service.shutdown();

            // Only a successful awaitTermination guarantees that every worker write to the array is visible here.
            if (!service.awaitTermination(1L, TimeUnit.DAYS)) {
                throw new IOException("Timed out while downloading images");
            }
        } catch (final InterruptedException ex) {
            // Restore the interrupt flag for the caller and give up: the array may still be written by workers.
            Thread.currentThread().interrupt();

            throw new IOException("Interrupted while downloading images", ex);
        } finally {
            // No-op after a normal termination; cancels pending and running downloads otherwise.
            service.shutdownNow();
        }

        return images;
    }

    /**
     * Return the image with the largest pixel area, or null if every slot is null. On a tie, the first one wins.
     */
    private static BufferedImage selectBiggest(final BufferedImage[] images) {
        BufferedImage biggest = null;
        long biggestSize = Long.MIN_VALUE;
        for (final BufferedImage image : images) {
            if (image != null) {
                // Computed as long: width * height can overflow an int for very large pictures.
                final long actualSize = (long) image.getWidth() * image.getHeight();
                if (actualSize > biggestSize) {
                    biggest = image;

                    biggestSize = actualSize;
                }
            }
        }

        return biggest;
    }
}