cn.kk.exia.MangaDownloader.java Source code

Java tutorial

Introduction

Here is the source code for cn.kk.exia.MangaDownloader.java

Source

/*  Copyright (c) 2010 Xiaoyun Zhu
 * 
 *  Permission is hereby granted, free of charge, to any person obtaining a copy  
 *  of this software and associated documentation files (the "Software"), to deal  
 *  in the Software without restriction, including without limitation the rights  
 *  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell  
 *  copies of the Software, and to permit persons to whom the Software is  
 *  furnished to do so, subject to the following conditions:
 *  
 *  The above copyright notice and this permission notice shall be included in  
 *  all copies or substantial portions of the Software.
 *  
 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR  
 *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,  
 *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE  
 *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER  
 *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,  
 *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN  
 *  THE SOFTWARE.  
 */
package cn.kk.exia;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.net.CookieHandler;
import java.net.CookieManager;
import java.net.CookiePolicy;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.lang3.StringEscapeUtils;

public class MangaDownloader implements Logger {
    /**
     * Worker thread that copies the content of one URL into one local file.
     * <p>
     * The outcome is published through {@link #success}. A download counts as successful only when
     * the target is a regular file larger than {@code MIN_IMG_OK_BYTES}; otherwise the file is
     * deleted again and the failure is reported through the supplied logger.
     */
    private static class DownloadThread extends Thread {
        private final String to;
        private final String url;
        private final Logger log;
        /** Result flag; volatile because it is read by the thread that started this worker. */
        volatile boolean success;

        /**
         * @param to
         *            path of the file to write
         * @param url
         *            source URL; HTML entities in it are unescaped before use
         * @param log
         *            receives error messages
         */
        public DownloadThread(final String to, final String url, final Logger log) {
            super();
            this.to = to;
            this.url = StringEscapeUtils.unescapeHtml4(url);

            this.log = log;
            this.success = false;
        }

        @Override
        public void run() {
            try {
                // Both streams are closed before the size check and before a possible delete:
                // buffered bytes must reach the disk first, and some platforms refuse to delete
                // a file that is still open.
                this.copyUrlToFile();
                final File written = new File(this.to);
                if (written.isFile() && (written.length() > MangaDownloader.MIN_IMG_OK_BYTES)) {
                    this.success = true;
                } else {
                    throw new FileNotFoundException("??" + this.url);
                }
            } catch (final IOException e) {
                new File(this.to).delete();
                this.log.err("" + e.toString());
                if (e instanceof FileNotFoundException) {
                    // Leaves run() with an unchecked exception, i.e. it ends up in this thread's
                    // uncaught-exception handler.
                    throw new RuntimeException(e);
                }
            }
        }

        /** Streams the URL content into the target file and closes both streams. */
        private void copyUrlToFile() throws IOException {
            OutputStream out = null;
            InputStream in = null;
            try {
                out = new BufferedOutputStream(new FileOutputStream(this.to));
                in = new BufferedInputStream(MangaDownloader.openUrlInputStream(this.url));
                Helper.write(in, out);
                // Flush explicitly so that a write failure surfaces as an IOException here
                // instead of being swallowed by the quiet close below.
                out.flush();
            } finally {
                if (in != null) {
                    try {
                        in.close();
                    } catch (final IOException e) {
                        e.printStackTrace();
                    }
                }
                if (out != null) {
                    try {
                        out.close();
                    } catch (final IOException e) {
                        e.printStackTrace();
                    }
                }
            }
        }
    }

    /** Multiplier, in milliseconds, used by every {@code Thread.sleep} pause in this class. */
    public static int sleepBase = 1000;

    /**
     * User-Agent header value, randomised once at class initialisation: Windows NT 5.1 or 6.1 and a
     * Firefox version with major 3..10 and two further components in 0..5.
     */
    public static String userAgent = "Mozilla/5.0 (Windows NT " + (((int) (Math.random() * 2) + 5)) + ".1) Firefox/"
            + (((int) (Math.random() * 8) + 3)) + "." + (((int) (Math.random() * 6) + 0)) + "."
            + (((int) (Math.random() * 6) + 0));

    /**
     * Optional initial cookie string. When non-empty, {@link #resetConnectionHeaders()} copies it
     * into the cookie buffer and the default "Cookie" header.
     */
    public static String cookieString = "";

    /**
     * When true, {@code checkAndDump} appends every processed line to "exia-dump.xml" in the target
     * directory. The code in this file only ever assigns {@code false} to it.
     */
    private static boolean dump = false;

    /** Request headers applied to every connection created by {@code getUrlConnection}. */
    private static final Map<String, String> DEFAULT_CONN_HEADERS = new HashMap<String, String>();

    /** Charset used for URL-encoding the keyword, decoding pages and encoding POST bodies. */
    public static final Charset CHARSET_UTF8 = Charset.forName("UTF-8");

    /** Content of the {@code <title>} element of the gallery being downloaded, if seen yet. */
    private static String mangaTitle;

    // Global HTTP setup: no keep-alive, a JVM-wide cookie manager accepting all cookies, no automatic
    // redirects, and the default request headers.
    // NOTE: this block runs before the field initialisers declared below it.
    static {
        System.setProperty("http.keepAlive", "false");
        CookieHandler.setDefault(new CookieManager(null, CookiePolicy.ACCEPT_ALL));
        HttpURLConnection.setFollowRedirects(false);
        MangaDownloader.resetConnectionHeaders();
    }

    /**
     * Counts newly downloaded images; {@code analyzeAndDownload} resets it to 0 whenever it takes
     * its longer pause.
     */
    private static int lineCounter = 0;

    /**
     * Accumulated cookies sent with each request. This initialiser executes after the static block
     * above, so it replaces any buffer that block may have created.
     */
    private static StringBuffer cookie = new StringBuffer(512);

    /** Extra raw query parameters that {@code createMainUrl} appends to the search URL. */
    protected static String searchParams = "";

    /** A downloaded file must be larger than this many bytes to count as a valid image. */
    private static final int MIN_IMG_OK_BYTES = 2000;

    /**
     * URL of the page following the one most recently analysed; set by {@code analyzeAndDownload}
     * and used by {@code downloadManga} to skip ahead after repeated failures.
     */
    private static String nextUrl;

    /**
     * Parses the page line that contains the {@code <h1>} heading, downloads the image shown on
     * that page and returns the URL of the following page.
     * <p>
     * Side effects: creates the gallery directory below {@code targetDir}, updates
     * {@code MangaDownloader.nextUrl} and {@code MangaDownloader.lineCounter}, and sleeps after the
     * download to throttle requests.
     *
     * @param line
     *            page line containing the heading, the next-page link and the image tag
     * @param num
     *            number expected at the end of the next-page link (the caller passes the current
     *            page number plus one); also used in the image file name
     * @param log
     *            receives progress and error messages
     * @param first
     *            whether this is the first page of the gallery; the title is only logged then
     * @param targetDir
     *            directory below which the gallery directory is created
     * @return the next page URL, or {@code null} if the heading, the next-page link or the image
     *         could not be found
     * @throws IOException
     *             if the image cannot be downloaded or the line lacks the {@code <iframe>} marker
     */
    private static String analyzeAndDownload(final String line, final int num, final Logger log,
            final boolean first, final String targetDir) throws IOException {
        String pageTitle = Helper.substringBetweenNarrow(line, "<h1>", "</h1>");
        if (!Helper.isNotEmptyOrNull(pageTitle)) {
            return null;
        }
        // A <title> captured earlier takes precedence over the per-page <h1>.
        if (Helper.isNotEmptyOrNull(MangaDownloader.mangaTitle)) {
            pageTitle = Helper.escapeFileName(StringEscapeUtils.unescapeHtml4(MangaDownloader.mangaTitle));
        } else {
            pageTitle = Helper.escapeFileName(StringEscapeUtils.unescapeHtml4(pageTitle));
        }
        final File dir = new File(targetDir + File.separator + pageTitle);
        if (first) {
            log.log("??" + pageTitle);
        }
        dir.mkdirs();

        MangaDownloader.nextUrl = Helper.substringBetweenNarrow(line, "<a href=\"", "-" + num + "\"><img");
        if (!Helper.isNotEmptyOrNull(MangaDownloader.nextUrl)) {
            return null;
        }
        MangaDownloader.nextUrl += "-" + num;

        // The image tag is searched only after the <iframe>; report a missing marker as a normal
        // I/O failure instead of letting substring(-1) throw.
        final int iframeIdx = line.indexOf("<iframe");
        if (iframeIdx == -1) {
            throw new IOException("no <iframe> marker before the image tag, next page: " + MangaDownloader.nextUrl);
        }
        final String img = Helper.substringBetweenNarrow(line.substring(iframeIdx), "<img src=\"", "\" style=\"");
        if (!Helper.isNotEmptyOrNull(img)) {
            return null;
        }

        try {
            final String ext = MangaDownloader.imageExtension(img);
            final String target = new File(dir, pageTitle + "_" + num + ext).getAbsolutePath();
            if (null != MangaDownloader.download(img, target, false, log)) {
                // Freshly downloaded: pause, and take a much longer break periodically.
                log.log("?" + img);
                if (MangaDownloader.lineCounter++ >= 5) {
                    MangaDownloader.lineCounter = 0;
                    MangaDownloader.pauseAfterImage((40 * MangaDownloader.sleepBase)
                            + (int) (Math.random() * 12 * MangaDownloader.sleepBase), log);
                } else {
                    MangaDownloader.pauseAfterImage((8 * MangaDownloader.sleepBase)
                            + (int) (Math.random() * 5 * MangaDownloader.sleepBase), log);
                }
            } else {
                // download() returned null: an acceptable file was already present.
                log.log("" + img);
                MangaDownloader.pauseAfterImage((5 * MangaDownloader.sleepBase)
                        + (int) (Math.random() * 5 * MangaDownloader.sleepBase), log);
            }
        } catch (final IOException e) {
            log.err("" + e.toString());
            throw e;
        }
        return MangaDownloader.nextUrl;
    }

    /** Derives the file extension (including the dot) from an image URL, defaulting to ".jpg". */
    private static String imageExtension(final String img) {
        final int slash = img.lastIndexOf('/');
        String imgName = (slash == -1) ? img : img.substring(slash);
        if (imgName.indexOf('=') != -1) {
            imgName = imgName.substring(imgName.lastIndexOf('=') + 1);
        }
        imgName = Helper.escapeFileName(imgName);
        final int dot = imgName.lastIndexOf('.');
        return (dot != -1) ? imgName.substring(dot) : ".jpg";
    }

    /** Sleeps for the given time; an interrupt is logged and the interrupt flag is restored. */
    private static void pauseAfterImage(final long millis, final Logger log) {
        try {
            Thread.sleep(millis);
        } catch (final InterruptedException e) {
            Thread.currentThread().interrupt();
            log.err("" + e.toString());
        }
    }

    /**
     * Merges the cookies announced by the response's {@code Set-Cookie} headers into
     * {@code cookies}, which holds a {@code "name=value; name=value"} list.
     * <p>
     * A cookie whose name is already present replaces the old value instead of being appended a
     * second time, so the buffer does not grow with every response. Header values containing the
     * text "deleted" are skipped. The header name is matched case-insensitively.
     *
     * @param cookies
     *            buffer to update in place
     * @param conn
     *            connection whose response headers are inspected
     * @return {@code true} if at least one cookie was accepted
     * @throws IOException
     *             if reading or processing the headers fails unexpectedly
     */
    public static final boolean appendCookies(final StringBuffer cookies, final HttpURLConnection conn)
            throws IOException {
        try {
            boolean changed = false;
            for (final Map.Entry<String, List<String>> header : conn.getHeaderFields().entrySet()) {
                // equalsIgnoreCase also rejects a null key, which header maps may contain.
                if (!"Set-Cookie".equalsIgnoreCase(header.getKey()) || (header.getValue() == null)) {
                    continue;
                }
                for (final String v : header.getValue()) {
                    if ((v == null) || (v.indexOf("deleted") != -1)) {
                        continue;
                    }
                    // Only the leading "name=value" part matters; attributes follow the first ';'.
                    final int semi = v.indexOf(';');
                    final String pair = ((semi == -1) ? v : v.substring(0, semi)).trim();
                    if (pair.length() == 0) {
                        continue;
                    }
                    MangaDownloader.storeCookie(cookies, pair);
                    changed = true;
                }
            }
            return changed;
        } catch (final RuntimeException e) {
            throw new IOException(e);
        }
    }

    /** Puts {@code pair} into the cookie list, replacing an existing cookie of the same name. */
    private static void storeCookie(final StringBuffer cookies, final String pair) {
        final String name = MangaDownloader.cookieName(pair);
        // StringBuffer locks on itself, so this makes the read-modify-write atomic with respect
        // to other users of the same buffer.
        synchronized (cookies) {
            final StringBuilder merged = new StringBuilder(cookies.length() + pair.length() + 2);
            boolean replaced = false;
            for (final String raw : cookies.toString().split(";")) {
                final String existing = raw.trim();
                if (existing.length() == 0) {
                    continue;
                }
                String piece = existing;
                if (name.equals(MangaDownloader.cookieName(existing))) {
                    // First occurrence takes the new value; further duplicates are dropped.
                    piece = replaced ? null : pair;
                    replaced = true;
                }
                if (piece != null) {
                    if (merged.length() > 0) {
                        merged.append("; ");
                    }
                    merged.append(piece);
                }
            }
            if (!replaced) {
                if (merged.length() > 0) {
                    merged.append("; ");
                }
                merged.append(pair);
            }
            cookies.setLength(0);
            cookies.append(merged.toString());
        }
    }

    /** Returns the part of a {@code name=value} pair before the first '='. */
    private static String cookieName(final String pair) {
        final int eq = pair.indexOf('=');
        return (eq == -1) ? pair : pair.substring(0, eq);
    }

    /**
     * Appends one entry followed by a newline to the bookkeeping file of finished downloads.
     *
     * @param downloaded
     *            file to append to; created if it does not exist
     * @param mangaUrl
     *            text to record on its own line
     * @throws IOException
     *             if the file cannot be opened or written
     */
    private static void appendUrl(final File downloaded, final String mangaUrl) throws IOException {
        final FileWriter f = new FileWriter(downloaded, true);
        try {
            f.write(mangaUrl);
            f.write('\n');
        } finally {
            // Close even when a write fails so the file handle is not leaked.
            f.close();
        }
    }

    /**
     * Inspects one line of a fetched page: backs off when the line carries a known error text and,
     * if dumping is enabled, appends the line to "exia-dump.xml" in {@code targetDir}.
     * <p>
     * A line containing both error texts triggers two consecutive pauses.
     *
     * @param line
     *            page line to inspect
     * @param log
     *            receives error messages
     * @param targetDir
     *            directory that holds the dump file
     */
    private final static void checkAndDump(final String line, final Logger log, final String targetDir) {
        if (line.contains("An Error Has Occurred")) {
            MangaDownloader.pauseAfterServerComplaint();
        }
        if (line.contains("your IP address")) {
            log.err("??" + line);
            MangaDownloader.pauseAfterServerComplaint();
        }
        if (MangaDownloader.dump) {
            FileWriter writer = null;
            try {
                final File dumpFile = new File(targetDir + File.separator + "exia-dump.xml");
                if (!dumpFile.isFile()) {
                    // Announce the dump location once, when the file is about to be created.
                    log.err("dump: " + dumpFile.getAbsolutePath());
                }
                writer = new FileWriter(dumpFile, true);
                writer.append(line);
                writer.append('\n');
            } catch (final IOException e) {
                e.printStackTrace();
            } finally {
                if (writer != null) {
                    try {
                        writer.close();
                    } catch (final IOException e) {
                        e.printStackTrace();
                    }
                }
            }
        }
    }

    /** Sleeps between 60 and 180 sleep-base units; restores the interrupt flag if interrupted. */
    private static void pauseAfterServerComplaint() {
        try {
            Thread.sleep(
                    (60 * MangaDownloader.sleepBase) + (int) (Math.random() * 120 * MangaDownloader.sleepBase));
        } catch (final InterruptedException e) {
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }
    }

    /**
     * Builds the search URL for a keyword query.
     *
     * @param keyword
     *            search term; URL-encoded as UTF-8
     * @param optionsTypes
     *            one flag per gallery category, in the order doujinshi, manga, artistcg, gamecg,
     *            western, non-h, imageset, cosplay, asianporn, misc; surplus entries are ignored
     * @param optionsSearchFields
     *            flags for searching name, tags and description; surplus entries are ignored
     * @param minimumStars
     *            minimum rating; the rating filter is only added for values above 1
     * @return the complete search URL, without a page parameter
     * @throws UnsupportedEncodingException
     *             if the keyword cannot be encoded
     */
    private static String createMainUrl(final String keyword, final boolean[] optionsTypes,
            final boolean[] optionsSearchFields, final int minimumStars) throws UnsupportedEncodingException {
        // Query fragments, indexed like the corresponding flag arrays.
        final String[] typeParams = { "&f_doujinshi=on", "&f_manga=on", "&f_artistcg=on", "&f_gamecg=on",
                "&f_western=on", "&f_non-h=on", "&f_imageset=on", "&f_cosplay=on", "&f_asianporn=on",
                "&f_misc=on" };
        final String[] fieldParams = { "&f_sname=on", "&f_stags=on&f_sdts=on", "&f_sdesc=on" };

        final StringBuilder query = new StringBuilder(200);
        query.append("http://g.e-hentai.org/?f_search=");
        query.append(URLEncoder.encode(keyword, MangaDownloader.CHARSET_UTF8.name()));
        if (minimumStars > 1) {
            query.append("&f_sr=on&f_srdd=").append(minimumStars);
        }

        for (int idx = 0; idx < optionsTypes.length; idx++) {
            if (optionsTypes[idx] && (idx < typeParams.length)) {
                query.append(typeParams[idx]);
            }
        }
        query.append("&f_sfdd=favall");
        for (int idx = 0; idx < optionsSearchFields.length; idx++) {
            if (optionsSearchFields[idx] && (idx < fieldParams.length)) {
                query.append(fieldParams[idx]);
            }
        }

        final String extra = MangaDownloader.searchParams;
        if (Helper.isNotEmptyOrNull(extra)) {
            // Make sure the extra parameters are separated from what precedes them.
            if (!extra.startsWith("&")) {
                query.append('&');
            }
            query.append(extra);
        }
        return query.append("&f_apply=Apply+Filter").toString();
    }

    /**
     * Downloads {@code url} into the file {@code to}, trying up to three times.
     * <p>
     * Each attempt runs in its own {@code DownloadThread} and is given 60 seconds; the loop stops
     * as soon as one attempt reports success. NOTE(review): a worker that is still running after
     * the timeout is only interrupted, not waited for, so it may overlap with the next attempt on
     * the same file.
     *
     * @param url
     *            source URL
     * @param to
     *            path of the target file
     * @param overwrite
     *            if {@code false}, an existing file that already looks like a valid image is kept
     * @param log
     *            receives error messages
     * @return {@code to} after a successful download, or {@code null} if the existing file was kept
     * @throws IOException
     *             if all attempts fail or the calling thread is interrupted
     */
    public static String download(final String url, final String to, final boolean overwrite, final Logger log)
            throws IOException {
        final File toFile = new File(to);
        // 925 -> forbidden file
        if (!overwrite && toFile.exists() && (toFile.length() > MangaDownloader.MIN_IMG_OK_BYTES)
                && (toFile.length() != 925)) {
            return null;
        }
        final int maxAttempts = 3;
        for (int attempt = 1; attempt <= maxAttempts; attempt++) {
            final DownloadThread worker = new DownloadThread(to, url, log);
            worker.start();
            try {
                worker.join(60000);
                worker.interrupt();
                // Check the worker that actually ran; stop retrying once it has succeeded.
                if (worker.success) {
                    return to;
                }
                if (attempt < maxAttempts) {
                    Thread.sleep((3 * MangaDownloader.sleepBase)
                            + (int) (Math.random() * 3 * MangaDownloader.sleepBase));
                }
            } catch (final InterruptedException e) {
                worker.interrupt();
                Thread.currentThread().interrupt();
                throw new IOException("interrupted while downloading " + url, e);
            }
        }
        throw new IOException("" + to);
    }

    /**
     * Downloads a whole gallery, starting from its overview page.
     * <p>
     * The overview page is read up to the first line containing {@code <h1}; the link to page 1 is
     * taken from that line and handed to {@link #downloadManga(String, String, Logger)}. The
     * overview page is closed before the image pages are fetched.
     *
     * @param mangaUrl
     *            URL of the gallery overview page
     * @param targetDir
     *            directory below which the gallery directory is created
     * @param log
     *            receives progress and error messages
     * @return {@code true} if the first page link was found and the gallery download succeeded
     */
    public static boolean downloadGallery(final String mangaUrl, final String targetDir, final Logger log) {
        try {
            String firstPageUrl = null;
            final BufferedReader reader = new BufferedReader(new InputStreamReader(
                    MangaDownloader.openUrlInputStream(mangaUrl), MangaDownloader.CHARSET_UTF8));
            try {
                String line;
                while (null != (line = reader.readLine())) {
                    MangaDownloader.checkAndDump(line, log, targetDir);
                    if (line.contains("<h1")) {
                        // Only the first heading line is considered, whether or not it has the link.
                        final String pageUrl = Helper.substringBetweenNarrow(line, "<a href=\"", "-1\">");
                        if (Helper.isNotEmptyOrNull(pageUrl)) {
                            firstPageUrl = pageUrl + "-1";
                        }
                        break;
                    }
                }
            } finally {
                reader.close();
            }
            return (firstPageUrl != null) && MangaDownloader.downloadManga(firstPageUrl, targetDir, log);
        } catch (final Exception e) {
            e.printStackTrace();
            log.err("?" + e.toString());
        }
        return false;
    }

    /**
     * Walks through the image pages of a gallery, starting at {@code url}, and downloads the image
     * of every page.
     * <p>
     * Page URLs must end in {@code -<number>}. For each page the {@code <title>} is captured once
     * and the line containing {@code <h1>} is passed to {@code analyzeAndDownload}, which returns
     * the next page URL. After a failure the method pauses and retries from the last page that was
     * parsed successfully; once more than three failures have accumulated it skips to
     * {@code MangaDownloader.nextUrl} instead.
     *
     * @param url
     *            URL of the first image page
     * @param targetDir
     *            directory below which the gallery directory is created
     * @param log
     *            receives progress and error messages
     * @return {@code true} when {@code analyzeAndDownload} reports that there is no further page;
     *         {@code false} if the walk ended through an error
     */
    public static boolean downloadManga(final String url, final String targetDir, final Logger log) {
        try {
            boolean first = true;
            int retries = 0;
            String imgUrl = url;
            String backupImgUrl = imgUrl;
            MangaDownloader.mangaTitle = null;
            // Forget the skip target of a previous gallery so a failure on the first page cannot
            // jump into pages that belong to another gallery.
            MangaDownloader.nextUrl = null;
            while (null != imgUrl) {
                log.log("" + imgUrl);
                final String lastUrl = imgUrl;
                final String substring = imgUrl.substring(imgUrl.lastIndexOf('-') + 1);
                final int num = Integer.parseInt(substring);
                try {
                    final BufferedReader reader = new BufferedReader(new InputStreamReader(
                            MangaDownloader.openUrlInputStream(imgUrl), MangaDownloader.CHARSET_UTF8));
                    try {
                        String line;
                        while (null != (line = reader.readLine())) {
                            MangaDownloader.checkAndDump(line, log, targetDir);
                            if (line.contains("<h1>")) {
                                imgUrl = MangaDownloader.analyzeAndDownload(line, num + 1, log, first, targetDir);
                                backupImgUrl = lastUrl;
                                first = false;
                                break;
                            } else {
                                if (Helper.isEmptyOrNull(MangaDownloader.mangaTitle)) {
                                    MangaDownloader.mangaTitle = Helper.substringBetweenNarrow(line, "<title>",
                                            "</title>");
                                }
                            }
                        }
                    } finally {
                        // Release the connection even when parsing or the image download fails.
                        reader.close();
                    }
                    if (imgUrl == null) {
                        MangaDownloader.dump = false;
                        return true;
                    }
                    if (!imgUrl.equals(lastUrl)) {
                        // ok
                        MangaDownloader.dump = false;
                    }
                    // Otherwise the page had no <h1> line and the same URL is fetched again.
                } catch (final Exception e) {
                    Thread.sleep((60 * MangaDownloader.sleepBase)
                            + (int) (Math.random() * 10 * MangaDownloader.sleepBase));
                    if (retries++ > 2) {
                        e.printStackTrace();
                        log.err("" + imgUrl);
                        imgUrl = MangaDownloader.nextUrl;
                    } else {
                        imgUrl = backupImgUrl;
                        log.err("??" + e);
                    }
                }
            }
        } catch (final Exception e) {
            if (e instanceof InterruptedException) {
                Thread.currentThread().interrupt();
            }
            e.printStackTrace();
            log.err("" + e.toString());
        }
        return false;
    }

    /**
     * Runs a keyword search and downloads every gallery on every result page.
     * <p>
     * Gallery ids that were downloaded are recorded in "md-downloaded.txt" inside
     * {@code targetDir} and skipped on later runs. Paging continues while a result page yields at
     * least one gallery (downloaded now or already known). The method returns early when a result
     * page contains the text "Your IP" or when a gallery still fails after four attempts.
     *
     * @param keyword
     *            search term
     * @param targetDir
     *            directory for galleries and bookkeeping files; created if missing
     * @param log
     *            receives progress and error messages
     * @param optionsTypes
     *            category flags, see {@code createMainUrl}
     * @param optionsSearchFields
     *            search-field flags, see {@code createMainUrl}
     * @param minimumStars
     *            minimum rating filter
     */
    public static void downloadSearchResult(final String keyword, final String targetDir, final Logger log,
            final boolean[] optionsTypes, final boolean[] optionsSearchFields, final int minimumStars) {
        int pageNr = 0;
        int mangaCount;
        final File dir = new File(targetDir);
        dir.mkdirs();
        // Start every run with a fresh dump file.
        final File dumpFile = new File(targetDir + File.separator + "exia-dump.xml");
        dumpFile.delete();
        final File downloaded = new File(targetDir + File.separator + "md-downloaded.txt");
        final Set<String> downloadedIds = new HashSet<String>();
        if (downloaded.isFile()) {
            try {
                MangaDownloader.readDownloaded(downloaded, downloadedIds);
            } catch (final Exception e) {
                e.printStackTrace();
                log.err("" + e.toString());
            }
        }

        try {
            final String mainUrl = MangaDownloader.createMainUrl(keyword, optionsTypes, optionsSearchFields,
                    minimumStars);

            do {
                mangaCount = 0;
                try {
                    final String requestUrl = mainUrl + "&page=" + pageNr;
                    log.log("????" + requestUrl);

                    final BufferedReader reader = new BufferedReader(new InputStreamReader(
                            MangaDownloader.openUrlInputStream(requestUrl), MangaDownloader.CHARSET_UTF8));
                    try {
                        String line;
                        while (null != (line = reader.readLine())) {
                            MangaDownloader.checkAndDump(line, log, targetDir);
                            if (line.contains("Your IP")) {
                                log.err("?" + line);
                                return;
                            } else if (line.contains("gallerytorrents.php")) {
                                // All results sit on one line; each gallery entry follows an icon.
                                final String[] split = line.split("imgicon.png");
                                for (int i = 1; i < split.length; i++) {
                                    final String mangaUrl = Helper.substringBetween(split[i],
                                            "<div class=\"it1\"><a href=\"", "\">");
                                    if (!Helper.isNotEmptyOrNull(mangaUrl)) {
                                        continue;
                                    }
                                    final String mangaId = Helper.substringBetween(mangaUrl, "/g/", "/");
                                    System.out.println("ID/URL: " + mangaId + " / " + mangaUrl);
                                    if (downloadedIds.contains(mangaId)) {
                                        log.log("" + mangaUrl);
                                        mangaCount++;
                                        continue;
                                    }
                                    log.log("" + mangaId);
                                    boolean success = false;
                                    int retries = 0;
                                    while ((retries++ < 4) && !success) {
                                        if (MangaDownloader.downloadGallery(mangaUrl, targetDir, log)) {
                                            mangaCount++;
                                            downloadedIds.add(mangaId);
                                            MangaDownloader.appendUrl(downloaded, mangaId);
                                            success = true;
                                        } else {
                                            log.log("??" + mangaUrl + ", ?");
                                            try {
                                                Thread.sleep((60 * MangaDownloader.sleepBase * 4)
                                                        + (int) (Math.random() * 10 * MangaDownloader.sleepBase));
                                            } catch (final Exception e) {
                                                e.printStackTrace();
                                                log.err(e.toString());
                                            }
                                        }
                                    }
                                    if (!success) {
                                        log.err("" + mangaUrl);
                                        return;
                                    }
                                }
                                break;
                            }
                        }
                    } finally {
                        // Close quietly so that a close failure cannot cancel one of the early
                        // returns above; also covers the exception paths.
                        try {
                            reader.close();
                        } catch (final IOException e) {
                            e.printStackTrace();
                        }
                    }
                    Thread.sleep((15 * MangaDownloader.sleepBase)
                            + (int) (Math.random() * 10 * MangaDownloader.sleepBase));
                } catch (final Exception e) {
                    e.printStackTrace();
                    log.err("" + e.toString());
                }
                pageNr++;
            } while (mangaCount > 0);
        } catch (final Exception e) {
            e.printStackTrace();
            log.err("" + e.toString());
        }
    }

    /**
     * Opens a connection for a plain request without a request body.
     *
     * @param url
     *            address to connect to
     * @return the prepared connection
     * @throws Exception
     *             if the connection cannot be established
     */
    public final static HttpURLConnection getUrlConnection(final String url) throws Exception {
        final boolean usePost = false;
        final String noBody = null;
        return MangaDownloader.getUrlConnection(url, usePost, noBody);
    }

    /**
     * Opens an HTTP connection with the default headers, a Referer derived from the URL, and an
     * optional request body, retrying with growing pauses when the attempt fails.
     * <p>
     * After the request is prepared, cookies announced by the response are merged into the shared
     * cookie buffer and the default "Cookie" header is refreshed. A failed attempt is retried
     * after a pause of 60 to 120 sleep-base units multiplied by the attempt number; the twelfth
     * failure is rethrown.
     *
     * @param url
     *            address to connect to
     * @param post
     *            whether to use the POST method
     * @param output
     *            request body, sent UTF-8 encoded; {@code null} for none
     * @return the prepared connection
     * @throws IOException
     *             if the URL is malformed, all attempts fail, or the thread is interrupted while
     *             waiting for the next attempt
     */
    public final static HttpURLConnection getUrlConnection(final String url, final boolean post,
            final String output) throws IOException {
        // A malformed URL can never succeed, so fail right away instead of retrying it.
        final URL urlObj = new URL(url);
        int retries = 0;
        while (true) {
            try {
                final HttpURLConnection conn = (HttpURLConnection) urlObj.openConnection();
                conn.setConnectTimeout(15000);
                conn.setReadTimeout(30000);
                if (post) {
                    conn.setRequestMethod("POST");
                }
                // Referer is the URL without its last path segment, unless that would cut into
                // the scheme/host part.
                final int pathIdx = url.lastIndexOf('/');
                final String referer = (pathIdx > "https://".length()) ? url.substring(0, pathIdx) : url;
                conn.setRequestProperty("Referer", referer);
                for (final Map.Entry<String, String> header : MangaDownloader.DEFAULT_CONN_HEADERS.entrySet()) {
                    if (header.getValue() != null) {
                        conn.setRequestProperty(header.getKey(), header.getValue());
                    }
                }
                if (output != null) {
                    conn.setDoOutput(true);
                    final BufferedOutputStream out = new BufferedOutputStream(conn.getOutputStream());
                    try {
                        out.write(output.getBytes(MangaDownloader.CHARSET_UTF8));
                    } finally {
                        out.close();
                    }
                }
                // appendCookies reads the response headers of this connection.
                if (MangaDownloader.appendCookies(MangaDownloader.cookie, conn)) {
                    MangaDownloader.putConnectionHeader("Cookie", MangaDownloader.cookie.toString());
                }
                return conn;
            } catch (final Exception e) {
                System.err.println(e.toString());
                if (retries++ > 10) {
                    throw new IOException(e);
                }
                try {
                    // The cast must wrap the whole product; casting Math.random() alone yields 0.
                    Thread.sleep((60 * retries * MangaDownloader.sleepBase)
                            + (int) (Math.random() * MangaDownloader.sleepBase * 60 * retries));
                } catch (final InterruptedException interrupted) {
                    // Somebody wants this thread to stop: give up instead of retrying without pause.
                    Thread.currentThread().interrupt();
                    throw new IOException(e);
                }
            }
        }
    }

    /**
     * Command-line entry point: searches for a keyword and downloads all matching galleries.
     *
     * @param args
     *            optional; {@code args[0]} is the target directory (default {@code D:\temp}) and
     *            {@code args[1]} the search keyword (default {@code chinese})
     */
    public static void main(final String[] args) {
        final int argCount = (args == null) ? 0 : args.length;
        final String targetDir = (argCount > 0) ? args[0] : "D:\\temp";
        final String keyword = (argCount > 1) ? args[1] : "chinese";

        // Category flags and search-field flags in the order expected by createMainUrl.
        final boolean[] types = { true, true, false, false, true, false, false, false, true, false };
        final boolean[] searchFields = { true, true, true };
        final int minimumStars = 5;

        MangaDownloader.downloadSearchResult(keyword, targetDir, new MangaDownloader(), types, searchFields,
                minimumStars);
    }

    /**
     * Opens the response stream of a plain request without a request body.
     *
     * @param url
     *            address to read from
     * @return the response body stream
     * @throws MalformedURLException
     *             if {@code url} is not a valid URL
     * @throws IOException
     *             if the connection fails
     */
    public final static InputStream openUrlInputStream(final String url) throws MalformedURLException, IOException {
        final boolean usePost = false;
        final String noBody = null;
        return MangaDownloader.openUrlInputStream(url, usePost, noBody);
    }

    /**
     * Opens a connection via {@code getUrlConnection} and returns its response stream.
     *
     * @param url
     *            address to read from
     * @param post
     *            whether to use the POST method
     * @param output
     *            request body, or {@code null} for none
     * @return the response body stream
     * @throws IOException
     *             if the connection fails
     */
    public static final InputStream openUrlInputStream(final String url, final boolean post, final String output)
            throws IOException {
        final HttpURLConnection connection = MangaDownloader.getUrlConnection(url, post, output);
        return connection.getInputStream();
    }

    /**
     * Sets or replaces one of the default request headers used for all later connections.
     *
     * @param key
     *            header name
     * @param value
     *            header value
     */
    public static final void putConnectionHeader(final String key, final String value) {
        final Map<String, String> headers = MangaDownloader.DEFAULT_CONN_HEADERS;
        headers.put(key, value);
    }

    /**
     * Loads the bookkeeping file of finished downloads, one entry per line, into a set.
     *
     * @param downloaded
     *            file to read
     * @param downloadedUrls
     *            set that receives every line of the file
     * @throws FileNotFoundException
     *             if the file cannot be opened
     * @throws IOException
     *             if reading fails
     */
    private static void readDownloaded(final File downloaded, final Set<String> downloadedUrls)
            throws FileNotFoundException, IOException {
        final BufferedReader reader = new BufferedReader(new FileReader(downloaded));
        try {
            String line;
            while (null != (line = reader.readLine())) {
                downloadedUrls.add(line);
            }
        } finally {
            // Close even when a read fails so the file handle is not leaked.
            reader.close();
        }
    }

    /**
     * Restores the default request headers: only "User-Agent" and, when {@code cookieString} is
     * non-empty, a "Cookie" header built from a fresh copy of that string.
     */
    public static final void resetConnectionHeaders() {
        final Map<String, String> headers = MangaDownloader.DEFAULT_CONN_HEADERS;
        headers.clear();
        headers.put("User-Agent", MangaDownloader.userAgent);
        if (!Helper.isNotEmptyOrNull(MangaDownloader.cookieString)) {
            // No preset cookies: leave the cookie buffer untouched.
            return;
        }
        MangaDownloader.cookie = new StringBuffer(MangaDownloader.cookieString);
        headers.put("Cookie", MangaDownloader.cookie.toString());
    }

    /**
     * Writes an error message to standard error, followed by a line break.
     *
     * @param message
     *            text to print
     */
    @Override
    public void err(final String message) {
        System.err.println(message);
    }

    /**
     * Writes a progress message to standard output, followed by a line break.
     *
     * @param message
     *            text to print
     */
    @Override
    public void log(final String message) {
        System.out.println(message);
    }

}