com.serphacker.serposcope.task.proxy.ProxyChecker.java Source code

Java tutorial

Introduction

Here is the source code for com.serphacker.serposcope.task.proxy.ProxyChecker.java

Source

/*
 * Serposcope - SEO rank checker https://serposcope.serphacker.com/
 * 
 * Copyright (c) 2016 SERP Hacker
 * @author Pierre Nogues <support@serphacker.com>
 * @license https://opensource.org/licenses/MIT MIT License
 */

package com.serphacker.serposcope.task.proxy;

import com.serphacker.serposcope.db.base.BaseDB;
import com.serphacker.serposcope.models.base.Proxy;
import com.serphacker.serposcope.scraper.http.ScrapClient;
import com.serphacker.serposcope.scraper.http.proxy.ScrapProxy;
import com.serphacker.serposcope.scraper.http.proxy.ScrapProxy;
import java.time.LocalDateTime;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import javax.inject.Inject;
import org.apache.commons.lang3.time.DurationFormatUtils;
import org.slf4j.LoggerFactory;

/**
 * Background task that checks every proxy returned by {@code db.proxy.list()}
 * against a "judge" URL and stores the outcome back through {@code db.proxy}.
 *
 * <p>Each proxy is first flagged {@link Proxy.Status#UNCHECKED}, then fetched
 * through a fixed-size worker pool. A proxy is marked {@link Proxy.Status#OK}
 * when the judge answers HTTP 200 and the body matches {@link #PATTERN_IP};
 * otherwise it is marked {@link Proxy.Status#ERROR}.
 *
 * <p>{@link #getProgress()} reads only an {@link AtomicInteger} and a
 * {@code volatile} field, so it is presumably meant to be polled from another
 * thread while the check is running.
 */
public class ProxyChecker extends Thread {

    private static final org.slf4j.Logger LOG = LoggerFactory.getLogger(ProxyChecker.class);

    /** Captures the address announced by the judge page after "Your IP: ". */
    static Pattern PATTERN_IP = Pattern.compile("Your IP: ([0-9.:\\[\\]]+)");

    BaseDB db;
    /** Size of the worker pool used to check proxies concurrently. */
    int nThread = 50;
    /** Timeout, in milliseconds, handed to each {@link ScrapClient}. */
    int timeoutMS = 20000;
    /** URL fetched through each proxy; its body is matched against {@link #PATTERN_IP}. */
    String judgeUrl = "https://proxychecker.serphacker.com/";

    /** Number of proxies whose check has completed (successfully or not). */
    AtomicInteger checked = new AtomicInteger();
    /** Number of proxies scheduled by the current run; 0 until {@link #run()} has listed them. */
    volatile int totalProxies = 0;

    /**
     * Creates a checker with an explicit pool size and timeout.
     *
     * @param db        database facade used to list and update proxies
     * @param threads   number of worker threads
     * @param timeoutMS per-request timeout in milliseconds
     */
    public ProxyChecker(BaseDB db, int threads, int timeoutMS) {
        this.db = db;
        this.nThread = threads;
        this.timeoutMS = timeoutMS;
    }

    /**
     * Creates a checker using the default pool size (50) and timeout (20000 ms).
     *
     * @param db database facade used to list and update proxies
     */
    public ProxyChecker(BaseDB db) {
        this.db = db;
    }

    /**
     * Lists all proxies, resets them to {@link Proxy.Status#UNCHECKED}, checks each
     * one on the worker pool and waits up to one hour for the pool to drain.
     */
    @Override
    public void run() {

        LOG.info("starting proxy checking task, threads = {}, timeout in MS = {}", nThread, timeoutMS);

        long start = System.currentTimeMillis();

        List<Proxy> proxies = db.proxy.list();
        if (proxies == null || proxies.isEmpty()) {
            LOG.debug("no proxy to check");
            return;
        }

        totalProxies = proxies.size();

        ExecutorService executor = Executors.newFixedThreadPool(nThread);
        db.proxy.updateStatus(Proxy.Status.UNCHECKED,
                proxies.stream().map((t) -> t.getId()).collect(Collectors.toList()));

        for (Proxy proxy : proxies) {
            executor.submit(() -> checkProxy(proxy));
        }

        executor.shutdown();
        try {
            if (!executor.awaitTermination(1, TimeUnit.HOURS)) {
                // Remaining tasks are deliberately left running (as before); the
                // timeout is only surfaced so the "finished" line is not misleading.
                LOG.warn("proxy checking still running after 1 hour, {} of {} proxies checked",
                        checked.get(), totalProxies);
            }
        } catch (InterruptedException ex) {
            executor.shutdownNow();
            // Restore the flag so whoever interrupted this thread can still observe it.
            Thread.currentThread().interrupt();
        }
        LOG.info("proxy checking finished in {}",
                DurationFormatUtils.formatDurationHMS(System.currentTimeMillis() - start));
    }

    /**
     * Checks a single proxy against the judge URL and persists the result.
     *
     * <p>Any runtime failure while fetching is logged and recorded as
     * {@link Proxy.Status#ERROR}; the {@link #checked} counter is always
     * incremented so progress can reach 100% even when individual checks fail.
     */
    private void checkProxy(Proxy proxy) {
        Proxy.Status proxyStatus = Proxy.Status.ERROR;

        try {
            // NOTE(review): if ScrapClient is Closeable it should be closed here — confirm.
            ScrapClient cli = new ScrapClient();
            cli.setTimeout(timeoutMS);
            ScrapProxy scrapProxy = proxy.toScrapProxy();
            cli.setProxy(scrapProxy);

            LOG.info("checking {}", scrapProxy);

            int httpStatus = cli.get(judgeUrl);
            String content = httpStatus == 200 ? cli.getContentAsString() : null;
            if (content != null) {
                Matcher matcher = PATTERN_IP.matcher(content);
                if (matcher.find()) {
                    proxy.setRemoteip(matcher.group(1));
                    proxyStatus = Proxy.Status.OK;
                }
            }
        } catch (RuntimeException ex) {
            // Without this the exception would vanish inside the discarded Future
            // and the proxy would silently stay UNCHECKED.
            LOG.warn("unexpected error while checking proxy {}", proxy.getId(), ex);
        }

        try {
            proxy.setStatus(proxyStatus);
            proxy.setLastCheck(LocalDateTime.now());
            db.proxy.update(proxy);
        } catch (RuntimeException ex) {
            LOG.error("unable to save check result of proxy {}", proxy.getId(), ex);
        } finally {
            checked.incrementAndGet();
        }
    }

    /**
     * Returns the completion percentage of the current run.
     *
     * @return {@code checked * 100 / totalProxies} rounded down, or 0 when no
     *         proxies have been scheduled yet
     */
    public int getProgress() {
        int total = totalProxies; // single volatile read
        if (total <= 0) {
            return 0;
        }
        // Integer arithmetic avoids float rounding dropping a percent.
        return (int) (checked.get() * 100L / total);
    }

}