org.apdplat.superword.tools.ProxyIp.java Source code

Java tutorial

Introduction

Here is the source code for org.apdplat.superword.tools.ProxyIp.java

Source

/*
 * *
 *  *
 *  * APDPlat - Application Product Development Platform
 *  * Copyright (c) 2013, 杨尚川, yang-shangchuan@qq.com
 *  *
 *  * This program is free software: you can redistribute it and/or modify
 *  * it under the terms of the GNU General Public License as published by
 *  * the Free Software Foundation, either version 3 of the License, or
 *  * (at your option) any later version.
 *  *
 *  * This program is distributed in the hope that it will be useful,
 *  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  * GNU General Public License for more details.
 *  *
 *  * You should have received a copy of the GNU General Public License
 *  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *  *
 *
 */

package org.apdplat.superword.tools;

import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import org.apache.commons.lang.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.concurrent.ConcurrentSkipListSet;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

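/**
 * Utility for rotating the JVM's outbound HTTP proxy.
 * Proxy candidates ("host:port") are scraped from public proxy-list pages,
 * verified with a test request, and persisted to text files.
 *
 * @author 杨尚川
 */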
public class ProxyIp {
    /** Not instantiable: every member of this class is static. */
    private ProxyIp() {
    }

    private static final Logger LOGGER = LoggerFactory.getLogger(ProxyIp.class);
    // Request headers sent by getCurrentIp().
    private static final String ACCEPT = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
    private static final String ENCODING = "gzip, deflate";
    private static final String LANGUAGE = "zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3";
    private static final String CONNECTION = "keep-alive";
    private static final String USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:36.0) Gecko/20100101 Firefox/36.0";
    // True while one thread is performing a switch inside toNewIp().
    private static volatile boolean isSwitching = false;
    // System.currentTimeMillis() recorded after the last successful switch.
    private static volatile long lastSwitchTime = 0L;
    // Shared headless browser used to fetch the proxy-list pages.
    private static final WebClient WEB_CLIENT = new WebClient(BrowserVersion.INTERNET_EXPLORER_11);
    // Matches a dotted-quad IPv4 address.
    private static final Pattern IP_PATTERN = Pattern.compile(
            "((?:(?:25[0-5]|2[0-4]\\d|((1\\d{2})|([1-9]?\\d)))\\.){3}(?:25[0-5]|2[0-4]\\d|((1\\d{2})|([1-9]?\\d))))");
    // Candidate proxies, each stored as "host:port".
    private static final List<String> IPS = new Vector<>();
    // Round-robin cursor into IPS, advanced by getNextProxyIp().
    private static volatile int currentIpIndex = 0;
    // Loop flag of the background detection thread.
    private static volatile boolean detect = true;
    // Sleep time, in milliseconds, between two background detection rounds.
    private static volatile int detectInterval = 300000;
    private static final Path PROXY_IPS_FILE = Paths.get("src/main/resources/proxy_ips.txt");
    // Proxies that produced a changed external IP in toNewIp().
    private static final Set<String> EXCELLENT_IPS = new ConcurrentSkipListSet<>();
    // Proxies recorded by getCurrentIp() while they were the active proxy.
    private static final Set<String> EXCELLENT_USA_IPS = new ConcurrentSkipListSet<>();
    // Proxies that did not change the external IP in toNewIp().
    private static final Set<String> NORMAL_IPS = new ConcurrentSkipListSet<>();
    private static final Path EXCELLENT_PROXY_IPS_FILE = Paths.get("src/main/resources/proxy_ips_excellent.txt");
    private static final Path EXCELLENT_USA_PROXY_IPS_FILE = Paths
            .get("src/main/resources/proxy_ips_excellent_usa.txt");
    private static final Path NORMAL_PROXY_IPS_FILE = Paths.get("src/main/resources/proxy_ips_normal.txt");
    // External IP observed before the most recent switch.
    // Static initializers run in textual order, and getCurrentIp() touches EXCELLENT_USA_IPS,
    // so this field must be declared after every static that getCurrentIp() uses; otherwise
    // the call hits a still-null set, the NPE is swallowed, and this field starts out as null.
    private static String previousIp = getCurrentIp();
    static {
        // Bootstrap: make sure the backing files exist, load the persisted candidates,
        // fall back to scraping when nothing was persisted, then start the background loop.
        Set<String> loaded = new HashSet<>();
        try {
            Path resourceDir = PROXY_IPS_FILE.getParent();
            if (Files.notExists(resourceDir)) {
                resourceDir.toFile().mkdirs();
            }
            List<Path> backingFiles = Arrays.asList(PROXY_IPS_FILE, EXCELLENT_PROXY_IPS_FILE,
                    EXCELLENT_USA_PROXY_IPS_FILE, NORMAL_PROXY_IPS_FILE);
            for (Path file : backingFiles) {
                if (Files.notExists(file)) {
                    file.toFile().createNewFile();
                }
            }
            LOGGER.info("?IP" + PROXY_IPS_FILE.toAbsolutePath().toString());
            loaded.addAll(Files.readAllLines(PROXY_IPS_FILE));
            loaded.addAll(Files.readAllLines(EXCELLENT_PROXY_IPS_FILE));
        } catch (Exception e) {
            LOGGER.error("??IP", e);
        }
        if (loaded.isEmpty()) {
            // Nothing persisted yet: scrape the proxy-list sites instead.
            loaded.addAll(getProxyIps());
        }
        IPS.addAll(loaded);
        LOGGER.info("IP(" + IPS.size() + ")");
        int ordinal = 0;
        for (String ip : IPS) {
            ordinal++;
            LOGGER.info(ordinal + "?" + ip);
        }

        // Background loop: persist, switch proxy on every 10th completed round, sleep,
        // then merge freshly scraped candidates into IPS. Runs while the detect flag is true.
        Runnable detector = () -> {
            int completedRounds = 0;
            while (detect) {
                try {
                    save();
                    if (completedRounds % 10 == 9) {
                        toNewIp();
                    }
                    Thread.sleep(detectInterval);
                    for (String ip : getProxyIps()) {
                        if (IPS.contains(ip)) {
                            continue;
                        }
                        IPS.add(ip);
                        LOGGER.info("??IP" + ip);
                    }
                    // Only rounds that finished without an exception are counted.
                    completedRounds++;
                } catch (Exception e) {
                    LOGGER.error("?IP", e);
                }
            }
        };
        new Thread(detector).start();
    }

    /**
     * Clears the {@code detect} flag; the background loop started by the static
     * initializer exits once it next evaluates the flag.
     */
    public static void stopDetect() {
        ProxyIp.detect = false;
    }

    /**
     * Sets the {@code detect} flag back to {@code true}.
     * This method only flips the flag; it does not itself start a thread.
     */
    public static void startDetect() {
        ProxyIp.detect = true;
    }

    /**
     * Merges each in-memory proxy collection with the content of its backing file,
     * re-verifies the merged entries and rewrites the file with the ones that passed.
     * Any exception aborts the remaining steps and is logged.
     */
    private static void save() {
        try {
            // Candidate list: file + memory, minus proxies already classified as "normal".
            Set<String> candidates = new ConcurrentSkipListSet<>(Files.readAllLines(PROXY_IPS_FILE));
            candidates.addAll(IPS);
            candidates.removeAll(NORMAL_IPS);
            List<String> verifiedCandidates = toVerify(candidates);
            Files.write(PROXY_IPS_FILE, verifiedCandidates);
            LOGGER.info(candidates.size() + "??IP?");

            Set<String> excellent = new HashSet<>(Files.readAllLines(EXCELLENT_PROXY_IPS_FILE));
            excellent.addAll(EXCELLENT_IPS);
            List<String> verifiedExcellent = toVerify(excellent);
            Files.write(EXCELLENT_PROXY_IPS_FILE, verifiedExcellent);
            LOGGER.info(excellent.size() + "????IP?");

            Set<String> excellentUsa = new HashSet<>(Files.readAllLines(EXCELLENT_USA_PROXY_IPS_FILE));
            excellentUsa.addAll(EXCELLENT_USA_IPS);
            List<String> verifiedExcellentUsa = toVerify(excellentUsa);
            Files.write(EXCELLENT_USA_PROXY_IPS_FILE, verifiedExcellentUsa);
            LOGGER.info(excellentUsa.size() + "????IP?");

            Set<String> normal = new HashSet<>(Files.readAllLines(NORMAL_PROXY_IPS_FILE));
            normal.addAll(NORMAL_IPS);
            List<String> verifiedNormal = toVerify(normal);
            Files.write(NORMAL_PROXY_IPS_FILE, verifiedNormal);
            LOGGER.info(normal.size() + "?????IP?");
        } catch (Exception e) {
            LOGGER.error("?", e);
        }
    }

    /**
     * Verifies every "host:port" entry in parallel and returns the sorted list of entries
     * that passed. Entries that fail are also removed from {@code IPS}.
     * Malformed entries (blank lines, missing or non-numeric port) count as failed instead of
     * throwing, so one bad line cannot abort the whole pass.
     *
     * @param ips entries to verify; not modified
     * @return the entries that passed verification, in natural order
     */
    private static List<String> toVerify(Set<String> ips) {
        AtomicInteger i = new AtomicInteger();
        AtomicInteger f = new AtomicInteger();
        List<String> list = ips.parallelStream().filter(ip -> {
            LOGGER.info("?" + ips.size() + "/" + i.incrementAndGet());
            if (verifyEntry(ip)) {
                return true;
            }
            IPS.remove(ip);
            f.incrementAndGet();
            return false;
        }).sorted().collect(Collectors.toList());
        LOGGER.info("??IP" + (ips.size() - f.get()));
        LOGGER.info("?IP" + f.get());
        return list;
    }

    /** Parses one "host:port" entry and verifies it; returns false when the entry is malformed. */
    private static boolean verifyEntry(String entry) {
        if (entry == null) {
            return false;
        }
        String[] attr = entry.split(":");
        if (attr.length < 2) {
            LOGGER.warn("Skipping malformed proxy entry: '" + entry + "'");
            return false;
        }
        int port;
        try {
            port = Integer.parseInt(attr[1]);
        } catch (NumberFormatException e) {
            LOGGER.warn("Skipping proxy entry with non-numeric port: '" + entry + "'");
            return false;
        }
        return verify(attr[0], port);
    }

    /**
     * Returns the next candidate from {@code IPS} in round-robin order and advances the cursor.
     *
     * @return the selected "host:port" entry
     * @throws IllegalStateException if {@code IPS} is empty
     */
    private static String getNextProxyIp() {
        // IPS is a Vector whose methods lock on the Vector itself, so holding that lock makes
        // size(), get() and the cursor update one atomic step relative to concurrent add/remove.
        synchronized (IPS) {
            int size = IPS.size();
            if (size == 0) {
                throw new IllegalStateException("No proxy IP available");
            }
            // floorMod keeps the index non-negative even after the cursor overflows.
            int index = Math.floorMod(currentIpIndex, size);
            currentIpIndex++;
            return IPS.get(index);
        }
    }

    /**
     * Switches the JVM-wide HTTP proxy until the externally visible IP differs from
     * {@code previousIp}.
     * <p>
     * Only one thread performs the switch at a time. A thread that arrives while a switch is
     * running waits for it to finish and then returns {@code true} without switching again.
     * A request issued no later than the last successful switch also returns {@code true}
     * immediately.
     * <p>
     * The switching flag is always cleared and waiters are always notified, under the class
     * lock, even when the switch returns early or throws.
     *
     * @return {@code true} if the IP changed (or another thread's switch was awaited),
     *         {@code false} if no change was achieved
     */
    public static boolean toNewIp() {
        long requestSwitchTime = System.currentTimeMillis();
        LOGGER.info(Thread.currentThread() + "???");
        synchronized (ProxyIp.class) {
            if (isSwitching) {
                LOGGER.info(Thread.currentThread()
                        + "????????");
                // Wait in a loop: wait() may return spuriously.
                while (isSwitching) {
                    try {
                        ProxyIp.class.wait();
                    } catch (InterruptedException e) {
                        LOGGER.error(e.getMessage(), e);
                        // Keep the interrupt visible to the caller and stop waiting.
                        Thread.currentThread().interrupt();
                        break;
                    }
                }
                LOGGER.info(Thread.currentThread() + "????");
                return true;
            }
            isSwitching = true;
        }
        try {
            // The request predates the last completed switch, so it is already satisfied.
            if (requestSwitchTime <= lastSwitchTime) {
                LOGGER.info("?");
                return true;
            }
            LOGGER.info(Thread.currentThread() + "???");
            long start = System.currentTimeMillis();
            String proxyIp = useNewProxyIp();
            String currentIp;
            int times = 0;
            // Keep trying candidates while the visible IP is unchanged. Objects.equals is used
            // because getCurrentIp() can return null when previousIp is still null.
            while (Objects.equals(currentIp = getCurrentIp(), previousIp) && (times++) < Integer.MAX_VALUE) {
                NORMAL_IPS.add(proxyIp);
                IPS.remove(proxyIp);
                proxyIp = useNewProxyIp();
            }
            if (!Objects.equals(currentIp, previousIp)) {
                previousIp = currentIp;
                EXCELLENT_IPS.add(proxyIp);
                LOGGER.info(Thread.currentThread() + "????");
                LOGGER.info(Thread.currentThread() + "??" + (System.currentTimeMillis() - start)
                        + "");
                lastSwitchTime = System.currentTimeMillis();
                return true;
            }
            NORMAL_IPS.add(proxyIp);
            IPS.remove(proxyIp);
            LOGGER.info(Thread.currentThread() + "???");
            LOGGER.info(
                    Thread.currentThread() + "??" + (System.currentTimeMillis() - start) + "");
            return false;
        } finally {
            // Clear the flag and wake waiters atomically so no thread can observe
            // isSwitching == true after the last notification has been sent.
            synchronized (ProxyIp.class) {
                isSwitching = false;
                ProxyIp.class.notifyAll();
            }
        }
    }

    /**
     * Takes the next candidate from the rotation and installs it as the JVM-wide HTTP proxy
     * through the {@code proxySet}, {@code http.proxyHost} and {@code http.proxyPort}
     * system properties.
     *
     * @return the installed "host:port" entry
     */
    private static String useNewProxyIp() {
        final String candidate = getNextProxyIp();
        final String[] hostAndPort = candidate.split(":");
        System.setProperty("proxySet", "true");
        System.setProperty("http.proxyHost", hostAndPort[0]);
        System.setProperty("http.proxyPort", hostAndPort[1]);
        LOGGER.info("??" + candidate);
        return candidate;
    }

    /**
     * Checks whether a proxy is usable by fetching {@code http://apdplat.org} through it and
     * looking for an expected marker string in the returned page.
     * The response reader is closed and the connection disconnected on every path.
     *
     * @param host proxy host name or IP address
     * @param port proxy port
     * @return {@code true} if the page was fetched through the proxy and contains the marker;
     *         {@code false} on any failure (errors are logged, not thrown)
     */
    public static boolean verify(String host, int port) {
        HttpURLConnection connection = null;
        try {
            String url = "http://apdplat.org";
            Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(host, port));
            connection = (HttpURLConnection) new URL(url).openConnection(proxy);
            connection.setConnectTimeout(10000);
            connection.setReadTimeout(10000);
            connection.setUseCaches(false);
            StringBuilder html = new StringBuilder();
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    html.append(line);
                }
            }
            LOGGER.info("HTML" + html);
            if (html.toString().contains("APDPlat???")) {
                LOGGER.info("?IP??" + host + ":" + port);
                return true;
            }
        } catch (Exception e) {
            // Dead or misbehaving proxies are expected; only the message is logged.
            LOGGER.error(e.getMessage());
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
        LOGGER.info("?IP?" + host + ":" + port);
        return false;
    }

    /**
     * Asks {@code 1111.ip138.com} for the externally visible IP of this JVM and returns the
     * first IPv4 address found in the response text.
     *
     * @return the detected IP, or the current value of {@code previousIp} (which may be
     *         {@code null}) when the request fails or no address is found
     */
    public static String getCurrentIp() {
        try {
            String url = "http://1111.ip138.com/ic.asp?timestamp=" + System.nanoTime();
            String text = Jsoup.connect(url).header("Accept", ACCEPT).header("Accept-Encoding", ENCODING)
                    .header("Accept-Language", LANGUAGE).header("Connection", CONNECTION)
                    .header("Host", "1111.ip138.com").header("Referer", "http://ip138.com/")
                    .header("User-Agent", USER_AGENT).ignoreContentType(true).timeout(5000).get().text();
            LOGGER.info("IP?" + text);
            Matcher matcher = IP_PATTERN.matcher(text);
            if (matcher.find()) {
                String ip = matcher.group();
                LOGGER.info("IP?" + ip);
                // NOTE(review): contains("") is always true; the literal presumably held a
                // country name that was lost in an encoding conversion - confirm and restore.
                if (text.contains("")) {
                    String proxyHost = System.getProperty("http.proxyHost");
                    String proxyPort = System.getProperty("http.proxyPort");
                    // Without a configured proxy the properties are null; recording "null:null"
                    // would later fail to parse when the set is verified and persisted.
                    if (proxyHost != null && proxyPort != null) {
                        EXCELLENT_USA_IPS.add(proxyHost + ":" + proxyPort);
                    }
                }
                return ip;
            }
        } catch (Exception e) {
            LOGGER.error(e.getMessage());
        }
        LOGGER.info("IP??IP?" + previousIp);
        return previousIp;
    }

    /**
     * Collects the verified proxies from all four scraping sources into one de-duplicated set.
     *
     * @return the union of the four sources' results
     */
    private static Set<String> getProxyIps() {
        Set<String> merged = new HashSet<>(getProxyIpOne());
        merged.addAll(getProxyIpTwo());
        merged.addAll(getProxyIpThree());
        merged.addAll(getProxyIpFour());
        return merged;
    }

    /** Scrapes proxy.goubanjia.com; the timestamp query parameter makes each URL unique. */
    private static List<String> getProxyIpOne() {
        return getProxyIp(
                "http://proxy.goubanjia.com/?timestamp=" + System.nanoTime(),
                "html body div.wrap.fullwidth div#content div#post-2.post-2.page.type-page.status-publish.hentry div.entry.entry-content div#list table.table tbody tr");
    }

    /** Scrapes ip.qiaodm.com; the timestamp query parameter makes each URL unique. */
    private static List<String> getProxyIpTwo() {
        return getProxyIp(
                "http://ip.qiaodm.com/?timestamp=" + System.nanoTime(),
                "html body div#main_container div.inner table.iplist tbody tr");
    }

    /**
     * Loads a proxy-list page, selects its table rows and returns the verified proxies.
     * In each row the first cell is decoded by {@code getIps} and the second cell is parsed
     * as the port. A row that fails is logged and skipped.
     *
     * @param url     page to load
     * @param cssPath selector matching one element per proxy row
     * @return verified proxies as "host:port"; empty if the page could not be processed
     */
    private static List<String> getProxyIp(String url, String cssPath) {
        List<String> verified = new ArrayList<>();
        try {
            HtmlPage page = (HtmlPage) WEB_CLIENT.getPage(url);
            Document doc = Jsoup.parse(page.getBody().asXml());
            for (Element row : doc.select(cssPath)) {
                try {
                    Elements cells = row.children();
                    if (cells.size() <= 1) {
                        continue;
                    }
                    String ip = getIps(cells.get(0));
                    Element portCell = cells.get(1);
                    String portText = portCell.text();
                    LOGGER.info("?" + portText + " -> " + portCell.outerHtml());
                    int port = Integer.parseInt(portText);
                    if (ip == null || port <= 0) {
                        continue;
                    }
                    LOGGER.info("?IP" + ip + "?" + port);
                    if (verify(ip, port)) {
                        LOGGER.info("IP" + ip + "?" + port + "?");
                        verified.add(ip + ":" + port);
                    } else {
                        LOGGER.info("IP" + ip + "?" + port + "?");
                    }
                } catch (Exception e) {
                    LOGGER.error("?IP", e);
                }
            }
        } catch (Exception e) {
            LOGGER.error("?IP", e);
        }
        return verified;
    }

    /** Scrapes pages 1 through 10 of the kuaidaili.com proxy list. */
    private static List<String> getProxyIpThree() {
        List<String> collected = new ArrayList<>();
        int page = 1;
        while (page <= 10) {
            collected.addAll(getProxyIpThree(page));
            page++;
        }
        return collected;
    }

    /**
     * Scrapes one page of the kuaidaili.com proxy list and returns the verified proxies.
     * In each row the first cell must contain an IPv4 address and the second a positive
     * integer port; other rows are skipped.
     *
     * @param page page number appended to the list URL
     * @return verified proxies as "host:port"; empty if the page could not be processed
     */
    private static List<String> getProxyIpThree(int page) {
        List<String> verified = new ArrayList<>();
        try {
            String url = "http://www.kuaidaili.com/proxylist/" + page;
            HtmlPage htmlPage = (HtmlPage) WEB_CLIENT.getPage(url);
            Document doc = Jsoup.parse(htmlPage.getBody().asXml());
            Elements rows = doc
                    .select("html body div#container div#list table.table.table-bordered.table-striped tbody tr");
            for (Element row : rows) {
                try {
                    Elements cells = row.children();
                    if (cells.size() <= 1) {
                        continue;
                    }
                    String ip = cells.get(0).text();
                    String portText = cells.get(1).text();
                    LOGGER.info("IP" + ip);
                    LOGGER.info("?" + portText);
                    Matcher matcher = IP_PATTERN.matcher(ip);
                    if (matcher.find()) {
                        ip = matcher.group();
                        LOGGER.info("ip??" + ip);
                    } else {
                        LOGGER.info("ip??" + ip);
                        ip = null;
                    }
                    // The port stays 0 when the cell is not a number, which rejects the row below.
                    int port = 0;
                    try {
                        port = Integer.parseInt(portText);
                        LOGGER.info("??" + port);
                    } catch (Exception e) {
                        LOGGER.info("??" + port);
                    }
                    if (ip == null || port <= 0) {
                        continue;
                    }
                    LOGGER.info("?IP" + ip + "?" + port);
                    if (verify(ip, port)) {
                        LOGGER.info("IP" + ip + "?" + port + "?");
                        verified.add(ip + ":" + port);
                    } else {
                        LOGGER.info("IP" + ip + "?" + port + "?");
                    }
                } catch (Exception e) {
                    LOGGER.error("?IP", e);
                }
            }
        } catch (Exception e) {
            LOGGER.error("?IP", e);
        }
        return verified;
    }

    /** Scrapes pages 1 through 10 of the kxdaili.com proxy list. */
    private static List<String> getProxyIpFour() {
        List<String> collected = new ArrayList<>();
        int page = 1;
        while (page <= 10) {
            collected.addAll(getProxyIpFour(page));
            page++;
        }
        return collected;
    }

    /**
     * Scrapes one page of the kxdaili.com proxy list and returns the verified proxies.
     * In each row the first cell must contain an IPv4 address and the second a positive
     * integer port; other rows are skipped.
     *
     * @param page page number inserted into the list URL
     * @return verified proxies as "host:port"; empty if the page could not be processed
     */
    private static List<String> getProxyIpFour(int page) {
        List<String> verified = new ArrayList<>();
        try {
            String url = "http://www.kxdaili.com/ipList/" + page + ".html";
            HtmlPage htmlPage = (HtmlPage) WEB_CLIENT.getPage(url);
            Document doc = Jsoup.parse(htmlPage.getBody().asXml());
            Elements rows = doc
                    .select("html body#nav_btn01 div.tab_c_box.buy_tab_box table.ui.table.segment tbody tr");
            for (Element row : rows) {
                try {
                    Elements cells = row.children();
                    if (cells.size() <= 1) {
                        continue;
                    }
                    String ip = cells.get(0).text();
                    String portText = cells.get(1).text();
                    LOGGER.info("IP" + ip);
                    LOGGER.info("?" + portText);
                    Matcher matcher = IP_PATTERN.matcher(ip);
                    if (matcher.find()) {
                        ip = matcher.group();
                        LOGGER.info("ip??" + ip);
                    } else {
                        LOGGER.info("ip??" + ip);
                        ip = null;
                    }
                    // The port stays 0 when the cell is not a number, which rejects the row below.
                    int port = 0;
                    try {
                        port = Integer.parseInt(portText);
                        LOGGER.info("??" + port);
                    } catch (Exception e) {
                        LOGGER.info("??" + port);
                    }
                    if (ip == null || port <= 0) {
                        continue;
                    }
                    LOGGER.info("?IP" + ip + "?" + port);
                    if (verify(ip, port)) {
                        LOGGER.info("IP" + ip + "?" + port + "?");
                        verified.add(ip + ":" + port);
                    } else {
                        LOGGER.info("IP" + ip + "?" + port + "?");
                    }
                } catch (Exception e) {
                    LOGGER.error("?IP", e);
                }
            }
        } catch (Exception e) {
            LOGGER.error("?IP", e);
        }
        return verified;
    }

    /**
     * Rebuilds an IP address from a table cell whose child elements each hold a fragment.
     * Children styled {@code display: none;} or {@code display:none;} are skipped, as are
     * children with blank text; the remaining texts are concatenated in document order and
     * the first IPv4 address in the result is returned.
     *
     * @param element cell whose direct children carry the fragments
     * @return the extracted IPv4 address, or {@code null} if none is found
     */
    private static String getIps(Element element) {
        StringBuilder visibleText = new StringBuilder();
        Elements fragments = element.children();
        LOGGER.info("");
        LOGGER.info("?IP?" + element.text());
        int position = 0;
        for (Element fragment : fragments) {
            position++;
            String html = fragment.outerHtml();
            LOGGER.info(position + "?" + "HTML" + html.replaceAll("[\n\r]", ""));
            String text = fragment.text();
            String style = fragment.attr("style");
            boolean hidden = fragment.hasAttr("style")
                    && (style.equals("display: none;") || style.equals("display:none;"));
            if (hidden) {
                LOGGER.info("?" + text);
                continue;
            }
            if (StringUtils.isNotBlank(text)) {
                LOGGER.info("?" + text);
                visibleText.append(text);
            } else {
                LOGGER.info("");
            }
        }
        LOGGER.info("----------------------------------------------------------------");
        LOGGER.info("?ip: " + visibleText);
        LOGGER.info("----------------------------------------------------------------");
        Matcher matcher = IP_PATTERN.matcher(visibleText.toString());
        if (!matcher.find()) {
            LOGGER.info("ip??" + visibleText);
            return null;
        }
        String extracted = matcher.group();
        LOGGER.info("ip??" + extracted);
        return extracted;
    }

    /**
     * Demo entry point: sets the background detection interval to 1000 ms and then
     * switches proxy in an endless loop.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        detectInterval = 1000;
        for (;;) {
            toNewIp();
        }
    }
}