com.isoftstone.proxy.api.sdk.KuaidailiProxySDK.java Source code

Java tutorial

Introduction

Here is the source code for com.isoftstone.proxy.api.sdk.KuaidailiProxySDK.java

Source

/*
 * @(#)KuaidailiProxySDK.java 2015-2-27 ?2:00:19 proxy_pool Copyright 2015
 * Isoftstone, Inc. All rights reserved. ISOFTSTONE PROPRIETARY/CONFIDENTIAL.
 * Use is subject to license terms.
 */
package com.isoftstone.proxy.api.sdk;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.isoftstone.proxy.api.model.CustomException;
import com.isoftstone.proxy.api.model.ProxyVo;

/**
 * {@link ProxySDK} implementation that collects proxies from the free proxy
 * listing pages of kuaidaili.com, or from a plain-text {@code ip:port} list.
 *
 * <p>All parsing is best-effort: malformed rows or lines are logged and
 * skipped so that a single bad entry does not discard the remaining ones.
 *
 * @author danhb
 * @date  2015-2-27
 * @version 1.0
 *
 */
public class KuaidailiProxySDK extends ProxySDK {

    private static final Log LOG = LogFactory.getLog(KuaidailiProxySDK.class);

    /** Landing page of the free proxy listing (treated as page 1). */
    private static final String FIRST_PAGE_URL = "http://www.kuaidaili.com/free/";

    /** Prefix of the numbered listing pages; the page number and a trailing slash are appended. */
    private static final String PAGE_URL_PREFIX = "http://www.kuaidaili.com/free/inha/";

    /** Exclusive upper bound of the numbered pages to crawl (pages 2 .. PAGE_LIMIT - 1). */
    private static final int PAGE_LIMIT = 5;

    /**
     * Returns the proxies currently obtainable from this source.
     *
     * @return the crawled proxies; possibly empty, never {@code null}
     * @throws CustomException declared by the parent contract
     * @see com.isoftstone.proxy.api.sdk.ProxySDK#getProxyList()
     */
    @Override
    public List<ProxyVo> getProxyList() throws CustomException {
        return getProxyListByCrawler();
    }

    /**
     * Crawls the landing page and the following numbered listing pages and
     * collects every proxy found on them.
     *
     * <p>If fetching a page fails with an {@link IOException}, the error is
     * logged and the proxies gathered so far are returned; later pages are not
     * attempted.
     *
     * @return the proxies parsed from the fetched pages; possibly empty, never {@code null}
     * @throws CustomException declared for interface compatibility
     */
    public List<ProxyVo> getProxyListByCrawler() throws CustomException {
        List<ProxyVo> proxyList = new ArrayList<ProxyVo>();
        try {
            Document doc = Jsoup.connect(FIRST_PAGE_URL).get();
            proxyList.addAll(parseHtml(doc));
            for (int page = 2; page < PAGE_LIMIT; page++) {
                String pageUrl = PAGE_URL_PREFIX + page + "/";
                doc = Jsoup.connect(pageUrl).get();
                proxyList.addAll(parseHtml(doc));
            }
        } catch (IOException e) {
            // Best effort: keep whatever was collected before the failure.
            LOG.error("jsoup IO.", e);
        }
        return proxyList;
    }

    /**
     * Reads proxies from a text source containing one {@code ip:port} entry per line.
     *
     * <p>Blank lines, lines without a {@code ':'} separator and lines whose port
     * is not a valid integer are logged and skipped. The reader is always closed
     * before this method returns.
     *
     * @param in the character source to read from; closed by this method
     * @return the proxies that could be parsed; possibly empty, never {@code null}
     * @throws CustomException declared for interface compatibility
     */
    public List<ProxyVo> getProxyFromReader(Reader in) throws CustomException {
        List<ProxyVo> proxyList = new ArrayList<ProxyVo>();
        BufferedReader br = new BufferedReader(in);
        try {
            String line;
            while ((line = br.readLine()) != null) {
                String entry = line.trim();
                if (entry.length() == 0) {
                    continue;
                }
                String[] parts = entry.split(":");
                if (parts.length < 2 || parts[0].trim().length() == 0) {
                    // Without this guard parts[1] would throw ArrayIndexOutOfBoundsException.
                    LOG.warn("Skipping malformed proxy line (expected ip:port): " + entry);
                    continue;
                }
                Integer port;
                try {
                    port = Integer.valueOf(parts[1].trim());
                } catch (NumberFormatException e) {
                    // Handled per line so one bad port does not drop the rest of the input.
                    LOG.warn("Skipping proxy line with non-numeric port: " + entry, e);
                    continue;
                }
                ProxyVo proxyVo = new ProxyVo();
                proxyVo.setProxyIp(parts[0].trim());
                proxyVo.setProxyPort(port);
                proxyList.add(proxyVo);
            }
        } catch (IOException e) {
            LOG.error("Failed to read proxy list from reader.", e);
        } finally {
            try {
                br.close();
            } catch (IOException e) {
                LOG.error("Failed to close proxy list reader.", e);
            }
        }
        return proxyList;
    }

    /**
     * Extracts proxies from the rows matched by {@code #list table tr}.
     *
     * <p>The first matched row is skipped (presumably the table header). For each
     * remaining row the first cell is taken as the IP and the second as the port.
     * Rows lacking either cell, with an empty IP, or with a non-numeric port are
     * skipped instead of raising an unchecked exception.
     *
     * @param doc the parsed listing page
     * @return the proxies found on the page; possibly empty, never {@code null}
     */
    private List<ProxyVo> parseHtml(Document doc) {
        Elements rows = doc.select("#list table tr");
        List<ProxyVo> proxyList = new ArrayList<ProxyVo>();
        for (int i = 1; i < rows.size(); i++) {
            Element row = rows.get(i);
            Element ipEle = row.select("td:eq(0)").first();
            Element portEle = row.select("td:eq(1)").first();
            if (ipEle == null || portEle == null) {
                // first() yields null when the row has no matching <td>.
                continue;
            }
            String ip = ipEle.text().trim();
            String portText = portEle.text().trim();
            if (ip.length() == 0) {
                continue;
            }
            int port;
            try {
                port = Integer.parseInt(portText);
            } catch (NumberFormatException e) {
                LOG.warn("Skipping proxy row with non-numeric port: " + ip + ":" + portText, e);
                continue;
            }
            ProxyVo proxyVo = new ProxyVo();
            proxyVo.setProxyIp(ip);
            proxyVo.setProxyPort(port);
            proxyList.add(proxyVo);
        }
        return proxyList;
    }

}