Java tutorial
/* * @(#)KuaidailiProxySDK.java 2015-2-27 ?2:00:19 proxy_pool Copyright 2015 * Isoftstone, Inc. All rights reserved. ISOFTSTONE PROPRIETARY/CONFIDENTIAL. * Use is subject to license terms. */ package com.isoftstone.proxy.api.sdk; import java.io.BufferedReader; import java.io.IOException; import java.io.Reader; import java.util.ArrayList; import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import com.isoftstone.proxy.api.model.CustomException; import com.isoftstone.proxy.api.model.ProxyVo; /** * KuaidailiProxySDK * @author danhb * @date 2015-2-27 * @version 1.0 * */ public class KuaidailiProxySDK extends ProxySDK { private static final Log LOG = LogFactory.getLog(KuaidailiProxySDK.class); /* * (non-Javadoc) * * @see com.isoftstone.proxy.api.sdk.ProxySDK#getProxyList() */ @Override public List<ProxyVo> getProxyList() throws CustomException { return getProxyListByCrawler(); } public List<ProxyVo> getProxyListByCrawler() throws CustomException { // String html = client.getProxyByHttpClient("http://www.kuaidaili.com/free/"); // Document doc = Jsoup.parse(html); // System.out.println(html); List<ProxyVo> proxyList = new ArrayList<ProxyVo>(); try { Document doc = Jsoup.connect("http://www.kuaidaili.com/free/").get(); // Elements eles = doc.select("#listnav li a:last-child"); int size = 5; // if (eles.size() > 20) { // size = 20; // } else { // size = eles.size(); // } proxyList.addAll(parseHtml(doc)); for (int i = 2; i < size; i++) { // String html = eles.get(i).absUrl("href"); String html = "http://www.kuaidaili.com/free/inha/" + i + "/"; doc = Jsoup.connect(html).get(); proxyList.addAll(parseHtml(doc)); } } catch (IOException e) { LOG.error("jsoup IO.", e); } return proxyList; } public List<ProxyVo> getProxyFromReader(Reader in) throws CustomException { List<ProxyVo> proxyList = new ArrayList<ProxyVo>(); BufferedReader br = new BufferedReader(in); String str = null; try { while ((str = br.readLine()) != null) { String[] proxy = str.split(":"); ProxyVo proxyVo = new ProxyVo(); proxyVo.setProxyIp(proxy[0]); proxyVo.setProxyPort(Integer.valueOf(proxy[1])); proxyList.add(proxyVo); } } catch (NumberFormatException e) { LOG.error("?.", e); } catch (IOException e) { LOG.error("?.", e); } finally { if (br != null) { try { br.close(); } catch (IOException e) { LOG.error("?.", e); } } } return proxyList; } private List<ProxyVo> parseHtml(Document doc) { Elements eles = doc.select("#list table tr"); List<ProxyVo> proxyList = new ArrayList<ProxyVo>(); for (int i = 1; i < eles.size(); i++) { Element ele = eles.get(i); Element ipEle = ele.select("td:eq(0)").first(); Element portEle = ele.select("td:eq(1)").first(); ProxyVo proxyVo = new ProxyVo(); proxyVo.setProxyIp(ipEle.text()); proxyVo.setProxyPort(Integer.parseInt(portEle.text())); proxyList.add(proxyVo); } return proxyList; } }