Java tutorial
/* * IMEI.java * Created on 2011-2-27; Project to Colt2010; $Id: IMEI.java 309 2013-04-25 16:38:44Z tristan $ * * Copyright (c) 2011, Xu Brothers and/or its affiliates. All rights reserved. * Xu Brothers PROPRIETARY/CONFIDENTIAL. Use is subject to license terms. */ package com.waku.mmdataextract; import java.io.File; import java.io.FileWriter; import java.io.InputStream; import java.util.List; import org.dom4j.Document; import org.dom4j.Element; import org.dom4j.io.SAXReader; import org.openqa.selenium.By; import org.openqa.selenium.WebDriver; import org.openqa.selenium.WebElement; import org.openqa.selenium.htmlunit.HtmlUnitDriver; import com.waku.common.http.HttpCaller; /** * @versin $Rev: 309 $, $Date: 2013-04-26 00:38:44 +0800 (, 26 2013) $ * @author Jin */ public class IMEI { private static final String PRODUCT_DETAIL_URL = "http://shouji.gd.chinamobile.com/gdmobile/productDetail/"; private static final String SEARCH_ACTION_PER_BRD = "searchImeiAction.do?flag=typebrand&typeid=1&brandid="; private static final String SEARCH_ACTION_PER_PAGE = "searchImeiAction.do?flag=brand¤tPage="; private static String urlFor(int i, String searchvalue) { return PRODUCT_DETAIL_URL + SEARCH_ACTION_PER_PAGE + i + "&searchvalue=" + searchvalue; } private static String urlFor(String bId) { return PRODUCT_DETAIL_URL + SEARCH_ACTION_PER_BRD + bId; } @SuppressWarnings("unchecked") public static void main(String[] args) throws Exception { WebDriver driver = new HtmlUnitDriver(); InputStream in = Thread.currentThread().getContextClassLoader().getResourceAsStream("Brand.xml"); List<Element> brands = new SAXReader().read(in).selectNodes("/brand/option"); for (Element brand : brands) { String bId = brand.attributeValue("value"); FileWriter fw = new FileWriter(new File("output/" + brand.getText() + ".csv")); try { Document doc = HttpCaller.httpGetAsXMLDoc(urlFor(bId)); List<Element> products = doc.selectNodes("/products/product"); for (Element e : products) { String qString = e.attributeValue("id"); driver.get(urlFor(1, qString)); int pageNumber = Integer .parseInt(driver.findElements(By.xpath("//span[@class='font_b2']")).get(1).getText()); if (pageNumber >= 1) { for (int i = 1; i <= pageNumber; i++) { readForEachPage(fw, driver, urlFor(i, qString)); } } } fw.close(); } catch (RuntimeException e) { System.out.println("Skipped " + brand.getText()); fw.write("Skipped!"); fw.close(); } } } private static void readForEachPage(FileWriter fw, WebDriver driver, String url) throws Exception { try { driver.get(url); WebElement table = driver.findElement(By.xpath("//table[@bgcolor='BFBEC3']")); List<WebElement> items = table.findElements(By.xpath("./tbody/tr")); for (int i = 1; i < items.size() - 1; i++) { List<WebElement> cols = items.get(i).findElements(By.tagName("td")); for (WebElement col : cols) { System.out.print(col.getText() + ","); fw.write(col.getText() + ","); } System.out.println(); fw.write("\n"); } } catch (Exception e) { System.out.println("---------Retry--------------------------"); System.out.println(e.getMessage()); System.out.println("---------Retry---------------------------"); readForEachPage(fw, driver, url); } } }