// Java tutorial
/* * CompareProductions.java * Created on 2011-5-22; Project to Colt2010; $Id: CompareProductions.java 309 2013-04-25 16:38:44Z tristan $ * * Copyright (c) 2011, Xu Brothers and/or its affiliates. All rights reserved. * Xu Brothers PROPRIETARY/CONFIDENTIAL. Use is subject to license terms. */ package com.waku.mmdataextract; import java.io.BufferedReader; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.commons.lang.StringUtils; import org.apache.log4j.Logger; import org.dom4j.Document; import org.dom4j.Element; import org.supercsv.io.CsvListWriter; import org.supercsv.io.CsvMapReader; import org.supercsv.prefs.CsvPreference; import com.waku.common.http.MyHttpClient; import com.waku.common.http.WebRequestEncoder; /** * @versin $Rev: 309 $, $Date: 2013-04-26 00:38:44 +0800 (, 26 2013) $ * @author Jin */ public class CompareProductions { static Logger logger = Logger.getLogger(CompareProductions.class.getName()); private static final String COMPARE_ACTION = "http://shouji.gd.chinamobile.com/gdmobile/mobileCompareAction.do"; private static final List<String> HEAD_LIST = new ArrayList<String>(); private static final List<Map<String, String>> RESULT_LIST = new ArrayList<Map<String, String>>(); private static final String LOG_INDEX = "Get product id add -> "; private static final String NOKIA_N95 = "596"; private static Map<String, String> customIconMap = new HashMap<String, String>(); static { customIconMap.put("/gdmobile/images/custom_no.gif", "?"); customIconMap.put("/gdmobile/images/custom_yes.gif", ""); customIconMap.put("/gdmobile/images/custom_yes1.gif", ""); customIconMap.put("/gdmobile/images/custom_yes2.gif", ""); } private static String urlForCompare(String... 
ids) { WebRequestEncoder request = new WebRequestEncoder(COMPARE_ACTION); StringBuilder sb = new StringBuilder(); for (String id : ids) { if (id != null) { sb.append(id); sb.append("|"); } } request.addValue("str_id", sb.toString()); return request.getURL(); } private static List<String[]> combineProdIds(List<String> prodIdList) { List<String[]> list = new ArrayList<String[]>(); int i = 0; String[] s = null; for (String prodId : prodIdList) { if (i % 4 == 0) { s = new String[4]; } s[i % 4] = prodId; if (i % 4 == 3) { list.add(s); } i++; } // if the last one is less than 4 if ((i - 1) % 4 < 3) { list.add(s); } return list; } private static List<String> getProdIdListFromLog(String fileName) { InputStream resourceAsStream = Thread.currentThread().getContextClassLoader().getResourceAsStream(fileName); BufferedReader br = new BufferedReader(new InputStreamReader(resourceAsStream)); String line = null; List<String> prodIdList = new ArrayList<String>(); try { while ((line = br.readLine()) != null) { if (line.startsWith(LOG_INDEX)) { prodIdList.add(line.substring(line.indexOf(LOG_INDEX) + LOG_INDEX.length())); } } } catch (IOException e) { e.printStackTrace(); } finally { try { br.close(); } catch (IOException e) { e.printStackTrace(); } try { resourceAsStream.close(); } catch (IOException e) { e.printStackTrace(); } } logger.info("Get product id list done! 
-> "); logger.info(prodIdList); return prodIdList; } public static void main(String[] args) { load("CompareProductions9200.csv"); start(getProdIdListFromLog("ComprehensiveSearch.log"), 9200); } private static void load(String fileName) { InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream(fileName); CsvMapReader mReadder = new CsvMapReader(new InputStreamReader(is), CsvPreference.STANDARD_PREFERENCE); String[] csvHeader; try { csvHeader = mReadder.getHeader(true); HEAD_LIST.addAll(Arrays.asList(csvHeader)); // logger.info("Get head line as -> " + HEAD_LIST); Map<String, String> m = null; while ((m = mReadder.read(csvHeader)) != null) { RESULT_LIST.add(m); // logger.info("Get result -> " + m); } } catch (IOException e) { e.printStackTrace(); } finally { try { mReadder.close(); } catch (IOException e) { } try { is.close(); } catch (IOException e) { } } logger.info("Load existing data from file done!"); } public static void start(List<String> prodIdList, int start) { for (int step = start; step < prodIdList.size(); step = step + 100) { List<String> temp = prodIdList.subList(step, (step + 100) > prodIdList.size() ? 
prodIdList.size() : step + 100); File file = new File("output/CompareProductions" + (step + temp.size()) + ".csv"); // if (file.exists()) { // logger.info("Data already finished in " + file.getName()); // continue; // } logger.info("Started with -> " + temp); for (String[] ids : combineProdIds(temp)) { processCompare(false, ids); } String[] headLine = HEAD_LIST.toArray(new String[0]); List<String[]> prodList = new ArrayList<String[]>(); for (Map<String, String> prodMap : RESULT_LIST) { String[] prod = new String[headLine.length]; for (Map.Entry<String, String> entry : prodMap.entrySet()) { prod[HEAD_LIST.indexOf(entry.getKey())] = entry.getValue(); } for (int i = 0; i < prod.length; i++) { if (prod[i] == null) { prod[i] = "N/A"; } } prodList.add(prod); } CsvListWriter writer = null; try { writer = new CsvListWriter(new FileWriter(file), CsvPreference.STANDARD_PREFERENCE); writer.writeHeader(headLine); for (String[] s : prodList) { writer.write(s); } } catch (IOException e) { e.printStackTrace(); } finally { try { writer.close(); } catch (IOException e) { // ignore } } logger.info("---> Done for " + (step + temp.size())); } logger.info("------------------> Haha, done!"); } @SuppressWarnings("unchecked") private static void processCompare(boolean single, String... 
ids) { Document document = MyHttpClient.getAsDom4jDoc(urlForCompare(ids)); if (document == null) { if (single) { logger.info("####### Skip dirty productId ---> " + ids[0]); return; } StringBuffer sb = new StringBuffer(); for (String id : ids) { sb.append(id + "|"); } logger.info("Get compare information failed -> " + sb); logger.info("Retry compare each id with NOIKA N95!"); for (String id : ids) { logger.info("Retry compare -> " + id + "|" + NOKIA_N95); processCompare(true, id, NOKIA_N95); } return; } List<Element> headLine = document.selectNodes("//td[@class='sjbj_r_space']"); logger.info("head size = " + headLine.size()); for (Element e : headLine) { String head = e.getText(); if (!HEAD_LIST.contains(head)) { HEAD_LIST.add(head); } } int count = 0; for (String id : ids) { if (id != null) { count++; } } logger.info("count = " + count); List<Element> eList = document.selectNodes("//td[@class='sjbj_l_space']"); logger.info("eList size = " + eList.size()); if (headLine.size() * count != eList.size()) { logger.info("Something wrong here! headLine.size() * count != eList.size()"); logger.info("Try compare each id with NOIKA N95!"); for (String id : ids) { logger.info("Try compare -> " + id + "|" + NOKIA_N95 + "|"); processCompare(true, id, NOKIA_N95); } return; } List<Map<String, String>> list = new ArrayList<Map<String, String>>(); for (int i = 0; i < eList.size(); i++) { String value = eList.get(i).getText(); if (i < count) { list.add(new HashMap<String, String>()); String toFileName = ids[i] + ".gif"; ComprehensiveSearch.saveImage(eList.get(i).element("img").attributeValue("src"), toFileName); value = toFileName; } if (i / count == 2) { // ? value = customIconMap.get(eList.get(i).element("img").attributeValue("src")); } list.get(i % count).put(headLine.get(i / count).getText(), StringUtils.isEmpty(value) ? "N/A" : value); if (single) { i++; // skip the second one } } logger.info(list); RESULT_LIST.addAll(list); } }