com.waku.mmdataextract.CompareProductions.java Source code

Java tutorial

Introduction

Here is the source code for com.waku.mmdataextract.CompareProductions.java

Source

/*
 * CompareProductions.java
 * Created on 2011-5-22; Project to Colt2010; $Id: CompareProductions.java 309 2013-04-25 16:38:44Z tristan $
 * 
 * Copyright (c) 2011, Xu Brothers and/or its affiliates. All rights reserved.
 * Xu Brothers PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
 */

package com.waku.mmdataextract;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.dom4j.Document;
import org.dom4j.Element;
import org.supercsv.io.CsvListWriter;
import org.supercsv.io.CsvMapReader;
import org.supercsv.prefs.CsvPreference;

import com.waku.common.http.MyHttpClient;
import com.waku.common.http.WebRequestEncoder;

/**
 * Collects product comparison data into CSV files.
 * <p>
 * Product ids are read from a log resource, sent to the compare action in
 * groups of up to four, and the header/value cells of each returned document
 * are accumulated in memory. After every batch of ids the accumulated rows are
 * written to <code>output/CompareProductions&lt;N&gt;.csv</code>, where
 * <code>N</code> is the index of the last id processed so far.
 * <p>
 * All state is kept in static collections, so the class is intended to be run
 * once per JVM through {@link #main(String[])}.
 *
 * @version $Rev: 309 $, $Date: 2013-04-26 00:38:44 +0800 (, 26  2013) $
 * @author Jin
 */
public class CompareProductions {

    static Logger logger = Logger.getLogger(CompareProductions.class.getName());

    /** URL of the action that renders the comparison document. */
    private static final String COMPARE_ACTION = "http://shouji.gd.chinamobile.com/gdmobile/mobileCompareAction.do";

    /** Number of product ids sent in one compare request. */
    private static final int GROUP_SIZE = 4;

    /** Number of product ids processed between two CSV dumps. */
    private static final int BATCH_SIZE = 100;

    /** Text written for every cell that has no value. */
    private static final String MISSING_VALUE = "N/A";

    /** Zero-based row of the comparison table whose cells are icons translated through {@link #customIconMap}. */
    private static final int CUSTOM_ICON_ROW = 2;

    /** Column names in output order; grows whenever a document shows a new header. */
    private static final List<String> HEAD_LIST = new ArrayList<String>();

    /** One map (column name to value) per product collected so far. */
    private static final List<Map<String, String>> RESULT_LIST = new ArrayList<Map<String, String>>();

    /** Prefix of the log lines that carry a product id. */
    private static final String LOG_INDEX = "Get product id add -> ";

    /** Product id used as the fixed partner when a product is retried on its own. */
    private static final String NOKIA_N95 = "596";

    /**
     * Translates the icon shown in the customization row into cell text.
     * <p>
     * NOTE(review): the mapped texts ("?" and "") look like they lost non-ASCII
     * characters in an encoding conversion - confirm against the original
     * source. As written, an empty text is stored as "N/A" by
     * {@link #processCompare(boolean, String...)}.
     */
    private static final Map<String, String> customIconMap = new HashMap<String, String>();

    static {
        customIconMap.put("/gdmobile/images/custom_no.gif", "?");
        customIconMap.put("/gdmobile/images/custom_yes.gif", "");
        customIconMap.put("/gdmobile/images/custom_yes1.gif", "");
        customIconMap.put("/gdmobile/images/custom_yes2.gif", "");
    }

    /**
     * Builds the compare URL for the given product ids.
     *
     * @param ids product ids; <code>null</code> entries are skipped
     * @return the compare action URL whose <code>str_id</code> value is every
     *         non-null id followed by <code>|</code>
     */
    private static String urlForCompare(String... ids) {
        StringBuilder joined = new StringBuilder();
        for (String id : ids) {
            if (id != null) {
                joined.append(id).append("|");
            }
        }
        WebRequestEncoder request = new WebRequestEncoder(COMPARE_ACTION);
        request.addValue("str_id", joined.toString());
        return request.getURL();
    }

    /**
     * Splits the product ids into consecutive groups of {@value #GROUP_SIZE}.
     *
     * @param prodIdList product ids in processing order
     * @return one array of length {@value #GROUP_SIZE} per group; the unused
     *         trailing slots of the last array are <code>null</code>. An empty
     *         input yields an empty list.
     */
    private static List<String[]> combineProdIds(List<String> prodIdList) {
        List<String[]> groups = new ArrayList<String[]>();
        String[] group = null;
        int i = 0;
        for (String prodId : prodIdList) {
            int slot = i % GROUP_SIZE;
            if (slot == 0) {
                // Register the group as soon as it is opened so that a partially
                // filled last group is kept and an empty input adds nothing.
                group = new String[GROUP_SIZE];
                groups.add(group);
            }
            group[slot] = prodId;
            i++;
        }
        return groups;
    }

    /**
     * Opens a classpath resource through the context class loader.
     *
     * @param fileName resource name
     * @return the open stream, never <code>null</code>
     * @throws IllegalArgumentException if the resource cannot be found
     */
    private static InputStream openResource(String fileName) {
        InputStream in = Thread.currentThread().getContextClassLoader().getResourceAsStream(fileName);
        if (in == null) {
            throw new IllegalArgumentException("Resource not found on the classpath: " + fileName);
        }
        return in;
    }

    /** Closes the stream and logs, rather than propagates, a failure to do so. */
    private static void closeStream(InputStream in, String fileName) {
        try {
            in.close();
        } catch (IOException e) {
            logger.warn("Failed to close " + fileName, e);
        }
    }

    /**
     * Extracts the product ids recorded in a log resource.
     *
     * @param fileName classpath name of the log file
     * @return the text following {@link #LOG_INDEX} on every line that starts
     *         with it, in file order; lines read before an I/O error are kept
     * @throws IllegalArgumentException if the resource cannot be found
     */
    private static List<String> getProdIdListFromLog(String fileName) {
        InputStream resourceAsStream = openResource(fileName);
        // NOTE(review): the reader uses the platform default charset.
        BufferedReader br = new BufferedReader(new InputStreamReader(resourceAsStream));
        List<String> prodIdList = new ArrayList<String>();
        try {
            String line;
            while ((line = br.readLine()) != null) {
                if (line.startsWith(LOG_INDEX)) {
                    prodIdList.add(line.substring(LOG_INDEX.length()));
                }
            }
        } catch (IOException e) {
            logger.error("Failed while reading product ids from " + fileName, e);
        } finally {
            try {
                br.close();
            } catch (IOException e) {
                logger.warn("Failed to close reader for " + fileName, e);
            }
            closeStream(resourceAsStream, fileName);
        }
        logger.info("Get product id list done! -> ");
        logger.info(prodIdList);
        return prodIdList;
    }

    /**
     * Loads a previously written CSV and then continues with the product ids
     * found in <code>ComprehensiveSearch.log</code>, starting at index 9200.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        load("CompareProductions9200.csv");
        start(getProdIdListFromLog("ComprehensiveSearch.log"), 9200);
    }

    /**
     * Loads an existing result CSV so that later dumps contain its rows too.
     * The header columns are appended to {@link #HEAD_LIST} and every data row
     * to {@link #RESULT_LIST}.
     *
     * @param fileName classpath name of the CSV file
     * @throws IllegalArgumentException if the resource cannot be found
     */
    private static void load(String fileName) {
        InputStream is = openResource(fileName);
        CsvMapReader reader = new CsvMapReader(new InputStreamReader(is), CsvPreference.STANDARD_PREFERENCE);
        try {
            String[] csvHeader = reader.getHeader(true);
            if (csvHeader == null) {
                // No header line means there is nothing to load.
                logger.warn("No header line found in " + fileName + ", nothing loaded");
                return;
            }
            HEAD_LIST.addAll(Arrays.asList(csvHeader));
            Map<String, String> row;
            while ((row = reader.read(csvHeader)) != null) {
                RESULT_LIST.add(row);
            }
        } catch (IOException e) {
            logger.error("Failed while loading existing data from " + fileName, e);
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                logger.warn("Failed to close CSV reader for " + fileName, e);
            }
            closeStream(is, fileName);
        }
        logger.info("Load existing data from file done!");
    }

    /**
     * Processes the product ids in batches of {@value #BATCH_SIZE} and, after
     * each batch, writes everything collected so far (including rows loaded by
     * {@link #load(String)}) to <code>output/CompareProductions&lt;end&gt;.csv</code>,
     * where <code>end</code> is the exclusive end index of the batch.
     *
     * @param prodIdList all product ids
     * @param start index of the first id to process; nothing is done when it is
     *        not smaller than the list size
     */
    public static void start(List<String> prodIdList, int start) {
        for (int step = start; step < prodIdList.size(); step += BATCH_SIZE) {
            int end = Math.min(step + BATCH_SIZE, prodIdList.size());
            List<String> batch = prodIdList.subList(step, end);
            File file = new File("output/CompareProductions" + end + ".csv");
            logger.info("Started with -> " + batch);

            for (String[] ids : combineProdIds(batch)) {
                processCompare(false, ids);
            }

            // Snapshot the header after the batch: processing may have added columns.
            String[] headLine = HEAD_LIST.toArray(new String[0]);
            writeCsv(file, headLine, toRows(headLine));
            logger.info("---> Done for " + end);
        }
        logger.info("------------------> Haha, done!");
    }

    /**
     * Converts every collected product map into an array aligned with the
     * given header; cells without a value hold {@link #MISSING_VALUE}.
     */
    private static List<String[]> toRows(String[] headLine) {
        // Resolve each column position once instead of searching the header per cell.
        Map<String, Integer> columnOf = new HashMap<String, Integer>();
        for (int i = 0; i < headLine.length; i++) {
            if (!columnOf.containsKey(headLine[i])) {
                columnOf.put(headLine[i], Integer.valueOf(i));
            }
        }

        List<String[]> rows = new ArrayList<String[]>(RESULT_LIST.size());
        for (Map<String, String> prodMap : RESULT_LIST) {
            String[] row = new String[headLine.length];
            Arrays.fill(row, MISSING_VALUE);
            for (Map.Entry<String, String> entry : prodMap.entrySet()) {
                Integer column = columnOf.get(entry.getKey());
                if (column == null) {
                    logger.warn("No column for key -> " + entry.getKey());
                    continue;
                }
                if (entry.getValue() != null) {
                    row[column.intValue()] = entry.getValue();
                }
            }
            rows.add(row);
        }
        return rows;
    }

    /**
     * Writes the header and rows to the file, creating the parent directory
     * when necessary. I/O failures are logged and do not stop the run.
     */
    private static void writeCsv(File file, String[] headLine, List<String[]> rows) {
        File dir = file.getParentFile();
        if (dir != null && !dir.isDirectory() && !dir.mkdirs()) {
            logger.warn("Could not create output directory " + dir);
        }

        CsvListWriter writer = null;
        try {
            writer = new CsvListWriter(new FileWriter(file), CsvPreference.STANDARD_PREFERENCE);
            writer.writeHeader(headLine);
            for (String[] row : rows) {
                writer.write(row);
            }
        } catch (IOException e) {
            logger.error("Failed to write " + file, e);
        } finally {
            // The writer is still null when opening the file failed.
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException e) {
                    logger.warn("Failed to close " + file, e);
                }
            }
        }
    }

    /**
     * Returns the <code>src</code> attribute of the cell's <code>img</code>
     * child, or <code>null</code> when the cell has no such child or attribute.
     */
    private static String imageSrc(Element cell) {
        Element img = cell.element("img");
        return img == null ? null : img.attributeValue("src");
    }

    /**
     * Requests the comparison of the given products and stores one result map
     * per product in {@link #RESULT_LIST}.
     * <p>
     * The document is read as a table: the <code>sjbj_r_space</code> cells are
     * the row headers and the <code>sjbj_l_space</code> cells are the values,
     * laid out row by row with one cell per requested product. Cells of the
     * first row are images that are saved as <code>&lt;id&gt;.gif</code>; cells
     * of row {@value #CUSTOM_ICON_ROW} are icons translated through
     * {@link #customIconMap}.
     * <p>
     * When the request fails or the cell count does not match, every product of
     * a group is retried on its own, paired with {@link #NOKIA_N95}. A product
     * that still fails in such a retry is skipped.
     *
     * @param single <code>true</code> when this is a retry of one product paired
     *        with the reference product; only the first column is stored then
     * @param ids product ids to compare; <code>null</code> entries are ignored
     *        and are expected to come after the real ids
     */
    @SuppressWarnings("unchecked")
    private static void processCompare(boolean single, String... ids) {
        List<String> validIds = new ArrayList<String>();
        if (ids != null) {
            for (String id : ids) {
                if (id != null) {
                    validIds.add(id);
                }
            }
        }
        if (validIds.isEmpty()) {
            logger.info("No product id to compare, nothing requested");
            return;
        }

        Document document = MyHttpClient.getAsDom4jDoc(urlForCompare(ids));
        if (document == null) {
            if (single) {
                logger.info("####### Skip dirty productId ---> " + validIds.get(0));
                return;
            }
            StringBuilder sb = new StringBuilder();
            for (String id : validIds) {
                sb.append(id).append("|");
            }
            logger.info("Get compare information failed -> " + sb);
            logger.info("Retry compare each id with NOIKA N95!");
            // Only real ids are retried; the null padding of a short group is not a product.
            for (String id : validIds) {
                logger.info("Retry compare -> " + id + "|" + NOKIA_N95);
                processCompare(true, id, NOKIA_N95);
            }
            return;
        }

        List<Element> headLine = document.selectNodes("//td[@class='sjbj_r_space']");
        logger.info("head size = " + headLine.size());

        for (Element e : headLine) {
            String head = e.getText();
            if (!HEAD_LIST.contains(head)) {
                HEAD_LIST.add(head);
            }
        }

        int count = validIds.size();
        logger.info("count = " + count);

        List<Element> eList = document.selectNodes("//td[@class='sjbj_l_space']");
        logger.info("eList size = " + eList.size());

        if (headLine.size() * count != eList.size()) {
            logger.info("Something wrong here! headLine.size() * count != eList.size()");
            if (single) {
                // Retrying the pair would issue the very same request again and
                // recurse without end, so the product is given up instead.
                logger.info("####### Skip dirty productId ---> " + validIds.get(0));
                return;
            }
            logger.info("Try compare each id with NOIKA N95!");
            for (String id : validIds) {
                logger.info("Try compare -> " + id + "|" + NOKIA_N95 + "|");
                processCompare(true, id, NOKIA_N95);
            }
            return;
        }

        // In a retry the second column belongs to the reference product and is dropped.
        int columnsToKeep = single ? 1 : count;
        List<Map<String, String>> list = new ArrayList<Map<String, String>>();
        for (int row = 0; row < headLine.size(); row++) {
            String head = headLine.get(row).getText();
            for (int col = 0; col < columnsToKeep; col++) {
                Element cell = eList.get(row * count + col);
                String value = cell.getText();
                if (row == 0) {
                    // First row: one image per product, stored under the product id.
                    list.add(new HashMap<String, String>());
                    String toFileName = validIds.get(col) + ".gif";
                    String src = imageSrc(cell);
                    if (src != null) {
                        ComprehensiveSearch.saveImage(src, toFileName);
                    } else {
                        logger.warn("No image found for product -> " + validIds.get(col));
                    }
                    value = toFileName;
                }
                if (row == CUSTOM_ICON_ROW) {
                    value = customIconMap.get(imageSrc(cell));
                }
                list.get(col).put(head, StringUtils.isEmpty(value) ? MISSING_VALUE : value);
            }
        }
        logger.info(list);
        RESULT_LIST.addAll(list);
    }

}