crawlers.Xinh.java Source code

Java tutorial

Introduction

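Here is the source code for crawlers.Xinh.java, a command-line crawler that pages through the vietmkt.com "xinhxinh" JSON API (home feed, then every category) and passes the returned image records to DatabaseServiceUtils for insertion.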

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */

package crawlers;

import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.nct.framework.common.LogUtil;
import com.nct.framework.util.JSONUtil;
import databaseUtils.DatabaseServiceUtils;
import entities.DB.AppImageEnt;
import entities.crawlEnt.XinhImageEnt;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;

/**
 * Command-line crawler for the "xinhxinh" image API.
 *
 * <p>{@link #main(String[])} first pages through the home feed, then fetches the
 * category list and pages through every category. Each page of image records is
 * converted to {@link AppImageEnt} instances and handed to
 * {@link DatabaseServiceUtils#InsertAppImageEnt}.
 *
 * @author liempt
 */
public class Xinh {
    private static final Logger logger = LogUtil.getLogger(Xinh.class);

    /** Number of images requested per page. */
    private static final int PAGE_SIZE = 120;

    /** Endpoint returning the list of categories. */
    private static final String LINK_CATEGORIES = "http://vietmkt.com/xinhxinh/?op=categories";

    /** Home feed URL template; arguments are page number and page size. */
    private static final String LINK_HOME = "http://vietmkt.com/xinhxinh/?op=home&page=%s&pagesize=%s&device=ipad";

    /** Category feed URL template; arguments are category key, page number and page size. */
    private static final String LINK_CATEGORY = "http://vietmkt.com/xinhxinh/?op=getListCate&ID=%s&page=%s&pagesize=%s&device=ipad";

    /** Charset used to decode every HTTP response body. */
    private static final Charset UTF_8 = Charset.forName("UTF-8");

    /**
     * Entry point: crawls the home feed and then every category.
     *
     * <p>The process exits with status 0 on success. Any failure is logged and the
     * process exits with status 1 instead of being silently ignored.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        logProgress("Crawl Data Xinh!");
        try {
            crawlHome();
            crawlCategories();
            System.exit(0);
        } catch (Exception ex) {
            // Boundary catch: report the failure and signal it through the exit code.
            logger.error("Crawl Data Xinh failed", ex);
            System.exit(1);
        }
    }

    /**
     * Pages through the home feed, starting at page 1, until the API stops
     * reporting that more data is available.
     *
     * @throws IOException if a page cannot be downloaded
     */
    private static void crawlHome() throws IOException {
        boolean hasMore = true;
        for (int page = 1; hasMore; page++) {
            logProgress("[Home] Get Image {" + PAGE_SIZE + "} -> " + page);
            String urlHome = String.format(LINK_HOME, page, PAGE_SIZE);
            hasMore = saveImageByUrl(urlHome, "Xinh,XHome");
        }
    }

    /**
     * Downloads the category list and pages through every category in it.
     * Does nothing when the category response is missing, reports an error
     * code other than 0, or carries no {@code DATA} array.
     *
     * @throws IOException if the category list or a page cannot be downloaded
     */
    private static void crawlCategories() throws IOException {
        JsonObject response = readJsonObjectFromUrl(LINK_CATEGORIES);
        if (!isSuccess(response)) {
            return;
        }
        JsonArray categories = getArray(response, "DATA");
        if (categories == null) {
            return;
        }
        for (JsonElement element : categories) {
            JsonObject category = element.isJsonObject() ? element.getAsJsonObject() : null;
            String catName = getString(category, "NAME_CAT");
            String catKey = getString(category, "link");
            if (catName == null || catKey == null) {
                // A malformed entry must not abort the remaining categories.
                logger.warn("Skipping category without NAME_CAT/link: " + element);
                continue;
            }

            boolean hasMore = true;
            for (int page = 1; hasMore; page++) {
                logProgress("[" + catKey + "] Get Image {" + PAGE_SIZE + "} -> " + page);
                String urlCategory = String.format(LINK_CATEGORY, catKey, page, PAGE_SIZE);
                hasMore = saveImageByUrl(urlCategory, "Xinh," + catName + "," + catKey);
            }
        }
    }

    /**
     * Downloads one page of image records and passes them to
     * {@link DatabaseServiceUtils#InsertAppImageEnt}.
     *
     * @param urlGet URL of the page to download
     * @param tags   comma-separated tags given to every created {@link AppImageEnt}
     * @return {@code true} when the response has {@code ErrorCode} 0 and its
     *         {@code LoadMore} field equals "YES" (case-insensitive); {@code false} otherwise
     * @throws IOException if the page cannot be downloaded
     */
    private static boolean saveImageByUrl(String urlGet, String tags) throws IOException {
        JsonObject response = readJsonObjectFromUrl(urlGet);
        if (!isSuccess(response)) {
            return false;
        }

        // A missing LoadMore field is treated as "no more pages".
        boolean isMore = "YES".equalsIgnoreCase(getString(response, "LoadMore"));

        List<AppImageEnt> listAppImageEnt = new ArrayList<AppImageEnt>();
        JsonArray data = getArray(response, "DATA");
        if (data != null) {
            for (JsonElement element : data) {
                if (element.isJsonObject()) {
                    listAppImageEnt.add(new AppImageEnt(element.getAsJsonObject(), tags, AppImageEnt.STATUS.ENABLE));
                }
            }
        }

        DatabaseServiceUtils.InsertAppImageEnt(listAppImageEnt);
        return isMore;
    }

    /**
     * Queries the Facebook Graph comments endpoint for several URLs at once.
     *
     * <p>NOTE(review): the URLs are concatenated into the query string without
     * URL-encoding, so a URL containing {@code &} or {@code #} would corrupt the
     * request — confirm how callers build the list before relying on this.
     *
     * @param listUrl URLs to look up; {@code null} or empty yields an empty map
     *                without issuing a request
     * @return map from each URL to its {@code comments.data} array; URLs without
     *         such an array in the response are omitted
     * @throws IOException if the response cannot be downloaded
     */
    private static HashMap<String, JsonArray> readMapJsonFromUrl(List<String> listUrl) throws IOException {
        HashMap<String, JsonArray> returnValue = new HashMap<String, JsonArray>();
        if (listUrl == null || listUrl.isEmpty()) {
            return returnValue;
        }

        String listURLString = StringUtils.join(listUrl, ",");
        JsonObject root = parseJsonObject(readTextFromUrl("https://graph.facebook.com/comments/?ids=" + listURLString));
        if (root == null) {
            return returnValue;
        }

        for (String tmpUrl : listUrl) {
            JsonArray comments = extractCommentData(root, tmpUrl);
            if (comments != null) {
                returnValue.put(tmpUrl, comments);
            }
        }
        return returnValue;
    }

    /**
     * Downloads a JSON document and returns the {@code comments.data} array stored
     * under the member whose name is the requested URL itself.
     *
     * @param url URL to download; also used as the member name to look up
     * @return the array, or {@code null} when the response is empty, is not a JSON
     *         object, or does not contain that path
     * @throws IOException if the response cannot be downloaded
     */
    private static JsonArray readJsonFromUrl(String url) throws IOException {
        JsonObject root = parseJsonObject(readTextFromUrl(url));
        return root == null ? null : extractCommentData(root, url);
    }

    /**
     * Downloads a JSON document and returns its root object.
     *
     * @param url URL to download
     * @return the root object, or {@code null} when the response body is empty or
     *         its root is not a JSON object
     * @throws IOException if the response cannot be downloaded
     */
    private static JsonObject readJsonObjectFromUrl(String url) throws IOException {
        return parseJsonObject(readTextFromUrl(url));
    }

    /** Echoes a progress message to standard output and to the log. */
    private static void logProgress(String message) {
        System.out.println(message);
        // Progress is informational; FATAL is reserved for unrecoverable errors.
        logger.info(message);
    }

    /** Reads the whole body behind {@code url} as UTF-8 text, always closing the stream. */
    private static String readTextFromUrl(String url) throws IOException {
        InputStream is = new URL(url).openStream();
        try {
            BufferedReader rd = new BufferedReader(new InputStreamReader(is, UTF_8));
            StringBuilder sb = new StringBuilder();
            char[] buffer = new char[4096];
            int read;
            while ((read = rd.read(buffer)) != -1) {
                sb.append(buffer, 0, read);
            }
            return sb.toString();
        } finally {
            is.close();
        }
    }

    /** Parses {@code jsonText}; returns {@code null} for empty text or a non-object root. */
    private static JsonObject parseJsonObject(String jsonText) {
        if (jsonText == null || jsonText.length() == 0) {
            return null;
        }
        JsonElement root = new JsonParser().parse(jsonText);
        return (root != null && root.isJsonObject()) ? root.getAsJsonObject() : null;
    }

    /** Returns {@code true} when {@code response} is non-null and its {@code ErrorCode} is 0. */
    private static boolean isSuccess(JsonObject response) {
        if (response == null) {
            return false;
        }
        JsonElement errorCode = response.get("ErrorCode");
        return errorCode != null && errorCode.isJsonPrimitive() && errorCode.getAsInt() == 0;
    }

    /** Returns member {@code name} of {@code obj} as a string, or {@code null} if absent or not a primitive. */
    private static String getString(JsonObject obj, String name) {
        if (obj == null) {
            return null;
        }
        JsonElement value = obj.get(name);
        return (value != null && value.isJsonPrimitive()) ? value.getAsString() : null;
    }

    /** Returns member {@code name} of {@code obj} as an array, or {@code null} if absent or not an array. */
    private static JsonArray getArray(JsonObject obj, String name) {
        JsonElement value = obj.get(name);
        return (value != null && value.isJsonArray()) ? value.getAsJsonArray() : null;
    }

    /** Returns {@code root[key].comments.data} as an array, or {@code null} if any step is missing. */
    private static JsonArray extractCommentData(JsonObject root, String key) {
        JsonElement entry = root.get(key);
        if (entry == null || !entry.isJsonObject()) {
            return null;
        }
        JsonElement comments = entry.getAsJsonObject().get("comments");
        if (comments == null || !comments.isJsonObject()) {
            return null;
        }
        return getArray(comments.getAsJsonObject(), "data");
    }
}