mas.MAS.java Source code

Java tutorial

Introduction

Here is the source code for mas.MAS.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package mas;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

/**
 *
 * @author Aale
 */
public class MAS {

    // Column separator placed between fields of each row written to the output files.
    // NOTE(review): the name is a misspelling of "SEPARATOR"; the field is public, so
    // renaming it could break callers outside this file.
    public static String SEPERATOR = "\t";
    // Row terminator appended after the last field of each output row.
    public static String NEWLINE = "\n";

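    // Crawl targets, written as: domain name, domain ID => sub-domain name, sub-domain ID
    // (the IDs are the domain/subdomain arguments of extractPaperCategory), optionally
    // followed by "==> where the crawl ran, status".
    // social science   22 => communication 2
    // medicine  6   =>  Psychiatry & Psychology 22  ==> on palm, working 
    // social science   22   => political science 8 ==> local, working
    // social science   22   =>  sociology 10 ==> local , done
    // medicine  6   =>  neuroscience 14  ==> oak, working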
    /**
     * Entry point: looks up the titles of a "top papers" list in the full ID/title list
     * and writes the matching rows to an output file (see
     * {@link #filterPapersFromTopPaperList(String, String, String)}).
     *
     * @param args optional; when exactly three arguments are supplied they are used, in
     *             order, as the all-papers file, the top-papers file and the output file.
     *             With any other argument count the built-in default paths are used, so
     *             running without arguments behaves as before.
     */
    public static void main(String[] args) {

        String all_papers_file = "C:\\Users\\Aale\\Box Sync\\MotiProjects\\Universal-DB\\data\\citation\\papers_title_d2_sd18.txt";
        String top_paper_file = "C:\\Users\\Aale\\Box Sync\\MotiProjects\\Universal-DB\\data\\citation\\top_papers.txt";
        String out_file = "C:\\Users\\Aale\\Box Sync\\MotiProjects\\Universal-DB\\data\\citation\\top_papers_with_id.txt";

        // Allow the machine-specific defaults above to be overridden from the command line.
        if (args != null && args.length == 3) {
            all_papers_file = args[0];
            top_paper_file = args[1];
            out_file = args[2];
        }

        filterPapersFromTopPaperList(all_papers_file, top_paper_file, out_file);
    }

    /**
     * Alternative driver: reads the paper IDs previously saved for one domain/sub-domain
     * pair and fetches the ID/title of each of those papers in batches.
     *
     * @param args unused
     */
    public static void main2(String[] args) {
        // Earlier crawl steps, kept for reference:
        //        extractPaperCategory(0, 2, 18);
        //        extractPaperCategory(0, 6, 22);
        //        extractPaperCategory(0, 22, 8);
        //        extractPaperCategory(0, 22, 10);
        //        extractPaperCategory(0, 6, 14);

        //        extractPapers(1434300);
        //        extractPaperCategory(0, 22, 2);
        //        extractAffilition(0);
        //        extractAuthor(0);
        //        extractConference(0);
        //        extractJournal(0);
        //        extractPaper_Author(0);
        //        extractPaper_Category(4171300);
        final int domain = 2;
        final int subdomain = 18;
        // Every file of this run shares the same "_d<domain>_sd<subdomain>" tag.
        final String tag = "_d" + domain + "_sd" + subdomain;
        final String idsFile = "data/papers" + tag + ".csv";
        final String titlesCsv = "data/papers_title" + tag + ".csv";
        final String titlesDump = "data/papers_title" + tag + "_dump.json";

        final List<Integer> paperIds = getPapersIdsFromPaperCatFile(idsFile);
        System.out.println("Paper#: " + paperIds.size());
        batchPapers(paperIds, titlesCsv, titlesDump);
        // Follow-up steps (paper/author links, then authors), currently disabled:
        //        batchPaperAuthors(paperIds, "data/paper_author_d" + domain + "_sd" + subdomain + ".csv", "data/paper_author_d" + domain + "_sd" + subdomain + "_dump.json");
        //        final SortedSet<Integer> authorIds = getAuthorIdsFromPaperAuthorFile("data/paper_author_d" + domain + "_sd" + subdomain + ".csv");
        //        System.out.println("Authors#: " + authorIds.size());
        //        batchAuthors(authorIds, "data/author_d" + domain + "_sd" + subdomain + ".csv", "data/author_d" + domain + "_sd" + subdomain + "_dump.json");
    }

    /**
     * Writes the "ID, tab, title" rows of those top papers whose title also occurs in the
     * full paper list.
     * <p>
     * The full list is read as tab-separated lines with the numeric paper ID in column 0
     * and the title in column 1. Each line of the top-paper list contributes its first
     * tab-separated column with the last 7 characters removed as the lookup title.
     * NOTE(review): the 7-character suffix is presumably something like " (2010)" -
     * confirm against the actual top-paper file.
     * <p>
     * Malformed lines (missing title column, non-numeric ID, top-paper entry shorter than
     * the suffix) are reported on stdout and skipped instead of aborting the whole run.
     *
     * @param all_papers_file path of the tab-separated ID/title file
     * @param top_paper_file  path of the top-paper list
     * @param out_file        path the matching rows are written to
     */
    public static void filterPapersFromTopPaperList(String all_papers_file, String top_paper_file,
            String out_file) {
        final int suffixLength = 7;

        final List<String> lines = IOUtils.readFileLineByLine(all_papers_file, false);
        Map<String, Integer> papers = new HashMap<>();
        for (String l : lines) {
            String[] split = l.split("\\t");
            if (split.length < 2) {
                // No title column: split[1] would throw ArrayIndexOutOfBoundsException.
                System.out.println("skip paper line without title: " + l);
                continue;
            }
            final Integer id;
            try {
                id = Integer.valueOf(split[0].trim());
            } catch (NumberFormatException ex) {
                System.out.println("skip paper line with non-numeric id: " + l);
                continue;
            }
            papers.put(split[1], id);
        }
        System.out.println("papers map size = " + papers.size());

        final List<String> lines2 = IOUtils.readFileLineByLine(top_paper_file, false);
        StringBuilder stb = new StringBuilder();
        for (String l : lines2) {
            String[] split = l.split("\\t");
            if (split.length == 0 || split[0].length() < suffixLength) {
                // Too short to carry the suffix: substring() would throw.
                System.out.println("skip top paper line: " + l);
                continue;
            }
            String title = split[0].substring(0, split[0].length() - suffixLength);
            System.out.println("title = " + title);
            Integer id = papers.get(title);
            if (id != null) {
                System.out.println("find title = " + title + " " + id);
                stb.append(id).append("\t").append(title).append("\n");
            }
        }
        IOUtils.writeDataIntoFile(stb.toString(), out_file);
    }

    /**
     * Fetches the given authors in batches of at most 200 IDs per filter expression.
     * <p>
     * For every batch an OData filter of the form
     * {@code (ID%20eq%20a%20or%20ID%20eq%20b...)} is built and handed to
     * {@link #getAuthors(int, String, String, String)} with a start offset of 0.
     * <p>
     * Fix over the previous version: batches were flushed when the zero-based index was
     * a multiple of 200, so the first ID was sent alone and every ID after the last
     * multiple of 200 was never sent. Now each full batch and the final partial batch
     * are both sent.
     *
     * @param author_ids IDs of the authors to fetch
     * @param csvFile    tab-separated output file passed through to {@code getAuthors}
     * @param jsonFile   raw JSON dump file passed through to {@code getAuthors}
     */
    public static void batchAuthors(SortedSet<Integer> author_ids, String csvFile, String jsonFile) {
        final int limit = 200;
        final int total = author_ids.size();
        StringBuilder filter = new StringBuilder();
        int pending = 0; // IDs collected in the current, not yet sent batch
        int seen = 0;    // IDs consumed so far
        for (Integer id : author_ids) {
            System.out.println("process author: " + id);
            if (pending > 0) {
                filter.append("%20or%20");
            }
            filter.append("ID%20eq%20").append(id);
            pending++;
            seen++;
            // Send when the batch is full or when this was the last ID.
            if (pending == limit || seen == total) {
                String str = "(" + filter + ")";
                getAuthors(0, str, csvFile, jsonFile);
                System.out.println("URL: " + str);
                filter.setLength(0);
                pending = 0;
            }
        }
    }

    /**
     * Fetches ID and title of the given papers in batches of at most 200 IDs per filter
     * expression.
     * <p>
     * For every batch an OData filter of the form
     * {@code (ID%20eq%20a%20or%20ID%20eq%20b...)} is built and handed to
     * {@link #getPapers(int, String, String, String)} with a start offset of 0.
     * <p>
     * Fix over the previous version: batches were flushed when the zero-based index was
     * a multiple of 200, so the first ID was sent alone and every ID after the last
     * multiple of 200 was never sent. Now each full batch and the final partial batch
     * are both sent.
     *
     * @param paper_ids IDs of the papers to fetch
     * @param csvFile   tab-separated output file passed through to {@code getPapers}
     * @param jsonFile  raw JSON dump file passed through to {@code getPapers}
     */
    public static void batchPapers(List<Integer> paper_ids, String csvFile, String jsonFile) {
        final int limit = 200;
        final int total = paper_ids.size();
        StringBuilder filter = new StringBuilder();
        int pending = 0; // IDs collected in the current, not yet sent batch
        for (int i = 0; i < total; i++) {
            if (pending > 0) {
                filter.append("%20or%20");
            }
            filter.append("ID%20eq%20").append(paper_ids.get(i));
            pending++;
            // Send when the batch is full or when this was the last ID.
            if (pending == limit || i == total - 1) {
                System.out.println("process papers: " + (i + 1));
                String str = "(" + filter + ")";
                getPapers(0, str, csvFile, jsonFile);
                System.out.println("URL: " + str);
                filter.setLength(0);
                pending = 0;
            }
        }
    }

    /**
     * Fetches the paper/author links of the given papers in batches of at most 200 paper
     * IDs per filter expression.
     * <p>
     * For every batch an OData filter of the form
     * {@code (PaperID%20eq%20a%20or%20PaperID%20eq%20b...)} is built and handed to
     * {@link #getPaperAuthors(int, String, String, String)} with a start offset of 0.
     * <p>
     * Fix over the previous version: batches were flushed when the zero-based index was
     * a multiple of 200, so the first ID was sent alone and every ID after the last
     * multiple of 200 was never sent. Now each full batch and the final partial batch
     * are both sent.
     *
     * @param paper_ids IDs of the papers whose author links are fetched
     * @param csvFile   tab-separated output file passed through to {@code getPaperAuthors}
     * @param jsonFile  raw JSON dump file passed through to {@code getPaperAuthors}
     */
    public static void batchPaperAuthors(List<Integer> paper_ids, String csvFile, String jsonFile) {
        final int limit = 200;
        final int total = paper_ids.size();
        StringBuilder filter = new StringBuilder();
        int pending = 0; // IDs collected in the current, not yet sent batch
        for (int i = 0; i < total; i++) {
            Integer paperId = paper_ids.get(i);
            System.out.println("process paper: " + paperId);
            if (pending > 0) {
                filter.append("%20or%20");
            }
            filter.append("PaperID%20eq%20").append(paperId);
            pending++;
            // Send when the batch is full or when this was the last ID.
            if (pending == limit || i == total - 1) {
                String str = "(" + filter + ")";
                getPaperAuthors(0, str, csvFile, jsonFile);
                System.out.println("URL: " + str);
                filter.setLength(0);
                pending = 0;
            }
        }
    }

    /**
     * Pages through the {@code Author} entity of the Microsoft Academic DataMarket API
     * for the given filter and appends ID, Name, NativeName and AffiliationID of every
     * returned author as one tab-separated row to {@code csvFile}; the raw JSON of each
     * page is appended to {@code jsonFile}.
     * <p>
     * The loop advances {@code start} by 100 per page and stops at the first page whose
     * result list is empty. Before every request the current {@code start} is written to
     * the {@code paper_last} checkpoint file. A page that yields no data ({@code null})
     * or cannot be parsed is skipped after a pause.
     * <p>
     * If the thread is interrupted, the interrupt status is restored and the method
     * returns. Previously the handler advanced {@code start} a second time (skipping a
     * page) and kept looping with the interrupt swallowed.
     * NOTE(review): {@code getData2(url, start)} presumably applies {@code start} as the
     * paging offset - confirm in {@code getData2}.
     *
     * @param start          offset of the first page to request
     * @param authors_filter URL-encoded OData filter expression selecting the authors
     * @param csvFile        file the tab-separated rows are written to
     * @param jsonFile       file the raw JSON pages are written to
     */
    public static void getAuthors(int start, String authors_filter, String csvFile, String jsonFile) {
        String url = "https://api.datamarket.azure.com/MRC/MicrosoftAcademic/v2/Author?$select=ID,Name,NativeName,AffiliationID&$filter="
                + authors_filter + "&$format=json";
        while (true) {
            IOUtils.writeDataIntoFile(start + "", paper_last, false);
            try {
                final String json = getData2(url, start);
                if (json == null) {
                    System.out.println("json is null. skip. old start=" + start);
                    start += 100;
                    Thread.sleep(1000L);
                    continue;
                }
                JSONParser parser = new JSONParser();
                JSONObject jsonObj = (JSONObject) parser.parse(json);
                final JSONObject dObj = (JSONObject) jsonObj.get("d");
                final JSONArray results = (JSONArray) dObj.get("results");
                if (results.isEmpty()) {
                    System.out.println("results is Empty, break.");
                    break;
                }
                System.out.println("Author: start = " + start + " results# = " + results.size());
                StringBuilder csv_str = new StringBuilder();
                for (Object author : results) {
                    JSONObject authorObj = (JSONObject) author;
                    Long id = (Long) authorObj.get("ID");
                    String name = normalized((String) authorObj.get("Name"));
                    String nativeName = normalized((String) authorObj.get("NativeName"));
                    Long affiliationID = (Long) authorObj.get("AffiliationID");
                    csv_str.append(id).append(SEPERATOR).append(name).append(SEPERATOR).append(nativeName)
                            .append(SEPERATOR).append(affiliationID).append(NEWLINE);
                }
                IOUtils.writeDataIntoFile(json + "\n", jsonFile);
                IOUtils.writeDataIntoFile(csv_str.toString(), csvFile);
                start += 100;
                Thread.sleep(400L);
            } catch (ParseException ex) {
                // Unparsable page: skip it and back off before the next request.
                System.out.println(ex.getMessage() + " Cause: " + ex.getCause());
                Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex);
                start += 100;
                try {
                    Thread.sleep(5000L);
                } catch (InterruptedException ex1) {
                    Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex1);
                    Thread.currentThread().interrupt();
                    return;
                }
            } catch (InterruptedException ex) {
                // start was already advanced before the interrupted sleep; do not advance
                // again. Restore the flag so callers can see the interruption.
                System.out.println(ex.getMessage() + " Cause: " + ex.getCause());
                Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex);
                Thread.currentThread().interrupt();
                return;
            }
        }
    }

    /**
     * Pages through the {@code Paper_Author} entity of the Microsoft Academic DataMarket
     * API for the given filter and appends PaperID, SeqID, AuthorID, Name and
     * AffiliationID of every returned link as one tab-separated row to {@code csvFile};
     * the raw JSON of each page is appended to {@code jsonFile}.
     * <p>
     * The loop advances {@code start} by 100 per page and stops at the first page whose
     * result list is empty. Before every request the current {@code start} is written to
     * the {@code paper_last} checkpoint file. A page that yields no data ({@code null})
     * or cannot be parsed is skipped after a pause.
     * <p>
     * If the thread is interrupted, the interrupt status is restored and the method
     * returns. Previously the handler advanced {@code start} a second time (skipping a
     * page) and kept looping with the interrupt swallowed.
     * NOTE(review): {@code getData2(url, start)} presumably applies {@code start} as the
     * paging offset - confirm in {@code getData2}.
     *
     * @param start         offset of the first page to request
     * @param papers_filter URL-encoded OData filter expression selecting the papers
     * @param csvFile       file the tab-separated rows are written to
     * @param jsonFile      file the raw JSON pages are written to
     */
    public static void getPaperAuthors(int start, String papers_filter, String csvFile, String jsonFile) {
        String url = "https://api.datamarket.azure.com/MRC/MicrosoftAcademic/v2/Paper_Author?$select=PaperID,SeqID,AuthorID,Name,AffiliationID&$filter="
                + papers_filter + "&$format=json";
        while (true) {
            IOUtils.writeDataIntoFile(start + "", paper_last, false);
            try {
                final String json = getData2(url, start);
                if (json == null) {
                    System.out.println("json is null. skip. old start=" + start);
                    start += 100;
                    Thread.sleep(1000L);
                    continue;
                }
                JSONParser parser = new JSONParser();
                JSONObject jsonObj = (JSONObject) parser.parse(json);
                final JSONObject dObj = (JSONObject) jsonObj.get("d");
                final JSONArray results = (JSONArray) dObj.get("results");
                if (results.isEmpty()) {
                    System.out.println("results is Empty, break.");
                    break;
                }
                System.out.println("Paper_Author: start = " + start + " results# = " + results.size());
                StringBuilder csv_str = new StringBuilder();
                for (Object link : results) {
                    JSONObject linkObj = (JSONObject) link;
                    Long paperID = (Long) linkObj.get("PaperID");
                    Long seqID = (Long) linkObj.get("SeqID");
                    Long authorID = (Long) linkObj.get("AuthorID");
                    String name = normalized((String) linkObj.get("Name"));
                    Long affiliationID = (Long) linkObj.get("AffiliationID");
                    csv_str.append(paperID).append(SEPERATOR).append(seqID).append(SEPERATOR).append(authorID)
                            .append(SEPERATOR).append(name).append(SEPERATOR).append(affiliationID)
                            .append(NEWLINE);
                }
                IOUtils.writeDataIntoFile(json + "\n", jsonFile);
                IOUtils.writeDataIntoFile(csv_str.toString(), csvFile);
                start += 100;
                Thread.sleep(300L);
            } catch (ParseException ex) {
                // Unparsable page: skip it and back off before the next request.
                System.out.println(ex.getMessage() + " Cause: " + ex.getCause());
                Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex);
                start += 100;
                try {
                    Thread.sleep(5000L);
                } catch (InterruptedException ex1) {
                    Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex1);
                    Thread.currentThread().interrupt();
                    return;
                }
            } catch (InterruptedException ex) {
                // start was already advanced before the interrupted sleep; do not advance
                // again. Restore the flag so callers can see the interruption.
                System.out.println(ex.getMessage() + " Cause: " + ex.getCause());
                Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex);
                Thread.currentThread().interrupt();
                return;
            }
        }
    }

    // Path of the checkpoint file: the paged fetch loops in this class write their
    // current `start` offset to it before each request.
    // NOTE(review): the `false` passed to IOUtils.writeDataIntoFile for this file
    // presumably means "overwrite, do not append" - confirm in IOUtils.
    public static String paper_last = "data/paper.last";

    /**
     * Reads the paper IDs from the first tab-separated column of the given file.
     * <p>
     * Lines whose first column is missing, blank or not an integer (for example the
     * text "null") are reported on stdout and skipped; previously such a line aborted
     * the run with a {@link NumberFormatException}.
     *
     * @param file path of the tab-separated file
     * @return the IDs in file order, duplicates included
     */
    private static List<Integer> getPapersIdsFromPaperCatFile(String file) {
        final List<String> lines = IOUtils.readFileLineByLine(file, false);
        List<Integer> paper_ids = new ArrayList<>();
        for (String l : lines) {
            String[] split = l.split("\\t");
            // split() returns an empty array for a line consisting only of tabs.
            if (split.length == 0 || split[0].trim().isEmpty()) {
                continue;
            }
            try {
                paper_ids.add(Integer.valueOf(split[0].trim()));
            } catch (NumberFormatException ex) {
                System.out.println("skip line with non-numeric paper id: " + l);
            }
        }
        return paper_ids;
    }

    /**
     * Collects the distinct author IDs from the third tab-separated column of the given
     * file.
     * <p>
     * As before, only lines with at least four columns are considered. Lines whose third
     * column is not an integer (for example the text "null") are reported on stdout and
     * skipped; previously such a line aborted the run with a
     * {@link NumberFormatException}.
     *
     * @param file path of the tab-separated file
     * @return the distinct author IDs in ascending order
     */
    private static SortedSet<Integer> getAuthorIdsFromPaperAuthorFile(String file) {
        final List<String> lines = IOUtils.readFileLineByLine(file, false);
        SortedSet<Integer> author_ids = new TreeSet<>();
        for (String l : lines) {
            String[] split = l.split("\\t");
            if (split.length <= 3) {
                continue;
            }
            try {
                author_ids.add(Integer.valueOf(split[2].trim()));
            } catch (NumberFormatException ex) {
                System.out.println("skip line with non-numeric author id: " + l);
            }
        }
        return author_ids;
    }

    /**
     * Pages through the {@code Paper_Category} entity of the Microsoft Academic
     * DataMarket API for one domain/sub-domain pair and appends CPaperID, DomainID and
     * SubDomainID of every returned record as one tab-separated row to
     * {@code data/papers_d<domain>_sd<subdomain>.csv}; the raw JSON of each page is
     * appended to {@code data/papers_dump_d<domain>_sd<subdomain>.json}.
     * <p>
     * The loop advances {@code start} by 100 per page and stops at the first page whose
     * result list is empty. Before every request the current {@code start} is written to
     * the {@code paper_last} checkpoint file. A page that yields no data ({@code null})
     * or cannot be parsed is skipped after a pause.
     * <p>
     * If the thread is interrupted, the interrupt status is restored and the method
     * returns. Previously the handler advanced {@code start} a second time (skipping a
     * page) and kept looping with the interrupt swallowed.
     * NOTE(review): {@code getData2(url, start)} presumably applies {@code start} as the
     * paging offset - confirm in {@code getData2}.
     *
     * @param start     offset of the first page to request
     * @param domain    value compared against DomainID in the filter
     * @param subdomain value compared against SubDomainID in the filter
     */
    public static void extractPaperCategory(int start, int domain, int subdomain) {
        String csv_file_path = "data/papers_d" + domain + "_sd" + subdomain + ".csv";
        String json_dump_file_path = "data/papers_dump_d" + domain + "_sd" + subdomain + ".json";
        String url = "https://api.datamarket.azure.com/MRC/MicrosoftAcademic/v2/Paper_Category?$filter=DomainID%20eq%20"
                + domain + "%20and%20SubDomainID%20eq%20" + subdomain + "&$format=json";
        while (true) {
            IOUtils.writeDataIntoFile(start + "", paper_last, false);
            try {
                final String json = getData2(url, start);
                if (json == null) {
                    System.out.println("json is null. skip. old start=" + start);
                    start += 100;
                    Thread.sleep(1000L);
                    continue;
                }
                JSONParser parser = new JSONParser();
                JSONObject jsonObj = (JSONObject) parser.parse(json);
                final JSONObject dObj = (JSONObject) jsonObj.get("d");
                final JSONArray results = (JSONArray) dObj.get("results");
                if (results.isEmpty()) {
                    System.out.println("results is Empty, break.");
                    break;
                }
                System.out.println("Paper_Category: start = " + start + " results# = " + results.size());
                StringBuilder csv_str = new StringBuilder();
                for (Object record : results) {
                    JSONObject recordObj = (JSONObject) record;
                    Long cPaperID = (Long) recordObj.get("CPaperID");
                    Long domainID = (Long) recordObj.get("DomainID");
                    Long subDomainID = (Long) recordObj.get("SubDomainID");
                    csv_str.append(cPaperID).append(SEPERATOR).append(domainID).append(SEPERATOR)
                            .append(subDomainID).append(NEWLINE);
                }
                IOUtils.writeDataIntoFile(json + "\n", json_dump_file_path);
                IOUtils.writeDataIntoFile(csv_str.toString(), csv_file_path);
                start += 100;
                Thread.sleep(400L);
            } catch (ParseException ex) {
                // Unparsable page: skip it and back off before the next request.
                System.out.println(ex.getMessage() + " Cause: " + ex.getCause());
                Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex);
                start += 100;
                try {
                    Thread.sleep(10000L);
                } catch (InterruptedException ex1) {
                    Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex1);
                    Thread.currentThread().interrupt();
                    return;
                }
            } catch (InterruptedException ex) {
                // start was already advanced before the interrupted sleep; do not advance
                // again. Restore the flag so callers can see the interruption.
                System.out.println(ex.getMessage() + " Cause: " + ex.getCause());
                Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex);
                Thread.currentThread().interrupt();
                return;
            }
        }
    }

    /**
     * Pages through the {@code Paper} entity of the Microsoft Academic DataMarket API,
     * filtered to {@code Year gt 2001}, and appends ID, DocType, Year, JourID, ConfID and
     * Title of every returned paper as one tab-separated row to
     * {@code data/papers_<start>.csv}; the raw JSON of each page is appended to
     * {@code data/papers_dump_<start>.json}. Both file names use the initial value of
     * {@code start}.
     * <p>
     * The loop advances {@code start} by 100 per page and stops at the first page whose
     * result list is empty. Before every request the current {@code start} is written to
     * the {@code paper_last} checkpoint file. A page that yields no data ({@code null})
     * or cannot be parsed is skipped after a pause.
     * <p>
     * If the thread is interrupted, the interrupt status is restored and the method
     * returns. Previously the handler advanced {@code start} a second time (skipping a
     * page) and kept looping with the interrupt swallowed.
     * NOTE(review): {@code getData2(url, start)} presumably applies {@code start} as the
     * paging offset - confirm in {@code getData2}.
     *
     * @param start offset of the first page to request
     */
    public static void extractPapers(int start) {
        String csv_file_path = "data/papers_" + start + ".csv";
        String json_dump_file_path = "data/papers_dump_" + start + ".json";
        String url = "https://api.datamarket.azure.com/MRC/MicrosoftAcademic/v2/Paper?$select=ID,DocType,Title,Year,ConfID,JourID&$filter=Year%20gt%202001&$format=json";
        while (true) {
            IOUtils.writeDataIntoFile(start + "", paper_last, false);
            try {
                final String json = getData2(url, start);
                if (json == null) {
                    System.out.println("json is null. skip. old start=" + start);
                    start += 100;
                    Thread.sleep(1000L);
                    continue;
                }
                JSONParser parser = new JSONParser();
                JSONObject jsonObj = (JSONObject) parser.parse(json);
                final JSONObject dObj = (JSONObject) jsonObj.get("d");
                final JSONArray results = (JSONArray) dObj.get("results");
                if (results.isEmpty()) {
                    System.out.println("results is Empty, break.");
                    break;
                }
                System.out.println("Paper: start = " + start + " results# = " + results.size());
                StringBuilder csv_str = new StringBuilder();
                for (Object paper : results) {
                    JSONObject paperObj = (JSONObject) paper;
                    Long docType = (Long) paperObj.get("DocType");
                    Long year = (Long) paperObj.get("Year");
                    Long jourID = (Long) paperObj.get("JourID");
                    Long confID = (Long) paperObj.get("ConfID");
                    Long id = (Long) paperObj.get("ID");
                    String title = normalized((String) paperObj.get("Title"));
                    csv_str.append(id).append(SEPERATOR).append(docType).append(SEPERATOR).append(year)
                            .append(SEPERATOR).append(jourID).append(SEPERATOR).append(confID).append(SEPERATOR)
                            .append(title).append(NEWLINE);
                }
                IOUtils.writeDataIntoFile(json + "\n", json_dump_file_path);
                IOUtils.writeDataIntoFile(csv_str.toString(), csv_file_path);
                start += 100;
                Thread.sleep(400L);
            } catch (ParseException ex) {
                // Unparsable page: skip it and back off before the next request.
                System.out.println(ex.getMessage() + " Cause: " + ex.getCause());
                Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex);
                start += 100;
                try {
                    Thread.sleep(10000L);
                } catch (InterruptedException ex1) {
                    Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex1);
                    Thread.currentThread().interrupt();
                    return;
                }
            } catch (InterruptedException ex) {
                // start was already advanced before the interrupted sleep; do not advance
                // again. Restore the flag so callers can see the interruption.
                System.out.println(ex.getMessage() + " Cause: " + ex.getCause());
                Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex);
                Thread.currentThread().interrupt();
                return;
            }
        }
    }

    /**
     * Pages through the {@code Paper} entity of the Microsoft Academic DataMarket API
     * for the given filter and appends ID and Title of every returned paper as one
     * tab-separated row to {@code csvFile}; the raw JSON of each page is appended to
     * {@code jsonFile}.
     * <p>
     * The loop advances {@code start} by 100 per page and stops at the first page whose
     * result list is empty. Before every request the current {@code start} is written to
     * the {@code paper_last} checkpoint file. A page that yields no data ({@code null})
     * or cannot be parsed is skipped after a pause.
     * <p>
     * If the thread is interrupted, the interrupt status is restored and the method
     * returns. Previously the handler advanced {@code start} a second time (skipping a
     * page) and kept looping with the interrupt swallowed.
     * NOTE(review): {@code getData2(url, start)} presumably applies {@code start} as the
     * paging offset - confirm in {@code getData2}.
     *
     * @param start         offset of the first page to request
     * @param papers_filter URL-encoded OData filter expression selecting the papers
     * @param csvFile       file the tab-separated rows are written to
     * @param jsonFile      file the raw JSON pages are written to
     */
    public static void getPapers(int start, String papers_filter, String csvFile, String jsonFile) {
        String url = "https://api.datamarket.azure.com/MRC/MicrosoftAcademic/v2/Paper?$select=ID,Title&$filter="
                + papers_filter + "&$format=json";
        while (true) {
            IOUtils.writeDataIntoFile(start + "", paper_last, false);
            try {
                final String json = getData2(url, start);
                if (json == null) {
                    System.out.println("json is null. skip. old start=" + start);
                    start += 100;
                    Thread.sleep(1000L);
                    continue;
                }
                JSONParser parser = new JSONParser();
                JSONObject jsonObj = (JSONObject) parser.parse(json);
                final JSONObject dObj = (JSONObject) jsonObj.get("d");
                final JSONArray results = (JSONArray) dObj.get("results");
                if (results.isEmpty()) {
                    System.out.println("results is Empty, break.");
                    break;
                }
                System.out.println("Paper: start = " + start + " results# = " + results.size());
                StringBuilder csv_str = new StringBuilder();
                for (Object paper : results) {
                    JSONObject paperObj = (JSONObject) paper;
                    Long id = (Long) paperObj.get("ID");
                    String title = normalized((String) paperObj.get("Title"));
                    csv_str.append(id).append(SEPERATOR).append(title).append(NEWLINE);
                }
                IOUtils.writeDataIntoFile(json + "\n", jsonFile);
                IOUtils.writeDataIntoFile(csv_str.toString(), csvFile);
                start += 100;
                Thread.sleep(400L);
            } catch (ParseException ex) {
                // Unparsable page: skip it and back off before the next request.
                System.out.println(ex.getMessage() + " Cause: " + ex.getCause());
                Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex);
                start += 100;
                try {
                    Thread.sleep(10000L);
                } catch (InterruptedException ex1) {
                    Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex1);
                    Thread.currentThread().interrupt();
                    return;
                }
            } catch (InterruptedException ex) {
                // start was already advanced before the interrupted sleep; do not advance
                // again. Restore the flag so callers can see the interruption.
                System.out.println(ex.getMessage() + " Cause: " + ex.getCause());
                Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex);
                Thread.currentThread().interrupt();
                return;
            }
        }
    }

    /**
     * Pages through the {@code Affiliation} entity of the Microsoft Academic DataMarket
     * API and appends ID, OfficialName, DisplayName, NativeName, ParentID, Homepage,
     * ShortName and Type of every returned record as one tab-separated row to
     * {@code data/affilitions.csv}; the raw JSON of each page is appended to
     * {@code data/affilitions_dump.json}.
     * <p>
     * The loop advances {@code start} by 100 per page and stops at the first page whose
     * result list is empty.
     * <p>
     * Changes over the previous version, aligning it with the other paged fetchers in
     * this class:
     * <ul>
     * <li>a {@code null} response is skipped after a pause instead of being handed to
     * the parser;</li>
     * <li>an unparsable page is skipped after a pause; before, the same page was
     * requested again immediately and without limit;</li>
     * <li>on interruption the interrupt status is restored and the method returns.</li>
     * </ul>
     * NOTE(review): {@code getData2(url, start)} presumably applies {@code start} as the
     * paging offset - confirm in {@code getData2}.
     *
     * @param start offset of the first page to request
     */
    public static void extractAffilition(int start) {
        String file_prefix = "affilitions";
        String csv_file_path = "data/" + file_prefix + ".csv";
        String json_dump_file_path = "data/" + file_prefix + "_dump.json";
        String url = "https://api.datamarket.azure.com/MRC/MicrosoftAcademic/v2/Affiliation?";
        url += "$format=json";
        while (true) {
            try {
                final String json = getData2(url, start);
                if (json == null) {
                    System.out.println("json is null. skip. old start=" + start);
                    start += 100;
                    Thread.sleep(1000L);
                    continue;
                }
                JSONParser parser = new JSONParser();
                JSONObject jsonObj = (JSONObject) parser.parse(json);
                final JSONObject dObj = (JSONObject) jsonObj.get("d");
                final JSONArray results = (JSONArray) dObj.get("results");
                if (results.isEmpty()) {
                    System.out.println("results is Empty, break.");
                    break;
                }
                System.out.println("Affilition: start = " + start + " results# = " + results.size());
                StringBuilder csv_str = new StringBuilder();
                for (Object affiliation : results) {
                    JSONObject affObj = (JSONObject) affiliation;
                    Long id = (Long) affObj.get("ID");
                    String officialName = normalized((String) affObj.get("OfficialName"));
                    String displayName = normalized((String) affObj.get("DisplayName"));
                    String nativeName = normalized((String) affObj.get("NativeName"));
                    Long parentID = (Long) affObj.get("ParentID");
                    String homepage = normalized((String) affObj.get("Homepage"));
                    String shortName = normalized((String) affObj.get("ShortName"));
                    Long type = (Long) affObj.get("Type");
                    csv_str.append(id).append(SEPERATOR).append(officialName).append(SEPERATOR)
                            .append(displayName).append(SEPERATOR).append(nativeName).append(SEPERATOR)
                            .append(parentID).append(SEPERATOR).append(homepage).append(SEPERATOR)
                            .append(shortName).append(SEPERATOR).append(type).append(NEWLINE);
                }
                IOUtils.writeDataIntoFile(json + "\n", json_dump_file_path);
                IOUtils.writeDataIntoFile(csv_str.toString(), csv_file_path);
                start += 100;
                Thread.sleep(300L);
            } catch (ParseException ex) {
                // Unparsable page: skip it and back off instead of re-requesting at once.
                Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex);
                start += 100;
                try {
                    Thread.sleep(10000L);
                } catch (InterruptedException ex1) {
                    Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex1);
                    Thread.currentThread().interrupt();
                    return;
                }
            } catch (InterruptedException ex) {
                Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex);
                Thread.currentThread().interrupt();
                return;
            }
        }
    }

    /**
     * Pages through the {@code Author} entity of the Microsoft Academic DataMarket API
     * and appends ID, Name, NativeName, Affiliation, AffiliationID and Version of every
     * returned author as one tab-separated row to {@code data/authors.csv}; the raw JSON
     * of each page is appended to {@code data/authors_dump.json}.
     * <p>
     * The loop advances {@code start} by 100 per page and stops at the first page whose
     * result list is empty.
     * <p>
     * Changes over the previous version, aligning it with the other paged fetchers in
     * this class:
     * <ul>
     * <li>a {@code null} response is skipped after a pause instead of being handed to
     * the parser;</li>
     * <li>an unparsable page is skipped after a pause; before, the same page was
     * requested again immediately and without limit;</li>
     * <li>on interruption the interrupt status is restored and the method returns.</li>
     * </ul>
     * NOTE(review): {@code getData2(url, start)} presumably applies {@code start} as the
     * paging offset - confirm in {@code getData2}.
     *
     * @param start offset of the first page to request
     */
    public static void extractAuthor(int start) {
        String file_prefix = "authors";
        String csv_file_path = "data/" + file_prefix + ".csv";
        String json_dump_file_path = "data/" + file_prefix + "_dump.json";
        String url = "https://api.datamarket.azure.com/MRC/MicrosoftAcademic/v2/Author?$select=ID,Name,NativeName,Affiliation,AffiliationID,Version";
        url += "&$format=json";
        while (true) {
            try {
                final String json = getData2(url, start);
                if (json == null) {
                    System.out.println("json is null. skip. old start=" + start);
                    start += 100;
                    Thread.sleep(1000L);
                    continue;
                }
                JSONParser parser = new JSONParser();
                JSONObject jsonObj = (JSONObject) parser.parse(json);
                final JSONObject dObj = (JSONObject) jsonObj.get("d");
                final JSONArray results = (JSONArray) dObj.get("results");
                if (results.isEmpty()) {
                    System.out.println("results is Empty, break.");
                    break;
                }
                System.out.println("Author: start = " + start + " results# = " + results.size());
                StringBuilder csv_str = new StringBuilder();
                for (Object author : results) {
                    JSONObject authorObj = (JSONObject) author;
                    Long id = (Long) authorObj.get("ID");
                    String name = normalized((String) authorObj.get("Name"));
                    String nativeName = normalized((String) authorObj.get("NativeName"));
                    String affiliation = normalized((String) authorObj.get("Affiliation"));
                    Long affiliationID = (Long) authorObj.get("AffiliationID");
                    Long version = (Long) authorObj.get("Version");
                    csv_str.append(id).append(SEPERATOR).append(name).append(SEPERATOR).append(nativeName)
                            .append(SEPERATOR).append(affiliation).append(SEPERATOR).append(affiliationID)
                            .append(SEPERATOR).append(version).append(NEWLINE);
                }
                IOUtils.writeDataIntoFile(json + "\n", json_dump_file_path);
                IOUtils.writeDataIntoFile(csv_str.toString(), csv_file_path);
                start += 100;
                Thread.sleep(300L);
            } catch (ParseException ex) {
                // Unparsable page: skip it and back off instead of re-requesting at once.
                Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex);
                start += 100;
                try {
                    Thread.sleep(10000L);
                } catch (InterruptedException ex1) {
                    Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex1);
                    Thread.currentThread().interrupt();
                    return;
                }
            } catch (InterruptedException ex) {
                Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex);
                Thread.currentThread().interrupt();
                return;
            }
        }
    }

    /**
     * Crawls the Conference entity set of the Microsoft Academic OData feed.
     * <p>
     * Pages are requested through {@code getData2}, beginning at offset {@code start} and
     * advancing the offset by 100 after each page. Every page is handed verbatim to
     * {@code IOUtils.writeDataIntoFile} for {@code data/conferences_dump.json} and, flattened
     * to one line per record (ID, ShortName, FullName, Homepage joined by {@code SEPERATOR}),
     * for {@code data/conferences.csv}.
     * <p>
     * The crawl stops when a page contains no results or when the thread is interrupted.
     * A page that could not be downloaded is skipped after a pause (the same policy
     * {@code extractPaper_Category} applies); a page that could not be parsed is retried
     * after a pause.
     *
     * @param start offset of the first record to request
     */
    public static void extractConference(int start) {
        String file_prefix = "conferences";
        String csv_file_path = "data/" + file_prefix + ".csv";
        String json_dump_file_path = "data/" + file_prefix + "_dump.json";
        String url = "https://api.datamarket.azure.com/MRC/MicrosoftAcademic/v2/Conference?";
        url += "$format=json";
        try {
            while (true) {
                final String json = getData2(url, start);
                if (json == null) {
                    // getData2 returns null when the request failed; parsing null would
                    // throw a NullPointerException and kill the whole crawl.
                    System.out.println("json is null. skip. old start=" + start);
                    start += 100;
                    Thread.sleep(5000L);
                    continue;
                }

                JSONArray results;
                try {
                    JSONObject jsonObj = (JSONObject) new JSONParser().parse(json);
                    JSONObject dObj = (JSONObject) jsonObj.get("d");
                    results = (JSONArray) dObj.get("results");
                } catch (ParseException ex) {
                    Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex);
                    // Retry the same page, but only after a pause so that a malformed
                    // response does not turn into a tight request loop.
                    Thread.sleep(5000L);
                    continue;
                }

                if (results.size() == 0) {
                    System.out.println("results is Empty, break.");
                    break;
                }

                System.out.println("Conference: start = " + start + " results# = " + results.size());
                StringBuilder csv_str = new StringBuilder();
                for (Object item : results) {
                    JSONObject row = (JSONObject) item;
                    Long id = (Long) row.get("ID");
                    String shortName = normalized((String) row.get("ShortName"));
                    String fullName = normalized((String) row.get("FullName"));
                    String homepage = normalized((String) row.get("Homepage"));
                    csv_str.append(id).append(SEPERATOR).append(shortName).append(SEPERATOR).append(fullName)
                            .append(SEPERATOR).append(homepage).append(NEWLINE);
                }
                IOUtils.writeDataIntoFile(json + "\n", json_dump_file_path);
                IOUtils.writeDataIntoFile(csv_str.toString(), csv_file_path);
                start += 100;
                // Short pause between successful pages to avoid hammering the service.
                Thread.sleep(300L);
            }
        } catch (InterruptedException ex) {
            // Restore the interrupt flag and stop: continuing the loop would make every
            // later sleep fail immediately and the crawl could never be cancelled.
            Thread.currentThread().interrupt();
            Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Crawls the Journal entity set of the Microsoft Academic OData feed.
     * <p>
     * Pages are requested through {@code getData2}, beginning at offset {@code start} and
     * advancing the offset by 100 after each page. Every page is handed verbatim to
     * {@code IOUtils.writeDataIntoFile} for {@code data/journals_dump.json} and, flattened
     * to one line per record (ID, ShortName, FullName, Homepage joined by {@code SEPERATOR}),
     * for {@code data/journals.csv}.
     * <p>
     * The crawl stops when a page contains no results or when the thread is interrupted.
     * A page that could not be downloaded is skipped after a pause (the same policy
     * {@code extractPaper_Category} applies); a page that could not be parsed is retried
     * after a pause.
     *
     * @param start offset of the first record to request
     */
    public static void extractJournal(int start) {
        String file_prefix = "journals";
        String csv_file_path = "data/" + file_prefix + ".csv";
        String json_dump_file_path = "data/" + file_prefix + "_dump.json";
        String url = "https://api.datamarket.azure.com/MRC/MicrosoftAcademic/v2/Journal?";
        url += "$format=json";
        try {
            while (true) {
                final String json = getData2(url, start);
                if (json == null) {
                    // getData2 returns null when the request failed; parsing null would
                    // throw a NullPointerException and kill the whole crawl.
                    System.out.println("json is null. skip. old start=" + start);
                    start += 100;
                    Thread.sleep(5000L);
                    continue;
                }

                JSONArray results;
                try {
                    JSONObject jsonObj = (JSONObject) new JSONParser().parse(json);
                    JSONObject dObj = (JSONObject) jsonObj.get("d");
                    results = (JSONArray) dObj.get("results");
                } catch (ParseException ex) {
                    Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex);
                    // Retry the same page, but only after a pause so that a malformed
                    // response does not turn into a tight request loop.
                    Thread.sleep(5000L);
                    continue;
                }

                if (results.size() == 0) {
                    System.out.println("results is Empty, break.");
                    break;
                }

                System.out.println("Journals: start = " + start + " results# = " + results.size());
                StringBuilder csv_str = new StringBuilder();
                for (Object item : results) {
                    JSONObject row = (JSONObject) item;
                    Long id = (Long) row.get("ID");
                    String shortName = normalized((String) row.get("ShortName"));
                    String fullName = normalized((String) row.get("FullName"));
                    String homepage = normalized((String) row.get("Homepage"));
                    csv_str.append(id).append(SEPERATOR).append(shortName).append(SEPERATOR).append(fullName)
                            .append(SEPERATOR).append(homepage).append(NEWLINE);
                }
                IOUtils.writeDataIntoFile(json + "\n", json_dump_file_path);
                IOUtils.writeDataIntoFile(csv_str.toString(), csv_file_path);
                start += 100;
                // Short pause between successful pages to avoid hammering the service.
                Thread.sleep(300L);
            }
        } catch (InterruptedException ex) {
            // Restore the interrupt flag and stop: continuing the loop would make every
            // later sleep fail immediately and the crawl could never be cancelled.
            Thread.currentThread().interrupt();
            Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Crawls the Paper_Author entity set of the Microsoft Academic OData feed.
     * <p>
     * Pages are requested through {@code getData2}, beginning at offset {@code start} and
     * advancing the offset by 100 after each page. Every page is handed verbatim to
     * {@code IOUtils.writeDataIntoFile} for {@code data/paper_authors_dump.json} and,
     * flattened to one line per record (PaperID, SeqID, author id, Name, Affiliation,
     * AffiliationID joined by {@code SEPERATOR}), for {@code data/paper_authors.csv}.
     * <p>
     * The crawl stops when a page contains no results or when the thread is interrupted.
     * A page that could not be downloaded is skipped after a pause (the same policy
     * {@code extractPaper_Category} applies); a page that could not be parsed is retried
     * after a pause.
     *
     * @param start offset of the first record to request
     */
    public static void extractPaper_Author(int start) {
        String file_prefix = "paper_authors";
        String csv_file_path = "data/" + file_prefix + ".csv";
        String json_dump_file_path = "data/" + file_prefix + "_dump.json";
        String url = "https://api.datamarket.azure.com/MRC/MicrosoftAcademic/v2/Paper_Author?";
        url += "$format=json";
        try {
            while (true) {
                final String json = getData2(url, start);
                if (json == null) {
                    // getData2 returns null when the request failed; parsing null would
                    // throw a NullPointerException and kill the whole crawl.
                    System.out.println("json is null. skip. old start=" + start);
                    start += 100;
                    Thread.sleep(5000L);
                    continue;
                }

                JSONArray results;
                try {
                    JSONObject jsonObj = (JSONObject) new JSONParser().parse(json);
                    JSONObject dObj = (JSONObject) jsonObj.get("d");
                    results = (JSONArray) dObj.get("results");
                } catch (ParseException ex) {
                    Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex);
                    // Retry the same page, but only after a pause so that a malformed
                    // response does not turn into a tight request loop.
                    Thread.sleep(5000L);
                    continue;
                }

                if (results.size() == 0) {
                    System.out.println("results is Empty, break.");
                    break;
                }

                System.out.println("Paper_Author: start = " + start + " results# = " + results.size());
                StringBuilder csv_str = new StringBuilder();
                for (Object item : results) {
                    JSONObject row = (JSONObject) item;
                    Long paperID = (Long) row.get("PaperID");
                    Long seqID = (Long) row.get("SeqID");
                    // Key lookup is case-sensitive and every other field of this record is
                    // PascalCase, so the previously used "authorID" presumably never
                    // matched (TODO confirm against the feed schema). Try the PascalCase
                    // key first and keep the old spelling as a fallback.
                    Long authorID = (Long) row.get("AuthorID");
                    if (authorID == null) {
                        authorID = (Long) row.get("authorID");
                    }
                    String name = normalized((String) row.get("Name"));
                    String affiliation = normalized((String) row.get("Affiliation"));
                    Long affiliationID = (Long) row.get("AffiliationID");
                    csv_str.append(paperID).append(SEPERATOR).append(seqID).append(SEPERATOR).append(authorID)
                            .append(SEPERATOR).append(name).append(SEPERATOR).append(affiliation)
                            .append(SEPERATOR).append(affiliationID).append(NEWLINE);
                }
                IOUtils.writeDataIntoFile(json + "\n", json_dump_file_path);
                IOUtils.writeDataIntoFile(csv_str.toString(), csv_file_path);
                start += 100;
                // Short pause between successful pages to avoid hammering the service.
                Thread.sleep(300L);
            }
        } catch (InterruptedException ex) {
            // Restore the interrupt flag and stop: continuing the loop would make every
            // later sleep fail immediately and the crawl could never be cancelled.
            Thread.currentThread().interrupt();
            Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Crawls the Paper_Category entity set of the Microsoft Academic OData feed.
     * <p>
     * Pages are requested through {@code getData2}, beginning at offset {@code start} and
     * advancing the offset by 100 after each page. Every page is handed verbatim to
     * {@code IOUtils.writeDataIntoFile} for {@code data/paper_categories_dump.json} and,
     * flattened to one line per record (CPaperID, DomainID, SubDomainID joined by
     * {@code SEPERATOR}), for {@code data/paper_categories.csv}.
     * <p>
     * The crawl stops when a page contains no results or when the thread is interrupted.
     * A page that could not be downloaded is skipped after a pause; a page that could not
     * be parsed is retried after a pause.
     *
     * @param start offset of the first record to request
     */
    public static void extractPaper_Category(int start) {
        String file_prefix = "paper_categories";
        String csv_file_path = "data/" + file_prefix + ".csv";
        String json_dump_file_path = "data/" + file_prefix + "_dump.json";
        String url = "https://api.datamarket.azure.com/MRC/MicrosoftAcademic/v2/Paper_Category?";
        url += "$format=json";
        try {
            while (true) {
                final String json = getData2(url, start);
                if (json == null) {
                    // Download failed: give the service time to recover and move on to
                    // the next page (the records of this page are not retried).
                    System.out.println("json is null. skip. old start=" + start);
                    start += 100;
                    Thread.sleep(5000L);
                    continue;
                }

                JSONArray results;
                try {
                    JSONObject jsonObj = (JSONObject) new JSONParser().parse(json);
                    JSONObject dObj = (JSONObject) jsonObj.get("d");
                    results = (JSONArray) dObj.get("results");
                } catch (ParseException ex) {
                    Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex);
                    // Retry the same page, but only after a pause so that a malformed
                    // response does not turn into a tight request loop.
                    Thread.sleep(5000L);
                    continue;
                }

                if (results.size() == 0) {
                    System.out.println("results is Empty, break.");
                    break;
                }

                System.out.println("Paper_Category: start = " + start + " results# = " + results.size());
                StringBuilder csv_str = new StringBuilder();
                for (Object item : results) {
                    JSONObject row = (JSONObject) item;
                    Long cPaperID = (Long) row.get("CPaperID");
                    Long domainID = (Long) row.get("DomainID");
                    Long subDomainID = (Long) row.get("SubDomainID");
                    csv_str.append(cPaperID).append(SEPERATOR).append(domainID).append(SEPERATOR)
                            .append(subDomainID).append(NEWLINE);
                }
                IOUtils.writeDataIntoFile(json + "\n", json_dump_file_path);
                IOUtils.writeDataIntoFile(csv_str.toString(), csv_file_path);
                start += 100;
                // Short pause between successful pages to avoid hammering the service.
                Thread.sleep(300L);
            }
        } catch (InterruptedException ex) {
            // Restore the interrupt flag and stop: continuing the loop would make every
            // later sleep fail immediately and the crawl could never be cancelled.
            Thread.currentThread().interrupt();
            Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Fetches one page of a feed with Jsoup and returns the response body as text.
     *
     * @param url_org request URL that already carries a query string;
     *                {@code "&$skip=" + start} is appended to it
     * @param start   number of records the service should skip
     * @return the response body, or {@code null} if the request failed with an
     *         {@link IOException} (the failure is printed and logged)
     */
    public static String getData2(String url_org, int start) {
        final String complete_url = url_org + "&$skip=" + start;
        try {
            // Take the body verbatim. Parsing the payload as an HTML Document and calling
            // text() collapses whitespace, decodes HTML entities and drops anything inside
            // the data that looks like a tag, which silently alters JSON string values.
            // NOTE(review): the returned text is no longer whitespace-collapsed, so a
            // pretty-printed response would span several lines in the callers' dump files.
            return Jsoup.connect(complete_url).timeout(25000).ignoreContentType(true).execute().body();
        } catch (IOException ex) {
            System.out.println(ex.getMessage() + " Cause: " + ex.getCause());
            Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex);
        }
        return null;
    }

    /**
     * Earlier page fetcher built on {@link URLConnection} instead of Jsoup.
     * <p>
     * Requests {@code url_org + "&$skip=" + start} with 25 second connect and read
     * timeouts and returns the response with its lines concatenated (line terminators
     * are dropped).
     *
     * @param url_org request URL that already carries a query string
     * @param start   number of records the service should skip
     * @return the response text, or {@code null} if the URL is malformed or the request
     *         failed with an {@link IOException} (the failure is logged)
     */
    public static String getData_old(String url_org, int start) {
        final String complete_url = url_org + "&$skip=" + start;
        try {
            URLConnection yc = new URL(complete_url).openConnection();
            yc.setConnectTimeout(25 * 1000);
            yc.setReadTimeout(25 * 1000);
            // Decode explicitly instead of relying on the platform default charset, which
            // corrupts non-ASCII text on machines whose default is not UTF-8.
            // Assumes the service answers in UTF-8 (the usual JSON encoding).
            BufferedReader in = new BufferedReader(new InputStreamReader(yc.getInputStream(), "UTF-8"));
            try {
                StringBuilder result = new StringBuilder();
                String inputLine;
                while ((inputLine = in.readLine()) != null) {
                    result.append(inputLine);
                }
                return result.toString();
            } finally {
                // Close the stream even when reading fails part-way through.
                in.close();
            }
        } catch (MalformedURLException ex) {
            Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex);
        } catch (IOException ex) {
            Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex);
        }
        return null;
    }

    /**
     * Queries the old JSON search service ({@code json.svc/search}) for publications and
     * prints the raw response to standard output, one line at a time.
     * <p>
     * The request is fixed to DomainID 22, SubDomainID 2 and the years 2001 to 2010; only
     * the result window is configurable. Malformed URLs and I/O failures are logged and
     * otherwise ignored.
     *
     * @param startIdx value sent as the {@code StartIdx} request parameter
     * @param endIdx   value sent as the {@code EndIdx} request parameter
     */
    // for old version
    public static void getData(int startIdx, int endIdx) {
        try {
            Properties prop = new Properties();
            // NOTE(review): the application id is hard-coded in source; consider loading
            // it from configuration instead.
            prop.setProperty("AppId", "1df63064-efad-4bbd-a797-1131499b7728");
            prop.setProperty("ResultObjects", "publication");
            prop.setProperty("DomainID", "22");
            prop.setProperty("SubDomainID", "2");
            prop.setProperty("YearStart", "2001");
            prop.setProperty("YearEnd", "2010");
            prop.setProperty("StartIdx", startIdx + "");
            prop.setProperty("EndIdx", endIdx + "");

            String url_org = "http://academic.research.microsoft.com/json.svc/search";
            String url_str = generateURL(url_org, prop);
            URLConnection yc = new URL(url_str).openConnection();
            // Same limits as getData_old, so a stalled server cannot block forever.
            yc.setConnectTimeout(25 * 1000);
            yc.setReadTimeout(25 * 1000);
            // Decode explicitly instead of relying on the platform default charset.
            // Assumes the service answers in UTF-8 (the usual JSON encoding).
            BufferedReader in = new BufferedReader(new InputStreamReader(yc.getInputStream(), "UTF-8"));
            try {
                String inputLine;
                while ((inputLine = in.readLine()) != null) {
                    System.out.println(inputLine);
                }
            } finally {
                // Close the stream even when reading fails part-way through.
                in.close();
            }
        } catch (MalformedURLException ex) {
            Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex);
        } catch (IOException ex) {
            Logger.getLogger(MAS.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Appends the entries of {@code prop} to {@code url_org} as a query string of the form
     * {@code ?key=value&key=value}.
     * <p>
     * Keys and values are inserted as they are (no URL encoding), in the iteration order
     * of {@code prop.keySet()}. With an empty {@code prop} the result is {@code url_org}
     * itself.
     *
     * @param url_org base URL without a query string
     * @param prop    request parameters; keys must be strings
     * @return the base URL followed by the query string
     */
    private static String generateURL(String url_org, Properties prop) {
        final StringBuilder query = new StringBuilder().append(url_org).append('?');
        for (Object rawKey : prop.keySet()) {
            final String name = (String) rawKey;
            query.append(name).append('=').append(prop.getProperty(name)).append('&');
        }
        // Drop the last character: the trailing '&', or the bare '?' when prop is empty.
        query.setLength(query.length() - 1);
        return query.toString();
    }

    /**
     * Makes a field value safe for the single-line, separator-delimited output files.
     * <p>
     * Every whitespace character matched by the regex class {@code \s} (space, tab, line
     * feed, vertical tab, form feed, carriage return) is replaced by one space, so a value
     * can contain neither the tab {@code SEPERATOR} nor a line break. Runs of whitespace
     * are not collapsed: each character is replaced individually.
     *
     * @param str raw field value, may be {@code null}
     * @return the cleaned value, or the empty string for {@code null} or empty input
     */
    private static String normalized(String str) {
        if (str == null || str.length() == 0) {
            return "";
        }
        // "\\s" already matches '\n', so a second pass replacing "\\n" would be dead work.
        return str.replaceAll("\\s", " ");
    }

}