icevaluation.GetWikiURL.java Source code

Java tutorial

Introduction

Here is the source code for icevaluation.GetWikiURL.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package icevaluation;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.HashMap;
import org.apache.commons.codec.binary.Base64;

/**
 *
 * @author Masud
 */
public class GetWikiURL {
    private HashMap<String, Integer> keyMap;
    private int totalkey = 0;

    /**
     * @param args the command line arguments
     */
    public GetWikiURL() {
        keyMap = new HashMap<>();

    }

    public static void main(String[] args) {
        // TODO code application logic here
        GetWikiURL getWiki = new GetWikiURL();
        // getWiki.processHierarchyData();
        //System.out.println("Query: "+getWiki.formatQuery("Top/Sports/Cricket/Ball/"));
        getWiki.processHierarchyData();
    }

    public String formatQuery(String directoryString) {
        String[] catparts = directoryString.trim().split(" ");
        String[] formatedQ = catparts[0].trim().split("/");
        String query = "";
        for (int i = 1; i < formatedQ.length; i++) {
            query = query + " " + formatedQ[i];
        }
        return query.trim();
    }

    public void processHierarchyData() {
        loadkeyInfo();
        System.out.println("Total Key: " + totalkey);
        //String queryOriginal = "sports game";
        //String queryCover = "jack raihanee";
        BufferedReader reader = null;
        FileWriter fw = null;
        try {
            File file = new File("C:/Development/NetbeanProjects/data/dmoz_Category.txt");
            reader = new BufferedReader(new FileReader(file));

            fw = new FileWriter("C:/Development/NetbeanProjects/data/DMOZ_Wiki_URLs/dmozcat_wikiurl.txt", true);
            String query = null;
            String line = null;
            int count_category = 0;
            while ((line = reader.readLine()) != null) {//original query
                //System.out.println("Original: "+line);
                try {
                    query = formatQuery(line);
                    query = query + " " + "site:wikipedia.org";
                    String bingKey = getValidBestKey();
                    String search_result = getWikiResults(query, bingKey);
                    String wikiurl = processURL(search_result);
                    String[] catparts = line.trim().split(" ");
                    String write_cat = "<category id = " + count_category + " >" + catparts[0] + "</category>";
                    String write_url = "<url id = " + count_category + " >" + wikiurl + "</url>";
                    fw.write(write_cat + "\n" + write_url + "\n");
                    count_category++;
                    if (count_category % 100 == 0) {
                        System.out.println("Completed Categories: " + count_category);
                    }

                } catch (Exception err) {
                    System.out.println("Some function not worked well .... Jump to next line!");
                    err.printStackTrace();
                }

            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                reader.close();
                fw.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
            writeKeyInfoBack();
            System.out.println("Key status write succesfully");
        }
    }

    public String processURL(String searchResults) {
        String firstWikiURL = null;
        try {

            String[] parts = searchResults.split("\"");
            int httpCount = 0;
            for (int i = 0; i < parts.length; i++) {
                if (parts[i].trim().startsWith("https:")) {
                    httpCount++;
                    if (httpCount == 4) {
                        firstWikiURL = parts[i].trim();
                        // System.out.println("First URL:"+firstWikiURL);
                    }
                    //System.out.println("Line: "+i+": "+parts[i]);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return firstWikiURL;
    }

    public String getWikiResults(String searchText, String bingAPIKey) {
        String search_results = null;
        //update key use by incrementing key use
        Integer n = keyMap.get(bingAPIKey);
        if (n == null) {
            n = 1;
        } else {
            n = n + 1;
        }
        keyMap.put(bingAPIKey, n);

        searchText = searchText.replaceAll(" ", "%20");
        String accountKey = bingAPIKey;

        byte[] accountKeyBytes = Base64.encodeBase64((accountKey + ":" + accountKey).getBytes());
        String accountKeyEnc = new String(accountKeyBytes);
        URL url;
        try {
            url = new URL("https://api.datamarket.azure.com/Bing/Search/v1/Composite?Sources=%27Web%27&Query=%27"
                    + searchText + "%27&$format=JSON");
            HttpURLConnection conn = (HttpURLConnection) url.openConnection();
            conn.setRequestMethod("GET");
            conn.setRequestProperty("Authorization", "Basic " + accountKeyEnc);
            //conn.addRequestProperty(accountKeyEnc, "Mozilla/4.76"); 
            BufferedReader br = new BufferedReader(new InputStreamReader((conn.getInputStream())));
            StringBuilder sb = new StringBuilder();
            String output = null;

            //System.out.println("Output from Server .... \n");
            //write json to string sb
            if ((output = br.readLine()) != null) {
                search_results = output;
            }

            conn.disconnect();
        } catch (MalformedURLException e1) {
            e1.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }

        return search_results;

    }

    public void loadkeyInfo() {
        BufferedReader reader = null;
        try {
            File file = new File("data/apiKeySettings.txt");
            reader = new BufferedReader(new FileReader(file));
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
                String[] linerParts = line.split(" ");
                keyMap.put(linerParts[0], Integer.parseInt(linerParts[1]));
                totalkey++;
            }

        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

    }

    private void writeKeyInfoBack() {
        try {
            File file = new File("data/apiKeySettings.txt");
            // if file doesnt exists, then create it
            if (!file.exists()) {
                file.createNewFile();
            }
            FileWriter fw = new FileWriter(file.getAbsoluteFile());
            BufferedWriter bw = new BufferedWriter(fw);

            for (String key : keyMap.keySet()) {
                bw.write(key + " " + keyMap.get(key) + "\n");
            }
            bw.close();
            System.out.println("Done");

        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public String getValidBestKey() {
        String validKey = "";
        for (String key : keyMap.keySet()) {
            if (keyMap.get(key) < 4900) {
                validKey = key;
            }
        }
        if (validKey.isEmpty()) {
            System.out.println("All keys are out of limit, Please add more new keys!");
        }
        return validKey;
    }

}