phonegraph.PhoneGraph.java Source code

Java tutorial

Introduction

Here is the source code for phonegraph.PhoneGraph.java

Source

package phonegraph;

/**
 *
 * @author linhong
 */

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Scanner;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
import util.IntPair;
import util.StrUtil;

/**
 *
 * @author linhong
 */
public class PhoneGraph {
    HashMap<String, Integer> phonemaps;
    HashMap<Integer, HashSet<String>> phone2urls;
    HashMap<IntPair, Integer> edges;
    int phonenumber = 0;

    public PhoneGraph() {
    }

    public void AddtoMap(String phone, String uri) {
        if (this.phonemaps.containsKey(phone) == false) {
            this.phonemaps.put(phone, phonenumber);
            HashSet<String> urls = new HashSet<String>(10);
            urls.add(uri);
            phone2urls.put(phonenumber, urls);
            phonenumber++;
        } else {
            int id = this.phonemaps.get(phone);
            HashSet<String> urls = this.phone2urls.get(id);
            urls.add(uri);
            phone2urls.put(id, urls);
        }
    }

    public void runPhoneMap(String dirname) {
        ArrayList<String> filenames = StrUtil.Initfolder(dirname);
        System.out.println(filenames.size());
        phonemaps = new HashMap<String, Integer>(100000);
        phone2urls = new HashMap<Integer, HashSet<String>>(100000);
        for (int i = 0; i < filenames.size(); i++) {
            CreatedphoneMapGT(filenames.get(i));
        }
    }

    public void CreatedphoneOnly(String filename) {
        try {
            phonemaps = new HashMap<String, Integer>(400000);
            FileReader fileReader = new FileReader(filename);
            // Always wrap FileReader in BufferedReader.
            BufferedReader bufferedReader = new BufferedReader(fileReader);
            String line;
            int count = 0;
            bufferedReader.readLine();
            while ((line = bufferedReader.readLine()) != null) {
                Scanner sc = new Scanner(line);
                sc.useDelimiter("\t");
                Integer id = sc.nextInt();
                String phone = sc.next();
                phonemaps.put(phone, id);
                count++;
                //                if(count%100==0){
                //                    System.out.println(count);
                //                }
            }
            bufferedReader.close();
        } catch (IOException ex) {
            Logger.getLogger(PhoneGraph.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    public void CreatedphoneMapGT(String filename) {
        try {

            FileReader fileReader = new FileReader(filename);
            // Always wrap FileReader in BufferedReader.
            BufferedReader bufferedReader = new BufferedReader(fileReader);
            String line;
            int count = 0;
            while ((line = bufferedReader.readLine()) != null) {
                JSONObject documentInfo = (JSONObject) new JSONParser().parse(line);
                JSONArray phones = (JSONArray) documentInfo.get("high_precision_phone");
                //JSONObject rawdata = (JSONObject)documentInfo.get("crawl_data");
                String uri = (String) documentInfo.get("cdr_id");
                if (phones != null) {
                    for (int i = 0; i < phones.size(); i++) {
                        JSONObject o = (JSONObject) phones.get(i);
                        JSONArray a = (JSONArray) o.get("result");
                        for (int j = 0; j < a.size(); j++) {
                            JSONObject entity = (JSONObject) a.get(j);
                            String phone = (String) entity.get("value");
                            AddtoMap(phone, uri);
                        }
                    }

                }
                JSONArray emails = (JSONArray) documentInfo.get("high_precision_email");
                if (emails != null) {
                    for (int i = 0; i < emails.size(); i++) {
                        JSONObject o = (JSONObject) emails.get(i);
                        JSONArray a = (JSONArray) o.get("result");
                        for (int j = 0; j < a.size(); j++) {
                            JSONObject entity = (JSONObject) a.get(j);
                            String email = (String) entity.get("value");
                            AddtoMap(email, uri);
                        }
                    }
                }
                count++;
                //                if(count%100==0){
                //                    System.out.println(count);
                //                }
            }
            bufferedReader.close();
        } catch (IOException ex) {
            Logger.getLogger(PhoneGraph.class.getName()).log(Level.SEVERE, null, ex);
        } catch (ParseException ex) {
            Logger.getLogger(PhoneGraph.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    public void PrintResults(String ID) {
        FileWriter fstream1 = null;
        try {
            fstream1 = new FileWriter("phone_map" + ID + ".txt", false);
            BufferedWriter out = new BufferedWriter(fstream1);
            Iterator it = phonemaps.entrySet().iterator();
            out.write("PhoneID\tPhone\n");
            while (it.hasNext()) {
                Map.Entry pairs = (Map.Entry) it.next();
                out.write(pairs.getValue() + "\t" + pairs.getKey() + "\n");
            }
            out.close();
            fstream1 = new FileWriter("phone2url" + ID + ".txt", false);
            out = new BufferedWriter(fstream1);
            it = phone2urls.entrySet().iterator();
            out.write("PhoneID\turi_lists\n");
            while (it.hasNext()) {
                Map.Entry pairs = (Map.Entry) it.next();
                out.write(pairs.getKey().toString());
                HashSet<String> newset = (HashSet<String>) pairs.getValue();
                Iterator iterator = newset.iterator();
                while (iterator.hasNext()) {
                    out.write("\t" + iterator.next());
                }
                out.write("\n");
                //phone2urls.remove((Integer)pairs.getKey());
            }
            out.close();
            fstream1.close();
            phone2urls.clear();
        } catch (IOException ex) {
            Logger.getLogger(PhoneGraph.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    public void runGraph(String dirname, int k) {
        ArrayList<String> filenames = StrUtil.Initfolder(dirname);
        edges = new HashMap<IntPair, Integer>(100000);
        for (int i = 0; i < filenames.size(); i++) {
            CreateGraphGT(filenames.get(i), k);
        }
    }

    public void CreateGraphGT(String filename, int k) {
        try {
            FileReader fileReader = new FileReader(filename);
            // Always wrap FileReader in BufferedReader.
            BufferedReader bufferedReader = new BufferedReader(fileReader);
            String line;
            int count = 0;
            HashSet<Integer> nodes = new HashSet<Integer>(10000);
            while ((line = bufferedReader.readLine()) != null) {
                //ignore the prefix for phone and uri
                JSONObject documentInfo = (JSONObject) new JSONParser().parse(line);
                JSONArray phones = (JSONArray) documentInfo.get("high_precision_phone");
                //JSONObject rawdata = (JSONObject)documentInfo.get("crawl_data");
                if (phones != null) {
                    for (int i = 0; i < phones.size(); i++) {
                        JSONObject o = (JSONObject) phones.get(i);
                        JSONArray a = (JSONArray) o.get("result");
                        for (int j = 0; j < a.size(); j++) {
                            JSONObject entity = (JSONObject) a.get(j);
                            String phone = (String) entity.get("value");
                            if (phonemaps.containsKey(phone) == true) {
                                int id = phonemaps.get(phone);
                                nodes.add(id);
                            }
                        }
                    }

                }
                JSONArray emails = (JSONArray) documentInfo.get("high_precision_email");
                if (emails != null) {
                    for (int i = 0; i < emails.size(); i++) {
                        JSONObject o = (JSONObject) emails.get(i);
                        JSONArray a = (JSONArray) o.get("result");
                        for (int j = 0; j < a.size(); j++) {
                            JSONObject entity = (JSONObject) a.get(j);
                            String email = (String) entity.get("value");
                            if (phonemaps.containsKey(email) == true) {
                                int id = phonemaps.get(email);
                                nodes.add(id);
                            }
                        }
                    }
                }
                if (nodes.size() < k) {
                    Integer[] arr = nodes.toArray(new Integer[nodes.size()]);
                    for (int i = 0; i < arr.length; i++) {
                        for (int j = (i + 1); j < arr.length; j++) {
                            IntPair o = new IntPair(arr[i], arr[j]);
                            if (arr[i] > arr[j]) {
                                o.setKey(arr[j]);
                                o.setValue(arr[i]);
                            }
                            if (this.edges.containsKey(o) == false) {
                                this.edges.put(o, 1);
                            } else {
                                int freq = this.edges.get(o);
                                this.edges.put(o, freq + 1);
                            }
                        }
                    }
                }
                nodes.clear();
                count++;
                if (count % 100 == 0) {
                    System.out.println(count);
                }
            }
        } catch (IOException ex) {
            Logger.getLogger(PhoneGraph.class.getName()).log(Level.SEVERE, null, ex);
        } catch (ParseException ex) {
            Logger.getLogger(PhoneGraph.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    public void WriteGraph(String ID) {
        FileWriter fstream1 = null;
        try {
            fstream1 = new FileWriter("phone_graph" + ID + ".txt", false);
            BufferedWriter out = new BufferedWriter(fstream1);
            Iterator it = edges.entrySet().iterator();
            while (it.hasNext()) {
                Map.Entry pairs = (Map.Entry) it.next();
                IntPair o = (IntPair) pairs.getKey();
                out.write(o.getsource() + "\t" + o.gettarget() + "\t" + pairs.getValue() + "\n");
            }
            out.close();
        } catch (IOException ex) {
            Logger.getLogger(PhoneGraph.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    public static void main(String[] args) {
        if (args.length < 1) {
            System.out.println("Usage: [raw_json_file] [output_prefix_string] [k]");
            System.exit(3);
        }
        PhoneGraph mygraph = new PhoneGraph();
        mygraph.runPhoneMap(args[0]);
        mygraph.PrintResults(args[1]);
        //mygraph.CreatedphoneOnly(args[1]);
        System.out.println("finish reading results");
        int k = Integer.parseInt(args[2]);
        mygraph.runGraph(args[0], k);
        mygraph.WriteGraph(args[1]);
    }
}