freebase.api.FreebaseAPI.java Source code

Java tutorial

Introduction

Here is the source code for freebase.api.FreebaseAPI.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package freebase.api;

/**
 *
 * @author Aale
 */
import freebase.api.entity.movie.Actor;
import freebase.api.entity.movie.Directedby;
import freebase.api.entity.movie.Director;
import freebase.api.entity.movie.Film;
import freebase.api.entity.movie.Starring;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;

public class FreebaseAPI {

    public static String current_cursor = "";
    public static Sequence IDs = new Sequence();
    public static Sequence directedbyIDs = new Sequence();

    static Map<String, Integer> actor_map = new HashMap<>();
    static Map<String, Integer> film_map = new HashMap<>();
    static Map<String, Integer> director_map = new HashMap<>();
    static Map<String, Integer> character_map = new HashMap<>();
    static Map<String, Integer> starring_map = new HashMap<>();

    public static void main(String[] args) {
        //        removeDuplicates(CHARACTER_FILE);
        //        removeDuplicates(ACTOR_FILE);
        //        removeDuplicates(FILM_FILE);
        //        removeDuplicates(DIRECTOR_FILE);
        //        removeDuplicates(STARRING_FILE);

        //        FetchingFromGoogleAPI();
        //        getTopKActors(20);
        filterByTopActors();
        //        System.out.println(getTopKActors(100));
    }

    public static Set<Integer> filterBy(String inputfile, Set<Integer> keys, Integer column, String outputfile) {
        return filterBy(inputfile, keys, column, outputfile, null);

    }

    public static Set<Integer> filterBy(String inputfile, Set<Integer> keys, Integer column, String outputfile,
            Integer output_column) {
        final List<String> input_list = Utils.readFileLineByLine(inputfile, true);
        StringBuilder output_str = new StringBuilder();
        Set<Integer> output_set = new HashSet();
        for (String line : input_list) {
            String[] split = line.split("\\t");
            Integer key = Integer.parseInt(split[column]);
            if (keys.contains(key)) {
                output_str.append(line).append(NEWLINE);
                if (output_column != null) {
                    output_set.add(Integer.parseInt(split[output_column]));
                }
            }
        }
        Utils.writeDataIntoFile(output_str.toString(), outputfile);
        if (output_column != null) {
            return output_set;
        } else {
            return null;
        }
    }

    public static void filterByTopActors() {
        String suffix = ".1";
        final Set<Integer> topActors = getTopKActors(3);
        final Set<Integer> selected_films = filterBy(ACTOR_FILM_FILE, topActors, 0, ACTOR_FILM_FILE + suffix, 1);
        final Set<Integer> selected_characters = filterBy(ACTOR_CHARACTER_FILE, topActors, 0,
                ACTOR_CHARACTER_FILE + suffix, 1);
        final Set<Integer> selected_starring = filterBy(ACTOR_STARRING_FILE, topActors, 0,
                ACTOR_STARRING_FILE + suffix, 1);
        final Set<Integer> selected_director = filterBy(FILM_DIRECTOR_FILE, selected_films, 0,
                FILM_DIRECTOR_FILE + suffix, 1);
        final Set<Integer> selected_directedby = filterBy(FILM_DIRECTEDBY_FILE, selected_films, 0,
                FILM_DIRECTEDBY_FILE + suffix, 1);

        System.out.println("Actor# " + topActors.size());
        System.out.println("Films# " + selected_films.size());
        System.out.println("Characters# " + selected_characters.size());
        System.out.println("Starring# " + selected_starring.size());
        System.out.println("Directors# " + selected_director.size());
        System.out.println("Directedby# " + selected_directedby.size());
        filterBy(DIRECTOR_DIRECTEDBY_FILE, selected_director, 0, DIRECTOR_DIRECTEDBY_FILE + suffix);
        filterBy(CHARACTER_STARRING_FILE, selected_characters, 0, CHARACTER_STARRING_FILE + suffix);
        filterBy(CHARACTER_FILM_FILE, selected_characters, 0, CHARACTER_FILM_FILE + suffix);
        filterBy(FILM_STARRING_FILE, selected_films, 0, FILM_STARRING_FILE + suffix);

        filterBy(ACTOR_FILE, topActors, 0, ACTOR_FILE + suffix);
        filterBy(FILM_FILE, selected_films, 0, FILM_FILE + suffix);
        filterBy(CHARACTER_FILE, selected_characters, 0, CHARACTER_FILE + suffix);
        filterBy(STARRING_FILE, selected_starring, 0, STARRING_FILE + suffix);
        filterBy(DIRECTOR_FILE, selected_director, 0, DIRECTOR_FILE + suffix);
        filterBy(DIRECTEDBY_FILE, selected_directedby, 0, DIRECTEDBY_FILE + suffix);

    }

    public static Set<Integer> getTopKActors(int minFilmPerActor) {
        final List<String> edges = Utils.readFileLineByLine(ACTOR_FILM_FILE, true);
        Map<String, Integer> film_per_actor_counts = new HashMap();
        for (String line : edges) {
            String split[] = line.split("\\t");
            String actor = split[0];
            String film = split[1];
            Integer film_count = film_per_actor_counts.get(actor);
            if (film_count == null) {
                film_per_actor_counts.put(actor, 1);
            } else {
                film_per_actor_counts.put(actor, film_count + 1);
            }
        }
        Set<Integer> top_actors = new HashSet<>();
        for (String actor : film_per_actor_counts.keySet()) {
            if (film_per_actor_counts.get(actor) >= minFilmPerActor) {
                top_actors.add(Integer.parseInt(actor));
            }
        }
        System.out.println("Top-Actors# :" + top_actors.size());
        return top_actors;
    }

    public static List<Film> encodeJSON(JSONArray results) {
        List<Film> films = new ArrayList<>();
        for (Object flmObj : results) {
            JSONObject flmJObj = (JSONObject) flmObj;
            Film film = new Film(getId((String) flmJObj.get("mid"), film_map), (String) flmJObj.get("mid"),
                    (String) flmJObj.get("name"));

            JSONArray directed_by_arr = (JSONArray) flmJObj.get("directed_by");
            JSONObject directed_by = (JSONObject) directed_by_arr.get(0);
            Directedby directedby = new Directedby();
            directedby.setId(directedbyIDs.next());

            Director director = new Director(getId((String) directed_by.get("mid"), director_map),
                    (String) directed_by.get("mid"), (String) directed_by.get("name"));
            directedby.setDirector(director);
            film.setDirectedBy(directedby);

            JSONArray starrings = (JSONArray) flmJObj.get("starring");
            for (Object starringObj : starrings) {
                JSONObject starringJObj = (JSONObject) starringObj;
                Starring starring = new Starring();
                starring.setMid((String) starringJObj.get("mid"));
                starring.setId(getId((String) starringJObj.get("mid"), starring_map));

                JSONArray actorJObj_arr = (JSONArray) starringJObj.get("actor");
                JSONObject actorJObj = (JSONObject) actorJObj_arr.get(0);
                Actor actor = new Actor(getId((String) actorJObj.get("mid"), actor_map),
                        (String) actorJObj.get("mid"), (String) actorJObj.get("name"));
                starring.setActor(actor);

                JSONArray characterJObj_arr = (JSONArray) starringJObj.get("character");
                JSONObject characterJObj = (JSONObject) characterJObj_arr.get(0);
                freebase.api.entity.movie.FCharacter character = new freebase.api.entity.movie.FCharacter(
                        getId((String) characterJObj.get("mid"), character_map), (String) characterJObj.get("mid"),
                        (String) characterJObj.get("name"));
                starring.setCharacter(character);

                film.addStarring(starring);
            }

            films.add(film);
        }
        return films;
    }

    public static List getFilms(String fromDate, String toDate) {
        JSONArray results = FreebaseHelper.getJSON(fromDate, toDate);
        List<Film> films = FreebaseAPI.encodeJSON(results);
        return films;
    }

    final static String ROOT_DIR = "C:/freebase-dataset/";

    static String JSON_DUMP_FILE = ROOT_DIR + "dump_json.txt";
    //node files
    static String CHARACTER_FILE = ROOT_DIR + "character.txt";
    static String DIRECTOR_FILE = ROOT_DIR + "director.txt";
    static String FILM_FILE = ROOT_DIR + "film.txt";
    static String ACTOR_FILE = ROOT_DIR + "actor.txt";
    static String STARRING_FILE = ROOT_DIR + "starring.txt";
    static String DIRECTEDBY_FILE = ROOT_DIR + "directedby.txt";

    //edge files
    static String FILM_STARRING_FILE = ROOT_DIR + "film_starring.txt";
    static String ACTOR_STARRING_FILE = ROOT_DIR + "actor_starring.txt";
    static String CHARACTER_STARRING_FILE = ROOT_DIR + "character_starring.txt";
    static String FILM_DIRECTEDBY_FILE = ROOT_DIR + "film_directedby.txt";
    static String DIRECTOR_DIRECTEDBY_FILE = ROOT_DIR + "director_directedby.txt";

    //edge files - for imdb
    static String ACTOR_CHARACTER_FILE = ROOT_DIR + "actor_character.txt";
    static String ACTOR_FILM_FILE = ROOT_DIR + "actor_film.txt";
    static String CHARACTER_FILM_FILE = ROOT_DIR + "character_film.txt";
    static String FILM_DIRECTOR_FILE = ROOT_DIR + "film_director.txt";

    static String SEPERATOR = "\t";
    static String NEWLINE = "\n";

    public static void removeDuplicates(String filepath) {
        final List<String> file = Utils.readFileLineByLine(filepath, true);
        final List<String> file_all = Utils.readFileLineByLine(filepath + ".all", true);
        Set<String> mids = new HashSet<>();
        StringBuilder fileout_str = new StringBuilder();
        StringBuilder fileout_all_str = new StringBuilder();
        int duplicateCounts = 0;
        System.out.println("Removing duplicates from " + filepath);
        for (int i = 0; i < file_all.size(); i++) {
            String line = file_all.get(i);
            String[] split = line.split("\\t");
            String mid = split[1];
            //            System.out.println("mid=" + mid);
            if (!mids.contains(mid)) {
                fileout_str.append(file.get(i)).append(NEWLINE);
                fileout_all_str.append(line).append(NEWLINE);
                mids.add(mid);
            } else {
                duplicateCounts++;
            }
        }
        System.out.println("Writing to file...");
        Utils.writeDataIntoFile(fileout_str.toString(), filepath + ".2");
        Utils.writeDataIntoFile(fileout_all_str.toString(), filepath + ".all.2");

        System.out.println("Duplicates# :" + duplicateCounts);
    }

    private static void writeToFiles(List<Film> films) {
        StringBuilder film_str_all = new StringBuilder();
        StringBuilder film_str = new StringBuilder();
        StringBuilder directedby_str = new StringBuilder();
        StringBuilder film_directedby_str = new StringBuilder();
        StringBuilder film_director_str = new StringBuilder();
        StringBuilder director_str_all = new StringBuilder();
        StringBuilder director_str = new StringBuilder();
        StringBuilder director_directedby_str = new StringBuilder();
        StringBuilder starring_str_all = new StringBuilder();
        StringBuilder starring_str = new StringBuilder();
        StringBuilder film_starring_str = new StringBuilder();
        StringBuilder actor_str_all = new StringBuilder();
        StringBuilder actor_str = new StringBuilder();
        StringBuilder actor_starring_str = new StringBuilder();
        StringBuilder character_str_all = new StringBuilder();
        StringBuilder character_str = new StringBuilder();
        StringBuilder character_starring_str = new StringBuilder();
        StringBuilder character_film_str = new StringBuilder();
        StringBuilder actor_character_str = new StringBuilder();
        StringBuilder actor_film_str = new StringBuilder();

        for (Film film : films) {
            System.out.println("Writing film = " + film.getId() + " , " + film.getMid() + " , " + film.getName()
                    + " , starring# = " + film.getStarrings().size());
            //film
            film_str_all.append(film.getId()).append(SEPERATOR).append(film.getMid()).append(SEPERATOR)
                    .append(film.getName()).append(NEWLINE);
            film_str.append(film.getId()).append(SEPERATOR).append(film.getName()).append(NEWLINE);

            //directedby
            directedby_str.append(film.getDirectedBy().getId()).append(NEWLINE);

            //film-directedby
            film_directedby_str.append(film.getId()).append(SEPERATOR).append(film.getDirectedBy().getId())
                    .append(NEWLINE);

            //film-director
            film_director_str.append(film.getId()).append(SEPERATOR)
                    .append(film.getDirectedBy().getDirector().getId()).append(NEWLINE);

            //director
            director_str_all.append(film.getDirectedBy().getDirector().getId()).append(SEPERATOR)
                    .append(film.getDirectedBy().getDirector().getMid()).append(SEPERATOR)
                    .append(film.getDirectedBy().getDirector().getName()).append(NEWLINE);
            director_str.append(film.getDirectedBy().getDirector().getId()).append(SEPERATOR)
                    .append(film.getDirectedBy().getDirector().getName()).append(NEWLINE);

            //director-directedby
            director_directedby_str.append(film.getDirectedBy().getDirector().getId()).append(SEPERATOR)
                    .append(film.getDirectedBy().getId()).append(NEWLINE);

            for (Starring starring : film.getStarrings()) {
                //starring
                starring_str_all.append(starring.getId()).append(SEPERATOR).append(starring.getMid())
                        .append(NEWLINE);
                starring_str.append(starring.getId()).append(NEWLINE);

                //film-starring
                film_starring_str.append(film.getId()).append(SEPERATOR).append(starring.getId()).append(NEWLINE);

                //actor
                actor_str_all.append(starring.getActor().getId()).append(SEPERATOR)
                        .append(starring.getActor().getMid()).append(SEPERATOR)
                        .append(starring.getActor().getName()).append(NEWLINE);
                actor_str.append(starring.getActor().getId()).append(SEPERATOR)
                        .append(starring.getActor().getName()).append(NEWLINE);

                //actor-starring
                actor_starring_str.append(starring.getActor().getId()).append(SEPERATOR).append(starring.getId())
                        .append(NEWLINE);

                //character
                character_str_all.append(starring.getCharacter().getId()).append(SEPERATOR)
                        .append(starring.getCharacter().getMid()).append(SEPERATOR)
                        .append(starring.getCharacter().getName()).append(NEWLINE);
                character_str.append(starring.getCharacter().getId()).append(SEPERATOR)
                        .append(starring.getCharacter().getName()).append(NEWLINE);

                //character-starring
                character_starring_str.append(starring.getCharacter().getId()).append(SEPERATOR)
                        .append(starring.getId()).append(NEWLINE);

                //character-film
                character_film_str.append(starring.getCharacter().getId()).append(SEPERATOR).append(film.getId())
                        .append(NEWLINE);

                //actor-character
                actor_character_str.append(starring.getActor().getId()).append(SEPERATOR)
                        .append(starring.getCharacter().getId()).append(NEWLINE);

                //actor-film
                actor_film_str.append(starring.getActor().getId()).append(SEPERATOR).append(film.getId())
                        .append(NEWLINE);

            }
        }
        Utils.writeDataIntoFile(film_str_all.toString(), FILM_FILE + ".all");
        Utils.writeDataIntoFile(film_str.toString(), FILM_FILE);
        Utils.writeDataIntoFile(directedby_str.toString(), DIRECTEDBY_FILE);
        Utils.writeDataIntoFile(film_directedby_str.toString(), FILM_DIRECTEDBY_FILE);
        Utils.writeDataIntoFile(film_director_str.toString(), FILM_DIRECTOR_FILE);
        Utils.writeDataIntoFile(director_str_all.toString(), DIRECTOR_FILE + ".all");
        Utils.writeDataIntoFile(director_str.toString(), DIRECTOR_FILE);
        Utils.writeDataIntoFile(director_directedby_str.toString(), DIRECTOR_DIRECTEDBY_FILE);
        Utils.writeDataIntoFile(starring_str_all.toString(), STARRING_FILE + ".all");
        Utils.writeDataIntoFile(starring_str.toString(), STARRING_FILE);
        Utils.writeDataIntoFile(film_starring_str.toString(), FILM_STARRING_FILE);
        Utils.writeDataIntoFile(actor_str_all.toString(), ACTOR_FILE + ".all");
        Utils.writeDataIntoFile(actor_str.toString(), ACTOR_FILE);
        Utils.writeDataIntoFile(actor_starring_str.toString(), ACTOR_STARRING_FILE);
        Utils.writeDataIntoFile(character_str_all.toString(), CHARACTER_FILE + ".all");
        Utils.writeDataIntoFile(character_str.toString(), CHARACTER_FILE);
        Utils.writeDataIntoFile(character_starring_str.toString(), CHARACTER_STARRING_FILE);

        Utils.writeDataIntoFile(character_film_str.toString(), CHARACTER_FILM_FILE);
        Utils.writeDataIntoFile(actor_character_str.toString(), ACTOR_CHARACTER_FILE);
        Utils.writeDataIntoFile(actor_film_str.toString(), ACTOR_FILM_FILE);

    }

    private static int getId(String mid, Map<String, Integer> map) {
        Integer id = map.get(mid);
        if (id == null) {
            Integer new_id = IDs.next();
            map.put(mid, new_id);
            return new_id;
        } else {
            return id;
        }
    }

    private static boolean isNew(String mid, Map<String, Integer> map) {
        Integer id = map.get(mid);
        if (id == null) {
            return true;
        } else {
            return false;
        }
    }

    private static Set<Integer> getFilmsBy(Set<Integer> topActors) {
        throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates.
    }

    public static void FetchingFromGoogleAPI() {

        int max_step = 1;
        int step = 0;

        int month_step = 1;
        Calendar last_date = Calendar.getInstance();
        last_date.set(Calendar.YEAR, 2016);
        last_date.set(Calendar.MONTH, 0);
        last_date.set(Calendar.DAY_OF_MONTH, 1);

        Calendar from_date = Calendar.getInstance();
        from_date.set(Calendar.YEAR, 2010);
        from_date.set(Calendar.MONTH, 0);
        from_date.set(Calendar.DAY_OF_MONTH, 1);

        Calendar to_date = (Calendar) from_date.clone();
        to_date.add(Calendar.MONTH, month_step);
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");

        while (true) {
            //            if (step > max_step) {
            //                break;
            //            }
            //            System.out.println(">>> Cursor = " + current_cursor);
            //            if (/*films == null || films.isEmpty() ||*/step == max_step) {
            //                System.out.println("Films is null or zero size. Break.");
            //                break;
            //            }
            if (from_date.after(last_date)) {
                System.out.println("Date is after last date");
                break;
            }
            String fromDate = sdf.format(from_date.getTime());
            String toDate = sdf.format(to_date.getTime());
            System.out.println(step + " ) fetch films from " + fromDate + " to " + toDate);
            step++;
            List<Film> films = getFilms(fromDate, toDate);
            System.out.println("Films#: " + films.size());
            writeToFiles(films);
            //            for (Film f : films) {
            //                System.out.println(f);
            //            }
            from_date.add(Calendar.MONTH, month_step);
            to_date.add(Calendar.MONTH, month_step);

        }
    }
}