com.cotrino.langnet.GenerateVisualization.java Source code

Java tutorial

Introduction

Here is the source code for com.cotrino.langnet.GenerateVisualization.java

Source

/*************************************************************************************
 * Copyright (C) 2009-2015 Jos Miguel Cotrino. All rights reserved.
 *
 * This software may be distributed and modified under the terms of the GNU
 * General Public License version 2 (GPL2) as published by the Free Software
 * Foundation and appearing in the file LICENSE included in the packaging of
 * this file. Please note that GPL2 Section 2[b] requires that all works based
 * on this software must also be made publicly available under the terms of
 * the GPL2 ("Copyleft").
 *
 * Contact information
 * -------------------
 *
 * Jos Miguel Cotrino
 * Web : http://www.cotrino.com/
 *
 ************************************************************************************/
package com.cotrino.langnet;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.StringJoiner;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.cotrino.langnet.io.IOUtil;

/**
 * 
 * @author <a href="http://www.cotrino.com/">Jos Miguel Cotrino</a>
 *
 */
public class GenerateVisualization {

    final static Logger logger = LoggerFactory.getLogger(GenerateVisualization.class);

    private final static double MIN_SIMILARITY = 0.30;
    private CSVFormat csvFormat = CSVFormat.DEFAULT.withDelimiter(';').withIgnoreEmptyLines(true).withHeader()
            .withQuote('#');
    private HashMap<String, List<LanguageSimilarity>> languageSimilarities;
    private HashMap<String, Integer> amountWordsPerLanguage;

    public GenerateVisualization() throws IOException {

        generateSummary(Constants.RESULTS_FOLDER, Constants.SUMMARY_FILE);

        generateLanguages(Constants.SUMMARY_FILE, Constants.LANGUAGES_FILE);

        getLanguages(Constants.LANGUAGES_FILE);

        generateVisualization(Constants.VISUALIZATION_FILE);

    }

    private void generateSummary(String path, String summaryFile) {

        languageSimilarities = new HashMap<String, List<LanguageSimilarity>>();

        String content = "LanguageA;LanguageB;Similarity;ExecutedComparisons;IdenticalWords;VerySimilarWords;\n";
        File directory = new File(path);
        File[] logFiles = directory.listFiles();
        for (File logFile : logFiles) {
            if (logFile.getName().contains(".log")) {
                try {
                    //logger.debug("Including "+logFile.getName());
                    String[] parts = logFile.getName().split("[\\_\\.]");
                    String langA = parts[1];
                    String langB = parts[2];
                    double[] similarity = calculateSimilarity(logFile);
                    content += langA + ";" + langB + ";" + similarity[0] + ";" + (int) similarity[1] + ";0;0;\n";

                    if (similarity[0] >= MIN_SIMILARITY) {
                        if (!languageSimilarities.containsKey(langA)) {
                            languageSimilarities.put(langA, new LinkedList<LanguageSimilarity>());
                        }
                        if (!languageSimilarities.containsKey(langB)) {
                            languageSimilarities.put(langB, new LinkedList<LanguageSimilarity>());
                        }
                        languageSimilarities.get(langA).add(new LanguageSimilarity(langB, similarity[0]));
                        languageSimilarities.get(langB).add(new LanguageSimilarity(langA, similarity[0]));
                    }

                } catch (IOException e) {
                    logger.error("At " + logFile.getName() + ", error " + e.getMessage());
                }
            }
        }
        IOUtil.write(summaryFile, content);

    }

    private double[] calculateSimilarity(File logFile) throws IOException {

        int i = 0;
        double total = 0.0;
        Reader reader = new FileReader(logFile);
        CSVParser parser = new CSVParser(reader, csvFormat);
        for (CSVRecord record : parser) {
            try {
                double similarity = Double.parseDouble(record.get("Similarity"));
                total += similarity;
                i++;
            } catch (NumberFormatException e) {
                logger.error("At " + logFile.getName() + ", failed line: " + record.toString());
            }
        }
        parser.close();
        reader.close();
        return new double[] { (total / i), i };

    }

    private void generateLanguages(String summaryFile, String languagesFile) throws IOException {

        HashMap<String, Integer> list = new HashMap<String, Integer>();
        Reader reader = new FileReader(summaryFile);
        CSVParser parser = new CSVParser(reader, csvFormat);
        for (CSVRecord record : parser) {
            String languageA = record.get("LanguageA");
            String languageB = record.get("LanguageB");
            int words = Integer.parseInt(record.get("ExecutedComparisons"));
            list.put(languageA, Math.max(words, list.getOrDefault(languageA, 0)));
            list.put(languageB, Math.max(words, list.getOrDefault(languageB, 0)));
        }
        parser.close();
        reader.close();

        String content = "Language;Words;Family;\n";
        for (String language : list.keySet()) {
            content += language + ";" + list.get(language) + ";Romance;\n";
        }
        IOUtil.write(languagesFile, content);
    }

    private void getLanguages(String file) throws IOException {

        amountWordsPerLanguage = new HashMap<String, Integer>();

        Reader reader = new FileReader(file);
        CSVParser parser = new CSVParser(reader, csvFormat);
        for (CSVRecord record : parser) {
            String language = record.get("Language");
            int words = Integer.parseInt(record.get("Words"));
            amountWordsPerLanguage.put(language, words);
        }
        parser.close();
        reader.close();

    }

    /**
     * based on http://bl.ocks.org/1377729
     * @param jsFile
     */
    private void generateVisualization(String jsFile) {

        String content = "";
        int i = 0;
        double max = 0.0, min = 100.0;
        StringJoiner sj = new StringJoiner(",");
        StringJoiner nodesHash = new StringJoiner(" ");
        StringJoiner linkList = new StringJoiner(",");
        HashMap<String, Integer> languageIds = new HashMap<String, Integer>();

        for (String language : amountWordsPerLanguage.keySet()) {

            // language bubble color & size
            int red = (int) (Math.random() * 127.0);
            int green = (int) (Math.random() * 127.0);
            int blue = (int) (Math.random() * 127.0);
            int color = ((red + 128) << 16) | ((green + 128) << 8) | ((blue + 128));
            int textcolor = ((red + 64) << 16) | ((green + 64) << 8) | ((blue + 64));
            int size = amountWordsPerLanguage.get(language) / 2000;
            if (size < 5) {
                size = 5;
            }

            // language information
            String description = "Language: " + getWikiURL(language) + "<br/><br/>";
            if (languageSimilarities.containsKey(language)) {
                List<LanguageSimilarity> similarLanguages = languageSimilarities.get(language);
                Collections.sort(similarLanguages);
                description += "Similar to:<ul>";
                // look for most similar languages
                for (LanguageSimilarity languageB : similarLanguages) {
                    description += "<li>" + getWikiURL(languageB.language) + " at "
                            + String.format("%d", (int) (languageB.similarity * 100)) + "%</li>";
                    max = Math.max(max, languageB.similarity);
                    min = Math.min(min, languageB.similarity);
                }
                description += "</ul>";
            } else {
                description += "No similar languages found.";
            }

            sj.add("{ label : \"" + language + "\", " + "id : " + i + ", " + "color : \"#"
                    + String.format("%6x", color) + "\", " + "textcolor : \"#" + String.format("%6x", textcolor)
                    + "\", " + "size : " + size + ", desc : \"" + description + "\" }");

            nodesHash.add("nodesHash[\"" + language + "\"] = " + i + ";");
            languageIds.put(language, i);
            i++;
        }

        content += "var nodesArray = [\n" + sj.toString() + "\n];\n\n";

        content += "var nodesHash = [];\n" + nodesHash.toString() + "\n\n";

        for (String languageA : languageSimilarities.keySet()) {
            for (LanguageSimilarity languageB : languageSimilarities.get(languageA)) {
                int color = (int) (240 * (1.0 + MIN_SIMILARITY - languageB.similarity));
                if (color > 240) {
                    color = 240;
                }
                if (languageIds.containsKey(languageA) && languageIds.containsKey(languageB.language)) {
                    linkList.add("{ desc : \"" + languageA + " -- " + languageB.language + "\", " + "source : "
                            + languageIds.get(languageA) + ", " + "target : " + languageIds.get(languageB.language)
                            + ", " + "weight : " + languageB.similarity + ", " + "color : \"#"
                            + String.format("%02x%02xff", color, color) + "\" }");
                }
            }
        }
        content += "var linksArray = [\n" + linkList.toString() + "\n];\n";

        IOUtil.write(jsFile, content);

    }

    /*
    private void cleanName
    {
       my $name = shift;
       #$name =~ s/\s//igs;
       $name =~ s/-/ /igs;
       while($name =~ m/(.+[^ ]+)([A-Z])(.+)/) {
     $name = "$1 $2$3";
       }
       return $name;
    }
        
    */

    private String getWikiURL(String language) {
        String url = "http://en.wikipedia.org/wiki/" + language + "_language";
        url.replaceAll(" ", "_");
        return "<a href='" + url + "' target='_blank'>" + language + "</a>";
    }

    class LanguageSimilarity implements Comparable<LanguageSimilarity> {

        String language;
        double similarity;

        public LanguageSimilarity(String language, double similarity) {
            this.language = language;
            this.similarity = similarity;
        }

        @Override
        public int compareTo(LanguageSimilarity o) {
            if (this.similarity > o.similarity) {
                return -1;
            } else if (this.similarity < o.similarity) {
                return 1;
            } else {
                return 0;
            }
        }

    }

    /**
     * @param args
     * @throws IOException 
     */
    public static void main(String[] args) throws IOException {

        long start = System.currentTimeMillis();

        // generate visualization
        new GenerateVisualization();

        long end = System.currentTimeMillis();
        double duration = ((double) (end - start)) / 1000.0;
        logger.debug(String.format("%.2f seconds = %.2f minutes = %.2f hours", duration, duration / 60.0,
                duration / 3600.0));

    }

}