gr.iit.demokritos.cru.cps.ai.ComputationalCreativityMetrics.java Source code

Java tutorial

Introduction

Here is the source code for gr.iit.demokritos.cru.cps.ai.ComputationalCreativityMetrics.java

Source

/*
 * Copyright (C) 2015 Computational Systems & Human Mind Research Unit
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package gr.iit.demokritos.cru.cps.ai;

import edu.uci.ics.jung.graph.DirectedSparseGraph;
import edu.uci.ics.jung.graph.Graph;
import edu.uci.ics.jung.graph.util.EdgeType;
import gr.iit.demokritos.cru.cps.ai.InfoSummarization;
import gr.iit.demokritos.cru.cps.ai.KeyphraseClustering;
import gr.iit.demokritos.cru.cps.utilities.wordnet.WNAccess;
import gr.iit.demokritos.cru.cps.utilities.wordnet.WNDE;
import gr.iit.demokritos.cru.cps.utilities.wordnet.WNEL;
import gr.iit.demokritos.cru.cps.utilities.wordnet.WordNetENDistance;
import gr.iit.demokritos.cru.cps.Metric;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import static java.lang.Math.abs;
import static java.lang.Math.abs;
import static java.lang.Math.abs;
import static java.lang.Math.abs;
import static java.lang.Math.abs;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.ListIterator;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import static org.apache.commons.lang3.StringUtils.getLevenshteinDistance;
import weka.core.Instances;

/**
 * Computes creativity metrics for textual stories: novelty, surprise, rarity
 * and recreational effort, plus the word-distance and graph-closure helpers
 * those metrics are built on.
 *
 * <p>Semantic distances are delegated to the WordNet accessor matching the
 * configured language ("en", "de" or "el"); term extraction is delegated to
 * {@link InfoSummarization}.
 *
 * @author Giorgos Panagopoulos, Pythagoras Karampiperis, Antonis Koukourikos
 */
public class ComputationalCreativityMetrics {

    /** Regex separating the words of a story when counting its length. */
    private static final String WORD_DELIMITERS = "[\\.\\s;?!():\"]+";
    /** Regex matching a run of sentence terminators. */
    private static final String SENTENCE_END = "[\\.?;!]+";
    /** Word count after which a fragment is closed at the next sentence end. */
    private static final int FRAGMENT_WORDS = 30;
    /** Separator between rarity and recreational effort in {@link #ComputeRar_Eff} values. */
    private static final String SCORE_SEPARATOR = ":::";

    private Instances data;
    private final String language;
    private final WNAccess wn;
    private WNDE wnde;
    private WNEL wnel;
    private final InfoSummarization inf;
    private final Set<String> stop;
    private final Set<String> off;
    private final Class<?> stemClass;

    /**
     * Stores the collaborators and resolves the stemmer class for the language.
     *
     * @param language language code; compared case-insensitively against "en", "de" and "el"
     * @param wn       accessor used for English semantic distances
     * @param wnde     accessor used when the language is "de"
     * @param wnel     accessor used when the language is "el"
     * @param inf      term extractor and map sorter
     * @param stop     stored as given; not read by this class
     * @param off      stored as given; not read by this class
     * @throws ClassNotFoundException if a stemmer class cannot be loaded
     */
    public ComputationalCreativityMetrics(String language, WNAccess wn, WNDE wnde, WNEL wnel, InfoSummarization inf,
            Set<String> stop, Set<String> off) throws ClassNotFoundException {
        this.language = language;

        this.wn = wn;
        this.wnde = wnde;
        this.wnel = wnel;
        this.inf = inf;
        this.stop = stop;
        this.off = off;

        // The English stemmer is always loaded first, exactly as before.
        Class<?> stemmer = Class.forName("gr.iit.demokritos.cru.cps.utilities.snowball.ext.englishStemmer");
        if (language.equalsIgnoreCase("de")) {
            // NOTE(review): "de" selects the *dutch* stemmer; if "de" means German this
            // should probably be germanStemmer - confirm against the snowball package.
            stemmer = Class.forName("gr.iit.demokritos.cru.cps.utilities.snowball.ext.dutchStemmer");
        }
        this.stemClass = stemmer;
    }

    public WNDE getWnde() {
        return wnde;
    }

    public void setWnde(WNDE wnde) {
        this.wnde = wnde;
    }

    public WNEL getWnel() {
        return wnel;
    }

    public void setWnel(WNEL wnel) {
        this.wnel = wnel;
    }

    /**
     * Scores a new story against a set of existing stories.
     *
     * <p>The caller's list is not modified; the new story is appended to a
     * private copy before the metrics are computed.
     *
     * @param NewStory the story to score
     * @param stories  the existing stories
     * @param type     only "semantic" (case-insensitive) is supported
     * @return the metrics "Novelty", "Surprise", "Rarity" and "Recreational" of
     *         {@code NewStory}, in that order, or {@code null} for any other type
     */
    public ArrayList<Metric> ComputationalCreativityMetricsCalculator(String NewStory, ArrayList<String> stories,
            String type) throws ClassNotFoundException, IllegalAccessException, Exception {

        if (!"semantic".equalsIgnoreCase(type)) {
            System.out.println("name not defined");
            return null;
        }

        // Work on a copy so repeated calls do not pile new stories into the caller's list.
        ArrayList<String> corpus = new ArrayList<String>(stories);
        corpus.add(NewStory);

        Map<String, String> com = ComputeRar_Eff(corpus);
        Map<String, Double> nov = Novelty(corpus);

        // NOTE(review): only the new story's surprise is returned below; the other
        // entries are computed as before and then discarded.
        Map<String, Double> sur = new HashMap<String, Double>();
        for (String story : corpus) {
            sur.put(story, storySurprise(story));
        }

        String[] rarityAndEffort = com.get(NewStory).split(SCORE_SEPARATOR);

        ArrayList<Metric> values = new ArrayList<Metric>();
        values.add(new Metric("Novelty", nov.get(NewStory)));
        values.add(new Metric("Surprise", sur.get(NewStory)));
        values.add(new Metric("Rarity", Double.parseDouble(rarityAndEffort[0])));
        values.add(new Metric("Recreational", Double.parseDouble(rarityAndEffort[1])));
        return values;
    }

    /**
     * Cuts a story into fragments and returns their surprise. A fragment runs up
     * to the 30th word and then on to the next sentence terminator; whatever is
     * left once 30 words or fewer remain is not turned into a fragment. Stories of
     * 30 words or fewer have no surprise (0.0).
     */
    private double storySurprise(String story)
            throws ClassNotFoundException, InstantiationException, IllegalAccessException, IOException {
        String remainder = story;
        String[] words = remainder.split(WORD_DELIMITERS);
        if (words.length <= FRAGMENT_WORDS) {
            return 0.0;
        }
        ArrayList<String> fragments = new ArrayList<String>();
        while (words.length > FRAGMENT_WORDS) {
            int cut = offsetOfToken(remainder, words, FRAGMENT_WORDS - 1);
            // tail[0] completes the current sentence, tail[1] (if any) is the rest of the story
            String[] tail = remainder.substring(cut).split(SENTENCE_END, 2);
            fragments.add(remainder.substring(0, cut) + tail[0]);
            if (tail.length < 2) {
                break;
            }
            remainder = tail[1];
            words = remainder.split(WORD_DELIMITERS);
        }
        return Surprise("", fragments);
    }

    /**
     * Returns the offset in {@code text} of the token at {@code tokenIndex}.
     * Tokens are located one after another, so a word that also occurs earlier
     * in the text cannot be mistaken for the requested token.
     */
    private static int offsetOfToken(String text, String[] tokens, int tokenIndex) {
        int cursor = 0;
        for (int k = 0; k < tokenIndex; k++) {
            cursor = text.indexOf(tokens[k], cursor) + tokens[k].length();
        }
        return text.indexOf(tokens[tokenIndex], cursor);
    }

    /**
     * Computes rarity and recreational effort, as defined in the paper, for
     * every story.
     *
     * @param stories the stories to score
     * @return each story mapped to {@code "<rarity>:::<recreational effort>"}
     */
    public Map<String, String> ComputeRar_Eff(ArrayList<String> stories)
            throws ClassNotFoundException, InstantiationException, IllegalAccessException, Exception {
        ArrayList<Double> WeiOfClust = new ArrayList<Double>();
        ArrayList<Double> NoOfClust = new ArrayList<Double>();

        for (String story : stories) {
            // top terms of the story, keyed by the list of terms sharing a stem
            HashMap<ArrayList<String>, Double> top = inf.TopTerms(story.replace("---", " "), true);

            // every term becomes "<term>;<value of its stem>"; a term looks like foo{fooing,fooed ...}
            ArrayList<String> terms = new ArrayList<String>();
            for (Map.Entry<ArrayList<String>, Double> entry : top.entrySet()) {
                for (String stem : entry.getKey()) {
                    terms.add(stem.split("\\{")[0] + ";" + entry.getValue());
                }
            }

            // clustering is only attempted when there are at least two terms
            ArrayList<String> clusters = new ArrayList<String>();
            if (terms.size() > 1) {
                KeyphraseClustering kl = new KeyphraseClustering(terms, 0, this.language, this.wn, this.wnde,
                        this.wnel);
                clusters = kl.getClusters();
            }

            double smallest = Double.POSITIVE_INFINITY;
            double maxclosure = 0.0;
            for (String cluster : clusters) {
                String phrase = cluster.replace(";", " ");
                // the largest closure a cluster can have is (#words - 1) * 1.0
                maxclosure += cluster.split(";").length - 1;
                if (!phrase.equalsIgnoreCase("")) {
                    double wei = MinClosure(phrase, story);
                    // single-word clusters have closure 0 and are ignored
                    if (wei != 0 && wei < smallest) {
                        smallest = wei;
                    }
                }
            }
            // fall back to the maximum possible closure when no cluster had a non-zero one
            WeiOfClust.add(Double.isInfinite(smallest) ? maxclosure : smallest);
            NoOfClust.add((double) clusters.size());
        }

        ArrayList<Double> rar = Rar_Eff(WeiOfClust);
        ArrayList<Double> recr = Rar_Eff(NoOfClust);

        Map<String, String> raef = new HashMap<String, String>();
        for (int i = 0; i < stories.size(); i++) {
            raef.put(stories.get(i), rar.get(i) + SCORE_SEPARATOR + recr.get(i));
        }
        return raef;
    }

    /**
     * Applies the shared rarity / recreational-effort formula
     * {@code 2 * value / max(values)} to every value.
     *
     * @param SomethingOfClust one value per story
     * @return one score per input value; all 0.0 when the maximum is 0, and an
     *         empty list for an empty input
     */
    public ArrayList<Double> Rar_Eff(ArrayList<Double> SomethingOfClust) {
        ArrayList<Double> score = new ArrayList<Double>();
        if (SomethingOfClust.isEmpty()) {
            // Collections.max would throw on an empty collection
            return score;
        }
        double max = Collections.max(SomethingOfClust);
        for (Double value : SomethingOfClust) {
            score.add(max == 0 ? 0.0 : 2.0 * value / max);
        }
        return score;
    }

    /**
     * Measures the creativity points of a new phrase given old phrases: the
     * share of old phrases whose closure is smaller than the new phrase's,
     * scaled to the range 0-10.
     *
     * @param new_phrase the phrase to score
     * @param phrases    the phrases to compare against
     * @return the score, or 0.0 when {@code phrases} is empty
     */
    public Double CreativityPoints(String new_phrase, ArrayList<String> phrases)
            throws ClassNotFoundException, InstantiationException, IllegalAccessException {
        if (phrases.isEmpty()) {
            return 0.0;
        }
        double neo = MinClosure(new_phrase, "");
        int beaten = 0;
        // every phrase is compared on its own, so duplicates count as often as they occur
        for (String old : phrases) {
            if (neo > MinClosure(old, "")) {
                beaten++;
            }
        }
        return beaten * 10.0 / phrases.size();
    }

    /**
     * Closure of the graph of the words of a phrase, based on
     * {@link #getDistance}: each word, taken in the reverse of the order returned
     * by the sorter, contributes its smallest distance (capped at 1.0) to any
     * word visited before it.
     *
     * @param phrase space-separated words
     * @param story  text in which the words are located; when empty the phrase itself is used
     * @return the sum of the contributions; 0.0 when the phrase holds no words
     */
    public double MinClosure(String phrase, String story) {
        // in case MinClosure is not called by ComputeRar_Eff
        if (story.equalsIgnoreCase("")) {
            story = phrase;
        }
        // every word mapped to its index in the story
        HashMap<String, Double> termIndex = new HashMap<String, Double>();
        for (String word : phrase.split(" ")) {
            termIndex.put(word, 1.0 * story.indexOf(word));
        }

        // the original comments describe this sort as descending, hence the reverse
        // traversal below to start from the first word
        LinkedHashMap<String, Double> sorted = inf.sortHashMapByValues(termIndex);
        ArrayList<String> ordered = new ArrayList<String>(sorted.keySet());
        if (ordered.isEmpty()) {
            // a phrase made only of spaces splits into no words at all
            return 0.0;
        }

        Set<String> visited = new HashSet<String>();
        // seed with the root word so that the first iteration can find it
        visited.add(ordered.get(ordered.size() - 1));

        double closure = 0.0;
        for (int i = ordered.size() - 1; i >= 0; i--) {
            String word = ordered.get(i);
            double min = 1.0;
            for (String known : visited) {
                double dist = getDistance(word, known);
                if (dist < min) {
                    min = dist;
                }
            }
            visited.add(word);
            closure += min;
        }
        return closure;
    }

    /**
     * Distance of two words as defined in the paper: 0.75 times the semantic
     * distance of the language's WordNet accessor plus 0.25 times the
     * Levenshtein distance divided by the mean length of the two words. For a
     * language other than "en", "de" or "el" the semantic part is 0.
     *
     * @param s1 first word
     * @param s2 second word
     * @return the combined distance
     */
    public double getDistance(String s1, String s2) {
        double sem = 0.0;
        if (this.language.equalsIgnoreCase("en")) {
            sem = 0.75 * wn.getDistance(s1, s2);
        } else if (this.language.equalsIgnoreCase("de")) {
            sem = 0.75 * wnde.getDistance(s1, s2);
        } else if (this.language.equalsIgnoreCase("el")) {
            sem = 0.75 * wnel.getDistance(s1, s2);
        }
        // 2.0 keeps the mean exact; integer division truncated it and could reach 0
        double meanLength = (s1.length() + s2.length()) / 2.0;
        double lev = 0.0;
        if (meanLength > 0) {
            lev = 0.25 * getLevenshteinDistance(s1, s2) / meanLength;
        }
        return sem + lev;
    }

    /**
     * Novelty of a set of stories as defined in the paper: twice the absolute
     * difference between a story's average semantic distance and that of all
     * stories combined.
     *
     * @param stories the stories to score
     * @return each story mapped to its novelty; stories without top terms map to -1.0
     */
    public Map<String, Double> Novelty(ArrayList<String> stories)
            throws ClassNotFoundException, InstantiationException, IllegalAccessException, IOException {
        StringBuilder allStories = new StringBuilder();
        Map<String, Double> novelties = new HashMap<String, Double>();
        Map<String, Double> stoped = new HashMap<String, Double>();

        for (String story : stories) {
            HashMap<ArrayList<String>, Double> top = inf.TopTerms(story.replace("---", " "), false);
            if (top.isEmpty()) {
                stoped.put(story, -1.0);
            }
            novelties.put(story, AvgSemDist(collectTerms(top)));
            // NOTE(review): the combined text keeps "---" while the per-story text
            // replaces it with a space - confirm this is intended.
            allStories.append("  ").append(story);
        }

        // average semantic distance of all the stories together
        HashMap<ArrayList<String>, Double> bigtop = inf.TopTerms(allStories.toString(), false);
        double novBig = AvgSemDist(collectTerms(bigtop));

        for (Map.Entry<String, Double> entry : novelties.entrySet()) {
            entry.setValue(2 * Math.abs(entry.getValue() - novBig));
        }
        // stories without top terms override their computed value with -1.0
        novelties.putAll(stoped);
        return novelties;
    }

    /** Flattens the key lists of a top-terms map into one set of terms. */
    private static Set<String> collectTerms(HashMap<ArrayList<String>, Double> top) {
        Set<String> terms = new HashSet<String>();
        for (ArrayList<String> stems : top.keySet()) {
            terms.addAll(stems);
        }
        return terms;
    }

    /**
     * Surprise of a story as defined in the paper: twice the summed absolute
     * change of the average semantic distance between consecutive fragments,
     * divided by the number of fragments minus one.
     *
     * <p>The caller's list is not modified.
     *
     * @param new_frag optional fragment treated as the last one; ignored when empty
     * @param frags    the fragments of the story, in order
     * @return the surprise, or 0.0 when {@code frags} is empty
     */
    public double Surprise(String new_frag, ArrayList<String> frags)
            throws ClassNotFoundException, InstantiationException, IllegalAccessException, IOException {
        if (frags.isEmpty()) {
            return 0.0;
        }
        ArrayList<String> sequence = new ArrayList<String>(frags);
        if (new_frag.length() > 0) {
            sequence.add(new_frag);
        }

        // average semantic distance of the first fragment (0.0 when it has no top terms)
        double older = 0.0;
        HashMap<ArrayList<String>, Double> top = inf.TopTerms(sequence.get(0), false);
        if (!top.isEmpty()) {
            older = AvgSemDist(collectTerms(top));
        }

        double dist = 0;
        for (int i = 1; i < sequence.size(); i++) {
            top = inf.TopTerms(sequence.get(i), false);
            if (top.isEmpty()) {
                // fragments without top terms are skipped but still count in the divisor
                continue;
            }
            double newer = AvgSemDist(collectTerms(top));
            dist += Math.abs(older - newer);
            // the new value becomes the reference for the next fragment
            older = newer;
        }

        double sur = 0.0;
        if (dist != 0) {
            sur = dist * 2 / (sequence.size() - 1);
        }
        return sur;
    }

    /**
     * Average semantic distance between the strings of a set as defined in the
     * paper. Each unordered pair is measured once; strings equal ignoring case
     * are not paired.
     *
     * <p>Per-language filters: for "el" empty strings are skipped; for any
     * language other than "el" and "de" the English accessor is used and a
     * string contributes as first element only when
     * {@code wn.getCommonPos} returns non-null for it.
     *
     * @param top the strings to compare
     * @return the mean distance over the measured pairs, or 0 when the sum is 0
     */
    public double AvgSemDist(Set<String> top) {
        boolean greek = language.equalsIgnoreCase("el");
        boolean german = !greek && language.equalsIgnoreCase("de");

        // double accumulator: a float would narrow every added distance
        double total = 0.0;
        int pairs = 0;
        Set<String> examined = new HashSet<String>();

        for (String key : top) {
            boolean usable;
            if (greek) {
                usable = !key.equalsIgnoreCase("");
            } else if (german) {
                usable = true;
            } else {
                usable = wn.getCommonPos(key) != null;
            }
            if (usable) {
                for (String other : top) {
                    if (key.equalsIgnoreCase(other) || examined.contains(other)) {
                        continue;
                    }
                    if (greek) {
                        if (other.equalsIgnoreCase("")) {
                            continue;
                        }
                        total += wnel.getDistance(key, other);
                    } else if (german) {
                        total += wnde.getDistance(key, other);
                    } else {
                        total += wn.getDistance(key, other);
                    }
                    pairs++;
                }
            }
            // once examined, a string is no longer paired as second element
            examined.add(key);
        }

        if (total == 0) {
            return 0;
        }
        return total / pairs;
    }

}