trustframework.utils.Similarity.java Source code

Java tutorial

Introduction

Here is the source code for trustframework.utils.Similarity.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package trustframework.utils;

import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.commons.math3.linear.ArrayRealVector;
import org.apache.commons.math3.linear.RealVector;

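/**
 * Static helpers for computing similarity measures: cosine similarity
 * between frequency maps and Jaccard similarity between collections.
 *
 * @author Guilherme
 */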
public class Similarity {

    /**
     * Calculates cosine similarity between two frequencies
     * @param <T> Hash key class
     * @param freq1 frequency 1
     * @param freq2 frequency 2
     * @return cosine similarity [0,1]
     */
    public static <T> double cosineSimilarity(Map<T, Integer> freq1, Map<T, Integer> freq2) {
        Set<T> terms = new HashSet<>();
        terms.addAll(freq1.keySet());
        terms.addAll(freq2.keySet());
        RealVector v1 = buildRealVector(freq1, terms);
        RealVector v2 = buildRealVector(freq2, terms);
        return (v1.dotProduct(v2)) / (v1.getNorm() * v2.getNorm());
    }

    /**
     * Build a real vector from frequencies
     * @param <T> Type of hask keys
     * @param freq hash with frequencies
     * @param keys set of all keys available
     * @return 
     */
    private static <T> RealVector buildRealVector(Map<T, Integer> freq, Set<T> keys) {
        RealVector vector = new ArrayRealVector(keys.size());
        int i = 0;
        for (T term : keys) {
            int value = freq.containsKey(term) ? freq.get(term) : 0;
            vector.setEntry(i++, value);
        }
        vector = vector.mapDivide(vector.getL1Norm());
        return vector;
    }

    /**
     * Calculates jaccard similarity between two collections
     * @param <T> Type of collection elements
     * @param c1 collection 1
     * @param c2 collection 2
     * @return jaccard similarity [0,1]
     */
    public static <T> double jaccardSimilarity(Collection<T> c1, Collection<T> c2) {
        Set<T> intersection = new HashSet<>(c1);
        intersection.retainAll(c2);
        Set<T> union = new HashSet<>(c1);
        union.addAll(c2);
        return (double) intersection.size() / (double) union.size();
    }
}