Java examples for java.lang:Math Vector
Computes the similarity of the two vectors.
/** A collection of mathematical utility functions. * <p>//from w w w . j ava 2 s . c o m * Copyright (c) 2008 Eric Eaton * <p> * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * <p> * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * <p> * You should have received a copy of the GNU General Public License * along with this program. If not, see http://www.gnu.org/licenses/. * * @author Eric Eaton (EricEaton@umbc.edu) <br> * University of Maryland Baltimore County * * @version 0.1 * */ import java.util.Arrays; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashSet; import java.util.Random; public class Main{ final static double LOG2 = Math.log(2); /** * Computes the similarity of the two vectors. * @param targetV the target vector that forms the basis for comparison. * @param v the vector for comparison * @param similarityMetric the metric to use for computing the similarity * @return The similarity of the two models. */ public static double computeSimilarity(int[] targetV, int[] v, SimilarityMetric similarityMetric) { switch (similarityMetric) { case CORRELATION: return MathUtils.correlation(targetV, v); case MUTUAL_INFORMATION: return MathUtils.mutualInformation(targetV, v); case ACCURACY: return MathUtils.pairwiseAgreement(targetV, v); } return Double.NaN; } /** Computes the correlation between two arrays of the same length, p and q. * Computes the correlation between p and q as * r = (|p| * \sum_i(p[i]*q[i]) - \sum_i(p[i]) * \sum_i(q[i]))/ * sqrt((|p| * \sum_i((p[i])^2) - (\sum_i(p[i]))^2) * * (|p| * \sum_i((p[i])^2) - (\sum_i(p[i]))^2)) * This correlation can be tested for statistical significance via t-tests. * See e.g.: http://www.socialresearchmethods.net/kb/statcorr.htm * @return The correlation between the elements of the two arrays. */ public static double correlation(int[] p, int[] q) { if (p == null || q == null) { throw new IllegalArgumentException("p and q cannot be null"); } if (p.length != q.length) { throw new IllegalArgumentException( "p and q must be the same length"); } // compute the sums and squared sums int sumP = 0; int sumQ = 0; int sumPSquared = 0; int sumQSquared = 0; int sumPQ = 0; for (int i = 0; i < p.length; i++) { sumP += p[i]; sumQ += q[i]; sumPSquared += p[i] * p[i]; sumQSquared += q[i] * q[i]; sumPQ += p[i] * q[i]; } // compute the correlation double r = ((double) (p.length * sumPQ - sumP * sumQ)) / Math.sqrt(((long) (p.length * sumPSquared - sumP * sumP)) * ((long) (p.length * sumQSquared - sumQ * sumQ))); return r; } /** Computes the correlation between two arrays of the same length, p and q. * Computes the correlation between p and q as * r = (|p| * \sum_i(p[i]*q[i]) - \sum_i(p[i]) * \sum_i(q[i]))/ * sqrt((|p| * \sum_i((p[i])^2) - (\sum_i(p[i]))^2) * * (|p| * \sum_i((p[i])^2) - (\sum_i(p[i]))^2)) * This correlation can be tested for statistical significance via t-tests. * See e.g.: http://www.socialresearchmethods.net/kb/statcorr.htm * @return The correlation between the elements of the two arrays. */ public static double correlation(double[] p, double[] q) { if (p == null || q == null) { throw new IllegalArgumentException("p and q cannot be null"); } if (p.length != q.length) { throw new IllegalArgumentException( "p and q must be the same length"); } // compute the sums and squared sums double sumP = 0; double sumQ = 0; double sumPSquared = 0; double sumQSquared = 0; double sumPQ = 0; for (int i = 0; i < p.length; i++) { sumP += p[i]; sumQ += q[i]; sumPSquared += p[i] * p[i]; sumQSquared += q[i] * q[i]; sumPQ += p[i] * q[i]; } // compute the correlation double r = (p.length * sumPQ - sumP * sumQ) / Math.sqrt((p.length * sumPSquared - sumP * sumP) * (p.length * sumQSquared - sumQ * sumQ)); return r; } /** Computes the mutual information between two vectors. * @param p the first vector. * @param q the second vector. * @return the mutual information between p and q. */ public static double mutualInformation(int[] p, int[] q) { double[][] confusionMatrix = getConfusionMatrix(p, q); // get the row and col sums of the confusion matrix double[] rowsum = new double[confusionMatrix.length]; double[] colsum = new double[confusionMatrix.length]; for (int i = 0; i < confusionMatrix.length; i++) { for (int j = 0; j < confusionMatrix.length; j++) { rowsum[i] += confusionMatrix[i][j]; colsum[j] += confusionMatrix[i][j]; } } // compute the mutual information double mutualInformation = 0; for (int i = 0; i < confusionMatrix.length; i++) { for (int j = 0; j < confusionMatrix.length; j++) { double deltaMI = 0; // if entry is not 0, then the deltaMI shouldn't be 0 if (confusionMatrix[i][j] != 0) { deltaMI = confusionMatrix[i][j] * log2(confusionMatrix[i][j] / (rowsum[i] * colsum[j])); } if (Double.isNaN(deltaMI)) { throw new IllegalStateException("MI is NaN!"); } mutualInformation += deltaMI; } } return mutualInformation; } /** Computes the pairwise agreement between two pairwise arrays of labelings. * The pairwise agreement is the number-of-pairs-in-agreement / the-total-number-of-pairs. * The two arrays must be the same length. * @param p An array of labels. * @param q An array of labels. * @return The pairwise agreement between the labelings in p and q. */ public static double pairwiseAgreement(int[] p, int[] q) { if (p == null || q == null) { throw new IllegalArgumentException("p and q cannot be null"); } if (p.length != q.length) { throw new IllegalArgumentException( "p and q must be the same length"); } int numSamePairs = 0; for (int i = 0; i < p.length; i++) { if (p[i] == q[i]) numSamePairs++; } return ((double) numSamePairs) / p.length; } /** Computes the normalized confusion matrix for two vectors. * @param p the first vector * @param q the second vector * @return the normalized confusion matrix for p and q */ public static double[][] getConfusionMatrix(int[] p, int[] q) { if (p.length != q.length) { throw new IllegalArgumentException( "p and q must be the same length."); } int[] classes = uniqueValues(append(p, q)); int n = p.length; // compute the confusion matrix double[][] confusionMatrix = new double[classes.length][classes.length]; for (int i = 0; i < n; i++) { // determine the classIdx of p[i] int piClassIdx; for (piClassIdx = 0; piClassIdx < classes.length; piClassIdx++) { if (p[i] == classes[piClassIdx]) break; } // determine the classIdx of q[i] int qiClassIdx; for (qiClassIdx = 0; qiClassIdx < classes.length; qiClassIdx++) { if (q[i] == classes[qiClassIdx]) break; } // increment the counter in the confusion matrix confusionMatrix[piClassIdx][qiClassIdx]++; } // normalize the confusion matrix for (int i = 0; i < confusionMatrix.length; i++) { for (int j = 0; j < confusionMatrix.length; j++) { confusionMatrix[i][j] /= n; } } return confusionMatrix; } /** Computes the log-base-2 of a number. * @param d * @return the log-base-2 of d */ public static double log2(double d) { return Math.log(d) / LOG2; } /** Determines the unique values of v. The values are returned in no particular order. * @param v * @return the unique values of v in no particular order. */ public static int[] uniqueValues(int[] v) { // form the values into a set, which automatically removes duplicates HashSet<Integer> uniqueValues = new HashSet<Integer>(); for (int i = 0; i < v.length; i++) { uniqueValues.add(v[i]); } // convert the set back into an array int[] vUnique = new int[uniqueValues.size()]; int i = 0; for (Integer uniqueValue : uniqueValues) { vUnique[i++] = uniqueValue; } return vUnique; } /** Determines the unique values of v. The values are returned in no particular order. * @param v * @return the unique values of v in no particular order. */ public static double[] uniqueValues(double[] v) { // form the values into a set, which automatically removes duplicates HashSet<Double> uniqueValues = new HashSet<Double>(); for (int i = 0; i < v.length; i++) { uniqueValues.add(v[i]); } // convert the set back into an array double[] vUnique = new double[uniqueValues.size()]; int i = 0; for (Double uniqueValue : uniqueValues) { vUnique[i++] = uniqueValue; } return vUnique; } /** Appends an element to a vector. * @param v1 the vector. * @param d the element to append. * @return A vector containing all the elements of v1 followed * by d. */ public static int[] append(int[] v1, int d) { int[] newVector = new int[v1.length + 1]; System.arraycopy(v1, 0, newVector, 0, v1.length); newVector[v1.length] = d; return newVector; } /** Appends an element to a vector. * @param v1 the vector. * @param d the element to append. * @return A vector containing all the elements of v1 followed * by d. */ public static double[] append(double[] v1, double d) { double[] newVector = new double[v1.length + 1]; System.arraycopy(v1, 0, newVector, 0, v1.length); newVector[v1.length] = d; return newVector; } /** Appends two vectors. * @param v1 the first vector. * @param v2 the second vector. * @return A vector containing all the elements of v1 followed * by all the elements of v2. */ public static double[] append(double[] v1, double[] v2) { double[] newVector = new double[v1.length + v2.length]; System.arraycopy(v1, 0, newVector, 0, v1.length); System.arraycopy(v2, 0, newVector, v1.length, v2.length); return newVector; } /** Appends two vectors. * @param v1 the first vector. * @param v2 the second vector. * @return A vector containing all the elements of v1 followed * by all the elements of v2. */ public static int[] append(int[] v1, int[] v2) { int[] newVector = new int[v1.length + v2.length]; System.arraycopy(v1, 0, newVector, 0, v1.length); System.arraycopy(v2, 0, newVector, v1.length, v2.length); return newVector; } }