net.lldp.checksims.algorithm.similaritymatrix.SimilarityMatrix.java Source code

Java tutorial

Introduction

Here is the source code for net.lldp.checksims.algorithm.similaritymatrix.SimilarityMatrix.java

Source

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * Copyright (c) 2014-2015 Nicholas DeMarinis, Matthew Heon, and Dolan Murvihill
 */

package net.lldp.checksims.algorithm.similaritymatrix;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Ordering;

import net.lldp.checksims.algorithm.AlgorithmResults;
import net.lldp.checksims.algorithm.InternalAlgorithmError;
import net.lldp.checksims.submission.NoSuchSubmissionException;
import net.lldp.checksims.submission.Submission;
import net.lldp.checksims.util.data.Real;

import org.apache.commons.lang3.tuple.Pair;

import java.util.*;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;

/**
 * A Similarity Matrix represents the similarities between a given group of submissions.
 *
 * TODO consider offering Iterators for the entire similarity matrix, and for individual submissions on the X axis
 */
public final class SimilarityMatrix {
    private final AlgorithmResults[][] entries;
    private final ImmutableList<Submission> xSubmissions;
    private final ImmutableList<Submission> ySubmissions;
    private final ImmutableSet<AlgorithmResults> builtFrom;

    /**
     * Create a Similarity Matrix with given parameters. Internal constructor used by factory methods.
     *
     * Lists, not sets, of submissions, to ensure we have an ordering. We maintain the invariant that there are no
     * duplicates in the factories.
     *
     * @param entries      The matrix itself
     * @param xSubmissions Submissions on the X axis
     * @param ySubmissions Submissions on the Y axis
     * @param builtFrom    Set of Algorithm Results used to build the matrix
     */
    protected SimilarityMatrix(AlgorithmResults[][] entries, List<Submission> xSubmissions,
            List<Submission> ySubmissions, Set<AlgorithmResults> builtFrom) {
        checkNotNull(entries);
        checkNotNull(xSubmissions);
        checkNotNull(ySubmissions);
        checkNotNull(builtFrom);
        checkArgument(!xSubmissions.isEmpty(),
                "Cannot make similarity matrix with empty list of submissions to be compared!");
        checkArgument(!ySubmissions.isEmpty(),
                "Cannot make similarity matrix with empty list of submissions to compare to!");
        checkArgument(xSubmissions.size() == entries.length,
                "Array size mismatch when creating Similarity Matrix - X direction, found " + xSubmissions.size()
                        + ", expecting " + entries.length);
        checkArgument(ySubmissions.size() == entries[0].length,
                "Array size mismatch when creating Similarity Matrix - Y direction, found " + ySubmissions.size()
                        + ", expecting " + entries[0].length);
        checkArgument(!builtFrom.isEmpty(),
                "Must provide Algorithm Results used to build similarity matrix - instead got empty set!");

        this.entries = entries;
        this.xSubmissions = ImmutableList.copyOf(xSubmissions);
        this.ySubmissions = ImmutableList.copyOf(ySubmissions);
        this.builtFrom = ImmutableSet.copyOf(builtFrom);
    }

    /**
     * @param index Index of submission to retrieve
     * @return Submission for the given row in the array
     */
    public Submission getXSubmission(int index) {
        checkArgument(index >= 0, "Index into X submissions must be greater than 0!");
        checkArgument(index < xSubmissions.size(),
                "Index into X submissions must be less than X submissions size (" + xSubmissions.size() + ")!");

        return xSubmissions.get(index);
    }

    /**
     * @return List of submissions used to build the X axis, in order they are used
     */
    public ImmutableList<Submission> getXSubmissions() {
        return xSubmissions;
    }

    /**
     * @param index Index of submission to retrieve
     * @return Submission for the given column in the array
     */
    public Submission getYSubmission(int index) {
        checkArgument(index >= 0, "Index into Y submissions must be greater than 0!");
        checkArgument(index < ySubmissions.size(),
                "Index into Y submissions must be less than Y submissions size (" + ySubmissions.size() + ")!");

        return ySubmissions.get(index);
    }

    /**
     * @return List of submissions used to build the Y axis, in order they are used
     */
    public ImmutableList<Submission> getYSubmissions() {
        return ySubmissions;
    }

    /**
     * @return Size of the Similarity Matrix
     */
    public Pair<Integer, Integer> getArrayBounds() {
        return Pair.of(xSubmissions.size(), ySubmissions.size());
    }

    /**
     * @return Get the Algorithm Results that were used to build this similarity matrix
     */
    public ImmutableSet<AlgorithmResults> getBaseResults() {
        return builtFrom;
    }

    /**
     * Get similarities for one submission compared to another.
     *
     * @param xIndex Index into similarity matrix on the X axis
     * @param yIndex Index into similarity matrix on the Y axis
     * @return Matrix Entry for given X and Y index
     */
    public AlgorithmResults getEntryFor(int xIndex, int yIndex) {
        checkArgument(xIndex >= 0, "X index must be greater than 0!");
        checkArgument(xIndex < xSubmissions.size(),
                "X index must be less than X submissions size (" + xSubmissions.size() + ")!");
        checkArgument(yIndex >= 0, "Y index must be greater than 0!");
        checkArgument(yIndex < ySubmissions.size(),
                "Y index must be less than Y submissions size (" + ySubmissions.size() + ")!");

        return entries[xIndex][yIndex];
    }

    /**
     * Get similarity of X submission to Y submission.
     *
     * @param xSubmission Submission to get similarities for
     * @param ySubmission Submission to get similarities relative to
     * @return Similarities of xSubmission to ySubmission
     * @throws NoSuchSubmissionException Thrown if either xSubmission or ySubmission are not present in the matrix
     */
    public AlgorithmResults getEntryFor(Submission xSubmission, Submission ySubmission)
            throws NoSuchSubmissionException {
        checkNotNull(xSubmission);
        checkNotNull(ySubmission);

        if (!xSubmissions.contains(xSubmission)) {
            throw new NoSuchSubmissionException(
                    "X Submission with name " + xSubmission.getName() + " not found in similarity matrix!");
        } else if (!ySubmissions.contains(ySubmission)) {
            throw new NoSuchSubmissionException(
                    "Y Submission with name " + ySubmission.getName() + " not found in similarity matrix!");
        }

        int xIndex = xSubmissions.indexOf(xSubmission);
        int yIndex = ySubmissions.indexOf(ySubmission);

        return entries[xIndex][yIndex];
    }

    @Override
    public String toString() {
        return "A similarity matrix comparing " + xSubmissions.size() + " submissions to " + ySubmissions.size();
    }

    @Override
    public int hashCode() {
        return builtFrom.stream().mapToInt(AlgorithmResults::hashCode).sum();
    }

    @Override
    public boolean equals(Object other) {
        if (!(other instanceof SimilarityMatrix)) {
            return false;
        }

        SimilarityMatrix otherMatrix = (SimilarityMatrix) other;

        return otherMatrix.builtFrom.equals(builtFrom) && otherMatrix.xSubmissions.equals(xSubmissions)
                && otherMatrix.ySubmissions.equals(ySubmissions) && Arrays.deepEquals(otherMatrix.entries, entries);
    }

    /**
     * Generate a similarity matrix from a given set of submissions.
     *
     * @param inputSubmissions Submissions to generate from
     * @param results Results to build from. Must contain results for every possible unordered pair of input submissions
     * @return Similarity Matrix built from given results
     * @throws InternalAlgorithmError Thrown on missing results, or results containing a submission not in the input
     */
    public static SimilarityMatrix generateMatrix(Set<Submission> inputSubmissions, Set<AlgorithmResults> results)
            throws InternalAlgorithmError {
        checkNotNull(inputSubmissions);
        checkNotNull(results);
        checkArgument(!inputSubmissions.isEmpty(), "Must provide at least 1 submission to build matrix from");
        checkArgument(!results.isEmpty(), "Must provide at least 1 AlgorithmResults to build matrix from!");

        // Generate the matrix we'll use
        AlgorithmResults[][] matrix = new AlgorithmResults[inputSubmissions.size()][inputSubmissions.size()];

        //Ordering sortBy = Ordering.natural();
        Ordering<Submission> sortBy = Ordering.from(new Comparator<Submission>() {
            public int compare(Submission a, Submission b) {
                return ((Double) b.getTotalCopyScore()).compareTo(a.getTotalCopyScore());
            }
        });

        // Order the submissions
        List<Submission> orderedSubmissions = sortBy.immutableSortedCopy(inputSubmissions);

        // Generate the matrix

        // Start with the diagonal, filling with 100% similarity
        for (int i = 0; i < orderedSubmissions.size(); i++) {
            Submission s = orderedSubmissions.get(i);

            matrix[i][i] = new AlgorithmResults(Pair.of(s, s), Real.ONE, Real.ONE);
        }

        // Now go through all the results, and build appropriate two MatrixEntry objects for each
        for (AlgorithmResults result : results) {
            int aIndex = orderedSubmissions.indexOf(result.a);
            int bIndex = orderedSubmissions.indexOf(result.b);

            if (aIndex == -1) {
                if (!result.a.testFlag("invalid")) {
                    throw new InternalAlgorithmError(
                            "Processed Algorithm Result with submission not in given input submissions with name \""
                                    + result.a.getName() + "\"");
                }
            } else if (bIndex == -1) {
                if (!result.b.testFlag("invalid")) {
                    throw new InternalAlgorithmError(
                            "Processed Algorithm Result with submission not in given input submissions with name \""
                                    + result.b.getName() + "\"");
                }
            } else {
                matrix[aIndex][bIndex] = result.inverse();
                matrix[bIndex][aIndex] = result;
            }
        }

        // Verification pass: Go through and ensure that the entire array was populated
        for (int x = 0; x < orderedSubmissions.size(); x++) {
            for (int y = 0; y < orderedSubmissions.size(); y++) {
                if (matrix[x][y] == null) {
                    throw new InternalAlgorithmError("Missing Algorithm Results for comparison of submissions \""
                            + orderedSubmissions.get(x).getName() + "\" and \""
                            + orderedSubmissions.get(y).getName() + "\"");
                }
            }
        }

        return new SimilarityMatrix(matrix, orderedSubmissions, orderedSubmissions, results);
    }

    /**
     * Generate a Similarity Matrix with archive submissions.
     *
     * The result is not a square matrix. Only the input submissions are on the X axis, but the Y axis contains both
     * input and archive submissions.
     *
     * @param inputSubmissions Submissions used to generate matrix
     * @param archiveSubmissions Archive submissions - only compared to input submissions, not to each other
     * @param results Results used to build matrix
     * @return Similarity matrix built from given results
     * @throws InternalAlgorithmError Thrown on missing results, or results containing a submission not in the input
     */
    public static SimilarityMatrix generateMatrix(Set<Submission> inputSubmissions,
            Set<Submission> archiveSubmissions, Set<AlgorithmResults> results) throws InternalAlgorithmError {
        checkNotNull(inputSubmissions);
        checkNotNull(archiveSubmissions);
        checkNotNull(results);
        checkArgument(!inputSubmissions.isEmpty(), "Must provide at least 1 submission to build matrix from");
        checkArgument(!results.isEmpty(), "Must provide at least 1 AlgorithmResults to build matrix from!");

        Set<Submission> setOfBoth = new HashSet<>();
        setOfBoth.addAll(inputSubmissions);
        setOfBoth.addAll(archiveSubmissions);

        checkArgument(setOfBoth.size() == (archiveSubmissions.size() + inputSubmissions.size()),
                "Some submissions were found in both archive and input submissions!");

        // If there are no archive submissions, just generate using the other function
        if (archiveSubmissions.isEmpty()) {
            return generateMatrix(inputSubmissions, results);
        }

        List<Submission> xSubmissions = Ordering.natural().immutableSortedCopy(inputSubmissions);
        List<Submission> ySubmissions = new ArrayList<>();
        ySubmissions.addAll(Ordering.natural().immutableSortedCopy(inputSubmissions));
        ySubmissions.addAll(Ordering.natural().immutableSortedCopy(archiveSubmissions));

        AlgorithmResults[][] matrix = new AlgorithmResults[xSubmissions.size()][ySubmissions.size()];

        // Generate the matrix

        // First, handle identical submissions
        for (Submission xSub : xSubmissions) {
            // Get the X index
            int xIndex = xSubmissions.indexOf(xSub);
            int yIndex = ySubmissions.indexOf(xSub);

            matrix[xIndex][yIndex] = new AlgorithmResults(Pair.of(xSub, xSub), Real.ONE, Real.ONE);
        }

        // Now iterate through all given algorithm results
        for (AlgorithmResults result : results) {
            int aXCoord = xSubmissions.indexOf(result.a);
            int bXCoord = xSubmissions.indexOf(result.b);

            if (aXCoord == -1 && bXCoord == -1) {
                throw new InternalAlgorithmError("Neither submission \"" + result.a.getName() + "\" nor \""
                        + result.b.getName() + "\" were found in input submissions!");
            }

            if (aXCoord != -1) {
                int bYCoord = ySubmissions.indexOf(result.b);

                matrix[aXCoord][bYCoord] = result.inverse();
            }

            if (bXCoord != -1) {
                int aYCoord = ySubmissions.indexOf(result.a);

                matrix[bXCoord][aYCoord] = result;
            }
        }

        // Verification pass - ensure we built a matrix with no nulls
        for (int x = 0; x < xSubmissions.size(); x++) {
            for (int y = 0; y < ySubmissions.size(); y++) {
                if (matrix[x][y] == null) {
                    throw new InternalAlgorithmError("Missing Algorithm Results for comparison of submissions \""
                            + xSubmissions.get(x).getName() + "\" and \"" + ySubmissions.get(y).getName() + "\"");
                }
            }
        }

        return new SimilarityMatrix(matrix, xSubmissions, ySubmissions, results);
    }
}