org.clueminer.chameleon.GraphPropertyStore.java Source code

Java tutorial

Introduction

Here is the source code for org.clueminer.chameleon.GraphPropertyStore.java

Source

/*
 * Copyright (C) 2011-2015 clueminer.org
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.clueminer.chameleon;

import com.google.common.collect.HashBasedTable;
import com.google.common.collect.Table;
import java.io.PrintWriter;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.text.NumberFormat;
import java.util.Locale;
import org.apache.commons.math3.util.FastMath;

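/**
 * Storage of a triple (EIC, ECL and a counter) for every unordered pair of
 * items. Pairs of the first n items (the leaves) are kept in a linear array
 * that holds the lower triangle of an n x n matrix without its diagonal, i.e.
 * n*(n-1)/2 cells, which is roughly n^2 / 2.
 *
 * We need space for the n*n similarity matrix + similarities of a binary tree
 * with height log2(n). Each inner node of the tree (a new cluster) computes a
 * distance to the clusters that are not merged yet. While distances between
 * leaves form a dense matrix, distances involving inner nodes are sparse and
 * are kept in a hash-based table instead.
 *
 * The dense part is a single Java array, so n*(n-1)/2 has to fit into an int
 * index; this holds for n up to 65536 (65536 * 65535 / 2 = 2147450880).
 *
 * @author deric
 */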
public class GraphPropertyStore {

    /**
     * Largest supported capacity: 65536 * 65535 / 2 = 2147450880 is the last
     * triangle size that still fits into an int array index.
     */
    private static final int MAX_CAPACITY = 65536;

    //dimension of the matrix
    private final int n;
    //not read anywhere in this class, only exposed through getter and setter
    private double defaultValue = 0.0;

    //lower triangle (without diagonal) for pairs where both indexes are < n
    private final ExtProp[] store;
    //pairs where at least one index is >= n, keyed as (larger, smaller) index
    private final Table<Integer, Integer, ExtProp> sparse;

    /**
     * Create a store with a dense part for pairs of the first {@code capacity}
     * items; pairs with any index at or above {@code capacity} go to the sparse
     * table.
     *
     * @param capacity number of rows (or columns) of the dense matrix
     * @throws IllegalArgumentException when capacity is negative or so large
     *                                  that the triangle size does not fit into
     *                                  an int array index
     */
    public GraphPropertyStore(int capacity) {
        if (capacity < 0 || capacity > MAX_CAPACITY) {
            throw new IllegalArgumentException(
                    "capacity must be in [0, " + MAX_CAPACITY + "], got " + capacity);
        }
        this.n = capacity;
        //cells are created lazily, the array starts filled with nulls
        store = new ExtProp[triangleSize(capacity)];
        sparse = HashBasedTable.create();
    }

    /**
     * Compute size of triangular matrix (n x n) minus diagonal
     *
     * @param n number of rows (or columns) for square matrix
     * @return n * (n - 1) / 2
     */
    private int triangleSize(int n) {
        //multiply as long: the int product overflows for n > 46341
        return (int) (((long) n * (n - 1)) >>> 1);
    }

    /**
     * Estimate the number of inner nodes of a binary tree from its leaf count.
     * Evaluates floor(2^log2(leaves) - 1) in floating point arithmetic.
     *
     * @param leaves number of leaves
     * @return number of inner nodes as computed by the formula above
     */
    protected final int innerTreeNodes(int leaves) {
        //height of a binary tree with {capacity} nodes
        double h = FastMath.log(2, leaves);
        //total number of inner nodes of a binary tree
        return (int) Math.floor(FastMath.pow(2, h) - 1);
    }

    /**
     * Return an index where is actually item stored
     *
     * A simple hash function for storing lower triangular matrix in one
     * dimensional array
     *
     * i should not be equal to j (diagonal numbers are not stored!)
     *
     * @param i row index
     * @param j column index
     * @return index in one-dimensional array
     */
    private int map(int i, int j) {
        //matrix is symmetrical, always address the lower triangle (row > col)
        int row = Math.max(i, j);
        int col = Math.min(i, j);
        /**
         * it's basically a sum of arithmetic row (we need to know how many
         * numbers could be allocated before given position [x,y])
         */
        return triangleSize(row) + col;
    }

    /**
     * Validate a dense (both indexes below n) off-diagonal position and
     * translate it to an array index.
     *
     * Without the check a negative index could be mapped to a valid slot that
     * belongs to a different pair.
     *
     * @param i row index, must differ from j
     * @param j column index
     * @return index in the dense array
     * @throws ArrayIndexOutOfBoundsException when any index is negative
     */
    private int denseIndex(int i, int j) {
        if (i < 0 || j < 0) {
            throw new ArrayIndexOutOfBoundsException(
                    "was getting [" + i + ", " + j + "], length = " + store.length);
        }
        return map(i, j);
    }

    /**
     * Fetch properties of the pair (i, j); the order of indexes does not
     * matter. A missing cell is created (with all values set to zero), stored
     * and returned, so the result is never null.
     *
     * Diagonal cells of the dense part have no slot in the array. Reading one
     * returns an empty object that is NOT stored, instead of aliasing the cell
     * of another pair.
     *
     * @param i row index
     * @param j column index
     * @return properties stored for given pair
     * @throws ArrayIndexOutOfBoundsException when both indexes are below the
     *                                        capacity and any of them is
     *                                        negative
     */
    public ExtProp get(int i, int j) {
        if (i >= n || j >= n) {
            //sparse storage, keyed by (larger, smaller) index
            int row = Math.max(i, j);
            int col = Math.min(i, j);
            //the table never holds null values, so null means "absent"
            ExtProp p = sparse.get(row, col);
            if (p == null) {
                p = new ExtProp();
                sparse.put(row, col, p);
            }
            return p;
        }
        if (i == j && i >= 0) {
            //map(i, i) would point to the slot of the pair (i + 1, 0)
            return new ExtProp();
        }
        int idx = denseIndex(i, j);
        ExtProp p = store[idx];
        if (p == null) {
            p = new ExtProp();
            store[idx] = p;
        }
        return p;
    }

    /**
     * Sum of edge weights recorded for given pair
     *
     * @param i
     * @param j
     * @return EIC value of the pair
     */
    public double getEIC(int i, int j) {
        return get(i, j).EIC;
    }

    /**
     * Value of ECL for given pair; {@link #updateWeight(int, int, double)}
     * maintains it as EIC divided by the counter.
     *
     * @param i
     * @param j
     * @return ECL value of the pair
     */
    public double getECL(int i, int j) {
        return get(i, j).ECL;
    }

    /**
     * Counter value - number of edges that contributed to EIC weights sum
     *
     * @param i
     * @param j
     * @return
     */
    public double getCnt(int i, int j) {
        return get(i, j).counter;
    }

    /**
     * Update interconnectivity and closeness values
     *
     * @param i
     * @param j
     * @param edgeWeight weight added to EIC; the counter is increased by one
     *                   and ECL is recomputed as EIC / counter
     * @throws IllegalArgumentException when i equals j
     */
    public void updateWeight(int i, int j, double edgeWeight) {
        if (i == j) {
            throw new IllegalArgumentException("diagonal items are not writable");
        }
        //get() stores a newly created cell, no need to write it back
        ExtProp p = get(i, j);
        p.EIC += edgeWeight;
        p.counter += 1;
        p.ECL = p.EIC / p.counter;
    }

    /**
     * Directly set all values
     *
     * @param i
     * @param j
     * @param eic
     * @param ecl
     * @param cnt counter value, truncated to int
     * @throws IllegalArgumentException when i equals j
     */
    public void set(int i, int j, double eic, double ecl, double cnt) {
        if (i == j) {
            //same contract as updateWeight; a dense diagonal write would
            //otherwise overwrite the cell of another pair
            throw new IllegalArgumentException("diagonal items are not writable");
        }
        ExtProp p = get(i, j);
        p.EIC = eic;
        p.ECL = ecl;
        p.counter = (int) cnt;
    }

    /**
     * Print content of the store to standard output
     */
    public void dump() {
        printFancy(2, 2);
    }

    /**
     * Print content of the store to standard output
     *
     * @param w column width (two more characters are added)
     * @param d number of fraction digits
     */
    public void printFancy(int w, int d) {
        DecimalFormat format = new DecimalFormat();
        format.setDecimalFormatSymbols(new DecimalFormatSymbols(Locale.US));
        format.setMinimumIntegerDigits(1);
        format.setMaximumFractionDigits(d);
        format.setMinimumFractionDigits(d);
        format.setGroupingUsed(false);
        printFancy(new PrintWriter(System.out, true), format, w + 2);
    }

    /**
     * Print the dense array (one line per slot, labelled by its linear index)
     * followed by all entries of the sparse table. Slots that were never
     * touched are printed as zeros and are not allocated by printing.
     *
     * @param output target writer
     * @param format used for EIC and ECL values; when null, the default
     *               conversion of double to String is used
     * @param width  width of the label column
     */
    public void printFancy(PrintWriter output, NumberFormat format, int width) {
        String s;
        int padding;
        output.println(); // start on new line.
        for (int i = 0; i < store.length; i++) {
            //print row label
            s = String.valueOf(i);
            padding = Math.max(1, width - s.length() - 1);
            for (int k = 0; k < padding; k++) {
                output.print(' ');
            }
            output.print(s);
            output.print(" |");
            //cells are created lazily, an untouched slot is still null
            ExtProp cell = store[i];
            double eic = (cell == null) ? 0.0 : cell.EIC;
            double ecl = (cell == null) ? 0.0 : cell.ECL;
            int cnt = (cell == null) ? 0 : cell.counter;
            output.print(formatValue(format, eic) + ", ");
            output.print(formatValue(format, ecl) + ", ");
            output.println(cnt);
        }
        //footer
        for (int i = 0; i < width * (3 + 1); i++) {
            output.print('-');
        }
        output.println();
        for (int k = 0; k < width; k++) {
            output.print(' ');
        }
        String[] values = new String[] { "EIC", "ECL", "CNT" };
        for (String value : values) {
            s = value;
            padding = Math.max(1, width - s.length()); // At _least_ 1 space
            for (int k = 0; k < padding; k++) {
                output.print(' ');
            }
            output.print(s);
        }
        output.println();

        output.println("== sparse (" + sparse.size() + "): ");

        for (Table.Cell<Integer, Integer, ExtProp> entry : sparse.cellSet()) {
            output.print(entry.getRowKey() + ", " + entry.getColumnKey() + ": ");
            ExtProp cell = entry.getValue();
            output.print("EIC= " + formatValue(format, cell.EIC) + ", ");
            output.print("ECL= " + formatValue(format, cell.ECL) + ", ");
            output.println("CNT= " + cell.counter);
        }
        output.flush();
    }

    /**
     * Convert a value to text using given format, or the default conversion
     * when no format was supplied.
     */
    private static String formatValue(NumberFormat format, double value) {
        return (format == null) ? String.valueOf(value) : format.format(value);
    }

    /**
     * @return number of slots in the dense array, i.e. n*(n-1)/2 for the
     *         capacity n passed to the constructor (not n itself)
     */
    public int getCapacity() {
        return store.length;
    }

    public double getDefaultValue() {
        return defaultValue;
    }

    public void setDefaultValue(double defaultValue) {
        this.defaultValue = defaultValue;
    }

    /**
     * Mutable triple stored for each pair: EIC, ECL and the number of updates
     * that contributed to EIC.
     */
    protected class ExtProp {

        public double EIC, ECL;
        public int counter;

        public ExtProp() {
            EIC = ECL = counter = 0;
        }
    }

}