be.hogent.tarsos.util.histogram.Histogram.java Source code

Java tutorial

Introduction

Here is the source code for be.hogent.tarsos.util.histogram.Histogram.java

Source

package be.hogent.tarsos.util.histogram;

import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.TreeMap;
import java.util.logging.Logger;

import org.apache.commons.math.stat.StatUtils;

import be.hogent.tarsos.util.FileUtils;
import be.hogent.tarsos.util.SimplePlot;

/**
 * A histogram is defined by a start value, a stop value and a number of
 * classes. The 'key' of a class is the middle of the class. E.g. the keys of a
 * histogram that starts at 0, stops at 5 and has 5 classes are
 * {0.5,1.5,2.5,3.5,4.5}. The intervals for each key are
 * {[0,1[;[1,2[;[2,3[;[3,4];[4,5[} with [0,1[ meaning the interval between 0
 * inclusive and 1 exclusive.
 * <p>
 * The histogram uses a red and black tree as underlying structure: Search,
 * insert and delete are O(LOG n). The tree keeps the keys in order and makes
 * iteration (in order) easy. Optimization is possible by replacing the tree
 * with arrays.
 * </p>
 * <p>
 * The histogram uses doubles as key values. Java doubles are prone to rounding
 * errors. To prevent rounding errors the keys are rounded to a predefined
 * number of decimals. The number can be found in {@link #PRECISION_FACTOR}.
 * E.g. if {@link #PRECISION_FACTOR} is 10000 then the number of significant
 * decimals is 4; the minimum classWidth is 0.0001.
 * </p>
 * 
 * @author Joren Six
 */
public class Histogram implements Cloneable {
    private static final Logger LOG = Logger.getLogger(Histogram.class.getName());

    /**
     * The width of each class (or bin) is equal to stop - start / number of
     * classes.
     */
    private final double classWidth;
    /**
     * The number of classes (or bins) in the histogram.
     */
    private final int numberOfClasses;
    /**
     * A red black tree backing the frequency table, easy to iterate (in order)
     * TODO Optimization: serious optimization possible by using a plain array
     * (or two).
     */
    private final TreeMap<Double, Long> freqTable;
    /**
     * The starting value != the first class middle start == the first class
     * middle - classWidth / 2.
     */
    private final double start; // the starting value
    /**
     * The last value != the last class middle stop == the last class middle +
     * classWidth / 2.
     */
    private final double stop; // the stopping value
    /**
     * If the histogram wraps values outside the range
     * <code>]start - classWidht / 2, stop + classWidth / 2 [</code> are mapped
     * to values inside the range using a modulo calculation.
     */
    private final boolean wraps;

    /**
     * if <code>true</code> values outside the valid range are ignored.
     * Otherwise if a value outside the valid range is added an
     * IllegalArgumentException is thrown.
     */
    private final boolean ignoreValuesOutsideRange;

    /**
     * <p>
     * Create a Histogram with a certain number of classes with values in the
     * range <code>]start - classWidht / 2, stop + classWidth / 2 [</code> if
     * the histogram wraps otherwise values outside the range are mapped to
     * values inside using a modulo calculation.
     * </p>
     * 
     * @param startVal
     *            the starting value of the histogram. The starting value is not
     *            the same as the first class middle. The starting value is
     *            equal to <code>the first class middle - classWidth / 2</code>
     * @param stopVal
     *            the stopping value of the histogram. The stopping value is not
     *            the same as the last class middle. The stopping value is equal
     *            to <code>the last class middle + classWidth / 2</code>
     * @param totalClasses
     *            the number of classes between the starting and stopping
     *            values. Also defines the classWidth.
     * @param wrapping
     *            indicates if the histogram wraps around the edges. More
     *            formal: If the histogram wraps values outside the range
     *            <code>]start - classWidht / 2, stop + classWidth / 2 [</code>
     *            are mapped to values inside the range using a modulo
     *            calculation.
     * @ignoreValuesOutsideRange if <code>true</code> values outside the valid
     *                           range are ignored. Otherwise if a value outside
     *                           the valid range is added an
     *                           IllegalArgumentException is thrown.
     */
    public Histogram(final double startVal, final double stopVal, final int totalClasses, final boolean wrapping,
            final boolean ignoreOutsideRange) {
        if (stopVal <= startVal) {
            throw new IllegalArgumentException("The stopping value (" + stopVal
                    + ") should be bigger than the starting value (" + startVal + ") .");
        }

        this.classWidth = preventRoundingErrors((stopVal - startVal) / totalClasses);
        this.start = startVal;
        this.stop = stopVal;
        this.freqTable = new TreeMap<Double, Long>();
        this.wraps = wrapping;
        this.ignoreValuesOutsideRange = ignoreOutsideRange;

        final double lastKey = stopVal - getClassWidth() / 2;

        final double stopValue;
        if (wrapping) {
            stopValue = lastKey;
        } else {
            stopValue = lastKey + getClassWidth() / 2;
        }
        freqTable.put(preventRoundingErrors(startVal + getClassWidth() / 2), 0L);
        for (double current = startVal + getClassWidth() / 2; current <= stopValue;) {
            freqTable.put(valueToKey(current), 0L);
            current = current + getClassWidth();
        }

        this.numberOfClasses = freqTable.keySet().size();
    }

    /**
     * Creates a new, empty histogram using the same parameters of the original
     * histogram. The parameter being start, wraps and stop and number of
     * classes.
     * 
     * @param original
     *            the original histogram
     */
    public Histogram(final Histogram original) {
        this(original.getStart(), original.getStop(), original.numberOfClasses, original.wraps,
                original.ignoreValuesOutsideRange);
    }

    /**
     * <p>
     * Create a Histogram with a certain number of classes with values in the
     * range <code>]start - classWidht / 2, stop + classWidth / 2 [</code> if
     * the histogram wraps otherwise values outside the range are mapped to
     * values inside using a modulo calculation.
     * </p>
     * 
     * @param startVal
     *            the starting value of the histogram. The starting value is not
     *            the same as the first class middle. The starting value is
     *            equal to <code>the first class middle - classWidth / 2</code>
     * @param stopVal
     *            the stopping value of the histogram. The stopping value is not
     *            the same as the last class middle. The stopping value is equal
     *            to <code>the last class middle + classWidth / 2</code>
     * @param totalClasses
     *            the number of classes between the starting and stopping
     *            values. Also defines the classWidth.
     */
    public Histogram(final double startVal, final double stopVal, final int totalClasses) {
        this(startVal, stopVal, totalClasses, false, false);
    }

    /**
     * <p>
     * Create a Histogram with a certain number of classes with values in the
     * range <code>]start - classWidht / 2, stop + classWidth / 2 [</code> if
     * the histogram wraps otherwise values outside the range are mapped to
     * values inside using a modulo calculation.
     * </p>
     * 
     * @param startVal
     *            the starting value of the histogram. The starting value is not
     *            the same as the first class middle. The starting value is
     *            equal to <code>the first class middle - classWidth / 2</code>
     * @param stopVal
     *            the stopping value of the histogram. The stopping value is not
     *            the same as the last class middle. The stopping value is equal
     *            to <code>the last class middle + classWidth / 2</code>
     * @param totalClasses
     *            the number of classes between the starting and stopping
     *            values. Also defines the classWidth.
     * @param wrapping
     *            indicates if the histogram wraps around the edges. More
     *            formal: If the histogram wraps values outside the range
     *            <code>]start - classWidht / 2, stop + classWidth / 2 [</code>
     *            are mapped to values inside the range using a modulo
     *            calculation.
     */
    public Histogram(final double startVal, final double stopVal, final int totalClasses, final boolean wrapping) {
        this(startVal, stopVal, totalClasses, wrapping, true);
    }

    /**
     * Returns the key for class with index bufferCount.
     * 
     * @param bufferCount
     *            a class index. If bufferCount lays outside the interval
     *            <code>[0,getNumberOfClasses()[</code> it is mapped to a value
     *            inside the interval using a modulo calculation.
     * @return the key for class with index bufferCount
     */
    public final double getKeyForClass(final int i) {
        int classIndex = i;
        // make sure classIndex is positive
        while (classIndex < 0) {
            classIndex += getNumberOfClasses();
        }
        // make sure bufferCount is within range
        classIndex = classIndex % getNumberOfClasses();
        final double key = getStart() + classIndex * getClassWidth() + getClassWidth() / 2.0;
        return preventRoundingErrors(key);
    }

    /**
     * Returns the number of items in class with index bufferCount.
     * 
     * @param bufferCount
     *            a class index. If bufferCount lays outside the interval
     *            <code>[0,getNumberOfClasses()[</code> it is mapped to a value
     *            inside the interval using a modulo calculation.
     * @return the number of items in bin with index bufferCount
     */
    public final long getCountForClass(final int i) {
        return getCount(getKeyForClass(i));
    }

    /**
     * @return the set with histogram keys; Do not add keys to the set directly.
     *         Use histogram methods instead. For performance reasons it is not
     *         wrapped in an immutable set so handle with care.
     */
    public final Set<Double> keySet() {
        return freqTable.keySet();
    }

    /**
     * Adds a value to the Histogram. Assigns the value to the right bin
     * automatically.
     * 
     * @param value
     *            The value to add.
     * @throws IllegalArgumentException
     *             when the value is not in the range of the histogram.
     */
    public final Histogram add(final double value) {

        if (!wraps && !ignoreValuesOutsideRange && !validValue(value)) {
            throw new IllegalArgumentException("Value not in the correct interval: " + value + " not between " + "["
                    + this.firstValidValue() + "," + this.lastValidValue() + "].");
        } else if (!wraps && ignoreValuesOutsideRange && !validValue(value)) {
            LOG.info("Ignored value " + value + " (not between " + "[" + this.firstValidValue() + ","
                    + this.lastValidValue() + "]).");
        }

        if (value > 0) {
            final double key = valueToKey(value);
            final Long count = freqTable.get(key);
            assert count != null : "All key values should be initialized, " + key + " is not.";
            if (count != null) {
                freqTable.put(key, Long.valueOf(count.longValue() + 1));
            }
        } else {
            LOG.warning("Using values below zero in is not tested, "
                    + "it can yield unexpected results. Values below zero are ignored!");
        }
        valueAddedHook(value);
        return this;
    }

    /**
     * A hook to intercept added values.
     * 
     * @param value
     *            The value added
     */
    protected void valueAddedHook(final double value) {
    }

    private static final double PRECISION_FACTOR = 10000.0;

    /**
     * Prevents rounding errors by multiplying and dividing by
     * {@link Histogram#PRECISION_FACTOR} Limits the use of the histogram class
     * for values (class widths) smaller than 1 /
     * {@link Histogram#PRECISION_FACTOR} XXX This is dangerous. Alternatives to
     * using doubles: casting to BigDecimal internally? BigDecimal has ordering
     * problems in a map. See
     * http://java.sun.com/j2se/1.5.0/docs/api/java/math/BigDecimal.html.
     * 
     * @param value
     *            to prevent errors for.
     * @return a rounded value to
     */
    private double preventRoundingErrors(final double value) {
        return Math.floor(value * PRECISION_FACTOR) / PRECISION_FACTOR;
    }

    /**
     * returns the key for a value. E.g. if the bin width is 1 then
     * valueToKey(3.2) returns 3.5
     * 
     * @param value
     *            the value to get the key to
     * @return the key closest to the value
     */
    private double valueToKey(final double value) {
        // TODO remove the value below zero limitation
        // by changing the wraps modulo calculation and test
        if (value < 0) {
            throw new IllegalArgumentException("Currently no values below zero are accepted");
        }

        double roundedValue = value;
        if (wraps) {
            final double interval = stop - start;
            while (roundedValue < freqTable.firstKey()) {
                roundedValue = preventRoundingErrors(roundedValue + interval);
            }
            roundedValue = preventRoundingErrors(start + (roundedValue - start) % interval);
        }

        // assert validValue(roundedValue);

        final double classes = Math.floor((roundedValue + start) / classWidth);
        final double offset = classWidth / 2 - start;
        final double key = preventRoundingErrors(classes * classWidth + offset);
        // assert key >= freqTable.firstKey();
        // assert key <= freqTable.lastKey();
        return key;
    }

    /**
     * Returns the number of values = v.
     * 
     * @param value
     *            the value to lookup.
     * @return the frequency of v.
     */
    public final long getCount(final double value) {
        final double key = valueToKey(value);
        long result = 0;
        final Long count = freqTable.get(key);
        if (count != null) {
            result = count.longValue();
        }
        return result;
    }

    /**
     * Sets the number of values for a key (bin) The value is automatically
     * mapped to a key.
     * 
     * @param value
     *            the value mapped to a key of the class to set the count for.
     * @param count
     *            the number of items in the bin
     */
    public final void setCount(final double value, final long count) {
        final double key = valueToKey(value);
        freqTable.put(key, count);
    }

    /**
     * @return the width of a class (bin)
     */
    public final double getClassWidth() {
        return classWidth;
    }

    /**
     * @return the number of classes
     */
    public final int getNumberOfClasses() {
        return numberOfClasses;
    }

    /**
     * The starting value is not the same as the first key. It is equal to
     * <code>firstKey - classWidth / 2.0</code>
     * 
     * @return the starting value
     */
    public final double getStart() {
        // assert Math.abs(start - freqTable.firstKey() - classWidth / 2.0) <
        // 0.0001;
        return start;
    }

    /**
     * The stopping value is not the same as the last key. It is equal to
     * <code>lastKey + classWidth / 2.0</code>
     * 
     * @return the stop value
     */
    public double getStop() {
        // assert Math.abs(stop - freqTable.lastKey() + classWidth / 2.0) <
        // 0.001;
        // stop is cached for performance reasons
        return stop;
    }

    /**
     * @return <code>true</code> if values outside the interval are wrapped,
     *         <code>false</code> otherwise.
     */
    public boolean isWrapped() {
        return this.wraps;
    }

    /**
     * @return the first value that correctly maps to a key. A valid value lays
     *         in the interval [{@link Histogram#firstValidValue()},
     *         {@link Histogram#lastValidValue()}]
     */
    private double firstValidValue() {
        return this.freqTable.firstKey() - classWidth / 2.0;
    }

    /**
     * @return the last value that correctly maps to a key. A valid value lays
     *         in the interval [{@link Histogram#firstValidValue()},
     *         {@link Histogram#lastValidValue()}]
     */
    private double lastValidValue() {
        return this.freqTable.lastKey() + classWidth / 2.0;
    }

    /**
     * A valid value lays in the interval [{@link Histogram#firstValidValue()} ,
     * {@link Histogram#lastValidValue()}].
     * 
     * @param value
     *            the value to check
     * @return
     */
    private boolean validValue(final double value) {
        return value >= firstValidValue() && value <= lastValidValue();
    }

    /**
     * Returns the cumulative frequency of values less than or equal to v.
     * <p>
     * Returns 0 if v is not comparable to the values set.
     * </p>
     * <p>
     * Uses code from <a href="http://commons.apache.org/math">Apache Commons
     * Math"</a> licensed to the Apache Software Foundation (ASF) under one or
     * more contributor license agreements.
     * </p>
     * 
     * @param v
     *            the value to lookup.
     * @return the proportion of values equal to v
     */
    public long getCumFreq(final Double v) {
        long cumulativeFreq = -1;
        if (getSumFreq() == 0) {
            cumulativeFreq = 0;
        } else if (v.compareTo(freqTable.firstKey()) < 0) {
            cumulativeFreq = 0;
        } else if (v.compareTo(freqTable.lastKey()) >= 0) {
            cumulativeFreq = getSumFreq();
        } else {

            // the frequency of this key
            long result = 0;
            final Long value = freqTable.get(v);
            if (value != null) {
                result = value.longValue();
            }

            // add the frequencies of values smaller than this key
            final Iterator<Double> values = freqTable.keySet().iterator();
            while (values.hasNext()) {
                final Double nextValue = values.next();
                if (v.compareTo(nextValue) > 0) {
                    result += getCount(nextValue);
                } else {
                    cumulativeFreq = result;
                    break;
                }
            }
        }
        if (cumulativeFreq == -1) {
            throw new AssertionError("The key is greather than te last key but this is impossible."
                    + " It should have been returned already.");
        }
        return cumulativeFreq;
    }

    /**
     * Returns the cumulative percentage of values less than or equal to v (as a
     * proportion between 0 and 1).
     * <p>
     * Returns <code>Double.NaN</code> if no values have been added.
     * </p>
     * 
     * @param v
     *            the value to lookup
     * @return the proportion of values less than or equal to v
     */
    public double getCumPct(final Double v) {
        final long sumFreq = getSumFreq();
        final double cumPercentage;
        if (sumFreq == 0) {
            cumPercentage = Double.NaN;
        } else {
            cumPercentage = (double) getCumFreq(v) / (double) sumFreq;
        }
        return cumPercentage;
    }

    /**
     * Returns the sum of all frequencies (bin counts). If there are negative
     * bin counts the sum is smaller than <code>getAbsoluteSumFreq()</code>
     * 
     * @return the total frequency count.
     */
    public long getSumFreq() {
        long result = 0;
        final Iterator<Long> iterator = freqTable.values().iterator();
        while (iterator.hasNext()) {
            result += iterator.next().longValue();
        }
        return result;
    }

    /**
     * Returns the sum of all frequencies. The absolute values of the bin counts
     * are used.
     * 
     * @return the total frequency count.
     */
    public long getAbsoluteSumFreq() {
        long result = 0;
        final Iterator<Long> iterator = freqTable.values().iterator();
        while (iterator.hasNext()) {
            result += Math.abs(iterator.next().longValue());
        }
        return result;
    }

    /**
     * Returns the percentage of values that are equal to v (as a proportion
     * between 0 and 1).
     * <p>
     * Returns <code>Double.NaN</code> if no values have been added.
     * </p>
     * 
     * @param v
     *            the value to lookup
     * @return the proportion of values equal to v
     */
    public double getPct(final Double v) {
        final long sumFreq = getSumFreq();
        double percentage;
        if (sumFreq == 0) {
            percentage = Double.NaN;
        } else {
            percentage = (double) getCount(v) / (double) sumFreq;
        }
        return percentage;
    }

    /**
     * Returns the entropy of the histogram.
     * <p>
     * The histogram entropy is defined to be the negation of the sum of the
     * products of the probability associated with each bin with the base-2 LOG
     * of the probability.
     * </p>
     * <p>
     * Uses code from https://jai-core.dev.java.net/ The source code for the
     * core Java Advanced Imaging API reference implementation is licensed under
     * the Java Research License (JRL) for non-commercial use. The JRL allows
     * users to download, build, and modify the source code in the jai-core
     * project for research use, subject to the terms of the license.
     * </p>
     * 
     * @return The entropy of the histogram.
     */
    public double getEntropy() {
        final double log2 = Math.log(2.0);
        double entropy = 0.0;
        final double total = getSumFreq();
        for (int b = 0; b < numberOfClasses; b++) {
            final double p = getCountForClass(b) / total;
            if (p != 0.0) {
                entropy -= p * Math.log(p) / log2;
            }
        }
        return entropy;
    }

    /**
     * Calculates the mean count of each bin. It iterates over each bin, stores
     * the bin count temporarily and returns the mean bin count. It does not
     * cache the result.
     * 
     * @return the mean bin count.
     */
    public double getMean() {
        final double[] binCounts = new double[this.getNumberOfClasses() + 1];
        int i = 0;
        for (final double key : this.keySet()) {
            binCounts[i] = this.getCount(key);
            i++;
        }
        return StatUtils.mean(binCounts);
    }

    /**
     * Return a string representation of this histogram.
     * 
     * @return a string representation.
     */

    @Override
    public String toString() {
        return toString(false);
    }

    /**
     * Returns a string representation of the histogram.
     * <p>
     * Uses code from <a href="http://commons.apache.org/math">Apache Commons
     * Math"</a> licensed to the Apache Software Foundation (ASF) under one or
     * more contributor license agreements.
     * </p>
     * 
     * @param asciiArt
     *            If true it generates an ascii representation of a histogram,
     *            otherwise it generates a frequency table
     * @return a string representation.
     */
    public String toString(final boolean asciiArt) {
        if (asciiArt) {
            final StringBuffer outBuffer = new StringBuffer();
            outBuffer.append('\n');
            final Iterator<Double> iter = freqTable.keySet().iterator();
            while (iter.hasNext()) {
                final Double value = iter.next();
                outBuffer.append(value).append("\t\t|");
                for (int i = 0; i < getPct(value) * 100; i++) {
                    outBuffer.append('x');
                }
                outBuffer.append('\n');
            }
            outBuffer.append('\n');
            return outBuffer.toString();
        } else {
            final NumberFormat nf = NumberFormat.getPercentInstance();
            final StringBuffer outBuffer = new StringBuffer();
            outBuffer.append("\nValue \t Freq. \t Pct. \t Cum Pct. \n");
            final Iterator<Double> iter = freqTable.keySet().iterator();
            while (iter.hasNext()) {
                final Double value = iter.next();
                outBuffer.append(value);
                outBuffer.append('\t');
                outBuffer.append(getCount(value));
                outBuffer.append('\t');
                outBuffer.append(nf.format(getPct(value)));
                outBuffer.append('\t');
                outBuffer.append(nf.format(getCumPct(value)));
                outBuffer.append('\n');
            }
            return outBuffer.toString();
        }
    }

    // -----------------------------------------------------------
    // -- --
    // -- Modifications & Math --
    // -- --
    // -----------------------------------------------------------

    /**
     * Normalizes the peaks in a histogram. Every peak is reduced to it's
     * relative weight (percent).
     * <p>
     * Changes the current histogram and returns it so it is possible to chain
     * modifications. E.g. <code>histo.normalize().addToEachBin(10)</code>
     * </p>
     * 
     * @return a Histogram with normalized peak.
     */
    public Histogram normalize() {
        final List<Long> normalizedCounts = new ArrayList<Long>();
        for (final double key : freqTable.keySet()) {
            normalizedCounts.add((long) (getPct(key) * 10000));
        }
        int index = 0;
        for (final double key : freqTable.keySet()) {
            this.setCount(key, normalizedCounts.get(index));
            index++;
        }
        return this;
    }

    /**
     * Adds a number of items to each bin. Use a negative number to subtract a
     * value from each bin.
     * <p>
     * Changes the current histogram and returns it so it is possible to chain
     * modifications. E.g. <code>histo.normalize().addToEachBin(10)</code>
     * </p>
     * 
     * @param value
     *            the number of items to add.
     * @return returns the current histogram so it is possible to chain
     *         modifications.
     */
    public Histogram addToEachBin(final long value) {
        // do nothing if value == 0
        if (value != 0) {
            for (final double key : freqTable.keySet()) {
                this.setCount(key, getCount(key) + value);
            }
        }
        return this;
    }

    /**
     * Searches the minimum number of items in a bin and subtracts all bins with
     * this value.
     * 
     * <pre>
     * *
     * * *              *
     * * *   *          * *
     * * * * *    =>    * *   *
     * -------          -------
     * </pre>
     * <p>
     * Changes the current histogram and returns it so it is possible to chain
     * modifications. E.g. <code>histo.normalize().addToEachBin(10)</code>
     * </p>
     * 
     * @return a baselined histogram
     */
    public Histogram baselineHistogram() {
        long smallestValue = Long.MAX_VALUE;
        for (final double key : freqTable.keySet()) {
            smallestValue = Math.min(getCount(key), smallestValue);
        }
        final long valueToAdd = (long) -1.0 * smallestValue;
        return addToEachBin(valueToAdd);
    }

    /**
     * Calculates the sum of two histograms. The value for each bin of other is
     * added to the corresponding bin of this histogram. The other histogram
     * must have the same start, stop and binWidth otherwise adding histograms
     * makes no sense!
     * <p>
     * Changes the current histogram and returns it so it is possible to chain
     * modifications. E.g.<code>histo.normalize().addToEachBin(10)</code>
     * </p>
     * 
     * @param other
     *            The other histogram
     * @return the changed histogram with more (or the same) number of items in
     *         the bins.
     */
    public Histogram add(final Histogram other) {
        assert freqTable.keySet().size() == other.keySet().size();
        assert start == other.start;
        assert stop == other.stop;
        for (final double key : freqTable.keySet()) {
            this.setCount(key, this.getCount(key) + other.getCount(key));
        }
        return this;
    }

    /**
     * Takes the maximum of the bin value in each histogram and keeps it.
     * 
     * @param other
     * @return
     */
    public Histogram max(final Histogram other) {
        assert freqTable.keySet().size() == other.keySet().size();
        assert start == other.start;
        assert stop == other.stop;
        for (final double key : freqTable.keySet()) {
            this.setCount(key, Math.max(this.getCount(key), other.getCount(key)));
        }
        return this;
    }

    /**
     * Subtracts two histograms. The value for each bin of other is removed to
     * the corresponding bin of this histogram. The other histogram must have
     * the same start, stop and binWidth otherwise subtracting histograms makes
     * no sense!
     * <p>
     * Changes the current histogram and returns it so it is possible to chain
     * modifications. E.g.<code>histo.normalize().addToEachBin(10)</code>
     * </p>
     * 
     * @param other
     *            The other histogram
     * @return the changed histogram with less (or the same) number of items in
     *         the bins.
     */
    public Histogram subtract(final Histogram other) {
        this.invert();
        this.add(other);
        return this;
    }

    /**
     * Inverts this histograms. The value for each bin is multiplied with -1.
     * <p>
     * Changes the current histogram and returns it so it is possible to chain
     * modifications. E.g.<code>histo.normalize().addToEachBin(10)</code>
     * </p>
     * 
     * @return the changed histogram with each bin multiplied with.
     */
    public Histogram invert() {
        this.multiply(-1.0);
        return this;
    }

    /**
     * Multiplies each class (bin) count with a factor.
     * <p>
     * Changes the current histogram and returns it so it is possible to chain
     * modifications. E.g. <code>histo.normalize().addToEachBin(10)</code>
     * </p>
     * 
     * @param factor
     *            the factor to multiply each bin value with.
     * @return histogram with each bin value multiplied by the factor.
     */
    public Histogram multiply(final double factor) {
        for (final double key : this.freqTable.keySet()) {
            this.setCount(key, Math.round(this.getCount(key) * factor));
        }
        return this;
    }

    /**
     * Raises each class count to the power of exponent.
     * <p>
     * Changes the current histogram and returns it so it is possible to chain
     * modifications. E.g. <code>histo.normalize().addToEachBin(10)</code>
     * </p>
     * 
     * @param exponent
     *            The exponent to raise each bincount with.
     * @return Histogram with each bin count raised with exponent.
     */
    public Histogram raise(final double exponent) {
        for (final double key : this.freqTable.keySet()) {
            this.setCount(key, Math.round(Math.pow(this.getCount(key), exponent)));
        }
        return this;
    }

    /*
     * (non-Javadoc)
     * 
     * @see java.lang.Object#clone()
     */

    @Override
    public Histogram clone() throws CloneNotSupportedException {
        final Histogram clone = (Histogram) super.clone();
        for (final double key : this.freqTable.keySet()) {
            clone.setCount(key, this.getCount(key));
        }
        return clone;
    }

    /**
     * Calculates a histogram mean of a list of histograms. All histograms must
     * have the same start, stop and binWidth otherwise the mean histogram makes
     * no sense!
     * 
     * @param histograms
     *            a list of histograms
     * @return a histogram with the mean values. If the list is empty it returns
     *         null.
     */
    public static Histogram mean(final List<Histogram> histograms) {
        Histogram mean = null;
        if (!histograms.isEmpty()) {
            final Histogram first = histograms.get(0);
            mean = new Histogram(first);
            for (final double key : first.freqTable.keySet()) {
                final double[] values = new double[histograms.size()];
                int countIndex = 0;
                for (final Histogram h : histograms) {
                    assert h.keySet().size() == first.keySet().size();
                    assert first.classWidth == h.classWidth;
                    assert first.start == h.start;
                    assert first.stop == h.stop;
                    values[countIndex] = h.getCount(key);
                    countIndex++;
                }
                final long currentMean = Math.round(StatUtils.mean(values));
                mean.setCount(key, currentMean);
            }
        }
        return mean;
    }

    // -----------------------------------------------------------
    // -- --
    // -- smoothing methods --
    // -- --
    // -----------------------------------------------------------

    /**
     * Computes a smoothed version of the histogram.
     * <p>
     * The histogram is smoothed by averaging over a moving window of a size
     * specified by the method parameter: if the value of the parameter is
     * <bufferCount>k</bufferCount> then the width of the window is
     * <bufferCount>2*k + 1</bufferCount>. If the window runs off the end of the
     * histogram only those values which intersect the histogram are taken into
     * consideration. The smoothing may optionally be weighted to favor the
     * central value using a "triangular" weighting. For example, for a value of
     * <bufferCount>k</bufferCount> equal to 2 the central bin would have weight
     * 1/3, the adjacent bins 2/9, and the next adjacent bins 1/9.
     * <p>
     * Changes the current histogram and returns it so it is possible to chain
     * modification e.g. <code>histo.normalize().addToEachBin(10)</code>
     * </p>
     * <p>
     * Uses code from https://jai-core.dev.java.net/ The source code for the
     * core Java Advanced Imaging API reference implementation is licensed under
     * the Java Research License (JRL) for non-commercial use. The JRL allows
     * users to download, build, and modify the source code in the jai-core
     * project for research use, subject to the terms of the license.
     * </p>
     * 
     * @param isWeighted
     *            Whether bins will be weighted using a triangular weighting
     *            scheme favoring bins near the central bin.
     * @param k
     *            The smoothing parameter which must be non-negative or an
     *            <code>IllegalArgumentException</code> will be thrown. If zero,
     *            the histogram object will be returned with no smoothing
     *            applied.
     * @return A smoothed version of the histogram.
     */
    public Histogram smooth(final boolean isWeighted, final int k) {
        if (k < 0) {
            throw new IllegalArgumentException("k");
        } else if (k == 0) {
            return this;
        }

        // Initialize the smoothing weights if needed.
        double[] weights = null;
        if (isWeighted) {
            final int numWeights = 2 * k + 1;
            final double denom = numWeights * numWeights;
            weights = new double[numWeights];
            for (int i = 0; i <= k; i++) {
                weights[i] = (i + 1) / denom;
            }
            for (int i = k + 1; i < numWeights; i++) {
                weights[i] = weights[numWeights - 1 - i];
            }
        }

        final int[] smoothedCounts = new int[numberOfClasses];

        // Clear the band total count for the smoothed histogram.
        int sum = 0;

        if (isWeighted) {
            for (int b = 0; b < numberOfClasses; b++) {
                // Determine clipped range.
                final int min = Math.max(b - k, 0);
                final int max = Math.min(b + k, numberOfClasses);

                // Calculate the offset into the weight array.
                int offset = k > b ? k - b : 0;

                // Accumulate the total for the range.
                double acc = 0;
                double weightTotal = 0;
                for (int i = min; i < max; i++) {
                    final double w = weights[offset++];
                    acc += getCountForClass(i) * w;
                    weightTotal += w;
                }

                // Round the accumulated value.
                smoothedCounts[b] = (int) (acc / weightTotal + 0.5);

                // Accumulate total for band.
                sum += smoothedCounts[b];
            }
        } else {
            for (int b = 0; b < numberOfClasses; b++) {
                // Determine clipped range.
                final int min = Math.max(b - k, 0);
                final int max = Math.min(b + k, numberOfClasses);

                // Accumulate the total for the range.
                int acc = 0;
                for (int i = min; i < max; i++) {
                    acc += getCountForClass(i);
                }

                // Calculate the average for the range.
                smoothedCounts[b] = (int) (acc / (double) (max - min + 1) + 0.5);

                // Accumulate total for band.
                sum += smoothedCounts[b];
            }
        }

        // Rescale the counts such that the band total is approximately
        // the same as for the same band of the original histogram.
        final double factor = getSumFreq() / (double) sum;
        for (int b = 0; b < numberOfClasses; b++) {
            final int smoothedCount = (int) (smoothedCounts[b] * factor + 0.5);
            final double key = getKeyForClass(b);
            this.setCount(key, smoothedCount);
        }

        return this;
    }

    /**
     * Smooth the histogram using Gaussians.
     * <p>
     * Each band of the histogram is smoothed by discrete convolution with a
     * kernel approximating a Gaussian impulse response with the specified
     * standard deviation.
     * <p>
     * <em>Changes the current histogram</em> and returns it so it is possible
     * to chain modification e.g.
     * <code>histo.normalize().addToEachBin(10)</code>
     * </p>
     * <p>
     * Uses code from https://jai-core.dev.java.net/ The source code for the
     * core Java Advanced Imaging API reference implementation is licensed under
     * the Java Research License (JRL) for non-commercial use. The JRL allows
     * users to download, build, and modify the source code in the JAI-core
     * project for research use, subject to the terms of the license.
     * </p>
     * 
     * @param standardDeviation
     *            The standard deviation of the Gaussian smoothing kernel which
     *            must be non-negative or an
     *            <code>IllegalArgumentException</code> will be thrown. If zero,
     *            the histogram object will be returned with no smoothing
     *            applied.
     * @return A Gaussian smoothed version of the histogram.
     */
    public Histogram gaussianSmooth(final double standardDeviation) {
        if (standardDeviation < 0.0) {
            throw new IllegalArgumentException("standardDeviation invalid");
        } else if (standardDeviation == 0.0) {
            return this;
        }

        // Determine the number of weights (must be odd).
        int numWeights = (int) (2 * 2.58 * standardDeviation + 0.5);
        if (numWeights % 2 == 0) {
            numWeights++;
        }

        // Initialize the smoothing weights.
        final double[] weights = new double[numWeights];
        final int m = numWeights / 2;
        final double var = standardDeviation * standardDeviation;
        final double gain = 1.0 / Math.sqrt(2.0 * Math.PI * var);
        final double exp = -1.0 / (2.0 * var);
        for (int i = m; i < numWeights; i++) {
            final double del = i - m;
            weights[i] = gain * Math.exp(exp * del * del);
            weights[numWeights - 1 - i] = weights[i];
        }

        // Clear the band total count for the smoothed histogram.
        int sum = 0;

        final int[] smoothedCounts = new int[numberOfClasses];

        for (int b = 0; b < numberOfClasses; b++) {
            // Determine clipped range.
            final int min = Math.max(b - m, 0);
            final int max = Math.min(b + m, numberOfClasses);

            // Calculate the offset into the weight array.
            int offset = m > b ? m - b : 0;

            // Accumulate the total for the range.
            double acc = 0;
            double weightTotal = 0;
            for (int i = min; i < max; i++) {
                final double w = weights[offset++];
                acc += getCountForClass(i) * w;
                weightTotal += w;
            }

            // Round the accumulated value.
            smoothedCounts[b] = (int) (acc / weightTotal + 0.5);

            // Accumulate total for band.
            sum += smoothedCounts[b];
        }

        // Rescale the counts such that the band total is approximately
        // the same as for the same band of the original histogram.
        final double factor = getSumFreq() / (double) sum;
        for (int b = 0; b < numberOfClasses; b++) {
            final int smoothedCount = (int) (smoothedCounts[b] * factor + 0.5);
            final double key = getKeyForClass(b);
            this.setCount(key, smoothedCount);
        }
        return this;
    }

    // -----------------------------------------------------------
    // -- --
    // -- correlation methods --
    // -- --
    // -----------------------------------------------------------
    // TODO Document correlation methods

    public int displacementForOptimalCorrelation(final Histogram otherHistogram) {
        return displacementForOptimalCorrelation(otherHistogram, CorrelationMeasure.INTERSECTION);
    }

    public void displace(final int displacement) {
        try {
            Histogram original = clone();
            // Makes sure the displacement is positive.
            final int actualDisplacement = (displacement + numberOfClasses) % numberOfClasses;

            for (final double key : freqTable.keySet()) {
                final double displacedValue = (key + actualDisplacement * classWidth)
                        % (numberOfClasses * classWidth);
                this.setCount(key, original.getCount(displacedValue));
            }
        } catch (CloneNotSupportedException e) {
            throw new AssertionError("Cloning a histogram is supported!");
        }
    }

    public double correlationWithDisplacement(final int displacement, final Histogram otherHistogram,
            final CorrelationMeasure correlationMeasure) {
        return correlationMeasure.getHistogramCorrelation().correlation(this, displacement, otherHistogram);
    }

    public double correlationWithDisplacement(final int displacement, final Histogram otherHistogram) {
        return correlationWithDisplacement(displacement, otherHistogram, CorrelationMeasure.INTERSECTION);
    }

    /**
     * Return the correlation of this histogram with another one.
     * 
     * @param otherHistogram
     * @param correlationMeasure
     * @return the correlation between this histogram with another histogram.
     */
    public double correlation(final Histogram otherHistogram, final CorrelationMeasure correlationMeasure) {
        if (otherHistogram.classWidth != classWidth) {
            throw new IllegalArgumentException(
                    "Computation of correlation only correct when the classwidth of both histograms are the same");
        }
        return correlationWithDisplacement(0, otherHistogram, correlationMeasure);

    }

    /**
     * Return the correlation of this histogram with another one. By default it
     * uses the {@link CorrelationMeasure#INTERSECTION INTERSECTION} correlation
     * measure.
     * 
     * @param otherHistogram
     *            the other histogram
     * @return the correlation the computed correlation
     */
    public double correlation(final Histogram otherHistogram) {
        return correlation(otherHistogram, CorrelationMeasure.INTERSECTION);
    }

    /**
     * Returns the number of classes the other histogram needs to be displaced
     * to get optimal correlation with this histogram. The correlation is
     * defined by the chosen correlation measure.
     * 
     * @param otherHistogram
     *            The other histogram.
     * @param correlationMeasure
     *            The correlation strategy.
     * @return Returns the number of classes the other histogram needs to be
     *         displaced to get optimal correlation with this histogram.
     */
    public int displacementForOptimalCorrelation(final Histogram otherHistogram,
            final CorrelationMeasure correlationMeasure) {
        int optimalDisplacement = 0; // displacement with best correlation
        double maximumCorrelation = -1; // best found correlation
        final int numberOfClasses = getNumberOfClasses();

        // current displacement, incremented with class width
        for (int currentDisplacement = 0; currentDisplacement < numberOfClasses; currentDisplacement++) {
            final double currentCorrelation = correlationWithDisplacement(currentDisplacement, otherHistogram,
                    correlationMeasure);
            if (maximumCorrelation < currentCorrelation) {
                maximumCorrelation = currentCorrelation;
                optimalDisplacement = currentDisplacement;
            }
        }
        if (optimalDisplacement > getNumberOfClasses() / 2.0) {
            optimalDisplacement = optimalDisplacement - getNumberOfClasses();
        }

        return optimalDisplacement;
    }

    public void plotCorrelation(final Histogram otherHistogram, final CorrelationMeasure correlationMeasure,
            final String fileName, final String title) {
        final int displacement = displacementForOptimalCorrelation(otherHistogram);
        correlationMeasure.getHistogramCorrelation().plotCorrelation(this, displacement, otherHistogram, fileName,
                title);
    }

    // -----------------------------------------------------------
    // -- --
    // -- Plot method --
    // -- --
    // -----------------------------------------------------------

    /**
     * Plots the histogram to a x y plot. The file is saved in PNG file format
     * so the fileName should end on PNG.
     * 
     * @param fileName
     *            The file is saved in PNG file format so the fileName should
     *            end on PNG.
     * @param title
     *            The title of the histogram. Use an empty string or null for an
     *            empty title.
     */
    public void plot(final String fileName, final String title) {
        final String actualTitle = title == null ? "" : title;
        final SimplePlot plot = new SimplePlot(actualTitle);
        plot.addData(0, this);
        plot.save(fileName);
    }

    /**
     * Export the histogram data as a plain text file. The format uses ; to
     * separate keys from values.
     * 
     * @param fileName
     *            Where to save the text file.
     */
    public final void export(final String fileName) {
        final StringBuilder sb = new StringBuilder();
        sb.append("key;value\n");
        for (final double key : keySet()) {
            sb.append(key).append(";").append(getCount(key)).append("\n");
        }
        FileUtils.writeFile(sb.toString(), fileName);
    }

    /**
     * Export the histogram data as a matlab text file. The format can be read
     * using octave or MatLab.
     * 
     * @param fileName
     *            Where to save the matlab (.m) file.
     */
    public final void exportMatLab(final String fileName) {
        final StringBuilder sb = new StringBuilder();
        sb.append("histogram_values = [");
        for (final double key : keySet()) {
            for (int i = 0; i < getCount(key); i++) {
                sb.append(key).append(",");
            }
        }
        sb.append("]\n");
        sb.append("hist(histogram_values,1200)");
        FileUtils.writeFile(sb.toString(), fileName);
    }

    /**
     * @return The maximum bin count.
     */
    public final long getMaxBinCount() {
        long maxValue = -1;
        if (!freqTable.values().isEmpty()) {
            for (final long value : freqTable.values()) {
                maxValue = Math.max(maxValue, value);
            }
        }
        return maxValue;
    }

    /**
     * Sets each bin to 0.
     */
    public void clear() {
        for (final double key : keySet()) {
            setCount(key, 0);
        }
    }
}