com.itemanalysis.psychometrics.histogram.Histogram.java Source code

Java tutorial

Introduction

Here is the source code for com.itemanalysis.psychometrics.histogram.Histogram.java

Source

/*
 * Copyright 2012 J. Patrick Meyer
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.itemanalysis.psychometrics.histogram;

import com.itemanalysis.psychometrics.distribution.DistributionApproximation;
import org.apache.commons.math3.stat.descriptive.moment.Mean;
import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation;
import org.apache.commons.math3.stat.descriptive.rank.Max;
import org.apache.commons.math3.stat.descriptive.rank.Min;
import org.apache.commons.math3.stat.descriptive.rank.Percentile;
import org.apache.commons.math3.util.ResizableDoubleArray;

import java.util.ArrayList;
import java.util.Formatter;
import java.util.Iterator;

/**
 * A histogram consists of an array of x-axis values (the points, which are the bin midpoints) and an array
 * of y-axis values (the values). The type of y-axis value produced by the evaluate methods is determined by
 * the {@link HistogramType} given to the constructor.
 *
 * This class provides two different ways to create a histogram. One way is to pass an entire double array
 * to {@link #evaluate(double[])}. The second way is to first store the data with calls to
 * {@link #increment(double)} or {@link #setData(double[])} and then call {@link #evaluate()}.
 *
 * The result of {@link #evaluate()} is cached. The cache is discarded whenever the stored data change, so
 * data added after an evaluation are reflected by the next call to {@link #evaluate()} or to any accessor.
 * Values changed by hand with {@link #setDensityAt(int, double)} or {@link #setPointAt(int, double)} are
 * lost when the histogram is rebuilt.
 *
 * If {@link #evaluate()} is called before any data have been supplied, the histogram is built from an empty
 * data set. The result is not meaningful and, depending on the bin calculation in use, the call may fail.
 *
 * This class is not synchronized.
 *
 * @author J. Patrick Meyer
 *
 */
public class Histogram implements DistributionApproximation {

    /** Number of bins; taken from the bin calculation each time the histogram is built. */
    private int numberOfBins = 1;

    /** Width of a bin; taken from the bin calculation each time the histogram is built. */
    private double binWidth = 1.0;

    /** Number of observations used in the most recent build. */
    private double n = 0;

    /** Bins of the most recent build, ordered from the lowest to the highest interval. */
    private final ArrayList<Bin> bins = new ArrayList<Bin>();

    private final BinCalculationType binCalculationType;

    /** Bin midpoints of the most recent build; null until the histogram has been built. */
    private double[] points = null;

    /** Y-axis values of the most recent build; null until the histogram has been built. */
    private double[] value = null;

    /** Observations stored by {@link #increment(double)} and {@link #setData(double[])}. */
    private final ResizableDoubleArray data = new ResizableDoubleArray();

    private final boolean lowerInclusive;

    private BinCalculation binCalc = null;

    private final HistogramType histogramType;

    /** True when the stored data changed after the most recent build. */
    private boolean stale = false;

    /**
     * Creates a histogram with lower inclusive bins and a Sturges bin calculation.
     *
     * @param histogramType indicates the type of y-axis values that will be returned when {@link #evaluate()}
     *                      is called.
     */
    public Histogram(HistogramType histogramType) {
        this(histogramType, BinCalculationType.STURGES, true);
    }

    /**
     * This constructor is the most general. It can only use three of the four bin calculation classes. That is,
     * the bin calculation type must be FREEDMAN_DIACONIS, SCOTT, or STURGES. SIMPLE is not permitted.
     *
     * @param histogramType indicates the type of y-axis values that will be returned when {@link #evaluate()}
     *                      is called. It has no other effect.
     * @param binCalculationType rule used to compute the number of bins and the bin width from the data.
     * @param lowerInclusive true if the bins should be lower inclusive. If false, the bins will be upper inclusive.
     * @throws IllegalArgumentException if binCalculationType is {@link BinCalculationType#SIMPLE}.
     */
    public Histogram(HistogramType histogramType, BinCalculationType binCalculationType, boolean lowerInclusive) {
        if (binCalculationType == BinCalculationType.SIMPLE) {
            throw new IllegalArgumentException("Cannot use a SimpleBinCalculation with this constructor");
        }
        this.histogramType = histogramType;
        this.binCalculationType = binCalculationType;
        this.lowerInclusive = lowerInclusive;
    }

    /**
     * This constructor is for use with a {@link com.itemanalysis.psychometrics.histogram.SimpleBinCalculation}
     * where the user provides the min, max, and number of bins.
     *
     * NOTE(review): only the number of bins and the bin width are taken from the SimpleBinCalculation. The bin
     * boundaries are still anchored on the minimum and maximum of the observed data, not on the min and max
     * given here. Confirm that this is intended.
     *
     * @param histogramType type of histogram
     * @param numberOfBins number of bins to include in histogram
     * @param min minimum value
     * @param max maximum value
     * @param lowerInclusive true if lower bound is included in the interval but the upper bound is not.
     *                       If false lower bound not included but upper bound is included.
     */
    public Histogram(HistogramType histogramType, int numberOfBins, double min, double max,
            boolean lowerInclusive) {
        this.histogramType = histogramType;
        this.binCalculationType = BinCalculationType.SIMPLE;
        this.binCalc = new SimpleBinCalculation(numberOfBins, min, max);
        this.lowerInclusive = lowerInclusive;
    }

    /**
     * Computes the bin layout for the data, creates the bins and counts the observations.
     *
     * @param x observations to summarize.
     */
    private void createHistogram(double[] x) {
        n = x.length;
        Min min = new Min();
        Max max = new Max();
        StandardDeviation sd = new StandardDeviation();

        for (double xi : x) {
            min.increment(xi);
            max.increment(xi);
            sd.increment(xi);
        }

        // Pad both ends by a small fraction of the range so the extreme observations fall inside a bin
        // whichever bound of the bins is the inclusive one.
        double range = max.getResult() - min.getResult();
        double lowestBoundary = min.getResult() - range / 1000;
        double largestBoundary = max.getResult() + range / 1000;

        // For SIMPLE the bin calculation was already created by the constructor.
        if (binCalculationType == BinCalculationType.SCOTT) {
            binCalc = new ScottBinCalculation(n, min.getResult(), max.getResult(), sd.getResult());
        } else if (binCalculationType == BinCalculationType.FREEDMAN_DIACONIS) {
            Percentile percentile = new Percentile();
            double q1 = percentile.evaluate(x, 25);
            double q3 = percentile.evaluate(x, 75);
            binCalc = new FreedmanDiaconisBinCalculation(n, min.getResult(), max.getResult(), q1, q3);
        } else if (binCalculationType == BinCalculationType.STURGES) {
            binCalc = new SturgesBinCalculation(n, min.getResult(), max.getResult());
        }

        numberOfBins = binCalc.numberOfBins();
        binWidth = binCalc.binWidth();

        createBins(lowestBoundary, largestBoundary);

        // Every observation is offered to every bin.
        for (double xi : x) {
            for (Bin b : bins) {
                b.increment(xi);
            }
        }
    }

    /**
     * Builds the histogram for the given data and fills the points and values according to the histogram type.
     *
     * @param x observations to summarize.
     * @return the array of y-axis values (the internal array, not a copy).
     */
    private double[] build(double[] x) {
        createHistogram(x);
        stale = false;

        boolean normalized = histogramType == HistogramType.NORMALIZED_RELATIVE_FREQUENCY;
        boolean relative = normalized || histogramType == HistogramType.RELATIVE_FREQUENCY;

        int index = 0;
        double sum = 0.0;
        for (Bin b : bins) {
            double v;
            if (histogramType == HistogramType.FREQUENCY) {
                v = b.getFrequency();
            } else if (relative) {
                v = b.getFrequency() / n;
            } else {
                // Any other histogram type is treated as a density.
                v = b.getFrequency() / (n * binWidth);
            }
            points[index] = b.getMidPoint();
            value[index] = v;
            sum += v;
            index++;
        }

        // Rescale so the values sum to one. Skipped when the sum is zero or NaN, which would only
        // replace the values with NaN or infinity.
        if (normalized && sum > 0) {
            for (int i = 0; i < numberOfBins; i++) {
                value[i] /= sum;
            }
        }
        return value;
    }

    /**
     * Makes sure the points and values exist and reflect the stored data before they are indexed.
     */
    private void ensureEvaluated() {
        if (value == null || stale) {
            evaluate();
        }
    }

    /**
     * Rebuilds the histogram if stored data changed since the last build. Does nothing when no data are
     * stored, so accessors of a histogram that was never given data keep returning their initial state.
     */
    private void refreshIfStale() {
        if (stale && data.getNumElements() > 0) {
            evaluate();
        }
    }

    /**
     * Gets the sum of the y-axis values over all bins.
     *
     * @return sum of the values.
     */
    public double getSumOfValues() {
        ensureEvaluated();
        double sum = 0;
        for (int i = 0; i < numberOfBins; i++) {
            sum += value[i];
        }
        return sum;
    }

    /**
     * Gets the y-axis value at the specified index.
     *
     * @param index array index of the value.
     * @return the value.
     */
    public double getValueAt(int index) {
        ensureEvaluated();
        return value[index];
    }

    /**
     * Create the bins. All bins have the common bin width except the last one, which ends at the largest
     * boundary. The points and value arrays are reallocated and the points are set to the bin midpoints.
     *
     * @param lowestBoundary lower bound of the first bin.
     * @param largestBoundary upper bound of the last bin.
     */
    private void createBins(double lowestBoundary, double largestBoundary) {
        bins.clear();
        points = new double[numberOfBins];
        value = new double[numberOfBins];

        for (int i = 1; i < numberOfBins; i++) {
            double lower = lowestBoundary + (i - 1) * binWidth;
            double upper = lowestBoundary + i * binWidth;
            bins.add(new Bin(lower, upper, lowerInclusive));
        }
        bins.add(new Bin(lowestBoundary + (numberOfBins - 1) * binWidth, largestBoundary, lowerInclusive));

        for (int i = 0; i < bins.size(); i++) {
            points[i] = bins.get(i).getMidPoint();
        }
    }

    /**
     * Adds an observation to the stored data. The bins are not updated here; the histogram is rebuilt from
     * the stored data the next time it is evaluated.
     *
     * @param value an observation.
     */
    public void increment(double value) {
        data.addElement(value);
        stale = true;
    }

    /**
     * Adds the same observation to the stored data repeatedly. The observation is added once for every
     * integer w with 0 <= w < frequency, so a fractional frequency is rounded up and a frequency that is not
     * positive adds nothing.
     *
     * @param value an observation.
     * @param frequency number of times the observation occurs.
     */
    public void increment(double value, double frequency) {
        for (int w = 0; w < frequency; w++) {
            data.addElement(value);
            stale = true;
        }
    }

    /**
     * Gets the number of histogram bins.
     *
     * @return number of bins.
     */
    public int getNumberOfBins() {
        refreshIfStale();
        return numberOfBins;
    }

    /**
     * Gets the number of observations used to build the histogram.
     *
     * @return sample size.
     */
    public double getSampleSize() {
        refreshIfStale();
        return n;
    }

    /**
     * Histogram bins are stored in an array list. Gets the bin at the specified index.
     *
     * @param index position in the array list of the bin.
     * @return a histogram bin.
     */
    public Bin getBinAt(int index) {
        refreshIfStale();
        return bins.get(index);
    }

    /**
     * Gets an array of evaluation points. This method is required by the {@link DistributionApproximation} interface.
     * The evaluation points are the bin midpoints.
     *
     * @return the internal array of evaluation points, or null if the histogram has never been built and no
     * data are stored.
     */
    public double[] getPoints() {
        if (points == null && data.getNumElements() > 0) {
            evaluate();
        } else {
            refreshIfStale();
        }
        return points;
    }

    /**
     * Replaces the stored data with the given observations.
     *
     * @param x observations to store.
     */
    public void setData(double[] x) {
        data.clear();
        for (int i = 0; i < x.length; i++) {
            data.addElement(x[i]);
        }
        stale = true;
    }

    /**
     * Gets an array of y-axis values computed from the stored data. This method is required by the
     * {@link DistributionApproximation} interface. The result is cached until the stored data change.
     *
     * @return the internal array of values.
     */
    public double[] evaluate() {
        if (value != null && !stale) {
            return value;
        }
        return build(data.getElements());
    }

    /**
     * Builds the histogram from the given observations and gets the array of y-axis values. The stored data
     * are not changed. The histogram is rebuilt on every call.
     *
     * @param x observations to summarize.
     * @return the internal array of values.
     */
    public double[] evaluate(double[] x) {
        return build(x);
    }

    /**
     * Gets an evaluation point at the specified index. This method is required by the
     * {@link DistributionApproximation} interface. The evaluation points are the bin midpoints.
     *
     * @param index array index of evaluation point.
     * @return an evaluation point.
     */
    public double getPointAt(int index) {
        ensureEvaluated();
        return points[index];
    }

    /**
     * Gets a y-axis value at the specified index.  This method is required by the
     * {@link DistributionApproximation} interface.
     *
     * @param index array index of the value.
     * @return the value.
     */
    public double getDensityAt(int index) {
        ensureEvaluated();
        return value[index];
    }

    /**
     * Overwrites the y-axis value at the specified index. The change is lost when the histogram is rebuilt.
     *
     * @param index array index of the value.
     * @param value new value.
     */
    public void setDensityAt(int index, double value) {
        ensureEvaluated();
        this.value[index] = value;
    }

    /**
     * Overwrites the evaluation point at the specified index. The change is lost when the histogram is rebuilt.
     *
     * @param index array index of the evaluation point.
     * @param value new evaluation point.
     */
    public void setPointAt(int index, double value) {
        ensureEvaluated();
        points[index] = value;
    }

    /**
     * Gets the smallest evaluation point, i.e. the midpoint of the first bin.
     *
     * @return first evaluation point.
     */
    public double getMinimum() {
        ensureEvaluated();
        return points[0];
    }

    /**
     * Gets the largest evaluation point, i.e. the midpoint of the last bin.
     *
     * @return last evaluation point.
     */
    public double getMaximum() {
        ensureEvaluated();
        return points[numberOfBins - 1];
    }

    /**
     * Gets the sum over all bins of the evaluation point times its value. This is the mean of the
     * approximated distribution only when the values sum to one.
     *
     * @return sum of point times value.
     */
    public double getMean() {
        ensureEvaluated();
        double m = 0.0;
        for (int i = 0; i < numberOfBins; i++) {
            m += points[i] * value[i];
        }
        return m;
    }

    /**
     * Gets the square root of the sum over all bins of the squared deviation of the evaluation point from
     * {@link #getMean()} times its value. This is the standard deviation of the approximated distribution
     * only when the values sum to one.
     *
     * @return square root of the value-weighted sum of squared deviations.
     */
    public double getStandardDeviation() {
        double m = getMean();
        double m2 = 0;
        for (int i = 0; i < numberOfBins; i++) {
            double deviation = points[i] - m;
            m2 += deviation * deviation * value[i];
        }
        return Math.sqrt(m2);
    }

    /**
     * Gets the common bin width.
     *
     * @return bin width.
     */
    public double getBinWidth() {
        refreshIfStale();
        return binWidth;
    }

    /**
     * Gets the number of evaluation points (and corresponding number of values).
     *  This method is required by the {@link DistributionApproximation} interface.
     *
     * @return number of evaluation points.
     */
    public int getNumberOfPoints() {
        refreshIfStale();
        return bins.size();
    }

    /**
     * Gets the iterator for the array list of bin objects.
     *
     * @return an iterator.
     */
    public Iterator<Bin> iterator() {
        refreshIfStale();
        return bins.iterator();
    }

    /**
     * A string representation of the histogram. It lists the bin intervals, midpoints, and values, starting
     * with the highest bin.
     *
     * @return histogram values for display as plain text.
     */
    @Override
    public String toString() {
        refreshIfStale();

        String valueHeader;
        if (histogramType == HistogramType.FREQUENCY) {
            valueHeader = "Frequency";
        } else if (histogramType == HistogramType.RELATIVE_FREQUENCY
                || histogramType == HistogramType.NORMALIZED_RELATIVE_FREQUENCY) {
            valueHeader = "Rel. Freq.";
        } else {
            valueHeader = "Density";
        }

        StringBuilder sb = new StringBuilder();
        Formatter f = new Formatter(sb);
        try {
            f.format("%n");
            f.format("%64s", "                        HISTOGRAM VALUES                        ");
            f.format("%n");
            f.format("%64s", "================================================================");
            f.format("%n");
            f.format("%16s", "Lower Bound");
            f.format("%18s", "Upper Bound");
            f.format("%15s", "MidPoint");
            f.format("%15s", valueHeader);
            f.format("%n");
            f.format("%64s", "----------------------------------------------------------------");
            f.format("%n");

            for (int i = bins.size() - 1; i > -1; i--) {
                Bin b = bins.get(i);
                String open = b.lowerInclusive() ? "[" : "(";
                String close = b.lowerInclusive() ? ")" : "]";
                f.format("%1s% 15.5f, % 15.5f%1s% 15.5f% 15.5f%n",
                        open, b.getLowerBound(), b.getUpperBound(), close, points[i], value[i]);
            }

            f.format("%64s", "================================================================");
            f.format("%n");
            f.format("%11s", "Binwidth = ");
            f.format("% .4f", binWidth);
            f.format("%n");
        } finally {
            f.close();
        }
        return sb.toString();
    }

}