Java Array Normalize normalizeZscore(double[] x)

Here you can find the source of normalizeZscore(double[] x)

Description

Calculates x_i = (x_i - mean(x)) / std(x) in place. NaN entries are ignored when computing the mean and standard deviation and are left unchanged.

License

Open Source License

Parameter

Parameter Description
x x

Return

x

Declaration

public static double[] normalizeZscore(double[] x) 

Method Source Code

//package com.java2s;
/**
 * Copyright 2004-2006 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

import java.util.Arrays;

public class Main {
    /**
     * Normalizes the array in place to z-scores: x_i = (x_i - mean(x)) / std(x).
     * <p>
     * NaN entries are ignored when computing the mean and the (N-1 normalized) standard deviation and are left
     * unchanged. If fewer than two non-NaN values are present, or all non-NaN values are equal, the standard deviation
     * is NaN or zero and the normalized entries become NaN.
     *
     * @param x
     *            the data to normalize; modified in place
     * @return the same array instance {@code x}, after normalization
     */
    public static double[] normalizeZscore(double[] x) {
        double mn = mean(x, 0);
        double sd = standardDeviation(x, 0);
        for (int i = 0; i < x.length; i++) {
            if (!Double.isNaN(x[i])) {
                x[i] = (x[i] - mn) / sd;
            }
        }
        return x;
    }

    /**
     * Compute the mean of all elements in the array. No missing values (NaN) are allowed.
     *
     * @param data
     *            data
     * @throws IllegalArgumentException
     *             if the array contains NaN values.
     * @return the arithmetic mean, or NaN if the array is empty
     */
    public static double mean(double[] data) {
        return mean(data, 0, data.length - 1);
    }

    /**
     * Compute the mean of the elements between two indices (both inclusive). No missing values (NaN) are allowed.
     * <p>
     * Both indices are clamped to the valid index range of the array; if the start index ends up behind the end index,
     * only the element at the end index is used.
     *
     * @param data
     *            data
     * @param startIndex
     *            start index (inclusive)
     * @param endIndex
     *            end index (inclusive)
     * @throws IllegalArgumentException
     *             if the selected range contains NaN values.
     * @return the arithmetic mean of the selected range, or NaN if the array is empty
     */
    public static double mean(double[] data, int startIndex, int endIndex) {
        // An empty array has no valid index to clamp to; report "undefined" instead of reading data[-1].
        if (data.length == 0) {
            return Double.NaN;
        }
        int last = data.length - 1;
        int end = Math.min(Math.max(endIndex, 0), last);
        int start = Math.min(Math.min(Math.max(startIndex, 0), last), end);

        double sum = 0;
        for (int i = start; i <= end; i++) {
            if (Double.isNaN(data[i])) {
                throw new IllegalArgumentException("NaN not allowed in mean calculation");
            }
            sum += data[i];
        }
        return sum / (end - start + 1);
    }

    /**
     * Compute the mean of all elements in the array with given indices. No missing values (NaN) are allowed.
     *
     * @param data
     *            data
     * @param inds
     *            indices into {@code data} of the elements to average
     * @throws IllegalArgumentException
     *             if one of the selected elements is NaN.
     * @return the arithmetic mean of the selected elements, or NaN if {@code inds} is empty
     */
    public static double mean(double[] data, int[] inds) {
        double sum = 0;
        for (int i = 0; i < inds.length; i++) {
            double value = data[inds[i]];
            if (Double.isNaN(value)) {
                throw new IllegalArgumentException("NaN not allowed in mean calculation");
            }
            sum += value;
        }
        return sum / inds.length;
    }

    /**
     * Compute the mean of the elements between two indices (both inclusive). No missing values (NaN) are allowed.
     * <p>
     * Both indices are clamped to the valid index range of the array; if the start index ends up behind the end index,
     * only the element at the end index is used.
     *
     * @param data
     *            data
     * @param startIndex
     *            start index (inclusive)
     * @param endIndex
     *            end index (inclusive)
     * @throws IllegalArgumentException
     *             if the selected range contains NaN values.
     * @return the arithmetic mean of the selected range, or NaN if the array is empty
     */
    public static float mean(float[] data, int startIndex, int endIndex) {
        // An empty array has no valid index to clamp to; report "undefined" instead of reading data[-1].
        if (data.length == 0) {
            return Float.NaN;
        }
        int last = data.length - 1;
        int end = Math.min(Math.max(endIndex, 0), last);
        int start = Math.min(Math.min(Math.max(startIndex, 0), last), end);

        float sum = 0;
        for (int i = start; i <= end; i++) {
            if (Float.isNaN(data[i])) {
                throw new IllegalArgumentException("NaN not allowed in mean calculation");
            }
            sum += data[i];
        }
        return sum / (end - start + 1);
    }

    /**
     * Compute the mean of all elements in the array. No missing values (NaN) are allowed.
     *
     * @param data
     *            data
     * @throws IllegalArgumentException
     *             if the array contains NaN values.
     * @return the arithmetic mean, or NaN if the array is empty
     */
    public static float mean(float[] data) {
        return mean(data, 0, data.length - 1);
    }

    /**
     * Compute the mean of all elements in the array with given indices. No missing values (NaN) are allowed.
     *
     * @param data
     *            data
     * @param inds
     *            indices into {@code data} of the elements to average
     * @throws IllegalArgumentException
     *             if one of the selected elements is NaN.
     * @return the arithmetic mean of the selected elements, or NaN if {@code inds} is empty
     */
    public static float mean(float[] data, int[] inds) {
        float sum = 0;
        for (int i = 0; i < inds.length; i++) {
            float value = data[inds[i]];
            if (Float.isNaN(value)) {
                throw new IllegalArgumentException("NaN not allowed in mean calculation");
            }
            sum += value;
        }
        return sum / inds.length;
    }

    /**
     * Compute the mean of all elements in the array. This function can deal with NaNs: NaN entries are skipped.
     *
     * @param data
     *            double[]
     * @param opt
     *            0: arithmetic mean, any other value: geometric mean
     * @return the arithmetic mean of the non-NaN values if {@code opt} is 0, otherwise their geometric mean computed as
     *         exp(mean(log(x))); NaN if there are no non-NaN values
     */
    public static double mean(double[] data, int opt) {
        boolean geometric = opt != 0;
        int numData = 0;
        double sum = 0;
        for (int i = 0; i < data.length; i++) {
            if (!Double.isNaN(data[i])) {
                // The geometric mean is the exponential of the arithmetic mean of the logarithms.
                sum += geometric ? Math.log(data[i]) : data[i];
                numData++;
            }
        }
        double average = sum / numData;
        return geometric ? Math.exp(average) : average;
    }

    /**
     * Compute the mean row of a matrix, i.e. one mean value per column.
     *
     * @param x
     *            the matrix consisting of row vectors; must contain at least one row
     * @return a vector with one mean per column of {@code x}
     */
    public static double[] mean(double[][] x) {
        return mean(x, true);
    }

    /**
     * Compute mean values of a matrix over all rows or over all columns.
     *
     * @param x
     *            the matrix consisting of row vectors; must contain at least one row
     * @param isAlongRows
     *            if true, average over all rows (one mean per column); if false, average over all columns (one mean per
     *            row)
     * @return the vector of mean values
     */
    public static double[] mean(double[][] x, boolean isAlongRows) {
        int count = isAlongRows ? x.length : x[0].length;
        int[] indices = new int[count];
        for (int i = 0; i < count; i++) {
            indices[i] = i;
        }
        return mean(x, isAlongRows, indices);
    }

    /**
     * Compute mean values of a matrix over a selection of rows or a selection of columns.
     *
     * @param x
     *            the matrix consisting of row vectors
     * @param isAlongRows
     *            if true, {@code indicesOfX} selects rows and the result holds one mean per column; if false,
     *            {@code indicesOfX} selects columns and the result holds one mean per row
     * @param indicesOfX
     *            the row indices (if {@code isAlongRows}) or column indices (otherwise) to average over; must not be
     *            empty when {@code isAlongRows} is true
     * @return the vector of mean values
     */
    public static double[] mean(double[][] x, boolean isAlongRows, int[] indicesOfX) {
        double[] meanVector;
        if (isAlongRows) {
            // The width of the first selected row defines the length of the result.
            int numCols = x[indicesOfX[0]].length;
            meanVector = new double[numCols];
            for (int i = 0; i < indicesOfX.length; i++) {
                double[] row = x[indicesOfX[i]];
                for (int j = 0; j < numCols; j++) {
                    meanVector[j] += row[j];
                }
            }
        } else {
            meanVector = new double[x.length];
            for (int i = 0; i < indicesOfX.length; i++) {
                int col = indicesOfX[i];
                for (int j = 0; j < x.length; j++) {
                    meanVector[j] += x[j][col];
                }
            }
        }

        for (int j = 0; j < meanVector.length; j++) {
            meanVector[j] /= indicesOfX.length;
        }
        return meanVector;
    }

    /**
     * Compute the standard deviation of the given data around its own mean. No missing values (NaN) are allowed.
     *
     * @param data
     *            data
     * @throws IllegalArgumentException
     *             if the array contains NaN values.
     * @return the square root of {@link #variance(double[])}
     */
    public static double standardDeviation(double[] data) {
        return standardDeviation(data, mean(data));
    }

    /**
     * Compute the standard deviation of the given data around a given mean value.
     *
     * @param data
     *            data
     * @param meanVal
     *            the mean value to measure deviations from
     * @return the square root of {@link #variance(double[], double)}
     */
    public static double standardDeviation(double[] data, double meanVal) {
        return standardDeviation(data, meanVal, 0, data.length - 1);
    }

    /**
     * Compute the standard deviation of a range of the given data around a given mean value.
     *
     * @param data
     *            data
     * @param meanVal
     *            the mean value to measure deviations from
     * @param startIndex
     *            start index (inclusive)
     * @param endIndex
     *            end index (inclusive)
     * @return the square root of {@link #variance(double[], double, int, int)}
     */
    public static double standardDeviation(double[] data, double meanVal, int startIndex, int endIndex) {
        return Math.sqrt(variance(data, meanVal, startIndex, endIndex));
    }

    /**
     * Compute the standard deviation of the given data, this function can deal with NaNs
     *
     * @param data
     *            double[]
     * @param opt
     *            0: normalizes with N-1, this provides the square root of best unbiased estimator of the variance, 1: normalizes
     *            with N, this provides the square root of the second moment around the mean
     * @return Math.sqrt(variance(data, opt))
     */
    public static double standardDeviation(double[] data, int opt) {
        return Math.sqrt(variance(data, opt));
    }

    /**
     * Find the maximum of all elements in the array, ignoring elements that are NaN.
     *
     * @param data
     *            data
     * @return the largest non-NaN value, or NaN if there is none
     */
    public static double max(double[] data) {
        double max = Double.NaN;
        for (int i = 0; i < data.length; i++) {
            if (Double.isNaN(data[i])) {
                continue;
            }
            if (Double.isNaN(max) || data[i] > max) {
                max = data[i];
            }
        }
        return max;
    }

    /**
     * Find the maximum of all elements in the array.
     *
     * @param data
     *            data; must contain at least one element
     * @throws ArrayIndexOutOfBoundsException
     *             if the array is empty.
     * @return max
     */
    public static int max(int[] data) {
        int max = data[0];
        for (int i = 1; i < data.length; i++) {
            if (data[i] > max) {
                max = data[i];
            }
        }
        return max;
    }

    /**
     * Find the minimum of all elements in the array, ignoring elements that are NaN.
     *
     * @param data
     *            data
     * @return the smallest non-NaN value, or NaN if there is none
     */
    public static double min(double[] data) {
        double min = Double.NaN;
        for (int i = 0; i < data.length; i++) {
            if (Double.isNaN(data[i])) {
                continue;
            }
            if (Double.isNaN(min) || data[i] < min) {
                min = data[i];
            }
        }
        return min;
    }

    /**
     * Find the minimum of all elements in the array.
     *
     * @param data
     *            data; must contain at least one element
     * @throws ArrayIndexOutOfBoundsException
     *             if the array is empty.
     * @return min
     */
    public static int min(int[] data) {
        int min = data[0];
        for (int i = 1; i < data.length; i++) {
            if (data[i] < min) {
                min = data[i];
            }
        }
        return min;
    }

    /**
     * Compute the natural logarithm of every element.
     *
     * @param a
     *            the input values
     * @return a new array with {@code Math.log(a[i])} at each position
     */
    public static double[] log(double[] a) {
        double[] c = new double[a.length];
        for (int i = 0; i < a.length; i++) {
            c[i] = Math.log(a[i]);
        }
        return c;
    }

    /**
     * Compute the natural logarithm of every element that exceeds a threshold, and substitute a fixed value for the
     * others.
     *
     * @param a
     *            the input values
     * @param minimumValue
     *            elements must be strictly greater than this value to be log-transformed
     * @param fixedValue
     *            the value stored for elements that are not greater than {@code minimumValue} (including NaN)
     * @return a new array with the transformed values
     */
    public static double[] log(double[] a, double minimumValue, double fixedValue) {
        double[] c = new double[a.length];
        for (int i = 0; i < a.length; i++) {
            c[i] = (a[i] > minimumValue) ? Math.log(a[i]) : fixedValue;
        }
        return c;
    }

    /**
     * Compute e raised to the power of every element.
     *
     * @param a
     *            the input values
     * @return a new array with {@code Math.exp(a[i])} at each position
     */
    public static double[] exp(double[] a) {
        double[] c = new double[a.length];
        for (int i = 0; i < a.length; i++) {
            c[i] = Math.exp(a[i]);
        }
        return c;
    }

    /**
     * Compute the variance in the array. This function can deal with NaNs: NaN entries are skipped.
     *
     * @param data
     *            double[]
     * @param opt
     *            0: normalizes with N-1, this provides the best unbiased estimator of the variance, any other value:
     *            normalizes with N, this provides the second moment around the mean
     * @return S / (numData - 1) if opt is 0, S / numData otherwise, where S is the sum of squared deviations from the
     *         mean and numData the number of non-NaN values; NaN if there are no non-NaN values (and, for opt 0, also
     *         if there is only one)
     */
    public static double variance(double[] data, int opt) {
        // Pseudocode from wikipedia, which cites Knuth:
        // n = 0
        // mean = 0
        // S = 0
        // foreach x in data:
        // n = n + 1
        // delta = x - mean
        // mean = mean + delta/n
        // S = S + delta*(x - mean) // This expression uses the new value of mean
        // end for
        // variance = S/(n - 1)
        double mean = 0;
        double S = 0;
        int numData = 0;
        for (int i = 0; i < data.length; i++) {
            if (!Double.isNaN(data[i])) {
                numData++;
                double delta = data[i] - mean;
                mean += delta / numData;
                S += delta * (data[i] - mean);
            }
        }
        // Without any data the variance is undefined; avoid returning 0 / -1 = -0.0 for opt == 0.
        if (numData == 0) {
            return Double.NaN;
        }
        if (opt == 0) {
            return S / (numData - 1);
        }
        return S / numData;
    }

    /**
     * Compute the variance of the given data around its own mean. No missing values (NaN) are allowed.
     *
     * @param data
     *            data
     * @throws IllegalArgumentException
     *             if the array contains NaN values.
     * @return the variance, see {@link #variance(double[], double, int, int)}
     */
    public static double variance(double[] data) {
        return variance(data, mean(data));
    }

    /**
     * Compute the variance of the given data around its own mean. No missing values (NaN) are allowed.
     *
     * @param data
     *            data
     * @throws IllegalArgumentException
     *             if the array contains NaN values.
     * @return the variance, see {@link #variance(float[], float, int, int)}
     */
    public static float variance(float[] data) {
        return variance(data, mean(data));
    }

    /**
     * Compute the variance of the given data around a given mean value.
     *
     * @param data
     *            data
     * @param meanVal
     *            the mean value to measure deviations from
     * @return the variance, see {@link #variance(double[], double, int, int)}
     */
    public static double variance(double[] data, double meanVal) {
        return variance(data, meanVal, 0, data.length - 1);
    }

    /**
     * Compute the variance of the given data around a given mean value.
     *
     * @param data
     *            data
     * @param meanVal
     *            the mean value to measure deviations from
     * @return the variance, see {@link #variance(float[], float, int, int)}
     */
    public static float variance(float[] data, float meanVal) {
        return variance(data, meanVal, 0, data.length - 1);
    }

    /**
     * Compute the variance of a range of the given data around a given mean value. The computation is carried out in
     * double precision and the result is narrowed to float.
     *
     * @param data
     *            data
     * @param meanVal
     *            the mean value to measure deviations from
     * @param startIndex
     *            start index (inclusive)
     * @param endIndex
     *            end index (inclusive)
     * @return the variance, see {@link #variance(double[], double, int, int)}
     */
    public static float variance(float[] data, float meanVal, int startIndex, int endIndex) {
        double[] ddata = new double[data.length];
        for (int i = 0; i < data.length; i++) {
            ddata[i] = data[i];
        }
        return (float) variance(ddata, meanVal, startIndex, endIndex);
    }

    /**
     * Compute the variance of a range of the given data around a given mean value.
     * <p>
     * The start index is clamped to the valid index range, and the end index is clamped to lie between the start index
     * and the last index. The sum of squared deviations is divided by (number of elements - 1); for a single element the
     * squared deviation is returned undivided.
     *
     * @param data
     *            data
     * @param meanVal
     *            the mean value to measure deviations from
     * @param startIndex
     *            start index (inclusive)
     * @param endIndex
     *            end index (inclusive)
     * @return the variance of the selected range, or NaN if the array is empty
     */
    public static double variance(double[] data, double meanVal, int startIndex, int endIndex) {
        // An empty array has no valid index to clamp to; report "undefined" instead of reading data[-1].
        if (data.length == 0) {
            return Double.NaN;
        }
        int last = data.length - 1;
        int start = Math.min(Math.max(startIndex, 0), last);
        int end = Math.min(Math.max(endIndex, start), last);

        double sumSquares = 0.0;
        for (int i = start; i <= end; i++) {
            double deviation = data[i] - meanVal;
            sumSquares += deviation * deviation;
        }

        int count = end - start + 1;
        if (count > 1) {
            sumSquares /= (count - 1);
        }
        return sumSquares;
    }

    /**
     * Returns the column-wise variances of matrix x, computed over all rows.
     *
     * @param x
     *            the matrix consisting of row vectors
     * @param meanVector
     *            the vector of mean values, one per column
     * @return the vector of variances, or null if the input is null or empty
     */
    public static double[] variance(double[][] x, double[] meanVector) {
        return variance(x, meanVector, true);
    }

    /**
     * Returns the variance of rows or columns of matrix x
     *
     * @param x
     *            the matrix consisting of row vectors
     * @param meanVector
     *            the vector of mean values -- one mean per column if {@code isAlongRows} is true, one mean per row
     *            otherwise
     * @param isAlongRows
     *            if true, compute the variance of x[0][0], x[1][0] etc. given mean[0]; if false, compute the variances
     *            for the vectors x[0], x[1] etc. separately, given the respective mean[0], mean[1] etc.
     * @return the vector of variances (normalized with N-1; for a single value the squared deviation is returned
     *         undivided), or null if {@code x} or {@code meanVector} is null, {@code x} has no rows, or the first row
     *         of {@code x} is null or empty
     */
    public static double[] variance(double[][] x, double[] meanVector, boolean isAlongRows) {
        if (x == null || x.length == 0 || x[0] == null || x[0].length == 0 || meanVector == null) {
            return null;
        }

        double[] result;
        if (isAlongRows) {
            int numRows = x.length;
            int numCols = x[0].length;
            result = new double[numCols];
            for (int j = 0; j < numCols; j++) {
                for (int i = 0; i < numRows; i++) {
                    double deviation = x[i][j] - meanVector[j];
                    result[j] += deviation * deviation;
                }
                // A single row would mean dividing by zero; keep the squared deviation, as the 1-D variance does.
                if (numRows > 1) {
                    result[j] /= (numRows - 1);
                }
            }
        } else {
            result = new double[x.length];
            for (int i = 0; i < x.length; i++) {
                result[i] = variance(x[i], meanVector[i]);
            }
        }
        return result;
    }
}

Related

  1. normalizeVectorMax(double[] input)
  2. normalizeVectorMaxMin(float[] samples)
  3. normalizeVectors(float[][] vectors, boolean maxMin)
  4. normalizeVoxelDimensions(final double[] voxelDimensions)
  5. normalizeWith(double[] arr, double v)
  6. normBySortedPointersInverse(double[] d, int[] pointers)
  7. normData(double[] data)
  8. normII(double[] b)
  9. normLat(double[] latLng)