com.addthis.hydra.data.util.FindChangePoints.java Source code

Java tutorial

Introduction

Here is the source code for com.addthis.hydra.data.util.FindChangePoints.java

Source

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.addthis.hydra.data.util;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import com.addthis.codec.codables.Codable;

import org.apache.commons.lang3.ArrayUtils;

/**
 * Tools for finding change points in an array of {@code Long} samples.
 *
 * <p>All operations are static. Note that {@link #findSignificantPoints} modifies the array it is
 * given: every sample reported as a peak is overwritten with the mean of its neighbors.
 */
public class FindChangePoints implements Codable {

    /** Smallest value {@link #sd(Long[])} will return, so that it can always be used as a divisor. */
    private static final double MIN_SPREAD = .0001;

    /**
     * Finds high points in an array of integers.
     *
     * <p>The scan keeps a single candidate. A sample becomes the new candidate when it is strictly
     * greater than both the current reference height and {@code min_height}. The reference height
     * starts at {@code data[0]} and is reset to 0 each time a peak is emitted. A candidate is emitted
     * once {@code max_width} subsequent samples have failed to beat it, or when the data ends.
     *
     * @param data       The array of integers in which to search
     * @param max_width  The maximum width to group high-points together
     * @param min_height The minimum height to consider
     * @return the peaks found, each built from (height, index) and typed {@code PEAK}; empty when
     *         {@code data} has fewer than two elements
     */
    public static List<ChangePoint> findHighPoints(Long[] data, int max_width, int min_height) {
        List<ChangePoint> rv = new ArrayList<>();
        // Guard before reading data[0]: an empty array must yield an empty result, not an exception.
        if (data.length <= 1) {
            return rv;
        }
        int currIndex = 0;
        long currHt = data[0];
        int currWidth = 0;
        boolean started = false;
        for (int i = 0; i < data.length; i++) {
            long value = data[i];
            if (value > currHt && value > min_height) {
                // New (higher) candidate: restart the width count from here.
                started = true;
                currIndex = i;
                currHt = value;
                currWidth = 0;
            } else if (started) {
                currWidth++;
                if (currWidth >= max_width) {
                    // Nothing beat the candidate within max_width samples: report it and reset.
                    rv.add(new ChangePoint(currHt, currIndex, ChangePoint.ChangePointType.PEAK));
                    started = false;
                    currWidth = 0;
                    currIndex = -1;
                    currHt = 0;
                }
            }
        }
        if (started) {
            // The data ended while a candidate was still open.
            rv.add(new ChangePoint(currHt, currIndex, ChangePoint.ChangePointType.PEAK));
        }
        return rv;
    }

    /**
     * Finds places where the data changed dramatically, either sustained or "instantaneously".
     *
     * <p>Isolated peaks are detected first and smoothed out of {@code data} <em>in place</em>; the
     * sustained change points are then searched for in the smoothed data. The returned list contains
     * the peaks followed by the change points.
     *
     * @param data              The array of integers in which to search; modified where peaks are found
     * @param minChange         minimum absolute difference from the expected value to report a point
     * @param minRatio          minimum absolute relative change versus the predicted value (change points only)
     * @param minZScore         threshold on the spread-based score (see the private helpers)
     * @param inactiveThreshold level used to tell START/STOP apart from RISE/FALL
     * @param windowSize        neighborhood width for peaks and look-back window for change points;
     *                          must be positive
     * @return the peaks and change points found
     */
    public static List<ChangePoint> findSignificantPoints(Long[] data, int minChange, double minRatio,
            double minZScore, int inactiveThreshold, int windowSize) {
        List<ChangePoint> rv = new ArrayList<>();
        // Order matters: peak smoothing mutates data before the change-point scan reads it.
        rv.addAll(findAndSmoothOverPeaks(data, minChange, minZScore, windowSize));
        rv.addAll(findChangePoints(data, minChange, minRatio, minZScore, inactiveThreshold, windowSize));
        return rv;
    }

    /**
     * Reports every index (from 2 on) whose value deviates from a linear extrapolation of the
     * preceding samples (at most {@code windowSize - 1} of them).
     *
     * <p>A point is reported only when all three of the following exceed their thresholds: the
     * difference divided by {@link #sd(Long[])} of the window, the absolute difference, and the
     * relative change versus the prediction (the prediction is floored at 1 for the ratio).
     */
    private static List<ChangePoint> findChangePoints(Long[] data, int minChange, double minRatio, double minZScore,
            int inactiveThreshold, int windowSize) {
        List<ChangePoint> rvList = new ArrayList<>();
        for (int i = 2; i < data.length; i++) {
            int startIndex = Math.max(i - windowSize + 1, 0);
            Long[] currSlice = Arrays.copyOfRange(data, startIndex, i);
            long nextValue = data[i];
            double predicted = linearPredictNext(currSlice);
            double diff = nextValue - predicted;
            double zScoreDiff = diff / sd(currSlice);
            double changeRatio = -1 + (double) (nextValue) / Math.max(predicted, 1.);
            // A NaN prediction (window shorter than two samples) fails every comparison below.
            if (Math.abs(zScoreDiff) > minZScore && Math.abs(diff) > minChange
                    && Math.abs(changeRatio) > minRatio) {
                ChangePoint.ChangePointType type = chooseTypeForChange((long) mean(currSlice), nextValue,
                        inactiveThreshold);
                // Cast to long rather than int so large differences are not clamped to the int range.
                rvList.add(new ChangePoint((long) diff, i, type));
            }
        }
        return rvList;
    }

    /**
     * Classifies a change from {@code before} to {@code after}.
     *
     * @return FALL or STOP for a decrease (STOP when {@code after} is not above the threshold),
     *         START or RISE otherwise (START when {@code before} is below the threshold)
     */
    private static ChangePoint.ChangePointType chooseTypeForChange(long before, long after, int inactiveThreshold) {
        if (before > after) {
            return after > inactiveThreshold ? ChangePoint.ChangePointType.FALL : ChangePoint.ChangePointType.STOP;
        } else {
            return before < inactiveThreshold ? ChangePoint.ChangePointType.START
                    : ChangePoint.ChangePointType.RISE;
        }
    }

    /**
     * Finds isolated peaks and replaces them, in {@code data} itself, with the mean of their neighbors.
     *
     * <p>For each index the neighborhood is {@code [i - width, i + width)} clipped to the array. The
     * sample is a peak when the spread of the neighborhood exceeds {@code minZscore} times the spread
     * of the neighborhood without the sample, and the sample differs from the neighbors' mean by more
     * than {@code minChange}. Because smoothing happens in place, later neighborhoods see the
     * already-smoothed values.
     */
    private static List<ChangePoint> findAndSmoothOverPeaks(Long[] data, int minChange, double minZscore,
            int width) {
        List<ChangePoint> rvList = new ArrayList<>();
        for (int i = 0; i < data.length; i++) {
            int leftEndpoint = Math.max(0, i - width);
            int rightEndpoint = Math.min(i + width, data.length);
            Long[] neighborhood = Arrays.copyOfRange(data, leftEndpoint, rightEndpoint);
            Long[] neighborhoodWithout = ArrayUtils.addAll(Arrays.copyOfRange(data, leftEndpoint, i),
                    Arrays.copyOfRange(data, i + 1, rightEndpoint));
            if (sd(neighborhood) > minZscore * sd(neighborhoodWithout)) {
                double neighborMean = mean(neighborhoodWithout);
                double change = data[i] - neighborMean;
                if (Math.abs(change) > minChange) {
                    // Cast to long rather than int so large differences are not clamped to the int range.
                    rvList.add(new ChangePoint((long) change, i, ChangePoint.ChangePointType.PEAK));
                    data[i] = (long) neighborMean;
                }
            }
        }
        return rvList;
    }

    /** Sums the values in a {@code long} so that large samples do not overflow an {@code int}. */
    private static long sum(Long[] longs) {
        long rv = 0;
        for (long z : longs) {
            rv += z;
        }
        return rv;
    }

    /**
     * Computes the arithmetic mean of the given values.
     *
     * @param longs the values to average
     * @return the mean, or {@code NaN} when {@code longs} is empty
     */
    public static double mean(Long[] longs) {
        return (double) (sum(longs)) / longs.length;
    }

    /**
     * Measures the spread of the values around their mean.
     *
     * <p>This is the square root of the <em>sum</em> of squared residuals; it is not divided by the
     * number of samples, so it is not a standard deviation in the usual sense. The result is never
     * smaller than {@link #MIN_SPREAD}, which keeps it usable as a divisor.
     */
    private static double sd(Long[] longs) {
        double mean = mean(longs);
        double sumSquareResiduals = 0;
        for (long z : longs) {
            sumSquareResiduals += Math.pow(mean - z, 2);
        }
        return Math.max(Math.sqrt(sumSquareResiduals), MIN_SPREAD);
    }

    /**
     * Fits a least-squares line through the points {@code (i, ints[i])} and evaluates it at
     * {@code x = ints.length}, i.e. one step past the last sample.
     *
     * @return the extrapolated value, or {@code NaN} when fewer than two samples are given (the
     *         slope is then 0/0)
     */
    private static double linearPredictNext(Long[] ints) {
        int len = ints.length;
        long sumY = 0;
        long sumXX = 0;
        long sumXY = 0;
        for (int i = 0; i < len; i++) {
            long y = ints[i];
            sumY += y;
            sumXX += (long) i * i;
            sumXY += i * y;
        }
        double meanX = .5 * (len - 1.);
        double meanY = (double) sumY / len;
        double meanXX = (double) sumXX / len;
        double meanXY = (double) sumXY / len;
        double slope = (meanXY - meanX * meanY) / (meanXX - Math.pow(meanX, 2));
        double intercept = meanY - slope * meanX;
        return slope * len + intercept;
    }
}