Example usage for java.util Arrays binarySearch

List of usage examples for java.util Arrays binarySearch

Introduction

On this page you can find example usage of java.util.Arrays.binarySearch.

Prototype

public static int binarySearch(Object[] a, int fromIndex, int toIndex, Object key) 

Document

Searches a range of the specified array for the specified object using the binary search algorithm.
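
Below is a minimal, self-contained sketch of how this overload behaves (it is not taken from the source file further down; the class name and sample values are illustrative only). The searched range [fromIndex, toIndex) must be sorted in ascending order; the return value is the index of the key when it is found, and (-(insertion point) - 1) when it is not.

import java.util.Arrays;

public class BinarySearchRangeExample {
    public static void main(String[] args) {
        // The searched range [fromIndex, toIndex) must be sorted in ascending order.
        Integer[] values = { 3, 7, 9, 12, 15, 21, 30 };

        // Search only indices 2 (inclusive) to 6 (exclusive).
        int found = Arrays.binarySearch(values, 2, 6, 15);
        System.out.println(found); // prints 4, the index of the key

        // When the key is absent, the result is (-(insertion point) - 1).
        int missing = Arrays.binarySearch(values, 2, 6, 14);
        System.out.println(missing); // prints -5, since the insertion point is index 4
    }
}

The GTSHelper source below relies on the same range-based idiom, but with the primitive long[] overload, advancing fromIndex to the previous match as it walks over sorted tick arrays.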

Usage

From source file: io.warp10.continuum.gts.GTSHelper.java

/**
 * Compute STL, i.e. Seasonal-Trend decomposition procedure based on LOWESS
 * @see http://www.wessa.net/download/stl.pdf
 * Global parameters:
 * @param gts                : Input GTS, must be bucketized
 * @param buckets_per_period : number of buckets for one period of the seasonality
 * @param inner              : Precision, i.e. number of inner loops (to perform the decomposition)
 * @param outer              : Robustness, i.e. number of outer loops (to alleviate the impact of outliers on the decomposition)
 * @param ...
 * 
 * Optional sets of parameters shared by call of lowess of the same kind:
 * @param neighbour_*        : Bandwidth, i.e. number of nearest neighbours to consider when applying LOWESS
 * @param degree_*           : Degree, i.e. degree of the polynomial fit
 * @param jump_*             : Jump, i.e. number of buckets to skip to speed up computation. These buckets are interpolated afterward.
 * 
 * With:
 * @param _* = _s            : lowess calls during seasonal extract step
 * @param _* = _l            : lowess calls during low frequency filtering step
 * @param _* = _t            : lowess calls during trend extract step
 * @param _* = _p            : lowess calls during post seasonal smoothing step
 * 
 * @return a list of 2 GTS consisting of the seasonal and trend parts of the decomposition.
 */
public static List<GeoTimeSerie> stl(GeoTimeSerie gts,

        // big picture STL parameters
        int buckets_per_period, int inner, int outer,

        // lowess parameters for seasonal extract
        int neighbour_s, int degree_s, int jump_s,

        // lowess parameters for low-pass filtering
        int neighbour_l, int degree_l, int jump_l,

        // lowess parameters for trend extract
        int neighbour_t, int degree_t, int jump_t,

        // lowess parameters for seasonal post-smoothing
        int neighbour_p, int degree_p, int jump_p

) throws WarpScriptException {
    if (TYPE.DOUBLE != gts.type && TYPE.LONG != gts.type) {
        throw new WarpScriptException("Can only be applied on numeric Geo Time Series.");
    }

    if (!isBucketized(gts)) {
        throw new WarpScriptException("Can only be applied on bucketized Geo Time Series");
    }

    //
    // Sort the GTS
    //

    sort(gts, false);

    //
    // Allocate tables: Y = S + T + R
    // Y: initial GTS
    // S: seasonal
    // T: trend
    // R: residual
    //

    int nonnull = gts.values;
    int size = gts.bucketcount;

    // limit size at the moment
    if (size - nonnull > 500000) {
        throw new WarpScriptException("More than 500000 missing values");
    }

    // At each iteration of the inner loop, seasonal is augmented by 2*bpp buckets.
    // The current implementation fills every gap in seasonal with a value estimated by lowess.
    // Hence the sizehint choice.
    // TODO(JCV): maybe rethink STL so that it can handle missing values better.
    int sizehint = size + 2 * buckets_per_period;
    GeoTimeSerie seasonal = gts.cloneEmpty(sizehint);
    try {
        seasonal.reset(Arrays.copyOf(gts.ticks, sizehint), null, null, new double[sizehint], nonnull);
    } catch (IOException ioe) {
        throw new WarpScriptException("IOException in reset method: " + ioe.getMessage());
    }

    GeoTimeSerie trend = gts.cloneEmpty(sizehint);
    try {
        trend.reset(Arrays.copyOf(gts.ticks, sizehint), null, null, new double[sizehint], nonnull);
    } catch (IOException ioe) {
        throw new WarpScriptException("IOException in reset method: " + ioe.getMessage());
    }

    // lowpassed will borrow the body of trend in step 3
    GeoTimeSerie lowpassed = trend;

    // Allocate an array to update the weights through the outer loop
    // The weights used will be rho*weights
    double[] rho = new double[nonnull];
    Arrays.fill(rho, 1.0D);

    // residual will borrow body of rho
    double[] residual = rho;

    // Weights used in lowess computations
    int nei = Math.max(Math.max(neighbour_s, neighbour_l), neighbour_t);
    double[] weights = new double[nei];
    // FIXME(JCV): maybe a smarter size to use here?

    //
    // Outer loop
    //

    for (int s = 0; s < outer + 1; s++) {

        //
        // Inner loop
        //

        for (int k = 0; k < inner; k++) {

            //
            // Step 1: Detrending
            //

            int idx_t = 0;
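            // gts was sorted above and trend.ticks was initialized from gts.ticks, so each
            // input tick can be located with a range binary search whose fromIndex (idx_t)
            // advances with every match.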
            for (int idx = 0; idx < nonnull; idx++) {
                idx_t = Arrays.binarySearch(trend.ticks, idx_t, nonnull, gts.ticks[idx]);
                seasonal.doubleValues[idx] = ((Number) valueAtIndex(gts, idx)).doubleValue()
                        - trend.doubleValues[idx_t];
            }

            //
            // Step 2: Cycle-subseries smoothing
            //

            GeoTimeSerie subCycle = null;
            GeoTimeSerie subRho = null;

            // smoothing of each cycle-subseries
            for (int c = 0; c < buckets_per_period; c++) {

                // extracting a cycle-subseries and extending it by one value at both ends
                subCycle = subCycleSerie(seasonal, seasonal.lastbucket - c * seasonal.bucketspan,
                        buckets_per_period, true, subCycle);
                subCycle.lastbucket += subCycle.bucketspan;
                subCycle.bucketcount += 2;

                // extracting subRho
                if (s > 0) {
                    double[] tmp = seasonal.doubleValues;
                    seasonal.doubleValues = rho;
                    subRho = subCycleSerie(seasonal, seasonal.lastbucket - c * seasonal.bucketspan,
                            buckets_per_period, true, subRho);
                    seasonal.doubleValues = tmp;
                }

                // applying lowess
                lowess_stl(subCycle, seasonal, neighbour_s, degree_s, jump_s, weights,
                        s > 0 ? subRho.doubleValues : rho);
            }

            /*
             * Concretely, with the added points, seasonal.lastbucket is one period behind reality,
             * and seasonal holds 2*bpp more buckets than its bucketcount indicates.
             */

            // Updating bucket parameters of seasonal for clarity
            seasonal.lastbucket += seasonal.bucketspan * buckets_per_period;
            seasonal.bucketcount += 2 * buckets_per_period;

            //
            // Step 3: Low-Pass Filtering of Smoothed Cycle-subseries
            //

            sort(seasonal);

            // FIXME(JCV): what happens if buckets_per_period < bucketcount / buckets_per_period ?

            // Applying first moving average of size bpp

            // First average
            double sum = 0;
            for (int r = 0; r < buckets_per_period; r++) {
                sum += seasonal.doubleValues[r];
            }
            lowpassed.doubleValues[0] = sum / buckets_per_period;

            // other averages
            for (int r = 1; r < seasonal.bucketcount - buckets_per_period + 1; r++) {
                sum += seasonal.doubleValues[r + buckets_per_period - 1] - seasonal.doubleValues[r - 1];
                lowpassed.doubleValues[r] = sum / buckets_per_period;
            }

            // Applying 2nd moving average of size bpp
            sum = 0;
            for (int r = 0; r < buckets_per_period; r++) {
                sum += lowpassed.doubleValues[r];
            }
            double tmp = lowpassed.doubleValues[0];
            lowpassed.doubleValues[0] = sum / buckets_per_period;
            for (int r = 1; r <= seasonal.bucketcount - 2 * buckets_per_period + 1; r++) {
                sum += lowpassed.doubleValues[r + buckets_per_period - 1] - tmp;
                tmp = lowpassed.doubleValues[r];
                lowpassed.doubleValues[r] = sum / buckets_per_period;
            }

            // Applying 3rd moving average of size 3
            for (int r = 0; r < seasonal.bucketcount - 2 * buckets_per_period; r++) {
                lowpassed.doubleValues[r] += lowpassed.doubleValues[r + 1] + lowpassed.doubleValues[r + 2];
                lowpassed.doubleValues[r] /= 3;
            }

            // Update size of gts
            lowpassed.bucketcount = seasonal.bucketcount - 2 * buckets_per_period;
            lowpassed.lastbucket = seasonal.lastbucket - buckets_per_period * seasonal.bucketspan;
            lowpassed.values = lowpassed.bucketcount;

            // Lowess_l
            lowpassed = rlowess(lowpassed, neighbour_l, 0, (jump_l + 1) * lowpassed.bucketspan, degree_l,
                    weights, null, true);

            //
            // Step 4: Detrending of Smoothed Cycle-Subseries
            //

            // shifting seasonal bucket parameters back
            seasonal.lastbucket -= seasonal.bucketspan * buckets_per_period;
            seasonal.bucketcount -= 2 * buckets_per_period;

            if (seasonal.bucketcount != lowpassed.values) {
                throw new WarpScriptException(
                        "stl impl error #1: " + seasonal.bucketcount + " vs " + lowpassed.values);
            }

            for (int r = 0; r < seasonal.bucketcount; r++) {
                seasonal.doubleValues[r] = seasonal.doubleValues[r + buckets_per_period]
                        - lowpassed.doubleValues[r];
                seasonal.ticks[r] = seasonal.ticks[r + buckets_per_period];
            }

            // trim seasonal back
            seasonal.values = seasonal.bucketcount;

            //
            // Step 5: Deseasonalizing
            //

            int idx_s = 0;
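            // Same range binary search idiom: map each input tick to its index in the
            // (sorted) seasonal ticks.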
            for (int idx = 0; idx < nonnull; idx++) {
                idx_s = Arrays.binarySearch(seasonal.ticks, idx_s, nonnull, gts.ticks[idx]);
                trend.doubleValues[idx] = ((Number) valueAtIndex(gts, idx)).doubleValue()
                        - seasonal.doubleValues[idx_s];
            }

            trend.values = nonnull;
            trend.lastbucket = gts.lastbucket;
            trend.bucketspan = gts.bucketspan;
            trend.bucketcount = size;

            //
            // Step 6: Trend smoothing
            //

            // FIXME(JCV): currently all buckets are estimated or interpolated, but estimating buckets with no value is only needed on the last iteration.
            // However, the speed gain would be small if enough points are already interpolated.

            trend = rlowess(trend, neighbour_t, 0, (jump_t + 1) * trend.bucketspan, degree_t, weights, rho,
                    true);
        }

        //
        // Robustifying operations of outer loop (except last time)
        //

        if (s < outer) {

            // compute residual
            int idx_s = 0;
            int idx_t = 0;
            for (int idx = 0; idx < nonnull; idx++) {
                idx_s = Arrays.binarySearch(seasonal.ticks, idx_s, nonnull, gts.ticks[idx]);
                //idx_t = Arrays.binarySearch(trend.ticks, idx_t, nonnull, gts.ticks[idx]);
                // we assume idx_t == idx_s
                idx_t = idx_s;
                residual[idx] = Math.abs(((Number) valueAtIndex(gts, idx)).doubleValue()
                        - seasonal.doubleValues[idx_s] - trend.doubleValues[idx_t]);
            }

            //
            // Update robustifying weights 
            //

            // compute median of abs(residual)
            double median;
            double[] sorted = Arrays.copyOf(residual, gts.values);
            Arrays.sort(sorted);
            if (gts.values % 2 == 0) {
                median = (sorted[gts.values / 2] + sorted[gts.values / 2 - 1]) / 2;
            } else {
                median = sorted[gts.values / 2];
            }

            // compute h = 6 * median and rho = bisquare(|residual|/h)
            double h = 6 * median;

            for (int k = 0; k < gts.values; k++) {
                if (0 == h) {
                    rho[k] = 1.0D;
                } else {
                    double u = residual[k] / h;

                    if (u >= 1.0) {
                        rho[k] = 0.0D;
                    } else {
                        rho[k] = 1.0D - u * u;
                        rho[k] = rho[k] * rho[k];
                    }
                }
            }
        }
    }

    //
    // Post seasonal smoothing
    //

    if (neighbour_p > 0) {
        seasonal = rlowess(seasonal, neighbour_p, 0, (jump_p + 1) * seasonal.bucketspan, degree_p);
    }

    //
    // Locations and elevations
    //

    int v = 0;
    if (null != gts.locations) {
        for (int u = 0; u < size; u++) {
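            // binarySearch returns a negative value when the tick is not present,
            // in which case there is no location to copy over.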
            v = Arrays.binarySearch(gts.ticks, v, nonnull, seasonal.ticks[u]);
            seasonal.locations[u] = v < 0 ? GeoTimeSerie.NO_LOCATION : gts.locations[v];
            trend.locations[u] = seasonal.locations[u];
        }
    } else {
        seasonal.locations = null;
        trend.locations = null;
    }

    v = 0;
    if (null != gts.elevations) {
        for (int u = 0; u < size; u++) {
            v = Arrays.binarySearch(gts.ticks, v, nonnull, seasonal.ticks[u]);
            seasonal.elevations[u] = v < 0 ? GeoTimeSerie.NO_ELEVATION : gts.elevations[v];
            trend.elevations[u] = seasonal.elevations[u];
        }
    } else {
        seasonal.elevations = null;
        trend.elevations = null;
    }

    //
    // Output
    //

    String prefix = (null == gts.getName()) || (0 == gts.getName().length()) ? "" : gts.getName() + "_";
    seasonal.setName(prefix + "seasonal");
    trend.setName(prefix + "trend");

    List<GeoTimeSerie> output = new ArrayList<GeoTimeSerie>();
    output.add(seasonal);
    output.add(trend);

    return output;
}