List of usage examples for java.util Arrays binarySearch
public static int binarySearch(long[] a, int fromIndex, int toIndex, long key)
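Before the full example, here is a minimal, self-contained sketch of this overload's contract, which the example below relies on: the searched range [fromIndex, toIndex) must be sorted in ascending order, a hit returns the index of the key within the full array, and a miss returns -(insertionPoint) - 1. The array contents and class name are illustrative only.

import java.util.Arrays;

public class BinarySearchRangeDemo {
    public static void main(String[] args) {
        // Sorted timestamps (the searched range must be sorted in ascending order).
        long[] ticks = {100L, 200L, 300L, 400L, 500L};

        // Search only within [1, 4), i.e. the elements 200, 300 and 400.
        int hit = Arrays.binarySearch(ticks, 1, 4, 300L);
        System.out.println(hit);            // 2: index of the key in the full array

        // A key absent from the range yields -(insertionPoint) - 1.
        int miss = Arrays.binarySearch(ticks, 1, 4, 250L);
        System.out.println(miss);           // -3
        int insertionPoint = -miss - 1;
        System.out.println(insertionPoint); // 2: where 250 would be inserted
    }
}

Passing an unsorted range does not throw; it simply makes the result unspecified, which is why the example below keeps its tick arrays sorted before searching them.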
From source file: io.warp10.continuum.gts.GTSHelper.java
  /**
   * Compute STL, i.e. Seasonal-Trend decomposition procedure based on LOWESS
   * @see http://www.wessa.net/download/stl.pdf
   *
   * Global parameters:
   * @param gts : Input GTS, must be bucketized
   * @param buckets_per_period : number of buckets for one period of the seasonality
   * @param inner Precision : number of inner loops (to make the decomposition)
   * @param outer Robustness : number of outer loops (to alleviate the impact of outliers upon the decomposition)
   * @param ...
   *
   * Optional sets of parameters shared by lowess calls of the same kind:
   * @param neighbour_* : Bandwidth, i.e. number of nearest neighbours to consider when applying LOWESS
   * @param degree_* : Degree, i.e. degree of the polynomial fit
   * @param jump_* : Jump, i.e. number of buckets to skip to speed up computation. These buckets are interpolated afterward.
   *
   * With:
   * @param _* = _s : lowess calls during seasonal extract step
   * @param _* = _l : lowess calls during low frequency filtering step
   * @param _* = _t : lowess calls during trend extract step
   * @param _* = _p : lowess calls during post seasonal smoothing step
   *
   * @return a list of 2 GTS consisting of the seasonal and trend part of the decomposition.
   */
  public static List<GeoTimeSerie> stl(GeoTimeSerie gts,
      // big picture STL parameters
      int buckets_per_period, int inner, int outer,
      // lowess parameters for seasonal extract
      int neighbour_s, int degree_s, int jump_s,
      // lowess parameters for low-pass filtering
      int neighbour_l, int degree_l, int jump_l,
      // lowess parameters for trend extract
      int neighbour_t, int degree_t, int jump_t,
      // lowess parameters for seasonal post-smoothing
      int neighbour_p, int degree_p, int jump_p) throws WarpScriptException {

    if (TYPE.DOUBLE != gts.type && TYPE.LONG != gts.type) {
      throw new WarpScriptException("Can only be applied on numeric Geo Time Series.");
    }

    if (!isBucketized(gts)) {
      throw new WarpScriptException("Can only be applied on bucketized Geo Time Series");
    }

    //
    // Sort the GTS
    //

    sort(gts, false);

    //
    // Allocate tables: Y = S + T + R
    // Y: initial GTS
    // S: seasonal
    // T: trend
    // R: residual
    //

    int nonnull = gts.values;
    int size = gts.bucketcount;

    // limit size at the moment
    if (size - nonnull > 500000) {
      throw new WarpScriptException("More than 500000 missing values");
    }

    // At each iteration of the inner loop, seasonal will be augmented by 2*bpp buckets.
    // The current implementation fills every gap in seasonal with a value estimated by lowess.
    // Hence the sizehint choice.
    // TODO(JCV): maybe rethink STL so that it can handle missing values better.
    int sizehint = size + 2 * buckets_per_period;

    GeoTimeSerie seasonal = gts.cloneEmpty(sizehint);
    try {
      seasonal.reset(Arrays.copyOf(gts.ticks, sizehint), null, null, new double[sizehint], nonnull);
    } catch (IOException ioe) {
      throw new WarpScriptException("IOException in reset method: " + ioe.getMessage());
    }

    GeoTimeSerie trend = gts.cloneEmpty(sizehint);
    try {
      trend.reset(Arrays.copyOf(gts.ticks, sizehint), null, null, new double[sizehint], nonnull);
    } catch (IOException ioe) {
      throw new WarpScriptException("IOException in reset method: " + ioe.getMessage());
    }

    // lowpassed will borrow the body of trend in step 3
    GeoTimeSerie lowpassed = trend;

    // Allocate an array to update the weights through the outer loop
    // The weights used will be rho*weights
    double[] rho = new double[nonnull];
    Arrays.fill(rho, 1.0D);

    // residual will borrow the body of rho
    double[] residual = rho;

    // Weights used in lowess computations
    int nei = Math.max(Math.max(neighbour_s, neighbour_l), neighbour_t);
    double[] weights = new double[nei]; // FIXME(JCV): maybe smarter size to put in here ?

    //
    // Outer loop
    //

    for (int s = 0; s < outer + 1; s++) {

      //
      // Inner loop
      //

      for (int k = 0; k < inner; k++) {

        //
        // Step 1: Detrending
        //

        int idx_t = 0;
        for (int idx = 0; idx < nonnull; idx++) {
          idx_t = Arrays.binarySearch(trend.ticks, idx_t, nonnull, gts.ticks[idx]);
          seasonal.doubleValues[idx] = ((Number) valueAtIndex(gts, idx)).doubleValue() - trend.doubleValues[idx_t];
        }

        //
        // Step 2: Cycle-subseries smoothing
        //

        GeoTimeSerie subCycle = null;
        GeoTimeSerie subRho = null;

        // smoothing of each cycle-subseries
        for (int c = 0; c < buckets_per_period; c++) {

          // extracting a cycle-subseries and extending it by one value at both ends
          subCycle = subCycleSerie(seasonal, seasonal.lastbucket - c * seasonal.bucketspan, buckets_per_period, true, subCycle);
          subCycle.lastbucket += subCycle.bucketspan;
          subCycle.bucketcount += 2;

          // extracting subRho
          if (s > 0) {
            double[] tmp = seasonal.doubleValues;
            seasonal.doubleValues = rho;
            subRho = subCycleSerie(seasonal, seasonal.lastbucket - c * seasonal.bucketspan, buckets_per_period, true, subRho);
            seasonal.doubleValues = tmp;
          }

          // applying lowess
          lowess_stl(subCycle, seasonal, neighbour_s, degree_s, jump_s, weights, s > 0 ? subRho.doubleValues : rho);
        }

        /*
         * Concretely, with the added points, seasonal.lastbucket is 1 period behind the reality,
         * and it has twice bpp more buckets than its bucketcount.
         */

        // Updating bucket parameters of seasonal for clarity
        seasonal.lastbucket += seasonal.bucketspan * buckets_per_period;
        seasonal.bucketcount += 2 * buckets_per_period;

        //
        // Step 3: Low-Pass Filtering of Smoothed Cycle-subseries
        //

        sort(seasonal);
        // FIXME(JCV): what happens if buckets_per_period < bucketcount / buckets_per_period ?
        // Applying first moving average of size bpp

        // First average
        double sum = 0;
        for (int r = 0; r < buckets_per_period; r++) {
          sum += seasonal.doubleValues[r];
        }
        lowpassed.doubleValues[0] = sum / buckets_per_period;

        // other averages
        for (int r = 1; r < seasonal.bucketcount - buckets_per_period + 1; r++) {
          sum += seasonal.doubleValues[r + buckets_per_period - 1] - seasonal.doubleValues[r - 1];
          lowpassed.doubleValues[r] = sum / buckets_per_period;
        }

        // Applying 2nd moving average of size bpp
        sum = 0;
        for (int r = 0; r < buckets_per_period; r++) {
          sum += lowpassed.doubleValues[r];
        }
        double tmp = lowpassed.doubleValues[0];
        lowpassed.doubleValues[0] = sum / buckets_per_period;
        for (int r = 1; r <= seasonal.bucketcount - 2 * buckets_per_period + 1; r++) {
          sum += lowpassed.doubleValues[r + buckets_per_period - 1] - tmp;
          tmp = lowpassed.doubleValues[r];
          lowpassed.doubleValues[r] = sum / buckets_per_period;
        }

        // Applying 3rd moving average of size 3
        for (int r = 0; r < seasonal.bucketcount - 2 * buckets_per_period; r++) {
          lowpassed.doubleValues[r] += lowpassed.doubleValues[r + 1] + lowpassed.doubleValues[r + 2];
          lowpassed.doubleValues[r] /= 3;
        }

        // Update size of gts
        lowpassed.bucketcount = seasonal.bucketcount - 2 * buckets_per_period;
        lowpassed.lastbucket = seasonal.lastbucket - buckets_per_period * seasonal.bucketspan;
        lowpassed.values = lowpassed.bucketcount;

        // Lowess_l
        lowpassed = rlowess(lowpassed, neighbour_l, 0, (jump_l + 1) * lowpassed.bucketspan, degree_l, weights, null, true);

        //
        // Step 4: Detrending of Smoothed Cycle-Subseries
        //

        // shifting seasonal bucket parameters back
        seasonal.lastbucket -= seasonal.bucketspan * buckets_per_period;
        seasonal.bucketcount -= 2 * buckets_per_period;

        if (seasonal.bucketcount != lowpassed.values) {
          throw new WarpScriptException("stl impl error #1: " + seasonal.values + " vs " + lowpassed.values);
        }

        for (int r = 0; r < seasonal.bucketcount; r++) {
          seasonal.doubleValues[r] = seasonal.doubleValues[r + buckets_per_period] - lowpassed.doubleValues[r];
          seasonal.ticks[r] = seasonal.ticks[r + buckets_per_period];
        }

        // trim seasonal back
        seasonal.values = seasonal.bucketcount;

        //
        // Step 5: Deseasonalizing
        //

        int idx_s = 0;
        for (int idx = 0; idx < nonnull; idx++) {
          idx_s = Arrays.binarySearch(seasonal.ticks, idx_s, nonnull, gts.ticks[idx]);
          trend.doubleValues[idx] = ((Number) valueAtIndex(gts, idx)).doubleValue() - seasonal.doubleValues[idx_s];
        }

        trend.values = nonnull;
        trend.lastbucket = gts.lastbucket;
        trend.bucketspan = gts.bucketspan;
        trend.bucketcount = size;

        //
        // Step 6: Trend smoothing
        //

        // FIXME(JCV): currently all buckets are estimated or interpolated, but it is not necessary to estimate ones
        // with no value unless it is the last iteration.
        // But won't gain that much in speed if enough points are already interpolated.
        trend = rlowess(trend, neighbour_t, 0, (jump_t + 1) * trend.bucketspan, degree_t, weights, rho, true);
      }

      //
      // Robustifying operations of outer loop (except last time)
      //

      if (s < outer) {

        // compute residual
        int idx_s = 0;
        int idx_t = 0;
        for (int idx = 0; idx < nonnull; idx++) {
          idx_s = Arrays.binarySearch(seasonal.ticks, idx_s, nonnull, gts.ticks[idx]);
          //idx_t = Arrays.binarySearch(trend.ticks, idx_t, nonnull, gts.ticks[idx]);
          // we assume idx_t == idx_s
          idx_t = idx_s;
          residual[idx] = Math.abs(((Number) valueAtIndex(gts, idx)).doubleValue() - seasonal.doubleValues[idx_s] - trend.doubleValues[idx_t]);
        }

        //
        // Update robustifying weights
        //

        // compute median of abs(residual)
        double median;
        double[] sorted = Arrays.copyOf(residual, gts.values);
        Arrays.sort(sorted);
        if (gts.values % 2 == 0) {
          median = (sorted[gts.values / 2] + sorted[gts.values / 2 - 1]) / 2;
        } else {
          median = sorted[gts.values / 2];
        }

        // compute h = 6 * median and rho = bisquare(|residual|/h)
        double h = 6 * median;

        for (int k = 0; k < gts.values; k++) {
          if (0 == h) {
            rho[k] = 1.0D;
          } else {
            double u = residual[k] / h;
            if (u >= 1.0) {
              rho[k] = 0.0D;
            } else {
              rho[k] = 1.0D - u * u;
              rho[k] = rho[k] * rho[k];
            }
          }
        }
      }
    }

    //
    // Post seasonal smoothing
    //

    if (neighbour_p > 0) {
      seasonal = rlowess(seasonal, neighbour_p, 0, (jump_p + 1) * seasonal.bucketspan, degree_p);
    }

    //
    // Locations and elevations
    //

    int v = 0;
    if (null != gts.locations) {
      for (int u = 0; u < size; u++) {
        v = Arrays.binarySearch(gts.ticks, v, nonnull, seasonal.ticks[u]);
        seasonal.locations[u] = v < 0 ? GeoTimeSerie.NO_LOCATION : gts.locations[v];
        trend.locations[u] = seasonal.locations[u];
      }
    } else {
      seasonal.locations = null;
      trend.locations = null;
    }

    v = 0;
    if (null != gts.elevations) {
      for (int u = 0; u < size; u++) {
        v = Arrays.binarySearch(gts.ticks, v, nonnull, seasonal.ticks[u]);
        seasonal.elevations[u] = v < 0 ? GeoTimeSerie.NO_ELEVATION : gts.elevations[v];
        trend.elevations[u] = seasonal.elevations[u];
      }
    } else {
      seasonal.elevations = null;
      trend.elevations = null;
    }

    //
    // Output
    //

    String prefix = (null == gts.getName()) || (0 == gts.getName().length()) ? "" : gts.getName() + "_";
    seasonal.setName(prefix + "seasonal");
    trend.setName(prefix + "trend");

    List<GeoTimeSerie> output = new ArrayList<GeoTimeSerie>();
    output.add(seasonal);
    output.add(trend);

    return output;
  }
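Worth noting about the binarySearch usage above: each loop feeds the previous result back in as the next fromIndex. Because both tick arrays are sorted, every successive key can only occur at or after the last hit, so narrowing the searched range saves work without changing the result. Below is a minimal, self-contained sketch of that alignment pattern; all names (gtsTicks, locations, NO_LOCATION) are illustrative, and the Math.max(v, 0) guard is an addition to cope with the deliberately missing tick in the sample data. In the method above, only the locations/elevations loops check for a negative return, mapping it to a sentinel value.

import java.util.Arrays;

public class TickAlignmentSketch {
    // Hypothetical sentinel, standing in for GeoTimeSerie.NO_LOCATION.
    static final long NO_LOCATION = Long.MIN_VALUE;

    public static void main(String[] args) {
        long[] gtsTicks = {100L, 200L, 300L, 400L, 500L};   // sorted ticks of the source series
        long[] locations = {10L, 20L, 30L, 40L, 50L};       // aligned with gtsTicks
        long[] seasonalTicks = {100L, 250L, 300L, 500L};    // sorted ticks to resolve

        int nonnull = gtsTicks.length;
        long[] resolved = new long[seasonalTicks.length];

        int v = 0;
        for (int u = 0; u < seasonalTicks.length; u++) {
            // Restrict the search to [v, nonnull): ticks are sorted, so once a tick has
            // been matched, later ticks can only be found at or after that position.
            v = Arrays.binarySearch(gtsTicks, Math.max(v, 0), nonnull, seasonalTicks[u]);
            resolved[u] = v < 0 ? NO_LOCATION : locations[v];
        }

        System.out.println(Arrays.toString(resolved));
        // [10, -9223372036854775808, 30, 50]
    }
}

Running the sketch prints [10, -9223372036854775808, 30, 50]: the three ticks present in gtsTicks resolve to their locations, while the absent one maps to the sentinel.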