state.FindOutliers.java Source code

Java tutorial

Introduction

Here is the source code for state.FindOutliers.java

Source

/*
# Licensed Materials - Property of IBM
# Copyright IBM Corp. 2015  
 */
package state;

import java.util.Random;

import org.apache.commons.math.stat.descriptive.moment.Mean;
import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;

import com.ibm.streamsx.topology.TStream;
import com.ibm.streamsx.topology.Topology;
import com.ibm.streamsx.topology.context.StreamsContextFactory;
import com.ibm.streamsx.topology.function.Predicate;
import com.ibm.streamsx.topology.function.Supplier;

/**
 * Finds outliers in a sequence of doubles (e.g. simulating a sensor reading).
 * <p>
 * Demonstrates function logic that maintains state across tuples: the
 * filtering predicate keeps a running mean and standard deviation of every
 * value it has seen and uses them to classify each new value.
 * <p>
 * Usage: {@code FindOutliers [threshold]} where {@code threshold} is the
 * number of standard deviations a value must be away from the running mean
 * to be reported. Defaults to {@code 2.0} when no argument is given.
 */
public class FindOutliers {

    /**
     * Builds the topology, submits it to the embedded context and waits
     * for it to complete.
     *
     * @param args optional single argument: the outlier threshold expressed
     *             as a multiple of the standard deviation (default 2.0)
     * @throws Exception if the argument cannot be parsed as a double or the
     *                   submission fails
     */
    public static void main(String[] args) throws Exception {

        final double threshold = args.length == 0 ? 2.0 : Double.parseDouble(args[0]);

        Topology t = new Topology("StandardDeviationFilter");

        final Random rand = new Random();

        // Produce a stream of random double values with a normal
        // distribution, mean 0.0 and standard deviation 1.
        TStream<Double> values = t.limitedSource(new Supplier<Double>() {
            private static final long serialVersionUID = 1L;

            @Override
            public Double get() {
                return rand.nextGaussian();
            }

        }, 100000);

        /*
         * Filters the values based on the mean and standard deviation
         * calculated from the incoming data. Only outliers are present in
         * the output stream. An outlier is defined as a value that lies
         * more than (threshold * standard deviation) away from the mean.
         *
         * This demonstrates an anonymous functional logic class that is
         * stateful. The two fields mean and sd maintain their values across
         * multiple invocations of the test method, that is for multiple
         * tuples.
         *
         * Note both Mean & StandardDeviation classes are serializable.
         */
        TStream<Double> outliers = values.filter(new Predicate<Double>() {

            private static final long serialVersionUID = 1L;
            private final Mean mean = new Mean();
            private final StandardDeviation sd = new StandardDeviation();

            @Override
            public boolean test(Double tuple) {
                // Unbox once; the value is used several times below.
                final double value = tuple;

                // The current value is folded into the statistics before
                // it is classified, so the running mean and standard
                // deviation always include the tuple under test.
                mean.increment(value);
                sd.increment(value);

                // Distance from the running mean. Comparing |value| with
                // |mean| + allowedDeviation instead would miss outliers
                // whenever the mean is not zero (e.g. mean 5, sd 1,
                // threshold 2, value 2 is three deviations away).
                final double distanceFromMean = Math.abs(value - mean.getResult());
                final double allowedDeviation = threshold * sd.getResult();

                return distanceFromMean > allowedDeviation;
            }
        });

        outliers.print();

        StreamsContextFactory.getEmbedded().submit(t).get();
    }
}