sadl.run.pipelines.Pipeline.java Source code

Introduction

Here is the source code for sadl.run.pipelines.Pipeline.java, a deprecated command-line pipeline from the SADL library that learns a PDTTA model from timed input data and evaluates a configurable anomaly detector on a test set, appending the result to result.csv.

Source

/**
 * This file is part of SADL, a library for learning all sorts of (timed) automata and performing sequence-based anomaly detection.
 * Copyright (C) 2013-2016  the original author or authors.
 *
 * SADL is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
 *
 * SADL is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with SADL.  If not, see <http://www.gnu.org/licenses/>.
 */
package sadl.run.pipelines;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;

import org.apache.commons.lang3.tuple.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;

import jsat.distributions.empirical.kernelfunc.BiweightKF;
import jsat.distributions.empirical.kernelfunc.EpanechnikovKF;
import jsat.distributions.empirical.kernelfunc.GaussKF;
import jsat.distributions.empirical.kernelfunc.KernelFunction;
import jsat.distributions.empirical.kernelfunc.TriweightKF;
import jsat.distributions.empirical.kernelfunc.UniformKF;
import sadl.anomalydetecion.AnomalyDetection;
import sadl.constants.AnomalyInsertionType;
import sadl.constants.DetectorMethod;
import sadl.constants.DistanceMethod;
import sadl.constants.FeatureCreatorMethod;
import sadl.constants.KdeKernelFunction;
import sadl.constants.MergeTest;
import sadl.constants.ProbabilityAggregationMethod;
import sadl.constants.ScalingMethod;
import sadl.detectors.AnomalyDetector;
import sadl.detectors.VectorDetector;
import sadl.detectors.featureCreators.FeatureCreator;
import sadl.detectors.featureCreators.FullFeatureCreator;
import sadl.detectors.featureCreators.MinimalFeatureCreator;
import sadl.detectors.featureCreators.SmallFeatureCreator;
import sadl.detectors.threshold.AggregatedThresholdDetector;
import sadl.detectors.threshold.FullThresholdDetector;
import sadl.experiments.ExperimentResult;
import sadl.input.TimedInput;
import sadl.interfaces.ProbabilisticModel;
import sadl.interfaces.ProbabilisticModelLearner;
import sadl.modellearner.PdttaLearner;
import sadl.oneclassclassifier.LibSvmClassifier;
import sadl.oneclassclassifier.clustering.DbScanClassifier;
import sadl.utils.IoUtils;
import sadl.utils.MasterSeed;
import sadl.utils.Settings;

/**
 * 
 * @author Timo Klerx
 *
 */
@Deprecated
public class Pipeline implements Serializable {
    private static final long serialVersionUID = 4962328747559099050L;

    private static final Logger logger = LoggerFactory.getLogger(Pipeline.class);
    // TODO move this to experiment project

    @Parameter(names = "-mergeTest")
    MergeTest mergeTest = MergeTest.ALERGIA;

    @Parameter(names = "-detectorMethod", description = "the anomaly detector method")
    DetectorMethod detectorMethod = DetectorMethod.SVM;

    @Parameter(names = "-featureCreator")
    FeatureCreatorMethod featureCreatorMethod = FeatureCreatorMethod.FULL;

    @Parameter(names = "-scalingMethod")
    ScalingMethod scalingMethod = ScalingMethod.NONE;

    @Parameter(names = "-distanceMetric", description = "Which distance metric to use for DBSCAN")
    DistanceMethod dbScanDistanceMethod = DistanceMethod.EUCLIDIAN;

    @Parameter(names = "-mergeAlpha")
    private double mergeAlpha;
    @Parameter(names = "-dbScanEps")
    private double dbscan_eps;
    @Parameter(names = "-dbScanN")
    private int dbscan_n;

    @Parameter(names = "-smoothingPrior")
    double smoothingPrior = 0;

    @Parameter(names = "-mergeT0")
    int mergeT0 = 3;

    @Parameter(names = "-kdeBandwidth")
    double kdeBandwidth;

    @Parameter(names = "-debug")
    private final boolean debug = false;

    @Parameter(names = "-aggregateSublists", arity = 1)
    private boolean aggregateSublists;

    @Parameter(names = "-kdeKernelFunction")
    KdeKernelFunction kdeKernelFunctionQualifier;
    KernelFunction kdeKernelFunction;

    @Parameter(names = "-recursiveMergeTest", arity = 1)
    private boolean recursiveMergeTest;
    @Parameter(names = "-aggregatedTimeThreshold")
    private double aggregatedTimeThreshold;
    @Parameter(names = "-aggregatedEventThreshold")
    private double aggregatedEventThreshold;

    @Parameter(names = "-singleEventThreshold")
    private double singleEventThreshold;

    @Parameter(names = "-singleTimeThreshold")
    private double singleTimeThreshold;

    @Parameter(names = "-probabilityAggregationMethod")
    ProbabilityAggregationMethod aggType = ProbabilityAggregationMethod.NORMALIZED_MULTIPLY;
    @Parameter(names = "-anomalyInsertionType")
    AnomalyInsertionType anomalyInsertionType = AnomalyInsertionType.ALL;

    @Parameter(names = "-svmNu")
    double svmNu;

    @Parameter(names = "-svmGamma")
    double svmGamma;

    @Parameter(names = "-svmEps")
    double svmEps;

    @Parameter(names = "-svmKernel")
    int svmKernelType;

    @Parameter(names = "-svmDegree")
    int svmDegree;

    @Parameter(names = "-svmProbabilityEstimate")
    int svmProbabilityEstimate;

    @Parameter(names = "-seed")
    long seed;

    @Parameter(names = "-trainFile")
    Path trainFile;
    @Parameter(names = "-testFile")
    Path testFile;
    @Parameter(names = "-trainTestFile")
    Path trainTestFile;

    private static final boolean abort = true;

    /**
     * @param args
     *            command-line arguments, parsed by JCommander into the annotated fields of this class
     * @throws IOException
     *             if the input files cannot be read or result.csv cannot be written
     * @throws InterruptedException
     *             if the pipeline run is interrupted
     */
    public static void main(String[] args) throws IOException, InterruptedException {
        if (abort) {
            throw new UnsupportedOperationException(
                    "This class is no longer supported! Use SADL main class with smac command");
        }
        final Pipeline sp = new Pipeline();
        final JCommander jc = new JCommander(sp);
        if (args.length != 1) {
            logger.error("Please provide the following inputs: [configFile]");
            jc.usage();
            System.exit(1);
        }
        jc.parse(args);
        MasterSeed.setSeed(sp.seed);

        try {
            boolean fileExisted = true;
            final ExperimentResult result = sp.run();
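            // Append one timestamped CSV line (run arguments plus the result) to result.csv,
            // creating the file with a header row if it does not exist yet.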
            final Path resultPath = Paths.get("result.csv");
            if (!Files.exists(resultPath)) {
                Files.createFile(resultPath);
                fileExisted = false;
            }
            final DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

            try (BufferedWriter bw = Files.newBufferedWriter(resultPath, StandardCharsets.UTF_8,
                    StandardOpenOption.APPEND)) {
                if (!fileExisted) {
                    bw.append(ExperimentResult.CsvHeader());
                    bw.append('\n');
                }
                bw.append(df.format(new Date()));
                bw.append(" ; ");
                bw.append(Arrays.toString(args));
                bw.append("; ");
                bw.append(result.toCsvString());
                bw.append('\n');
            }

            System.exit(0);
        } catch (final Exception e) {
            logger.error("Unexpected exception with parameters" + Arrays.toString(args), e);
            throw e;
        }
    }

    FeatureCreator featureCreator;
    AnomalyDetector pdttaDetector;

    public ExperimentResult run() throws IOException, InterruptedException {
        if (debug) {
            Settings.setDebug(debug);
        }
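        // Map -featureCreator to the feature creator that builds the detector's feature vectors.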
        if (featureCreatorMethod == FeatureCreatorMethod.FULL) {
            featureCreator = new FullFeatureCreator();
        } else if (featureCreatorMethod == FeatureCreatorMethod.SMALL) {
            featureCreator = new SmallFeatureCreator();
        } else if (featureCreatorMethod == FeatureCreatorMethod.MINIMAL) {
            featureCreator = new MinimalFeatureCreator();
        } else {
            featureCreator = null;
        }
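        // Build the anomaly detector selected via -detectorMethod (one-class SVM, threshold-based, or DBSCAN).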
        if (detectorMethod == DetectorMethod.SVM) {
            pdttaDetector = new VectorDetector(aggType, featureCreator, new LibSvmClassifier(svmProbabilityEstimate,
                    svmGamma, svmNu, svmKernelType, svmEps, svmDegree, scalingMethod));
            // pdttaDetector = new PdttaOneClassSvmDetector(aggType, featureCreator, svmProbabilityEstimate, svmGamma, svmNu, svmCosts, svmKernelType, svmEps,
            // svmDegree, scalingMethod);
        } else if (detectorMethod == DetectorMethod.THRESHOLD_AGG_ONLY) {
            pdttaDetector = new AggregatedThresholdDetector(aggType, aggregatedEventThreshold,
                    aggregatedTimeThreshold, aggregateSublists);
        } else if (detectorMethod == DetectorMethod.THRESHOLD_ALL) {
            pdttaDetector = new FullThresholdDetector(aggType, aggregatedEventThreshold, aggregatedTimeThreshold,
                    aggregateSublists, singleEventThreshold, singleTimeThreshold);
        } else if (detectorMethod == DetectorMethod.DBSCAN) {
            // pdttaDetector = new PdttaDbScanDetector(aggType, featureCreator, dbscan_eps, dbscan_n, distanceMethod, scalingMethod);
            pdttaDetector = new VectorDetector(aggType, featureCreator,
                    new DbScanClassifier(dbscan_eps, dbscan_n, dbScanDistanceMethod, scalingMethod));
        } else {
            pdttaDetector = null;
        }

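        // Map -kdeKernelFunction to the corresponding JSAT kernel; ESTIMATE intentionally maps to null.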
        if (kdeKernelFunctionQualifier == KdeKernelFunction.BIWEIGHT) {
            kdeKernelFunction = BiweightKF.getInstance();
        } else if (kdeKernelFunctionQualifier == KdeKernelFunction.EPANECHNIKOV) {
            kdeKernelFunction = EpanechnikovKF.getInstance();
        } else if (kdeKernelFunctionQualifier == KdeKernelFunction.GAUSS) {
            kdeKernelFunction = GaussKF.getInstance();
        } else if (kdeKernelFunctionQualifier == KdeKernelFunction.TRIWEIGHT) {
            kdeKernelFunction = TriweightKF.getInstance();
        } else if (kdeKernelFunctionQualifier == KdeKernelFunction.UNIFORM) {
            kdeKernelFunction = UniformKF.getInstance();
        } else if (kdeKernelFunctionQualifier == KdeKernelFunction.ESTIMATE) {
            kdeKernelFunction = null;
        }
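        // Read the timed sequences, either from a combined train/test file or from two separate files.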
        final TimedInput trainInput;
        final TimedInput testInput;
        if (trainTestFile != null) {
            final Pair<TimedInput, TimedInput> pair = IoUtils.readTrainTestFile(trainTestFile);
            trainInput = pair.getKey();
            testInput = pair.getValue();
        } else {
            trainInput = TimedInput.parse(trainFile);
            testInput = TimedInput.parse(testFile);
        }
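        // Learn a PDTTA from the training data, then evaluate the configured detector on the test data
        // and report the SMAC objective (1 - F-measure).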
        final ProbabilisticModelLearner learner = new PdttaLearner(mergeAlpha, recursiveMergeTest,
                kdeKernelFunction, kdeBandwidth, mergeTest, smoothingPrior, mergeT0, null);
        final ProbabilisticModel model = learner.train(trainInput);
        final AnomalyDetection detection = new AnomalyDetection(pdttaDetector, model);
        final ExperimentResult result = detection.test(testInput);
        System.out.println("Result for SMAC: SUCCESS, 0, 0, " + (1 - result.getFMeasure()) + ", 0");
        // IoUtils.xmlSerialize(automaton, Paths.get("pdtta.xml"));
        // automaton = (PDTTA) IoUtils.xmlDeserialize(Paths.get("pdtta.xml"));
        return result;
    }

}
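
For reference, below is a minimal, hypothetical sketch of how the same SADL calls used in run() above could be invoked directly from user code. It reuses only constructors and methods that appear in this listing (TimedInput.parse, PdttaLearner, AggregatedThresholdDetector, AnomalyDetection.test); the class name PipelineSketch, the file names, and all numeric parameter values are placeholders, not recommended settings.

import java.nio.file.Paths;

import jsat.distributions.empirical.kernelfunc.GaussKF;
import jsat.distributions.empirical.kernelfunc.KernelFunction;

import sadl.anomalydetecion.AnomalyDetection;
import sadl.constants.MergeTest;
import sadl.constants.ProbabilityAggregationMethod;
import sadl.detectors.AnomalyDetector;
import sadl.detectors.threshold.AggregatedThresholdDetector;
import sadl.experiments.ExperimentResult;
import sadl.input.TimedInput;
import sadl.interfaces.ProbabilisticModel;
import sadl.interfaces.ProbabilisticModelLearner;
import sadl.modellearner.PdttaLearner;

public class PipelineSketch {

    public static void main(String[] args) throws Exception {
        // Placeholder input files containing timed sequences (train/test split done beforehand).
        final TimedInput trainInput = TimedInput.parse(Paths.get("train.txt"));
        final TimedInput testInput = TimedInput.parse(Paths.get("test.txt"));

        // Learn a PDTTA; the argument order mirrors the call in Pipeline.run():
        // mergeAlpha, recursiveMergeTest, kernel, bandwidth, mergeTest, smoothingPrior, mergeT0, null.
        final KernelFunction kernel = GaussKF.getInstance();
        final ProbabilisticModelLearner learner = new PdttaLearner(0.05, true, kernel, 0.1,
                MergeTest.ALERGIA, 0.0, 3, null);
        final ProbabilisticModel model = learner.train(trainInput);

        // Threshold detector as built for DetectorMethod.THRESHOLD_AGG_ONLY above; thresholds are placeholders.
        final AnomalyDetector detector = new AggregatedThresholdDetector(
                ProbabilityAggregationMethod.NORMALIZED_MULTIPLY, 0.01, 0.01, false);

        // Run detection on the test set and report the F-measure.
        final AnomalyDetection detection = new AnomalyDetection(detector, model);
        final ExperimentResult result = detection.test(testInput);
        System.out.println("F-measure: " + result.getFMeasure());
    }
}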