List of usage examples for org.apache.commons.lang3.tuple Pair getKey
@Override public final L getKey()
Gets the key from this pair.
This method implements the Map.Entry interface, returning the left element as the key.
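Before the usage examples, here is a minimal self-contained sketch of the behavior described above. It assumes commons-lang3 is on the classpath; the pair contents ("answer", 42) are made up for illustration:

import java.util.Map;

import org.apache.commons.lang3.tuple.Pair;

public class PairGetKeyExample {
    public static void main(String[] args) {
        // Pair.of(...) builds an immutable pair; getKey() returns the left element
        final Pair<String, Integer> pair = Pair.of("answer", 42);
        System.out.println(pair.getKey());   // prints "answer"
        System.out.println(pair.getValue()); // prints 42

        // Because Pair implements Map.Entry, a pair can be used wherever an entry is expected
        final Map.Entry<String, Integer> entry = pair;
        System.out.println(entry.getKey() + " -> " + entry.getValue()); // answer -> 42
    }
}

Note that getKey() is equivalent to getLeft(); the separate name exists to satisfy the Map.Entry contract, which is exactly how the examples below use it.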
From source file:sadl.oneclassclassifier.NumericClassifierTest.java
@Test
public void testGMeansClassifier() throws URISyntaxException, IOException {
    final PdttaLearner learner = new PdttaLearner(new AlergiaRedBlue(0.05, true));
    final FeatureCreator featureCreator = new UberFeatureCreator();
    final NumericClassifier classifier = new GMeansClassifier(ScalingMethod.NORMALIZE, 0.05, 0, DistanceMethod.EUCLIDIAN);
    final VectorDetector detector = new VectorDetector(ProbabilityAggregationMethod.NORMALIZED_MULTIPLY, featureCreator, classifier, false);
    final AnomalyDetection detection = new AnomalyDetection(detector, learner);
    final Path p = Paths.get(this.getClass().getResource("/pdtta/smac_mix_type1.txt").toURI());
    // readTrainTestFile returns a (train, test) pair; getKey() is the training set, getValue() the test set
    final Pair<TimedInput, TimedInput> inputSets = IoUtils.readTrainTestFile(p);
    final ExperimentResult actual = detection.trainTest(inputSets.getKey(), inputSets.getValue());
    final ExperimentResult expected = new ExperimentResult(467, 4239, 294, 0);
    assertEquals(expected, actual);
}
From source file:sadl.oneclassclassifier.NumericClassifierTest.java
@Test
public void testXMeansClassifier() throws URISyntaxException, IOException {
    final PdttaLearner learner = new PdttaLearner(new AlergiaRedBlue(0.05, true));
    final FeatureCreator featureCreator = new UberFeatureCreator();
    final NumericClassifier classifier = new XMeansClassifier(ScalingMethod.NORMALIZE, 0.05, 0, DistanceMethod.EUCLIDIAN);
    final VectorDetector detector = new VectorDetector(ProbabilityAggregationMethod.NORMALIZED_MULTIPLY, featureCreator, classifier, false);
    final AnomalyDetection detection = new AnomalyDetection(detector, learner);
    final Path p = Paths.get(this.getClass().getResource("/pdtta/smac_mix_type1.txt").toURI());
    final Pair<TimedInput, TimedInput> inputSets = IoUtils.readTrainTestFile(p);
    final ExperimentResult actual = detection.trainTest(inputSets.getKey(), inputSets.getValue());
    final ExperimentResult expected = new ExperimentResult(467, 4311, 222, 0);
    assertEquals(expected, actual);
}
From source file:sadl.oneclassclassifier.NumericClassifierTest.java
@Test
public void testClusteredClassifier() throws URISyntaxException, IOException {
    final PdttaLearner learner = new PdttaLearner(new AlergiaRedBlue(0.05, true));
    final FeatureCreator featureCreator = new UberFeatureCreator();
    final NumericClassifier classifier = new ClusteredClassifier(ScalingMethod.NORMALIZE,
            new GMeans(new HamerlyKMeans(new EuclideanDistance(), SeedSelection.KPP, MasterSeed.nextRandom())));
    final VectorDetector detector = new VectorDetector(ProbabilityAggregationMethod.NORMALIZED_MULTIPLY, featureCreator, classifier, false);
    final AnomalyDetection detection = new AnomalyDetection(detector, learner);
    final Path p = Paths.get(this.getClass().getResource("/pdtta/smac_mix_type1.txt").toURI());
    final Pair<TimedInput, TimedInput> inputSets = IoUtils.readTrainTestFile(p);
    final ExperimentResult actual = detection.trainTest(inputSets.getKey(), inputSets.getValue());
    final ExperimentResult expected = new ExperimentResult(467, 0, 4533, 0);
    assertEquals(expected, actual);
}
From source file:sadl.run.commands.SmacRun.java
@SuppressWarnings("null") public ExperimentResult run(JCommander jc) { final RamGobbler gobbler = new RamGobbler(); gobbler.start();/*from ww w.ja v a 2 s . c om*/ logger.info("Starting new SmacRun with commands={}", jc.getUnknownOptions()); MasterSeed.setSeed(Long.parseLong(mainParams.get(4))); // TODO Try to use this again // final Pair<TimedInput, TimedInput> inputs = IoUtils.readTrainTestFile(inputSeqs); // trainRun.trainSeqs = inputs.getFirst(); // testRun.trainSeqs = inputs.getFirst(); // testRun.testSeqs = inputs.getSecond(); // // final Model m = trainRun.run(jc); // testRun.testModel = m; // final ExperimentResult result = testRun.run(); FeatureCreator featureCreator; AnomalyDetector anomalyDetector; OneClassClassifier classifier; if (featureCreatorMethod == FeatureCreatorMethod.FULL) { featureCreator = new FullFeatureCreator(); } else if (featureCreatorMethod == FeatureCreatorMethod.SMALL) { featureCreator = new SmallFeatureCreator(); } else if (featureCreatorMethod == FeatureCreatorMethod.MINIMAL) { featureCreator = new MinimalFeatureCreator(); } else if (featureCreatorMethod == FeatureCreatorMethod.UBER) { featureCreator = new UberFeatureCreator(); } else if (featureCreatorMethod == FeatureCreatorMethod.SINGLE) { featureCreator = new AggregatedSingleFeatureCreator(); } else { featureCreator = null; } if (detectorMethod == DetectorMethod.SVM) { if (svmGammaEstimate) { svmGamma = 0; } classifier = new LibSvmClassifier(svmProbabilityEstimate, svmGamma, svmNu, svmKernelType, svmEps, svmDegree, scalingMethod); } else if (detectorMethod == DetectorMethod.THRESHOLD_SINGLE) { // only works with minimal feature creator if (featureCreatorMethod != null && featureCreatorMethod != FeatureCreatorMethod.SINGLE) { throw new IllegalArgumentException("Please do only specify " + FeatureCreatorMethod.SINGLE + " or no featureCreatorMethod for " + detectorMethod); } featureCreator = new AggregatedSingleFeatureCreator(); classifier = new ThresholdClassifier(aggregatedEventThreshold); } else if (detectorMethod == DetectorMethod.THRESHOLD_AGG_ONLY) { // only works with minimal feature creator if (featureCreatorMethod != null && featureCreatorMethod != FeatureCreatorMethod.MINIMAL) { throw new IllegalArgumentException("Please do only specify " + FeatureCreatorMethod.MINIMAL + " or no featureCreatorMethod for " + detectorMethod); } featureCreator = new MinimalFeatureCreator(); classifier = new ThresholdClassifier(aggregatedEventThreshold, aggregatedTimeThreshold); } else if (detectorMethod == DetectorMethod.THRESHOLD_ALL) { // only works with small feature creator if (featureCreatorMethod != null && featureCreatorMethod != FeatureCreatorMethod.SMALL) { throw new IllegalArgumentException("Please do only specify " + FeatureCreatorMethod.SMALL + " or no featureCreatorMethod for " + detectorMethod); } featureCreator = new SmallFeatureCreator(); classifier = new ThresholdClassifier(aggregatedEventThreshold, aggregatedTimeThreshold, singleEventThreshold, singleTimeThreshold); } else if (detectorMethod == DetectorMethod.DBSCAN) { if (dbscan_threshold <= 0) { dbscan_threshold = dbscan_eps; } classifier = new DbScanClassifier(dbscan_eps, dbscan_n, dbscan_threshold, clusteringDistanceMethod, scalingMethod); } else if (detectorMethod == DetectorMethod.GMEANS) { classifier = new GMeansClassifier(scalingMethod, kmeans_threshold, kmeans_minPoints, clusteringDistanceMethod); } else if (detectorMethod == DetectorMethod.XMEANS) { classifier = new XMeansClassifier(scalingMethod, kmeans_threshold, kmeans_minPoints, 
clusteringDistanceMethod); } else if (detectorMethod == DetectorMethod.KMEANS) { classifier = new KMeansClassifier(scalingMethod, kmeans_k, kmeans_threshold, kmeans_minPoints, clusteringDistanceMethod); } else { classifier = null; } final ProbabilisticModelLearner learner = getLearner(Algoname.getAlgoname(mainParams.get(0)), jc); final AnomalyDetection detection; if (detectorMethod == DetectorMethod.ANODA) { detection = new AnomalyDetection(new AnodaDetector(), learner); } else { if (classifier == null || featureCreator == null) { throw new IllegalStateException("classifier or featureCreator is null"); } anomalyDetector = new VectorDetector(aggType, featureCreator, classifier, aggregateSublists); detection = new AnomalyDetection(anomalyDetector, learner); } ExperimentResult result = null; try { final Pair<TimedInput, TimedInput> trainTest = IoUtils.readTrainTestFile(Paths.get(mainParams.get(1)), skipFirstElement); TimedInput trainSet = trainTest.getKey(); TimedInput testSet = trainTest.getValue(); if (applyButlaPreprocessing) { double bandwidth; if (butlaPreprocessingBandwidthEstimate) { bandwidth = 0; } else { bandwidth = butlaPreprocessingBandwidth; } final ButlaPdtaLearner butla = new ButlaPdtaLearner(bandwidth, EventsCreationStrategy.SplitEvents, KDEFormelVariant.OriginalKDE); final Pair<TimedInput, Map<String, Event>> pair = butla.splitEventsInTimedSequences(trainSet); trainSet = pair.getKey(); testSet = butla.getSplitInputForMapping(testSet, pair.getValue()); } result = detection.trainTest(trainSet, testSet); } catch (final IOException e) { logger.error("Error when loading input from file: " + e.getMessage()); smacErrorAbort(); } // Can stay the same double qVal = 0.0; switch (qCrit) { case F_MEASURE: qVal = result.getFMeasure(); break; case PRECISION: qVal = result.getPrecision(); break; case RECALL: qVal = result.getRecall(); break; case PHI_COEFFICIENT: qVal = result.getPhiCoefficient(); break; case ACCURACY: qVal = result.getAccuracy(); break; default: logger.error("Quality criterion not found!"); break; } logger.info("{}={}", qCrit.name(), qVal); result.setAvgMemoryUsage(gobbler.getAvgRam()); result.setMaxMemoryUsage(gobbler.getMaxRam()); result.setMinMemoryUsage(gobbler.getMinRam()); logger.info("{}", result); gobbler.shutdown(); if (Double.isInfinite(qVal) || Double.isNaN(qVal)) { qVal = 0; } System.out.println("Result for SMAC: SUCCESS, 0, 0, " + (1 - qVal) + ", 0"); return result; }
From source file:sadl.run.datagenerators.SmacDataGenerator.java
private void run() throws IOException, InterruptedException {
    if (Files.notExists(outputDir)) {
        Files.createDirectories(outputDir);
    }
    Files.walk(outputDir).filter(p -> !Files.isDirectory(p)).forEach(p -> {
        try {
            logger.info("Deleting file {}", p);
            Files.delete(p);
        } catch (final Exception e) {
            e.printStackTrace();
        }
    });
    int k = 0;
    final boolean splitTimedEvents = true;
    // parse timed sequences
    TimedInput trainingTimedSequences = TimedInput.parseAlt(Paths.get(dataString), 1);
    if (splitTimedEvents) {
        final ButlaPdtaLearner butla = new ButlaPdtaLearner(10000, EventsCreationStrategy.SplitEvents, KDEFormelVariant.OriginalKDE);
        final Pair<TimedInput, Map<String, Event>> p = butla.splitEventsInTimedSequences(trainingTimedSequences);
        trainingTimedSequences = p.getKey();
    }
    final Random r = MasterSeed.nextRandom();
    final List<TimedWord> trainSequences = new ArrayList<>();
    final List<TimedWord> testSequences = new ArrayList<>();
    final TauPtaLearner learner = new TauPtaLearner();
    final TauPTA pta = learner.train(trainingTimedSequences);
    final TauPTA typeTwoNormalPta = SerializationUtils.clone(pta);
    final DecimalFormat df = new DecimalFormat("00");
    // final Path p = Paths.get("pta_normal.dot");
    // pta.toGraphvizFile(outputDir.resolve(p), false);
    // final Process ps = Runtime.getRuntime().exec("dot -Tpdf -O " + outputDir.resolve(p));
    // System.out.println(outputDir.resolve(p));
    // ps.waitFor();
    logger.info("Finished TauPTA ({} states) creation.", pta.getStateCount());
    TauPTA currentPta;
    while (k < 54) {
        for (final AnomalyInsertionType type : AnomalyInsertionType.values()) {
            if (type != AnomalyInsertionType.NONE && type != AnomalyInsertionType.ALL) {
                // if (type != AnomalyInsertionType.TYPE_TWO) {
                // continue;
                // }
                if (type == AnomalyInsertionType.TYPE_TWO) {
                    currentPta = SerializationUtils.clone(typeTwoNormalPta);
                    currentPta.setRandom(MasterSeed.nextRandom());
                } else {
                    currentPta = pta;
                }
                trainSequences.clear();
                testSequences.clear();
                final TauPTA anomaly = SerializationUtils.clone(currentPta);
                logger.info("inserting Anomaly Type {}", type);
                anomaly.makeAbnormal(type);
                if (type == AnomalyInsertionType.TYPE_TWO) {
                    anomaly.removeAbnormalSequences(currentPta);
                }
                for (int i = 0; i < TRAIN_SIZE; i++) {
                    trainSequences.add(currentPta.sampleSequence());
                }
                // PTAs of Type 2 and 4 always produce abnormal sequences
                // it is possible to sample abnormal and normal sequences with abnormal ptas of the other types (1,3,5).
                // but I don't know how the distribution is, so to be fair, i sample all anomalies the same
                for (int i = 0; i < TEST_SIZE; i++) {
                    if (r.nextDouble() < ANOMALY_PERCENTAGE) {
                        boolean wasAnormal = false;
                        TimedWord seq = null;
                        while (!wasAnormal) {
                            seq = anomaly.sampleSequence();
                            wasAnormal = seq.isAnomaly();
                        }
                        testSequences.add(seq);
                    } else {
                        testSequences.add(currentPta.sampleSequence());
                    }
                }
                final TimedInput trainset = new TimedInput(trainSequences);
                final TimedInput testset = new TimedInput(testSequences);
                final Path outputFile = outputDir.resolve(Paths.get(df.format(k) + "_smac_type" + type.getTypeIndex() + ".txt"));
                try (BufferedWriter bw = Files.newBufferedWriter(outputFile, StandardCharsets.UTF_8)) {
                    trainset.toFile(bw, true);
                    bw.write('\n');
                    bw.write(TRAIN_TEST_SEP);
                    bw.write('\n');
                    testset.toFile(bw, true);
                }
                logger.info("Wrote file #{} ({})", k, outputFile);
                k++;
            }
        }
    }
}
From source file:sadl.run.datagenerators.SmacDataGeneratorMixed.java
private void run() throws IOException, InterruptedException {
    if (Files.notExists(outputDir)) {
        Files.createDirectories(outputDir);
    }
    Files.walk(outputDir).filter(p -> !Files.isDirectory(p)).forEach(p -> {
        try {
            logger.info("Deleting file {}", p);
            Files.delete(p);
        } catch (final Exception e) {
            e.printStackTrace();
        }
    });
    logger.info("Starting to learn TauPTA...");
    int k = 0;
    // parse timed sequences
    TimedInput trainingTimedSequences = TimedInput.parseAlt(Paths.get(dataString), 1);
    final boolean splitTimedEvents = true;
    if (splitTimedEvents) {
        final ButlaPdtaLearner butla = new ButlaPdtaLearner(10000, EventsCreationStrategy.SplitEvents, KDEFormelVariant.OriginalKDE);
        final Pair<TimedInput, Map<String, Event>> p = butla.splitEventsInTimedSequences(trainingTimedSequences);
        trainingTimedSequences = p.getKey();
    }
    final Random r = MasterSeed.nextRandom();
    final List<TimedWord> trainSequences = new ArrayList<>();
    final List<TimedWord> testSequences = new ArrayList<>();
    final TauPtaLearner learner = new TauPtaLearner();
    final TauPTA pta = learner.train(trainingTimedSequences);
    final DecimalFormat df = new DecimalFormat("00");
    // final Path p = Paths.get("pta_normal.dot");
    // pta.toGraphvizFile(outputDir.resolve(p), false);
    // final Process ps = Runtime.getRuntime().exec("dot -Tpdf -O " + outputDir.resolve(p));
    // System.out.println(outputDir.resolve(p));
    // ps.waitFor();
    logger.info("Finished TauPTA creation.");
    logger.info("Before inserting anomalies, normal PTA has {} states and {} transitions", pta.getStateCount(), pta.getTransitionCount());
    final List<TauPTA> abnormalPtas = new ArrayList<>();
    for (final AnomalyInsertionType type : AnomalyInsertionType.values()) {
        if (type != AnomalyInsertionType.NONE && type != AnomalyInsertionType.ALL) {
            final TauPTA anomaly = SerializationUtils.clone(pta);
            logger.info("inserting Anomaly Type {}", type);
            anomaly.makeAbnormal(type);
            abnormalPtas.add(anomaly);
            if (type == AnomalyInsertionType.TYPE_TWO) {
                anomaly.removeAbnormalSequences(pta);
            }
            logger.info("After inserting anomaly type {}, normal PTA has {} states and {} transitions", type, pta.getStateCount(),
                    pta.getTransitionCount());
        }
    }
    logger.info("After inserting all anomalies, normal PTA has {} states and {} transitions", pta.getStateCount(), pta.getTransitionCount());
    final TObjectIntMap<TauPTA> anomalyOccurences = new TObjectIntHashMap<>();
    final Random anomalyChooser = MasterSeed.nextRandom();
    while (k < SAMPLE_FILES) {
        trainSequences.clear();
        testSequences.clear();
        for (int i = 0; i < TRAIN_SIZE; i++) {
            trainSequences.add(pta.sampleSequence());
        }
        for (int i = 0; i < TEST_SIZE; i++) {
            if (r.nextDouble() < ANOMALY_PERCENTAGE) {
                boolean wasAnormal = false;
                TimedWord seq = null;
                final TauPTA chosen = CollectionUtils.chooseRandomObject(abnormalPtas, anomalyChooser);
                while (!wasAnormal) {
                    seq = chosen.sampleSequence();
                    wasAnormal = seq.isAnomaly();
                }
                anomalyOccurences.adjustOrPutValue(chosen, 1, 1);
                testSequences.add(seq);
            } else {
                testSequences.add(pta.sampleSequence());
            }
        }
        final TimedInput trainset = new TimedInput(trainSequences);
        final TimedInput testset = new TimedInput(testSequences);
        final Path outputFile = outputDir.resolve(Paths.get(df.format(k) + "_smac_mixed.txt"));
        try (BufferedWriter bw = Files.newBufferedWriter(outputFile, StandardCharsets.UTF_8)) {
            trainset.toFile(bw, true);
            bw.write('\n');
            bw.write(TRAIN_TEST_SEP);
            bw.write('\n');
            testset.toFile(bw, true);
        }
        logger.info("Wrote file #{} ({})", k, outputFile);
        k++;
    }
    for (final TauPTA anomaly : anomalyOccurences.keySet()) {
        logger.info("Anomaly {} was chosen {} times", anomaly.getAnomalyType(), anomalyOccurences.get(anomaly));
    }
}
From source file:sadl.run.pipelines.Pipeline.java
public ExperimentResult run() throws IOException, InterruptedException {
    if (debug) {
        Settings.setDebug(debug);
    }
    if (featureCreatorMethod == FeatureCreatorMethod.FULL) {
        featureCreator = new FullFeatureCreator();
    } else if (featureCreatorMethod == FeatureCreatorMethod.SMALL) {
        featureCreator = new SmallFeatureCreator();
    } else if (featureCreatorMethod == FeatureCreatorMethod.MINIMAL) {
        featureCreator = new MinimalFeatureCreator();
    } else {
        featureCreator = null;
    }
    if (detectorMethod == DetectorMethod.SVM) {
        pdttaDetector = new VectorDetector(aggType, featureCreator,
                new LibSvmClassifier(svmProbabilityEstimate, svmGamma, svmNu, svmKernelType, svmEps, svmDegree, scalingMethod));
        // pdttaDetector = new PdttaOneClassSvmDetector(aggType, featureCreator, svmProbabilityEstimate, svmGamma, svmNu, svmCosts, svmKernelType, svmEps,
        // svmDegree, scalingMethod);
    } else if (detectorMethod == DetectorMethod.THRESHOLD_AGG_ONLY) {
        pdttaDetector = new AggregatedThresholdDetector(aggType, aggregatedEventThreshold, aggregatedTimeThreshold, aggregateSublists);
    } else if (detectorMethod == DetectorMethod.THRESHOLD_ALL) {
        pdttaDetector = new FullThresholdDetector(aggType, aggregatedEventThreshold, aggregatedTimeThreshold, aggregateSublists,
                singleEventThreshold, singleTimeThreshold);
    } else if (detectorMethod == DetectorMethod.DBSCAN) {
        // pdttaDetector = new PdttaDbScanDetector(aggType, featureCreator, dbscan_eps, dbscan_n, distanceMethod, scalingMethod);
        pdttaDetector = new VectorDetector(aggType, featureCreator, new DbScanClassifier(dbscan_eps, dbscan_n, dbScanDistanceMethod, scalingMethod));
    } else {
        pdttaDetector = null;
    }
    if (kdeKernelFunctionQualifier == KdeKernelFunction.BIWEIGHT) {
        kdeKernelFunction = BiweightKF.getInstance();
    } else if (kdeKernelFunctionQualifier == KdeKernelFunction.EPANECHNIKOV) {
        kdeKernelFunction = EpanechnikovKF.getInstance();
    } else if (kdeKernelFunctionQualifier == KdeKernelFunction.GAUSS) {
        kdeKernelFunction = GaussKF.getInstance();
    } else if (kdeKernelFunctionQualifier == KdeKernelFunction.TRIWEIGHT) {
        kdeKernelFunction = TriweightKF.getInstance();
    } else if (kdeKernelFunctionQualifier == KdeKernelFunction.UNIFORM) {
        kdeKernelFunction = UniformKF.getInstance();
    } else if (kdeKernelFunctionQualifier == KdeKernelFunction.ESTIMATE) {
        kdeKernelFunction = null;
    }
    final TimedInput trainInput;
    final TimedInput testInput;
    if (trainTestFile != null) {
        // a combined file yields a pair: getKey() holds the training input, getValue() the test input
        final Pair<TimedInput, TimedInput> pair = IoUtils.readTrainTestFile(trainTestFile);
        trainInput = pair.getKey();
        testInput = pair.getValue();
    } else {
        trainInput = TimedInput.parse(trainFile);
        testInput = TimedInput.parse(testFile);
    }
    final ProbabilisticModelLearner learner = new PdttaLearner(mergeAlpha, recursiveMergeTest, kdeKernelFunction, kdeBandwidth, mergeTest,
            smoothingPrior, mergeT0, null);
    final ProbabilisticModel model = learner.train(trainInput);
    final AnomalyDetection detection = new AnomalyDetection(pdttaDetector, model);
    final ExperimentResult result = detection.test(testInput);
    System.out.println("Result for SMAC: SUCCESS, 0, 0, " + (1 - result.getFMeasure()) + ", 0");
    // IoUtils.xmlSerialize(automaton, Paths.get("pdtta.xml"));
    // automaton = (PDTTA) IoUtils.xmlDeserialize(Paths.get("pdtta.xml"));
    return result;
}
From source file:uk.ac.susx.tag.method51.twitter.geocoding.LocationMapper.java
private String getLoc(Tweet tweet) {
    try {
        Point point = geoCoder.geoCode(tweet);
        String location = null;
        for (Pair<Polygon, String> loc : locations) {
            Polygon p = loc.getKey(); // left element: the polygon covering this named location
            if (p.contains(point)) {
                location = loc.getValue(); // right element: the location's name
            }
        }
        return location;
    } catch (LocationUnresolvedException e) {
        return null;
    }
}
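All of the examples above unpack pairs positionally via getKey()/getValue(); because Pair implements Map.Entry, the same accessors also work as method references against entry-based APIs. A small sketch under the same commons-lang3 assumption (the names and values here are made up for illustration):

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import org.apache.commons.lang3.tuple.Pair;

public class PairAsEntryExample {
    public static void main(String[] args) {
        // A list of pairs can be collected into a Map via the Map.Entry accessors
        List<Pair<String, Integer>> pairs = List.of(Pair.of("a", 1), Pair.of("b", 2));
        Map<String, Integer> map = pairs.stream()
                .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
        System.out.println(map); // {a=1, b=2}
    }
}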