Java tutorial
/** * Copyright (c) Acroquest Technology Co, Ltd. All Rights Reserved. * Please read the associated COPYRIGHTS file for more details. * * THE SOFTWARE IS PROVIDED BY Acroquest Technolog Co., Ltd., * WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDER BE LIABLE FOR ANY * CLAIM, DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING * OR DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES. */ package acromusashi.stream.ml.anomaly.lof; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.commons.collections.ComparatorUtils; import org.apache.commons.lang.StringUtils; import org.apache.commons.math.util.MathUtils; import acromusashi.stream.ml.anomaly.lof.entity.DistanceResult; import acromusashi.stream.ml.anomaly.lof.entity.DistanceResultComparator; import acromusashi.stream.ml.anomaly.lof.entity.KDistanceResult; import acromusashi.stream.ml.anomaly.lof.entity.LofDataSet; import acromusashi.stream.ml.anomaly.lof.entity.LofPoint; import acromusashi.stream.ml.anomaly.lof.entity.LofPointComparator; /** * Local Outlier Factor?? * * @author kimura */ public class LofCalculator { /** * ???? */ private LofCalculator() { } /** * ??K??????????????<br> * ??????????<br> * * @param kn K * @param targetPoint * @param dataSet * @return LOF */ public static double calculateLofNoIntermediate(int kn, LofPoint targetPoint, LofDataSet dataSet) { // ?K??K??? KDistanceResult kResult = calculateKDistance(kn, targetPoint, dataSet); LofPoint tmpTargetPoint = targetPoint.deepCopy(); tmpTargetPoint.setkDistance(kResult.getkDistance()); tmpTargetPoint.setkDistanceNeighbor(kResult.getkDistanceNeighbor()); // ???? LofDataSet tmpDataSet = dataSet.deepCopy(); initDataSet(kn, tmpDataSet); updateLrd(tmpTargetPoint, tmpDataSet); // ?? double lof = calculateLof(tmpTargetPoint, tmpDataSet); return lof; } /** * ?????<br> * ???????????????????????????Id? * * @param max ?? * @param addedPoint * @param dataSet * @return ???Id???????null */ public static String addPointToDataSet(int max, LofPoint addedPoint, LofDataSet dataSet) { // ?????????????? boolean dateDelete = false; if (max <= dataSet.getDataIdList().size()) { dateDelete = true; } // ?ON??????? String deleteId = null; if (dateDelete) { deleteId = dataSet.getDataIdList().get(0); dataSet.deleteData(deleteId); } dataSet.addData(addedPoint); return deleteId; } /** * ??K????<br> * ??????????<br> * ???????????????? * * @param kn K * @param targetPoint * @param dataSet * @return LOF */ public static double calculateLofWithoutUpdate(int kn, LofPoint targetPoint, LofDataSet dataSet) { // ?K??K??? KDistanceResult kResult = calculateKDistance(kn, targetPoint, dataSet); LofPoint tmpPoint = targetPoint.deepCopy(); tmpPoint.setkDistance(kResult.getkDistance()); tmpPoint.setkDistanceNeighbor(kResult.getkDistanceNeighbor()); updateLrd(tmpPoint, dataSet); // ?? double lof = calculateLof(tmpPoint, dataSet); return lof; } /** * ??K????<br> * ???????????????????????? * * @param kn K * @param max ?? * @param addedPoint * @param dataSet * @return LOF */ public static double calculateLofWithUpdate(int kn, int max, LofPoint addedPoint, LofDataSet dataSet) { // ?????????????? String deleteId = addPointToDataSet(max, addedPoint, dataSet); // K??K?????????? Set<String> updateTargets = generateUpdateTargets(addedPoint, dataSet, deleteId); Collection<LofPoint> targetList = dataSet.getDataMap().values(); // K??K??????????????2???? for (LofPoint targetPoint : targetList) { if (updateTargets.contains(targetPoint.getDataId())) { // ?K??K??? updateKDistance(kn, targetPoint, dataSet); } } // K?????K?????K????????/??? // ???????????/????????????? for (LofPoint targetPoint : targetList) { // ???? updateLrd(targetPoint, dataSet); } // ??addedPoint???K??K????????????????????? double lof = calculateLof(addedPoint, dataSet); return lof; } /** * ??K?????<br> * ?????? * <ol> * <li>K?</li> * <li>K???ID</li> * <li>??</li> * </ol> * * @param kn K * @param dataSet */ public static void initDataSet(int kn, LofDataSet dataSet) { Collection<LofPoint> pointList = dataSet.getDataMap().values(); // K??K??????????????2???? for (LofPoint targetPoint : pointList) { // ?K??K??? updateKDistance(kn, targetPoint, dataSet); } for (LofPoint targetPoint : pointList) { // ???? updateLrd(targetPoint, dataSet); } } /** * ??<br> * ?????????????{@link #initDataSet(int, LofDataSet)}??? * * @param baseDataSet ? * @param targetDataSet ? * @param max ?? * @return ? */ public static LofDataSet mergeDataSet(LofDataSet baseDataSet, LofDataSet targetDataSet, int max) { Collection<LofPoint> basePointList = baseDataSet.getDataMap().values(); Collection<LofPoint> targetPointList = targetDataSet.getDataMap().values(); // LOF?????? List<LofPoint> mergedList = new ArrayList<>(); mergedList.addAll(basePointList); mergedList.addAll(targetPointList); Collections.sort(mergedList, new LofPointComparator()); // ????????? Collections.reverse(mergedList); // ????????? // ??????????????ID?????? // ??????ID????????????????? Set<String> registeredId = new HashSet<>(); int addedCount = 0; LofDataSet resultDataSet = new LofDataSet(); for (LofPoint targetPoint : mergedList) { if (registeredId.contains(targetPoint.getDataId()) == true) { continue; } registeredId.add(targetPoint.getDataId()); resultDataSet.addData(targetPoint); addedCount++; if (addedCount >= max) { break; } } return resultDataSet; } /** * ?????DataId???<br> * ???????????????? * <ol> * <li>??K????</li> * <li>????K????</li> * </ol> * * @param addedPoint * @param dataSet * @param deleteId ?Id * @return ?????DataId? */ protected static Set<String> generateUpdateTargets(LofPoint addedPoint, LofDataSet dataSet, String deleteId) { Set<String> updateTargets = new HashSet<>(); // ??????????????K??K?????????? // ???????????????????? // 1.??K???? // 2.??????K???? updateTargets.add(addedPoint.getDataId()); Collection<LofPoint> pointList = dataSet.getDataMap().values(); for (LofPoint targetPoint : pointList) { boolean isDeteted = false; boolean kDistUpdate = false; // ??? if (StringUtils.equals(addedPoint.getDataId(), targetPoint.getDataId()) == true) { continue; } // 1.??K????? // K?2030?????????List??contains????????? if (deleteId != null && targetPoint.getkDistanceNeighbor().contains(deleteId) == true) { isDeteted = true; } // 2.??????K????? if (MathUtils.distance(addedPoint.getDataPoint(), targetPoint.getDataPoint()) < targetPoint .getkDistance()) { kDistUpdate = true; } if (isDeteted || kDistUpdate) { updateTargets.add(targetPoint.getDataId()); } } return updateTargets; } /** * ???? * * @param targetPoint * @param dataSet */ protected static void updateLrd(LofPoint targetPoint, LofDataSet dataSet) { // ???? double lrd = calculateLrd(targetPoint, dataSet); targetPoint.setLrd(lrd); } /** * ?K??K???ID? * * @param kn K * @param targetPoint * @param dataSet */ protected static void updateKDistance(int kn, LofPoint targetPoint, LofDataSet dataSet) { // ?K??K??? KDistanceResult kResult = calculateKDistance(kn, targetPoint, dataSet); targetPoint.setkDistance(kResult.getkDistance()); targetPoint.setkDistanceNeighbor(kResult.getkDistanceNeighbor()); } /** * ??K???K??K???ID? * * @param kn K * @param targetPoint * @param dataSet * @return K??K???ID?????K?????????????????? */ protected static KDistanceResult calculateKDistance(int kn, LofPoint targetPoint, LofDataSet dataSet) { // ???? List<DistanceResult> distances = calculateDistances(targetPoint, dataSet); // ?????K??K???ID KDistanceResult kResult = calculateKDistance(kn, distances); return kResult; } /** * basePoint?targetPoint?????(Reachability distance)? * * @param basePoint * @param targetPoint * @return ??? */ protected static double calculateReachDistance(LofPoint basePoint, LofPoint targetPoint) { double distance = MathUtils.distance(basePoint.getDataPoint(), targetPoint.getDataPoint()); double reachDistance = (double) ComparatorUtils.max(distance, targetPoint.getkDistance(), ComparatorUtils.NATURAL_COMPARATOR); return reachDistance; } /** * basePoint????(Local reachability density)? * * @param basePoint * @param dataSet * @return ?? */ protected static double calculateLrd(LofPoint basePoint, LofDataSet dataSet) { int countedData = 0; double totalAmount = 0.0d; for (String targetDataId : basePoint.getkDistanceNeighbor()) { LofPoint targetPoint = dataSet.getDataMap().get(targetDataId); if (targetPoint == null) { continue; } double reachDist = calculateReachDistance(basePoint, targetPoint); totalAmount = totalAmount + reachDist; countedData++; } if (totalAmount == 0.0d) { return totalAmount; } return (countedData) / totalAmount; } /** * basePoint?(Local outlier factor)? * * @param basePoint * @param dataSet * @return */ protected static double calculateLof(LofPoint basePoint, LofDataSet dataSet) { int countedData = 0; double totalAmount = 0.0d; for (String targetDataId : basePoint.getkDistanceNeighbor()) { LofPoint targetPoint = dataSet.getDataMap().get(targetDataId); totalAmount = totalAmount + (targetPoint.getLrd() / basePoint.getLrd()); countedData++; } if (countedData == 0) { return totalAmount; } return totalAmount / (countedData); } /** * ?????????????????? * * @param targetPoint * @param dataSet * @return ???ID?????? */ protected static List<DistanceResult> calculateDistances(LofPoint targetPoint, LofDataSet dataSet) { List<DistanceResult> distances = new ArrayList<>(); for (Map.Entry<String, LofPoint> targetEntry : dataSet.getDataMap().entrySet()) { // ???? if (StringUtils.equals(targetEntry.getKey(), targetPoint.getDataId()) == true) { continue; } double distance = MathUtils.distance(targetEntry.getValue().getDataPoint(), targetPoint.getDataPoint()); DistanceResult result = new DistanceResult(targetEntry.getKey(), distance); // ????? distances.add(result); } Collections.sort(distances, new DistanceResultComparator()); return distances; } /** * ???K??K???ID? * * @param kn K * @param distances ?? * @return K??K???ID?????K?????????????????? */ protected static KDistanceResult calculateKDistance(int kn, List<DistanceResult> distances) { // ?????K??K???ID int countedDataNum = 0; List<String> idList = new ArrayList<>(); double nowDistance = 0.0d; // K???ID????? for (DistanceResult distanceResult : distances) { nowDistance = distanceResult.getDistance(); idList.add(distanceResult.getDataId()); countedDataNum++; if (kn <= countedDataNum) { break; } } KDistanceResult kResult = new KDistanceResult(); kResult.setkDistance(nowDistance); kResult.setkDistanceNeighbor(idList); return kResult; } }