org.iobserve.analysis.behavior.clustering.hierarchical.HierarchicalClustering.java Source code

Java tutorial

Introduction

Here is the source code for org.iobserve.analysis.behavior.clustering.hierarchical.HierarchicalClustering.java

Source

/***************************************************************************
 * Copyright (C) 2018 iObserve Project (https://www.iobserve-devops.net)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ***************************************************************************/

package org.iobserve.analysis.behavior.clustering.hierarchical;

import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.eclipse.net4j.util.collection.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import weka.clusterers.HierarchicalClusterer;
import weka.core.DistanceFunction;
import weka.core.EuclideanDistance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.ManhattanDistance;
import weka.core.SelectedTag;

/**
 * Configures and runs an agglomerative hierarchical clustering on a given input data set using a
 * WEKA {@link HierarchicalClusterer}. The number of clusters is chosen by a
 * {@link ClustersSelector} and the resulting clustering is additionally written to a CSV file.
 *
 * @author SL
 * @since 0.0.3
 */
public class HierarchicalClustering implements IHierarchicalClustering {

    private static final Logger LOGGER = LoggerFactory.getLogger(HierarchicalClustering.class);

    /** Tag ids handed to {@link SelectedTag} together with {@link HierarchicalClusterer#TAGS_LINK_TYPE}. */
    private static final int LINK_TYPE_SINGLE = 0;
    private static final int LINK_TYPE_COMPLETE = 1;
    private static final int LINK_TYPE_AVERAGE = 2;

    private final SelectedTag linkage;
    private final DistanceFunction distanceFunction;
    private final String clusterSelectionMethod;
    private final String outputPath;

    /**
     * Constructor.
     *
     * @param distanceMetric
     *            Used distance metric for hierarchical clustering. Recognized values are
     *            "manhattan" (the legacy spelling "manhatten" is accepted as well) and
     *            "euclidean"; any other value, including {@code null}, selects Euclidean distance.
     * @param clusterSelectionMethod
     *            Used method for selecting a "good" number of clusters from clustering. Passed on
     *            unchanged to the {@link ClustersSelector}.
     * @param linkage
     *            Used linkage for hierarchical clustering. Recognized values are "single",
     *            "complete" and "average"; any other value, including {@code null}, selects
     *            complete linkage.
     * @param outputPath
     *            Output path for CSV file.
     */
    public HierarchicalClustering(final String distanceMetric, final String clusterSelectionMethod,
            final String linkage, final String outputPath) {
        this.linkage = HierarchicalClustering.createLinkage(linkage);
        this.distanceFunction = HierarchicalClustering.createDistanceFunction(distanceMetric);
        this.clusterSelectionMethod = clusterSelectionMethod;
        this.outputPath = outputPath;
    }

    /**
     * Manage the finding and creation of a "good" clustering of a given input data set.
     *
     * <p>
     * The clustering result is also written to a CSV file at the configured output path. A failure
     * to write that file is logged and does not prevent the result from being returned.
     * </p>
     *
     * @param instances
     *            Input data set.
     * @return the clustering selected by the {@link ClustersSelector}
     */
    @Override
    public Map<Integer, List<Pair<Instance, Double>>> clusterInstances(final Instances instances) {
        // Use a fresh, method-local clusterer per invocation so that separate instances of this
        // class cannot overwrite each other's clusterer through shared static state.
        final HierarchicalClusterer hierarchicalClusterer = new HierarchicalClusterer();
        hierarchicalClusterer.setDistanceFunction(this.distanceFunction);
        hierarchicalClusterer.setDistanceIsBranchLength(false);
        hierarchicalClusterer.setLinkType(this.linkage);

        // Find a "good" number of clusters by applying the clusterSelectionMethod.
        final ClustersSelector clusterSelection = new ClustersSelector(this.clusterSelectionMethod,
                hierarchicalClusterer, instances);
        final Map<Integer, List<Pair<Instance, Double>>> clusteringResults = clusterSelection.findGoodClustering();

        // Write clusteringResults to a CSV file.
        final CSVSinkFilter csvFilter = new CSVSinkFilter();
        final Map<Double, List<Instance>> clusteringKVs = csvFilter
                .convertClusteringResultsToKVPair(clusteringResults);
        try {
            csvFilter.createCSVFromClusteringResult(this.outputPath, clusteringKVs);
        } catch (final IOException e) {
            // Deliberately best-effort: the clustering itself succeeded, only the export failed.
            HierarchicalClustering.LOGGER.error("Writing hierarchical clustering results to csv failed.", e);
        }

        HierarchicalClustering.LOGGER.info("Hierarchical clustering done.");
        return clusteringResults;
    }

    /**
     * Getter for distance function.
     *
     * @return distance function.
     */
    public DistanceFunction getDistanceFunction() {
        return this.distanceFunction;
    }

    /**
     * Translate the configured linkage name into the corresponding WEKA link type tag.
     *
     * @param linkageType
     *            Type of linkage used in hierarchical clustering, may be {@code null}.
     * @return the selected link type tag; complete linkage for unknown or missing names.
     */
    private static SelectedTag createLinkage(final String linkageType) {
        final int tagId;
        // A switch on a null string throws a NullPointerException, so map null to the default branch.
        switch (linkageType == null ? "" : linkageType) {
        case "single":
            tagId = HierarchicalClustering.LINK_TYPE_SINGLE;
            break;
        case "complete":
            tagId = HierarchicalClustering.LINK_TYPE_COMPLETE;
            break;
        case "average":
            tagId = HierarchicalClustering.LINK_TYPE_AVERAGE;
            break;
        default: // Complete linkage as default.
            HierarchicalClustering.LOGGER.warn("Unknown linkage '{}', falling back to complete linkage.",
                    linkageType);
            tagId = HierarchicalClustering.LINK_TYPE_COMPLETE;
            break;
        }
        return new SelectedTag(tagId, HierarchicalClusterer.TAGS_LINK_TYPE);
    }

    /**
     * Translate the configured distance metric name into a WEKA distance function.
     *
     * @param distanceType
     *            Chosen distance function, may be {@code null}.
     * @return the distance function; Euclidean distance for unknown or missing names.
     */
    private static DistanceFunction createDistanceFunction(final String distanceType) {
        // A switch on a null string throws a NullPointerException, so map null to the default branch.
        switch (distanceType == null ? "" : distanceType) {
        case "manhattan":
        case "manhatten": // Legacy misspelling, kept so that existing configurations keep working.
            return new ManhattanDistance();
        case "euclidean":
            return new EuclideanDistance();
        default: // Euclidean as default.
            HierarchicalClustering.LOGGER.warn("Unknown distance metric '{}', falling back to euclidean distance.",
                    distanceType);
            return new EuclideanDistance();
        }
    }
}