Example usage for weka.clusterers HierarchicalClusterer setDebug

Introduction

In this page you can find the example usage for weka.clusterers HierarchicalClusterer setDebug.

Prototype

public void setDebug(boolean debug)

Source Link

Document

Set debugging mode.

Usage

From source file:eu.cassandra.appliance.IsolatedApplianceExtractor.java

License:Apache License

/**
 * This is an auxiliary function that prepares the clustering data set. The
 * events must be translated to instances of the data set that can be used for
 * clustering.//from   w  w w . j a  v a2  s  . c  om
 * 
 * @param isolated
 *          The list of the events containing an isolated appliance.
 * @return The instances of the data
 * @throws Exception
 */
private Instances createInstances(ArrayList<Event> isolated) throws Exception {
    // Initializing auxiliary variables namely the attributes of the data set
    Attribute id = new Attribute("id");
    Attribute pDiffRise = new Attribute("pDiffRise");
    Attribute qDiffRise = new Attribute("qDiffRise");
    Attribute pDiffReduce = new Attribute("pDiffReduce");
    Attribute qDiffReduce = new Attribute("qDiffReduce");

    ArrayList<Attribute> attr = new ArrayList<Attribute>();
    attr.add(id);
    attr.add(pDiffRise);
    attr.add(qDiffRise);
    attr.add(pDiffReduce);
    attr.add(qDiffReduce);

    Instances instances = new Instances("Isolated", attr, 0);

    // Each event is translated to an instance with the above attributes
    for (Event event : isolated) {

        Instance inst = new DenseInstance(5);
        inst.setValue(id, event.getId());
        inst.setValue(pDiffRise, event.getRisingPoints().get(0).getPDiff());
        inst.setValue(qDiffRise, event.getRisingPoints().get(0).getQDiff());
        inst.setValue(pDiffReduce, event.getReductionPoints().get(0).getPDiff());
        inst.setValue(qDiffReduce, event.getReductionPoints().get(0).getQDiff());

        instances.add(inst);

    }

    int n = Constants.MAX_CLUSTERS_NUMBER;
    Instances newInst = null;

    System.out.println("Instances: " + instances.toSummaryString());
    System.out.println("Max Clusters: " + n);

    // Create the addcluster filter of Weka and the set up the hierarchical
    // clusterer.
    AddCluster addcluster = new AddCluster();

    if (instances.size() > Constants.KMEANS_LIMIT_NUMBER || instances.size() == 0) {

        HierarchicalClusterer clusterer = new HierarchicalClusterer();

        String[] opt = { "-N", "" + n + "", "-P", "-D", "-L", "AVERAGE" };

        clusterer.setDistanceFunction(new EuclideanDistance());
        clusterer.setNumClusters(n);
        clusterer.setOptions(opt);
        clusterer.setPrintNewick(true);
        clusterer.setDebug(true);

        // clusterer.getOptions();

        addcluster.setClusterer(clusterer);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster data set
        newInst = Filter.useFilter(instances, addcluster);

    } else {

        SimpleKMeans kmeans = new SimpleKMeans();

        kmeans.setSeed(10);

        // This is the important parameter to set
        kmeans.setPreserveInstancesOrder(true);
        kmeans.setNumClusters(n);
        kmeans.buildClusterer(instances);

        addcluster.setClusterer(kmeans);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster data set
        newInst = Filter.useFilter(instances, addcluster);

    }

    return newInst;

}

From source file:eu.cassandra.appliance.IsolatedEventsExtractor.java

License:Apache License

/**
 * This is an auxiliary function that prepares the clustering data set. The
 * events must be translated to instances of the data set that can be used for
 * clustering.//from  w ww  .  j  a va2 s. c o m
 * 
 * @param isolated
 *          The list of the events containing an isolated appliance.
 * @return The instances of the data
 * @throws Exception
 */
private Instances createInstances(ArrayList<Event> isolated) throws Exception {
    // Initializing auxiliary variables namely the attributes of the data set
    Attribute id = new Attribute("id");
    Attribute pDiffRise = new Attribute("pDiffRise");
    Attribute qDiffRise = new Attribute("qDiffRise");
    Attribute pDiffReduce = new Attribute("pDiffReduce");
    Attribute qDiffReduce = new Attribute("qDiffReduce");
    Attribute duration = new Attribute("duration");

    ArrayList<Attribute> attr = new ArrayList<Attribute>();
    attr.add(id);
    attr.add(pDiffRise);
    attr.add(qDiffRise);
    attr.add(pDiffReduce);
    attr.add(qDiffReduce);
    attr.add(duration);

    Instances instances = new Instances("Isolated", attr, 0);

    // Each event is translated to an instance with the above attributes
    for (Event event : isolated) {

        Instance inst = new DenseInstance(6);
        inst.setValue(id, event.getId());
        inst.setValue(pDiffRise, event.getRisingPoints().get(0).getPDiff());
        inst.setValue(qDiffRise, event.getRisingPoints().get(0).getQDiff());
        inst.setValue(pDiffReduce, event.getReductionPoints().get(0).getPDiff());
        inst.setValue(qDiffReduce, event.getReductionPoints().get(0).getQDiff());
        inst.setValue(duration, event.getEndMinute() - event.getStartMinute());
        instances.add(inst);

    }

    int n = Constants.MAX_CLUSTERS_NUMBER;
    Instances newInst = null;

    log.info("Instances: " + instances.toSummaryString());
    log.info("Max Clusters: " + n);

    // Create the addcluster filter of Weka and the set up the hierarchical
    // clusterer.
    AddCluster addcluster = new AddCluster();

    if (instances.size() > Constants.KMEANS_LIMIT_NUMBER || instances.size() == 0) {

        HierarchicalClusterer clusterer = new HierarchicalClusterer();

        String[] opt = { "-N", "" + n + "", "-P", "-D", "-L", "AVERAGE" };

        clusterer.setDistanceFunction(new EuclideanDistance());
        clusterer.setNumClusters(n);
        clusterer.setOptions(opt);
        clusterer.setPrintNewick(true);
        clusterer.setDebug(true);

        // clusterer.getOptions();

        addcluster.setClusterer(clusterer);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster data set
        newInst = Filter.useFilter(instances, addcluster);

    } else {

        SimpleKMeans kmeans = new SimpleKMeans();

        kmeans.setSeed(10);

        // This is the important parameter to set
        kmeans.setPreserveInstancesOrder(true);
        kmeans.setNumClusters(n);
        kmeans.buildClusterer(instances);

        addcluster.setClusterer(kmeans);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster data set
        newInst = Filter.useFilter(instances, addcluster);

    }

    return newInst;

}

From source file:nl.uva.sne.classifiers.Hierarchical.java

@Override
public Map<String, String> cluster(String inDir) throws IOException, ParseException {
    try {//from ww w.j  a v a 2  s  . c o  m

        Instances data = ClusterUtils.terms2Instances(inDir, false);

        //            ArffSaver s = new ArffSaver();
        //            s.setInstances(data);
        //            s.setFile(new File(inDir+"/dataset.arff"));
        //            s.writeBatch();

        DistanceFunction df;
        //            SimpleKMeans currently only supports the Euclidean and Manhattan distances.
        switch (distanceFunction) {
        case "Minkowski":
            df = new MinkowskiDistance(data);
            break;
        case "Euclidean":
            df = new EuclideanDistance(data);
            break;
        case "Chebyshev":
            df = new ChebyshevDistance(data);
            break;
        case "Manhattan":
            df = new ManhattanDistance(data);
            break;
        default:
            df = new EuclideanDistance(data);
            break;
        }

        Logger.getLogger(Hierarchical.class.getName()).log(Level.INFO, "Start clusteing");

        weka.clusterers.HierarchicalClusterer clusterer = new HierarchicalClusterer();
        clusterer.setOptions(new String[] { "-L", "COMPLETE" });
        clusterer.setDebug(true);
        clusterer.setNumClusters(numOfClusters);
        clusterer.setDistanceFunction(df);
        clusterer.setDistanceIsBranchLength(true);
        clusterer.setPrintNewick(false);

        weka.clusterers.FilteredClusterer fc = new weka.clusterers.FilteredClusterer();
        String[] options = new String[2];
        options[0] = "-R"; // "range"
        options[1] = "1"; // we want to ignore the attribute that is in the position '1'
        Remove remove = new Remove(); // new instance of filter
        remove.setOptions(options); // set options

        fc.setFilter(remove); //add filter to remove attributes
        fc.setClusterer(clusterer); //bind FilteredClusterer to original clusterer
        fc.buildClusterer(data);

        //             // Print normal
        //        clusterer.setPrintNewick(false);
        //        System.out.println(clusterer.graph());
        //        // Print Newick
        //        clusterer.setPrintNewick(true);
        //        System.out.println(clusterer.graph());
        //
        //        // Let's try to show this clustered data!
        //        JFrame mainFrame = new JFrame("Weka Test");
        //        mainFrame.setSize(600, 400);
        //        mainFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        //        Container content = mainFrame.getContentPane();
        //        content.setLayout(new GridLayout(1, 1));
        //
        //        HierarchyVisualizer visualizer = new HierarchyVisualizer(clusterer.graph());
        //        content.add(visualizer);
        //
        //        mainFrame.setVisible(true);
        return ClusterUtils.bulidClusters(clusterer, data, inDir);

    } catch (Exception ex) {
        Logger.getLogger(Hierarchical.class.getName()).log(Level.SEVERE, null, ex);
    }
    return null;
}