Example usage for weka.clusterers SimpleKMeans setNumClusters

List of usage examples for weka.clusterers SimpleKMeans setNumClusters

Introduction

On this page you can find example usages of weka.clusterers.SimpleKMeans setNumClusters.

Prototype

@Override
public void setNumClusters(int n) throws Exception 

Document

Set the number of clusters to generate.
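
A minimal sketch of the call in isolation, assuming the Weka 3.x API; the file name "data.arff" and k=3 are placeholders:

import weka.clusterers.SimpleKMeans;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class SetNumClustersDemo {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("data.arff"); // placeholder path
        SimpleKMeans kMeans = new SimpleKMeans();
        kMeans.setNumClusters(3); // the setter documented above; declares throws Exception
        kMeans.setSeed(42);
        kMeans.buildClusterer(data);
        System.out.println(kMeans); // centroids and cluster sizes
    }
}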

Usage

From source file:entities.ArffFile.java

/**
 * Given a list of parameters, runs the microaggregation filter.
 * All of these parameters are user input.
 * @param df The distance function, either Euclidean or Manhattan, as given in the input.
 * @param numCluster
 * @param seed
 * @param maxIterations
 * @param replaceMissingValues
 * @param preserveInstancesOrder
 * @param attributes list of the attributes to generalize via clustering
 */
public void microAgregacion(DistanceFunction df, int numCluster, int seed, int maxIterations,
        boolean replaceMissingValues, boolean preserveInstancesOrder, List<Integer> attributes)
        throws Exception {
    //instancesFilter = new Instances(instances);
    SimpleKMeans kMeans = new SimpleKMeans();
    Instances uniqueAttributes = new Instances(instancesFilter);
    List<String> names = new ArrayList<>();
    int i = 0;
    for (Integer attribute : attributes) {
        String name = instancesFilter.attribute(attribute).name();
        if (instancesFilter.attribute(attribute).isDate() || instancesFilter.attribute(attribute).isString())
            throw new Exception("Cannot cluster attributes of type DATE or STRING");
        names.add(name);
    }
    while (uniqueAttributes.numAttributes() != attributes.size()) {
        if (!names.contains(uniqueAttributes.attribute(i).name()))
            uniqueAttributes.deleteAttributeAt(i);
        else
            i++;
    }
    try {
        kMeans.setNumClusters(numCluster);
        kMeans.setMaxIterations(maxIterations);
        kMeans.setSeed(seed);
        kMeans.setDisplayStdDevs(false);
        kMeans.setDistanceFunction(df);
        kMeans.setDontReplaceMissingValues(!replaceMissingValues); // Weka's flag is "don't replace", hence the negation
        kMeans.setPreserveInstancesOrder(preserveInstancesOrder);
        kMeans.buildClusterer(uniqueAttributes);
        //System.out.println(kMeans);
        for (int j = 0; j < uniqueAttributes.numInstances(); j++) {
            int cluster = kMeans.clusterInstance(uniqueAttributes.instance(j));
            for (int k = 0; k < uniqueAttributes.numAttributes(); k++) {
                if (uniqueAttributes.attribute(k).isNumeric())
                    uniqueAttributes.instance(j).setValue(k,
                            Double.parseDouble(kMeans.getClusterCentroids().instance(cluster).toString(k)));
                else
                    uniqueAttributes.instance(j).setValue(k,
                            kMeans.getClusterCentroids().instance(cluster).toString(k));
            }
        }
        replaceValues(uniqueAttributes, attributes);
    } catch (Exception ex) {
        Logger.getLogger(ArffFile.class.getName()).log(Level.SEVERE, null, ex);
    }
    //saveToFile("4");
}

From source file:eu.cassandra.appliance.IsolatedApplianceExtractor.java

License:Apache License

/**
 * This is an auxiliary function that prepares the clustering data set. The
 * events must be translated to instances of the data set that can be used for
 * clustering.
 * 
 * @param isolated
 *          The list of the events containing an isolated appliance.
 * @return The instances of the data
 * @throws Exception
 */
private Instances createInstances(ArrayList<Event> isolated) throws Exception {
    // Initialize the auxiliary variables, namely the attributes of the data set
    Attribute id = new Attribute("id");
    Attribute pDiffRise = new Attribute("pDiffRise");
    Attribute qDiffRise = new Attribute("qDiffRise");
    Attribute pDiffReduce = new Attribute("pDiffReduce");
    Attribute qDiffReduce = new Attribute("qDiffReduce");

    ArrayList<Attribute> attr = new ArrayList<Attribute>();
    attr.add(id);
    attr.add(pDiffRise);
    attr.add(qDiffRise);
    attr.add(pDiffReduce);
    attr.add(qDiffReduce);

    Instances instances = new Instances("Isolated", attr, 0);

    // Each event is translated to an instance with the above attributes
    for (Event event : isolated) {

        Instance inst = new DenseInstance(5);
        inst.setValue(id, event.getId());
        inst.setValue(pDiffRise, event.getRisingPoints().get(0).getPDiff());
        inst.setValue(qDiffRise, event.getRisingPoints().get(0).getQDiff());
        inst.setValue(pDiffReduce, event.getReductionPoints().get(0).getPDiff());
        inst.setValue(qDiffReduce, event.getReductionPoints().get(0).getQDiff());

        instances.add(inst);

    }

    int n = Constants.MAX_CLUSTERS_NUMBER;
    Instances newInst = null;

    System.out.println("Instances: " + instances.toSummaryString());
    System.out.println("Max Clusters: " + n);

    // Create Weka's AddCluster filter and set up the clusterer.
    AddCluster addcluster = new AddCluster();

    if (instances.size() > Constants.KMEANS_LIMIT_NUMBER || instances.size() == 0) {

        HierarchicalClusterer clusterer = new HierarchicalClusterer();

        String[] opt = { "-N", String.valueOf(n), "-P", "-D", "-L", "AVERAGE" };

        clusterer.setDistanceFunction(new EuclideanDistance());
        clusterer.setNumClusters(n);
        clusterer.setOptions(opt);
        clusterer.setPrintNewick(true);
        clusterer.setDebug(true);

        // clusterer.getOptions();

        addcluster.setClusterer(clusterer);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster data set
        newInst = Filter.useFilter(instances, addcluster);

    } else {

        SimpleKMeans kmeans = new SimpleKMeans();

        kmeans.setSeed(10);

        // Preserve the instance order so cluster assignments can be mapped back to instances
        kmeans.setPreserveInstancesOrder(true);
        kmeans.setNumClusters(n);
        kmeans.buildClusterer(instances);

        addcluster.setClusterer(kmeans);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster data set
        newInst = Filter.useFilter(instances, addcluster);

    }

    return newInst;

}
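
A note on reading the result: AddCluster appends a nominal attribute named "cluster" as the last attribute of the filtered data set, with values such as "cluster1". A small helper sketch for printing those labels (the class and method names here are hypothetical):

import weka.core.Instances;

public class ClusterLabels {
    // Print the cluster label that AddCluster appended as the last attribute.
    public static void printClusterLabels(Instances clustered) {
        int clusterAttr = clustered.numAttributes() - 1;
        for (int i = 0; i < clustered.numInstances(); i++) {
            System.out.println("Instance " + i + " -> "
                    + clustered.instance(i).stringValue(clusterAttr));
        }
    }
}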

From source file:eu.cassandra.appliance.IsolatedEventsExtractor.java

License:Apache License

/**
 * This is an auxiliary function that prepares the clustering data set. The
 * events must be translated to instances of the data set that can be used for
 * clustering.
 * 
 * @param isolated
 *          The list of the events containing an isolated appliance.
 * @return The instances of the data
 * @throws Exception
 */
private Instances createInstances(ArrayList<Event> isolated) throws Exception {
    // Initialize the auxiliary variables, namely the attributes of the data set
    Attribute id = new Attribute("id");
    Attribute pDiffRise = new Attribute("pDiffRise");
    Attribute qDiffRise = new Attribute("qDiffRise");
    Attribute pDiffReduce = new Attribute("pDiffReduce");
    Attribute qDiffReduce = new Attribute("qDiffReduce");
    Attribute duration = new Attribute("duration");

    ArrayList<Attribute> attr = new ArrayList<Attribute>();
    attr.add(id);
    attr.add(pDiffRise);
    attr.add(qDiffRise);
    attr.add(pDiffReduce);
    attr.add(qDiffReduce);
    attr.add(duration);

    Instances instances = new Instances("Isolated", attr, 0);

    // Each event is translated to an instance with the above attributes
    for (Event event : isolated) {

        Instance inst = new DenseInstance(6);
        inst.setValue(id, event.getId());
        inst.setValue(pDiffRise, event.getRisingPoints().get(0).getPDiff());
        inst.setValue(qDiffRise, event.getRisingPoints().get(0).getQDiff());
        inst.setValue(pDiffReduce, event.getReductionPoints().get(0).getPDiff());
        inst.setValue(qDiffReduce, event.getReductionPoints().get(0).getQDiff());
        inst.setValue(duration, event.getEndMinute() - event.getStartMinute());
        instances.add(inst);

    }

    int n = Constants.MAX_CLUSTERS_NUMBER;
    Instances newInst = null;

    log.info("Instances: " + instances.toSummaryString());
    log.info("Max Clusters: " + n);

    // Create Weka's AddCluster filter and set up the clusterer.
    AddCluster addcluster = new AddCluster();

    if (instances.size() > Constants.KMEANS_LIMIT_NUMBER || instances.size() == 0) {

        HierarchicalClusterer clusterer = new HierarchicalClusterer();

        String[] opt = { "-N", String.valueOf(n), "-P", "-D", "-L", "AVERAGE" };

        clusterer.setDistanceFunction(new EuclideanDistance());
        clusterer.setNumClusters(n);
        clusterer.setOptions(opt);
        clusterer.setPrintNewick(true);
        clusterer.setDebug(true);

        // clusterer.getOptions();

        addcluster.setClusterer(clusterer);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster data set
        newInst = Filter.useFilter(instances, addcluster);

    } else {

        SimpleKMeans kmeans = new SimpleKMeans();

        kmeans.setSeed(10);

        // Preserve the instance order so cluster assignments can be mapped back to instances
        kmeans.setPreserveInstancesOrder(true);
        kmeans.setNumClusters(n);
        kmeans.buildClusterer(instances);

        addcluster.setClusterer(kmeans);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster data set
        newInst = Filter.useFilter(instances, addcluster);

    }

    return newInst;

}

From source file:eu.cassandra.server.mongo.csn.MongoCluster.java

License:Apache License

/**
 * Clusters the CSN nodes with k-means and stores the resulting clusters.
 * @param message
 * @param graph_id
 * @param run_id
 * @param clusterBasedOn
 * @param numberOfClusters
 * @param name
 * @param clusterbasedon
 * @return
 */
private DBObject clusterKmeans(String message, String graph_id, String run_id, String clusterBasedOn,
        int numberOfClusters, String name, String clusterbasedon) {
    try {
        Instances instances = getInstances(clusterBasedOn, graph_id);
        if (instances.numInstances() < 2) {
            return new JSONtoReturn().createJSONError(message, new Exception("Number of CSN Nodes is < 2"));
        }

        SimpleKMeans kmeans = new SimpleKMeans();
        kmeans.setSeed((int) Calendar.getInstance().getTimeInMillis());
        // Preserving the instance order is required for getAssignments() below
        kmeans.setPreserveInstancesOrder(true);
        kmeans.setNumClusters(numberOfClusters);
        kmeans.buildClusterer(instances);

        // This array returns the cluster number (starting with 0) for each instance
        // The array has as many elements as the number of instances
        int[] assignments = kmeans.getAssignments();

        int i = 0;
        HashMap<Integer, Vector<String>> clusters = new HashMap<Integer, Vector<String>>();
        for (int clusterNum : assignments) {
            if (clusters.containsKey(clusterNum)) {
                Vector<String> cluster = clusters.get(clusterNum);
                cluster.add(nodeIDs.get(i));
                clusters.put(clusterNum, cluster);
            } else {
                Vector<String> cluster = new Vector<String>();
                cluster.add(nodeIDs.get(i));
                clusters.put(clusterNum, cluster);
            }
            i++;
        }
        nodeIDs.clear();
        return saveClusters(graph_id, run_id, "kmeans", clusters, null, name, clusterbasedon);
    } catch (Exception e) {
        e.printStackTrace();
        return new JSONtoReturn().createJSONError(message, e);
    }
}
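
As an aside, the containsKey branch in the grouping loop above can be collapsed with Map.computeIfAbsent (Java 8 and later); a behavior-equivalent fragment, assuming the same assignments array and nodeIDs list as in the method above:

int i = 0;
HashMap<Integer, Vector<String>> clusters = new HashMap<Integer, Vector<String>>();
for (int clusterNum : assignments) {
    // creates the Vector the first time clusterNum is seen, then appends to it
    clusters.computeIfAbsent(clusterNum, k -> new Vector<String>()).add(nodeIDs.get(i));
    i++;
}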

From source file:eu.cassandra.utils.Utils.java

License:Apache License

/**
 * This function creates clusters of points of interest based on the active
 * power difference they have.
 * 
 * @param pois
 *          The list of points of interest that will be clustered.
 * @param bias
 *          An offset added to the estimated number of clusters.
 * @return The newly created clusters with the points that comprise them.
 * @throws Exception
 */
public static ArrayList<ArrayList<PointOfInterest>> clusterPoints(ArrayList<PointOfInterest> pois, int bias)
        throws Exception {
    // Initialize the auxiliary variables
    ArrayList<ArrayList<PointOfInterest>> result = new ArrayList<ArrayList<PointOfInterest>>();

    // Estimating the number of clusters that will be created
    int numberOfClusters = (int) (Math.ceil((double) pois.size() / (double) Constants.MAX_POINTS_OF_INTEREST))
            + bias;

    log.info("Clusters: " + pois.size() + " / " + Constants.MAX_POINTS_OF_INTEREST + " + " + bias + " = "
            + numberOfClusters);

    // Create a new empty list of points for each cluster
    for (int i = 0; i < numberOfClusters; i++)
        result.add(new ArrayList<PointOfInterest>());

    // Initialize the auxiliary variables, namely the attributes of the data set
    Attribute id = new Attribute("id");
    Attribute pDiffRise = new Attribute("pDiff");

    ArrayList<Attribute> attr = new ArrayList<Attribute>();
    attr.add(id);
    attr.add(pDiffRise);

    Instances instances = new Instances("Points of Interest", attr, 0);

    // Each event is translated to an instance with the above attributes
    for (int i = 0; i < pois.size(); i++) {

        Instance inst = new DenseInstance(2);
        inst.setValue(id, i);
        inst.setValue(pDiffRise, Math.abs(pois.get(i).getPDiff()));

        instances.add(inst);

    }

    // System.out.println(instances.toString());

    Instances newInst = null;

    log.debug("Instances: " + instances.toSummaryString());

    // Create Weka's AddCluster filter and set up the k-means clusterer.
    AddCluster addcluster = new AddCluster();

    SimpleKMeans kmeans = new SimpleKMeans();

    kmeans.setSeed(numberOfClusters);

    // Preserve the instance order so cluster assignments can be mapped back to instances
    kmeans.setPreserveInstancesOrder(true);
    kmeans.setNumClusters(numberOfClusters);
    kmeans.buildClusterer(instances);

    addcluster.setClusterer(kmeans);
    addcluster.setInputFormat(instances);
    addcluster.setIgnoredAttributeIndices("1");

    // Cluster data set
    newInst = Filter.useFilter(instances, addcluster);

    // System.out.println(newInst.toString());

    // Go through the clustered data set to see where each point was placed.
    for (int i = 0; i < newInst.size(); i++) {

        String cluster = newInst.get(i).stringValue(newInst.attribute(2));

        cluster = cluster.replace("cluster", "");

        log.debug("Point of Interest: " + i + " Cluster: " + cluster);

        result.get(Integer.parseInt(cluster) - 1).add(pois.get(i));
    }

    // Sort each cluster's points by their minutes.
    for (int i = result.size() - 1; i >= 0; i--) {
        if (result.get(i).size() == 0)
            result.remove(i);
        else
            Collections.sort(result.get(i), Constants.comp);
    }

    // Sort all clusters by their active power.
    Collections.sort(result, Constants.comp5);

    return result;
}

From source file:fr.unice.i3s.rockflows.experiments.main.IntermediateExecutor.java

private List<InfoClassifier> inputClassifier(Dataset original) throws Exception {
    List<InfoClassifier> cls = new ArrayList<>();
    int id = 0;
    //LogisticRegression:
    InfoClassifier ic1 = new InfoClassifier(id++);
    ic1.classifier = new Logistic();
    ic1.name = "Logistic Regression";
    ic1.properties.requireNumericDataset = true;
    cls.add(ic1);
    //SVM:
    InfoClassifier ic2 = new InfoClassifier(id++);
    LibSVM ccc = new LibSVM();
    // Disable nominal-to-binary conversion and missing-value replacement
    ccc.setOptions(new String[] { "-J", // turn off nominal-to-binary conversion
            "-V" // turn off missing-value replacement
    });
    //ccc.setSVMType(new SelectedTag(LibSVM.SVMTYPE_C_SVC, LibSVM.TAGS_SVMTYPE));
    //ccc.setKernelType(new SelectedTag(LibSVM.KERNELTYPE_RBF, LibSVM.TAGS_KERNELTYPE));
    //ccc.setEps(0.001); //tolerance
    ic2.classifier = ccc;
    ic2.name = "Svm";
    ic2.properties.requireNumericDataset = true;
    cls.add(ic2);
    //J48:
    InfoClassifier ic3 = new InfoClassifier(id++);
    ic3.classifier = new J48();
    ic3.name = "J48";
    ic3.properties.manageMissingValues = true;
    cls.add(ic3);
    //NBTree:
    InfoClassifier ic4 = new InfoClassifier(id++);
    ic4.classifier = new NBTree();
    ic4.name = "NBTree";
    ic4.properties.manageMissingValues = true;
    cls.add(ic4);
    //RandomForest: 
    InfoClassifier ic5 = new InfoClassifier(id++);
    RandomForest ccc2 = new RandomForest();
    ccc2.setNumTrees(500);
    ccc2.setMaxDepth(0);
    ic5.classifier = ccc2;
    ic5.name = "Random Forest";
    ic5.properties.manageMissingValues = true;
    cls.add(ic5);
    //Logistic Model Trees (LMT):
    InfoClassifier ic6 = new InfoClassifier(id++);
    ic6.classifier = new LMT();
    ic6.name = "Logistic Model Tree";
    ic6.properties.manageMissingValues = true;
    cls.add(ic6);
    //Alternating Decision Trees (ADTree):
    InfoClassifier ic7 = new InfoClassifier(id++);
    if (original.trainingSet.numClasses() > 2) {
        MultiClassClassifier mc = new MultiClassClassifier();
        mc.setOptions(new String[] { "-M", "3" }); //1 vs 1
        mc.setClassifier(new ADTree());
        ic7.classifier = mc;
        ic7.name = "1-vs-1 Alternating Decision Tree";
    } else {
        ic7.classifier = new ADTree();
        ic7.name = "Alternating Decision Tree";
    }
    ic7.properties.manageMultiClass = false;
    ic7.properties.manageMissingValues = true;
    cls.add(ic7);
    //Naive Bayes:
    InfoClassifier ic8 = new InfoClassifier(id++);
    ic8.classifier = new NaiveBayes();
    ic8.name = "Naive Bayes";
    ic8.properties.manageMissingValues = true;
    cls.add(ic8);
    //Bayesian Networks:
    /*
     * All Bayes network algorithms implemented in Weka assume the following
     * about the data set:
     * - all variables are discrete finite variables; a data set with
     *   continuous variables can be discretized with
     *   weka.filters.unsupervised.attribute.Discretize;
     * - no instances have missing values; if there are any, they are filled
     *   in with weka.filters.unsupervised.attribute.ReplaceMissingValues.
     *
     * The first step performed by buildClassifier is checking whether the
     * data set fulfills those assumptions; if not, the data set is
     * automatically filtered and a warning is written to STDERR.
     */
    InfoClassifier ic9 = new InfoClassifier(id++);
    ic9.classifier = new BayesNet();
    ic9.name = "Bayesian Network";
    ic9.properties.requireNominalDataset = true;
    cls.add(ic9);
    //IBK
    InfoClassifier ic10 = new InfoClassifier(id++);
    ic10.classifier = new IBk();
    ic10.name = "IBk";
    ic10.properties.manageMissingValues = true;
    cls.add(ic10);
    //JRip:
    InfoClassifier ic11 = new InfoClassifier(id++);
    ic11.classifier = new JRip();
    ic11.name = "JRip";
    ic11.properties.manageMissingValues = true;
    cls.add(ic11);
    //MultilayerPerceptron(MLP):
    InfoClassifier ic12 = new InfoClassifier(id++);
    ic12.classifier = new MultilayerPerceptron();
    ic12.name = "Multilayer Perceptron";
    ic12.properties.requireNumericDataset = true;
    cls.add(ic12);
    //Bagging RepTree:
    InfoClassifier ic14 = new InfoClassifier(id++);
    REPTree base3 = new REPTree();
    Bagging ccc4 = new Bagging();
    ccc4.setClassifier(base3);
    ic14.classifier = ccc4;
    ic14.name = "Bagging RepTree";
    ic14.properties.manageMissingValues = true;
    cls.add(ic14);
    //Bagging J48
    InfoClassifier ic15 = new InfoClassifier(id++);
    Bagging ccc5 = new Bagging();
    ccc5.setClassifier(new J48());
    ic15.classifier = ccc5;
    ic15.name = "Bagging J48";
    ic15.properties.manageMissingValues = true;
    cls.add(ic15);
    //Bagging NBTree
    InfoClassifier ic16 = new InfoClassifier(id++);
    Bagging ccc6 = new Bagging();
    ccc6.setClassifier(new NBTree());
    ic16.classifier = ccc6;
    ic16.name = "Bagging NBTree";
    ic16.properties.manageMissingValues = true;
    cls.add(ic16);

    //Bagging OneR:
    InfoClassifier ic17 = new InfoClassifier(id++);
    Bagging ccc7 = new Bagging();
    ccc7.setClassifier(new OneR());
    ic17.classifier = ccc7;
    ic17.name = "Bagging OneR";
    ic17.properties.requireNominalDataset = true;
    ic17.properties.manageMissingValues = true;
    cls.add(ic17);
    //Bagging Jrip
    InfoClassifier ic18 = new InfoClassifier(id++);
    Bagging ccc8 = new Bagging();
    ccc8.setClassifier(new JRip());
    ic18.classifier = ccc8;
    ic18.name = "Bagging JRip";
    ic18.properties.manageMissingValues = true;
    cls.add(ic18);
    //MultiboostAB DecisionStump
    InfoClassifier ic24 = new InfoClassifier(id++);
    MultiBoostAB ccc14 = new MultiBoostAB();
    ccc14.setClassifier(new DecisionStump());
    ic24.classifier = ccc14;
    ic24.name = "MultiboostAB DecisionStump";
    ic24.properties.manageMissingValues = true;
    cls.add(ic24);
    //MultiboostAB OneR
    InfoClassifier ic25 = new InfoClassifier(id++);
    MultiBoostAB ccc15 = new MultiBoostAB();
    ccc15.setClassifier(new OneR());
    ic25.classifier = ccc15;
    ic25.name = "MultiboostAB OneR";
    ic25.properties.requireNominalDataset = true;
    cls.add(ic25);
    //MultiboostAB J48
    InfoClassifier ic27 = new InfoClassifier(id++);
    MultiBoostAB ccc17 = new MultiBoostAB();
    ccc17.setClassifier(new J48());
    ic27.classifier = ccc17;
    ic27.name = "MultiboostAB J48";
    ic27.properties.manageMissingValues = true;
    cls.add(ic27);
    //MultiboostAB Jrip
    InfoClassifier ic28 = new InfoClassifier(id++);
    MultiBoostAB ccc18 = new MultiBoostAB();
    ccc18.setClassifier(new JRip());
    ic28.classifier = ccc18;
    ic28.name = "MultiboostAB JRip";
    cls.add(ic28);
    //MultiboostAB NBTree
    InfoClassifier ic29 = new InfoClassifier(id++);
    MultiBoostAB ccc19 = new MultiBoostAB();
    ccc19.setClassifier(new NBTree());
    ic29.classifier = ccc19;
    ic29.name = "MultiboostAB NBTree";
    ic29.properties.manageMissingValues = true;
    cls.add(ic29);
    //RotationForest RandomTree
    InfoClassifier ic32 = new InfoClassifier(id++);
    RotationForest ccc21 = new RotationForest();
    RandomTree rtr5 = new RandomTree();
    rtr5.setMinNum(2);
    rtr5.setAllowUnclassifiedInstances(true);
    ccc21.setClassifier(rtr5);
    ic32.classifier = ccc21;
    ic32.name = "RotationForest RandomTree";
    ic32.properties.manageMissingValues = true;
    cls.add(ic32);
    //RotationForest J48:
    InfoClassifier ic33 = new InfoClassifier(id++);
    J48 base6 = new J48();
    RotationForest ccc22 = new RotationForest();
    ccc22.setClassifier(base6);
    ic33.classifier = ccc22;
    ic33.name = "RotationForest J48";
    ic33.properties.manageMissingValues = true;
    cls.add(ic33);
    //RandomCommittee RandomTree:
    InfoClassifier ic34 = new InfoClassifier(id++);
    RandomTree rtr4 = new RandomTree();
    rtr4.setMinNum(2);
    rtr4.setAllowUnclassifiedInstances(true);
    RandomCommittee ccc23 = new RandomCommittee();
    ccc23.setClassifier(rtr4);
    ic34.classifier = ccc23;
    ic34.name = "RandomCommittee RandomTree";
    ic34.properties.manageMissingValues = true;
    cls.add(ic34);
    //Class via Clustering: SimpleKMeans
    //N.B: it can't handle date attributes
    InfoClassifier ic35 = new InfoClassifier(id++);
    ClassificationViaClustering ccc24 = new ClassificationViaClustering();
    SimpleKMeans km = new SimpleKMeans();
    km.setNumClusters(original.trainingSet.numClasses());
    ccc24.setClusterer(km);
    ic35.classifier = ccc24;
    ic35.name = "Classification via Clustering: KMeans";
    ic35.properties.requireNumericDataset = true;
    cls.add(ic35);
    //Class via Clustering: FarthestFirst
    InfoClassifier ic36 = new InfoClassifier(id++);
    ClassificationViaClustering ccc25 = new ClassificationViaClustering();
    FarthestFirst ff = new FarthestFirst();
    ff.setNumClusters(original.trainingSet.numClasses());
    ccc25.setClusterer(ff);
    ic36.classifier = ccc25;
    ic36.name = "Classification via Clustering: FarthestFirst";
    ic36.properties.requireNumericDataset = true;
    cls.add(ic36);
    //SMO
    InfoClassifier ic37 = new InfoClassifier(id++);
    ic37.classifier = new SMO();
    ic37.properties.requireNumericDataset = true;
    ic37.properties.manageMultiClass = false;
    ic37.name = "Smo";
    cls.add(ic37);
    //Random Subspace
    InfoClassifier ic38 = new InfoClassifier(id++);
    RandomSubSpace sub = new RandomSubSpace();
    sub.setClassifier(new REPTree());
    ic38.classifier = sub;
    ic38.name = "Random Subspaces of RepTree";
    ic38.properties.manageMissingValues = true;
    cls.add(ic38);
    //PART rule based
    InfoClassifier ic39 = new InfoClassifier(id++);
    PART p39 = new PART();
    p39.setOptions(new String[] { "-C", "0.5" });
    ic39.classifier = p39; // use the configured instance
    ic39.name = "PART";
    ic39.properties.manageMissingValues = true;
    cls.add(ic39);
    //Decision-Table / Naive Bayes
    InfoClassifier ic40 = new InfoClassifier(id++);
    ic40.classifier = new DTNB();
    ic40.name = "DTNB";
    ic40.properties.manageMissingValues = true;
    cls.add(ic40);
    //Ridor Rule based
    InfoClassifier ic41 = new InfoClassifier(id++);
    ic41.classifier = new Ridor();
    ic41.name = "Ridor";
    ic41.properties.manageMissingValues = true;
    cls.add(ic41);
    //Decision Table
    InfoClassifier ic42 = new InfoClassifier(id++);
    ic42.classifier = new DecisionTable();
    ic42.name = "Decision Table";
    ic42.properties.manageMissingValues = true;
    cls.add(ic42);
    //Conjunctive Rule
    InfoClassifier ic43 = new InfoClassifier(id++);
    ic43.classifier = new ConjunctiveRule();
    ic43.name = "Conjunctive Rule";
    ic43.properties.manageMissingValues = true;
    cls.add(ic43);
    //LogitBoost Decision Stump
    InfoClassifier ic44 = new InfoClassifier(id++);
    LogitBoost lb = new LogitBoost();
    lb.setOptions(new String[] { "-L", "1.79" });
    lb.setClassifier(new DecisionStump());
    ic44.classifier = lb;
    ic44.name = "LogitBoost Decision Stump";
    ic44.properties.manageMissingValues = true;
    cls.add(ic44);
    //Raced Incremental Logit Boost, Decision Stump
    InfoClassifier ic45 = new InfoClassifier(id++);
    RacedIncrementalLogitBoost rlb = new RacedIncrementalLogitBoost();
    rlb.setClassifier(new DecisionStump());
    ic45.classifier = rlb;
    ic45.name = "Raced Incremental Logit Boost, Decision Stumps";
    ic45.properties.manageMissingValues = true;
    cls.add(ic45);
    //AdaboostM1 decision stump
    InfoClassifier ic46 = new InfoClassifier(id++);
    AdaBoostM1 adm = new AdaBoostM1();
    adm.setClassifier(new DecisionStump());
    ic46.classifier = adm;
    ic46.name = "AdaboostM1, Decision Stumps";
    ic46.properties.manageMissingValues = true;
    cls.add(ic46);
    //AdaboostM1 J48
    InfoClassifier ic47 = new InfoClassifier(id++);
    AdaBoostM1 adm2 = new AdaBoostM1();
    adm2.setClassifier(new J48());
    ic47.classifier = adm2;
    ic47.name = "AdaboostM1, J48";
    ic47.properties.manageMissingValues = true;
    cls.add(ic47);
    //MultiboostAb Decision Table
    InfoClassifier ic48 = new InfoClassifier(id++);
    MultiBoostAB mba = new MultiBoostAB();
    mba.setClassifier(new DecisionTable());
    ic48.classifier = mba;
    ic48.name = "MultiboostAB, Decision Table";
    ic48.properties.manageMissingValues = true;
    cls.add(ic48);
    //Multiboost NaiveBayes
    InfoClassifier ic49 = new InfoClassifier(id++);
    MultiBoostAB mba2 = new MultiBoostAB();
    mba2.setClassifier(new NaiveBayes());
    ic49.classifier = mba2;
    ic49.name = "MultiboostAB, Naive Bayes";
    ic49.properties.manageMissingValues = true;
    cls.add(ic49);
    //Multiboost PART
    InfoClassifier ic50 = new InfoClassifier(id++);
    MultiBoostAB mba3 = new MultiBoostAB();
    mba3.setClassifier(new PART());
    ic50.classifier = mba3;
    ic50.name = "MultiboostAB, PART";
    ic50.properties.manageMissingValues = true;
    cls.add(ic50);
    //Multiboost Random Tree
    InfoClassifier ic51 = new InfoClassifier(id++);
    MultiBoostAB mba4 = new MultiBoostAB();
    RandomTree rtr3 = new RandomTree();
    rtr3.setMinNum(2);
    rtr3.setAllowUnclassifiedInstances(true);
    mba4.setClassifier(rtr3);
    ic51.classifier = mba4;
    ic51.name = "MultiboostAB, RandomTree";
    ic51.properties.manageMissingValues = true;
    cls.add(ic51);
    //Multiboost Rep Tree
    InfoClassifier ic52 = new InfoClassifier(id++);
    MultiBoostAB mba5 = new MultiBoostAB();
    mba5.setClassifier(new REPTree());
    ic52.classifier = mba5;
    ic52.name = "MultiboostAB, RepTree";
    ic52.properties.manageMissingValues = true;
    cls.add(ic52);
    //Bagging Decision Stump
    InfoClassifier ic53 = new InfoClassifier(id++);
    Bagging bag = new Bagging();
    bag.setClassifier(new DecisionStump());
    ic53.classifier = bag;
    ic53.name = "Bagging Decision Stump";
    ic53.properties.manageMissingValues = true;
    cls.add(ic53);
    //Bagging Decision Table
    InfoClassifier ic54 = new InfoClassifier(id++);
    Bagging bag1 = new Bagging();
    bag1.setClassifier(new DecisionTable());
    ic54.classifier = bag1;
    ic54.name = "Bagging Decision Table";
    ic54.properties.manageMissingValues = true;
    cls.add(ic54);
    //Bagging HyperPipes
    InfoClassifier ic55 = new InfoClassifier(id++);
    Bagging bag2 = new Bagging();
    bag2.setClassifier(new HyperPipes());
    ic55.classifier = bag2;
    ic55.name = "Bagging Hyper Pipes";
    cls.add(ic55);
    //Bagging Naive Bayes
    InfoClassifier ic56 = new InfoClassifier(id++);
    Bagging bag3 = new Bagging();
    bag3.setClassifier(new NaiveBayes());
    ic56.classifier = bag3;
    ic56.name = "Bagging Naive Bayes";
    ic56.properties.manageMissingValues = true;
    cls.add(ic56);
    //Bagging PART
    InfoClassifier ic57 = new InfoClassifier(id++);
    Bagging bag4 = new Bagging();
    bag4.setClassifier(new PART());
    ic57.classifier = bag4;
    ic57.name = "Bagging PART";
    ic57.properties.manageMissingValues = true;
    cls.add(ic57);
    //Bagging RandomTree
    InfoClassifier ic58 = new InfoClassifier(id++);
    Bagging bag5 = new Bagging();
    RandomTree rtr2 = new RandomTree();
    rtr2.setMinNum(2);
    rtr2.setAllowUnclassifiedInstances(true);
    bag5.setClassifier(rtr2);
    ic58.classifier = bag5;
    ic58.name = "Bagging RandomTree";
    ic58.properties.manageMissingValues = true;
    cls.add(ic58);
    //NNge
    InfoClassifier ic59 = new InfoClassifier(id++);
    NNge nng = new NNge();
    nng.setNumFoldersMIOption(1);
    nng.setNumAttemptsOfGeneOption(5);
    ic59.classifier = nng;
    ic59.name = "NNge";
    cls.add(ic59);
    //OrdinalClassClassifier J48
    InfoClassifier ic60 = new InfoClassifier(id++);
    OrdinalClassClassifier occ = new OrdinalClassClassifier();
    occ.setClassifier(new J48());
    ic60.classifier = occ;
    ic60.name = "OrdinalClassClassifier J48";
    ic60.properties.manageMissingValues = true;
    cls.add(ic60);
    //Hyper Pipes
    InfoClassifier ic61 = new InfoClassifier(id++);
    ic61.classifier = new HyperPipes();
    ic61.name = "Hyper Pipes";
    cls.add(ic61);
    //Classification via Regression, M5P used by default
    InfoClassifier ic62 = new InfoClassifier(id++);
    ic62.classifier = new ClassificationViaRegression();
    ic62.name = "Classification via Regression, M5P";
    ic62.properties.requireNumericDataset = true;
    cls.add(ic62);
    //RBF Network
    InfoClassifier ic64 = new InfoClassifier(id++);
    RBFNetwork rbf = new RBFNetwork();
    rbf.setRidge(1e-8);
    rbf.setNumClusters(original.trainingSet.numAttributes() / 2);
    ic64.classifier = rbf;
    ic64.name = "RBF Network";
    ic64.properties.requireNumericDataset = true;
    if (!original.properties.isStandardized) {
        ic64.properties.compatibleWithDataset = false;
    }
    cls.add(ic64);
    //RandomTree
    InfoClassifier ic66 = new InfoClassifier(id++);
    RandomTree rtr = new RandomTree();
    rtr.setMinNum(2);
    rtr.setAllowUnclassifiedInstances(true);
    ic66.classifier = rtr;
    ic66.name = "Random Tree";
    ic66.properties.manageMissingValues = true;
    cls.add(ic66);
    //RepTree
    InfoClassifier ic67 = new InfoClassifier(id++);
    REPTree rept = new REPTree();
    ic67.classifier = rept;
    ic67.name = "Rep Tree";
    ic67.properties.manageMissingValues = true;
    cls.add(ic67);
    //Decision Stump
    InfoClassifier ic68 = new InfoClassifier(id++);
    ic68.classifier = new DecisionStump();
    ic68.name = "Decision Stump";
    ic68.properties.manageMissingValues = true;
    cls.add(ic68);
    //OneR
    InfoClassifier ic69 = new InfoClassifier(id++);
    ic69.classifier = new OneR();
    ic69.name = "OneR";
    ic69.properties.requireNominalDataset = true;
    ic69.properties.manageMissingValues = true;
    cls.add(ic69);
    //LWL
    InfoClassifier ic71 = new InfoClassifier(id++);
    ic71.classifier = new LWL();
    ic71.name = "LWL";
    ic71.properties.manageMissingValues = true;
    cls.add(ic71);
    //Bagging LWL
    InfoClassifier ic72 = new InfoClassifier(id++);
    Bagging bg72 = new Bagging();
    bg72.setClassifier(new LWL());
    ic72.classifier = bg72;
    ic72.name = "Bagging LWL";
    ic72.properties.manageMissingValues = true;
    cls.add(ic72);
    //Decorate
    InfoClassifier ic73 = new InfoClassifier(id++);
    ic73.classifier = new Decorate();
    ic73.name = "Decorate";
    ic73.properties.manageMissingValues = true;
    ic73.properties.minNumTrainingInstances = 15;
    this.indexDecorate = id - 1;
    cls.add(ic73);
    //Dagging
    InfoClassifier ic74 = new InfoClassifier(id++);
    Dagging dng = new Dagging();
    dng.setClassifier(new SMO());
    dng.setNumFolds(4);
    ic74.classifier = dng;
    ic74.properties.requireNumericDataset = true;
    ic74.properties.manageMultiClass = false;
    ic74.name = "Dagging SMO";
    cls.add(ic74);
    //IB1
    InfoClassifier ic75 = new InfoClassifier(id++);
    ic75.classifier = new IB1();
    ic75.properties.manageMissingValues = true;
    ic75.name = "IB1";
    cls.add(ic75);
    //Simple Logistic
    InfoClassifier ic76 = new InfoClassifier(id++);
    ic76.classifier = new SimpleLogistic();
    ic76.properties.requireNumericDataset = true;
    ic76.name = "Simple Logistic";
    cls.add(ic76);
    //VFI
    InfoClassifier ic77 = new InfoClassifier(id++);
    ic77.classifier = new VFI();
    ic77.properties.manageMissingValues = true;
    ic77.name = "VFI";
    cls.add(ic77);

    //check whether each classifier satisfies the minimum-instances constraint
    checkMinNumInstanes(cls, original.trainingSet);

    return cls;
}
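
The ClassificationViaClustering entry above (ic35) is the one that exercises setNumClusters; a standalone sketch of just that configuration, assuming a nominal class as the last attribute and a placeholder ARFF path:

import weka.classifiers.meta.ClassificationViaClustering;
import weka.clusterers.SimpleKMeans;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class CvcSketch {
    public static void main(String[] args) throws Exception {
        Instances train = DataSource.read("train.arff"); // placeholder path
        train.setClassIndex(train.numAttributes() - 1);
        SimpleKMeans km = new SimpleKMeans();
        km.setNumClusters(train.numClasses()); // one cluster per class value
        ClassificationViaClustering cvc = new ClassificationViaClustering();
        cvc.setClusterer(km);
        cvc.buildClassifier(train);
        System.out.println(cvc);
    }
}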

From source file:gr.auth.ee.lcs.AbstractLearningClassifierSystem.java

License:Open Source License

/**
 * Initialize the rule population by clustering the training set and producing rules based upon the clusters.
 * The training set is first divided into as many partitions as there are distinct label combinations.
 * @throws Exception
 * 
 * @param file
 *          the .arff file
 */
public ClassifierSet initializePopulation(final String file) throws Exception {

    final double gamma = SettingsLoader.getNumericSetting("CLUSTER_GAMMA", .2);

    int numberOfLabels = (int) SettingsLoader.getNumericSetting("numberOfLabels", 1);

    final Instances set = InstancesUtility.openInstance(file);

    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setSeed(10);
    kmeans.setPreserveInstancesOrder(true);

    /*
     * The partitions array will hold instances with only the attributes.
     * By contrast, partitionsWithCLasses will hold only the labels.
     */
    Instances[] partitions = InstancesUtility.partitionInstances(this, file);
    Instances[] partitionsWithCLasses = InstancesUtility.partitionInstances(this, file);

    /*
     * Instead of having multiple positions for the same label combination, use only one.
     * This is the one that will be used to "cover" the centroids.
     */
    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Instance temp = partitionsWithCLasses[i].instance(0);
        partitionsWithCLasses[i].delete();
        partitionsWithCLasses[i].add(temp);
    }

    /*
     * Delete the labels from the partitions.
     */
    String attributesIndicesForDeletion = "";

    for (int k = set.numAttributes() - numberOfLabels + 1; k <= set.numAttributes(); k++) {
        if (k != set.numAttributes())
            attributesIndicesForDeletion += k + ",";
        else
            attributesIndicesForDeletion += k;
    }

    /* attributesIndicesForDeletion = 8,9,10,11,12,13,14 e.g. for 7 attributes and 7 labels.
     * It does not start from 7 because Weka's Remove filter uses 1-based attribute indices. See the API.
     */
    for (int i = 0; i < partitions.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(attributesIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitions[i]);
        partitions[i] = Filter.useFilter(partitions[i], remove);
        //System.out.println(partitions[i]);
    }
    // partitions now contains only attributes

    /*
     * delete the attributes from partitionsWithCLasses
     */
    String labelsIndicesForDeletion = "";

    for (int k = 1; k <= set.numAttributes() - numberOfLabels; k++) {
        if (k != set.numAttributes() - numberOfLabels)
            labelsIndicesForDeletion += k + ",";
        else
            labelsIndicesForDeletion += k;
    }

    /*    attributesIncicesForDeletion = 8,9,10,11,12,13,14 e.g. for 7 attributes and 7 labels. 
     * It does not start from 7 because it assumes that the user inputs the number. See the api.
     */
    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(labelsIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitionsWithCLasses[i]);
        partitionsWithCLasses[i] = Filter.useFilter(partitionsWithCLasses[i], remove);
        //System.out.println(partitionsWithCLasses[i]);
    }
    // partitionsWithCLasses now contains only labels

    int populationSize = (int) SettingsLoader.getNumericSetting("populationSize", 1500);

    // the set used to store the rules from all the clusters
    ClassifierSet initialClassifiers = new ClassifierSet(new FixedSizeSetWorstFitnessDeletion(this,
            populationSize, new RouletteWheelSelector(AbstractUpdateStrategy.COMPARISON_MODE_DELETION, true)));

    for (int i = 0; i < partitions.length; i++) {

        try {

            kmeans.setNumClusters((int) Math.ceil(gamma * partitions[i].numInstances()));
            kmeans.buildClusterer(partitions[i]);
            int[] assignments = kmeans.getAssignments();

            Instances centroids = kmeans.getClusterCentroids();
            int numOfCentroidAttributes = centroids.numAttributes();

            /*
             * At this stage the centroids hold only attributes. To continue, we
             * need to give them back the labels we removed earlier. But first,
             * open up positions for the labels.
             */

            for (int j = 0; j < numberOfLabels; j++) {
                Attribute label = new Attribute("label" + j);
                centroids.insertAttributeAt(label, numOfCentroidAttributes + j);
            }

            for (int centroidInstances = 0; centroidInstances < centroids.numInstances(); centroidInstances++) {
                for (int labels = 0; labels < numberOfLabels; labels++) {
                    centroids.instance(centroidInstances).setValue(numOfCentroidAttributes + labels,
                            partitionsWithCLasses[i].instance(0).value(labels));
                }
            }

            double[][] centroidsArray = InstancesUtility.convertIntancesToDouble(centroids);

            for (int j = 0; j < centroidsArray.length; j++) {
                //System.out.printf("Instance %d => Cluster %d ", k, assignments[j]);
                final Classifier coveringClassifier = this.getClassifierTransformBridge()
                        .createRandomClusteringClassifier(centroidsArray[j]);

                coveringClassifier.setClassifierOrigin(Classifier.CLASSIFIER_ORIGIN_INIT);
                initialClassifiers.addClassifier(new Macroclassifier(coveringClassifier, 1), false);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    System.out.println(initialClassifiers);
    return initialClassifiers;
}
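
Both initializePopulation variants lean on the Remove filter for the label-stripping step; a self-contained sketch of just that step, assuming a hypothetical 14-attribute data set whose last 7 attributes are labels (Remove takes 1-based indices and also accepts ranges, so "8-14" is equivalent to the generated "8,9,...,14" string):

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;

public class StripLabels {
    public static void main(String[] args) throws Exception {
        Instances set = DataSource.read("mlc.arff"); // placeholder path
        Remove remove = new Remove();
        remove.setAttributeIndices("8-14"); // 1-based, range form
        remove.setInvertSelection(false);   // delete (rather than keep) these attributes
        remove.setInputFormat(set);
        Instances attributesOnly = Filter.useFilter(set, remove);
        System.out.println(attributesOnly.numAttributes()); // 7, if set had 14
    }
}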

From source file:gr.auth.ee.lcs.AbstractLearningClassifierSystem.java

License:Open Source License

/**
 * Initialize the rule population by clustering the training set and producing rules based upon the clusters.
 * The training set is first divided into as many partitions as there are distinct label combinations.
 * @throws Exception
 * 
 * @param trainset
 *             the training set, as an Instances object
 */

public ClassifierSet initializePopulation(final Instances trainset) throws Exception {

    final double gamma = SettingsLoader.getNumericSetting("CLUSTER_GAMMA", .2);

    int numberOfLabels = (int) SettingsLoader.getNumericSetting("numberOfLabels", 1);

    final Instances set = trainset;

    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setSeed(10);
    kmeans.setPreserveInstancesOrder(true);

    /*
     * The partitions array will hold instances with only the attributes.
     * By contrast, partitionsWithCLasses will hold only the labels.
     */
    Instances[] partitions = InstancesUtility.partitionInstances(this, trainset);
    Instances[] partitionsWithCLasses = InstancesUtility.partitionInstances(this, trainset);

    /*
    * Instead of having multiple positions for the same label combination, use only one.
    * This is the one that will be used to "cover" the centroids.
    */

    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Instance temp = partitionsWithCLasses[i].instance(0);
        partitionsWithCLasses[i].delete();
        partitionsWithCLasses[i].add(temp);
    }

    /*
    * Delete the labels from the partitions.
    */
    String attributesIndicesForDeletion = "";

    for (int k = set.numAttributes() - numberOfLabels + 1; k <= set.numAttributes(); k++) {
        if (k != set.numAttributes())
            attributesIndicesForDeletion += k + ",";
        else
            attributesIndicesForDeletion += k;
    }
    /* attributesIndicesForDeletion = 8,9,10,11,12,13,14 e.g. for 7 attributes and 7 labels.
    * It does not start from 7 because Weka's Remove filter uses 1-based attribute indices. See the API.
    */
    for (int i = 0; i < partitions.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(attributesIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitions[i]);
        partitions[i] = Filter.useFilter(partitions[i], remove);
    }
    // partitions now contains only attributes

    /*
    * delete the attributes from partitionsWithCLasses
    */
    String labelsIndicesForDeletion = "";

    for (int k = 1; k <= set.numAttributes() - numberOfLabels; k++) {
        if (k != set.numAttributes() - numberOfLabels)
            labelsIndicesForDeletion += k + ",";
        else
            labelsIndicesForDeletion += k;
    }
    /* attributesIndicesForDeletion = 8,9,10,11,12,13,14 e.g. for 7 attributes and 7 labels.
    * It does not start from 7 because Weka's Remove filter uses 1-based attribute indices. See the API.
    */
    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(labelsIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitionsWithCLasses[i]);
        partitionsWithCLasses[i] = Filter.useFilter(partitionsWithCLasses[i], remove);
        //System.out.println(partitionsWithCLasses[i]);
    }
    // partitionsWithCLasses now contains only labels

    int populationSize = (int) SettingsLoader.getNumericSetting("populationSize", 1500);

    // the set used to store the rules from all the clusters
    ClassifierSet initialClassifiers = new ClassifierSet(new FixedSizeSetWorstFitnessDeletion(this,
            populationSize, new RouletteWheelSelector(AbstractUpdateStrategy.COMPARISON_MODE_DELETION, true)));

    for (int i = 0; i < partitions.length; i++) {

        try {

            kmeans.setNumClusters((int) Math.ceil(gamma * partitions[i].numInstances()));
            kmeans.buildClusterer(partitions[i]);
            int[] assignments = kmeans.getAssignments();

            Instances centroids = kmeans.getClusterCentroids();

            int numOfCentroidAttributes = centroids.numAttributes();

            /*
             * At this stage the centroids hold only attributes. To continue, we
             * need to give them back the labels we removed earlier. But first,
             * open up positions for the labels.
             */

            for (int j = 0; j < numberOfLabels; j++) {
                Attribute label = new Attribute("label" + j);
                centroids.insertAttributeAt(label, numOfCentroidAttributes + j);
            }

            for (int centroidInstances = 0; centroidInstances < centroids.numInstances(); centroidInstances++) {
                for (int labels = 0; labels < numberOfLabels; labels++) {
                    centroids.instance(centroidInstances).setValue(numOfCentroidAttributes + labels,
                            partitionsWithCLasses[i].instance(0).value(labels));
                }
            }

            //System.out.println(centroids);
            double[][] centroidsArray = InstancesUtility.convertIntancesToDouble(centroids);

            for (int j = 0; j < centroidsArray.length; j++) {
                //System.out.printf("Instance %d => Cluster %d ", k, assignments[j]);
                final Classifier coveringClassifier = this.getClassifierTransformBridge()
                        .createRandomCoveringClassifier(centroidsArray[j]);

                coveringClassifier.setClassifierOrigin(Classifier.CLASSIFIER_ORIGIN_INIT);
                initialClassifiers.addClassifier(new Macroclassifier(coveringClassifier, 1), false);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    //System.out.println(initialClassifiers);
    return initialClassifiers;
}

From source file:kmeansapps.Kmeans.java

public void startCluster(String path, int numOfCluster, JTable tableResult, JFrame apps) {
    try {
        SimpleKMeans kmeans = new SimpleKMeans();
        String[] columnNames = new String[numOfCluster];

        kmeans.setSeed(10);
        kmeans.setPreserveInstancesOrder(true);
        kmeans.setNumClusters(numOfCluster);

        BufferedReader datafile = readDataFile(path);
        Instances data = new Instances(datafile);

        kmeans.buildClusterer(data);
        double SSE = kmeans.getSquaredError();
        // This array returns the cluster number (starting with 0) for each instance
        // The array has as many elements as the number of instances
        int[] assignments = kmeans.getAssignments();

        //            // set the column names
        //            for (int i = 0; i < numOfCluster; i++) {
        //                columnNames[i] = "Cluster " + i;
        //            }

        // build a two-dimensional ArrayList recording which cluster each instance falls into
        ArrayList<ArrayList<String>> listOfCluster = new ArrayList<ArrayList<String>>();
        ArrayList<String> listMemberOfCluster;

        //add one list per cluster
        for (int i = 0; i < numOfCluster; i++) {
            listMemberOfCluster = new ArrayList<>();
            listOfCluster.add(listMemberOfCluster);
        }
        //add each instance's index to its cluster's list
        int j = 0;
        for (int clusterNum : assignments) {
            listOfCluster.get(clusterNum).add(j + "");
            j++;
        }

        for (int i = 0; i < listOfCluster.size(); i++) {
            System.out.print("Cluster - " + i + " -> ");
            for (String listMemberOfCluster1 : listOfCluster.get(i)) {
                System.out.print(listMemberOfCluster1 + " ");
            }
            System.out.println("");
        }

        // Optionally, print each instance's cluster and the SSE, or write the
        // assignments into tableResult via a DefaultTableModel with one column
        // per cluster.
    } catch (Exception ex) {
        Logger.getLogger(Kmeans.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:kmeansapps.Kmeans.java

public void startCluster(String path, int numOfCluster, JTextArea textarea) {
    try {
        SimpleKMeans kmeans = new SimpleKMeans();
        kmeans.setSeed(10);
        kmeans.setPreserveInstancesOrder(true);
        kmeans.setNumClusters(numOfCluster);

        BufferedReader datafile = readDataFile(path);
        Instances data = new Instances(datafile);

        kmeans.buildClusterer(data);
        double SSE = kmeans.getSquaredError();
        // This array returns the cluster number (starting with 0) for each instance
        // The array has as many elements as the number of instances
        int[] assignments = kmeans.getAssignments();

        // build a two-dimensional ArrayList recording which cluster each instance falls into
        ArrayList<ArrayList<String>> listOfCluster = new ArrayList<ArrayList<String>>();
        ArrayList<String> listMemberOfCluster;

        //add one list per cluster
        for (int i = 0; i < numOfCluster; i++) {
            listMemberOfCluster = new ArrayList<>();
            listOfCluster.add(listMemberOfCluster);
        }
        //add each instance's index to its cluster's list
        int j = 0;
        for (int clusterNum : assignments) {
            listOfCluster.get(clusterNum).add(j + "");
            j++;
        }
        textarea.setText("");
        String result = "";
        for (int i = 0; i < listOfCluster.size(); i++) {
            result = result + ("Cluster - " + i + " ==> ");
            for (String listMemberOfCluster1 : listOfCluster.get(i)) {
                result = result + (listMemberOfCluster1 + " ");
            }
            result = result + ("\n");
        }
        result = result + ("\nSSE : ") + kmeans.getSquaredError();
        textarea.setText(result);
    } catch (Exception ex) {
        Logger.getLogger(Kmeans.class.getName()).log(Level.SEVERE, null, ex);
    }
}
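
Since these last examples already read kmeans.getSquaredError(), the same call can be looped over several values of k to look for an "elbow" when choosing the cluster count. A small sketch under the same assumptions (an ARFF file of numeric attributes at a placeholder path):

import java.io.BufferedReader;
import java.io.FileReader;
import weka.clusterers.SimpleKMeans;
import weka.core.Instances;

public class ElbowSketch {
    public static void main(String[] args) throws Exception {
        Instances data = new Instances(new BufferedReader(new FileReader("data.arff")));
        for (int k = 2; k <= 10; k++) {
            SimpleKMeans kmeans = new SimpleKMeans();
            kmeans.setSeed(10);
            kmeans.setNumClusters(k);
            kmeans.buildClusterer(data);
            System.out.println("k=" + k + "  SSE=" + kmeans.getSquaredError());
        }
    }
}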