Example usage for the weka.clusterers.ClusterEvaluation constructor ClusterEvaluation()


Introduction

On this page you can find usage examples for the weka.clusterers.ClusterEvaluation constructor, ClusterEvaluation().

Prototype

public ClusterEvaluation() 

Source Link

Document

Constructor.
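
Before the usage examples, here is a minimal, self-contained sketch (not taken from the source files listed below) showing how the no-argument constructor is typically combined with setClusterer and evaluateClusterer. The ARFF path "data.arff", the class name ClusterEvaluationSketch, and the choice of SimpleKMeans are placeholders.

import weka.clusterers.ClusterEvaluation;
import weka.clusterers.SimpleKMeans;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class ClusterEvaluationSketch {
    public static void main(String[] args) throws Exception {
        // Load a dataset (placeholder path).
        Instances data = new DataSource("data.arff").getDataSet();

        // Build any clusterer; SimpleKMeans is used here only as an example.
        SimpleKMeans kMeans = new SimpleKMeans();
        kMeans.setNumClusters(3);
        kMeans.buildClusterer(data);

        // Create the evaluation object with the no-argument constructor,
        // attach the trained clusterer and evaluate it on a dataset.
        ClusterEvaluation eval = new ClusterEvaluation();
        eval.setClusterer(kMeans);
        eval.evaluateClusterer(data);

        System.out.println(eval.clusterResultsToString());
        System.out.println("Number of clusters: " + eval.getNumClusters());
    }
}

The examples below follow the same pattern in larger programs, including evaluation on a separate test set and classes-to-clusters mapping.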

Usage

From source file:ClusteringClass.java

public static void main(String[] args) throws Exception {
    String filename = "C:\\Users\\Daniele\\Desktop\\Humoradio2.csv";

    try {
        FileWriter fw = new FileWriter(filename);
        Class.forName("org.apache.derby.jdbc.ClientDriver").newInstance();
        Connection conn = DriverManager.getConnection("jdbc:derby://localhost:1527/HumoRadioDB", "dani",
                "dani");

        String query = "SELECT * FROM SONG_RATING2";
        Statement stmt = conn.createStatement();
        ResultSet rs = stmt.executeQuery(query);

        for (int i = 1; i < 23; i++) {
            if (i != 2) {
                ResultSetMetaData rsmd = rs.getMetaData();
                String name = rsmd.getColumnName(i);
                fw.append(name);
                if (i != 22) {
                    fw.append(',');
                } else {
                    fw.append('\n');
                }
            }
        }

        String query1 = "SELECT * FROM SONG_DATA";
        Statement stmt1 = conn.createStatement();
        ResultSet rs1 = stmt1.executeQuery(query1);

        String[] titles = new String[150];

        for (int ii = 0; ii < 150; ii++) {
            rs1.next();
            titles[ii] = rs1.getString("TITLE");
        }

        // Write the data rows, mirroring the header loop above: skip column 2,
        // separate values with commas and end each row with a newline.
        while (rs.next()) {
            for (int i = 1; i < 23; i++) {
                if (i != 2) {
                    fw.append(rs.getString(i));
                    if (i != 22) {
                        fw.append(',');
                    } else {
                        fw.append('\n');
                    }
                }
            }
        }

        fw.flush();
        fw.close();
        conn.close();
        System.out.println("CSV File is created successfully.");

        /*
         Clustering part
         */
        DataSource source = new DataSource("C:\\Users\\Daniele\\Desktop\\Humoradio2.csv");
        Instances train = source.getDataSet();

        /*
         Apply Weka's Remove filter so that one attribute is not
         considered by the clustering algorithm.
         */
        Remove filter = new Remove();
        filter.setAttributeIndices("1");
        filter.setInputFormat(train);
        Instances train2 = Filter.useFilter(train, filter);
        System.out.println("Nominal attributes removed from computation.");

        /*
         Apply Weka's Normalize filter to normalize our dataset.
         */
        Normalize norm = new Normalize();
        norm.setInputFormat(train2);
        Instances train3 = Filter.useFilter(train2, norm);
        System.out.println("Dataset normalized.");

        /*
         First Clustering Algorithm
         */
        EuclideanDistance df = new EuclideanDistance();
        SimpleKMeans clus1 = new SimpleKMeans();
        int k = 10;
        clus1.setNumClusters(k);
        clus1.setDistanceFunction(df);
        clus1.setPreserveInstancesOrder(true);
        clus1.buildClusterer(train3);

        /*
         First Evaluation
         */
        ClusterEvaluation eval1 = new ClusterEvaluation();
        eval1.setClusterer(clus1);
        eval1.evaluateClusterer(train3);
        System.out.println(eval1.clusterResultsToString());

        int[] assignments = clus1.getAssignments();
        String[][] dati = new String[150][4];

        for (int kk = 0; kk < 150; kk++) {
            dati[kk][0] = String.valueOf(kk);
            dati[kk][1] = train2.instance(kk).toString();
            dati[kk][2] = String.valueOf(assignments[kk]);
            dati[kk][3] = titles[kk];
        }

        for (int w = 0; w < 10; w++) {
            System.out.println();
            for (int i = 0; i < 150; i++) {
                if (dati[i][2].equals(String.valueOf(w))) {
                    for (int j = 0; j < 4; j++) {
                        if (j != 3) {
                            System.out.print(dati[i][j] + "-> \t");
                        } else {
                            System.out.println(dati[i][j]);
                        }
                    }
                }
            }
        }

        /*first graph  
                
         PlotData2D predData = ClustererPanel.setUpVisualizableInstances(train, eval1);
         //String name = (new SimpleDateFormat("HH:mm:ss - ")).format(new Date());
         String name = "";
         String cname = clus1.getClass().getName();
         if (cname.startsWith("weka.clusterers."))
         name += cname.substring("weka.clusterers.".length());
         else
         name += cname;
                
                
         VisualizePanel vp = new VisualizePanel();
         vp.setName(name + " (" + train.relationName() + ")");
         predData.setPlotName(name + " (" + train.relationName() + ")");
         vp.addPlot(predData);
                
         String plotName = vp.getName();
         final javax.swing.JFrame jf = new javax.swing.JFrame("Weka Clusterer Visualize: " + plotName);
         jf.setSize(500,400);
         jf.getContentPane().setLayout(new BorderLayout());
         jf.getContentPane().add(vp, BorderLayout.CENTER);
         jf.dispose();
         jf.addWindowListener(new java.awt.event.WindowAdapter() {
         public void windowClosing(java.awt.event.WindowEvent e) {
         jf.dispose();
         }
         });
         jf.setVisible(true);
                
         end first graph
         */

        /*
         Second Clustering Algorithm
         */

        System.out.println();

        DBSCAN clus3 = new DBSCAN();
        clus3.setEpsilon(0.7);
        clus3.setMinPoints(2);
        clus3.buildClusterer(train3);

        /*
         Second Evaluation
         */
        ClusterEvaluation eval3 = new ClusterEvaluation();
        eval3.setClusterer(clus3);
        eval3.evaluateClusterer(train3);
        System.out.println(eval3.clusterResultsToString());

        double[] assignments3 = eval3.getClusterAssignments();
        String[][] dati3 = new String[150][4];

        for (int kk = 0; kk < 150; kk++) {
            dati3[kk][0] = String.valueOf(kk);
            dati3[kk][1] = train2.instance(kk).toString();
            dati3[kk][2] = String.valueOf(assignments3[kk]);
            dati3[kk][3] = titles[kk];
        }

        for (int w = 0; w < eval3.getNumClusters(); w++) {
            System.out.println();
            for (int i = 0; i < 150; i++) {
                if (Double.parseDouble(dati3[i][2]) == w) {
                    for (int j = 0; j < 4; j++) {
                        if (j != 3) {
                            System.out.print(dati3[i][j] + "-> \t");
                        } else {
                            System.out.println(dati3[i][j]);
                        }
                    }
                }
            }
        }
        System.out.println();
        for (int i = 0; i < 150; i++) {
            if (Double.parseDouble(dati3[i][2]) == -1.0) {
                for (int j = 0; j < 4; j++) {
                    if (j != 3) {
                        System.out.print(dati3[i][j] + "-> \t");
                    } else {
                        System.out.println(dati3[i][j]);
                    }
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:adams.flow.transformer.WekaTestSetClustererEvaluator.java

License:Open Source License

/**
 * Executes the flow item.
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    Instances test;
    ClusterEvaluation eval;
    weka.clusterers.Clusterer cls;
    CallableSource gs;
    Token output;

    result = null;

    try {
        // get test set
        test = null;
        gs = new CallableSource();
        gs.setCallableName(m_Testset);
        gs.setParent(getParent());
        gs.setUp();
        gs.execute();
        output = gs.output();
        if (output != null)
            test = (Instances) output.getPayload();
        else
            result = "No test set available!";
        gs.wrapUp();

        // evaluate clusterer
        if (result == null) {
            if (m_InputToken.getPayload() instanceof weka.clusterers.Clusterer)
                cls = (weka.clusterers.Clusterer) m_InputToken.getPayload();
            else
                cls = (weka.clusterers.Clusterer) ((WekaModelContainer) m_InputToken.getPayload())
                        .getValue(WekaModelContainer.VALUE_MODEL);
            eval = new ClusterEvaluation();
            eval.setClusterer(cls);
            eval.evaluateClusterer(test, null, m_OutputModel);
            m_OutputToken = new Token(new WekaClusterEvaluationContainer(eval, cls));
        }
    } catch (Exception e) {
        m_OutputToken = null;
        result = handleException("Failed to evaluate: ", e);
    }

    if (m_OutputToken != null)
        updateProvenance(m_OutputToken);

    return result;
}

From source file:adams.flow.transformer.WekaTrainTestSetClustererEvaluator.java

License:Open Source License

/**
 * Executes the flow item.
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    Instances train;
    Instances test;
    weka.clusterers.Clusterer cls;
    ClusterEvaluation eval;
    WekaTrainTestSetContainer cont;

    result = null;

    try {
        // cross-validate clusterer
        cls = getClustererInstance();
        if (cls == null)
            throw new IllegalStateException("Clusterer '" + getClusterer() + "' not found!");

        cont = (WekaTrainTestSetContainer) m_InputToken.getPayload();
        train = (Instances) cont.getValue(WekaTrainTestSetContainer.VALUE_TRAIN);
        test = (Instances) cont.getValue(WekaTrainTestSetContainer.VALUE_TEST);
        cls.buildClusterer(train);
        eval = new ClusterEvaluation();
        eval.setClusterer(cls);
        eval.evaluateClusterer(test, null, m_OutputModel);

        // broadcast result
        m_OutputToken = new Token(new WekaClusterEvaluationContainer(eval, cls));
    } catch (Exception e) {
        m_OutputToken = null;
        result = handleException("Failed to evaluate: ", e);
    }

    if (m_OutputToken != null)
        updateProvenance(m_OutputToken);

    return result;
}

From source file:agnes.AgnesMain.java

public static void main(String[] args) throws Exception {
    //        Instances data = loadData("C:\\Program Files\\Weka-3-8\\data\\weather.numeric.arff");
    System.out.print("File: ");
    Scanner scanner = new Scanner(System.in);
    String filename = scanner.next();
    System.out.print("Number of clusters: ");
    int numCluster = scanner.nextInt();
    System.out.print("Single/complete: ");
    String link = scanner.next();
    Instances data = loadData(filename);
    MyAgnes agnes = new MyAgnes(link, numCluster);
    agnes.buildClusterer(data);
    System.out.println("Cluster Hierarchies:\n");
    agnes.printClustersID();
    ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(agnes);
    eval.evaluateClusterer(data);
    System.out.println("Cluster Evaluation:");
    System.out.println(eval.clusterResultsToString());
    //        agnes.printClusters();
}

From source file:aw_cluster.AW_Cluster.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws Exception {
    // TODO code application logic here
    Scanner sc = new Scanner(System.in);
    Instances trainingData;
    ClusterEvaluation eval;
    String path;
    int pilihan;
    int jumlahCluster;
    int maxIter;
    int typeLinkage;

    do {
        System.out.println("Masukan pilihan algoritma: ");
        System.out.println("1. MyKMeans");
        System.out.println("2. MyAgnes");
        System.out.println("3. Exit");
        System.out.print("Pilihan: ");
        pilihan = sc.nextInt();
        if (pilihan == 1) {
            path = masukanFile(sc);
            System.out.println("Masukan jumlah cluster: ");
            jumlahCluster = sc.nextInt();
            System.out.println("Masukan jumlah maksimum iterasi: ");
            maxIter = sc.nextInt();
            BufferedReader data = new BufferedReader(new FileReader(path));
            trainingData = new Instances(data);
            myKMeans kmeans = new myKMeans();
            kmeans.setNumCluster(jumlahCluster);
            kmeans.setMaxIteration(maxIter);
            kmeans.buildClusterer(trainingData);
            eval = new ClusterEvaluation();
            eval.setClusterer(kmeans);
            eval.evaluateClusterer(trainingData);
            System.out.println("Cluster Evaluation: " + eval.clusterResultsToString());
            System.out.println("");
        } else if (pilihan == 2) {
            path = masukanFile(sc);
            System.out.println("Masukan jumlah cluster: ");
            jumlahCluster = sc.nextInt();
            typeLinkage = typeLinkage(sc);
            BufferedReader data = new BufferedReader(new FileReader(path));
            trainingData = new Instances(data);
            myAgnes agnes = new myAgnes();
            agnes.setNumCluster(jumlahCluster);
            agnes.setLinkage(typeLinkage);
            agnes.buildClusterer(trainingData);
            eval = new ClusterEvaluation();
            eval.setClusterer(agnes);
            eval.evaluateClusterer(trainingData);
            System.out.println("Cluster Evaluation: " + eval.clusterResultsToString());
            System.out.println("");
        }
    } while (pilihan != 3);
}

From source file:clustering.Clustering.java

public void percentageSplit(double percent) {
    try {
        data.randomize(new java.util.Random(0));
        int trainSize = (int) Math.round((double) data.numInstances() * percent / 100f);
        int testSize = data.numInstances() - trainSize;

        Instances train = new Instances(data, 0, trainSize);
        Instances test = new Instances(data, trainSize, testSize);

        buildClusterer(clusterer, train);

        ClusterEvaluation eval = new ClusterEvaluation();
        eval.setClusterer(model);
        eval.evaluateClusterer(test);
        System.out.println(eval.clusterResultsToString());
    } catch (Exception ex) {
        System.out.println(ex);
    }
}

From source file:com.actelion.research.orbit.imageAnalysis.imaging.TMAPoints.java

License:Open Source License

private int guessNumClusters(EM clusterer, Instances instances, int start, int end) throws Exception {
    ClusterEvaluation eval = new ClusterEvaluation();
    int bestNum = start;
    double best = Double.POSITIVE_INFINITY;
    double bic;
    for (int c = start; c <= end; c++) {
        clusterer.setNumClusters(c);
        clusterer.buildClusterer(instances);
        eval.setClusterer(clusterer);
        eval.evaluateClusterer(instances);
        bic = bic(eval.getLogLikelihood(), c, instances.numInstances());
        logger.trace("numCluster " + c + " -> BIC: " + bic);
        if (bic < best) {
            best = bic;
            bestNum = c;
            logger.trace("bestNum: " + bestNum);
        }
    }
    return bestNum;
}

From source file:com.spread.experiment.tempuntilofficialrelease.ClassificationViaClustering108.java

License:Open Source License

/**
 * builds the classifier
 * 
 * @param data the training instances
 * @throws Exception if something goes wrong
 */
@Override
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // save original header (needed for clusters to classes output)
    m_OriginalHeader = data.stringFreeStructure();

    // remove class attribute for clusterer
    Instances clusterData = new Instances(data);
    clusterData.setClassIndex(-1);
    clusterData.deleteAttributeAt(data.classIndex());
    m_ClusteringHeader = clusterData.stringFreeStructure();

    if (m_ClusteringHeader.numAttributes() == 0) {
        System.err.println("Data contains only class attribute, defaulting to ZeroR model.");
        m_ZeroR = new ZeroR();
        m_ZeroR.buildClassifier(data);
    } else {
        m_ZeroR = null;

        // build clusterer
        m_ActualClusterer = AbstractClusterer.makeCopy(m_Clusterer);
        m_ActualClusterer.buildClusterer(clusterData);

        if (!getLabelAllClusters()) {

            // determine classes-to-clusters mapping
            ClusterEvaluation eval = new ClusterEvaluation();
            eval.setClusterer(m_ActualClusterer);
            eval.evaluateClusterer(clusterData);
            double[] clusterAssignments = eval.getClusterAssignments();
            int[][] counts = new int[eval.getNumClusters()][m_OriginalHeader.numClasses()];
            int[] clusterTotals = new int[eval.getNumClusters()];
            double[] best = new double[eval.getNumClusters() + 1];
            double[] current = new double[eval.getNumClusters() + 1];
            for (int i = 0; i < data.numInstances(); i++) {
                Instance instance = data.instance(i);
                if (!instance.classIsMissing()) {
                    counts[(int) clusterAssignments[i]][(int) instance.classValue()]++;
                    clusterTotals[(int) clusterAssignments[i]]++;
                }
            }
            best[eval.getNumClusters()] = Double.MAX_VALUE;
            ClusterEvaluation.mapClasses(eval.getNumClusters(), 0, counts, clusterTotals, current, best, 0);
            m_ClustersToClasses = new double[best.length];
            System.arraycopy(best, 0, m_ClustersToClasses, 0, best.length);
        } else {
            m_ClusterClassProbs = new double[m_ActualClusterer.numberOfClusters()][data.numClasses()];
            for (int i = 0; i < data.numInstances(); i++) {
                Instance clusterInstance = clusterData.instance(i);
                Instance originalInstance = data.instance(i);
                if (!originalInstance.classIsMissing()) {
                    double[] probs = m_ActualClusterer.distributionForInstance(clusterInstance);
                    for (int j = 0; j < probs.length; j++) {
                        m_ClusterClassProbs[j][(int) originalInstance.classValue()] += probs[j];
                    }
                }
            }
            for (int i = 0; i < m_ClusterClassProbs.length; i++) {
                Utils.normalize(m_ClusterClassProbs[i]);
            }
        }
    }
}

From source file:detplagiasi.EMClustering.java

EMClustering() {
    addd = ct.getAddress();

    try {
        ClusterEvaluation eval;
        Instances data;
        String[] options;
        DensityBasedClusterer cl;

        File he = getArffFile();
        data = new Instances(new BufferedReader(new FileReader(he)));
        System.out.println("-----EM Clustering-----");
        // normal
        try (BufferedWriter out = new BufferedWriter(new FileWriter(addd + "\\output.txt", true))) {
            out.write("\r\n--> normal\r\n");
            options = new String[2];
            options[0] = "-t";
            options[1] = he.getAbsolutePath();
            out.write("\r\n" + ClusterEvaluation.evaluateClusterer(new EM(), options) + "\r\n");
            out.write("\r\n");

            // manual call
            out.write("\n--> manual\r\n");
            cl = new EM();
            out.write("\r\n");
            cl.buildClusterer(data);
            getDataUji();
            getDataTraining();
            System.out.println("jumlah kluster = " + cl.numberOfClusters());
            noClusterUji = cl.clusterInstance(dataUji.instance(0));
            totalCluster = cl.numberOfClusters();
            System.out.println("kluster = " + cl.clusterInstance(dataUji.instance(0)));
            for (int b = 0; b < dataTraining.numInstances(); b++) {
                System.out.print("file " + td.fileName[b] + " termasuk cluster ke ");
                array1[b] = td.fileName[b];
                array2[b] = cl.clusterInstance(dataTraining.instance(b));

                System.out.println(cl.clusterInstance(dataTraining.instance(b)));
                // store each instance's cluster value in an int array to be sent to detplaggui
            }

            out.write("\r\n");

            eval = new ClusterEvaluation();
            eval.setClusterer(cl);
            eval.evaluateClusterer(new Instances(data));
            out.write("\r\n\n# of clusters: " + eval.getNumClusters());

        } catch (Exception e) {
            System.err.println(e.getMessage());
            System.out.println("error2 em cluster");
        }

    } catch (IOException ex) {
        Logger.getLogger(EMClustering.class.getName()).log(Level.SEVERE, null, ex);
        System.out.println("errorrrr null em");
    }
}

From source file:detplagiasi.KMeansClustering.java

KMeansClustering() {
    addd = Container.getAddress();
    try {
        ClusterEvaluation eval;
        Instances data;
        String[] options;
        SimpleKMeans cl;

        File he = getArffFile();
        data = new Instances(new BufferedReader(new FileReader(he)));
        System.out.println("-----KMeans Clustering-----");
        // normal
        try (BufferedWriter out = new BufferedWriter(new FileWriter(addd + "\\output.txt", true))) {
            out.write("\r\n--> normal\r\n");
            options = new String[2];
            options[0] = "-t";
            options[1] = he.getAbsolutePath();
            out.write("\r\n" + ClusterEvaluation.evaluateClusterer(new SimpleKMeans(), options) + "\r\n");
            out.write("\r\n");

            // manual call
            out.write("\n--> manual\r\n");
            cl = new SimpleKMeans();
            cl.setNumClusters(4);
            out.write("\r\n");
            cl.buildClusterer(data);
            getDataUji();
            System.out.println("jumlah kluster = " + cl.numberOfClusters());
            System.out.println("kluster = " + cl.clusterInstance(dataUji.instance(0)));
            noClusterUji = cl.clusterInstance(dataUji.instance(0));
            totalCluster = cl.numberOfClusters();
            for (int b = 0; b < dataTraining.numInstances(); b++) {
                System.out.print("file " + td.fileName[b] + " termasuk cluster ke ");
                System.out.println(cl.clusterInstance(dataTraining.instance(b)));
                array1[b] = td.fileName[b];
                array2[b] = cl.clusterInstance(dataTraining.instance(b));
                // store each instance's cluster value in an int array to be sent to detplaggui
            }

            out.write("\r\n");

            eval = new ClusterEvaluation();
            eval.setClusterer(cl);
            eval.evaluateClusterer(new Instances(data));
            out.write("\r\n\n# of clusters: " + eval.getNumClusters());

        } catch (Exception e) {
            System.err.println(e.getMessage());
            System.out.println("error2 kmeans cluster");
        }

    } catch (IOException ex) {
        Logger.getLogger(Clustering.class.getName()).log(Level.SEVERE, null, ex);
        System.out.println("errorrrr null kmeans");
    }
}