Example usage for weka.clusterers.SimpleKMeans#setNumClusters

List of usage examples for weka.clusterers.SimpleKMeans#setNumClusters

Introduction

On this page you can find example usages of weka.clusterers.SimpleKMeans#setNumClusters.

Prototype

@Override
public void setNumClusters(int n) throws Exception 

Source Link

Document

Sets the number of clusters to generate.

Usage

From source file:ClusteringClass.java

/**
 * Exports the SONG_RATING2 table to a CSV file, then clusters the exported data with
 * SimpleKMeans and DBSCAN and prints the members of every cluster to standard output.
 *
 * <p>Any failure (database, file or clustering) is caught and reported with a stack trace;
 * nothing is rethrown.
 *
 * @param args unused
 * @throws Exception declared for compatibility; failures are handled inside the method
 */
public static void main(String[] args) throws Exception {
    String filename = "C:\\Users\\Daniele\\Desktop\\Humoradio2.csv";
    // Columns are 1-based; column 2 of SONG_RATING2 is left out of the export.
    final int lastColumn = 22;
    final int skippedColumn = 2;
    // Upper bound on the number of songs that are reported.
    final int maxRows = 150;

    try {
        Class.forName("org.apache.derby.jdbc.ClientDriver").newInstance();

        String[] titles = new String[maxRows];

        // try-with-resources closes the file and every JDBC handle even when a query fails.
        try (FileWriter fw = new FileWriter(filename);
                Connection conn = DriverManager.getConnection("jdbc:derby://localhost:1527/HumoRadioDB", "dani",
                        "dani");
                Statement stmt = conn.createStatement();
                ResultSet rs = stmt.executeQuery("SELECT * FROM SONG_RATING2");
                Statement stmt1 = conn.createStatement();
                ResultSet rs1 = stmt1.executeQuery("SELECT * FROM SONG_DATA")) {

            // Header row: one column name per exported column.
            ResultSetMetaData rsmd = rs.getMetaData();
            for (int i = 1; i <= lastColumn; i++) {
                if (i == skippedColumn) {
                    continue;
                }
                fw.append(rsmd.getColumnName(i));
                fw.append(i == lastColumn ? '\n' : ',');
            }

            // Song titles, in result-set order; stop early if the table has fewer rows.
            for (int ii = 0; ii < maxRows && rs1.next(); ii++) {
                titles[ii] = rs1.getString("TITLE");
            }

            // Data rows. The previous version wrote only the separators, producing empty
            // records. NOTE(review): values containing commas or quotes are not escaped.
            while (rs.next()) {
                for (int i = 1; i <= lastColumn; i++) {
                    if (i == skippedColumn) {
                        continue;
                    }
                    String value = rs.getString(i);
                    // "?" is presumably read back by Weka's CSV loader as a missing value.
                    fw.append(value == null ? "?" : value);
                    fw.append(i == lastColumn ? '\n' : ',');
                }
            }
        }
        System.out.println("CSV File is created successfully.");

        /*
         Clustering part
         */
        DataSource source = new DataSource(filename);
        Instances train = source.getDataSet();

        /*
         Apply Weka's Remove filter so that the first attribute is not considered by the
         clustering algorithms.
         */
        Remove filter = new Remove();
        filter.setAttributeIndices("1");
        filter.setInputFormat(train);
        Instances train2 = Filter.useFilter(train, filter);
        System.out.println("Nominal attributes removed from computation.");

        /*
         Apply Weka's Normalize filter to normalize the dataset.
         */
        Normalize norm = new Normalize();
        norm.setInputFormat(train2);
        Instances train3 = Filter.useFilter(train2, norm);
        System.out.println("Dataset normalized.");

        /*
         First Clustering Algorithm
         */
        EuclideanDistance df = new EuclideanDistance();
        SimpleKMeans clus1 = new SimpleKMeans();
        int k = 10;
        clus1.setNumClusters(k);
        clus1.setDistanceFunction(df);
        // Required so that getAssignments() can be called afterwards.
        clus1.setPreserveInstancesOrder(true);
        clus1.buildClusterer(train3);

        /*
         First Evaluation
         */
        ClusterEvaluation eval1 = new ClusterEvaluation();
        eval1.setClusterer(clus1);
        eval1.evaluateClusterer(train3);
        System.out.println(eval1.clusterResultsToString());

        int[] assignments = clus1.getAssignments();
        int rowCount = Math.min(maxRows, assignments.length);
        // Each row: index, un-normalized instance, cluster id, song title.
        String[][] dati = new String[rowCount][4];
        for (int kk = 0; kk < rowCount; kk++) {
            dati[kk][0] = String.valueOf(kk);
            dati[kk][1] = train2.instance(kk).toString();
            dati[kk][2] = String.valueOf(assignments[kk]);
            dati[kk][3] = titles[kk];
        }

        for (int w = 0; w < k; w++) {
            System.out.println();
            printClusterMembers(dati, w);
        }

        // (A commented-out Swing visualisation of the first clustering used to live here.)

        /*
         Second Clustering Algorithm
         */
        System.out.println();

        DBSCAN clus3 = new DBSCAN();
        clus3.setEpsilon(0.7);
        clus3.setMinPoints(2);
        clus3.buildClusterer(train3);

        /*
         Second Evaluation
         */
        ClusterEvaluation eval3 = new ClusterEvaluation();
        eval3.setClusterer(clus3);
        eval3.evaluateClusterer(train3);
        System.out.println(eval3.clusterResultsToString());

        double[] assignments3 = eval3.getClusterAssignments();
        int rowCount3 = Math.min(maxRows, assignments3.length);
        String[][] dati3 = new String[rowCount3][4];
        for (int kk = 0; kk < rowCount3; kk++) {
            dati3[kk][0] = String.valueOf(kk);
            dati3[kk][1] = train2.instance(kk).toString();
            dati3[kk][2] = String.valueOf(assignments3[kk]);
            dati3[kk][3] = titles[kk];
        }

        for (int w = 0; w < eval3.getNumClusters(); w++) {
            System.out.println();
            printClusterMembers(dati3, w);
        }
        // Rows with assignment -1 belong to no cluster; list them last.
        System.out.println();
        printClusterMembers(dati3, -1.0);
    } catch (Exception e) {
        e.printStackTrace();
    }
}

/**
 * Prints every row whose cluster id (column 2) equals {@code clusterId}, formatted as
 * {@code index-> \tinstance-> \tcluster-> \ttitle}.
 */
private static void printClusterMembers(String[][] rows, double clusterId) {
    for (String[] row : rows) {
        if (Double.parseDouble(row[2]) == clusterId) {
            System.out.println(row[0] + "-> \t" + row[1] + "-> \t" + row[2] + "-> \t" + row[3]);
        }
    }
}

From source file:analysis.Purity.java

/**
 * /*from  w  w  w  .  j a  v  a 2 s .c  om*/
 * @param k number of clusters
 * @param originalfile original data
 * @param imputedfile imputed data
 * @throws Exception 
 */
public void findpurity(int k, String originalfile, String imputedfile) throws Exception {
    //get original data
    ConverterUtils.DataSource source = new ConverterUtils.DataSource(originalfile);
    // get imputed data
    ConverterUtils.DataSource mysource = new ConverterUtils.DataSource(imputedfile);
    //get instances for clustering
    this.instances = source.getDataSet();
    this.myinstances = mysource.getDataSet();
    //Simple Kmeans for clustering
    SimpleKMeans globalkmeans = new SimpleKMeans();
    SimpleKMeans mykmeans = new SimpleKMeans();
    //set  number of clusters
    globalkmeans.setNumClusters(k);
    mykmeans.setNumClusters(k);
    // build clusters
    globalkmeans.buildClusterer(instances);
    mykmeans.buildClusterer(myinstances);

    // to compare clusters create matrix for original data and imputed data
    // this matrix indicates the  instances in the came clusters 
    original = new Matrix(instances.numInstances(), k);
    imputed = new Matrix(myinstances.numInstances(), k);
    // get cluster numbers for each instance and initialize associated cluster value to 1
    for (int i = 0; i < myinstances.numInstances(); i++) {
        //System.out.println(instances.instance(i));
        original.set(i, globalkmeans.clusterInstance(instances.instance(i)), 1);
        imputed.set(i, mykmeans.clusterInstance(myinstances.instance(i)), 1);
    }
    System.out.println("k is: \t" + original.getColumnDimension());
    //System.out.println(imputed.getRowDimension());
    original = original.times(original.transpose());
    imputed = imputed.times(imputed.transpose());

    int total1 = 0;// to count  instances in the imputed data in the same cluster
    int total2 = 0; // to count  instances in the original data in the same cluster
    //int value = 1;
    for (int i = 0; i < original.getRowDimension(); i++) {
        for (int j = i; j < original.getColumnDimension(); j++) {

            if ((original.get(i, j) == 1)) {
                if (imputed.get(i, j) == 1) {

                    total1++; // if i  and j th instance in the same cluster in the imputed data
                }
                total2++;// if the i and j th instance in the same cluster in the original data

            }

        }
        //System.out.println();
    }

    // calculate purity
    double purity;
    purity = (double) total1 / (double) total2;
    System.out.println("WCSS --> Original Data: " + mykmeans.getSquaredError());
    System.out.println("WCSS --> Imputed Data: " + globalkmeans.getSquaredError());
    // System.out.println("Total Hit is \t" + total1);
    //System.out.println("Total for  hit is \t" + total2);
    System.out.println("Purity is: " + purity);

}

From source file:analysis.SilhouetteIndex.java

/**
 * Builds {@code sk} with {@code c} clusters on {@code inst} and returns the mean
 * silhouette value over all instances.
 *
 * <p>For each instance i, a(i) is its mean distance to the other members of its own
 * cluster and b(i) is the smallest mean distance to the members of any other cluster;
 * s(i) = (b(i) - a(i)) / max(a(i), b(i)). Instances that are alone in their cluster,
 * or for which no other non-empty cluster exists, contribute s(i) = 0.
 *
 * @param sk the clusterer to configure and build (it is modified by this call)
 * @param inst the data to cluster
 * @param c the number of clusters to request
 * @return the average silhouette value, or {@code NaN} when only one cluster is used
 * @throws Exception if the clusterer cannot be built or an instance cannot be assigned
 */
public double calculateIndex(SimpleKMeans sk, Instances inst, int c) throws Exception {
    sk.setNumClusters(c);
    sk.buildClusterer(inst);
    EuclideanDistance ed = new EuclideanDistance();

    int numClusters = sk.getNumClusters();
    if (numClusters == 1) {
        // Index is not defined for k=1; it needs at least 2 clusters.
        return Double.NaN;
    }

    int numInstances = inst.numInstances();
    int numAttributes = inst.numAttributes();

    // Cache cluster assignments and attribute vectors once instead of recomputing them
    // for every pair of instances.
    int[] clusterOf = new int[numInstances];
    double[][] values = new double[numInstances][numAttributes];
    for (int i = 0; i < numInstances; i++) {
        clusterOf[i] = sk.clusterInstance(inst.instance(i));
        for (int attr = 0; attr < numAttributes; attr++) {
            values[i][attr] = inst.instance(i).value(attr);
        }
    }

    double silhouetteSum = 0.0d;
    for (int i = 0; i < numInstances; i++) {
        int currentCluster = clusterOf[i];
        double[] distanceSums = new double[numClusters];
        int[] counts = new int[numClusters];
        double inClusterSum = 0;
        int sameClusterCount = 0; // other members of i's cluster, i itself excluded

        for (int j = 0; j < numInstances; j++) {
            if (j == i) {
                continue;
            }
            double dist = ed.compute(values[i], values[j]);
            if (clusterOf[j] == currentCluster) {
                inClusterSum += dist;
                sameClusterCount++;
            } else {
                distanceSums[clusterOf[j]] += dist;
                counts[clusterOf[j]]++;
            }
        }

        // b(i): smallest mean distance to another cluster; empty clusters are skipped so
        // that 0/0 cannot turn the minimum into NaN.
        double nearestOther = Double.MAX_VALUE;
        for (int k = 0; k < numClusters; k++) {
            if (k != currentCluster && counts[k] > 0) {
                nearestOther = Math.min(nearestOther, distanceSums[k] / counts[k]);
            }
        }

        double si;
        if (sameClusterCount == 0 || nearestOther == Double.MAX_VALUE) {
            // i is alone in its cluster (or there is nothing to compare against).
            si = 0.0d;
        } else {
            double avgInClusterDist = inClusterSum / sameClusterCount; // a(i)
            double denominator = Math.max(nearestOther, avgInClusterDist);
            // All relevant points coincide: treat as neutral instead of 0/0.
            si = (denominator == 0.0d) ? 0.0d : (nearestOther - avgInClusterDist) / denominator;
        }
        silhouetteSum += si;
    }
    return silhouetteSum / numInstances;

}

From source file:br.com.ufu.lsi.rebfnetwork.RBFModel.java

License:Open Source License

/**
 * Method used to pre-process the data, perform clustering, and
 * set the initial parameter vector.
 *
 * <p>Steps, in order: copy the data and drop instances with a missing class, shuffle,
 * replace missing values, remove useless attributes, convert nominal attributes to
 * binary, normalize (class ignored), run k-means on the data without the class
 * attribute, lay out the parameter array and fill in initial scales and centers.
 *
 * @param data the training data; it is copied, the caller's object is not modified
 * @return the filtered data (returned early, with a ZeroR model built, when only the
 *         class attribute remains after removing useless attributes)
 * @throws Exception if all class values are identical, or if one of the called
 *         capability checks, filters or the clusterer fails
 */
protected Instances initializeClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // work on a copy and drop instances without a class value
    data = new Instances(data);
    data.deleteWithMissingClass();

    // Make sure data is shuffled
    Random random = new Random(m_Seed);
    if (data.numInstances() > 2) {
        random = data.getRandomNumberGenerator(m_Seed);
    }
    data.randomize(random);

    // Find two instances (0 and index) whose class values differ; they are used below
    // to recover the linear mapping between original and normalized class values.
    double y0 = data.instance(0).classValue(); // This stuff is not relevant in classification case
    int index = 1;
    while (index < data.numInstances() && data.instance(index).classValue() == y0) {
        index++;
    }
    if (index == data.numInstances()) {
        // degenerate case, all class values are equal
        // we don't want to deal with this, too much hassle
        throw new Exception("All class values are the same. At least two class values should be different");
    }
    double y1 = data.instance(index).classValue();

    // Replace missing values
    m_ReplaceMissingValues = new ReplaceMissingValues();
    m_ReplaceMissingValues.setInputFormat(data);
    data = Filter.useFilter(data, m_ReplaceMissingValues);

    // Remove useless attributes
    m_AttFilter = new RemoveUseless();
    m_AttFilter.setInputFormat(data);
    data = Filter.useFilter(data, m_AttFilter);

    // only class? -> build ZeroR model
    if (data.numAttributes() == 1) {
        System.err.println(
                "Cannot build model (only class attribute present in data after removing useless attributes!), "
                        + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(data);
        return data;
    } else {
        m_ZeroR = null;
    }

    // Transform attributes
    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(data);
    data = Filter.useFilter(data, m_NominalToBinary);

    m_Filter = new Normalize();
    ((Normalize) m_Filter).setIgnoreClass(true);
    m_Filter.setInputFormat(data);
    data = Filter.useFilter(data, m_Filter);
    // Class values of the same two instances after filtering.
    // NOTE(review): assumes the filters above keep instance count and order — confirm.
    double z0 = data.instance(0).classValue(); // This stuff is not relevant in classification case
    double z1 = data.instance(index).classValue();
    // Slope and intercept such that y = m_x0 + m_x1 * z for the two reference instances.
    m_x1 = (y0 - y1) / (z0 - z1); // no division by zero, since y0 != y1 guaranteed => z0 != z1 ???
    m_x0 = (y0 - m_x1 * z0); // = y1 - m_x1 * z1

    m_classIndex = data.classIndex();
    m_numClasses = data.numClasses();
    m_numAttributes = data.numAttributes();

    // Run k-means
    SimpleKMeans skm = new SimpleKMeans();
    skm.setMaxIterations(10000);
    skm.setNumClusters(m_numUnits);
    // The class index is unset while the Remove filter deletes the class attribute,
    // then restored on the original data; clustering runs on the class-free copy.
    Remove rm = new Remove();
    data.setClassIndex(-1);
    rm.setAttributeIndices((m_classIndex + 1) + "");
    rm.setInputFormat(data);
    Instances dataRemoved = Filter.useFilter(data, rm);
    data.setClassIndex(m_classIndex);
    skm.buildClusterer(dataRemoved);
    Instances centers = skm.getClusterCentroids();

    // k-means may return fewer centroids than requested; shrink the number of units.
    if (centers.numInstances() < m_numUnits) {
        m_numUnits = centers.numInstances();
    }

    // Set up arrays
    // Parameter layout: output weights, then (optionally) attribute weights, then
    // centers, then scales.
    OFFSET_WEIGHTS = 0;
    if (m_useAttributeWeights) {
        OFFSET_ATTRIBUTE_WEIGHTS = (m_numUnits + 1) * m_numClasses;
        OFFSET_CENTERS = OFFSET_ATTRIBUTE_WEIGHTS + m_numAttributes;
    } else {
        OFFSET_ATTRIBUTE_WEIGHTS = -1;
        OFFSET_CENTERS = (m_numUnits + 1) * m_numClasses;
    }
    OFFSET_SCALES = OFFSET_CENTERS + m_numUnits * m_numAttributes;

    // The number of scale parameters depends on the scale optimization option:
    // one global scale, one per unit and attribute, or (default) one per unit.
    switch (m_scaleOptimizationOption) {
    case USE_GLOBAL_SCALE:
        m_RBFParameters = new double[OFFSET_SCALES + 1];
        break;
    case USE_SCALE_PER_UNIT_AND_ATTRIBUTE:
        m_RBFParameters = new double[OFFSET_SCALES + m_numUnits * m_numAttributes];
        break;
    default:
        m_RBFParameters = new double[OFFSET_SCALES + m_numUnits];
        break;
    }

    // Set initial radius based on distance to nearest other basis function
    // For each center i, minDist is the smallest squared distance to a center j > i;
    // maxMinDist is the largest of those minima (it stays -1 if there is only one center).
    double maxMinDist = -1;
    for (int i = 0; i < centers.numInstances(); i++) {
        double minDist = Double.MAX_VALUE;
        for (int j = i + 1; j < centers.numInstances(); j++) {
            double dist = 0;
            for (int k = 0; k < centers.numAttributes(); k++) {
                if (k != centers.classIndex()) {
                    double diff = centers.instance(i).value(k) - centers.instance(j).value(k);
                    dist += diff * diff;
                }
            }
            if (dist < minDist) {
                minDist = dist;
            }
        }
        if ((minDist != Double.MAX_VALUE) && (minDist > maxMinDist)) {
            maxMinDist = minDist;
        }
    }

    // Initialize parameters
    // Every scale starts at sqrt(maxMinDist); centers are copied from the k-means centroids.
    if (m_scaleOptimizationOption == USE_GLOBAL_SCALE) {
        m_RBFParameters[OFFSET_SCALES] = Math.sqrt(maxMinDist);
    }
    for (int i = 0; i < m_numUnits; i++) {
        if (m_scaleOptimizationOption == USE_SCALE_PER_UNIT) {
            m_RBFParameters[OFFSET_SCALES + i] = Math.sqrt(maxMinDist);
        }
        // j indexes attributes of data (class included, but skipped); k indexes
        // attributes of centers, which were clustered without the class attribute.
        int k = 0;
        for (int j = 0; j < m_numAttributes; j++) {
            if (k == centers.classIndex()) {
                k++;
            }
            if (j != data.classIndex()) {
                if (m_scaleOptimizationOption == USE_SCALE_PER_UNIT_AND_ATTRIBUTE) {
                    m_RBFParameters[OFFSET_SCALES + (i * m_numAttributes + j)] = Math.sqrt(maxMinDist);
                }
                m_RBFParameters[OFFSET_CENTERS + (i * m_numAttributes) + j] = centers.instance(i).value(k);
                k++;
            }
        }
    }

    // Attribute weights, when used, start at 1.0 for every non-class attribute.
    if (m_useAttributeWeights) {
        for (int j = 0; j < m_numAttributes; j++) {
            if (j != data.classIndex()) {
                m_RBFParameters[OFFSET_ATTRIBUTE_WEIGHTS + j] = 1.0;
            }
        }
    }

    initializeOutputLayer(random);

    return data;
}

From source file:br.com.ufu.lsi.rebfnetwork.RBFNetwork.java

License:Open Source License

/**
 * Builds the classifier/*www.  j a va  2  s . co m*/
 *
 * @param instances the training data
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances instances) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(instances);

    // remove instances with missing class
    instances = new Instances(instances);
    instances.deleteWithMissingClass();

    // only class? -> build ZeroR model
    if (instances.numAttributes() == 1) {
        System.err.println(
                "Cannot build model (only class attribute present in data!), " + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(instances);
        return;
    } else {
        m_ZeroR = null;
    }

    m_standardize = new Standardize();
    m_standardize.setInputFormat(instances);
    instances = Filter.useFilter(instances, m_standardize);

    SimpleKMeans sk = new SimpleKMeans();
    sk.setNumClusters(m_numClusters);
    sk.setSeed(m_clusteringSeed);
    MakeDensityBasedClusterer dc = new MakeDensityBasedClusterer();
    dc.setClusterer(sk);
    dc.setMinStdDev(m_minStdDev);
    m_basisFilter = new ClusterMembership();
    m_basisFilter.setDensityBasedClusterer(dc);
    m_basisFilter.setInputFormat(instances);
    Instances transformed = Filter.useFilter(instances, m_basisFilter);

    if (instances.classAttribute().isNominal()) {
        m_linear = null;
        m_logistic = new Logistic();
        m_logistic.setRidge(m_ridge);
        m_logistic.setMaxIts(m_maxIts);
        m_logistic.buildClassifier(transformed);
    } else {
        m_logistic = null;
        m_linear = new LinearRegression();
        m_linear.setAttributeSelectionMethod(
                new SelectedTag(LinearRegression.SELECTION_NONE, LinearRegression.TAGS_SELECTION));
        m_linear.setRidge(m_ridge);
        m_linear.buildClassifier(transformed);
    }
}

From source file:br.ufrn.ia.core.clustering.EMIaProject.java

License:Open Source License

/**
 * Initialises the EM model from the best of ten k-means runs.
 *
 * <p>The run with the lowest squared error supplies the number of clusters, the
 * per-cluster estimators for nominal attributes, the mean/standard deviation for
 * numeric attributes and the cluster priors (normalized cluster sizes).
 *
 * @param inst the training instances
 * @throws Exception if a k-means run fails
 */
private void EM_Init(Instances inst) throws Exception {
    // run k means 10 times and choose best solution
    SimpleKMeans bestK = null;
    double bestSqE = Double.MAX_VALUE;
    for (int run = 0; run < 10; run++) {
        SimpleKMeans candidate = new SimpleKMeans();
        candidate.setSeed(m_rr.nextInt());
        candidate.setNumClusters(m_num_clusters);
        candidate.setDisplayStdDevs(true);
        candidate.buildClusterer(inst);
        double squaredError = candidate.getSquaredError();
        if (squaredError < bestSqE) {
            bestSqE = squaredError;
            bestK = candidate;
        }
    }

    // initialize with best k-means solution
    m_num_clusters = bestK.numberOfClusters();
    m_weights = new double[inst.numInstances()][m_num_clusters];
    m_model = new DiscreteEstimator[m_num_clusters][m_num_attribs];
    m_modelNormal = new double[m_num_clusters][m_num_attribs][3];
    m_priors = new double[m_num_clusters];
    Instances centers = bestK.getClusterCentroids();
    Instances stdD = bestK.getClusterStandardDevs();
    double[][][] nominalCounts = bestK.getClusterNominalCounts();
    double[] clusterSizes = bestK.getClusterSizes();

    for (int cluster = 0; cluster < m_num_clusters; cluster++) {
        Instance center = centers.instance(cluster);
        for (int att = 0; att < m_num_attribs; att++) {
            if (inst.attribute(att).isNominal()) {
                // Nominal attribute: seed a discrete estimator with the k-means counts.
                m_model[cluster][att] = new DiscreteEstimator(m_theInstances.attribute(att).numValues(), true);
                for (int value = 0; value < inst.attribute(att).numValues(); value++) {
                    m_model[cluster][att].addValue(value, nominalCounts[cluster][att][value]);
                }
                continue;
            }

            // Numeric attribute: mean and standard deviation of a normal estimator.
            double minStdD;
            if (m_minStdDevPerAtt != null) {
                minStdD = m_minStdDevPerAtt[att];
            } else {
                minStdD = m_minStdDev;
            }

            double mean;
            if (center.isMissing(att)) {
                mean = inst.meanOrMode(att);
            } else {
                mean = center.value(att);
            }
            m_modelNormal[cluster][att][0] = mean;

            Instance spread = stdD.instance(cluster);
            double stdv;
            if (spread.isMissing(att)) {
                stdv = (m_maxValues[att] - m_minValues[att]) / (2 * m_num_clusters);
            } else {
                stdv = spread.value(att);
            }
            // Too small a deviation: fall back to the attribute's overall deviation,
            // but never go below the configured minimum.
            if (stdv < minStdD) {
                stdv = inst.attributeStats(att).numericStats.stdDev;
                if (Double.isInfinite(stdv)) {
                    stdv = minStdD;
                }
                if (stdv < minStdD) {
                    stdv = minStdD;
                }
            }
            if (stdv <= 0) {
                stdv = m_minStdDev;
            }

            m_modelNormal[cluster][att][1] = stdv;
            m_modelNormal[cluster][att][2] = 1.0;
        }
    }

    // Priors start as the k-means cluster sizes and are then normalized.
    for (int cluster = 0; cluster < m_num_clusters; cluster++) {
        m_priors[cluster] = clusterSizes[cluster];
    }
    Utils.normalize(m_priors);
}

From source file:com.edwardraff.WekaMNIST.java

License:Open Source License

/**
 * Times several Weka learners, and k-means, on the MNIST data set.
 *
 * @param args {@code args[0]} is the folder prefix (including the trailing separator)
 *        in front of {@code MNISTtrain.arff} and {@code MNISTtest.arff}
 * @throws IOException if one of the ARFF files cannot be read
 * @throws Exception if a Weka filter, classifier or clusterer fails
 */
public static void main(String[] args) throws IOException, Exception {
    if (args.length < 1) {
        throw new IllegalArgumentException(
                "Expected one argument: the folder containing MNISTtrain.arff and MNISTtest.arff");
    }
    String folder = args[0];
    String trainPath = folder + "MNISTtrain.arff";
    String testPath = folder + "MNISTtest.arff";

    System.out.println("Weka Timings");
    Instances mnistTrainWeka = loadWithLastAttributeAsClass(trainPath);
    Instances mnistTestWeka = loadWithLastAttributeAsClass(testPath);

    //normalize range like into [0, 1]
    Normalize normalizeFilter = new Normalize();
    normalizeFilter.setInputFormat(mnistTrainWeka);

    // The first batch pushed through the filter determines the value ranges, so the
    // training set must go first; the test set is then scaled with the training ranges.
    mnistTrainWeka = Normalize.useFilter(mnistTrainWeka, normalizeFilter);
    mnistTestWeka = Normalize.useFilter(mnistTestWeka, normalizeFilter);

    System.out.println("RBF SVM (Full Cache)");
    SMO smo = new SMO();
    smo.setKernel(new RBFKernel(mnistTrainWeka, 0/*0 causes Weka to cache the whole matrix...*/, 0.015625));
    smo.setC(8.0);
    smo.setBuildLogisticModels(false);
    evalModel(smo, mnistTrainWeka, mnistTestWeka);

    System.out.println("RBF SVM (No Cache)");
    smo = new SMO();
    smo.setKernel(new RBFKernel(mnistTrainWeka, 1, 0.015625));
    smo.setC(8.0);
    smo.setBuildLogisticModels(false);
    evalModel(smo, mnistTrainWeka, mnistTestWeka);

    System.out.println("Decision Tree C45");
    J48 wekaC45 = new J48();
    wekaC45.setUseLaplace(false);
    wekaC45.setCollapseTree(false);
    wekaC45.setUnpruned(true);
    wekaC45.setMinNumObj(2);
    wekaC45.setUseMDLcorrection(true);

    evalModel(wekaC45, mnistTrainWeka, mnistTestWeka);

    System.out.println("Random Forest 50 trees");
    int featuresToUse = (int) Math.sqrt(28 * 28);//Weka uses different defaults, so lets make sure they both use the published way

    RandomForest wekaRF = new RandomForest();
    wekaRF.setNumExecutionSlots(1);
    wekaRF.setMaxDepth(0/*0 for unlimited*/);
    wekaRF.setNumFeatures(featuresToUse);
    wekaRF.setNumTrees(50);

    evalModel(wekaRF, mnistTrainWeka, mnistTestWeka);

    System.out.println("1-NN (brute)");
    IBk wekaNN = new IBk(1);
    wekaNN.setNearestNeighbourSearchAlgorithm(new LinearNNSearch());
    wekaNN.setCrossValidate(false);

    evalModel(wekaNN, mnistTrainWeka, mnistTestWeka);

    System.out.println("1-NN (Ball Tree)");
    wekaNN = new IBk(1);
    wekaNN.setNearestNeighbourSearchAlgorithm(new BallTree());
    wekaNN.setCrossValidate(false);

    evalModel(wekaNN, mnistTrainWeka, mnistTestWeka);

    System.out.println("1-NN (Cover Tree)");
    wekaNN = new IBk(1);
    wekaNN.setNearestNeighbourSearchAlgorithm(new CoverTree());
    wekaNN.setCrossValidate(false);

    evalModel(wekaNN, mnistTrainWeka, mnistTestWeka);

    System.out.println("Logistic Regression LBFGS lambda = 1e-4");
    Logistic logisticLBFGS = new Logistic();
    logisticLBFGS.setRidge(1e-4);
    logisticLBFGS.setMaxIts(500);

    evalModel(logisticLBFGS, mnistTrainWeka, mnistTestWeka);

    System.out.println("k-means (Loyd)");
    // Clustering is unsupervised: unset the class index, then drop the class attribute.
    int origClassIndex = mnistTrainWeka.classIndex();
    mnistTrainWeka.setClassIndex(-1);
    mnistTrainWeka.deleteAttributeAt(origClassIndex);

    // Average the wall-clock build time over ten runs.
    long totalTime = 0;
    for (int i = 0; i < 10; i++) {
        SimpleKMeans wekaKMeans = new SimpleKMeans();
        wekaKMeans.setNumClusters(10);
        wekaKMeans.setNumExecutionSlots(1);
        wekaKMeans.setFastDistanceCalc(true);

        long start = System.currentTimeMillis();
        wekaKMeans.buildClusterer(mnistTrainWeka);
        long end = System.currentTimeMillis();
        totalTime += (end - start);
    }
    System.out.println("\tClustering took: " + (totalTime / 10.0) / 1000.0 + " on average");
}

/**
 * Reads an ARFF file, closes the reader, and marks the last attribute as the class.
 */
private static Instances loadWithLastAttributeAsClass(String path) throws IOException {
    try (BufferedReader reader = new BufferedReader(new FileReader(new File(path)))) {
        Instances data = new Instances(reader);
        data.setClassIndex(data.numAttributes() - 1);
        return data;
    }
}

From source file:controller.KMeansBean.java

/**
 * Clusters {@code inst} into {@code clusternum} groups with k-means and stores the
 * clusterer's textual summary in {@code output}. Failures are logged at SEVERE level
 * and leave {@code output} unchanged.
 */
public void calculate() {
    SimpleKMeans clusterer = new SimpleKMeans();
    try {
        clusterer.setNumClusters(clusternum);
        clusterer.buildClusterer(inst);
        String summary = clusterer.toString();
        output = summary;
    } catch (Exception failure) {
        Logger logger = Logger.getLogger(KMeansBean.class.getName());
        logger.log(Level.SEVERE, null, failure);
    }
}

From source file:controller.MineroControler.java

/**
 * Loads the ARFF data from {@code fuente_arff}, clusters it with k-means (seed 10,
 * instance order preserved) and returns the clusterer's textual summary.
 *
 * <p>Errors are reported on standard error together with their cause. If the data
 * cannot be loaded, clustering is skipped and the summary of the unbuilt clusterer is
 * returned.
 *
 * @param numClusters the number of clusters to generate
 * @return the result of {@code SimpleKMeans.toString()}
 */
public String clasificarSimpleKmeans(int numClusters) {
    SimpleKMeans skmeans = new SimpleKMeans();

    // NOTE(review): the reader wraps the fuente_arff field and is left open here
    // because this method does not own it — confirm who closes it.
    Instances datos;
    try {
        datos = new Instances(new BufferedReader(fuente_arff));
    } catch (IOException ex) {
        System.err.println("Problemas al intentar cargar los datos: " + ex);
        // Without data there is nothing to cluster; previously a second, misleading
        // error was produced by clustering on null.
        return skmeans.toString();
    }

    try {
        skmeans.setSeed(10);
        skmeans.setPreserveInstancesOrder(true);
        skmeans.setNumClusters(numClusters);
        skmeans.buildClusterer(datos);
    } catch (Exception ex) {
        System.err.println("Problemas al ejecutar algorimo de clasificacion: " + ex);
    }
    return skmeans.toString();
}

From source file:detplagiasi.KMeansClustering.java

/**
 * Runs k-means on the ARFF file returned by {@code getArffFile()}: once through
 * {@code ClusterEvaluation} with default options and once manually with 4 clusters.
 * Results are appended to {@code output.txt} under the container address and the
 * cluster of every training file is recorded in {@code array1}/{@code array2}.
 */
KMeansClustering() {
    addd = Container.getAddress();
    try {
        File he = getArffFile();

        // Load the data and close the reader straight away.
        Instances data;
        try (BufferedReader reader = new BufferedReader(new FileReader(he))) {
            data = new Instances(reader);
        }
        System.out.println("-----KMeans Clustering-----");

        // normal
        try (BufferedWriter out = new BufferedWriter(new FileWriter(addd + "\\output.txt", true))) {
            out.write("\r\n--> normal\r\n");
            String[] options = new String[2];
            options[0] = "-t";
            options[1] = he.getAbsolutePath();
            out.write("\r\n" + ClusterEvaluation.evaluateClusterer(new SimpleKMeans(), options) + "\r\n");
            out.write("\r\n");

            // manual call
            out.write("\n--> manual\r\n");
            SimpleKMeans cl = new SimpleKMeans();
            cl.setNumClusters(4);
            out.write("\r\n");
            cl.buildClusterer(data);
            getDataUji();
            totalCluster = cl.numberOfClusters();
            System.out.println("jumlah kluster = " + totalCluster);
            // Cluster of the first test instance, computed once.
            noClusterUji = cl.clusterInstance(dataUji.instance(0));
            System.out.println("kluster = " + noClusterUji);

            // Store the file name and cluster of every training instance so that they
            // can be passed on to the GUI.
            for (int b = 0; b < dataTraining.numInstances(); b++) {
                System.out.print("file " + td.fileName[b] + " termasuk cluster ke ");
                int cluster = cl.clusterInstance(dataTraining.instance(b));
                System.out.println(cluster);
                array1[b] = td.fileName[b];
                array2[b] = cluster;
            }

            out.write("\r\n");

            ClusterEvaluation eval = new ClusterEvaluation();
            eval.setClusterer(cl);
            eval.evaluateClusterer(new Instances(data));
            out.write("\r\n\n# of clusters: " + eval.getNumClusters());

        } catch (Exception e) {
            System.err.println(e.getMessage());
            System.out.println("error2 kmeans cluster");
        }

    } catch (IOException ex) {
        // Log under this class rather than the unrelated Clustering class.
        Logger.getLogger(KMeansClustering.class.getName()).log(Level.SEVERE, null, ex);
        System.out.println("errorrrr null kmeans");
    }
}