List of usage examples for weka.clusterers SimpleKMeans SimpleKMeans
public SimpleKMeans()
From source file:ClusteringClass.java
public static void main(String[] args) throws Exception { String filename = "C:\\Users\\Daniele\\Desktop\\Humoradio2.csv"; try {/*from w ww . j ava 2s. co m*/ FileWriter fw = new FileWriter(filename); Class.forName("org.apache.derby.jdbc.ClientDriver").newInstance(); Connection conn = DriverManager.getConnection("jdbc:derby://localhost:1527/HumoRadioDB", "dani", "dani"); String query = "SELECT * FROM SONG_RATING2"; Statement stmt = conn.createStatement(); ResultSet rs = stmt.executeQuery(query); for (int i = 1; i < 23; i++) { if (i != 2) { ResultSetMetaData rsmd = rs.getMetaData(); String name = rsmd.getColumnName(i); fw.append(name); if (i != 22) { fw.append(','); } else { fw.append('\n'); } } } String query1 = "SELECT * FROM SONG_DATA"; Statement stmt1 = conn.createStatement(); ResultSet rs1 = stmt1.executeQuery(query1); String[] titles = new String[150]; for (int ii = 0; ii < 150; ii++) { rs1.next(); titles[ii] = rs1.getString("TITLE"); } while (rs.next()) { for (int i = 1; i < 23; i++) { if (i == 22) fw.append('\n'); else if (i != 2) { fw.append(','); } } } fw.flush(); fw.close(); conn.close(); System.out.println("CSV File is created successfully."); /* Clustering part */ DataSource source = new DataSource("C:\\Users\\Daniele\\Desktop\\Humoradio2.csv"); Instances train = source.getDataSet(); /* Applichiamo il filtro Remove fornito da Weka per non considerare un attributo nell'algoritmo di Clustering. */ Remove filter = new Remove(); filter.setAttributeIndices("1"); filter.setInputFormat(train); Instances train2 = Filter.useFilter(train, filter); System.out.println("Nominal attributes removed from computation."); /* Applichiamo il filtro Normalize fornito da Weka per normalizzare il nostro dataset. */ Normalize norm = new Normalize(); norm.setInputFormat(train2); Instances train3 = Filter.useFilter(train2, norm); System.out.println("Dataset normalized."); /* First Clustering Algorithm */ EuclideanDistance df = new EuclideanDistance(); SimpleKMeans clus1 = new SimpleKMeans(); int k = 10; clus1.setNumClusters(k); clus1.setDistanceFunction(df); clus1.setPreserveInstancesOrder(true); clus1.buildClusterer(train3); /* First Evaluation */ ClusterEvaluation eval1 = new ClusterEvaluation(); eval1.setClusterer(clus1); eval1.evaluateClusterer(train3); System.out.println(eval1.clusterResultsToString()); int[] assignments = clus1.getAssignments(); String[][] dati = new String[150][4]; for (int kk = 0; kk < 150; kk++) { dati[kk][0] = String.valueOf(kk); dati[kk][1] = train2.instance(kk).toString(); dati[kk][2] = String.valueOf(assignments[kk]); dati[kk][3] = titles[kk]; } for (int w = 0; w < 10; w++) { System.out.println(); for (int i = 0; i < 150; i++) { if (dati[i][2].equals(String.valueOf(w))) { for (int j = 0; j < 4; j++) { if (j != 3) { System.out.print(dati[i][j] + "-> \t"); } else { System.out.println(dati[i][j]); } } } } } /*first graph PlotData2D predData = ClustererPanel.setUpVisualizableInstances(train, eval1); //String name = (new SimpleDateFormat("HH:mm:ss - ")).format(new Date()); String name = ""; String cname = clus1.getClass().getName(); if (cname.startsWith("weka.clusterers.")) name += cname.substring("weka.clusterers.".length()); else name += cname; VisualizePanel vp = new VisualizePanel(); vp.setName(name + " (" + train.relationName() + ")"); predData.setPlotName(name + " (" + train.relationName() + ")"); vp.addPlot(predData); String plotName = vp.getName(); final javax.swing.JFrame jf = new javax.swing.JFrame("Weka Clusterer Visualize: " + plotName); jf.setSize(500,400); jf.getContentPane().setLayout(new BorderLayout()); jf.getContentPane().add(vp, BorderLayout.CENTER); jf.dispose(); jf.addWindowListener(new java.awt.event.WindowAdapter() { public void windowClosing(java.awt.event.WindowEvent e) { jf.dispose(); } }); jf.setVisible(true); end first graph */ /* Second Clustering Algorithm */ System.out.println(); DBSCAN clus3 = new DBSCAN(); clus3.setEpsilon(0.7); clus3.setMinPoints(2); clus3.buildClusterer(train3); /* Second Evaluation */ ClusterEvaluation eval3 = new ClusterEvaluation(); eval3.setClusterer(clus3); eval3.evaluateClusterer(train3); System.out.println(eval3.clusterResultsToString()); double[] assignments3 = eval3.getClusterAssignments(); String[][] dati3 = new String[150][4]; for (int kk = 0; kk < 150; kk++) { dati3[kk][0] = String.valueOf(kk); dati3[kk][1] = train2.instance(kk).toString(); dati3[kk][2] = String.valueOf(assignments3[kk]); dati3[kk][3] = titles[kk]; } for (int w = 0; w < eval3.getNumClusters(); w++) { System.out.println(); for (int i = 0; i < 150; i++) { if (Double.parseDouble(dati3[i][2]) == w) { for (int j = 0; j < 4; j++) { if (j != 3) { System.out.print(dati3[i][j] + "-> \t"); } else { System.out.println(dati3[i][j]); } } } } } System.out.println(); for (int i = 0; i < 150; i++) { if (Double.parseDouble(dati3[i][2]) == -1.0) { for (int j = 0; j < 4; j++) { if (j != 3) { System.out.print(dati3[i][j] + "-> \t"); } else { System.out.println(dati3[i][j]); } } } } } catch (Exception e) { e.printStackTrace(); } }
From source file:adams.ml.model.clustering.WekaClusterer.java
License:Open Source License
/** * Adds options to the internal list of options. */// w ww . j av a2 s. co m @Override public void defineOptions() { super.defineOptions(); m_OptionManager.add("clusterer", "clusterer", new SimpleKMeans()); }
From source file:analysis.Purity.java
/** * //from w w w . jav a 2 s. c om * @param k number of clusters * @param originalfile original data * @param imputedfile imputed data * @throws Exception */ public void findpurity(int k, String originalfile, String imputedfile) throws Exception { //get original data ConverterUtils.DataSource source = new ConverterUtils.DataSource(originalfile); // get imputed data ConverterUtils.DataSource mysource = new ConverterUtils.DataSource(imputedfile); //get instances for clustering this.instances = source.getDataSet(); this.myinstances = mysource.getDataSet(); //Simple Kmeans for clustering SimpleKMeans globalkmeans = new SimpleKMeans(); SimpleKMeans mykmeans = new SimpleKMeans(); //set number of clusters globalkmeans.setNumClusters(k); mykmeans.setNumClusters(k); // build clusters globalkmeans.buildClusterer(instances); mykmeans.buildClusterer(myinstances); // to compare clusters create matrix for original data and imputed data // this matrix indicates the instances in the came clusters original = new Matrix(instances.numInstances(), k); imputed = new Matrix(myinstances.numInstances(), k); // get cluster numbers for each instance and initialize associated cluster value to 1 for (int i = 0; i < myinstances.numInstances(); i++) { //System.out.println(instances.instance(i)); original.set(i, globalkmeans.clusterInstance(instances.instance(i)), 1); imputed.set(i, mykmeans.clusterInstance(myinstances.instance(i)), 1); } System.out.println("k is: \t" + original.getColumnDimension()); //System.out.println(imputed.getRowDimension()); original = original.times(original.transpose()); imputed = imputed.times(imputed.transpose()); int total1 = 0;// to count instances in the imputed data in the same cluster int total2 = 0; // to count instances in the original data in the same cluster //int value = 1; for (int i = 0; i < original.getRowDimension(); i++) { for (int j = i; j < original.getColumnDimension(); j++) { if ((original.get(i, j) == 1)) { if (imputed.get(i, j) == 1) { total1++; // if i and j th instance in the same cluster in the imputed data } total2++;// if the i and j th instance in the same cluster in the original data } } //System.out.println(); } // calculate purity double purity; purity = (double) total1 / (double) total2; System.out.println("WCSS --> Original Data: " + mykmeans.getSquaredError()); System.out.println("WCSS --> Imputed Data: " + globalkmeans.getSquaredError()); // System.out.println("Total Hit is \t" + total1); //System.out.println("Total for hit is \t" + total2); System.out.println("Purity is: " + purity); }
From source file:br.com.ufu.lsi.rebfnetwork.RBFModel.java
License:Open Source License
/** * Method used to pre-process the data, perform clustering, and * set the initial parameter vector.//from ww w . j a v a 2 s . com */ protected Instances initializeClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); data = new Instances(data); data.deleteWithMissingClass(); // Make sure data is shuffled Random random = new Random(m_Seed); if (data.numInstances() > 2) { random = data.getRandomNumberGenerator(m_Seed); } data.randomize(random); double y0 = data.instance(0).classValue(); // This stuff is not relevant in classification case int index = 1; while (index < data.numInstances() && data.instance(index).classValue() == y0) { index++; } if (index == data.numInstances()) { // degenerate case, all class values are equal // we don't want to deal with this, too much hassle throw new Exception("All class values are the same. At least two class values should be different"); } double y1 = data.instance(index).classValue(); // Replace missing values m_ReplaceMissingValues = new ReplaceMissingValues(); m_ReplaceMissingValues.setInputFormat(data); data = Filter.useFilter(data, m_ReplaceMissingValues); // Remove useless attributes m_AttFilter = new RemoveUseless(); m_AttFilter.setInputFormat(data); data = Filter.useFilter(data, m_AttFilter); // only class? -> build ZeroR model if (data.numAttributes() == 1) { System.err.println( "Cannot build model (only class attribute present in data after removing useless attributes!), " + "using ZeroR model instead!"); m_ZeroR = new weka.classifiers.rules.ZeroR(); m_ZeroR.buildClassifier(data); return data; } else { m_ZeroR = null; } // Transform attributes m_NominalToBinary = new NominalToBinary(); m_NominalToBinary.setInputFormat(data); data = Filter.useFilter(data, m_NominalToBinary); m_Filter = new Normalize(); ((Normalize) m_Filter).setIgnoreClass(true); m_Filter.setInputFormat(data); data = Filter.useFilter(data, m_Filter); double z0 = data.instance(0).classValue(); // This stuff is not relevant in classification case double z1 = data.instance(index).classValue(); m_x1 = (y0 - y1) / (z0 - z1); // no division by zero, since y0 != y1 guaranteed => z0 != z1 ??? m_x0 = (y0 - m_x1 * z0); // = y1 - m_x1 * z1 m_classIndex = data.classIndex(); m_numClasses = data.numClasses(); m_numAttributes = data.numAttributes(); // Run k-means SimpleKMeans skm = new SimpleKMeans(); skm.setMaxIterations(10000); skm.setNumClusters(m_numUnits); Remove rm = new Remove(); data.setClassIndex(-1); rm.setAttributeIndices((m_classIndex + 1) + ""); rm.setInputFormat(data); Instances dataRemoved = Filter.useFilter(data, rm); data.setClassIndex(m_classIndex); skm.buildClusterer(dataRemoved); Instances centers = skm.getClusterCentroids(); if (centers.numInstances() < m_numUnits) { m_numUnits = centers.numInstances(); } // Set up arrays OFFSET_WEIGHTS = 0; if (m_useAttributeWeights) { OFFSET_ATTRIBUTE_WEIGHTS = (m_numUnits + 1) * m_numClasses; OFFSET_CENTERS = OFFSET_ATTRIBUTE_WEIGHTS + m_numAttributes; } else { OFFSET_ATTRIBUTE_WEIGHTS = -1; OFFSET_CENTERS = (m_numUnits + 1) * m_numClasses; } OFFSET_SCALES = OFFSET_CENTERS + m_numUnits * m_numAttributes; switch (m_scaleOptimizationOption) { case USE_GLOBAL_SCALE: m_RBFParameters = new double[OFFSET_SCALES + 1]; break; case USE_SCALE_PER_UNIT_AND_ATTRIBUTE: m_RBFParameters = new double[OFFSET_SCALES + m_numUnits * m_numAttributes]; break; default: m_RBFParameters = new double[OFFSET_SCALES + m_numUnits]; break; } // Set initial radius based on distance to nearest other basis function double maxMinDist = -1; for (int i = 0; i < centers.numInstances(); i++) { double minDist = Double.MAX_VALUE; for (int j = i + 1; j < centers.numInstances(); j++) { double dist = 0; for (int k = 0; k < centers.numAttributes(); k++) { if (k != centers.classIndex()) { double diff = centers.instance(i).value(k) - centers.instance(j).value(k); dist += diff * diff; } } if (dist < minDist) { minDist = dist; } } if ((minDist != Double.MAX_VALUE) && (minDist > maxMinDist)) { maxMinDist = minDist; } } // Initialize parameters if (m_scaleOptimizationOption == USE_GLOBAL_SCALE) { m_RBFParameters[OFFSET_SCALES] = Math.sqrt(maxMinDist); } for (int i = 0; i < m_numUnits; i++) { if (m_scaleOptimizationOption == USE_SCALE_PER_UNIT) { m_RBFParameters[OFFSET_SCALES + i] = Math.sqrt(maxMinDist); } int k = 0; for (int j = 0; j < m_numAttributes; j++) { if (k == centers.classIndex()) { k++; } if (j != data.classIndex()) { if (m_scaleOptimizationOption == USE_SCALE_PER_UNIT_AND_ATTRIBUTE) { m_RBFParameters[OFFSET_SCALES + (i * m_numAttributes + j)] = Math.sqrt(maxMinDist); } m_RBFParameters[OFFSET_CENTERS + (i * m_numAttributes) + j] = centers.instance(i).value(k); k++; } } } if (m_useAttributeWeights) { for (int j = 0; j < m_numAttributes; j++) { if (j != data.classIndex()) { m_RBFParameters[OFFSET_ATTRIBUTE_WEIGHTS + j] = 1.0; } } } initializeOutputLayer(random); return data; }
From source file:br.com.ufu.lsi.rebfnetwork.RBFNetwork.java
License:Open Source License
/** * Returns default capabilities of the classifier, i.e., and "or" of * Logistic and LinearRegression./*from w w w. jav a 2s .com*/ * * @return the capabilities of this classifier * @see Logistic * @see LinearRegression */ public Capabilities getCapabilities() { Capabilities result = new Logistic().getCapabilities(); result.or(new LinearRegression().getCapabilities()); Capabilities classes = result.getClassCapabilities(); result.and(new SimpleKMeans().getCapabilities()); result.or(classes); return result; }
From source file:br.com.ufu.lsi.rebfnetwork.RBFNetwork.java
License:Open Source License
/** * Builds the classifier/*from www . j ava2 s. com*/ * * @param instances the training data * @throws Exception if the classifier could not be built successfully */ public void buildClassifier(Instances instances) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(instances); // remove instances with missing class instances = new Instances(instances); instances.deleteWithMissingClass(); // only class? -> build ZeroR model if (instances.numAttributes() == 1) { System.err.println( "Cannot build model (only class attribute present in data!), " + "using ZeroR model instead!"); m_ZeroR = new weka.classifiers.rules.ZeroR(); m_ZeroR.buildClassifier(instances); return; } else { m_ZeroR = null; } m_standardize = new Standardize(); m_standardize.setInputFormat(instances); instances = Filter.useFilter(instances, m_standardize); SimpleKMeans sk = new SimpleKMeans(); sk.setNumClusters(m_numClusters); sk.setSeed(m_clusteringSeed); MakeDensityBasedClusterer dc = new MakeDensityBasedClusterer(); dc.setClusterer(sk); dc.setMinStdDev(m_minStdDev); m_basisFilter = new ClusterMembership(); m_basisFilter.setDensityBasedClusterer(dc); m_basisFilter.setInputFormat(instances); Instances transformed = Filter.useFilter(instances, m_basisFilter); if (instances.classAttribute().isNominal()) { m_linear = null; m_logistic = new Logistic(); m_logistic.setRidge(m_ridge); m_logistic.setMaxIts(m_maxIts); m_logistic.buildClassifier(transformed); } else { m_logistic = null; m_linear = new LinearRegression(); m_linear.setAttributeSelectionMethod( new SelectedTag(LinearRegression.SELECTION_NONE, LinearRegression.TAGS_SELECTION)); m_linear.setRidge(m_ridge); m_linear.buildClassifier(transformed); } }
From source file:br.ufrn.ia.core.clustering.EMIaProject.java
License:Open Source License
private void EM_Init(Instances inst) throws Exception { int i, j, k;/*from ww w. jav a 2 s.c o m*/ // run k means 10 times and choose best solution SimpleKMeans bestK = null; double bestSqE = Double.MAX_VALUE; for (i = 0; i < 10; i++) { SimpleKMeans sk = new SimpleKMeans(); sk.setSeed(m_rr.nextInt()); sk.setNumClusters(m_num_clusters); sk.setDisplayStdDevs(true); sk.buildClusterer(inst); if (sk.getSquaredError() < bestSqE) { bestSqE = sk.getSquaredError(); bestK = sk; } } // initialize with best k-means solution m_num_clusters = bestK.numberOfClusters(); m_weights = new double[inst.numInstances()][m_num_clusters]; m_model = new DiscreteEstimator[m_num_clusters][m_num_attribs]; m_modelNormal = new double[m_num_clusters][m_num_attribs][3]; m_priors = new double[m_num_clusters]; Instances centers = bestK.getClusterCentroids(); Instances stdD = bestK.getClusterStandardDevs(); double[][][] nominalCounts = bestK.getClusterNominalCounts(); double[] clusterSizes = bestK.getClusterSizes(); for (i = 0; i < m_num_clusters; i++) { Instance center = centers.instance(i); for (j = 0; j < m_num_attribs; j++) { if (inst.attribute(j).isNominal()) { m_model[i][j] = new DiscreteEstimator(m_theInstances.attribute(j).numValues(), true); for (k = 0; k < inst.attribute(j).numValues(); k++) { m_model[i][j].addValue(k, nominalCounts[i][j][k]); } } else { double minStdD = (m_minStdDevPerAtt != null) ? m_minStdDevPerAtt[j] : m_minStdDev; double mean = (center.isMissing(j)) ? inst.meanOrMode(j) : center.value(j); m_modelNormal[i][j][0] = mean; double stdv = (stdD.instance(i).isMissing(j)) ? ((m_maxValues[j] - m_minValues[j]) / (2 * m_num_clusters)) : stdD.instance(i).value(j); if (stdv < minStdD) { stdv = inst.attributeStats(j).numericStats.stdDev; if (Double.isInfinite(stdv)) { stdv = minStdD; } if (stdv < minStdD) { stdv = minStdD; } } if (stdv <= 0) { stdv = m_minStdDev; } m_modelNormal[i][j][1] = stdv; m_modelNormal[i][j][2] = 1.0; } } } for (j = 0; j < m_num_clusters; j++) { // m_priors[j] += 1.0; m_priors[j] = clusterSizes[j]; } Utils.normalize(m_priors); }
From source file:br.ufrn.ia.core.clustering.EMIaProject.java
License:Open Source License
public Capabilities getCapabilities() { Capabilities result = new SimpleKMeans().getCapabilities(); result.setOwner(this); return result; }
From source file:Clustering.WekaKMeansClustererWrapper.java
public ArrayList<String>[] classify(HashMap<String, List> data, boolean clearData) { ArrayList<String>[] clusterResult; try {/*from w w w . j a v a2 s. c o m*/ File arff = m_ArffExporter.getArff(data); int nSize = data.size(); if (arff == null) return null; if (clearData) data.clear(); FileInputStream is = new FileInputStream(arff.getAbsolutePath()); Instances instances = ConverterUtils.DataSource.read(is); is.close(); String[] keys = new String[instances.numInstances()]; for (int i = 0; i < instances.numInstances(); ++i) { Instance instance = instances.instance(i); keys[i] = instance.stringValue(0); // assume that the 0th attribute is the key string } instances.deleteStringAttributes(); SimpleKMeans cl = new SimpleKMeans(); int numClusters = m_NumberOfClusters < nSize ? m_NumberOfClusters : nSize; String[] options = new String[5]; options[0] = "-O"; options[1] = "-N"; options[2] = Integer.toString(numClusters); options[3] = "-A"; options[4] = m_DistanceFunction; cl.setOptions(options); //System.out.println( "Clustering" ); cl.buildClusterer(instances); //System.out.println( "Create ArrayList" ); clusterResult = new ArrayList[m_NumberOfClusters]; for (int i = 0; i < m_NumberOfClusters; ++i) { clusterResult[i] = new ArrayList<>(); } //System.out.println( "Assigning" ); int[] assignment = cl.getAssignments(); for (int i = 0; i < assignment.length; ++i) { clusterResult[assignment[i]].add(keys[i]); } //System.out.println( "Done" ); if (!arff.delete()) arff.deleteOnExit(); } catch (Exception ex) { //System.out.println( "[EXCEPTION] " + ex.getMessage() ); m_LastErrorMessage = ex.getMessage(); return null; } return clusterResult; }
From source file:com.edwardraff.WekaMNIST.java
License:Open Source License
public static void main(String[] args) throws IOException, Exception { String folder = args[0];/*w w w . j a v a 2s. com*/ String trainPath = folder + "MNISTtrain.arff"; String testPath = folder + "MNISTtest.arff"; System.out.println("Weka Timings"); Instances mnistTrainWeka = new Instances(new BufferedReader(new FileReader(new File(trainPath)))); mnistTrainWeka.setClassIndex(mnistTrainWeka.numAttributes() - 1); Instances mnistTestWeka = new Instances(new BufferedReader(new FileReader(new File(testPath)))); mnistTestWeka.setClassIndex(mnistTestWeka.numAttributes() - 1); //normalize range like into [0, 1] Normalize normalizeFilter = new Normalize(); normalizeFilter.setInputFormat(mnistTrainWeka); mnistTestWeka = Normalize.useFilter(mnistTestWeka, normalizeFilter); mnistTrainWeka = Normalize.useFilter(mnistTrainWeka, normalizeFilter); long start, end; System.out.println("RBF SVM (Full Cache)"); SMO smo = new SMO(); smo.setKernel(new RBFKernel(mnistTrainWeka, 0/*0 causes Weka to cache the whole matrix...*/, 0.015625)); smo.setC(8.0); smo.setBuildLogisticModels(false); evalModel(smo, mnistTrainWeka, mnistTestWeka); System.out.println("RBF SVM (No Cache)"); smo = new SMO(); smo.setKernel(new RBFKernel(mnistTrainWeka, 1, 0.015625)); smo.setC(8.0); smo.setBuildLogisticModels(false); evalModel(smo, mnistTrainWeka, mnistTestWeka); System.out.println("Decision Tree C45"); J48 wekaC45 = new J48(); wekaC45.setUseLaplace(false); wekaC45.setCollapseTree(false); wekaC45.setUnpruned(true); wekaC45.setMinNumObj(2); wekaC45.setUseMDLcorrection(true); evalModel(wekaC45, mnistTrainWeka, mnistTestWeka); System.out.println("Random Forest 50 trees"); int featuresToUse = (int) Math.sqrt(28 * 28);//Weka uses different defaults, so lets make sure they both use the published way RandomForest wekaRF = new RandomForest(); wekaRF.setNumExecutionSlots(1); wekaRF.setMaxDepth(0/*0 for unlimited*/); wekaRF.setNumFeatures(featuresToUse); wekaRF.setNumTrees(50); evalModel(wekaRF, mnistTrainWeka, mnistTestWeka); System.out.println("1-NN (brute)"); IBk wekaNN = new IBk(1); wekaNN.setNearestNeighbourSearchAlgorithm(new LinearNNSearch()); wekaNN.setCrossValidate(false); evalModel(wekaNN, mnistTrainWeka, mnistTestWeka); System.out.println("1-NN (Ball Tree)"); wekaNN = new IBk(1); wekaNN.setNearestNeighbourSearchAlgorithm(new BallTree()); wekaNN.setCrossValidate(false); evalModel(wekaNN, mnistTrainWeka, mnistTestWeka); System.out.println("1-NN (Cover Tree)"); wekaNN = new IBk(1); wekaNN.setNearestNeighbourSearchAlgorithm(new CoverTree()); wekaNN.setCrossValidate(false); evalModel(wekaNN, mnistTrainWeka, mnistTestWeka); System.out.println("Logistic Regression LBFGS lambda = 1e-4"); Logistic logisticLBFGS = new Logistic(); logisticLBFGS.setRidge(1e-4); logisticLBFGS.setMaxIts(500); evalModel(logisticLBFGS, mnistTrainWeka, mnistTestWeka); System.out.println("k-means (Loyd)"); int origClassIndex = mnistTrainWeka.classIndex(); mnistTrainWeka.setClassIndex(-1); mnistTrainWeka.deleteAttributeAt(origClassIndex); { long totalTime = 0; for (int i = 0; i < 10; i++) { SimpleKMeans wekaKMeans = new SimpleKMeans(); wekaKMeans.setNumClusters(10); wekaKMeans.setNumExecutionSlots(1); wekaKMeans.setFastDistanceCalc(true); start = System.currentTimeMillis(); wekaKMeans.buildClusterer(mnistTrainWeka); end = System.currentTimeMillis(); totalTime += (end - start); } System.out.println("\tClustering took: " + (totalTime / 10.0) / 1000.0 + " on average"); } }