List of usage examples for weka.core Instances numInstances
public int numInstances()
From source file:classifiers.ComplexClassifierZufall.java
@Override @SuppressWarnings("empty-statement") public double[][] test(Instances testinst) { double count = 0; long anfangszeit = System.currentTimeMillis(); ;// ww w. j a v a 2 s .co m long endzeit; double[][] ausgabe = new double[1][2]; if (testinst.numAttributes() != 0) { testinst.setClass(testinst.attribute(testinst.numAttributes() - 1)); for (int i = 0; i < testinst.numInstances(); i++) { if (!Classify(testinst.instance(i))) { count++; } else { } } endzeit = System.currentTimeMillis(); ausgabe[0][0] = (count / testinst.numInstances()) * 100; ausgabe[0][1] = ((endzeit - anfangszeit)); // System.out.println(testinst); return ausgabe; } else { // System.out.println(testinst); return ausgabe; } }
From source file:classifiers.mlp.MultilayerPerceptronCustom.java
License:Open Source License
/** * Call this function to build and train a neural network for the training * data provided./*from w ww. j a va 2 s . c o m*/ * @param i The training data. * @throws Exception if can't build classification properly. */ public void buildClassifier(Instances i) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(i); // remove instances with missing class i = new Instances(i); i.deleteWithMissingClass(); m_ZeroR = new weka.classifiers.rules.ZeroR(); m_ZeroR.buildClassifier(i); // only class? -> use ZeroR model if (i.numAttributes() == 1) { System.err.println( "Cannot build model (only class attribute present in data!), " + "using ZeroR model instead!"); m_useDefaultModel = true; return; } else { m_useDefaultModel = false; } m_epoch = 0; m_error = 0; m_instances = null; m_currentInstance = null; m_controlPanel = null; m_nodePanel = null; m_outputs = new NeuralEnd[0]; m_inputs = new NeuralEnd[0]; m_numAttributes = 0; m_numClasses = 0; m_neuralNodes = new NeuralConnection[0]; m_selected = new FastVector(4); m_graphers = new FastVector(2); m_nextId = 0; m_stopIt = true; m_stopped = true; m_accepted = false; m_instances = new Instances(i); m_random = new Random(m_randomSeed); m_instances.randomize(m_random); if (m_useNomToBin) { m_nominalToBinaryFilter = new NominalToBinary(); m_nominalToBinaryFilter.setInputFormat(m_instances); m_instances = Filter.useFilter(m_instances, m_nominalToBinaryFilter); } m_numAttributes = m_instances.numAttributes() - 1; m_numClasses = m_instances.numClasses(); setClassType(m_instances); //this sets up the validation set. 
Instances valSet = null; //numinval is needed later int numInVal = (int) (m_valSize / 100.0 * m_instances.numInstances()); if (m_valSize > 0) { if (numInVal == 0) { numInVal = 1; } valSet = new Instances(m_instances, 0, numInVal); } /////////// setupInputs(); setupOutputs(); if (m_autoBuild) { setupHiddenLayer(); } ///////////////////////////// //this sets up the gui for usage if (m_gui) { m_win = new JFrame(); m_win.addWindowListener(new WindowAdapter() { public void windowClosing(WindowEvent e) { boolean k = m_stopIt; m_stopIt = true; int well = JOptionPane .showConfirmDialog(m_win, "Are You Sure...\n" + "Click Yes To Accept" + " The Neural Network" + "\n Click No To Return", "Accept Neural Network", JOptionPane.YES_NO_OPTION); if (well == 0) { m_win.setDefaultCloseOperation(JFrame.DISPOSE_ON_CLOSE); m_accepted = true; blocker(false); } else { m_win.setDefaultCloseOperation(JFrame.DO_NOTHING_ON_CLOSE); } m_stopIt = k; } }); m_win.getContentPane().setLayout(new BorderLayout()); m_win.setTitle("Neural Network"); m_nodePanel = new NodePanel(); // without the following two lines, the NodePanel.paintComponents(Graphics) // method will go berserk if the network doesn't fit completely: it will // get called on a constant basis, using 100% of the CPU // see the following forum thread: // http://forum.java.sun.com/thread.jspa?threadID=580929&messageID=2945011 m_nodePanel.setPreferredSize(new Dimension(640, 480)); m_nodePanel.revalidate(); JScrollPane sp = new JScrollPane(m_nodePanel, JScrollPane.VERTICAL_SCROLLBAR_ALWAYS, JScrollPane.HORIZONTAL_SCROLLBAR_NEVER); m_controlPanel = new ControlPanel(); m_win.getContentPane().add(sp, BorderLayout.CENTER); m_win.getContentPane().add(m_controlPanel, BorderLayout.SOUTH); m_win.setSize(640, 480); m_win.setVisible(true); } //This sets up the initial state of the gui if (m_gui) { blocker(true); m_controlPanel.m_changeEpochs.setEnabled(false); m_controlPanel.m_changeLearning.setEnabled(false); 
m_controlPanel.m_changeMomentum.setEnabled(false); } //For silly situations in which the network gets accepted before training //commenses if (m_numeric) { setEndsToLinear(); } if (m_accepted) { m_win.dispose(); m_controlPanel = null; m_nodePanel = null; m_instances = new Instances(m_instances, 0); m_currentInstance = null; return; } //connections done. double right = 0; double driftOff = 0; double lastRight = Double.POSITIVE_INFINITY; double bestError = Double.POSITIVE_INFINITY; double tempRate; double totalWeight = 0; double totalValWeight = 0; double origRate = m_learningRate; //only used for when reset //ensure that at least 1 instance is trained through. if (numInVal == m_instances.numInstances()) { numInVal--; } if (numInVal < 0) { numInVal = 0; } for (int noa = numInVal; noa < m_instances.numInstances(); noa++) { if (!m_instances.instance(noa).classIsMissing()) { totalWeight += m_instances.instance(noa).weight(); } } if (m_valSize != 0) { for (int noa = 0; noa < valSet.numInstances(); noa++) { if (!valSet.instance(noa).classIsMissing()) { totalValWeight += valSet.instance(noa).weight(); } } } m_stopped = false; for (int noa = 1; noa < m_numEpochs + 1; noa++) { right = 0; for (int nob = numInVal; nob < m_instances.numInstances(); nob++) { m_currentInstance = m_instances.instance(nob); if (!m_currentInstance.classIsMissing()) { //this is where the network updating (and training occurs, for the //training set resetNetwork(); calculateOutputs(); tempRate = m_learningRate * m_currentInstance.weight(); if (m_decay) { tempRate /= noa; } right += (calculateErrors() / m_instances.numClasses()) * m_currentInstance.weight(); updateNetworkWeights(tempRate, m_momentum); } } right /= totalWeight; if (Double.isInfinite(right) || Double.isNaN(right)) { if (!m_reset) { m_instances = null; throw new Exception("Network cannot train. 
Try restarting with a" + " smaller learning rate."); } else { //reset the network if possible if (m_learningRate <= Utils.SMALL) throw new IllegalStateException( "Learning rate got too small (" + m_learningRate + " <= " + Utils.SMALL + ")!"); m_learningRate /= 2; buildClassifier(i); m_learningRate = origRate; m_instances = new Instances(m_instances, 0); m_currentInstance = null; return; } } ////////////////////////do validation testing if applicable if (m_valSize != 0) { right = 0; for (int nob = 0; nob < valSet.numInstances(); nob++) { m_currentInstance = valSet.instance(nob); if (!m_currentInstance.classIsMissing()) { //this is where the network updating occurs, for the validation set resetNetwork(); calculateOutputs(); right += (calculateErrors() / valSet.numClasses()) * m_currentInstance.weight(); //note 'right' could be calculated here just using //the calculate output values. This would be faster. //be less modular } } if (right < lastRight) { if (right < bestError) { bestError = right; // save the network weights at this point for (int noc = 0; noc < m_numClasses; noc++) { m_outputs[noc].saveWeights(); } driftOff = 0; } } else { driftOff++; } lastRight = right; if (driftOff > m_driftThreshold || noa + 1 >= m_numEpochs) { for (int noc = 0; noc < m_numClasses; noc++) { m_outputs[noc].restoreWeights(); } m_accepted = true; } right /= totalValWeight; } m_epoch = noa; m_error = right; //shows what the neuralnet is upto if a gui exists. updateDisplay(); //This junction controls what state the gui is in at the end of each //epoch, Such as if it is paused, if it is resumable etc... 
if (m_gui) { while ((m_stopIt || (m_epoch >= m_numEpochs && m_valSize == 0)) && !m_accepted) { m_stopIt = true; m_stopped = true; if (m_epoch >= m_numEpochs && m_valSize == 0) { m_controlPanel.m_startStop.setEnabled(false); } else { m_controlPanel.m_startStop.setEnabled(true); } m_controlPanel.m_startStop.setText("Start"); m_controlPanel.m_startStop.setActionCommand("Start"); m_controlPanel.m_changeEpochs.setEnabled(true); m_controlPanel.m_changeLearning.setEnabled(true); m_controlPanel.m_changeMomentum.setEnabled(true); blocker(true); if (m_numeric) { setEndsToLinear(); } } m_controlPanel.m_changeEpochs.setEnabled(false); m_controlPanel.m_changeLearning.setEnabled(false); m_controlPanel.m_changeMomentum.setEnabled(false); m_stopped = false; //if the network has been accepted stop the training loop if (m_accepted) { m_win.dispose(); m_controlPanel = null; m_nodePanel = null; m_instances = new Instances(m_instances, 0); m_currentInstance = null; return; } } if (m_accepted) { m_instances = new Instances(m_instances, 0); m_currentInstance = null; return; } //TODO: // Customization: store the model created after this epoch ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream("mlp/temp/" + noa + ".model")); oos.writeObject(this); oos.flush(); oos.close(); } if (m_gui) { m_win.dispose(); m_controlPanel = null; m_nodePanel = null; } m_instances = new Instances(m_instances, 0); m_currentInstance = null; }
From source file:classify.Classifier.java
public static void missingValuesRows(Instances data) { int[] missingValues = new int[data.numInstances()]; for (int i = 0; i < data.numInstances(); i++) { missingValues[i] = 0;/*from w ww . ja v a2s . c o m*/ } Instance example; String value = ""; //get number of missing attributes per row int missValues = 0; for (int i = 0; i < data.numInstances(); i++) { example = data.instance(i); for (int j = 0; j < 15; j++) { if (example.attribute(j).isNominal()) { value = example.stringValue(j); } else if (example.attribute(j).isNumeric()) { value = Double.toString(example.value(j)); } if (value.equals("?") || value.equals("NaN")) { missingValues[i]++; missValues++; } } } System.out.println("Number of Missing Values: " + missValues); //get how many times i attributes are missing int[] frequency = new int[15]; for (int i = 0; i < data.numInstances(); i++) { frequency[missingValues[i]]++; } int numRows = 0; for (int i = 0; i < data.numInstances(); i++) { if (missingValues[i] > 0) { numRows++; } } System.out.println("Number of rows with missing values: " + numRows); System.out.println("Number of missing attributes per row:"); for (int i = 0; i < 15; i++) { System.out.println(i + ": " + frequency[i]); } }
From source file:classify.Classifier.java
public static void setAttributeValues(Instances data) { Instance example;// w w w . j av a 2s. c o m String[][] savedData = new String[data.numInstances()][10]; for (int i = 0; i < data.numInstances(); i++) { example = data.instance(i); savedData[i][0] = example.stringValue(0); savedData[i][1] = example.stringValue(3); savedData[i][2] = example.stringValue(4); savedData[i][3] = example.stringValue(5); savedData[i][4] = example.stringValue(6); savedData[i][5] = example.stringValue(8); savedData[i][6] = example.stringValue(9); savedData[i][7] = example.stringValue(11); savedData[i][8] = example.stringValue(12); savedData[i][9] = example.stringValue(15); } //add in values for discrete attributes //A1 FastVector attVals = new FastVector(); attVals.addElement("b"); attVals.addElement("a"); data.deleteAttributeAt(0); data.insertAttributeAt(new Attribute("A1", attVals), 0); //A4 attVals = new FastVector(); attVals.addElement("u"); attVals.addElement("y"); attVals.addElement("l"); attVals.addElement("t"); data.deleteAttributeAt(3); data.insertAttributeAt(new Attribute("A4", attVals), 3); //A5 attVals = new FastVector(); attVals.addElement("g"); attVals.addElement("p"); attVals.addElement("gg"); data.deleteAttributeAt(4); data.insertAttributeAt(new Attribute("A5", attVals), 4); //A6 attVals = new FastVector(); attVals.addElement("c"); attVals.addElement("d"); attVals.addElement("cc"); attVals.addElement("i"); attVals.addElement("j"); attVals.addElement("k"); attVals.addElement("m"); attVals.addElement("r"); attVals.addElement("q"); attVals.addElement("w"); attVals.addElement("x"); attVals.addElement("e"); attVals.addElement("aa"); attVals.addElement("ff"); data.deleteAttributeAt(5); data.insertAttributeAt(new Attribute("A6", attVals), 5); //A7 attVals = new FastVector(); attVals.addElement("v"); attVals.addElement("h"); attVals.addElement("bb"); attVals.addElement("j"); attVals.addElement("n"); attVals.addElement("z"); attVals.addElement("dd"); attVals.addElement("ff"); 
attVals.addElement("o"); data.deleteAttributeAt(6); data.insertAttributeAt(new Attribute("A7", attVals), 6); //A9 attVals = new FastVector(); attVals.addElement("t"); attVals.addElement("f"); data.deleteAttributeAt(8); data.insertAttributeAt(new Attribute("A9", attVals), 8); //A10 attVals = new FastVector(); attVals.addElement("t"); attVals.addElement("f"); data.deleteAttributeAt(9); data.insertAttributeAt(new Attribute("A10", attVals), 9); //A12 attVals = new FastVector(); attVals.addElement("t"); attVals.addElement("f"); data.deleteAttributeAt(11); data.insertAttributeAt(new Attribute("A12", attVals), 11); //A13 attVals = new FastVector(); attVals.addElement("g"); attVals.addElement("p"); attVals.addElement("s"); data.deleteAttributeAt(12); data.insertAttributeAt(new Attribute("A13", attVals), 12); //Class attVals = new FastVector(); attVals.addElement("+"); attVals.addElement("-"); data.deleteAttributeAt(15); data.insertAttributeAt(new Attribute("C", attVals), 15); for (int i = 0; i < data.numInstances(); i++) { if (!"?".equals(savedData[i][0])) { data.instance(i).setValue(0, savedData[i][0]); } if (!"?".equals(savedData[i][1])) { data.instance(i).setValue(3, savedData[i][1]); } if (!"?".equals(savedData[i][2])) { data.instance(i).setValue(4, savedData[i][2]); } if (!"?".equals(savedData[i][3])) { data.instance(i).setValue(5, savedData[i][3]); } if (!"?".equals(savedData[i][4])) { data.instance(i).setValue(6, savedData[i][4]); } if (!"?".equals(savedData[i][5])) { data.instance(i).setValue(8, savedData[i][5]); } if (!"?".equals(savedData[i][6])) { data.instance(i).setValue(9, savedData[i][6]); } if (!"?".equals(savedData[i][7])) { data.instance(i).setValue(11, savedData[i][7]); } if (!"?".equals(savedData[i][8])) { data.instance(i).setValue(12, savedData[i][8]); } if (!"?".equals(savedData[i][9])) { data.instance(i).setValue(15, savedData[i][9]); } } }
From source file:cluster.ABC.ClusterUtils.java
License:Open Source License
/** Fast version of meanOrMode - streamlined from Instances.meanOrMode for efficiency * Does not check for missing attributes, assumes numeric attributes, assumes Sparse instances *///from ww w. ja va 2 s . c o m public static double[] meanOrMode(Instances insts) { int numAttributes = insts.numAttributes(); double[] value = new double[numAttributes]; double weight = 0; for (int i = 0; i < numAttributes; i++) { value[i] = 0; } for (int j = 0; j < insts.numInstances(); j++) { SparseInstance inst = (SparseInstance) (insts.instance(j)); weight += inst.weight(); for (int i = 0; i < inst.numValues(); i++) { int indexOfIndex = inst.index(i); value[indexOfIndex] += inst.weight() * inst.valueSparse(i); } } if (Utils.eq(weight, 0)) { for (int k = 0; k < numAttributes; k++) { value[k] = 0; } } else { for (int k = 0; k < numAttributes; k++) { value[k] = value[k] / weight; } } return value; }
From source file:clusterer.SimpleKMeansWithSilhouette.java
License:Open Source License
/**
 * Launch the tasks that assign instances to clusters.
 *
 * The instances are split into {@code m_executionSlots} contiguous ranges of
 * equal size, with the last range extended to the end of the data. One
 * {@code KMeansClusterTask} per range is submitted to the executor pool, and
 * every task is awaited before the result is returned.
 *
 * @param insts the instances to be clustered
 * @param clusterAssignments the array of cluster assignments
 * @return true if k means has converged, i.e. every task returned true
 * @throws Exception if a problem occurs
 */
protected boolean launchAssignToClusters(Instances insts, int[] clusterAssignments) throws Exception {
    final int chunkSize = insts.numInstances() / m_executionSlots;
    List<Future<Boolean>> pending = new ArrayList<Future<Boolean>>();

    for (int slot = 0; slot < m_executionSlots; slot++) {
        final int from = slot * chunkSize;
        // The final slot also picks up the remainder of the integer division.
        final int to = (slot == m_executionSlots - 1) ? insts.numInstances() : from + chunkSize;
        pending.add(m_executorPool.submit(new KMeansClusterTask(insts, from, to, clusterAssignments)));
    }

    // Wait for every task, even after one of them has reported a change.
    boolean allStable = true;
    for (Future<Boolean> task : pending) {
        boolean taskStable = task.get();
        allStable = allStable && taskStable;
    }
    return allStable;
}
From source file:clusterer.SimpleKMeansWithSilhouette.java
License:Open Source License
/** * Generates a clusterer. Has to initialize all fields of the clusterer that * are not being set via options.//from w w w. j a v a 2 s . c o m * * @param data set of instances serving as training data * @throws Exception if the clusterer has not been generated successfully */ @Override public void buildClusterer(Instances data) throws Exception { m_canopyClusters = null; // can clusterer handle the data? getCapabilities().testWithFail(data); m_Iterations = 0; m_ReplaceMissingFilter = new ReplaceMissingValues(); Instances instances = new Instances(data); instances.setClassIndex(-1); if (!m_dontReplaceMissing) { m_ReplaceMissingFilter.setInputFormat(instances); instances = Filter.useFilter(instances, m_ReplaceMissingFilter); } m_ClusterNominalCounts = new double[m_NumClusters][instances.numAttributes()][]; m_ClusterMissingCounts = new double[m_NumClusters][instances.numAttributes()]; if (m_displayStdDevs) { m_FullStdDevs = instances.variances(); } m_FullMeansOrMediansOrModes = moveCentroid(0, instances, true, false); m_FullMissingCounts = m_ClusterMissingCounts[0]; m_FullNominalCounts = m_ClusterNominalCounts[0]; double sumOfWeights = instances.sumOfWeights(); for (int i = 0; i < instances.numAttributes(); i++) { if (instances.attribute(i).isNumeric()) { if (m_displayStdDevs) { m_FullStdDevs[i] = Math.sqrt(m_FullStdDevs[i]); } if (m_FullMissingCounts[i] == sumOfWeights) { m_FullMeansOrMediansOrModes[i] = Double.NaN; // mark missing as mean } } else { if (m_FullMissingCounts[i] > m_FullNominalCounts[i][Utils.maxIndex(m_FullNominalCounts[i])]) { m_FullMeansOrMediansOrModes[i] = -1; // mark missing as most common // value } } } m_ClusterCentroids = new Instances(instances, m_NumClusters); int[] clusterAssignments = new int[instances.numInstances()]; if (m_PreserveOrder) { m_Assignments = clusterAssignments; } m_DistanceFunction.setInstances(instances); Random RandomO = new Random(getSeed()); int instIndex; HashMap<DecisionTableHashKey, Integer> initC = new 
HashMap<DecisionTableHashKey, Integer>(); DecisionTableHashKey hk = null; Instances initInstances = null; if (m_PreserveOrder) { initInstances = new Instances(instances); } else { initInstances = instances; } if (m_speedUpDistanceCompWithCanopies) { m_canopyClusters = new Canopy(); m_canopyClusters.setNumClusters(m_NumClusters); m_canopyClusters.setSeed(getSeed()); m_canopyClusters.setT2(getCanopyT2()); m_canopyClusters.setT1(getCanopyT1()); m_canopyClusters.setMaxNumCandidateCanopiesToHoldInMemory(getCanopyMaxNumCanopiesToHoldInMemory()); m_canopyClusters.setPeriodicPruningRate(getCanopyPeriodicPruningRate()); m_canopyClusters.setMinimumCanopyDensity(getCanopyMinimumCanopyDensity()); m_canopyClusters.setDebug(getDebug()); m_canopyClusters.buildClusterer(initInstances); // System.err.println(m_canopyClusters); m_centroidCanopyAssignments = new ArrayList<long[]>(); m_dataPointCanopyAssignments = new ArrayList<long[]>(); } if (m_initializationMethod == KMEANS_PLUS_PLUS) { kMeansPlusPlusInit(initInstances); m_initialStartPoints = new Instances(m_ClusterCentroids); } else if (m_initializationMethod == CANOPY) { canopyInit(initInstances); m_initialStartPoints = new Instances(m_canopyClusters.getCanopies()); } else if (m_initializationMethod == FARTHEST_FIRST) { farthestFirstInit(initInstances); m_initialStartPoints = new Instances(m_ClusterCentroids); } else { // random for (int j = initInstances.numInstances() - 1; j >= 0; j--) { instIndex = RandomO.nextInt(j + 1); hk = new DecisionTableHashKey(initInstances.instance(instIndex), initInstances.numAttributes(), true); if (!initC.containsKey(hk)) { m_ClusterCentroids.add(initInstances.instance(instIndex)); initC.put(hk, null); } initInstances.swap(j, instIndex); if (m_ClusterCentroids.numInstances() == m_NumClusters) { break; } } m_initialStartPoints = new Instances(m_ClusterCentroids); } if (m_speedUpDistanceCompWithCanopies) { // assign canopies to training data for (int i = 0; i < instances.numInstances(); i++) { 
m_dataPointCanopyAssignments.add(m_canopyClusters.assignCanopies(instances.instance(i))); } } m_NumClusters = m_ClusterCentroids.numInstances(); // removing reference initInstances = null; int i; boolean converged = false; int emptyClusterCount; Instances[] tempI = new Instances[m_NumClusters]; m_squaredErrors = new double[m_NumClusters]; m_ClusterNominalCounts = new double[m_NumClusters][instances.numAttributes()][0]; m_ClusterMissingCounts = new double[m_NumClusters][instances.numAttributes()]; startExecutorPool(); while (!converged) { if (m_speedUpDistanceCompWithCanopies) { // re-assign canopies to the current cluster centers m_centroidCanopyAssignments.clear(); for (int kk = 0; kk < m_ClusterCentroids.numInstances(); kk++) { m_centroidCanopyAssignments .add(m_canopyClusters.assignCanopies(m_ClusterCentroids.instance(kk))); } } emptyClusterCount = 0; m_Iterations++; converged = true; if (m_executionSlots <= 1 || instances.numInstances() < 2 * m_executionSlots) { for (i = 0; i < instances.numInstances(); i++) { Instance toCluster = instances.instance(i); int newC = clusterProcessedInstance(toCluster, false, true, m_speedUpDistanceCompWithCanopies ? 
m_dataPointCanopyAssignments.get(i) : null); if (newC != clusterAssignments[i]) { converged = false; } clusterAssignments[i] = newC; } } else { converged = launchAssignToClusters(instances, clusterAssignments); } // update centroids m_ClusterCentroids = new Instances(instances, m_NumClusters); for (i = 0; i < m_NumClusters; i++) { tempI[i] = new Instances(instances, 0); } for (i = 0; i < instances.numInstances(); i++) { tempI[clusterAssignments[i]].add(instances.instance(i)); } if (m_executionSlots <= 1 || instances.numInstances() < 2 * m_executionSlots) { for (i = 0; i < m_NumClusters; i++) { if (tempI[i].numInstances() == 0) { // empty cluster emptyClusterCount++; } else { moveCentroid(i, tempI[i], true, true); } } } else { emptyClusterCount = launchMoveCentroids(tempI); } if (m_Iterations == m_MaxIterations) { converged = true; } if (emptyClusterCount > 0) { m_NumClusters -= emptyClusterCount; if (converged) { Instances[] t = new Instances[m_NumClusters]; int index = 0; for (int k = 0; k < tempI.length; k++) { if (tempI[k].numInstances() > 0) { t[index] = tempI[k]; for (i = 0; i < tempI[k].numAttributes(); i++) { m_ClusterNominalCounts[index][i] = m_ClusterNominalCounts[k][i]; } index++; } } tempI = t; } else { tempI = new Instances[m_NumClusters]; } } if (!converged) { m_ClusterNominalCounts = new double[m_NumClusters][instances.numAttributes()][0]; } } // calculate errors if (!m_FastDistanceCalc) { for (i = 0; i < instances.numInstances(); i++) { clusterProcessedInstance(instances.instance(i), true, false, null); } } if (m_displayStdDevs) { m_ClusterStdDevs = new Instances(instances, m_NumClusters); } m_ClusterSizes = new double[m_NumClusters]; for (i = 0; i < m_NumClusters; i++) { if (m_displayStdDevs) { double[] vals2 = tempI[i].variances(); for (int j = 0; j < instances.numAttributes(); j++) { if (instances.attribute(j).isNumeric()) { vals2[j] = Math.sqrt(vals2[j]); } else { vals2[j] = Utils.missingValue(); } } m_ClusterStdDevs.add(new DenseInstance(1.0, 
vals2)); } m_ClusterSizes[i] = tempI[i].sumOfWeights(); } m_executorPool.shutdown(); // save memory! m_DistanceFunction.clean(); // Calculate Silhouette Coefficient SilCoeff = new double[instances.numInstances()]; AvgSilCoeff = 0; for (int z = 0; z < instances.numInstances(); z++) { double[] distance = new double[m_NumClusters]; Arrays.fill(distance, 0.0); //Sum for (int y = 0; y < instances.numInstances(); y++) { distance[clusterAssignments[y]] += m_DistanceFunction.distance(instances.get(z), instances.get(y)); } //Average for (int x = 0; x < m_NumClusters; x++) { distance[x] = distance[x] / m_ClusterSizes[x]; } double a = distance[clusterAssignments[z]]; distance[clusterAssignments[z]] = Double.MAX_VALUE; Arrays.sort(distance); double b = distance[0]; SilCoeff[z] = (b - a) / Math.max(a, b); AvgSilCoeff += SilCoeff[z]; } AvgSilCoeff = AvgSilCoeff / instances.numInstances(); //System.out.println("AvgSilCoeff: " + AvgSilCoeff); }
From source file:clusterer.SimpleKMeansWithSilhouette.java
License:Open Source License
/** * Initialize using the k-means++ method * //from ww w.j a va 2s . com * @param data the training data * @throws Exception if a problem occurs */ protected void kMeansPlusPlusInit(Instances data) throws Exception { Random randomO = new Random(getSeed()); HashMap<DecisionTableHashKey, String> initC = new HashMap<DecisionTableHashKey, String>(); // choose initial center uniformly at random int index = randomO.nextInt(data.numInstances()); m_ClusterCentroids.add(data.instance(index)); DecisionTableHashKey hk = new DecisionTableHashKey(data.instance(index), data.numAttributes(), true); initC.put(hk, null); int iteration = 0; int remainingInstances = data.numInstances() - 1; if (m_NumClusters > 1) { // proceed with selecting the rest // distances to the initial randomly chose center double[] distances = new double[data.numInstances()]; double[] cumProbs = new double[data.numInstances()]; for (int i = 0; i < data.numInstances(); i++) { distances[i] = m_DistanceFunction.distance(data.instance(i), m_ClusterCentroids.instance(iteration)); } // now choose the remaining cluster centers for (int i = 1; i < m_NumClusters; i++) { // distances converted to probabilities double[] weights = new double[data.numInstances()]; System.arraycopy(distances, 0, weights, 0, distances.length); Utils.normalize(weights); double sumOfProbs = 0; for (int k = 0; k < data.numInstances(); k++) { sumOfProbs += weights[k]; cumProbs[k] = sumOfProbs; } cumProbs[data.numInstances() - 1] = 1.0; // make sure there are no // rounding issues // choose a random instance double prob = randomO.nextDouble(); for (int k = 0; k < cumProbs.length; k++) { if (prob < cumProbs[k]) { Instance candidateCenter = data.instance(k); hk = new DecisionTableHashKey(candidateCenter, data.numAttributes(), true); if (!initC.containsKey(hk)) { initC.put(hk, null); m_ClusterCentroids.add(candidateCenter); } else { // we shouldn't get here because any instance that is a duplicate // of // an already chosen cluster center should 
have zero distance (and // hence // zero probability of getting chosen) to that center. System.err.println("We shouldn't get here...."); } remainingInstances--; break; } } iteration++; if (remainingInstances == 0) { break; } // prepare to choose the next cluster center. // check distances against the new cluster center to see if it is closer for (int k = 0; k < data.numInstances(); k++) { if (distances[k] > 0) { double newDist = m_DistanceFunction.distance(data.instance(k), m_ClusterCentroids.instance(iteration)); if (newDist < distances[k]) { distances[k] = newDist; } } } } } }
From source file:clusterer.SimpleKMeansWithSilhouette.java
License:Open Source License
/** * Move the centroid to it's new coordinates. Generate the centroid * coordinates based on it's members (objects assigned to the cluster of the * centroid) and the distance function being used. * //from w w w.j a v a 2 s. co m * @param centroidIndex index of the centroid which the coordinates will be * computed * @param members the objects that are assigned to the cluster of this * centroid * @param updateClusterInfo if the method is supposed to update the m_Cluster * arrays * @param addToCentroidInstances true if the method is to add the computed * coordinates to the Instances holding the centroids * @return the centroid coordinates */ protected double[] moveCentroid(int centroidIndex, Instances members, boolean updateClusterInfo, boolean addToCentroidInstances) { double[] vals = new double[members.numAttributes()]; double[][] nominalDists = new double[members.numAttributes()][]; double[] weightMissing = new double[members.numAttributes()]; double[] weightNonMissing = new double[members.numAttributes()]; // Quickly calculate some relevant statistics for (int j = 0; j < members.numAttributes(); j++) { if (members.attribute(j).isNominal()) { nominalDists[j] = new double[members.attribute(j).numValues()]; } } for (Instance inst : members) { for (int j = 0; j < members.numAttributes(); j++) { if (inst.isMissing(j)) { weightMissing[j] += inst.weight(); } else { weightNonMissing[j] += inst.weight(); if (members.attribute(j).isNumeric()) { vals[j] += inst.weight() * inst.value(j); // Will be overwritten in Manhattan case } else { nominalDists[j][(int) inst.value(j)] += inst.weight(); } } } } for (int j = 0; j < members.numAttributes(); j++) { if (members.attribute(j).isNumeric()) { if (weightNonMissing[j] > 0) { vals[j] /= weightNonMissing[j]; } else { vals[j] = Utils.missingValue(); } } else { double max = -Double.MAX_VALUE; double maxIndex = -1; for (int i = 0; i < nominalDists[j].length; i++) { if (nominalDists[j][i] > max) { max = nominalDists[j][i]; maxIndex = i; 
} if (max < weightMissing[j]) { vals[j] = Utils.missingValue(); } else { vals[j] = maxIndex; } } } } if (m_DistanceFunction instanceof ManhattanDistance) { // Need to replace means by medians Instances sortedMembers = null; int middle = (members.numInstances() - 1) / 2; boolean dataIsEven = ((members.numInstances() % 2) == 0); if (m_PreserveOrder) { sortedMembers = members; } else { sortedMembers = new Instances(members); } for (int j = 0; j < members.numAttributes(); j++) { if ((weightNonMissing[j] > 0) && members.attribute(j).isNumeric()) { // singleton special case if (members.numInstances() == 1) { vals[j] = members.instance(0).value(j); } else { vals[j] = sortedMembers.kthSmallestValue(j, middle + 1); if (dataIsEven) { vals[j] = (vals[j] + sortedMembers.kthSmallestValue(j, middle + 2)) / 2; } } } } } if (updateClusterInfo) { for (int j = 0; j < members.numAttributes(); j++) { m_ClusterMissingCounts[centroidIndex][j] = weightMissing[j]; m_ClusterNominalCounts[centroidIndex][j] = nominalDists[j]; } } if (addToCentroidInstances) { m_ClusterCentroids.add(new DenseInstance(1.0, vals)); } return vals; }
From source file:clustering.myAgnes.java
/**
 * Builds the clustering bottom-up: every instance starts as its own cluster,
 * a table of pairwise instance distances is filled, and the pair of clusters
 * with the smallest table entry is merged repeatedly until no more than
 * {@code numberOfClusters()} clusters remain.
 *
 * @param data the instances to cluster
 * @throws Exception if a problem occurs
 */
@Override
public void buildClusterer(Instances data) throws Exception {
    // Seed: one singleton cluster per instance.
    for (int idx = 0; idx < data.numInstances(); idx++) {
        ArrayList<Instance> singleton = new ArrayList<Instance>();
        singleton.add(data.instance(idx));
        clusters.add(singleton);
    }

    distanceFunction.setInstances(data);

    // Distance table: row r holds the distances from instance r to every instance.
    for (int row = 0; row < clusters.size(); row++) {
        ArrayList<Double> rowDistances = new ArrayList<Double>();
        for (int col = 0; col < clusters.size(); col++) {
            Double between = distanceFunction.distance(data.instance(row), data.instance(col));
            rowDistances.add(between);
        }
        clusterDistances.add(rowDistances);
    }

    // Merge the closest pair until the requested number of clusters is reached.
    while (clusters.size() > numberOfClusters()) {
        int first = -1;
        int second = -1;
        double smallest = Double.MAX_VALUE;
        for (int a = 0; a < clusters.size(); a++) {
            for (int b = a + 1; b < clusters.size(); b++) {
                double candidate = clusterDistances.get(a).get(b);
                // "<=" means the last pair with the minimal distance wins ties
                if (candidate <= smallest) {
                    smallest = candidate;
                    first = a;
                    second = b;
                }
            }
        }
        merge(first, second);
    }
}