List of usage examples for weka.core Instances add
@Override public boolean add(Instance instance)
From source file:milk.classifiers.MIBoost.java
License:Open Source License
/**
 * Builds the boosted multi-instance classifier.
 *
 * @param exps the training data (bags/exemplars) used for generating the
 *             boosted classifier
 * @exception Exception if the classifier could not be built successfully
 */
public void buildClassifier(Exemplars exps) throws Exception {

    Exemplars train = new Exemplars(exps);

    if (train.classAttribute().type() != Attribute.NOMINAL) {
        throw new Exception("Class attribute must be nominal.");
    }
    if (train.checkForStringAttributes()) {
        throw new Exception("Can't handle string attributes!");
    }

    m_ClassIndex = train.classIndex();
    m_IdIndex = train.idIndex();
    m_NumClasses = train.numClasses();
    m_NumIterations = m_MaxIterations;

    // Only binary class problems are supported.
    if (m_NumClasses > 2) {
        throw new Exception("Not yet prepared to deal with multiple classes!");
    }

    if (m_Classifier == null)
        throw new Exception("A base classifier has not been specified!");
    // Boosting reweights instances each round, so the base learner must honor weights.
    if (!(m_Classifier instanceof WeightedInstancesHandler))
        throw new Exception("Base classifier cannot handle weighted instances!");

    m_Models = Classifier.makeCopies(m_Classifier, getMaxIterations());
    if (m_Debug)
        System.err.println("Base classifier: " + m_Classifier.getClass().getName());

    m_Beta = new double[m_NumIterations];
    m_Attributes = new Instances(train.exemplar(0).getInstances(), 0);

    double N = (double) train.numExemplars(), sumNi = 0;
    Instances data = new Instances(m_Attributes, 0); // Data to learn a model
    data.deleteAttributeAt(m_IdIndex); // bag-ID attribute carries no predictive information
    Instances dataset = new Instances(data, 0);

    // Initialize weights: each bag starts with weight sumNi/N, split evenly
    // over its instances, so every bag contributes equally.
    for (int i = 0; i < N; i++)
        sumNi += train.exemplar(i).getInstances().numInstances();

    for (int i = 0; i < N; i++) {
        Exemplar exi = train.exemplar(i);
        exi.setWeight(sumNi / N);
        Instances insts = exi.getInstances();
        double ni = (double) insts.numInstances();
        for (int j = 0; j < ni; j++) {
            Instance ins = new Instance(insts.instance(j)); // Copy
            ins.deleteAttributeAt(m_IdIndex);
            ins.setDataset(dataset);
            ins.setWeight(exi.weight() / ni);
            data.add(ins);
        }
    }

    // Assume the order of the instances are preserved in the Discretize filter
    // (dataIdx below indexes the filtered data in the same bag order).
    if (m_DiscretizeBin > 0) {
        m_Filter = new Discretize();
        m_Filter.setInputFormat(new Instances(data, 0));
        m_Filter.setBins(m_DiscretizeBin);
        data = Filter.useFilter(data, m_Filter);
    }

    // Main boosting loop.
    int dataIdx;
    iterations: for (int m = 0; m < m_MaxIterations; m++) {
        if (m_Debug)
            System.err.println("\nIteration " + m);

        // Build a model on the current weighting.
        m_Models[m].buildClassifier(data);

        // Compute the per-bag error rate (fraction of misclassified instances).
        double[] err = new double[(int) N], weights = new double[(int) N];
        boolean perfect = true, tooWrong = true;
        dataIdx = 0;
        for (int n = 0; n < N; n++) {
            Exemplar exn = train.exemplar(n);
            // Prediction of each instance and the predicted class distribution of the bag
            double nn = (double) exn.getInstances().numInstances();
            for (int p = 0; p < nn; p++) {
                Instance testIns = data.instance(dataIdx++);
                if ((int) m_Models[m].classifyInstance(testIns) != (int) exn.classValue())
                    // Weighted instance-wise 0-1 errors
                    err[n]++;
            }
            weights[n] = exn.weight();
            err[n] /= nn;
            // NOTE(review): flag names look inverted relative to their updates
            // (err > 0.5 clears "perfect", err < 0.5 clears "tooWrong") — the net
            // effect is "all bags on the same side of 0.5" terminates; confirm intent.
            if (err[n] > 0.5)
                perfect = false;
            if (err[n] < 0.5)
                tooWrong = false;
        }

        if (perfect || tooWrong) { // No or 100% classification error, cannot find beta
            if (m == 0)
                m_Beta[m] = 1.0;
            else
                m_Beta[m] = 0;
            m_NumIterations = m + 1;
            if (m_Debug)
                System.err.println("No errors");
            break iterations;
        }

        // One-dimensional unconstrained search for the model weight c (beta).
        double[] x = new double[1];
        x[0] = 0;
        double[][] b = new double[2][x.length];
        b[0][0] = Double.NaN; // NaN bounds = unconstrained
        b[1][0] = Double.NaN;

        OptEng opt = new OptEng();
        opt.setWeights(weights);
        opt.setErrs(err);
        if (m_Debug)
            System.out.println("Start searching for c... ");
        x = opt.findArgmin(x, b);
        // findArgmin returns null when its iteration budget is exhausted;
        // resume from the best point found so far until it converges.
        while (x == null) {
            x = opt.getVarbValues();
            if (m_Debug)
                System.out.println("200 iterations finished, not enough!");
            x = opt.findArgmin(x, b);
        }
        if (m_Debug)
            System.out.println("Finished.");
        m_Beta[m] = x[0];

        if (m_Debug)
            System.err.println("c = " + m_Beta[m]);

        // Stop if error too small or error too big and ignore this model.
        if (Double.isInfinite(m_Beta[m]) || Utils.smOrEq(m_Beta[m], 0)) {
            if (m == 0)
                m_Beta[m] = 1.0;
            else
                m_Beta[m] = 0;
            m_NumIterations = m + 1;
            if (m_Debug)
                System.err.println("Errors out of range!");
            break iterations;
        }

        // Exponential bag-weight update, then renormalize so total weight
        // stays at sumNi; instance weights are the bag weight split evenly.
        dataIdx = 0;
        double totWeights = 0;
        for (int r = 0; r < N; r++) {
            Exemplar exr = train.exemplar(r);
            exr.setWeight(weights[r] * Math.exp(m_Beta[m] * (2.0 * err[r] - 1.0)));
            totWeights += exr.weight();
        }

        if (m_Debug)
            System.err.println("Total weights = " + totWeights);

        for (int r = 0; r < N; r++) {
            Exemplar exr = train.exemplar(r);
            double num = (double) exr.getInstances().numInstances();
            exr.setWeight(sumNi * exr.weight() / totWeights);
            for (int s = 0; s < num; s++) {
                Instance inss = data.instance(dataIdx);
                inss.setWeight(exr.weight() / num);
                // Guard against numeric blow-up in the weight update.
                if (Double.isNaN(inss.weight()))
                    throw new Exception("instance " + s + " in bag " + r + " has weight NaN!");
                dataIdx++;
            }
        }
    }
}
From source file:milk.classifiers.MIRBFNetwork.java
License:Open Source License
/**
 * Transforms a set of exemplars by clustering all their instances together
 * (k-means wrapped in a density-based clusterer, via the m_clm filter) and
 * re-expressing each bag in the filtered/cluster space, with the bag-ID
 * re-attached as attribute 0.
 *
 * @param ex the exemplars to transform
 * @return the transformed exemplars
 * @throws Exception if clustering or filtering fails
 */
public Exemplars transform(Exemplars ex) throws Exception {

    // Throw all the instances together into one flat dataset; weight each
    // instance by 1/bagSize so every bag contributes equal total weight.
    Instances data = new Instances(ex.exemplar(0).getInstances());
    for (int i = 0; i < ex.numExemplars(); i++) {
        Exemplar curr = ex.exemplar(i);
        double weight = 1.0 / (double) curr.getInstances().numInstances();
        for (int j = 0; j < curr.getInstances().numInstances(); j++) {
            Instance inst = (Instance) curr.getInstances().instance(j).copy();
            inst.setWeight(weight);
            data.add(inst);
        }
    }

    // Rescale so the sum of weights equals the number of instances.
    double factor = (double) data.numInstances() / (double) data.sumOfWeights();
    for (int i = 0; i < data.numInstances(); i++) {
        data.instance(i).setWeight(data.instance(i).weight() * factor);
    }

    // Configure the cluster-membership filter: k-means wrapped in a
    // density-based clusterer; the bag-ID attribute is ignored for clustering.
    SimpleKMeans kMeans = new SimpleKMeans();
    kMeans.setNumClusters(m_num_clusters);
    MakeDensityBasedClusterer clust = new MakeDensityBasedClusterer();
    clust.setClusterer(kMeans);
    m_clm.setDensityBasedClusterer(clust);
    m_clm.setIgnoredAttributeIndices("" + (ex.exemplar(0).idIndex() + 1));
    m_clm.setInputFormat(data);

    // Run the filter once to train the clusterer; only the output header is
    // kept (the filtered instances themselves are discarded).
    Instances tempData = Filter.useFilter(data, m_clm);
    tempData = new Instances(tempData, 0);
    // Re-insert the bag-ID attribute at position 0 of the new header.
    tempData.insertAttributeAt(ex.exemplar(0).getInstances().attribute(0), 0);

    // Filter each bag separately and rebuild it as an exemplar in the new space.
    Exemplars newExs = new Exemplars(tempData);
    for (int i = 0; i < ex.numExemplars(); i++) {
        Exemplar curr = ex.exemplar(i);
        Instances temp = Filter.useFilter(curr.getInstances(), m_clm);
        temp.insertAttributeAt(ex.exemplar(0).getInstances().attribute(0), 0);
        // Stamp every transformed instance with its originating bag's ID.
        for (int j = 0; j < temp.numInstances(); j++) {
            temp.instance(j).setValue(0, curr.idValue());
        }
        newExs.add(new Exemplar(temp));
    }
    return newExs;
}
From source file:milk.classifiers.MIWrapper.java
License:Open Source License
public Instances transform(Exemplars train) throws Exception { Instances data = new Instances(m_Attributes);// Data to learn a model data.deleteAttributeAt(m_IdIndex);// ID attribute useless Instances dataset = new Instances(data, 0); double sumNi = 0, // Total number of instances N = train.numExemplars(); // Number of exemplars for (int i = 0; i < N; i++) sumNi += train.exemplar(i).getInstances().numInstances(); // Initialize weights for (int i = 0; i < N; i++) { Exemplar exi = train.exemplar(i); // m_Prior[(int)exi.classValue()]++; Instances insts = exi.getInstances(); double ni = (double) insts.numInstances(); for (int j = 0; j < ni; j++) { Instance ins = new Instance(insts.instance(j));// Copy ins.deleteAttributeAt(m_IdIndex); ins.setDataset(dataset);//from ww w .jav a 2s .com ins.setWeight(sumNi / (N * ni)); //ins.setWeight(1); data.add(ins); } } return data; }
From source file:milk.classifiers.SimpleMI.java
License:Open Source License
public Instances transform(Exemplars train) throws Exception { Instances data = new Instances(m_Attributes);// Data to learn a model data.deleteAttributeAt(m_IdIndex);// ID attribute useless Instances dataset = new Instances(data, 0); Instance template = new Instance(dataset.numAttributes()); template.setDataset(dataset);//from w ww. j av a 2 s .c o m double N = train.numExemplars(); // Number of exemplars for (int i = 0; i < N; i++) { Exemplar exi = train.exemplar(i); Instances insts = exi.getInstances(); int attIdx = 0; Instance newIns = new Instance(template); newIns.setDataset(dataset); for (int j = 0; j < insts.numAttributes(); j++) { if ((j == m_IdIndex) || (j == m_ClassIndex)) continue; double value; if (m_TransformMethod == 1) { value = insts.meanOrMode(j); } else { double[] minimax = minimax(insts, j); value = (minimax[0] + minimax[1]) / 2.0; } newIns.setValue(attIdx++, value); } newIns.setClassValue(exi.classValue()); data.add(newIns); } return data; }
From source file:milk.experiment.MIInstanceQuery.java
License:Open Source License
/** * Makes a database query to convert a table into a set of instances * * @param query the query to convert to instances * @return the instances contained in the result of the query * @exception Exception if an error occurs *//*from ww w. j a v a2s. com*/ public Instances retrieveInstances(String query) throws Exception { System.err.println("Executing query: " + query); connectToDatabase(); if (execute(query) == false) { throw new Exception("Query didn't produce results"); } ResultSet rs = getResultSet(); System.err.println("Getting metadata..."); ResultSetMetaData md = rs.getMetaData(); // Determine structure of the instances int numAttributes = md.getColumnCount(); int[] attributeTypes = new int[numAttributes]; Hashtable[] nominalIndexes = new Hashtable[numAttributes]; FastVector[] nominalStrings = new FastVector[numAttributes]; for (int i = 1; i <= numAttributes; i++) { switch (md.getColumnType(i)) { case Types.CHAR: case Types.VARCHAR: case Types.LONGVARCHAR: case Types.BINARY: case Types.VARBINARY: case Types.LONGVARBINARY: //System.err.println("String --> nominal"); attributeTypes[i - 1] = Attribute.NOMINAL; nominalIndexes[i - 1] = new Hashtable(); nominalStrings[i - 1] = new FastVector(); break; case Types.BIT: ////System.err.println("boolean --> nominal"); attributeTypes[i - 1] = Attribute.NOMINAL; nominalIndexes[i - 1] = new Hashtable(); nominalIndexes[i - 1].put("false", new Double(0)); nominalIndexes[i - 1].put("true", new Double(1)); nominalStrings[i - 1] = new FastVector(); nominalStrings[i - 1].addElement("false"); nominalStrings[i - 1].addElement("true"); break; case Types.NUMERIC: case Types.DECIMAL: //System.err.println("BigDecimal --> numeric"); attributeTypes[i - 1] = Attribute.NUMERIC; break; case Types.TINYINT: //System.err.println("byte --> numeric"); attributeTypes[i - 1] = Attribute.NUMERIC; break; case Types.SMALLINT: //System.err.println("short --> numeric"); attributeTypes[i - 1] = Attribute.NUMERIC; break; case Types.INTEGER: 
//System.err.println("int --> numeric"); attributeTypes[i - 1] = Attribute.NUMERIC; break; case Types.BIGINT: //System.err.println("long --> numeric"); attributeTypes[i - 1] = Attribute.NUMERIC; break; case Types.REAL: //System.err.println("float --> numeric"); attributeTypes[i - 1] = Attribute.NUMERIC; break; case Types.FLOAT: case Types.DOUBLE: //System.err.println("double --> numeric"); attributeTypes[i - 1] = Attribute.NUMERIC; break; /*case Types.BINARY: case Types.VARBINARY: case Types.LONGVARBINARY: //System.err.println("byte[] --> unsupported"); attributeTypes[i - 1] = Attribute.STRING; break; */ case Types.DATE: case Types.TIME: case Types.TIMESTAMP: attributeTypes[i - 1] = Attribute.DATE; break; default: //System.err.println("Unknown column type"); attributeTypes[i - 1] = Attribute.STRING; } } // Step through the tuples System.err.println("Creating instances..."); FastVector instances = new FastVector(); int rowCount = 0; while (rs.next()) { if (rowCount % 100 == 0) { System.err.print("read " + rowCount + " instances \r"); System.err.flush(); } double[] vals = new double[numAttributes]; for (int i = 1; i <= numAttributes; i++) { switch (md.getColumnType(i)) { case Types.CHAR: case Types.VARCHAR: case Types.LONGVARCHAR: case Types.BINARY: case Types.VARBINARY: case Types.LONGVARBINARY: String str = rs.getString(i); if (rs.wasNull()) { vals[i - 1] = Instance.missingValue(); } else { Double index = (Double) nominalIndexes[i - 1].get(str); if (index == null) { index = new Double(nominalStrings[i - 1].size()); nominalIndexes[i - 1].put(str, index); nominalStrings[i - 1].addElement(str); } vals[i - 1] = index.doubleValue(); } break; case Types.BIT: boolean boo = rs.getBoolean(i); if (rs.wasNull()) { vals[i - 1] = Instance.missingValue(); } else { vals[i - 1] = (boo ? 1.0 : 0.0); } break; case Types.NUMERIC: case Types.DECIMAL: // BigDecimal bd = rs.getBigDecimal(i, 4); double dd = rs.getDouble(i); // Use the column precision instead of 4? 
if (rs.wasNull()) { vals[i - 1] = Instance.missingValue(); } else { // newInst.setValue(i - 1, bd.doubleValue()); vals[i - 1] = dd; } break; case Types.TINYINT: byte by = rs.getByte(i); if (rs.wasNull()) { vals[i - 1] = Instance.missingValue(); } else { vals[i - 1] = (double) by; } break; case Types.SMALLINT: short sh = rs.getByte(i); if (rs.wasNull()) { vals[i - 1] = Instance.missingValue(); } else { vals[i - 1] = (double) sh; } break; case Types.INTEGER: int in = rs.getInt(i); if (rs.wasNull()) { vals[i - 1] = Instance.missingValue(); } else { vals[i - 1] = (double) in; } break; case Types.BIGINT: long lo = rs.getLong(i); if (rs.wasNull()) { vals[i - 1] = Instance.missingValue(); } else { vals[i - 1] = (double) lo; } break; case Types.REAL: float fl = rs.getFloat(i); if (rs.wasNull()) { vals[i - 1] = Instance.missingValue(); } else { vals[i - 1] = (double) fl; } break; case Types.FLOAT: case Types.DOUBLE: double dou = rs.getDouble(i); if (rs.wasNull()) { vals[i - 1] = Instance.missingValue(); } else { vals[i - 1] = (double) dou; } break; /*case Types.BINARY: case Types.VARBINARY: case Types.LONGVARBINARY: */ case Types.DATE: case Types.TIME: case Types.TIMESTAMP: Date date = rs.getDate(i); if (rs.wasNull()) { vals[i - 1] = Instance.missingValue(); } else { // TODO: Do a value check here. 
vals[i - 1] = (double) date.getTime(); } break; default: vals[i - 1] = Instance.missingValue(); } } Instance newInst; if (m_CreateSparseData) { newInst = new SparseInstance(1.0, vals); } else { newInst = new Instance(1.0, vals); } instances.addElement(newInst); rowCount++; } //disconnectFromDatabase(); (perhaps other queries might be made) // Create the header and add the instances to the dataset System.err.println("Creating header..."); FastVector attribInfo = new FastVector(); for (int i = 0; i < numAttributes; i++) { String attribName = md.getColumnName(i + 1); switch (attributeTypes[i]) { case Attribute.NOMINAL: attribInfo.addElement(new Attribute(attribName, nominalStrings[i])); break; case Attribute.NUMERIC: attribInfo.addElement(new Attribute(attribName)); break; case Attribute.STRING: attribInfo.addElement(new Attribute(attribName, (FastVector) null)); break; case Attribute.DATE: attribInfo.addElement(new Attribute(attribName, (String) null)); break; default: throw new Exception("Unknown attribute type"); } } Instances result = new Instances("QueryResult", attribInfo, instances.size()); for (int i = 0; i < instances.size(); i++) { result.add((Instance) instances.elementAt(i)); } rs.close(); return result; }
From source file:milk.experiment.MIInstancesResultListener.java
License:Open Source License
/**
 * Perform any postprocessing. When this method is called, it indicates
 * that no more results will be sent that need to be grouped together
 * in any way. Builds the final Instances header from the accumulated
 * key/result names and attribute types, writes the header followed by
 * every buffered instance to m_Out, and closes the stream unless output
 * goes to stdout ("-").
 *
 * @param rp the ResultProducer that generated the results
 * @exception Exception if an error occurs
 */
public void postProcess(MIResultProducer rp) throws Exception {

    // Guard against results arriving from a producer we did not register with.
    if (m_RP != rp) {
        throw new Error("Unrecognized ResultProducer sending results!!");
    }

    String[] keyNames = m_RP.getKeyNames();
    String[] resultNames = m_RP.getResultNames();
    FastVector attribInfo = new FastVector();
    for (int i = 0; i < m_AttributeTypes.length; i++) {
        // The first keyNames.length attributes are keys (prefixed "Key_");
        // the remainder are result columns.
        String attribName = "Unknown";
        if (i < keyNames.length) {
            attribName = "Key_" + keyNames[i];
        } else {
            attribName = resultNames[i - keyNames.length];
        }
        switch (m_AttributeTypes[i]) {
        case Attribute.NOMINAL:
            // Nominal attribute with no observed values degrades to a string attribute.
            if (m_NominalStrings[i].size() > 0) {
                attribInfo.addElement(new Attribute(attribName, m_NominalStrings[i]));
            } else {
                attribInfo.addElement(new Attribute(attribName, (FastVector) null));
            }
            break;
        case Attribute.NUMERIC:
            attribInfo.addElement(new Attribute(attribName));
            break;
        case Attribute.STRING:
            attribInfo.addElement(new Attribute(attribName, (FastVector) null));
            break;
        default:
            throw new Exception("Unknown attribute type");
        }
    }

    Instances result = new Instances("InstanceResultListener", attribInfo, m_Instances.size());
    for (int i = 0; i < m_Instances.size(); i++) {
        result.add((Instance) m_Instances.elementAt(i));
    }

    // Write the ARFF header (empty copy), then one line per instance.
    m_Out.println(new Instances(result, 0));
    for (int i = 0; i < result.numInstances(); i++) {
        m_Out.println(result.instance(i));
    }

    // "-" means stdout; only close a real output file.
    if (!(m_OutputFile == null) && !(m_OutputFile.getName().equals("-"))) {
        m_Out.close();
    }
}
From source file:ml.WekaBatteryPredictionExample.java
License:Open Source License
/**
 * Loads a battery-prediction training set from a comma-separated text file.
 * Each line must contain two numeric fields: time_charged,battery_lasted_time.
 * The second attribute (index 1) is set as the class attribute.
 *
 * FIX: the readers were previously closed only on the success path and would
 * leak if parsing threw; they are now managed with try-with-resources.
 *
 * @param txtFile path of the CSV-like text file to read
 * @return the loaded dataset
 * @throws IOException if the file cannot be read
 */
private static Instances loadDatasetFromTxt(String txtFile) throws IOException {
    ArrayList<Attribute> atts = new ArrayList<>(2);
    atts.add(new Attribute("time_charged", Attribute.NUMERIC));
    atts.add(new Attribute("battery_lasted_time", Attribute.NUMERIC));

    Instances data = new Instances("battery-prediction-training-set", atts, 0);
    data.setClassIndex(1); // battery_lasted_time is the target

    // try-with-resources guarantees the file is closed even if a line fails to parse.
    try (BufferedReader br = new BufferedReader(new FileReader(new File(txtFile)))) {
        String line;
        while ((line = br.readLine()) != null) {
            String[] values = line.split(",");
            double[] newInst = new double[2];
            newInst[0] = Double.parseDouble(values[0]);
            newInst[1] = Double.parseDouble(values[1]);
            data.add(new DenseInstance(1.0, newInst));
        }
    }
    return data;
}
From source file:mlflex.learners.WekaLearner.java
License:Open Source License
/**
 * Creates a Weka dataset from an ML-Flex Predictions collection.
 * The class attribute is the one at index 1 of the attribute vector.
 *
 * @param predictions ML-Flex predictions to convert
 * @return Weka instances
 * @throws Exception if the conversion fails
 */
private static Instances GetEvaluationInstances(Predictions predictions) throws Exception {
    FastVector attributes = GetAttributeVector(predictions);

    Instances dataset = new Instances("DataSet", attributes, predictions.Size());
    dataset.setClass((Attribute) attributes.elementAt(1));

    for (Prediction p : predictions.GetAll()) {
        dataset.add(GetInstance(dataset, attributes, p));
    }
    return dataset;
}
From source file:mlflex.WekaInMemoryLearner.java
License:Open Source License
/** Creates Weka instances from ML-Flex collections. * * * @param dependentVariableInstances Dependent variable data instances * @param attVector Vector of Weka attributes * @param instances ML-Flex collection of instances * @return Weka instances/*from w w w. j av a2 s .c o m*/ * @throws Exception */ public static Instances GetInstances(DataInstanceCollection dependentVariableInstances, FastVector attVector, DataInstanceCollection instances) throws Exception { Instances wekaInstances = new Instances("DataSet", attVector, instances.Size()); if (dependentVariableInstances != null) wekaInstances.setClass((Attribute) attVector.elementAt(attVector.size() - 1)); for (DataValues instance : instances) wekaInstances.add(GetInstance(wekaInstances, attVector, instance, dependentVariableInstances)); return wekaInstances; }
From source file:moa.clusterer.outliers.Sieve.java
License:Apache License
/**
 * This is not your grandpa's E-M algorithm... it has multiple mini-steps,
 * but "The e1-m1-e2-m2-e3-m3-Algorithm" is a mouthful, so we just call it
 * *-Means Clustering {Pronounced "Any-means (necessary) clustering"}.
 *
 * Pipeline: (M1) assign non-outlier points to their nearest cluster,
 * (E) recompute cluster statistics, drop empty clusters, (M2) split impure
 * clusters while room remains under maxK, (E) recompute, (M3) try to absorb
 * pending outliers, (E) final recompute.
 *
 * @param D data points paired with their current cluster assignment; cxp.c is updated in place
 * @param subclusters the working cluster set; mutated in place (cleared, split, pruned)
 * @param maxK maximum number of clusters allowed
 * @return score at the end of the process (sum of recomputeAll over the final clusters)
 */
protected final double EMStep(List<ClusterPointPair> D, Collection<Riffle> subclusters, int maxK) {
    double ret = 0;

    // Clear the palette: reset each cluster's instance buffer and tallies.
    for (Riffle c : subclusters) {
        if (c.instances == null) {
            c.instances = c.getHeader();
        }
        c.instances.clear();
        c.cleanTallies();
    }

    // Assign X's to nearest clusters (Maximization step 1).
    for (ClusterPointPair cxp : D) {
        if (this.potentialNovels.contains(cxp.x)) { // could also be if cxp.c == null, but this is safer
            continue; // ignore the outliers for a moment
        }
        final NearestClusterTuple[] nearestClusters = findMostLikelyClusters(subclusters, cxp.x);
        cxp.c = nearestClusters[0].getCluster();
        nearestClusters[0].getCluster().instances.add(cxp.x);
        // Only confidently-weighted points contribute to the label tallies.
        if (cxp.x.weight() > 0.99) {
            nearestClusters[0].getCluster().addLabeling((int) cxp.x.classValue(), cxp.x.weight());
        }
    }

    // Find new radius (Expectation step).
    for (Riffle c : subclusters) {
        ret += c.recomputeAll();
    }

    // Remove empty clusters to make room for splits (Expectation-ish).
    Iterator<Riffle> cIter = subclusters.iterator();
    while (cIter.hasNext()) {
        Riffle rc = cIter.next();
        if (rc.instances.size() < 1) {
            cIter.remove();
        }
    }

    // Are we full? If not, try to break up noisy/impure clusters.
    if (subclusters.size() < maxK) {
        // Fix bad clusters (Maximization step 2 - breaking up noisy clusters).
        Riffle sortedClusters[] = new Riffle[subclusters.size()];
        int tmpIdx = 0;
        for (Riffle tmpRfl : subclusters) {
            if (tmpIdx >= sortedClusters.length) {
                break;
            }
            sortedClusters[tmpIdx] = tmpRfl;
            tmpIdx++;
        }
        // Sort clusters so the best split candidates (large clusters with a
        // strong second-place label vote) come first.
        Arrays.sort(sortedClusters, new Comparator<Riffle>() {
            @Override
            public int compare(Riffle first, Riffle second) {
                if (first == null) {
                    return 1;
                }
                if (second == null) {
                    return -1;
                }
                double[] votes1 = first.getVotes().clone();
                double[] votes2 = second.getVotes().clone();
                double total1 = weka.core.Utils.sum(votes1);
                double total2 = weka.core.Utils.sum(votes2);
                Arrays.sort(votes1);
                Arrays.sort(votes2);
                // Second-largest vote share ("penultimate"), epsilon-guarded.
                double pentultimate1 = 1e-16 + ((votes1.length > 1) ? votes1[votes1.length - 2] : 0);
                double pentultimate2 = 1e-16 + ((votes2.length > 1) ? votes2[votes2.length - 2] : 0);
                // This is equivalent to purity minus margin, scaled by cluster size.
                double score1 = (total1 > 0) ? first.size() * pentultimate1 / total1 : 0;
                double score2 = (total2 > 0) ? second.size() * pentultimate2 / total2 : 0;
                // Descending order: highest split-score first.
                return Double.compare(score2, score1);
            }
        }); // end anonymous comparator

        for (int cIdx = 0; cIdx < sortedClusters.length && subclusters.size() < maxK; cIdx++) {
            Riffle splitMe = sortedClusters[cIdx];
            // Already-pure clusters don't need splitting.
            if (splitMe.getPurity() > 0.9) {
                continue;
            }
            double[] votes = splitMe.getVotes();
            final double totalVotes = weka.core.Utils.sum(votes);
            // A label earns its own child cluster if it holds at least half the
            // uniform vote share (1 / (2 * numLabels)).
            final double critVotes = 1.0 / (votes.length * 2);
            if (totalVotes < 2) {
                continue;
            }
            // splitSet is indexed by label: a new child cluster, or null.
            ArrayList<Riffle> splitSet = new ArrayList<>(votes.length);
            int numberOfNewClusters = 0;
            for (int lblIdx = 0; lblIdx < votes.length; ++lblIdx) {
                double labelVote = votes[lblIdx] / totalVotes;
                if (labelVote >= critVotes) {
                    splitSet.add(this.createNewCluster(splitMe.toInstance()));
                    numberOfNewClusters++;
                } else {
                    splitSet.add(null);
                }
            }
            // Splitting only makes sense if it yields at least two children.
            if (numberOfNewClusters < 2) {
                continue;
            }
            // Route labeled points to their label's child; everything else
            // (low-weight or label without a child) goes to "extras".
            Instances extras = new Instances(splitMe.getHeader());
            for (Instance x : splitMe.instances) {
                if (x.weight() > 0.999) {
                    Riffle myHopefulCluster = splitSet.get((int) x.classValue());
                    if (myHopefulCluster != null) {
                        myHopefulCluster.instances.add(x);
                        myHopefulCluster.addLabeling((int) x.classValue(), x.weight());
                    } else {
                        extras.add(x);
                    }
                } else {
                    extras.add(x);
                }
            }
            // Register the surviving children, then assign each extra point to
            // its nearest child.
            LinkedList<Riffle> goodSet = new LinkedList<>();
            for (Riffle rfc : splitSet) {
                if (rfc == null) {
                    continue;
                }
                rfc.recomputeAll();
                goodSet.add(rfc);
                subclusters.add(rfc);
            }
            for (Instance x : extras) {
                final NearestClusterTuple[] nearestClusters = findMostLikelyClusters(goodSet, x);
                nearestClusters[0].getCluster().instances.add(x);
            }
            // The parent cluster is replaced by its children.
            subclusters.remove(splitMe);
        }
    }

    // The penultimate Expectation step.
    ret = 0;
    for (Riffle c : subclusters) {
        ret += c.recomputeAll();
    }

    // See if any outliers should actually be consumed by a cluster now...
    // (Maximization step 3).
    Iterator<Instance> xIter = potentialNovels.iterator();
    while (xIter.hasNext()) {
        Instance xOut = xIter.next();
        final NearestClusterTuple[] nearestClusters = findMostLikelyClusters(subclusters, xOut);
        if (nearestClusters == null || nearestClusters.length < 1) {
            continue;
        }
        Riffle c = nearestClusters[0].getCluster();
        double d = nearestClusters[0].getDistance();
        // NOTE(review): this admits the outlier when its distance EXCEEDS the
        // cluster radius, which looks inverted for "consume outlier into
        // cluster" — confirm against Riffle.getRadius()/getDistance semantics.
        if (d > c.getRadius()) {
            // Welcome home wayward tuple!
            c.instances.add(xOut);
            xIter.remove();
        }
    }

    // And the final Expectation step.
    ret = 0;
    for (Riffle c : subclusters) {
        ret += c.recomputeAll();
    }

    return ret;
}