List of usage examples for the value method of weka.core.Instance
public double value(Attribute att);
From source file:LogReg.Logistic.java
License:Open Source License
/** * Computes the distribution for a given instance * * @param instance the instance for which distribution is computed * @return the distribution/*from ww w .j av a 2s. c o m*/ * @throws Exception if the distribution can't be computed successfully */ public double[] distributionForInstance(Instance instance) throws Exception { m_ReplaceMissingValues.input(instance); instance = m_ReplaceMissingValues.output(); m_AttFilter.input(instance); instance = m_AttFilter.output(); m_NominalToBinary.input(instance); instance = m_NominalToBinary.output(); // Extract the predictor columns into an array double[] instDat = new double[m_NumPredictors + 1]; int j = 1; instDat[0] = 1; for (int k = 0; k <= m_NumPredictors; k++) { if (k != m_ClassIndex) { instDat[j++] = instance.value(k); } } double[] distribution = evaluateProbability(instDat); return distribution; }
From source file:lu.lippmann.cdb.common.gui.dataset.InstanceFormatter.java
License:Open Source License
/**
 * Renders an instance as HTML, one {@code name = <b>value</b><br/>} entry per attribute.
 *
 * <p>Nominal and string values are HTML-escaped, date values are formatted with
 * {@code FormatterUtil.DATE_FORMAT}, other numeric values are appended as-is.
 * Attributes of any other kind produce an empty bold element.
 *
 * @param inst the instance to render
 * @param withHTMLHeader whether to wrap the result in {@code <html><body>} tags
 * @return the HTML text
 */
public static String htmlFormat(final Instance inst, final boolean withHTMLHeader) {
    final StringBuilder html = new StringBuilder();
    if (withHTMLHeader) {
        html.append("<html><body>");
    }

    final int attributeCount = inst.numAttributes();
    for (int idx = 0; idx < attributeCount; idx++) {
        html.append(StringEscapeUtils.escapeHtml(inst.attribute(idx).name()))
            .append(" = ")
            .append("<b>");

        final boolean textual = inst.attribute(idx).isNominal() || inst.attribute(idx).isString();
        if (textual) {
            html.append(StringEscapeUtils.escapeHtml(inst.stringValue(idx)));
        } else if (inst.attribute(idx).isDate()) {
            // The date is stored as epoch milliseconds in the numeric value.
            final Calendar calendar = Calendar.getInstance();
            calendar.setTimeInMillis((long) inst.value(idx));
            html.append(FormatterUtil.DATE_FORMAT.format(calendar.getTime()));
        } else if (inst.attribute(idx).isNumeric()) {
            html.append(inst.value(idx));
        }

        html.append("</b>").append("<br/>");
    }

    if (withHTMLHeader) {
        html.append("</body></html>");
    }
    return html.toString();
}
From source file:lu.lippmann.cdb.common.gui.ts.TimeSeriesChartUtil.java
License:Open Source License
private static void fillWithSingleAxis(final Instances dataSet, final int dateIdx, final TimeSeriesCollection tsDataset) { final int numInstances = dataSet.numInstances(); final Calendar cal = Calendar.getInstance(); for (final Integer i : WekaDataStatsUtil.getNumericAttributesIndexes(dataSet)) { if (dataSet.attributeStats(i).missingCount == dataSet.numInstances()) { System.out.println("TimeSeriesChartUtil: Only missing values for '" + dataSet.attribute(i).name() + "', so skip it!"); continue; }// ww w. j a va2 s. co m final TimeSeries ts = new TimeSeries(dataSet.attribute(i).name()); for (int k = 0; k < numInstances; k++) { final Instance instancek = dataSet.instance(k); final long timeInMilliSec = (long) instancek.value(dateIdx); cal.setTimeInMillis(timeInMilliSec); if (instancek.isMissing(i)) { ts.addOrUpdate(new Millisecond(cal.getTime()), null); } else { ts.addOrUpdate(new Millisecond(cal.getTime()), instancek.value(i)); } } if (!ts.isEmpty()) tsDataset.addSeries(ts); } }
From source file:lu.lippmann.cdb.common.gui.ts.TimeSeriesChartUtil.java
License:Open Source License
private static void fillWithSingleAxisInterval(final Instances dataSet, final int dateIdx, final YIntervalSeriesCollection tsDataset, final double deviation, final int deviatedAttrIdx) { final int numInstances = dataSet.numInstances(); for (final Integer i : WekaDataStatsUtil.getNumericAttributesIndexes(dataSet)) { if (dataSet.attributeStats(i).missingCount == dataSet.numInstances()) { System.out.println("TimeSeriesChartUtil: Only missing values for '" + dataSet.attribute(i).name() + "', so skip it!"); continue; }//from w ww. ja va 2s. c o m final YIntervalSeries ts = new YIntervalSeries(dataSet.attribute(i).name()); for (int k = 0; k < numInstances; k++) { final Instance instancek = dataSet.instance(k); final long timeInMilliSec = (long) instancek.value(dateIdx); if (instancek.isMissing(i)) { //ts.add(timeInMilliSec,null,0d,0d); } else { if (i == deviatedAttrIdx && k > 0 && k < (numInstances - 1)) { System.out.println(numInstances + " " + k + " " + instancek.value(i) + " " + (instancek.value(i) - deviation) + " " + (instancek.value(i) + deviation)); ts.add(timeInMilliSec, instancek.value(i), instancek.value(i) - deviation, instancek.value(i) + deviation); } else { ts.add(timeInMilliSec, instancek.value(i), instancek.value(i), instancek.value(i)); } //System.out.println(instancek.value(i)+" "+(instancek.value(i)-deviation)+" "+(instancek.value(i)+deviation)); } } if (!ts.isEmpty()) tsDataset.addSeries(ts); } }
From source file:lu.lippmann.cdb.ext.hydviga.ui.GapFillingFrame.java
License:Open Source License
private Instances buildCorrectedDataset(final Instances diff) { //System.out.println("Build a corrected dataset ..."); final Instances correctedDataSet = new Instances(dataSet); final int corrNumInstances = correctedDataSet.numInstances(); final int diffNumInstances = diff.numInstances(); final int diffNumAttributes = diff.numAttributes(); final int idxInDiff = 0; for (int k = 0; k < diffNumInstances; k++) { final Instance diffInstanceK = diff.instance(k); if (diffInstanceK.isMissing(idxInDiff)) continue; final long timestamp = (long) diffInstanceK.value(diffNumAttributes - 1); for (int h = 0; h < corrNumInstances; h++) { if ((long) correctedDataSet.instance(h).value(dateIdx) == timestamp) { correctedDataSet.instance(h).setValue(attr, diffInstanceK.value(idxInDiff)); break; }/*w w w .j a va 2 s . co m*/ } } //System.out.println("... corrected dataset built!"); return correctedDataSet; }
From source file:lu.lippmann.cdb.ext.hydviga.ui.GapsTabView.java
License:Open Source License
/**
 * {@inheritDoc}
 *
 * <p>Rebuilds the date-attribute selector and the "fictive gaps" action panel for the
 * given dataset: removes a previously installed date combo box, installs a new one,
 * triggers {@code fillTabs} asynchronously, and wires the buttons that create a fictive
 * gap, show / inspect / rebuild the knowledge DB, and show the knowledge DB merged with
 * the real gaps of the dataset.
 *
 * @param dataSet the dataset to display
 * @throws Exception if the dataset has no date attribute
 */
@Override
public void update0(final Instances dataSet) throws Exception {
    // Drop the combo box installed by a previous call, if any.
    if (this.dateAttributeField != null) {
        this.jxp.remove(this.dateAttributeField);
        this.dateAttributeField = null;
        this.jxp.updateUI();
    }

    final java.util.List<String> dateAttributeNames = WekaDataStatsUtil.getDateAttributeNames(dataSet);
    // A former additional condition (at least one numeric attribute) is disabled:
    // &&(WekaDataStatsUtil.getNumericAttributesIndexes(dataSet).size()>0)
    final boolean hasDateAttributes = (!dateAttributeNames.isEmpty());

    if (hasDateAttributes) {
        this.dateAttributeField = new JXComboBox(dateAttributeNames.toArray());
        this.dateAttributeField.setBorder(new TitledBorder("Date attribute"));
        this.jxp.add(this.dateAttributeField, BorderLayout.SOUTH);
        // Refill the tabs whenever another date attribute is selected.
        this.dateAttributeField.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(ActionEvent e) {
                fillTabs(dataSet);
            }
        });

        // Initial fill of the tabs, delegated to an AbstractSimpleAsync task.
        new AbstractSimpleAsync<Void>(true) {
            @Override
            public Void execute() throws Exception {
                fillTabs(dataSet);
                return null;
            }

            @Override
            public void onSuccess(Void result) {
                // nothing to do once the tabs are filled
            }

            @Override
            public void onFailure(Throwable caught) {
                caught.printStackTrace();
            }
        }.start();

        this.actionsForFictiveGapsPanel.removeAll();

        // All attribute names are offered; a former variant restricted the choice to
        // WekaTimeSeriesUtil.getNamesOfAttributesWithoutGap(dataSet).
        final JComboBox seriesWithoutGapCB = new JComboBox(
                WekaDataStatsUtil.getAttributeNames(dataSet).toArray());
        seriesWithoutGapCB.setBorder(new TitledBorder("Fictive gap in"));
        this.actionsForFictiveGapsPanel.add(seriesWithoutGapCB);

        final JComboBox sizeGapCB = new JComboBox(new Object[] { 10, 50, 100, 200, 400, 500 });
        sizeGapCB.setBorder(new TitledBorder("Size of the fictive gap"));
        this.actionsForFictiveGapsPanel.add(sizeGapCB);

        // Position choices are labelled "0/PARTS_COUNT" .. "(PARTS_COUNT-1)/PARTS_COUNT".
        final Object[] partChoice = new Object[PARTS_COUNT];
        for (int iii = 0; iii < PARTS_COUNT; iii++) {
            partChoice[iii] = iii + "/" + PARTS_COUNT;
        }
        final JComboBox positionGapCB = new JComboBox(partChoice);
        positionGapCB.setBorder(new TitledBorder("Position of the fictive gap"));
        this.actionsForFictiveGapsPanel.add(positionGapCB);

        this.fictiveGapButton = new JXButton("Create a fictive gap");
        this.actionsForFictiveGapsPanel.add(this.fictiveGapButton);
        this.fictiveGapButton.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(ActionEvent e) {
                /* create a fake gap */
                final Attribute attr = dataSet.attribute(seriesWithoutGapCB.getSelectedItem().toString());
                final int dateIdx = WekaDataStatsUtil.getFirstDateAttributeIdx(dataSet);
                // NOTE(review): the position uses selectedIndex + 1 while the label shows
                // selectedIndex/PARTS_COUNT -- confirm this offset is intended.
                final int position = ((positionGapCB.getSelectedIndex() + 1) * dataSet.numInstances()) / PARTS_COUNT;
                final int gapsize = Integer.valueOf(sizeGapCB.getSelectedItem().toString());

                /* show it, working on a copy of the dataset */
                final GapFillingFrame jxf = new GapFillingFrame(getAbstractTabView(), new Instances(dataSet), attr,
                        dateIdx, GapsUtil.getCountOfValuesBeforeAndAfter(gapsize), position, gapsize, gcp, false);
                // Explicit sizing / maximizing / non-resizable settings are currently disabled.
                jxf.setLocationRelativeTo(jxp);
                jxf.setVisible(true);
            }
        });

        this.showKnowledgeDBButton = new JXButton("Show KDB");
        this.actionsForFictiveGapsPanel.add(this.showKnowledgeDBButton);
        this.showKnowledgeDBButton.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(final ActionEvent e) {
                try {
                    final DatasetView view = new DatasetView("KnowledgeDB", eventPublisher, commandDispatcher,
                            applicationContext);
                    view.setDataSet(GapFillingKnowledgeDB.getKnowledgeDB()).setAsVisible(true);
                } catch (final Exception ee) {
                    ee.printStackTrace();
                }
            }
        });

        this.inspectKnowledgeDBButton = new JXButton("Inspect KDB");
        this.actionsForFictiveGapsPanel.add(this.inspectKnowledgeDBButton);
        this.inspectKnowledgeDBButton.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(final ActionEvent e) {
                try {
                    final int dateIdx = WekaDataStatsUtil.getFirstDateAttributeIdx(dataSet);
                    new GapFillingKnowledgeDBExplorerFrame(dataSet, dateIdx, gcp);
                } catch (final Exception ee) {
                    ee.printStackTrace();
                }
            }
        });

        this.showKnowledgeDBWithTrueCasesButton = new JXButton("Show KDB with true cases");
        this.actionsForFictiveGapsPanel.add(this.showKnowledgeDBWithTrueCasesButton);
        this.showKnowledgeDBWithTrueCasesButton.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(final ActionEvent e) {
                try {
                    // ARFF header: 25 attributes. Every data row appended below must
                    // emit its values in exactly this order.
                    final StringBuilder sb = new StringBuilder("@relation blabla\n");
                    sb.append("@attribute serieName string\n");
                    sb.append("@attribute serieX numeric\n");
                    sb.append("@attribute serieY numeric\n");
                    sb.append("@attribute year numeric\n");
                    sb.append("@attribute season {Winter,Spring,Summer,Autumn}\n");
                    sb.append("@attribute gapSize numeric\n");
                    sb.append("@attribute gapPosition numeric\n");
                    sb.append("@attribute isDuringRising {true,false}\n");
                    sb.append("@attribute flow string\n");
                    sb.append("@attribute hasDownstream {false,true}\n");
                    sb.append("@attribute hasUpstream {false,true}\n");
                    sb.append("@attribute isReal {false,true}\n");
                    sb.append(
                            "@attribute algo {Interpolation,EM,REG,REPTREE,M5P,ZeroR,ANN,NEARESTNEIGHBOUR}\n");
                    sb.append("@attribute useDiscretizedTime {false,true}\n");
                    sb.append("@attribute useMostSimilar {false,true}\n");
                    sb.append("@attribute useNearest {true,false}\n");
                    sb.append("@attribute useDownstream {false,true}\n");
                    sb.append("@attribute useUpstream {true,false}\n");
                    sb.append("@attribute mae numeric\n");
                    sb.append("@attribute rmse numeric\n");
                    sb.append("@attribute rsr numeric\n");
                    sb.append("@attribute pbias numeric\n");
                    sb.append("@attribute ns numeric\n");
                    sb.append("@attribute ioa numeric\n");
                    sb.append("@attribute wasTheBestSolution {true,false}\n");
                    sb.append("@data\n");

                    /* true cases: the real gaps described by GapsUtil.buildGapsDescription */
                    final Calendar cal = Calendar.getInstance();
                    final int dateIdx = WekaDataStatsUtil.getFirstDateAttributeIdx(dataSet);
                    final Instances gapsDescriptionsDataset = GapsUtil.buildGapsDescription(gcp, dataSet, dateIdx);
                    final int gddc = gapsDescriptionsDataset.numInstances();
                    for (int i = 0; i < gddc; i++) {
                        final Instance trueCase = gapsDescriptionsDataset.instance(i);
                        sb.append(trueCase.stringValue(0)); // series name
                        sb.append(",");
                        sb.append(gcp.getCoordinates(trueCase.stringValue(0))[0]); // x
                        sb.append(",");
                        sb.append(gcp.getCoordinates(trueCase.stringValue(0))[1]); // y
                        sb.append(",");
                        cal.setTime(FormatterUtil.DATE_FORMAT.parse(trueCase.stringValue(1))); // year is taken from the parsed date
                        sb.append(cal.get(Calendar.YEAR));
                        sb.append(",");
                        sb.append(trueCase.stringValue(2).split("/")[0]); // season: text before the first '/'
                        sb.append(",");
                        sb.append(trueCase.value(4)); // gap size
                        sb.append(",");
                        sb.append(trueCase.value(5)); // gap position
                        sb.append(",");
                        sb.append(trueCase.stringValue(10).equals("true")); // rising
                        sb.append(",");
                        sb.append(trueCase.stringValue(11)); // flow
                        sb.append(",");
                        sb.append(!trueCase.stringValue(9).equals("n/a")); // has downstream
                        sb.append(",");
                        sb.append(!trueCase.stringValue(8).equals("n/a")); // has upstream
                        sb.append(",");
                        sb.append("true"); // isReal
                        sb.append(",");
                        // The 13 remaining attributes (algo .. wasTheBestSolution) are
                        // written as missing ("?") for real gaps.
                        sb.append("?");
                        sb.append(",");
                        sb.append("?");
                        sb.append(",");
                        sb.append("?");
                        sb.append(",");
                        sb.append("?");
                        sb.append(",");
                        sb.append("?");
                        sb.append(",");
                        sb.append("?");
                        sb.append(",");
                        sb.append("?");
                        sb.append(",");
                        sb.append("?");
                        sb.append(",");
                        sb.append("?");
                        sb.append(",");
                        sb.append("?");
                        sb.append(",");
                        sb.append("?");
                        sb.append(",");
                        sb.append("?");
                        sb.append(",");
                        sb.append("?");
                        sb.append("\n");
                    }

                    /* the simulated cases from the knowledge DB */
                    final Instances knowledgeDB = GapFillingKnowledgeDB.getKnowledgeDB();
                    final int kni = knowledgeDB.numInstances();
                    for (int i = 0; i < kni; i++) {
                        final Instance simulatedCase = knowledgeDB.instance(i);
                        sb.append(simulatedCase.stringValue(0)); // name
                        sb.append(",");
                        sb.append(simulatedCase.value(1)); // x
                        sb.append(",");
                        sb.append(simulatedCase.value(2)); // y
                        sb.append(",");
                        sb.append(simulatedCase.value(6)); // year
                        sb.append(",");
                        sb.append(simulatedCase.stringValue(5)); // season
                        sb.append(",");
                        sb.append(simulatedCase.value(3)); // size
                        sb.append(",");
                        sb.append(simulatedCase.value(4)); // position
                        sb.append(",");
                        sb.append(simulatedCase.stringValue(7)); // rising
                        sb.append(",");
                        sb.append(simulatedCase.stringValue(8)); // flow
                        sb.append(",");
                        sb.append(simulatedCase.stringValue(9)); // has downstream
                        sb.append(",");
                        sb.append(simulatedCase.stringValue(10)); // has upstream
                        sb.append(",");
                        sb.append("false"); // isReal
                        sb.append(",");
                        sb.append(simulatedCase.stringValue(11)); // algo
                        sb.append(",");
                        sb.append(simulatedCase.stringValue(12)); // use discretized time
                        sb.append(",");
                        sb.append(simulatedCase.stringValue(13)); // use most similar
                        sb.append(",");
                        sb.append(simulatedCase.stringValue(14)); // use nearest
                        sb.append(",");
                        sb.append(simulatedCase.stringValue(15)); // use downstream
                        sb.append(",");
                        sb.append(simulatedCase.stringValue(16)); // use upstream
                        sb.append(",");
                        sb.append(simulatedCase.value(17)); // mae
                        sb.append(",");
                        sb.append(simulatedCase.value(18)); // rmse
                        sb.append(",");
                        sb.append(simulatedCase.value(19)); // rsr
                        sb.append(",");
                        sb.append(simulatedCase.value(20)); // pbias
                        sb.append(",");
                        sb.append(simulatedCase.value(21)); // ns
                        sb.append(",");
                        sb.append(simulatedCase.value(22)); // ioa
                        sb.append(",");
                        sb.append(simulatedCase.stringValue(23)); // was the best solution
                        sb.append("\n");
                    }

                    // Parse the assembled ARFF text and show it in a new dataset view.
                    final Instances newds = WekaDataAccessUtil.loadInstancesFromARFFString(sb.toString(), false,
                            false);
                    final DatasetView view = new DatasetView("KnowledgeDB with true cases", eventPublisher,
                            commandDispatcher, applicationContext);
                    view.setDataSet(newds).setAsVisible(true);
                } catch (final Exception ee) {
                    ee.printStackTrace();
                }
            }
        });

        this.rebuildKnowledgeDBButton = new JXButton("Rebuild KDB");
        this.actionsForFictiveGapsPanel.add(this.rebuildKnowledgeDBButton);
        this.rebuildKnowledgeDBButton.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(final ActionEvent e) {
                rebuildKnowledgeDB(dataSet);
            }
        });
    } else {
        throw new Exception("No date attributes in the dataset.");
    }
}
From source file:lu.lippmann.cdb.ext.hydviga.util.TransformTimeSeries.java
License:Open Source License
/** * Main method./*www . j av a 2 s .c o m*/ * @param args command line arguments */ public static final void main(final String[] args) { try { final Instances dataSet = WekaDataAccessUtil.loadInstancesFromARFFOrCSVFile(new File("." + File.separatorChar + "data_fake" + File.separatorChar + "all_valid_q_series_complete2.arff")); System.out.println(dataSet.toSummaryString()); final int numAttributes = dataSet.numAttributes(); final int numInstances = dataSet.numInstances(); for (int i = 0; i < numAttributes; i++) { final int i_bis = (int) (Math.random() * (double) (numAttributes - 3)); final int i_tri = (int) (Math.random() * (double) (numAttributes - 3)); for (int j = 0; j < numInstances; j++) { final Instance instance_j = dataSet.instance(j); if (instance_j.isMissing(i)) continue; if (instance_j.isMissing(i_bis)) continue; if (instance_j.isMissing(i_tri)) continue; final double iValue = instance_j.value(i); final double iBisValue = instance_j.value(i_bis); final double iTriValue = instance_j.value(i_tri); instance_j.setValue(i, (iValue + iBisValue + iTriValue)); } } WekaDataAccessUtil.saveInstancesIntoARFFFile(dataSet, new File("." + File.separatorChar + "data_fake" + File.separatorChar + "all_valid_q_series_complete2_fake.arff")); } catch (final Exception e) { e.printStackTrace(); } }
From source file:lu.lippmann.cdb.lab.beta.shih.Shih2010.java
License:Open Source License
/** * /*from w w w.j a v a2 s .com*/ * @return * @throws Exception */ private void computeMatrixMDF() throws Exception { //Build filtered instance for each element of the base int baseSize = base.size(); Attribute baseAttribute = instances.attribute(baseIndex); //Create baseSize copy of instances for filtering List<Instances> filteredInstances = new ArrayList<Instances>(); for (int i = 0; i < baseSize; i++) { filteredInstances.add(new Instances(instances)); } //Filter for (int i = 0; i < instances.numInstances(); i++) { final Instance instance = instances.instance(i); for (final TupleSI j : base) { final int wekaAttributeValue = (int) instance.value(baseIndex); if (!new TupleSI(baseAttribute.value(wekaAttributeValue), baseIndex).equals(j)) { WekaUtil2.removeFromInstances(filteredInstances.get(base.indexOf(j)), instance); } } } //Compute I vector final int idxsNs = idxsN.size(); if (idxsNs == 0) { throw new Exception("You need at least one numerical attribute !!"); } int minIndexForI = -1; double minValueForI = Double.MAX_VALUE; final double[][] meanBase = new double[idxsNs][baseSize]; int p = 0; for (final Integer num : idxsN) { double Ip = 0.0; for (int j = 0; j < baseSize; j++) { final List<Instance> filtredInstance = filteredInstances.get(j); final int fs = filtredInstance.size(); double mean = 0; for (int l = 0; l < fs; l++) { mean += filtredInstance.get(l).value(num); } mean = mean / fs; meanBase[p][j] = mean; for (int l = 0; l < fs; l++) { Ip += Math.pow(filtredInstance.get(l).value(num) - mean, 2); } } if (Ip < minValueForI) { minValueForI = Ip; minIndexForI = p; } p++; } this.maxNum = new double[idxsNs]; for (int i = 0; i < instances.numInstances(); i++) { final Instance instance = instances.instance(i); //Save maximum value for each numerical attribute for (Integer n1 : idxsN) { double val = instance.value(n1); int idx = idxsN.indexOf(n1); if (val > maxNum[idx]) { maxNum[idx] = val; } } //Compute matrix M for each categorical attribute for (final Integer 
e1 : idxsC) { for (final Integer e2 : idxsC) { final int i1 = getIndexOf(e1, instance.attribute(e1).value((int) instance.value(e1))); final int j1 = getIndexOf(e2, instance.attribute(e2).value((int) instance.value(e2))); M[i1][j1] = M[i1][j1] + 1; } } } //Compute D matrix for (int i = 0; i < n; i++) { for (int j = 0; j < n; j++) { double d = M[i][j] / (M[i][i] + M[j][j] - M[i][j] + 0.0); if (d >= theta) { D[i][j] = d; } else { D[i][j] = 0; } } } //Compute F matrix for base for (final TupleSI baseVal : base) { F.put(baseVal, meanBase[minIndexForI][base.indexOf(baseVal)]); } //Compute F matrix for noBase for (final TupleSI noBaseVal : noBase) { double f = 0.0; for (final TupleSI baseVal : base) { f += D[getIndexOf(noBaseVal)][getIndexOf(baseVal)] * F.get(baseVal); } F.put(noBaseVal, f); } }
From source file:lu.lippmann.cdb.lab.beta.shih.Shih2010.java
License:Open Source License
/** * /* w w w . j ava 2 s .co m*/ * @return */ public Instances getModifiedInstances() { //Copy attribute list (and change categorical by numerical) final ArrayList<Attribute> lAttrs = new ArrayList<Attribute>(); for (int i = 0; i < instances.numAttributes(); i++) { Attribute attr = instances.attribute(i); if (attr.isNumeric() || attr.index() == instances.classIndex()) { lAttrs.add(attr); } else { Attribute newAttr = new Attribute(attr.name()); lAttrs.add(newAttr); } } //Build new instance final Instances newInstances = new Instances("Shih instance", lAttrs, instances.numInstances()); newInstances.setClassIndex(instances.classIndex()); for (int i = 0; i < instances.numInstances(); i++) { final Instance instance = instances.instance(i); final Instance cpyInstance = (Instance) instance.copy(); for (int j = 0; j < instance.numAttributes(); j++) { Attribute attribute = instance.attribute(j); int k = 0; if (attribute.index() == instances.classIndex()) { //The class index is nominal cpyInstance.setValue(attribute, instance.stringValue(j)); } else if (!attribute.isNumeric()) { String elt = attribute.value((int) instance.value(j)); cpyInstance.setValue(attribute, F.get(new TupleSI(elt, j))); } else { if (maxNum[k] > 1) { cpyInstance.setValue(attribute, instance.value(j) / maxNum[k]); } k++; } } newInstances.add(cpyInstance); } if (ignoreClass && instances.classIndex() != -1) { newInstances.deleteAttributeAt(instances.classIndex()); } return newInstances; }
From source file:lu.lippmann.cdb.lab.beta.util.WekaUtil2.java
License:Open Source License
/** * Generate the centroid coordinates based * on it's members (objects assigned to the cluster of the centroid) and the distance * function being used./* w ww . ja v a 2 s .c o m*/ * @return the centroid */ public static MixedCentroid computeMixedCentroid(final boolean preserveOrder, final NormalizableDistance distanceFunction, final Instances numericInstances, final Instances originalInstances, final int clusterIndex) { final int numInstances = numericInstances.numInstances(); final int numAttributes = numericInstances.numAttributes(); final Map<TupleSI, Integer> addedAttr = new HashMap<TupleSI, Integer>(); if (numInstances == 1) { Instance uniqueNumInstance = numericInstances.firstInstance(); Instance uniqueMixInstance = originalInstances.firstInstance(); double[] centroid = uniqueNumInstance.toDoubleArray(); for (int i = 0; i < uniqueMixInstance.numAttributes(); i++) { if (!uniqueMixInstance.attribute(i).isNumeric()) { final String catVal = uniqueMixInstance.attribute(i).value((int) uniqueMixInstance.value(i)); addedAttr.put(new TupleSI(catVal, i), 1); } } return new MixedCentroid(clusterIndex, centroid, addedAttr); } final double[] vals = new double[numAttributes]; //used only for Manhattan Distance Instances sortedMembers = null; int middle = 0; boolean dataIsEven = false; final boolean isManhattanDist = (distanceFunction instanceof ManhattanDistance); final boolean isEuclideanDist = (distanceFunction instanceof EuclideanDistance); if (isManhattanDist) { middle = (numInstances - 1) / 2; dataIsEven = ((numInstances % 2) == 0); if (preserveOrder) { sortedMembers = numericInstances; } else { sortedMembers = new Instances(numericInstances); } } for (int j = 0; j < numAttributes; j++) { //in case of Euclidian distance the centroid is the mean point //in case of Manhattan distance the centroid is the median point //in both cases, if the attribute is nominal, the centroid is the mode if (isEuclideanDist) { vals[j] = numericInstances.meanOrMode(j); for (int i = 0; i 
< numInstances; i++) { if (!originalInstances.attribute(j).isNumeric()) { final Instance instance = originalInstances.instance(i); final String catVal = instance.attribute(j).value((int) instance.value(j)); //Initialize map final TupleSI key = new TupleSI(catVal, j); if (!addedAttr.containsKey(key)) addedAttr.put(key, 0); addedAttr.put(key, addedAttr.get(key) + 1); } } } else if (isManhattanDist) { sortedMembers.kthSmallestValue(j, middle + 1); vals[j] = sortedMembers.instance(middle).value(j); if (dataIsEven) { sortedMembers.kthSmallestValue(j, middle + 2); vals[j] = (vals[j] + sortedMembers.instance(middle + 1).value(j)) / 2; } } else { throw new IllegalStateException("Not handled distance ..."); } } return new MixedCentroid(clusterIndex, vals, addedAttr); }