List of usage examples for the weka.core.Instances copy constructor
public Instances(Instances dataset)
From source file:adams.gui.visualization.debug.objectexport.WekaInstancesExporter.java
License:Open Source License
/** * Performs the actual export./*from w ww . j a va 2s . co m*/ * * @param obj the object to export * @param file the file to export to * @return null if successful, otherwise error message */ @Override protected String doExport(Object obj, File file) { Instances data; Instance inst; try { if (obj instanceof Instances) { DataSink.write(file.getAbsolutePath(), (Instances) obj); return null; } else { inst = (Instance) obj; if (inst.dataset() != null) { data = new Instances(inst.dataset()); data.add((Instance) inst.copy()); DataSink.write(file.getAbsolutePath(), data); return null; } else { return "Instance has no dataset associated, cannot export as ARFF!"; } } } catch (Exception e) { return "Failed to write Instances to '" + file + "'!\n" + Utils.throwableToString(e); } }
From source file:adams.gui.visualization.instance.LoadDatasetDialog.java
License:Open Source License
/** * Returns the full dataset, can be null if none loaded. * * @return the full dataset/*from w w w . jav a 2 s. com*/ */ public Instances getDataset() { int index; Instances result; result = new Instances(m_Instances); if (m_ComboBoxSorting.getSelectedIndex() > 0) result.sort(m_ComboBoxSorting.getSelectedIndex() - 1); index = m_ComboBoxClass.getSelectedIndex(); if (index > -1) index--; result.setClassIndex(index); return result; }
From source file:adams.ml.data.InstancesView.java
License:Open Source License
/**
 * Returns a spreadsheet with the same header and comments.
 *
 * @return the spreadsheet
 */
@Override
public Dataset getHeader() {
    // NOTE(review): despite the method name this copies the full data,
    // not just the structure ("new Instances(m_Data, 0)" would copy the
    // header only) -- confirm intent before changing
    Instances copy = new Instances(m_Data);
    return new InstancesView(copy);
}
From source file:adams.ml.data.InstancesView.java
License:Open Source License
/**
 * Returns a spreadsheet containing only the input columns, not class
 * columns.
 *
 * @return the input features, or this view itself if the data has no
 *         class attribute set
 */
@Override
public SpreadSheet getInputs() {
    if (m_Data.classIndex() == -1)
        return this;

    Instances inputs = new Instances(m_Data);
    // the class index must be unset before the attribute can be deleted
    inputs.setClassIndex(-1);
    inputs.deleteAttributeAt(m_Data.classIndex());
    return new InstancesView(inputs);
}
From source file:adams.ml.data.InstancesView.java
License:Open Source License
/**
 * Returns a spreadsheet containing only output columns, i.e., the class
 * columns.
 *
 * @return the output features, null if data has no class columns
 */
@Override
public SpreadSheet getOutputs() {
    if (m_Data.classIndex() == -1)
        return null;

    Instances outputs = new Instances(m_Data);
    // the class index must be unset before filtering
    outputs.setClassIndex(-1);

    // inverted Remove keeps only the (former) class attribute
    Remove filter = new Remove();
    filter.setAttributeIndicesArray(new int[] { m_Data.classIndex() });
    filter.setInvertSelection(true);
    try {
        filter.setInputFormat(outputs);
        outputs = Filter.useFilter(outputs, filter);
        return new InstancesView(outputs);
    } catch (Exception e) {
        throw new IllegalStateException("Failed to apply Remove filter!", e);
    }
}
From source file:adams.opt.optimise.genetic.fitnessfunctions.AbstractWEKAFitnessFunction.java
License:Open Source License
protected synchronized void init() { int classIndex = 0; if (!m_init) { FileReader reader;/*from w w w .ja v a 2 s . c om*/ try { reader = new FileReader(m_Dataset.getAbsolutePath()); m_Instances = new Instances(reader); reader.close(); } catch (Exception e) { getLogger().log(Level.SEVERE, "Failed to read instances: " + m_Dataset, e); throw new IllegalStateException("Error loading dataset '" + m_Dataset + "': " + e); } // class index if (m_ClassIndex.equals("first")) classIndex = 0; else if (m_ClassIndex.equals("last")) classIndex = m_Instances.numAttributes() - 1; else classIndex = Integer.parseInt(m_ClassIndex); m_Instances.setClassIndex(classIndex); m_init = true; } }
From source file:adams.opt.optimise.genetic.fitnessfunctions.AttributeSelection.java
License:Open Source License
public double evaluate(OptData opd) { init();/*from w w w . ja v a 2s . c o m*/ int cnt = 0; int[] weights = getWeights(opd); Instances newInstances = new Instances(getInstances()); for (int i = 0; i < getInstances().numInstances(); i++) { Instance in = newInstances.instance(i); cnt = 0; for (int a = 0; a < getInstances().numAttributes(); a++) { if (a == getInstances().classIndex()) continue; if (weights[cnt++] == 0) { in.setValue(a, 0); } else { in.setValue(a, in.value(a)); } } } Classifier newClassifier = null; try { newClassifier = (Classifier) OptionUtils.shallowCopy(getClassifier()); // evaluate classifier on data Evaluation evaluation = new Evaluation(newInstances); evaluation.crossValidateModel(newClassifier, newInstances, getFolds(), new Random(getCrossValidationSeed())); // obtain measure double measure = 0; if (getMeasure() == Measure.ACC) measure = evaluation.pctCorrect(); else if (getMeasure() == Measure.CC) measure = evaluation.correlationCoefficient(); else if (getMeasure() == Measure.MAE) measure = evaluation.meanAbsoluteError(); else if (getMeasure() == Measure.RAE) measure = evaluation.relativeAbsoluteError(); else if (getMeasure() == Measure.RMSE) measure = evaluation.rootMeanSquaredError(); else if (getMeasure() == Measure.RRSE) measure = evaluation.rootRelativeSquaredError(); else throw new IllegalStateException("Unhandled measure '" + getMeasure() + "'!"); measure = getMeasure().adjust(measure); return (measure); // process fitness } catch (Exception e) { getLogger().log(Level.SEVERE, "Error evaluating", e); } return 0; }
From source file:adams.opt.optimise.genetic.fitnessfunctions.AttributeSelection.java
License:Open Source License
/** * Callback for best measure so far/*w w w. j a va2 s .c o m*/ */ @Override public void newBest(double val, OptData opd) { int cnt = 0; int[] weights = getWeights(opd); Instances newInstances = new Instances(getInstances()); for (int i = 0; i < getInstances().numInstances(); i++) { Instance in = newInstances.instance(i); cnt = 0; for (int a = 0; a < getInstances().numAttributes(); a++) { if (a == getInstances().classIndex()) continue; if (weights[cnt++] == 0) { in.setValue(a, 0); } else { in.setValue(a, in.value(a)); } } } try { File file = new File(getOutputDirectory().getAbsolutePath() + File.separator + Double.toString(getMeasure().adjust(val)) + ".arff"); file.createNewFile(); Writer writer = new BufferedWriter(new FileWriter(file)); Instances header = new Instances(newInstances, 0); // remove filter setup Remove remove = new Remove(); remove.setAttributeIndices(getRemoveAsString(weights)); remove.setInvertSelection(true); header.setRelationName(OptionUtils.getCommandLine(remove)); writer.write(header.toString()); writer.write("\n"); for (int i = 0; i < newInstances.numInstances(); i++) { writer.write(newInstances.instance(i).toString()); writer.write("\n"); } writer.flush(); writer.close(); } catch (Exception e) { e.printStackTrace(); } }
From source file:adaptedClusteringAlgorithms.MyFarthestFirst.java
License:Open Source License
/** * Generates a clusterer. Has to initialize all fields of the clusterer * that are not being set via options.//from w w w . j a v a 2 s. c o m * * @param data set of instances serving as training data * @throws Exception if the clusterer has not been * generated successfully */ public void buildClusterer(Instances data) throws Exception { if (!SESAME.SESAME_GUI) MyFirstClusterer.weka_gui = true; // can clusterer handle the data? getCapabilities().testWithFail(data); //long start = System.currentTimeMillis(); m_ReplaceMissingFilter = new ReplaceMissingValues(); // Missing values replacement is not required so this modification is made /*m_ReplaceMissingFilter.setInputFormat(data); m_instances = Filter.useFilter(data, m_ReplaceMissingFilter);*/ Instances m_instances = new Instances(data); // To use semantic measurers through DistanceFunction interface m_DistanceFunction.setInstances(m_instances); initMinMax(m_instances); m_ClusterCentroids = new Instances(m_instances, m_NumClusters); int n = m_instances.numInstances(); Random r = new Random(getSeed()); boolean[] selected = new boolean[n]; double[] minDistance = new double[n]; for (int i = 0; i < n; i++) minDistance[i] = Double.MAX_VALUE; int firstI = r.nextInt(n); m_ClusterCentroids.add(m_instances.instance(firstI)); selected[firstI] = true; updateMinDistance(minDistance, selected, m_instances, m_instances.instance(firstI)); if (m_NumClusters > n) m_NumClusters = n; for (int i = 1; i < m_NumClusters; i++) { int nextI = farthestAway(minDistance, selected); m_ClusterCentroids.add(m_instances.instance(nextI)); selected[nextI] = true; updateMinDistance(minDistance, selected, m_instances, m_instances.instance(nextI)); } m_instances = new Instances(m_instances, 0); //long end = System.currentTimeMillis(); //System.out.println("Clustering Time = " + (end-start)); // Save memory!! m_DistanceFunction.clean(); if (!SESAME.SESAME_GUI) MyFirstClusterer.weka_gui = true; }
From source file:adaptedClusteringAlgorithms.MySimpleKMeans.java
License:Open Source License
/*
 * Adapted SimpleKMeans buildClusterer. Overview of the phases visible below:
 *  1. optionally replace missing values (unless m_dontReplaceMissing),
 *     then collect full-data statistics: missing counts, std devs
 *     (if m_displayStdDevs), nominal counts, means/medians/modes;
 *  2. choose initial centroids by random sampling without duplicates
 *     (DecisionTableHashKey detects duplicate instances; order preserved
 *     via a copy when m_PreserveOrder is set);
 *  3. iterate assign-instances / recompute-centroids until assignments no
 *     longer change or m_MaxIterations is reached; clusters that become
 *     empty are dropped, shrinking m_NumClusters mid-loop and compacting
 *     the tempI partition array;
 *  4. finally record per-cluster sizes and (optionally) std devs, and call
 *     m_DistanceFunction.clean() to release memory.
 * NOTE(review): code intentionally left byte-identical -- the statement
 * order (m_NumClusters shrinking inside the convergence loop, tempI being
 * rebuilt/compacted) is delicate, so only this comment was added.
 */
/** * Generates a clusterer. Has to initialize all fields of the clusterer that * are not being set via options.//from w w w .ja va2 s.c o m * * @param data set of instances serving as training data * @throws Exception if the clusterer has not been generated successfully */ @Override public void buildClusterer(Instances data) throws Exception { if (!SESAME.SESAME_GUI) MyFirstClusterer.weka_gui = true; // can clusterer handle the data? getCapabilities().testWithFail(data); m_Iterations = 0; m_ReplaceMissingFilter = new ReplaceMissingValues(); Instances instances = new Instances(data); instances.setClassIndex(-1); if (!m_dontReplaceMissing) { m_ReplaceMissingFilter.setInputFormat(instances); instances = Filter.useFilter(instances, m_ReplaceMissingFilter); } m_FullMissingCounts = new int[instances.numAttributes()]; if (m_displayStdDevs) { m_FullStdDevs = new double[instances.numAttributes()]; } m_FullNominalCounts = new int[instances.numAttributes()][0]; m_FullMeansOrMediansOrModes = moveCentroid(0, instances, false); for (int i = 0; i < instances.numAttributes(); i++) { m_FullMissingCounts[i] = instances.attributeStats(i).missingCount; if (instances.attribute(i).isNumeric()) { if (m_displayStdDevs) { m_FullStdDevs[i] = Math.sqrt(instances.variance(i)); } if (m_FullMissingCounts[i] == instances.numInstances()) { m_FullMeansOrMediansOrModes[i] = Double.NaN; // mark missing as mean } } else { m_FullNominalCounts[i] = instances.attributeStats(i).nominalCounts; if (m_FullMissingCounts[i] > m_FullNominalCounts[i][Utils.maxIndex(m_FullNominalCounts[i])]) { m_FullMeansOrMediansOrModes[i] = -1; // mark missing as most common // value } } } m_ClusterCentroids = new Instances(instances, m_NumClusters); int[] clusterAssignments = new int[instances.numInstances()]; if (m_PreserveOrder) { m_Assignments = clusterAssignments; } m_DistanceFunction.setInstances(instances); Random RandomO = new Random(getSeed()); int instIndex; HashMap initC = new HashMap(); DecisionTableHashKey hk = 
null; Instances initInstances = null; if (m_PreserveOrder) { initInstances = new Instances(instances); } else { initInstances = instances; } for (int j = initInstances.numInstances() - 1; j >= 0; j--) { instIndex = RandomO.nextInt(j + 1); hk = new DecisionTableHashKey(initInstances.instance(instIndex), initInstances.numAttributes(), true); if (!initC.containsKey(hk)) { m_ClusterCentroids.add(initInstances.instance(instIndex)); initC.put(hk, null); } initInstances.swap(j, instIndex); if (m_ClusterCentroids.numInstances() == m_NumClusters) { break; } } m_NumClusters = m_ClusterCentroids.numInstances(); // removing reference initInstances = null; int i; boolean converged = false; int emptyClusterCount; Instances[] tempI = new Instances[m_NumClusters]; m_squaredErrors = new double[m_NumClusters]; m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0]; m_ClusterMissingCounts = new int[m_NumClusters][instances.numAttributes()]; while (!converged) { emptyClusterCount = 0; m_Iterations++; converged = true; for (i = 0; i < instances.numInstances(); i++) { Instance toCluster = instances.instance(i); int newC = clusterProcessedInstance(toCluster, true); if (newC != clusterAssignments[i]) { converged = false; } clusterAssignments[i] = newC; } // update centroids m_ClusterCentroids = new Instances(instances, m_NumClusters); for (i = 0; i < m_NumClusters; i++) { tempI[i] = new Instances(instances, 0); } for (i = 0; i < instances.numInstances(); i++) { tempI[clusterAssignments[i]].add(instances.instance(i)); } for (i = 0; i < m_NumClusters; i++) { if (tempI[i].numInstances() == 0) { // empty cluster emptyClusterCount++; } else { moveCentroid(i, tempI[i], true); } } if (m_Iterations == m_MaxIterations) { converged = true; } if (emptyClusterCount > 0) { m_NumClusters -= emptyClusterCount; if (converged) { Instances[] t = new Instances[m_NumClusters]; int index = 0; for (int k = 0; k < tempI.length; k++) { if (tempI[k].numInstances() > 0) { t[index] = 
tempI[k]; for (i = 0; i < tempI[k].numAttributes(); i++) { m_ClusterNominalCounts[index][i] = m_ClusterNominalCounts[k][i]; } index++; } } tempI = t; } else { tempI = new Instances[m_NumClusters]; } } if (!converged) { m_squaredErrors = new double[m_NumClusters]; m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0]; } } if (m_displayStdDevs) { m_ClusterStdDevs = new Instances(instances, m_NumClusters); } m_ClusterSizes = new int[m_NumClusters]; for (i = 0; i < m_NumClusters; i++) { if (m_displayStdDevs) { double[] vals2 = new double[instances.numAttributes()]; for (int j = 0; j < instances.numAttributes(); j++) { if (instances.attribute(j).isNumeric()) { vals2[j] = Math.sqrt(tempI[i].variance(j)); } else { vals2[j] = Instance.missingValue(); } } m_ClusterStdDevs.add(new Instance(1.0, vals2)); } m_ClusterSizes[i] = tempI[i].numInstances(); } // Save memory!! m_DistanceFunction.clean(); if (!SESAME.SESAME_GUI) MyFirstClusterer.weka_gui = true; }