List of usage examples for the weka.core Instances(Instances, int) constructor
public Instances(Instances dataset, int capacity)
From source file:PrincipalComponents.java
License:Open Source License
/**
 * Returns the header of the training data after all filtering (e.g. missing
 * values and nominal-to-binary conversion) has been applied.
 *
 * @return a header-only (empty) copy of the filtered training data
 */
public Instances getFilteredInputFormat() {
    // Capacity 0 copies only the dataset structure, no instances.
    return new Instances(m_trainInstances, 0);
}
From source file:WekaRegressor.java
License:Open Source License
/**
 * Copy constructor. Deep-copies state via serialization so this regressor
 * shares no mutable objects with {@code toCopy}.
 *
 * @param toCopy the regressor to duplicate
 */
public WekaRegressor(WekaRegressor toCopy) {
    this.wekaClassifier = OtherUtils.serializationCopy(toCopy.wekaClassifier);
    if (toCopy.wekaDataSet != null) {
        // Header-only copy (capacity 0): only the dataset structure is kept.
        this.wekaDataSet = OtherUtils.serializationCopy(new Instances(toCopy.wekaDataSet, 0));
    }
}
From source file:WekaRegressor.java
License:Open Source License
/**
 * Trains the wrapped Weka classifier on the given regression data set.
 * Any failure during conversion or training is rethrown as a
 * {@code FailedToFitException}.
 *
 * @param dataSet the training data
 */
@Override
public void train(RegressionDataSet dataSet) {
    try {
        Instances instances = InstanceHandler.dataSetToInstances(dataSet);
        // Keep a header-only (capacity 0) copy of the dataset structure so
        // compatible instances can be built later for prediction.
        this.wekaDataSet = OtherUtils.serializationCopy(new Instances(instances, 0));
        wekaClassifier.buildClassifier(instances);
    } catch (Exception ex) {
        throw new FailedToFitException(ex);
    }
}
From source file:GrowTree.java
Attribute bestSplit(Instances D) { double imin = 1.0; Attribute fbest = null;/*from www . j av a2 s . c o m*/ Enumeration enat = D.enumerateAttributes(); while (enat.hasMoreElements()) { Attribute a = (Attribute) enat.nextElement(); //split D into subsets d1 to dn based on values vi based on features Instances[] split = new Instances[a.numValues()]; for (int i = 0; i < a.numValues(); i++) { split[i] = new Instances(D, D.numInstances()); } Enumeration x = D.enumerateInstances(); while (x.hasMoreElements()) { Instance in = (Instance) x.nextElement(); split[(int) in.value(a)].add(in); } for (int i = 0; i < split.length; i++) { split[i].compactify(); } for (int i = 0; i < a.numValues(); i++) { if (imp(split[i]) < imin) { imin = imp(split[i]); fbest = a; //evaluate the best feature to make root } } } return fbest; }
From source file:ArffLoader.java
License:Open Source License
/**
 * Determines and returns (if possible) the structure (internally the header)
 * of the data set as an empty set of instances.
 *
 * @return the structure of the data set as an empty set of Instances
 * @throws IOException if no source has been specified or the header cannot
 *           be parsed as ARFF
 */
public Instances getStructure() throws IOException {
    if (m_structure == null) {
        if (m_sourceReader == null) {
            throw new IOException("No source has been specified");
        }
        try {
            m_ArffReader = new ArffReader(m_sourceReader, 1);
            m_structure = m_ArffReader.getStructure();
        } catch (Exception ex) {
            // Preserve the original exception as the cause instead of
            // flattening it into the message only (stack trace was lost).
            throw new IOException(
                "Unable to determine structure as arff (Reason: " + ex.toString() + ").", ex);
        }
    }
    // Capacity 0: return a header-only copy so callers cannot mutate m_structure.
    return new Instances(m_structure, 0);
}
From source file:ArffLoader.java
License:Open Source License
/** * Return the full data set. If the structure hasn't yet been determined by a * call to getStructure then method should do so before processing the rest of * the data set.//from ww w . j a va2 s . co m * * @return the structure of the data set as an empty set of Instances * @throws IOException if there is no source or parsing fails */ public Instances getDataSet() throws IOException { Instances insts = null; try { if (m_sourceReader == null) { throw new IOException("No source has been specified"); } if (m_structure == null) { getStructure(); } // Read all instances Instance inst; insts = new Instances(m_structure, 0); while ((inst = m_ArffReader.readInstance(m_structure)) != null) { insts.add(inst); } // Instances readIn = new Instances(m_structure); } finally { if (m_sourceReader != null) { // close the stream m_sourceReader.close(); } } return insts; }
From source file:WrapperSubset.java
License:Open Source License
@Override public int[] postProcess(int[] attributeSet) { // save memory m_trainInstances = new Instances(m_trainInstances, 0); return attributeSet; }
From source file:ID3Chi.java
License:Open Source License
/** * Splits a dataset according to the values of a nominal attribute. * * @param data/*ww w . jav a 2 s .com*/ * the data which is to be split * @param att * the attribute to be used for splitting * @return the sets of instances produced by the split */ private Instances[] splitData(Instances data, Attribute att) { // [att.numValues()] is location for "unknown" values Instances[] subset = new Instances[att.numValues() + 1]; for (int j = 0; j <= att.numValues(); j++) { subset[j] = new Instances(data, data.numInstances()); } Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); if (inst.isMissing(att)) { subset[att.numValues()].add(inst); } else { subset[(int) inst.value(att)].add(inst); } } for (int i = 0; i < subset.length; i++) { subset[i].compactify(); } return subset; }
From source file:MPCKMeans.java
License:Open Source License
/**
 * Generates a clusterer. Instances in data have to be either all sparse or
 * all non-sparse.
 *
 * Sets up the (possibly per-cluster) learnable metrics, initializes the
 * centroids, optionally converts/smooths them, and then runs K-Means.
 *
 * @param data set of instances serving as training data
 * @exception Exception if the clusterer has not been generated successfully
 */
public void buildClusterer(Instances data) throws Exception {
    System.out.println("ML weight=" + m_MLweight);
    System.out.println("CL weight= " + m_CLweight);
    System.out.println("LOG term weight=" + m_logTermWeight);
    System.out.println("Regularizer weight= " + m_regularizerTermWeight);

    m_RandomNumberGenerator = new Random(m_RandomSeed);

    if (m_metric instanceof OfflineLearnableMetric) {
        m_isOfflineMetric = true;
    } else {
        m_isOfflineMetric = false;
    }

    // Don't rebuild the metric if it was already trained
    if (!m_metricBuilt) {
        m_metric.buildMetric(data);
        m_metricBuilt = true;
        m_metricLearner.setMetric(m_metric);
        m_metricLearner.setClusterer(this);

        m_metrics = new LearnableMetric[m_NumClusters];
        m_metricLearners = new MPCKMeansMetricLearner[m_NumClusters];
        for (int i = 0; i < m_metrics.length; i++) {
            if (m_useMultipleMetrics) {
                // One independent metric (and learner) per cluster.
                m_metrics[i] = (LearnableMetric) m_metric.clone();
                m_metricLearners[i] = (MPCKMeansMetricLearner) m_metricLearner.clone();
                m_metricLearners[i].setMetric(m_metrics[i]);
                m_metricLearners[i].setClusterer(this);
            } else {
                // A single metric/learner shared by all clusters.
                m_metrics[i] = m_metric;
                m_metricLearners[i] = m_metricLearner;
            }
        }
    }

    setInstances(data);
    m_ClusterCentroids = new Instances(m_Instances, m_NumClusters);
    m_ClusterAssignments = new int[m_Instances.numInstances()];

    // NOTE(review): this rejects data only when it has BOTH nominal and
    // string attributes (&&), while the message implies nominal alone is
    // unsupported — possibly intended as ||; confirm before changing.
    if (m_Instances.checkForNominalAttributes() && m_Instances.checkForStringAttributes()) {
        throw new UnsupportedAttributeTypeException("Cannot handle nominal attributes\n");
    }

    m_ClusterCentroids = m_Initializer.initialize();

    // if all instances are smoothed by the metric, the centroids
    // need to be smoothed too (note that this is independent of
    // centroid smoothing performed by K-Means)
    if (m_metric instanceof InstanceConverter) {
        System.out.println("Converting centroids...");
        Instances convertedCentroids = new Instances(m_ClusterCentroids, m_NumClusters);
        for (int i = 0; i < m_ClusterCentroids.numInstances(); i++) {
            Instance centroid = m_ClusterCentroids.instance(i);
            convertedCentroids.add(((InstanceConverter) m_metric).convertInstance(centroid));
        }

        // Replace the centroid set in place with the converted instances.
        m_ClusterCentroids.delete();
        for (int i = 0; i < convertedCentroids.numInstances(); i++) {
            m_ClusterCentroids.add(convertedCentroids.instance(i));
        }
    }

    System.out.println("Done initializing clustering ...");
    getIndexClusters();

    if (m_verbose && m_Seedable) {
        printIndexClusters();
        for (int i = 0; i < m_NumClusters; i++) {
            System.out.println("Centroid " + i + ": " + m_ClusterCentroids.instance(i));
        }
    }

    // Some extra work for smoothing metrics
    if (m_metric instanceof SmoothingMetric && ((SmoothingMetric) m_metric).getUseSmoothing()) {
        SmoothingMetric smoothingMetric = (SmoothingMetric) m_metric;
        Instances smoothedCentroids = new Instances(m_Instances, m_NumClusters);
        for (int i = 0; i < m_ClusterCentroids.numInstances(); i++) {
            Instance smoothedCentroid = smoothingMetric.smoothInstance(m_ClusterCentroids.instance(i));
            smoothedCentroids.add(smoothedCentroid);
        }
        m_ClusterCentroids = smoothedCentroids;
        updateSmoothingMetrics();
    }

    runKMeans();
}
From source file:MPCKMeans.java
License:Open Source License
/** M-step of the KMeans clustering algorithm -- updates cluster centroids *//* w w w. j a v a 2 s. c om*/ protected void updateClusterCentroids() throws Exception { Instances[] tempI = new Instances[m_NumClusters]; Instances tempCentroids = m_ClusterCentroids; Instances tempNewCentroids = new Instances(m_Instances, m_NumClusters); m_ClusterCentroids = new Instances(m_Instances, m_NumClusters); // tempI[i] stores the cluster instances for cluster i for (int i = 0; i < m_NumClusters; i++) { tempI[i] = new Instances(m_Instances, 0); } for (int i = 0; i < m_Instances.numInstances(); i++) { tempI[m_ClusterAssignments[i]].add(m_Instances.instance(i)); } // Calculates cluster centroids for (int i = 0; i < m_NumClusters; i++) { double[] values = new double[m_Instances.numAttributes()]; Instance centroid = null; if (m_isSparseInstance) { // uses fast meanOrMode values = ClusterUtils.meanOrMode(tempI[i]); centroid = new SparseInstance(1.0, values); } else { // non-sparse, go through each attribute for (int j = 0; j < m_Instances.numAttributes(); j++) { values[j] = tempI[i].meanOrMode(j); // uses usual meanOrMode } centroid = new Instance(1.0, values); } // // debugging: compare previous centroid w/current: // double w = 0; // for (int j = 0; j < m_Instances.numAttributes(); j++) w += values[j] * values[j]; // double w1 = 0; // for (int j = 0; j < m_Instances.numAttributes(); j++) w1 += tempCentroids.instance(i).value(j) * tempCentroids.instance(i).value(j); // System.out.println("\tOldCentroid=" + w1); // System.out.println("\tNewCentroid=" + w); // double prevObj = 0, currObj = 0; // for (int j = 0; j < tempI[i].numInstances(); j++) { // Instance instance = tempI[i].instance(j); // double prevPen = m_metrics[i].penalty(instance, tempCentroids.instance(i)); // double currPen = m_metrics[i].penalty(instance, centroid); // prevObj += prevPen; // currObj += currPen; // //System.out.println("\t\t" + j + " " + prevPen + " -> " + currPen + "\t" + prevObj + " -> " + currObj); // } 
// // dump instances out if there is a problem. // System.out.println("\t\t" + prevObj + " -> " + currObj); // if (currObj > prevObj) { // PrintWriter out = new PrintWriter(new BufferedOutputStream(new FileOutputStream("/tmp/INST.arff")), true); // out.println(new Instances(tempI[i], 0)); // out.println(centroid); // out.println(tempCentroids.instance(i)); // for (int j = 0; j < tempI[i].numInstances(); j++) { // out.println(tempI[i].instance(j)); // } // out.close(); // System.out.println(" Updated cluster " + i + "(" // + tempI[i].numInstances()); // System.exit(0); // } // if we are using a smoothing metric, smooth the centroids if (m_metric instanceof SmoothingMetric && ((SmoothingMetric) m_metric).getUseSmoothing()) { System.out.println("\tSmoothing..."); SmoothingMetric smoothingMetric = (SmoothingMetric) m_metric; centroid = smoothingMetric.smoothInstance(centroid); } // DEBUGGING: replaced line under with block below m_ClusterCentroids.add(centroid); // { // tempNewCentroids.add(centroid); // m_ClusterCentroids.delete(); // for (int j = 0; j <= i; j++) { // m_ClusterCentroids.add(tempNewCentroids.instance(j)); // } // for (int j = i+1; j < m_NumClusters; j++) { // m_ClusterCentroids.add(tempCentroids.instance(j)); // } // double objBackup = m_Objective; // System.out.println(" Updated cluster " + i + "(" // + tempI[i].numInstances() + "); obj=" + // calculateObjectiveFunction(false)); // m_Objective = objBackup; // } // in SPKMeans, cluster centroids need to be normalized if (m_metric.doesNormalizeData()) { m_metric.normalizeInstanceWeighted(m_ClusterCentroids.instance(i)); } } if (m_metric instanceof SmoothingMetric && ((SmoothingMetric) m_metric).getUseSmoothing()) updateSmoothingMetrics(); for (int i = 0; i < m_NumClusters; i++) tempI[i] = null; // free memory }