List of usage examples for weka.core Instances numInstances
public int numInstances()
From source file:assign00.ExperimentShell.java
/** * @param args the command line arguments *//*from ww w .ja va2 s . c om*/ public static void main(String[] args) throws Exception { DataSource source = new DataSource(file); Instances dataSet = source.getDataSet(); //Set up data dataSet.setClassIndex(dataSet.numAttributes() - 1); dataSet.randomize(new Random(1)); //determine sizes int trainingSize = (int) Math.round(dataSet.numInstances() * .7); int testSize = dataSet.numInstances() - trainingSize; Instances training = new Instances(dataSet, 0, trainingSize); Instances test = new Instances(dataSet, trainingSize, testSize); Standardize standardizedData = new Standardize(); standardizedData.setInputFormat(training); Instances newTest = Filter.useFilter(test, standardizedData); Instances newTraining = Filter.useFilter(training, standardizedData); NeuralNetworkClassifier NWC = new NeuralNetworkClassifier(); NWC.buildClassifier(newTraining); Evaluation eval = new Evaluation(newTraining); eval.evaluateModel(NWC, newTest); System.out.println(eval.toSummaryString("\nResults\n======\n", false)); }
From source file:aw_cluster.myKMeans.java
@Override public void buildClusterer(Instances data) throws Exception { getCapabilities().testWithFail(data); Instances instances = new Instances(data); instances.setClassIndex(-1);/*w ww.j av a 2 s .c om*/ if (instances.numInstances() == 0) { throw new RuntimeException("Dataset should not be empty"); } assignments = new int[instances.numInstances()]; centroid = new Instances(instances, numCluster); distanceFunction.setInstances(instances); squaredError = new double[numCluster]; // Initialize Centroid Random From seed Random random = new Random(getSeedRandom()); Instances tempInstances = new Instances(instances); int tI = tempInstances.numInstances() - 1; while (tI >= 0 && centroid.numInstances() < numCluster) { int indexCentroid = random.nextInt(tI + 1); centroid.add(tempInstances.instance(indexCentroid)); tempInstances.swap(tI, indexCentroid); tI--; } tempInstances = null; boolean converged = false; while (!converged) { converged = true; numIteration++; for (int i = 0; i < instances.numInstances(); ++i) { Instance toCluster = instances.instance(i); int clusterResult = clusterInstanceProcess(toCluster, true); if (clusterResult != assignments[i]) converged = false; assignments[i] = clusterResult; } // update centroid Instances[] TempI = new Instances[numCluster]; centroid = new Instances(instances, numCluster); for (int i = 0; i < TempI.length; ++i) { TempI[i] = new Instances(instances, 0); } for (int i = 0; i < instances.numInstances(); ++i) { TempI[assignments[i]].add(instances.instance(i)); } for (int i = 0; i < TempI.length; ++i) { moveCentroid(TempI[i]); } if (converged) squaredError = new double[numCluster]; if (numIteration == maxIteration) converged = true; sizeEachCluster = new int[numCluster]; for (int i = 0; i < numCluster; ++i) { sizeEachCluster[i] = TempI[i].numInstances(); } } }
From source file:binarytreesom.clustering.BinaryTreeSOMClustering.java
/** * Initialize the tree configuration. This implementation considers a complete binary tree of depth h. *///from www .j av a2s . c o m private void initialize() throws IOException { //the number of nodes N, is penednt on h. actualy N //h=ln N -> N=2^h Instances instances = readArff(getFilenameARFF()); instances.setClassIndex(-1);//clustering Stuff numberOfInstances = instances.numInstances(); dimensionality = instances.numAttributes(); data = new double[getNumberOfInstances()][getDimensionality()]; weight = new double[getNumberOfNeurons()][getDimensionality()]; //randomly select instances and assign to weight. for (int k = 0; k < getNumberOfNeurons(); k++) { weight[k] = instances.instance(r.nextInt(getNumberOfInstances())).toDoubleArray(); //hard copy of the double array } for (int k = 0; k < getNumberOfInstances(); k++) { data[k] = instances.instance(k).toDoubleArray(); //hard copy of the double array } }
From source file:bme.mace.logicdomain.Evaluation.java
License:Open Source License
/** * Evaluates the classifier on a given set of instances. Note that the data * must have exactly the same format (e.g. order of attributes) as the data * used to train the classifier! Otherwise the results will generally be * meaningless.//w ww. j a v a2s. c o m * * @param classifier machine learning classifier * @param data set of test instances for evaluation * @param forPredictionsString varargs parameter that, if supplied, is * expected to hold a StringBuffer to print predictions to, a Range * of attributes to output and a Boolean (true if the distribution is * to be printed) * @return the predictions * @throws Exception if model could not be evaluated successfully */ public double[] evaluateModel(List<LibSVM> classifier, List<Double> classifierWeight, Instances data, Object... forPredictionsPrinting) throws Exception { // for predictions printing StringBuffer buff = null; Range attsToOutput = null; boolean printDist = false; double predictions[] = new double[data.numInstances()]; if (forPredictionsPrinting.length > 0) { buff = (StringBuffer) forPredictionsPrinting[0]; attsToOutput = (Range) forPredictionsPrinting[1]; printDist = ((Boolean) forPredictionsPrinting[2]).booleanValue(); } // Need to be able to collect predictions if appropriate (for AUC) for (int i = 0; i < data.numInstances(); i++) { predictions[i] = evaluateModelOnceAndRecordPrediction(classifier, classifierWeight, data.instance(i)); } for (int i = 0; i < predictions.length; i++) { System.out.print(predictions[i]); } return predictions; }
From source file:bme.mace.logicdomain.Evaluation.java
License:Open Source License
/** * Sets the class prior probabilities/* w w w.j a va 2s . co m*/ * * @param train the training instances used to determine the prior * probabilities * @throws Exception if the class attribute of the instances is not set */ public void setPriors(Instances train) throws Exception { m_NoPriors = false; if (!m_ClassIsNominal) { m_NumTrainClassVals = 0; m_TrainClassVals = null; m_TrainClassWeights = null; m_PriorErrorEstimator = null; m_ErrorEstimator = null; for (int i = 0; i < train.numInstances(); i++) { Instance currentInst = train.instance(i); if (!currentInst.classIsMissing()) { addNumericTrainClass(currentInst.classValue(), currentInst.weight()); } } } else { for (int i = 0; i < m_NumClasses; i++) { m_ClassPriors[i] = 1; } m_ClassPriorsSum = m_NumClasses; for (int i = 0; i < train.numInstances(); i++) { if (!train.instance(i).classIsMissing()) { m_ClassPriors[(int) train.instance(i).classValue()] += train.instance(i).weight(); m_ClassPriorsSum += train.instance(i).weight(); } } } }
From source file:boostingPL.boosting.AdaBoost.java
License:Open Source License
public AdaBoost(Instances insts, int numInterations) { this.insts = insts; this.numIterations = numInterations; this.classifiers = new Classifier[numInterations]; this.cweights = new double[numInterations]; // initialize instance's weight int numInstances = insts.numInstances(); for (int i = 0; i < numInstances; i++) { double tweight = 1.0 / numInstances; insts.instance(i).setWeight(tweight); }/*from ww w . j av a 2 s .c o m*/ //System.out.println("instances weights total: " + insts.sumOfWeights()); }
From source file:boostingPL.boosting.SAMME.java
License:Open Source License
public SAMME(Instances insts, int numInterations) { this.insts = insts; this.numIterations = numInterations; this.classifiers = new Classifier[numInterations]; this.cweights = new double[numInterations]; // initialize instance's weight final int numInstances = insts.numInstances(); for (int i = 0; i < numInstances; i++) { double tweight = 1.0 / numInstances; insts.instance(i).setWeight(tweight); }// ww w.j a v a 2 s. c om //System.out.println("instances weights total: " + insts.sumOfWeights()); }
From source file:br.com.edu.arff.LoadArff.java
public ArrayList<Cluster> carregarArff(String caminho) throws FileNotFoundException, IOException { BufferedReader reader = new BufferedReader(new FileReader(caminho)); ArffReader arff = new ArffReader(reader); Instances data = arff.getData(); data.setClassIndex(data.numAttributes() - 1); Instance inst = null;//from w w w . j a v a 2 s. c o m Attribute att = data.attribute("Cluster"); ArrayList<String> uris; ArrayList<Cluster> lista = new ArrayList<Cluster>(); Fuseki fuseki = new Fuseki(); uris = fuseki.buscaURIS(); for (int i = 0; i <= data.numInstances() - 1; i++) { Cluster cluster = new Cluster(); String clusters = String.valueOf(data.get(i).stringValue(att)); cluster.setUri(uris.get(i)); cluster.setGrupo(clusters); lista.add(cluster); } // for (Cluster c : lista) { // System.out.println(c.getUri()); // System.out.println(c.getGrupo()); // } return lista; }
From source file:br.com.ufu.lsi.rebfnetwork.RBFModel.java
License:Open Source License
/** * Method used to pre-process the data, perform clustering, and * set the initial parameter vector.//from w w w. j a v a 2s .c o m */ protected Instances initializeClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); data = new Instances(data); data.deleteWithMissingClass(); // Make sure data is shuffled Random random = new Random(m_Seed); if (data.numInstances() > 2) { random = data.getRandomNumberGenerator(m_Seed); } data.randomize(random); double y0 = data.instance(0).classValue(); // This stuff is not relevant in classification case int index = 1; while (index < data.numInstances() && data.instance(index).classValue() == y0) { index++; } if (index == data.numInstances()) { // degenerate case, all class values are equal // we don't want to deal with this, too much hassle throw new Exception("All class values are the same. At least two class values should be different"); } double y1 = data.instance(index).classValue(); // Replace missing values m_ReplaceMissingValues = new ReplaceMissingValues(); m_ReplaceMissingValues.setInputFormat(data); data = Filter.useFilter(data, m_ReplaceMissingValues); // Remove useless attributes m_AttFilter = new RemoveUseless(); m_AttFilter.setInputFormat(data); data = Filter.useFilter(data, m_AttFilter); // only class? 
-> build ZeroR model if (data.numAttributes() == 1) { System.err.println( "Cannot build model (only class attribute present in data after removing useless attributes!), " + "using ZeroR model instead!"); m_ZeroR = new weka.classifiers.rules.ZeroR(); m_ZeroR.buildClassifier(data); return data; } else { m_ZeroR = null; } // Transform attributes m_NominalToBinary = new NominalToBinary(); m_NominalToBinary.setInputFormat(data); data = Filter.useFilter(data, m_NominalToBinary); m_Filter = new Normalize(); ((Normalize) m_Filter).setIgnoreClass(true); m_Filter.setInputFormat(data); data = Filter.useFilter(data, m_Filter); double z0 = data.instance(0).classValue(); // This stuff is not relevant in classification case double z1 = data.instance(index).classValue(); m_x1 = (y0 - y1) / (z0 - z1); // no division by zero, since y0 != y1 guaranteed => z0 != z1 ??? m_x0 = (y0 - m_x1 * z0); // = y1 - m_x1 * z1 m_classIndex = data.classIndex(); m_numClasses = data.numClasses(); m_numAttributes = data.numAttributes(); // Run k-means SimpleKMeans skm = new SimpleKMeans(); skm.setMaxIterations(10000); skm.setNumClusters(m_numUnits); Remove rm = new Remove(); data.setClassIndex(-1); rm.setAttributeIndices((m_classIndex + 1) + ""); rm.setInputFormat(data); Instances dataRemoved = Filter.useFilter(data, rm); data.setClassIndex(m_classIndex); skm.buildClusterer(dataRemoved); Instances centers = skm.getClusterCentroids(); if (centers.numInstances() < m_numUnits) { m_numUnits = centers.numInstances(); } // Set up arrays OFFSET_WEIGHTS = 0; if (m_useAttributeWeights) { OFFSET_ATTRIBUTE_WEIGHTS = (m_numUnits + 1) * m_numClasses; OFFSET_CENTERS = OFFSET_ATTRIBUTE_WEIGHTS + m_numAttributes; } else { OFFSET_ATTRIBUTE_WEIGHTS = -1; OFFSET_CENTERS = (m_numUnits + 1) * m_numClasses; } OFFSET_SCALES = OFFSET_CENTERS + m_numUnits * m_numAttributes; switch (m_scaleOptimizationOption) { case USE_GLOBAL_SCALE: m_RBFParameters = new double[OFFSET_SCALES + 1]; break; case USE_SCALE_PER_UNIT_AND_ATTRIBUTE: 
m_RBFParameters = new double[OFFSET_SCALES + m_numUnits * m_numAttributes]; break; default: m_RBFParameters = new double[OFFSET_SCALES + m_numUnits]; break; } // Set initial radius based on distance to nearest other basis function double maxMinDist = -1; for (int i = 0; i < centers.numInstances(); i++) { double minDist = Double.MAX_VALUE; for (int j = i + 1; j < centers.numInstances(); j++) { double dist = 0; for (int k = 0; k < centers.numAttributes(); k++) { if (k != centers.classIndex()) { double diff = centers.instance(i).value(k) - centers.instance(j).value(k); dist += diff * diff; } } if (dist < minDist) { minDist = dist; } } if ((minDist != Double.MAX_VALUE) && (minDist > maxMinDist)) { maxMinDist = minDist; } } // Initialize parameters if (m_scaleOptimizationOption == USE_GLOBAL_SCALE) { m_RBFParameters[OFFSET_SCALES] = Math.sqrt(maxMinDist); } for (int i = 0; i < m_numUnits; i++) { if (m_scaleOptimizationOption == USE_SCALE_PER_UNIT) { m_RBFParameters[OFFSET_SCALES + i] = Math.sqrt(maxMinDist); } int k = 0; for (int j = 0; j < m_numAttributes; j++) { if (k == centers.classIndex()) { k++; } if (j != data.classIndex()) { if (m_scaleOptimizationOption == USE_SCALE_PER_UNIT_AND_ATTRIBUTE) { m_RBFParameters[OFFSET_SCALES + (i * m_numAttributes + j)] = Math.sqrt(maxMinDist); } m_RBFParameters[OFFSET_CENTERS + (i * m_numAttributes) + j] = centers.instance(i).value(k); k++; } } } if (m_useAttributeWeights) { for (int j = 0; j < m_numAttributes; j++) { if (j != data.classIndex()) { m_RBFParameters[OFFSET_ATTRIBUTE_WEIGHTS + j] = 1.0; } } } initializeOutputLayer(random); return data; }
From source file:br.fapesp.myutils.MyUtils.java
License:Open Source License
/**
 * Prints the data set to {@code System.out} as a matrix: one line per instance, every
 * attribute value followed by a single space.
 *
 * @param data the data set to print
 */
public static void print_dataset_as_matrix(Instances data) {
    for (int row = 0; row < data.numInstances(); row++) {
        for (int col = 0; col < data.numAttributes(); col++) {
            double cell = data.instance(row).value(col);
            System.out.print(cell + " ");
        }
        System.out.println();
    }
}