List of usage examples for weka.core Instances numInstances
public int numInstances()
From source file:lascer.WekaClassifier.java
License:Open Source License
/**
 * Generates the classifier.
 *
 * Parses the command-line options (formula kind/class, logging, pruning
 * quotas), converts the weka training data into the project's extended
 * Instances representation, derives the positive/negative example sets,
 * generates a formula for the requested class(es), and optionally re-learns
 * on pruned data.
 *
 * @param data the data to be used.
 *
 * @exception Exception if the classifier can't built successfully.
 */
public void buildClassifier(Instances data) throws Exception {
    weka.coreExtended.Instances extendedInstances;
    weka.coreExtended.BasicInstance extInst;
    weka.coreExtended.BasicAttribute classAttribut;
    de.unistuttgart.commandline.Option formelnArtOption;   // option: kind of formula
    de.unistuttgart.commandline.Option formelnKlasseOption; // option: class of formula
    de.unistuttgart.commandline.Option loggingSwitch;
    Instance readInst;
    Beispieldaten invDatensatz; // NOTE(review): declared but never used in this method
    StringReader stringReader;
    Enumeration instEnum;
    Enumeration attribEnum;
    PraedErzParameter praedErzParameter = null;
    KonzErzParameter konzErzParameter = null;
    Pruning pruning;
    String formelArt;    // kind of formula: "dis", "kon" or "beste" (best)
    String formelKlasse; // class to generate a formula for: "pos", "neg", "beste" or "beide" (both)
    String optionWert;
    float posPruneAnt, negPruneAnt;
    int instNumber;
    boolean unbekannteWertBsp; // whether examples with unknown values are allowed

    Steuerung.parseArguments(parser);

    // Determine the kind of formula, defaulting to the project constant.
    formelArt = Konstanten.WEKA_FORMEL_ART;
    formelnArtOption = parser.getOption("formelArt");
    if (parser.isEnabled(formelnArtOption)) {
        optionWert = parser.getParameter(formelnArtOption);
        if (!optionWert.equals("dis") && !optionWert.equals("kon") && !optionWert.equals("beste")) {
            // German: "value of option formelArt is invalid" / "valid: ..."
            System.err.println("Wert der Option formelArt unzulssig");
            System.err.println("Zulssig: " + formelnArtOption.toString());
            throw (new RuntimeException("Wert von Option unzulssig."));
        }
        formelArt = optionWert;
    }

    // Determine the class the formula is generated for, defaulting to the
    // project constant.
    formelKlasse = Konstanten.WEKA_FORMEL_KLASSE;
    formelnKlasseOption = parser.getOption("formelKlasse");
    if (parser.isEnabled(formelnKlasseOption)) {
        optionWert = parser.getParameter(formelnKlasseOption);
        if (!optionWert.equals("pos") && !optionWert.equals("neg") && !optionWert.equals("beste")
                && !optionWert.equals("beide")) {
            // German: "value of option formelKlasse is invalid" / "valid: ..."
            System.err.println("Wert der Option formelKlasse unzulssig");
            System.err.println("Zulssig: " + formelnKlasseOption.toString());
            throw (new RuntimeException("Wert von Option unzulssig."));
        }
        formelKlasse = optionWert;
    }

    loggingSwitch = parser.getOption("logging");
    if (debugMode || parser.isEnabled(loggingSwitch)) {
        Steuerung.setLogLevel(Konstanten.LOGGING_LEVEL);
    }

    // Determine the run parameters.
    unbekannteWertBsp = Steuerung.unbekannteWertBeispiele(parser);
    posPruneAnt = Steuerung.posPruneAnteil(parser);
    negPruneAnt = Steuerung.negPruneAnteil(parser);
    praedErzParameter = Steuerung.praedErzParameter(parser);
    konzErzParameter = Steuerung.konzErzParameter(parser);

    // Read the data and create the extended Instances object. The weka
    // dataset is round-tripped through its ARFF text form to build the
    // extended header, then the instances are copied over individually.
    instNumber = data.numInstances();
    stringReader = new StringReader(data.toString());
    extendedInstances = new weka.coreExtended.Instances(stringReader, instNumber);
    instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        readInst = (Instance) instEnum.nextElement();
        extInst = new weka.coreExtended.BasicInstance(readInst.weight(), readInst.toDoubleArray());
        extendedInstances.addBasicInstance(extInst);
    }

    // Create the data sets (fields): the negative set is an inverted copy
    // of the positive one.
    posDatensatz = ArffDateiEinlesen.beispieldaten(extendedInstances, unbekannteWertBsp);
    negDatensatz = posDatensatz.kopie(true);

    // Create the list of attributes (field).
    attributListe = new LinkedList();
    attribEnum = extendedInstances.enumerateBasicAttributes();
    while (attribEnum.hasMoreElements()) {
        attributListe.add(attribEnum.nextElement());
    }

    // Determine the index values of the classification labels (fields).
    classAttribut = extendedInstances.basicClassAttribute();
    wekaClassTrue = classAttribut.indexOfValue("true");
    wekaClassFalse = classAttribut.indexOfValue("false");

    // Generate the formula for the class of the positive examples.
    if (formelKlasse.equals("pos") || formelKlasse.equals("beste") || formelKlasse.equals("beide")) {
        posFormel = generatedFormula(posDatensatz, praedErzParameter, konzErzParameter, formelArt);
    }

    // Generate the formula for the class of the negative examples.
    if (formelKlasse.equals("neg") || formelKlasse.equals("beste") || formelKlasse.equals("beide")) {
        negFormel = generatedFormula(negDatensatz, praedErzParameter, konzErzParameter, formelArt);
    }

    if (formelKlasse.equals("beste")) {
        // Keep only the better of the two formulas; discard the worse one.
        if (negFormel.istBesser(posFormel)) {
            posFormel = null;
        } else {
            negFormel = null;
        }
    }

    // If pruning quotas were given, reduce the data with the learned
    // formula and re-learn each surviving formula on the reduced set.
    if ((posPruneAnt > 0) || (negPruneAnt > 0)) {
        pruning = new Pruning();
        if (posFormel != null) {
            posDatensatz = pruning.reduzierteDaten(posDatensatz, posFormel, posPruneAnt, negPruneAnt);
            posFormel = generatedFormula(posDatensatz, praedErzParameter, konzErzParameter, formelArt);
        }
        if (negFormel != null) {
            negDatensatz = pruning.reduzierteDaten(negDatensatz, negFormel, negPruneAnt, posPruneAnt);
            negFormel = generatedFormula(negDatensatz, praedErzParameter, konzErzParameter, formelArt);
        }
    }
}
From source file:lattice.Lattice.java
License:Open Source License
/**
 * Constructor of a lattice over the given variables of the dataset.
 *
 * Builds, for every (attribute, value) pair, the TID set (a BitSet of the
 * row indexes where that attribute takes that value), then creates the
 * lattice root and the single-variable nodes from those sets.
 *
 * @param dataset the weka dataset the lattice counts over
 */
public Lattice(Instances dataset) {
    // ~ initialise internal structure for counting (TID sets)
    this.nbInstances = dataset.numInstances();
    this.nbVariables = dataset.numAttributes();
    // presence[a][v]: rows on which attribute a has value index v.
    BitSet[][] presence = new BitSet[nbVariables][];
    TreeSet<Integer> allAttributesNumbers = new TreeSet<Integer>();
    int[] nbValuesForAttribute = new int[nbVariables];
    for (int a = 0; a < nbVariables; a++) {
        nbValuesForAttribute[a] = dataset.numDistinctValues(a) + 1; //+1 for missing
        presence[a] = new BitSet[nbValuesForAttribute[a]];
        allAttributesNumbers.add(a);
        for (int v = 0; v < presence[a].length; v++) {
            presence[a][v] = new BitSet();
        }
    }
    // One pass over the data: set one bit per (attribute, value, row).
    for (int i = 0; i < nbInstances; i++) {
        Instance row = dataset.instance(i);
        for (int a = 0; a < nbVariables; a++) {
            int indexOfValue;
            if (row.isMissing(a)) {
                // indexOfValue = (int) dataset.meanOrMode(a);
                indexOfValue = dataset.numDistinctValues(a); //missing at the end
            } else {
                // NOTE(review): stringValue/indexOfValue assumes nominal (or
                // string) attributes — confirm numeric attributes never reach
                // this constructor.
                String value = row.stringValue(a);
                indexOfValue = row.attribute(a).indexOfValue(value);
            }
            presence[a][indexOfValue].set(i);
        }
    }
    // initialise the first nodes of the lattice (i.e., the ones
    // corresponding to single variables
    this.all = new LatticeNode(this, nbValuesForAttribute);
    this.singleNodes = new LatticeNode[nbVariables];
    for (int a = 0; a < nbVariables; a++) {
        int[] variablesNumbers = { a };
        LatticeNode node = new LatticeNode(this, variablesNumbers, nbValuesForAttribute, presence[a], all);
        singleNodes[a] = node;
    }
}
From source file:lector.Analizador.java
public static void clasificador() { BufferedReader reader1;/*from w w w . j a v a 2 s . c om*/ BufferedReader reader2; try { reader1 = new BufferedReader(new FileReader("/Users/danieltapia/Google Drive/EPN/MAESTRIA/MSW128 BI/" + "proyecto/compartida/DataSetAnalisisSentimientos.arff")); reader2 = new BufferedReader(new FileReader("/Users/danieltapia/Google Drive/EPN/MAESTRIA/MSW128 BI/" + "proyecto/compartida/DataSetAnalisisSentimientos_inc.arff")); Instances train = new Instances(reader1); train.setClassIndex(train.numAttributes() - 1); System.out.println(train.classIndex() + " " + train.numAttributes()); Instances test = new Instances(reader2); test.setClassIndex(train.numAttributes() - 1); System.out.println(test.classIndex() + " " + test.numAttributes()); NaiveBayes model = new NaiveBayes(); model.buildClassifier(train); //classify Instances labeled = new Instances(test); for (int i = 0; i < test.numInstances(); i++) { double clsLabel = model.classifyInstance(test.instance(i)); labeled.instance(i).setClassValue(clsLabel); } // https://youtu.be/JY_x5zKTfyo?list=PLJbE6j2EG1pZnBhOg3_Rb63WLCprtyJag Evaluation eval_train = new Evaluation(test); eval_train.evaluateModel(model, test); reader1.close(); reader2.close(); //System.out.println(eval_train.toSummaryString("\nResults\n======\n", false)); String[] options = new String[4]; options[0] = "-t"; //name of training file options[1] = "/Users/danieltapia/Google Drive/EPN/MAESTRIA/MSW128 BI/proyecto/" + "compartida/DataSetAnalisisSentimientos.arff"; options[2] = "-T"; options[3] = "/Users/danieltapia/Google Drive/EPN/MAESTRIA/MSW128 BI/proyecto/" + "compartida/DataSetAnalisisSentimientos_inc.arff"; System.out.println(Evaluation.evaluateModel(model, options)); try ( // print classification results to file BufferedWriter writer = new BufferedWriter( new FileWriter("/Users/danieltapia/Google Drive/EPN/MAESTRIA/MSW128 BI/" + "proyecto/compartida/DataSetAnalisisSentimientos_labeled.arff"))) { 
writer.write(labeled.toString()); } } catch (Exception e) { } }
From source file:liac.igmn.util.MatrixUtil.java
License:Open Source License
/** * //w ww . j a va 2 s . c o m * @param data instancias carregadas pelo weka * @return matriz correspondentes as instancias */ public static SimpleMatrix instancesToMatrix(Instances data) { double dataset[][] = new double[data.numAttributes()][data.numInstances()]; for (int i = 0; i < data.numInstances(); i++) { double[] B = data.instance(i).toDoubleArray(); for (int j = 0; j < data.numAttributes(); j++) dataset[j][i] = B[j]; } return new SimpleMatrix(dataset); }
From source file:lineage.AAFClusterer.java
License:Open Source License
/** * K-Means Clustering/*from w w w .jav a2s . co m*/ * @param data - matrix of observations (numObs x numFeatures) * @param k - number of clusters */ public Cluster[] kmeans(double[][] data, int numObs, int numFeatures, int k) { Instances ds = convertMatrixToWeka(data, numObs, numFeatures); // uses Euclidean distance by default SimpleKMeans clusterer = new SimpleKMeans(); try { clusterer.setPreserveInstancesOrder(true); clusterer.setNumClusters(k); clusterer.buildClusterer(ds); // cluster centers Instances centers = clusterer.getClusterCentroids(); Cluster[] clusters = new Cluster[centers.numInstances()]; for (int i = 0; i < centers.numInstances(); i++) { Instance inst = centers.instance(i); double[] mean = new double[inst.numAttributes()]; for (int j = 0; j < mean.length; j++) { mean[j] = inst.value(j); } clusters[i] = new Cluster(mean, i); } // cluster members int[] assignments = clusterer.getAssignments(); for (int i = 0; i < assignments.length; i++) { clusters[assignments[i]].addMember(i); } return clusters; } catch (Exception e) { e.printStackTrace(); System.exit(-1); return null; } }
From source file:lineage.AAFClusterer.java
License:Open Source License
/**
 * Expectation Maximization clustering.
 *
 * The number of clusters is chosen by weka's EM/ClusterEvaluation (via
 * cross-validation), not passed in. Centroids and per-feature standard
 * deviations are recomputed from the hard cluster assignments.
 *
 * @param data - matrix of observations (numObs x numFeatures)
 * @return one Cluster per discovered cluster, with centroid, members and
 *         per-feature standard deviation filled in
 */
public Cluster[] em(double[][] data, int numObs, int numFeatures) {
    Instances ds = convertMatrixToWeka(data, numObs, numFeatures);
    EM clusterer = new EM();
    try {
        clusterer.buildClusterer(ds);
        ClusterEvaluation eval = new ClusterEvaluation();
        eval.setClusterer(clusterer);
        eval.evaluateClusterer(new Instances(ds));
        int numClusters = eval.getNumClusters();
        Cluster[] clusters = new Cluster[numClusters];
        double[][] clusterCentroids = new double[numClusters][numFeatures];
        int[] clusterCount = new int[numClusters];
        double[] assignments = eval.getClusterAssignments();
        // First pass: accumulate per-cluster feature sums and member counts.
        for (int i = 0; i < ds.numInstances(); i++) {
            Instance inst = ds.instance(i);
            int clusterId = (int) assignments[i];
            for (int j = 0; j < numFeatures; j++) {
                clusterCentroids[clusterId][j] += inst.value(j);
            }
            clusterCount[clusterId]++;
        }
        // Turn the sums into means.
        // NOTE(review): divides by clusterCount[i]; an empty cluster would
        // yield NaN centroids — confirm EM never reports an empty cluster.
        for (int i = 0; i < numClusters; i++) {
            double[] mean = new double[numFeatures];
            for (int j = 0; j < numFeatures; j++) {
                mean[j] = clusterCentroids[i][j] / clusterCount[i];
            }
            clusters[i] = new Cluster(mean, i);
        }
        // cluster members & std dev
        // Second pass: register members and accumulate squared deviations
        // from the centroid computed above.
        double[][] clusterStdDev = new double[numClusters][numFeatures];
        for (int i = 0; i < ds.numInstances(); i++) {
            int clusterId = (int) assignments[i];
            clusters[clusterId].addMember(i);
            for (int j = 0; j < numFeatures; j++) {
                clusterStdDev[clusterId][j] += Math
                        .pow(ds.instance(i).value(j) - clusters[clusterId].getCentroid()[j], 2);
            }
        }
        // Population standard deviation (divides by the member count).
        for (int i = 0; i < numClusters; i++) {
            double[] dev = new double[numFeatures];
            for (int j = 0; j < numFeatures; j++) {
                dev[j] = Math.sqrt(clusterStdDev[i][j] / clusterCount[i]);
            }
            clusters[i].setStdDev(dev);
        }
        return clusters;
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
        return null;
    }
}
From source file:linqs.gaia.model.oc.ncc.WekaClassifier.java
License:Open Source License
/** * Predict single item but specify whether or not to save weka * test file for the single item.// w ww .ja v a 2 s .c o m * * @param testitem Single test item * @param savewekatestfile True to save weka test file and false otherwise * @return Predicted value */ private CategValue predictSingleItem(Decorable testitem, boolean savewekatestfile) { CategValue cvalue = null; Instances testinstances = gaia2weka(testitem, true); try { if (savewekatestfile) { String savefile = this.getStringParameter("wekatestfile"); this.saveWekaInstances(savefile + "-" + testitem, testinstances); } int numinstances = testinstances.numInstances(); if (numinstances != 1) { throw new InvalidStateException("Only one predicted item should ever be returned"); } Instance inst = testinstances.instance(0); double prob[] = this.wekaclassifier.distributionForInstance(inst); // Can just take maximum. This is equivalent to what Weka does // to classify the instance. This saves the cost of recomputing. int pred = ArrayUtils.maxValueIndex(prob); cvalue = new CategValue(this.targetcategories.get(pred), prob); } catch (Exception e) { throw new RuntimeException(e); } return cvalue; }
From source file:LogReg.Logistic.java
License:Open Source License
/**
 * Builds the classifier.
 *
 * Pipeline: capability check, missing-class removal, missing-value
 * replacement, useless-attribute removal, nominal-to-binary transform,
 * then extraction and normalisation of the data matrix and maximisation of
 * the multinomial logistic log-likelihood via the OptEng optimizer. The
 * fitted coefficients are de-normalised back into original attribute units.
 *
 * @param train the training data to be used for generating the
 *              boosted classifier.
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances train) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(train);

    // remove instances with missing class (work on a copy).
    train = new Instances(train);
    train.deleteWithMissingClass();

    // Replace missing values
    m_ReplaceMissingValues = new ReplaceMissingValues();
    m_ReplaceMissingValues.setInputFormat(train);
    train = Filter.useFilter(train, m_ReplaceMissingValues);

    // Remove useless attributes
    m_AttFilter = new RemoveUseless();
    m_AttFilter.setInputFormat(train);
    train = Filter.useFilter(train, m_AttFilter);

    // Transform attributes
    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(train);
    train = Filter.useFilter(train, m_NominalToBinary);

    // Save the structure for printing the model
    m_structure = new Instances(train, 0);

    // Extract data
    m_ClassIndex = train.classIndex();
    m_NumClasses = train.numClasses();

    int nK = m_NumClasses - 1; // Only K-1 class labels needed
    int nR = m_NumPredictors = train.numAttributes() - 1;
    int nC = train.numInstances();

    m_Data = new double[nC][nR + 1]; // Data values (column 0 is the intercept)
    int[] Y = new int[nC]; // Class labels
    double[] xMean = new double[nR + 1]; // Attribute means
    xSD = new double[nR + 1]; // Attribute stddev's (field; reused after fitting)
    double[] sY = new double[nK + 1]; // Per-class (weighted) case counts
    double[] weights = new double[nC]; // Weights of instances
    double totWeights = 0; // Total weights of the instances
    m_Par = new double[nR + 1][nK]; // Optimized parameter values

    if (m_Debug) {
        System.out.println("Extracting data...");
    }
    // Copy each instance into m_Data, skipping the class column, while
    // accumulating weighted sums for mean/stddev.
    for (int i = 0; i < nC; i++) {
        // initialize X[][]
        Instance current = train.instance(i);
        Y[i] = (int) current.classValue(); // Class value starts from 0
        weights[i] = current.weight(); // Dealing with weights
        totWeights += weights[i];
        m_Data[i][0] = 1; // intercept term
        int j = 1;
        for (int k = 0; k <= nR; k++) {
            if (k != m_ClassIndex) {
                double x = current.value(k);
                m_Data[i][j] = x;
                xMean[j] += weights[i] * x;
                xSD[j] += weights[i] * x * x;
                j++;
            }
        }
        // Class count
        sY[Y[i]]++;
    }

    if ((totWeights <= 1) && (nC > 1))
        throw new Exception("Sum of weights of instances less than 1, please reweight!");

    // Finalize weighted means and standard deviations; the intercept column
    // keeps mean 0 / sd 1 so normalisation leaves it untouched.
    xMean[0] = 0;
    xSD[0] = 1;
    for (int j = 1; j <= nR; j++) {
        xMean[j] = xMean[j] / totWeights;
        if (totWeights > 1)
            xSD[j] = Math.sqrt(Math.abs(xSD[j] - totWeights * xMean[j] * xMean[j]) / (totWeights - 1));
        else
            xSD[j] = 0;
    }

    if (m_Debug) {
        // Output stats about input data
        System.out.println("Descriptives...");
        for (int m = 0; m <= nK; m++)
            System.out.println(sY[m] + " cases have class " + m);
        System.out.println("\n Variable Avg SD ");
        for (int j = 1; j <= nR; j++)
            System.out.println(Utils.doubleToString(j, 8, 4) + Utils.doubleToString(xMean[j], 10, 4)
                    + Utils.doubleToString(xSD[j], 10, 4));
    }

    // Normalise input data
    for (int i = 0; i < nC; i++) {
        for (int j = 0; j <= nR; j++) {
            if (xSD[j] != 0) {
                m_Data[i][j] = (m_Data[i][j] - xMean[j]) / xSD[j];
            }
        }
    }

    if (m_Debug) {
        System.out.println("\nIteration History...");
    }

    // Flat parameter vector: nK blocks of (nR + 1) coefficients each.
    double x[] = new double[(nR + 1) * nK];
    double[][] b = new double[2][x.length]; // Boundary constraints, N/A here

    // Initialize: intercepts from class-frequency log-odds, slopes at zero;
    // NaN bounds mean "unconstrained" to the optimizer.
    for (int p = 0; p < nK; p++) {
        int offset = p * (nR + 1);
        x[offset] = Math.log(sY[p] + 1.0) - Math.log(sY[nK] + 1.0); // Null model
        b[0][offset] = Double.NaN;
        b[1][offset] = Double.NaN;
        for (int q = 1; q <= nR; q++) {
            x[offset + q] = 0.0;
            b[0][offset + q] = Double.NaN;
            b[1][offset + q] = Double.NaN;
        }
    }

    OptEng opt = new OptEng();
    opt.setDebug(m_Debug);
    opt.setWeights(weights);
    opt.setClassLabels(Y);

    if (m_MaxIts == -1) { // Search until convergence
        x = opt.findArgmin(x, b);
        // findArgmin returns null when its internal iteration budget is hit
        // before convergence; resume from the current values until it converges.
        while (x == null) {
            x = opt.getVarbValues();
            if (m_Debug)
                System.out.println("200 iterations finished, not enough!");
            x = opt.findArgmin(x, b);
        }
        if (m_Debug)
            System.out.println(" -------------<Converged>--------------");
    } else {
        opt.setMaxIteration(m_MaxIts);
        x = opt.findArgmin(x, b);
        if (x == null) // Not enough, but use the current value
            x = opt.getVarbValues();
    }

    m_LL = -opt.getMinFunction(); // Log-likelihood

    // Don't need data matrix anymore
    m_Data = null;

    // Convert coefficients back to non-normalized attribute units
    for (int i = 0; i < nK; i++) {
        m_Par[0][i] = x[i * (nR + 1)];
        for (int j = 1; j <= nR; j++) {
            m_Par[j][i] = x[i * (nR + 1) + j];
            if (xSD[j] != 0) {
                m_Par[j][i] /= xSD[j];
                m_Par[0][i] -= m_Par[j][i] * xMean[j];
            }
        }
    }
}
From source file:lu.lippmann.cdb.common.gui.dataset.InstanceTableModel.java
License:Open Source License
public void setDataset(final Instances dataSet) { this.dataSet = dataSet; final int numInstances = dataSet.numInstances(); final ArrayList<Instance> pdata = new ArrayList<Instance>(numInstances); //Initialize rows Id final int dsSize = dataSet.numInstances(); this.rows = new ArrayList<Integer>(dsSize); for (int i = 0; i < dsSize; i++) { rows.add(i);/*from ww w . j a va2s .c o m*/ } //Initialize data for (int i = 0; i < numInstances; i++) { pdata.add(dataSet.instance(i)); } super.setData(pdata); }
From source file:lu.lippmann.cdb.common.gui.ts.TimeSeriesChartUtil.java
License:Open Source License
private static void fillWithSingleAxis(final Instances dataSet, final int dateIdx, final TimeSeriesCollection tsDataset) { final int numInstances = dataSet.numInstances(); final Calendar cal = Calendar.getInstance(); for (final Integer i : WekaDataStatsUtil.getNumericAttributesIndexes(dataSet)) { if (dataSet.attributeStats(i).missingCount == dataSet.numInstances()) { System.out.println("TimeSeriesChartUtil: Only missing values for '" + dataSet.attribute(i).name() + "', so skip it!"); continue; }//from ww w . j a v a 2 s .co m final TimeSeries ts = new TimeSeries(dataSet.attribute(i).name()); for (int k = 0; k < numInstances; k++) { final Instance instancek = dataSet.instance(k); final long timeInMilliSec = (long) instancek.value(dateIdx); cal.setTimeInMillis(timeInMilliSec); if (instancek.isMissing(i)) { ts.addOrUpdate(new Millisecond(cal.getTime()), null); } else { ts.addOrUpdate(new Millisecond(cal.getTime()), instancek.value(i)); } } if (!ts.isEmpty()) tsDataset.addSeries(ts); } }