List of usage examples for weka.core Instances numInstances
public int numInstances()
From source file:dkpro.similarity.experiments.sts2013baseline.util.Evaluator.java
License:Open Source License
public static void runLinearRegressionCV(Mode mode, Dataset... datasets) throws Exception { for (Dataset dataset : datasets) { // Set parameters int folds = 10; Classifier baseClassifier = new LinearRegression(); // Set up the random number generator long seed = new Date().getTime(); Random random = new Random(seed); // Add IDs to the instances AddID.main(new String[] { "-i", MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + ".arff", "-o", MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + "-plusIDs.arff" }); String location = MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + "-plusIDs.arff"; Instances data = DataSource.read(location); if (data == null) { throw new IOException("Could not load data from: " + location); }/*from www .j a v a 2 s. co m*/ data.setClassIndex(data.numAttributes() - 1); // Instantiate the Remove filter Remove removeIDFilter = new Remove(); removeIDFilter.setAttributeIndices("first"); // Randomize the data data.randomize(random); // Perform cross-validation Instances predictedData = null; Evaluation eval = new Evaluation(data); for (int n = 0; n < folds; n++) { Instances train = data.trainCV(folds, n, random); Instances test = data.testCV(folds, n); // Apply log filter Filter logFilter = new LogFilter(); logFilter.setInputFormat(train); train = Filter.useFilter(train, logFilter); logFilter.setInputFormat(test); test = Filter.useFilter(test, logFilter); // Copy the classifier Classifier classifier = AbstractClassifier.makeCopy(baseClassifier); // Instantiate the FilteredClassifier FilteredClassifier filteredClassifier = new FilteredClassifier(); filteredClassifier.setFilter(removeIDFilter); filteredClassifier.setClassifier(classifier); // Build the classifier filteredClassifier.buildClassifier(train); // Evaluate eval.evaluateModel(classifier, test); // Add predictions AddClassification filter = new AddClassification(); filter.setClassifier(classifier); 
filter.setOutputClassification(true); filter.setOutputDistribution(false); filter.setOutputErrorFlag(true); filter.setInputFormat(train); Filter.useFilter(train, filter); // trains the classifier Instances pred = Filter.useFilter(test, filter); // performs predictions on test set if (predictedData == null) { predictedData = new Instances(pred, 0); } for (int j = 0; j < pred.numInstances(); j++) { predictedData.add(pred.instance(j)); } } // Prepare output scores double[] scores = new double[predictedData.numInstances()]; for (Instance predInst : predictedData) { int id = new Double(predInst.value(predInst.attribute(0))).intValue() - 1; int valueIdx = predictedData.numAttributes() - 2; double value = predInst.value(predInst.attribute(valueIdx)); scores[id] = value; // Limit to interval [0;5] if (scores[id] > 5.0) { scores[id] = 5.0; } if (scores[id] < 0.0) { scores[id] = 0.0; } } // Output StringBuilder sb = new StringBuilder(); for (Double score : scores) { sb.append(score.toString() + LF); } FileUtils.writeStringToFile( new File(OUTPUT_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + ".csv"), sb.toString()); } }
From source file:edu.columbia.cs.ltrie.sampling.queries.generation.ChiSquaredWithYatesCorrectionAttributeEval.java
License:Open Source License
/** * Initializes a chi-squared attribute evaluator. * Discretizes all attributes that are numeric. * * @param data set of instances serving as training data * @throws Exception if the evaluator has not been * generated successfully//from w w w .j a va 2 s .c om */ public void buildEvaluator(Instances data) throws Exception { // can evaluator handle data? getCapabilities().testWithFail(data); int classIndex = data.classIndex(); int numInstances = data.numInstances(); if (!m_Binarize) { Discretize disTransform = new Discretize(); disTransform.setUseBetterEncoding(true); disTransform.setInputFormat(data); data = Filter.useFilter(data, disTransform); } else { NumericToBinary binTransform = new NumericToBinary(); binTransform.setInputFormat(data); data = Filter.useFilter(data, binTransform); } int numClasses = data.attribute(classIndex).numValues(); // Reserve space and initialize counters double[][][] counts = new double[data.numAttributes()][][]; for (int k = 0; k < data.numAttributes(); k++) { if (k != classIndex) { int numValues = data.attribute(k).numValues(); counts[k] = new double[numValues + 1][numClasses + 1]; } } // Initialize counters double[] temp = new double[numClasses + 1]; for (int k = 0; k < numInstances; k++) { Instance inst = data.instance(k); if (inst.classIsMissing()) { temp[numClasses] += inst.weight(); } else { temp[(int) inst.classValue()] += inst.weight(); } } for (int k = 0; k < counts.length; k++) { if (k != classIndex) { for (int i = 0; i < temp.length; i++) { counts[k][0][i] = temp[i]; } } } // Get counts for (int k = 0; k < numInstances; k++) { Instance inst = data.instance(k); for (int i = 0; i < inst.numValues(); i++) { if (inst.index(i) != classIndex) { if (inst.isMissingSparse(i) || inst.classIsMissing()) { if (!inst.isMissingSparse(i)) { counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight(); counts[inst.index(i)][0][numClasses] -= inst.weight(); } else if (!inst.classIsMissing()) { 
counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst .classValue()] += inst.weight(); counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight(); } else { counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst .weight(); counts[inst.index(i)][0][numClasses] -= inst.weight(); } } else { counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight(); counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight(); } } } } // distribute missing counts if required if (m_missing_merge) { for (int k = 0; k < data.numAttributes(); k++) { if (k != classIndex) { int numValues = data.attribute(k).numValues(); // Compute marginals double[] rowSums = new double[numValues]; double[] columnSums = new double[numClasses]; double sum = 0; for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { rowSums[i] += counts[k][i][j]; columnSums[j] += counts[k][i][j]; } sum += rowSums[i]; } if (Utils.gr(sum, 0)) { double[][] additions = new double[numValues][numClasses]; // Compute what needs to be added to each row for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j]; } } // Compute what needs to be added to each column for (int i = 0; i < numClasses; i++) { for (int j = 0; j < numValues; j++) { additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses]; } } // Compute what needs to be added to each cell for (int i = 0; i < numClasses; i++) { for (int j = 0; j < numValues; j++) { additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses]; } } // Make new contingency table double[][] newTable = new double[numValues][numClasses]; for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { newTable[i][j] = counts[k][i][j] + additions[i][j]; } } counts[k] = newTable; } } } } // Compute chi-squared values m_ChiSquareds = new double[data.numAttributes()]; 
for (int i = 0; i < data.numAttributes(); i++) { if (i != classIndex) { m_ChiSquareds[i] = chiVal(ContingencyTables.reduceMatrix(counts[i])); } } }
From source file:edu.drexel.psal.jstylo.verifiers.WLSVM.java
License:Open Source License
/** * converts an ARFF dataset into sparse format * // w w w .j a v a 2 s. co m * @param instances * @return */ @SuppressWarnings({ "rawtypes", "unchecked" }) protected Vector DataToSparse(Instances data) { Vector sparse = new Vector(data.numInstances() + 1); for (int i = 0; i < data.numInstances(); i++) { // for each instance sparse.add(InstanceToSparse(data.instance(i))); } return sparse; }
From source file:edu.stanford.rsl.conrad.segmentation.GridFeatureExtractor.java
License:Open Source License
public void saveInstances(String s) throws IOException { if (Configuration.getGlobalConfiguration().getRegistryEntry(RegKeys.CLASSIFIER_DATA_LOCATION) != null) { BufferedWriter bw = new BufferedWriter(new FileWriter( Configuration.getGlobalConfiguration().getRegistryEntry(RegKeys.CLASSIFIER_DATA_LOCATION) + "_" + s));/*from www .j a va2s . c o m*/ System.out.println("Saving: " + s); //bw.write(getInstances().toString()); Instances inst = getInstances(); StringBuffer text = new StringBuffer(); text.append("@relation").append(" ").append(Utils.quote("testing")).append("\n\n"); for (int i = 0; i < inst.numAttributes(); i++) { text.append(inst.attribute(i)).append("\n"); } text.append("\n").append("@data").append("\n"); bw.write(text.toString()); for (int i = 0; i < inst.numInstances(); i++) { text = new StringBuffer(); text.append(inst.instance(i)); if (i < inst.numInstances() - 1) { text.append('\n'); } bw.write(text.toString()); } bw.flush(); bw.close(); System.out.println("Done."); } }
From source file:edu.uga.cs.fluxbuster.classification.Classifier.java
License:Open Source License
/** * Executes the classifier./*from w w w . j av a 2s . co m*/ * * @param prepfeatures the prepared features in arff format * @param modelfile the path to the serialized model * @param clusters the clusters to classify * @return a map of the classified clusters, the keys are the classes * and the values are lists of cluster id's belonging to those classes */ private Map<ClusterClass, List<StoredDomainCluster>> executeClassifier(String prepfeatures, String modelfile, List<StoredDomainCluster> clusters) { Map<ClusterClass, List<StoredDomainCluster>> retval = new HashMap<ClusterClass, List<StoredDomainCluster>>(); try { DataSource source = new DataSource(new ByteArrayInputStream(prepfeatures.getBytes())); Instances data = source.getDataSet(); if (data.classIndex() == -1) { data.setClassIndex(data.numAttributes() - 1); } String[] options = weka.core.Utils.splitOptions("-p 0"); J48 cls = (J48) weka.core.SerializationHelper.read(modelfile); cls.setOptions(options); for (int i = 0; i < data.numInstances(); i++) { double pred = cls.classifyInstance(data.instance(i)); ClusterClass clusClass = ClusterClass .valueOf(data.classAttribute().value((int) pred).toUpperCase()); if (!retval.containsKey(clusClass)) { retval.put(clusClass, new ArrayList<StoredDomainCluster>()); } retval.get(clusClass).add(clusters.get(i)); } } catch (Exception e) { if (log.isErrorEnabled()) { log.error("Error executing classifier.", e); } } return retval; }
From source file:edu.umbc.cs.maple.utils.WekaUtils.java
License:Open Source License
/** Take a certain percentage of a set of instances. * @param instances/*from www . j a v a 2 s. co m*/ * @param percentage * @return a reduced set of instances according to the given percentage */ public static Instances trimInstances(Instances instances, double percentage) { int numInstancesToKeep = (int) Math.ceil(instances.numInstances() * percentage); return trimInstances(instances, numInstancesToKeep); }
From source file:edu.umbc.cs.maple.utils.WekaUtils.java
License:Open Source License
/** Take a certain number of a set of instances.
 * The first {@code numInstances} instances are copied into a new set; the given set is not
 * modified. If the set holds fewer instances than requested, all of them are kept.
 * @param instances the instances to reduce
 * @param numInstances the number of instances to keep
 * @return a reduced set of instances according to the given number to keep
 * @throws IllegalArgumentException if {@code numInstances} is negative
 */
public static Instances trimInstances(Instances instances, int numInstances) {
    if (numInstances < 0) {
        throw new IllegalArgumentException(
                "Cannot keep a negative number of instances: " + numInstances);
    }
    // Copy just the leading range instead of copying everything and deleting the tail.
    int numToKeep = Math.min(numInstances, instances.numInstances());
    return new Instances(instances, 0, numToKeep);
}
From source file:edu.umbc.cs.maple.utils.WekaUtils.java
License:Open Source License
/** Extract a particular subset of the instances. * @param instances//w w w. j av a 2s . c o m * @param startIdx the start instance index * @param numInstancesToRetrieve the number of instances to retrieve * @return the specified subset of the instances. */ public static Instances subsetInstances(Instances instances, int startIdx, int numInstancesToRetrieve) { double possibleNumInstancesToRetrieve = instances.numInstances() - startIdx; if (numInstancesToRetrieve > possibleNumInstancesToRetrieve) { throw new IllegalArgumentException( "Cannot retrieve more than " + possibleNumInstancesToRetrieve + " instances."); } int endIdx = startIdx + numInstancesToRetrieve - 1; // delete all instance indices outside of [startIdx, endIdx] Instances subset = new Instances(instances); for (int i = subset.numInstances() - 1; i >= 0; i--) { if (i < startIdx || i > endIdx) subset.delete(i); } return subset; }
From source file:edu.umbc.cs.maple.utils.WekaUtils.java
License:Open Source License
/** Merge two instance sets. * @param instances1/*from w w w . j av a 2 s.com*/ * @param instances2 * @return the merged instance sets */ public static Instances mergeInstances(Instances instances1, Instances instances2) { if (instances1 == null) return instances2; if (instances2 == null) return instances1; if (!instances1.checkInstance(instances2.firstInstance())) throw new IllegalArgumentException("The instance sets are incompatible."); Instances mergedInstances = new Instances(instances1); Instances tempInstances = new Instances(instances2); for (int i = 0; i < tempInstances.numInstances(); i++) { mergedInstances.add(tempInstances.instance(i)); } return mergedInstances; }
From source file:edu.umbc.cs.maple.utils.WekaUtils.java
License:Open Source License
/**
 * Turns a set of instances into an array of feature vectors, one row per instance and in
 * the same order, by applying {@code instanceToDoubleArray} to each instance.
 * @param instances The set of instances.
 * @return The array of feature vectors.
 */
public static double[][] instancesToDoubleArrays(Instances instances) {
    final int rowCount = instances.numInstances();
    final double[][] vectors = new double[rowCount][];
    int row = 0;
    while (row < rowCount) {
        vectors[row] = instanceToDoubleArray(instances.instance(row));
        row++;
    }
    return vectors;
}