Example usage for weka.core Instances numInstances

Introduction

In this page you can find the example usage for weka.core Instances numInstances.

Prototype


public int numInstances()

Source Link

Document

Returns the number of instances in the dataset.

Usage

From source file:dkpro.similarity.experiments.sts2013baseline.util.Evaluator.java

License:Open Source License

/**
 * Runs a 10-fold cross-validation with a linear regression classifier for each given
 * dataset and writes the predicted scores, limited to the interval [0;5], to one CSV
 * file per dataset.
 *
 * <p>For every dataset the ARFF file below {@code MODELS_DIR} is first copied to a
 * "-plusIDs" variant via {@code AddID}, so that each prediction can be written back to
 * the row given by its instance ID after the data has been shuffled.
 *
 * @param mode the mode; its lower-cased name selects the sub-directory for input and output
 * @param datasets the datasets to process
 * @throws Exception if the data cannot be loaded or any Weka step fails
 */
public static void runLinearRegressionCV(Mode mode, Dataset... datasets) throws Exception {
    for (Dataset dataset : datasets) {
        // Set parameters
        int folds = 10;
        Classifier baseClassifier = new LinearRegression();

        // Set up the random number generator
        long seed = new Date().getTime();
        Random random = new Random(seed);

        // Every model file of this dataset shares the same path prefix
        String modelPathPrefix = MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString();
        String location = modelPathPrefix + "-plusIDs.arff";

        // Add IDs to the instances
        AddID.main(new String[] { "-i", modelPathPrefix + ".arff", "-o", location });

        Instances data = DataSource.read(location);

        if (data == null) {
            throw new IOException("Could not load data from: " + location);
        }

        data.setClassIndex(data.numAttributes() - 1);

        // Instantiate the Remove filter
        Remove removeIDFilter = new Remove();
        removeIDFilter.setAttributeIndices("first");

        // Randomize the data
        data.randomize(random);

        // Perform cross-validation
        Instances predictedData = null;
        Evaluation eval = new Evaluation(data);

        for (int n = 0; n < folds; n++) {
            Instances train = data.trainCV(folds, n, random);
            Instances test = data.testCV(folds, n);

            // Apply log filter
            Filter logFilter = new LogFilter();
            logFilter.setInputFormat(train);
            train = Filter.useFilter(train, logFilter);
            logFilter.setInputFormat(test);
            test = Filter.useFilter(test, logFilter);

            // Copy the classifier
            Classifier classifier = AbstractClassifier.makeCopy(baseClassifier);

            // Instantiate the FilteredClassifier
            FilteredClassifier filteredClassifier = new FilteredClassifier();
            filteredClassifier.setFilter(removeIDFilter);
            filteredClassifier.setClassifier(classifier);

            // Build the classifier
            filteredClassifier.buildClassifier(train);

            // Evaluate
            // NOTE(review): this evaluates the inner classifier on test data that still
            // contains the ID attribute, not the FilteredClassifier that removes it.
            // The result of `eval` is not used below - confirm whether
            // `filteredClassifier` was intended here.
            eval.evaluateModel(classifier, test);

            // Add predictions
            AddClassification filter = new AddClassification();
            filter.setClassifier(classifier);
            filter.setOutputClassification(true);
            filter.setOutputDistribution(false);
            filter.setOutputErrorFlag(true);
            filter.setInputFormat(train);
            Filter.useFilter(train, filter); // trains the classifier

            Instances pred = Filter.useFilter(test, filter); // performs predictions on test set
            if (predictedData == null) {
                predictedData = new Instances(pred, 0);
            }
            for (int j = 0; j < pred.numInstances(); j++) {
                predictedData.add(pred.instance(j));
            }
        }

        // Prepare output scores
        double[] scores = new double[predictedData.numInstances()];

        // The score is read from the second-to-last attribute of the predicted data
        int valueIdx = predictedData.numAttributes() - 2;

        for (Instance predInst : predictedData) {
            // IDs are 1-based (one is subtracted to obtain the array index)
            int id = (int) predInst.value(predInst.attribute(0)) - 1;

            double value = predInst.value(predInst.attribute(valueIdx));

            scores[id] = value;

            // Limit to interval [0;5]
            if (scores[id] > 5.0) {
                scores[id] = 5.0;
            }
            if (scores[id] < 0.0) {
                scores[id] = 0.0;
            }
        }

        // Output
        StringBuilder sb = new StringBuilder();
        for (double score : scores) {
            sb.append(score).append(LF);
        }

        FileUtils.writeStringToFile(
                new File(OUTPUT_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + ".csv"),
                sb.toString());
    }
}

From source file:edu.columbia.cs.ltrie.sampling.queries.generation.ChiSquaredWithYatesCorrectionAttributeEval.java

License:Open Source License

/**
 * Initializes a chi-squared attribute evaluator.
 * Discretizes all attributes that are numeric.
 *
 * <p>For every non-class attribute a contingency table of attribute value versus
 * class value is built from the instance weights, and the value returned by
 * {@code chiVal} for that table is stored in {@code m_ChiSquareds}. The entry at the
 * class index is never assigned and keeps its default of 0.
 *
 * @param data set of instances serving as training data 
 * @throws Exception if the evaluator has not been 
 * generated successfully
 */
public void buildEvaluator(Instances data) throws Exception {

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    // NOTE(review): both values are read before the data is filtered below; this
    // assumes the filters keep the class position and the number of instances.
    int classIndex = data.classIndex();
    int numInstances = data.numInstances();

    // Turn numeric attributes into nominal ones: discretize by default,
    // binarize instead when m_Binarize is set.
    if (!m_Binarize) {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    } else {
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    }
    int numClasses = data.attribute(classIndex).numValues();

    // Reserve space and initialize counters
    // counts[k][v][c] holds the summed weight of instances with value index v for
    // attribute k and class index c. The extra last row (index numValues) collects
    // missing attribute values, the extra last column (index numClasses) collects
    // missing class values. counts[classIndex] stays null.
    double[][][] counts = new double[data.numAttributes()][][];
    for (int k = 0; k < data.numAttributes(); k++) {
        if (k != classIndex) {
            int numValues = data.attribute(k).numValues();
            counts[k] = new double[numValues + 1][numClasses + 1];
        }
    }

    // Initialize counters
    // temp[c] is the total weight per class; temp[numClasses] is the weight of
    // instances whose class is missing.
    double[] temp = new double[numClasses + 1];
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        if (inst.classIsMissing()) {
            temp[numClasses] += inst.weight();
        } else {
            temp[(int) inst.classValue()] += inst.weight();
        }
    }
    // Row 0 of every attribute starts out with the full class totals. The loop
    // below moves weight out of row 0 for every value it visits, so what remains in
    // row 0 belongs to instances whose value for the attribute was not visited
    // (presumably the implicit zero entries of sparse instances - confirm).
    for (int k = 0; k < counts.length; k++) {
        if (k != classIndex) {
            for (int i = 0; i < temp.length; i++) {
                counts[k][0][i] = temp[i];
            }
        }
    }

    // Get counts
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != classIndex) {
                if (inst.isMissingSparse(i) || inst.classIsMissing()) {
                    if (!inst.isMissingSparse(i)) {
                        // value present, class missing -> missing-class column
                        counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    } else if (!inst.classIsMissing()) {
                        // value missing, class present -> missing-value row
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst
                                .classValue()] += inst.weight();
                        counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                    } else {
                        // both value and class missing -> corner cell
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst
                                .weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    }
                } else {
                    // value and class both present -> regular cell
                    counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight();
                    counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                }
            }
        }
    }

    // distribute missing counts if required
    if (m_missing_merge) {

        for (int k = 0; k < data.numAttributes(); k++) {
            if (k != classIndex) {
                int numValues = data.attribute(k).numValues();

                // Compute marginals
                // Only the cells with known value and known class are summed here.
                double[] rowSums = new double[numValues];
                double[] columnSums = new double[numClasses];
                double sum = 0;
                for (int i = 0; i < numValues; i++) {
                    for (int j = 0; j < numClasses; j++) {
                        rowSums[i] += counts[k][i][j];
                        columnSums[j] += counts[k][i][j];
                    }
                    sum += rowSums[i];
                }

                // Without any fully known weight there is nothing to distribute
                // proportionally; the table is then left as it is.
                if (Utils.gr(sum, 0)) {
                    double[][] additions = new double[numValues][numClasses];

                    // Compute what needs to be added to each row
                    // (weight with missing attribute value, split by row share)
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j];
                        }
                    }

                    // Compute what needs to be added to each column
                    // (weight with missing class, split by column share)
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses];
                        }
                    }

                    // Compute what needs to be added to each cell
                    // (weight with both missing, split by cell share)
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses];
                        }
                    }

                    // Make new contingency table
                    // The new table drops the extra missing row and column.
                    double[][] newTable = new double[numValues][numClasses];
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            newTable[i][j] = counts[k][i][j] + additions[i][j];
                        }
                    }
                    counts[k] = newTable;
                }
            }
        }
    }

    // Compute chi-squared values
    m_ChiSquareds = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (i != classIndex) {
            m_ChiSquareds[i] = chiVal(ContingencyTables.reduceMatrix(counts[i]));
        }
    }
}

From source file:edu.drexel.psal.jstylo.verifiers.WLSVM.java

License:Open Source License

/**
 * Converts every instance of a dataset into its sparse representation.
 *
 * @param data the dataset whose instances are converted
 * @return a vector with the result of {@code InstanceToSparse} for each instance,
 *         in dataset order
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
protected Vector DataToSparse(Instances data) {
    // one spare slot beyond the number of instances is reserved up front
    Vector sparseRows = new Vector(data.numInstances() + 1);

    int idx = 0;
    while (idx < data.numInstances()) {
        sparseRows.add(InstanceToSparse(data.instance(idx)));
        idx++;
    }
    return sparseRows;
}

From source file:edu.stanford.rsl.conrad.segmentation.GridFeatureExtractor.java

License:Open Source License

/**
 * Writes the current instances as ARFF text to the file named by the
 * {@code CLASSIFIER_DATA_LOCATION} registry entry followed by {@code "_"} and
 * {@code s}. Does nothing when that registry entry is not set.
 *
 * <p>The relation is always written under the name "testing". Instances are written
 * one per line, without a trailing newline after the last one.
 *
 * @param s suffix appended to the configured data location to form the file name
 * @throws IOException if the file cannot be created or written
 */
public void saveInstances(String s) throws IOException {
    if (Configuration.getGlobalConfiguration().getRegistryEntry(RegKeys.CLASSIFIER_DATA_LOCATION) == null) {
        return;
    }

    // try-with-resources closes the writer even when a write fails part-way
    try (BufferedWriter bw = new BufferedWriter(new FileWriter(
            Configuration.getGlobalConfiguration().getRegistryEntry(RegKeys.CLASSIFIER_DATA_LOCATION) + "_"
                    + s))) {
        System.out.println("Saving: " + s);

        Instances inst = getInstances();

        // Header: relation name, one line per attribute, then the data marker
        StringBuilder header = new StringBuilder();
        header.append("@relation").append(" ").append(Utils.quote("testing")).append("\n\n");
        for (int i = 0; i < inst.numAttributes(); i++) {
            header.append(inst.attribute(i)).append("\n");
        }
        header.append("\n").append("@data").append("\n");
        bw.write(header.toString());

        // Instances are written one by one so the data section is never held in
        // memory as a single string
        int total = inst.numInstances();
        for (int i = 0; i < total; i++) {
            bw.write(String.valueOf(inst.instance(i)));
            if (i < total - 1) {
                bw.write("\n");
            }
        }
        bw.flush();
    }
    System.out.println("Done.");
}

From source file:edu.uga.cs.fluxbuster.classification.Classifier.java

License:Open Source License

/**
 * Executes the classifier./*from  w w  w  .  j av a 2s .  co  m*/
 * 
 * @param prepfeatures the prepared features in arff format
 * @param modelfile the path to the serialized model
 * @param clusters the clusters to classify
 * @return a map of the classified clusters, the keys are the classes
 *       and the values are lists of cluster id's belonging to those classes
 */
private Map<ClusterClass, List<StoredDomainCluster>> executeClassifier(String prepfeatures, String modelfile,
        List<StoredDomainCluster> clusters) {
    Map<ClusterClass, List<StoredDomainCluster>> retval = new HashMap<ClusterClass, List<StoredDomainCluster>>();
    try {
        DataSource source = new DataSource(new ByteArrayInputStream(prepfeatures.getBytes()));
        Instances data = source.getDataSet();
        if (data.classIndex() == -1) {
            data.setClassIndex(data.numAttributes() - 1);
        }
        String[] options = weka.core.Utils.splitOptions("-p 0");
        J48 cls = (J48) weka.core.SerializationHelper.read(modelfile);
        cls.setOptions(options);
        for (int i = 0; i < data.numInstances(); i++) {
            double pred = cls.classifyInstance(data.instance(i));
            ClusterClass clusClass = ClusterClass
                    .valueOf(data.classAttribute().value((int) pred).toUpperCase());
            if (!retval.containsKey(clusClass)) {
                retval.put(clusClass, new ArrayList<StoredDomainCluster>());
            }
            retval.get(clusClass).add(clusters.get(i));
        }
    } catch (Exception e) {
        if (log.isErrorEnabled()) {
            log.error("Error executing classifier.", e);
        }
    }
    return retval;
}

From source file:edu.umbc.cs.maple.utils.WekaUtils.java

License:Open Source License

/** Take a certain percentage of a set of instances.
 * @param instances/*from   www  .  j  a v a 2 s.  co m*/
 * @param percentage
 * @return a reduced set of instances according to the given percentage
 */
public static Instances trimInstances(Instances instances, double percentage) {
    int numInstancesToKeep = (int) Math.ceil(instances.numInstances() * percentage);
    return trimInstances(instances, numInstancesToKeep);
}

From source file:edu.umbc.cs.maple.utils.WekaUtils.java

License:Open Source License

/** Keep only the leading instances of a set.
 * The input is left untouched; the result is a copy from which trailing
 * instances have been removed.
 * @param instances the set of instances to trim
 * @param numInstances the number of instances to keep
 * @return a reduced set of instances according to the given number to keep
 */
public static Instances trimInstances(Instances instances, int numInstances) {
    Instances result = new Instances(instances);
    // drop the last instance until the copy is no larger than requested
    while (result.numInstances() > numInstances) {
        result.delete(result.numInstances() - 1);
    }
    return result;
}

From source file:edu.umbc.cs.maple.utils.WekaUtils.java

License:Open Source License

/** Extract a particular subset of the instances.
 * The input is left untouched; the result is a copy that keeps only the
 * instances at indices {@code startIdx} to
 * {@code startIdx + numInstancesToRetrieve - 1}.
 * @param instances the set of instances to take the subset from
 * @param startIdx the start instance index
 * @param numInstancesToRetrieve the number of instances to retrieve
 * @return the specified subset of the instances.
 * @throws IllegalArgumentException if more instances are requested than exist
 *         from {@code startIdx} onwards
 */
public static Instances subsetInstances(Instances instances, int startIdx, int numInstancesToRetrieve) {
    // an instance count is integral; keeping it as int also makes the error
    // message read "5 instances" rather than "5.0 instances"
    int possibleNumInstancesToRetrieve = instances.numInstances() - startIdx;
    if (numInstancesToRetrieve > possibleNumInstancesToRetrieve) {
        throw new IllegalArgumentException(
                "Cannot retrieve more than " + possibleNumInstancesToRetrieve + " instances.");
    }

    int endIdx = startIdx + numInstancesToRetrieve - 1;

    // delete all instance indices outside of [startIdx, endIdx];
    // iterating backwards keeps the remaining indices stable while deleting
    Instances subset = new Instances(instances);
    for (int i = subset.numInstances() - 1; i >= 0; i--) {
        if (i < startIdx || i > endIdx) {
            subset.delete(i);
        }
    }

    return subset;
}

From source file:edu.umbc.cs.maple.utils.WekaUtils.java

License:Open Source License

/** Merge two instance sets.
 * @param instances1/*from  w  w w  .  j av  a 2 s.com*/
 * @param instances2
 * @return the merged instance sets
 */
public static Instances mergeInstances(Instances instances1, Instances instances2) {
    if (instances1 == null)
        return instances2;
    if (instances2 == null)
        return instances1;
    if (!instances1.checkInstance(instances2.firstInstance()))
        throw new IllegalArgumentException("The instance sets are incompatible.");
    Instances mergedInstances = new Instances(instances1);
    Instances tempInstances = new Instances(instances2);
    for (int i = 0; i < tempInstances.numInstances(); i++) {
        mergedInstances.add(tempInstances.instance(i));
    }
    return mergedInstances;
}

From source file:edu.umbc.cs.maple.utils.WekaUtils.java

License:Open Source License

/**
 * Builds one feature vector per instance.
 * @param instances The set of instances.
 * @return An array with one row per instance, in dataset order; each row is the
 *         result of {@code instanceToDoubleArray} for that instance.
 */
public static double[][] instancesToDoubleArrays(Instances instances) {
    double[][] rows = new double[instances.numInstances()][];
    int row = 0;
    while (row < instances.numInstances()) {
        rows[row] = instanceToDoubleArray(instances.instance(row));
        row++;
    }
    return rows;
}