Example usage for weka.core Instances numInstances

List of usage examples for weka.core Instances numInstances

Introduction

In this page you can find the example usage for weka.core Instances numInstances.

Prototype


public int numInstances()

Source Link

Document

Returns the number of instances in the dataset.

Usage

From source file:edu.umbc.cs.maple.utils.WekaUtils.java

License:Open Source License

/**
 * Predicts a class for every instance of the data set with the given model.
 * An instance whose classification throws an exception gets the prediction -1.
 *
 * @param model the model used to classify each instance
 * @param data the instances to classify
 * @return an array of class predictions, one entry per instance
 */
public static int[] predictClasses(Classifier model, Instances data) {
    final int[] result = new int[data.numInstances()];
    final int count = data.numInstances();
    int idx = 0;
    while (idx < count) {
        int predicted;
        try {
            predicted = (int) model.classifyInstance(data.instance(idx));
        } catch (Exception e) {
            // best effort: mark this instance as failed instead of aborting the whole run
            predicted = -1;
        }
        result[idx] = predicted;
        idx++;
    }
    return result;
}

From source file:edu.umbc.cs.maple.utils.WekaUtils.java

License:Open Source License

/** Gets the class labels for a set of instances.
 * @param data/*from  ww  w  .  j a va  2s. co m*/
 * @return a vector of the class labels for the data set, with one entry per instance
 */
public static int[] getLabels(Instances data) {
    int[] classLabels = new int[data.numInstances()];
    for (int instIdx = 0; instIdx < classLabels.length; instIdx++) {
        classLabels[instIdx] = (int) data.instance(instIdx).classValue();
    }
    return classLabels;
}

From source file:edu.umbc.cs.maple.utils.WekaUtils.java

License:Open Source License

/**
 * Collects the class value of every instance as a {@code double}.
 *
 * @param data the instances to read the class values from
 * @return an array of class values with one entry per instance, in data set order
 */
public static double[] getClassValues(Instances data) {
    final int total = data.numInstances();
    final double[] values = new double[total];
    for (int pos = 0; pos != total; ++pos) {
        Instance current = data.instance(pos);
        values[pos] = current.classValue();
    }
    return values;
}

From source file:edu.umbc.cs.maple.utils.WekaUtils.java

License:Open Source License

/**
 * Replaces the nominal class attribute of a data set with a binary one whose labels are
 * "Y" (the instance had the positive class value) and "N" (any other class value).
 * This method is destructive: it modifies {@code data} directly and appends
 * "-" plus the positive class value to the relation name.
 *
 * @param data the multiclass data set to be converted to binary
 * @param positiveClassValue the class value to treat as positive
 * @throws IllegalArgumentException if the class attribute of {@code data} is not nominal
 */
public static void convertMulticlassToBinary(Instances data, String positiveClassValue) {

    // only a nominal class can be mapped onto Y/N labels
    if (!data.classAttribute().isNominal()) {
        throw new IllegalArgumentException("Instances must have a nominal class.");
    }

    // build the binary class attribute with the labels [Y, N]
    FastVector binaryLabels = new FastVector(2);
    binaryLabels.addElement("Y");
    binaryLabels.addElement("N");
    Attribute binaryClass = new Attribute("class", binaryLabels);

    // insert the binary attribute at the current class position, then read the class index
    // again: that second reading is used as the position of the original class attribute
    final int binaryIdx = data.classIndex();
    data.insertAttributeAt(binaryClass, binaryIdx);
    final int originalIdx = data.classIndex();

    // label each instance: index 0 is "Y", index 1 is "N"
    for (int i = 0, total = data.numInstances(); i < total; i++) {
        Instance current = data.instance(i);
        boolean isPositive = current.stringValue(originalIdx).equals(positiveClassValue);
        current.setValue(binaryIdx, isPositive ? 0 : 1);
    }

    // make the binary attribute the class before removing the original class attribute
    data.setClassIndex(binaryIdx);
    data.deleteAttributeAt(originalIdx);

    // record the positive class in the data set name
    data.setRelationName(data.relationName() + "-" + positiveClassValue);
}

From source file:edu.umbc.cs.maple.utils.WekaUtils.java

License:Open Source License

/**
 * Determines whether a data set has (approximately) equal class priors.
 * The per-class instance counts are compared against a vector holding the mean count
 * via {@code MathUtils.rmse}; the priors count as equal when that value does not exceed
 * 2.5% of the mean count, rounded up.
 *
 * @param data the data set to inspect
 * @return whether the data set has equal class priors
 */
public static boolean equalClassPriors(Instances data) {
    // tally the number of instances per class
    final double[] classCounts = new double[data.numClasses()];
    for (int idx = 0, total = data.numInstances(); idx < total; idx++) {
        final int cls = (int) Math.round(data.instance(idx).classValue());
        classCounts[cls] += 1;
    }

    // reference vector: every class holding exactly the mean count
    final double mean = MathUtils.sum(classCounts) / classCounts.length;
    final double[] uniform = new double[classCounts.length];
    for (int c = 0; c < uniform.length; c++) {
        uniform[c] = mean;
    }

    // deviation of the actual counts from the reference vector
    final double deviation = MathUtils.rmse(uniform, classCounts);

    // tolerate a deviation of up to 2.5% of the mean count, rounded up
    final double tolerance = Math.ceil(0.025 * mean);

    return deviation <= tolerance;
}

From source file:edu.umbc.cs.maple.utils.WekaUtils.java

License:Open Source License

/** Gets the weights of each instance in a dataset as an array.
 * @param data the dataset of instances/*w  ww . j  a  v a  2 s .  c  om*/
 * @return the weights of the instances as an array.
 */
public static double[] getWeights(Instances data) {
    int numInstances = data.numInstances();
    double[] weights = new double[numInstances];
    for (int instIdx = 0; instIdx < numInstances; instIdx++) {
        weights[instIdx] = data.instance(instIdx).weight();
    }
    return weights;
}

From source file:edu.umbc.cs.maple.utils.WekaUtils.java

License:Open Source License

/**
 * Converts a set of instances to svm-light format.
 * Each instance becomes one line: the label, then one {@code index:value} pair per
 * non-class attribute (index is the attribute position plus one), then
 * {@code # <instance index>}, terminated by the platform line separator.
 *
 * @param data the weka instances
 * @param labelFormat how the class value is written; with
 *        {@code SVMLightLabelFormat.CLASSIFICATION} a class value of 0 is written as "-1"
 *        and any other value as "1", otherwise the raw class value is written
 * @return the weka instances in svm-light format
 * @throws IllegalArgumentException if the classification label format is requested and
 *         the data does not have exactly two classes
 */
public static String arffToSVMLight(Instances data, SVMLightLabelFormat labelFormat) {

    if (labelFormat == SVMLightLabelFormat.CLASSIFICATION && data.numClasses() != 2) {
        throw new IllegalArgumentException(
                "SVM-light classification label format requires that the data contain only two classes.");
    }

    // build the output in a StringBuilder: String += inside the nested loops would
    // re-copy the whole accumulated text on every append
    StringBuilder out = new StringBuilder();
    String endline = System.getProperty("line.separator");

    int numInstances = data.numInstances();
    int numAttributes = data.numAttributes();
    int classAttIdx = data.classIndex();

    for (int instIdx = 0; instIdx < numInstances; instIdx++) {

        Instance inst = data.instance(instIdx);

        // convert the instance label
        if (labelFormat == SVMLightLabelFormat.CLASSIFICATION) {
            out.append((inst.classValue() == 0) ? "-1" : "1");
        } else {
            out.append(inst.classValue());
        }

        out.append(' ');

        // convert each feature
        for (int attIdx = 0; attIdx < numAttributes; attIdx++) {
            // skip the class attribute
            if (attIdx == classAttIdx) {
                continue;
            }
            out.append(attIdx + 1).append(':').append(inst.value(attIdx)).append(' ');
        }

        // append the instance info string
        out.append("# ").append(instIdx);

        out.append(endline);
    }

    return out.toString();
}

From source file:edu.utexas.cs.tactex.utils.RegressionUtils.java

License:Open Source License

/**
 * Adds a y attribute to the instances (via the single-argument overload) and fills it
 * with the given values.
 * If {@code y.length} differs from the number of instances an error is logged and only
 * the entries that have both a value and an instance are written.
 *
 * @param xInsts the instances to extend with y values
 * @param y the y values, one per instance
 * @return the instances returned by the single-argument overload, with y values set
 */
public static Instances addYforWeka(Instances xInsts, Double[] y) {

    Instances xyInsts = addYforWeka(xInsts);

    int numInstances = xyInsts.numInstances();
    if (y.length != numInstances) {
        log.error("cannot add y to instances since y.length != numInstances");
    }

    // initialize all y values on the instances that are actually returned; this is
    // correct whether the overload modifies xInsts in place or returns a new object.
    // assumes the overload adds y as the last attribute -- TODO confirm
    int n = xyInsts.numAttributes() - 1;
    // on a length mismatch, stop at the shorter of the two so no index runs out of range
    int limit = Math.min(y.length, numInstances);
    for (int i = 0; i < limit; ++i) {
        xyInsts.get(i).setValue(n, y[i]);
    }

    return xyInsts;
}

From source file:edu.utexas.cs.tactex.utils.RegressionUtils.java

License:Open Source License

/**
 * Computes the leave-one-out mean squared error of a linear regression with the given
 * ridge value: for each fold the regression is built on the training split and the
 * squared difference between its prediction for the test instance and that instance's
 * class value is accumulated.
 *
 * @param lambda the ridge value set on the linear regression
 * @param data the instances to cross-validate on
 * @return the mean of the squared errors over all folds, or {@code null} if the data has
 *         fewer than two instances or building/classifying fails in any fold
 */
public static Double leaveOneOutErrorLinRegLambda(double lambda, Instances data) {

    // MANUAL

    // create a linear regression classifier with Xy_polynorm data
    LinearRegression linreg = createLinearRegression();
    linreg.setRidge(lambda);

    final int numInstances = data.numInstances();
    if (numInstances == 0) {
        log.error("no instances in leave-one-out data");
        return null;
    }
    if (numInstances < 2) {
        // weka's trainCV/testCV throw IllegalArgumentException for fewer than 2 folds;
        // report it the same way as every other failure in this method
        log.error("leave-one-out requires at least 2 instances");
        return null;
    }

    double mse = 0;
    for (int i = 0; i < numInstances; ++i) {
        log.info("fold " + i);
        // one fold per instance
        Instances train = data.trainCV(numInstances, i);
        log.info("train");
        Instances test = data.testCV(numInstances, i);
        log.info("test");
        double actualY = data.instance(i).classValue();
        log.info("actualY");
        try {
            linreg.buildClassifier(train);
            log.info("buildClassifier");
        } catch (Exception e) {
            log.error("failed to build classifier in cross validation", e);
            return null;
        }
        double predictedY = 0;
        try {
            predictedY = linreg.classifyInstance(test.instance(0));
            log.info("predictedY");
        } catch (Exception e) {
            log.error("failed to classify in cross validation", e);
            return null;
        }
        double error = predictedY - actualY;
        log.info("error " + error);
        mse += error * error;
        log.info("mse " + mse);
    }
    mse /= numInstances;
    log.info("mse " + mse);
    return mse;

    // NOTE: a commented-out alternative based on weka's Evaluation.crossValidateModel
    // (numFolds = numInstances, Random(0), returning eval.errorRate()) used to follow
    // here; it was unreachable and has been removed.
}

From source file:edu.washington.cs.knowitall.summarization.RedundancyClassifier.java

License:Open Source License

/**
 * Decides whether two sentences are redundant, caching the answer for both orderings
 * of the pair in {@code redundantMap}.
 *
 * @param fstSentence the first sentence of the pair
 * @param sndSentence the second sentence of the pair
 * @return {@code true} if the classifier labels the pair 0.0; {@code false} for any other
 *         label, when no instance could be set up, or when classification throws
 */
public boolean redundant(Sentence fstSentence, Sentence sndSentence) {
    final String forwardKey = fstSentence.getKey() + "::" + sndSentence.getKey();
    if (redundantMap.containsKey(forwardKey)) {
        return redundantMap.get(forwardKey);
    }

    // Set up the instances.
    StringReader testReader = new StringReader(
            getHeader() + getFeatures(fstSentence, sndSentence, false, -1) + ",0.0" + "\n");
    Instances unlabeled = setupInstances(testReader);

    try {
        // label instances
        if (unlabeled.numInstances() > 0) {
            final double clsLabel = tree.classifyInstance(unlabeled.instance(0));
            final boolean isRedundant = (clsLabel == 0.0);
            // store the verdict under both key orderings
            redundantMap.put(forwardKey, isRedundant);
            redundantMap.put(sndSentence.getKey() + "::" + fstSentence.getKey(), isRedundant);
            return isRedundant;
        }
    } catch (FileNotFoundException e) {
        System.err.println("File not found error");
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    }
    // nothing was classified: report not redundant, without caching
    return false;
}