List of usage examples for weka.core Instances instance
public Instance instance(int index)
From source file:edu.umbc.cs.maple.utils.WekaUtils.java
License:Open Source License
/** Converts the instances in the given dataset to binary, setting the specified labels to positive. * Note this method is destructive to data, directly modifying its contents. * @param data the multiclass dataset to be converted to binary. * @param positiveClassValue the class value to treat as positive. *//*ww w . j a va 2 s . c o m*/ public static void convertMulticlassToBinary(Instances data, String positiveClassValue) { // ensure that data is nominal if (!data.classAttribute().isNominal()) throw new IllegalArgumentException("Instances must have a nominal class."); // create the new class attribute FastVector newClasses = new FastVector(2); newClasses.addElement("Y"); newClasses.addElement("N"); Attribute newClassAttribute = new Attribute("class", newClasses); // alter the class attribute to be binary int newClassAttIdx = data.classIndex(); data.insertAttributeAt(newClassAttribute, newClassAttIdx); int classAttIdx = data.classIndex(); // set the instances classes to be binary, with the labels [Y,N] (indices 0 and 1 respectively) int numInstances = data.numInstances(); for (int instIdx = 0; instIdx < numInstances; instIdx++) { Instance inst = data.instance(instIdx); if (inst.stringValue(classAttIdx).equals(positiveClassValue)) { inst.setValue(newClassAttIdx, 0); // set it to the first class, which will be Y } else { inst.setValue(newClassAttIdx, 1); // set it to the second class, which will be 0 } } // switch the class index to the new class and delete the old class data.setClassIndex(newClassAttIdx); data.deleteAttributeAt(classAttIdx); // alter the dataset name data.setRelationName(data.relationName() + "-" + positiveClassValue); }
From source file:edu.umbc.cs.maple.utils.WekaUtils.java
License:Open Source License
/** Determines whether a data set has equal class priors. * @param data//from w w w . ja v a2 s. c o m * @return whether the data set has equal class priors */ public static boolean equalClassPriors(Instances data) { double[] counts = new double[data.numClasses()]; int numInstances = data.numInstances(); for (int i = 0; i < numInstances; i++) { Instance inst = data.instance(i); int classValueIdx = (int) Math.round(inst.classValue()); counts[classValueIdx] = counts[classValueIdx] + 1; } // compute the mean double meanCount = MathUtils.sum(counts) / counts.length; double[] meanArray = new double[counts.length]; for (int i = 0; i < meanArray.length; i++) { meanArray[i] = meanCount; } // compute the rmse double rmse = MathUtils.rmse(meanArray, counts); // compute 2.5% of the possible double deviationAllowed = Math.ceil(0.025 * meanCount); if (rmse <= deviationAllowed) return true; else return false; }
From source file:edu.umbc.cs.maple.utils.WekaUtils.java
License:Open Source License
/**
 * Gets the weights of each instance in a dataset as an array.
 * @param data the dataset of instances
 * @return the weights of the instances as an array.
 */
public static double[] getWeights(Instances data) {
    final int n = data.numInstances();
    double[] weights = new double[n];
    for (int i = 0; i < n; i++) {
        weights[i] = data.instance(i).weight();
    }
    return weights;
}
From source file:edu.umbc.cs.maple.utils.WekaUtils.java
License:Open Source License
/** Converts a set of instances to svm-light format * @param data the weka instances/*w w w .j a v a2 s . co m*/ * @return the weka instances in svm-light format */ public static String arffToSVMLight(Instances data, SVMLightLabelFormat labelFormat) { if (labelFormat == SVMLightLabelFormat.CLASSIFICATION && data.numClasses() != 2) { throw new IllegalArgumentException( "SVM-light classification label format requires that the data contain only two classes."); } String str = ""; String endline = System.getProperty("line.separator"); int numInstances = data.numInstances(); int numAttributes = data.numAttributes(); int classAttIdx = data.classIndex(); for (int instIdx = 0; instIdx < numInstances; instIdx++) { Instance inst = data.instance(instIdx); // convert the instance label if (labelFormat == SVMLightLabelFormat.CLASSIFICATION) { str += (inst.classValue() == 0) ? "-1" : "1"; } else { str += inst.classValue(); } str += " "; // convert each feature for (int attIdx = 0; attIdx < numAttributes; attIdx++) { // skip the class attribute if (attIdx == classAttIdx) continue; str += (attIdx + 1) + ":" + inst.value(attIdx) + " "; } // append the instance info string str += "# " + instIdx; str += endline; } return str; }
From source file:edu.utexas.cs.tactex.CostCurvesPredictorService.java
License:Open Source License
private double makeMwhPrediction(WekaLinRegData wekaData, int futureTimeslot, double myMwh, double competitorMwh) { // predict/*from www .j a va2 s .c om*/ ArrayList<Double> features = configuratorFactoryService.getCostCurvesDataProcessor().extractFeatures(myMwh, competitorMwh); Instance inst = RegressionUtils.createInstance(features); ArrayList<Instance> instArr = new ArrayList<Instance>(); instArr.add(inst); ArrayList<String> attrNames = configuratorFactoryService.getCostCurvesDataProcessor().getFeatureNames(); Instances x0_features = RegressionUtils.createInstances(instArr, attrNames); Instances x0_featuresNorm = RegressionUtils.featureNormalize(x0_features, wekaData.getStandardize()); Instances x0_final = RegressionUtils.addYforWeka(x0_featuresNorm); // no yvals, missing values double prediction = 0; try { prediction = wekaData.getLinearRegression().classifyInstance(x0_final.instance(0)); } catch (Exception e) { log.error("passed CV but cannot predict on new point. falling back to array"); log.error("Exception is", e); return getAvgBootstrapPricePerMwh(futureTimeslot); } return prediction; }
From source file:edu.utexas.cs.tactex.OpponentPredictorService.java
License:Open Source License
private ArrayList<Double> makeRatesPrediction(WekaLinRegData wekaData, double state, double myBestRate, double opponentBestRate) { ArrayList<Double> predictedActions = new ArrayList<Double>(); // predict/*w w w . ja v a 2s . com*/ ArrayList<Double> features = new ArrayList<Double>(); features.add(state); // Instance inst = RegressionUtils.createInstance(features); ArrayList<Instance> instArr = new ArrayList<Instance>(); instArr.add(inst); ArrayList<String> attrNames = RAW_ATTR_NAMES; Instances x0_features = RegressionUtils.createInstances(instArr, attrNames); Instances x0_featuresNorm = RegressionUtils.featureNormalize(x0_features, wekaData.getStandardize()); Instances x0_final = RegressionUtils.addYforWeka(x0_featuresNorm); // no yvals, missing values double prediction = 0; try { prediction = wekaData.getLinearRegression().classifyInstance(x0_final.instance(0)); predictedActions.add(prediction); } catch (Exception e) { log.error("passed CV but cannot predict on new point. falling back to array"); log.error("Exception is", e); } return convertActionsToRates(predictedActions, myBestRate, opponentBestRate); }
From source file:edu.utexas.cs.tactex.subscriptionspredictors.PolyRegCust.java
License:Open Source License
@Override public Double predictNumSubs(double candidateEval, TreeMap<Double, Double> e2n, CustomerInfo customer, int timeslot) { int maxDegree = 8; WekaLinRegData wekaData = retrieveOrCreateWekaData(e2n, maxDegree, customer, timeslot); if (null == wekaData) { return null; // errors should have been printed inside }/*from w w w .ja v a 2 s . c o m*/ // predict Double[] x0 = new Double[] { candidateEval }; Instances x0_poly = RegressionUtils.polyFeatures1D(x0, maxDegree); log.info("x0_poly " + x0_poly); Instances x0_polynorm = RegressionUtils.featureNormalize(x0_poly, wekaData.getStandardize()); log.info("x0_polynorm " + x0_polynorm); Instances x0_final = RegressionUtils.addYforWeka(x0_polynorm); // no yvals, missing values log.info("x0_final " + x0_final); double prediction = 0; try { prediction = wekaData.getLinearRegression().classifyInstance(x0_final.instance(0)); log.info("prediction " + prediction + " => " + Math.max(0, (int) Math.round(prediction))); } catch (Exception e) { log.error("PolyReg passed CV but cannot predict on new point. falling back to interpolateOrNN()"); log.error("Exception is", e); log.error("e2n: " + e2n.toString()); log.error("candidateEval " + candidateEval); return null; } log.info("PolyReg succeeded"); // cast to int, and cannot be negative return Math.max(0.0, Math.round(prediction)); }
From source file:edu.utexas.cs.tactex.utils.RegressionUtils.java
License:Open Source License
public static Double leaveOneOutErrorLinRegLambda(double lambda, Instances data) { // MANUAL /*from www . j av a2s . c om*/ // create a linear regression classifier with Xy_polynorm data LinearRegression linreg = createLinearRegression(); linreg.setRidge(lambda); double mse = 0; for (int i = 0; i < data.numInstances(); ++i) { log.info("fold " + i); Instances train = data.trainCV(data.numInstances(), i); log.info("train"); Instances test = data.testCV(data.numInstances(), i); log.info("test"); double actualY = data.instance(i).classValue(); log.info("actualY"); try { linreg.buildClassifier(train); log.info("buildClassifier"); } catch (Exception e) { log.error("failed to build classifier in cross validation", e); return null; } double predictedY = 0; try { predictedY = linreg.classifyInstance(test.instance(0)); log.info("predictedY"); } catch (Exception e) { log.error("failed to classify in cross validation", e); return null; } double error = predictedY - actualY; log.info("error " + error); mse += error * error; log.info("mse " + mse); } if (data.numInstances() == 0) { log.error("no instances in leave-one-out data"); return null; } mse /= data.numInstances(); log.info("mse " + mse); return mse; // // USING WEKA // // // create evaluation object // Evaluation eval = null; // try { // eval = new Evaluation(data); // } catch (Exception e) { // log.error("weka Evaluation() creation threw exception", e); // //e.printStackTrace(); // return null; // } // // // create a linear regression classifier with Xy_polynorm data // LinearRegression linreg = createLinearRegression(); // linreg.setRidge(lambda); // // try { // // linreg.buildClassifier(data); // // } catch (Exception e) { // // log.error("FAILED: linear regression threw exception", e); // // //e.printStackTrace(); // // return null; // // } // // // initialize the evaluation object // Classifier classifier = linreg; // int numFolds = data.numInstances(); // Random random = new Random(0); // try { // 
eval.crossValidateModel(classifier , data , numFolds , random); // } catch (Exception e) { // log.error("crossvalidation threw exception", e); // //e.printStackTrace(); // return null; // } // // double mse = eval.errorRate(); // return mse; }
From source file:edu.washington.cs.knowitall.summarization.RedundancyClassifier.java
License:Open Source License
/** * /*from w w w .j a v a 2 s.co m*/ * @param fstSentence * @param sndSentence * @return if the sentence are redundant * @throws Exception */ public boolean redundant(Sentence fstSentence, Sentence sndSentence) { if (redundantMap.containsKey(fstSentence.getKey() + "::" + sndSentence.getKey())) { return redundantMap.get(fstSentence.getKey() + "::" + sndSentence.getKey()); } // Set up the instances. String header = getHeader(); String features = getFeatures(fstSentence, sndSentence, false, -1); StringReader testReader = new StringReader(header + features + ",0.0" + "\n"); Instances unlabeled = setupInstances(testReader); try { // label instances if (unlabeled.numInstances() > 0) { double clsLabel = tree.classifyInstance(unlabeled.instance(0)); if (clsLabel == 0.0) { redundantMap.put(fstSentence.getKey() + "::" + sndSentence.getKey(), true); redundantMap.put(sndSentence.getKey() + "::" + fstSentence.getKey(), true); return true; } else { redundantMap.put(fstSentence.getKey() + "::" + sndSentence.getKey(), false); redundantMap.put(sndSentence.getKey() + "::" + fstSentence.getKey(), false); return false; } } } catch (FileNotFoundException e) { System.err.println("File not found error"); e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } return false; }
From source file:edu.washington.cs.knowitall.summarization.RedundancyClassifier.java
License:Open Source License
public double probabilityRedundant(Sentence fstSentence, Sentence sndSentence) { // Set up the instances. String header = getHeader();//from ww w .j a v a 2s . c o m String features = getFeatures(fstSentence, sndSentence, false, -1); StringReader testReader = new StringReader(header + features + ",0.0" + "\n"); Instances unlabeled = setupInstances(testReader); try { // label instances return tree.distributionForInstance(unlabeled.instance(0))[0]; } catch (FileNotFoundException e) { System.err.println("File not found error"); e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } return -1.0; }