List of usage examples for weka.core Instances instance
public Instance instance(int index)
From source file:edu.umbc.cs.maple.utils.WekaUtils.java
License:Open Source License
/** Converts the instances in the given dataset to binary, setting the specified labels to positive. * Note this method is destructive to data, directly modifying its contents. * @param data the multiclass dataset to be converted to binary. * @param positiveClassValue the class value to treat as positive. *//*ww w . j a va 2 s . c o m*/ public static void convertMulticlassToBinary(Instances data, String positiveClassValue) { // ensure that data is nominal if (!data.classAttribute().isNominal()) throw new IllegalArgumentException("Instances must have a nominal class."); // create the new class attribute FastVector newClasses = new FastVector(2); newClasses.addElement("Y"); newClasses.addElement("N"); Attribute newClassAttribute = new Attribute("class", newClasses); // alter the class attribute to be binary int newClassAttIdx = data.classIndex(); data.insertAttributeAt(newClassAttribute, newClassAttIdx); int classAttIdx = data.classIndex(); // set the instances classes to be binary, with the labels [Y,N] (indices 0 and 1 respectively) int numInstances = data.numInstances(); for (int instIdx = 0; instIdx < numInstances; instIdx++) { Instance inst = data.instance(instIdx); if (inst.stringValue(classAttIdx).equals(positiveClassValue)) { inst.setValue(newClassAttIdx, 0); // set it to the first class, which will be Y } else { inst.setValue(newClassAttIdx, 1); // set it to the second class, which will be 0 } } // switch the class index to the new class and delete the old class data.setClassIndex(newClassAttIdx); data.deleteAttributeAt(classAttIdx); // alter the dataset name data.setRelationName(data.relationName() + "-" + positiveClassValue); }
From source file:edu.umbc.cs.maple.utils.WekaUtils.java
License:Open Source License
/** Determines whether a data set has equal class priors. * @param data//from w w w . ja v a2 s. c o m * @return whether the data set has equal class priors */ public static boolean equalClassPriors(Instances data) { double[] counts = new double[data.numClasses()]; int numInstances = data.numInstances(); for (int i = 0; i < numInstances; i++) { Instance inst = data.instance(i); int classValueIdx = (int) Math.round(inst.classValue()); counts[classValueIdx] = counts[classValueIdx] + 1; } // compute the mean double meanCount = MathUtils.sum(counts) / counts.length; double[] meanArray = new double[counts.length]; for (int i = 0; i < meanArray.length; i++) { meanArray[i] = meanCount; } // compute the rmse double rmse = MathUtils.rmse(meanArray, counts); // compute 2.5% of the possible double deviationAllowed = Math.ceil(0.025 * meanCount); if (rmse <= deviationAllowed) return true; else return false; }
From source file:edu.umbc.cs.maple.utils.WekaUtils.java
License:Open Source License
/**
 * Gets the weights of each instance in a dataset as an array.
 * @param data the dataset of instances
 * @return the weights of the instances as an array.
 */
public static double[] getWeights(Instances data) {
    final int n = data.numInstances();
    double[] weights = new double[n];
    for (int i = 0; i < n; i++) {
        weights[i] = data.instance(i).weight();
    }
    return weights;
}
From source file:edu.umbc.cs.maple.utils.WekaUtils.java
License:Open Source License
/** Converts a set of instances to svm-light format * @param data the weka instances/*w w w .j a v a2 s . co m*/ * @return the weka instances in svm-light format */ public static String arffToSVMLight(Instances data, SVMLightLabelFormat labelFormat) { if (labelFormat == SVMLightLabelFormat.CLASSIFICATION && data.numClasses() != 2) { throw new IllegalArgumentException( "SVM-light classification label format requires that the data contain only two classes."); } String str = ""; String endline = System.getProperty("line.separator"); int numInstances = data.numInstances(); int numAttributes = data.numAttributes(); int classAttIdx = data.classIndex(); for (int instIdx = 0; instIdx < numInstances; instIdx++) { Instance inst = data.instance(instIdx); // convert the instance label if (labelFormat == SVMLightLabelFormat.CLASSIFICATION) { str += (inst.classValue() == 0) ? "-1" : "1"; } else { str += inst.classValue(); } str += " "; // convert each feature for (int attIdx = 0; attIdx < numAttributes; attIdx++) { // skip the class attribute if (attIdx == classAttIdx) continue; str += (attIdx + 1) + ":" + inst.value(attIdx) + " "; } // append the instance info string str += "# " + instIdx; str += endline; } return str; }
From source file:edu.utexas.cs.tactex.CostCurvesPredictorService.java
License:Open Source License
private double makeMwhPrediction(WekaLinRegData wekaData, int futureTimeslot, double myMwh, double competitorMwh) { // predict/*from www .j a va2 s .c om*/ ArrayList<Double> features = configuratorFactoryService.getCostCurvesDataProcessor().extractFeatures(myMwh, competitorMwh); Instance inst = RegressionUtils.createInstance(features); ArrayList<Instance> instArr = new ArrayList<Instance>(); instArr.add(inst); ArrayList<String> attrNames = configuratorFactoryService.getCostCurvesDataProcessor().getFeatureNames(); Instances x0_features = RegressionUtils.createInstances(instArr, attrNames); Instances x0_featuresNorm = RegressionUtils.featureNormalize(x0_features, wekaData.getStandardize()); Instances x0_final = RegressionUtils.addYforWeka(x0_featuresNorm); // no yvals, missing values double prediction = 0; try { prediction = wekaData.getLinearRegression().classifyInstance(x0_final.instance(0)); } catch (Exception e) { log.error("passed CV but cannot predict on new point. falling back to array"); log.error("Exception is", e); return getAvgBootstrapPricePerMwh(futureTimeslot); } return prediction; }
From source file:edu.utexas.cs.tactex.OpponentPredictorService.java
License:Open Source License
private ArrayList<Double> makeRatesPrediction(WekaLinRegData wekaData, double state, double myBestRate, double opponentBestRate) { ArrayList<Double> predictedActions = new ArrayList<Double>(); // predict/*w w w . ja v a 2s . com*/ ArrayList<Double> features = new ArrayList<Double>(); features.add(state); // Instance inst = RegressionUtils.createInstance(features); ArrayList<Instance> instArr = new ArrayList<Instance>(); instArr.add(inst); ArrayList<String> attrNames = RAW_ATTR_NAMES; Instances x0_features = RegressionUtils.createInstances(instArr, attrNames); Instances x0_featuresNorm = RegressionUtils.featureNormalize(x0_features, wekaData.getStandardize()); Instances x0_final = RegressionUtils.addYforWeka(x0_featuresNorm); // no yvals, missing values double prediction = 0; try { prediction = wekaData.getLinearRegression().classifyInstance(x0_final.instance(0)); predictedActions.add(prediction); } catch (Exception e) { log.error("passed CV but cannot predict on new point. falling back to array"); log.error("Exception is", e); } return convertActionsToRates(predictedActions, myBestRate, opponentBestRate); }
From source file:edu.utexas.cs.tactex.subscriptionspredictors.PolyRegCust.java
License:Open Source License
@Override public Double predictNumSubs(double candidateEval, TreeMap<Double, Double> e2n, CustomerInfo customer, int timeslot) { int maxDegree = 8; WekaLinRegData wekaData = retrieveOrCreateWekaData(e2n, maxDegree, customer, timeslot); if (null == wekaData) { return null; // errors should have been printed inside }/*from w w w .ja v a 2 s . c o m*/ // predict Double[] x0 = new Double[] { candidateEval }; Instances x0_poly = RegressionUtils.polyFeatures1D(x0, maxDegree); log.info("x0_poly " + x0_poly); Instances x0_polynorm = RegressionUtils.featureNormalize(x0_poly, wekaData.getStandardize()); log.info("x0_polynorm " + x0_polynorm); Instances x0_final = RegressionUtils.addYforWeka(x0_polynorm); // no yvals, missing values log.info("x0_final " + x0_final); double prediction = 0; try { prediction = wekaData.getLinearRegression().classifyInstance(x0_final.instance(0)); log.info("prediction " + prediction + " => " + Math.max(0, (int) Math.round(prediction))); } catch (Exception e) { log.error("PolyReg passed CV but cannot predict on new point. falling back to interpolateOrNN()"); log.error("Exception is", e); log.error("e2n: " + e2n.toString()); log.error("candidateEval " + candidateEval); return null; } log.info("PolyReg succeeded"); // cast to int, and cannot be negative return Math.max(0.0, Math.round(prediction)); }
From source file:edu.utexas.cs.tactex.utils.RegressionUtils.java
License:Open Source License
public static Double leaveOneOutErrorLinRegLambda(double lambda, Instances data) { // MANUAL /*from www . j av a2s . c om*/ // create a linear regression classifier with Xy_polynorm data LinearRegression linreg = createLinearRegression(); linreg.setRidge(lambda); double mse = 0; for (int i = 0; i < data.numInstances(); ++i) { log.info("fold " + i); Instances train = data.trainCV(data.numInstances(), i); log.info("train"); Instances test = data.testCV(data.numInstances(), i); log.info("test"); double actualY = data.instance(i).classValue(); log.info("actualY"); try { linreg.buildClassifier(train); log.info("buildClassifier"); } catch (Exception e) { log.error("failed to build classifier in cross validation", e); return null; } double predictedY = 0; try { predictedY = linreg.classifyInstance(test.instance(0)); log.info("predictedY"); } catch (Exception e) { log.error("failed to classify in cross validation", e); return null; } double error = predictedY - actualY; log.info("error " + error); mse += error * error; log.info("mse " + mse); } if (data.numInstances() == 0) { log.error("no instances in leave-one-out data"); return null; } mse /= data.numInstances(); log.info("mse " + mse); return mse; // // USING WEKA // // // create evaluation object // Evaluation eval = null; // try { // eval = new Evaluation(data); // } catch (Exception e) { // log.error("weka Evaluation() creation threw exception", e); // //e.printStackTrace(); // return null; // } // // // create a linear regression classifier with Xy_polynorm data // LinearRegression linreg = createLinearRegression(); // linreg.setRidge(lambda); // // try { // // linreg.buildClassifier(data); // // } catch (Exception e) { // // log.error("FAILED: linear regression threw exception", e); // // //e.printStackTrace(); // // return null; // // } // // // initialize the evaluation object // Classifier classifier = linreg; // int numFolds = data.numInstances(); // Random random = new Random(0); // try { // 
eval.crossValidateModel(classifier , data , numFolds , random); // } catch (Exception e) { // log.error("crossvalidation threw exception", e); // //e.printStackTrace(); // return null; // } // // double mse = eval.errorRate(); // return mse; }
From source file:edu.washington.cs.knowitall.summarization.RedundancyClassifier.java
License:Open Source License
/** * /*from w w w .j a v a 2 s.co m*/ * @param fstSentence * @param sndSentence * @return if the sentence are redundant * @throws Exception */ public boolean redundant(Sentence fstSentence, Sentence sndSentence) { if (redundantMap.containsKey(fstSentence.getKey() + "::" + sndSentence.getKey())) { return redundantMap.get(fstSentence.getKey() + "::" + sndSentence.getKey()); } // Set up the instances. String header = getHeader(); String features = getFeatures(fstSentence, sndSentence, false, -1); StringReader testReader = new StringReader(header + features + ",0.0" + "\n"); Instances unlabeled = setupInstances(testReader); try { // label instances if (unlabeled.numInstances() > 0) { double clsLabel = tree.classifyInstance(unlabeled.instance(0)); if (clsLabel == 0.0) { redundantMap.put(fstSentence.getKey() + "::" + sndSentence.getKey(), true); redundantMap.put(sndSentence.getKey() + "::" + fstSentence.getKey(), true); return true; } else { redundantMap.put(fstSentence.getKey() + "::" + sndSentence.getKey(), false); redundantMap.put(sndSentence.getKey() + "::" + fstSentence.getKey(), false); return false; } } } catch (FileNotFoundException e) { System.err.println("File not found error"); e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } return false; }
From source file:edu.washington.cs.knowitall.summarization.RedundancyClassifier.java
License:Open Source License
public double probabilityRedundant(Sentence fstSentence, Sentence sndSentence) { // Set up the instances. String header = getHeader();//from ww w .j a v a 2s . c o m String features = getFeatures(fstSentence, sndSentence, false, -1); StringReader testReader = new StringReader(header + features + ",0.0" + "\n"); Instances unlabeled = setupInstances(testReader); try { // label instances return tree.distributionForInstance(unlabeled.instance(0))[0]; } catch (FileNotFoundException e) { System.err.println("File not found error"); e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } return -1.0; }