List of usage examples for weka.core Instances classAttribute
public Attribute classAttribute()
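Before the examples from real projects, a minimal self-contained sketch of the typical call pattern ("data.arff" is a hypothetical placeholder path). Note that classAttribute() is only meaningful once a class index has been set:

import java.io.BufferedReader;
import java.io.FileReader;
import weka.core.Attribute;
import weka.core.Instances;

public class ClassAttributeDemo {
    public static void main(String[] args) throws Exception {
        // "data.arff" is a hypothetical placeholder; substitute a real dataset.
        BufferedReader br = new BufferedReader(new FileReader("data.arff"));
        Instances data = new Instances(br);
        br.close();
        // classAttribute() throws UnassignedClassException until a class index is set.
        data.setClassIndex(data.numAttributes() - 1);
        Attribute cls = data.classAttribute();
        System.out.println("Class attribute: " + cls.name());
        if (cls.isNominal()) {
            for (int i = 0; i < cls.numValues(); i++) {
                System.out.println("  value " + i + ": " + cls.value(i));
            }
        }
    }
}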
From source file:meka.core.PSUtils.java
License:Open Source License
/**
 * Convert Distribution - Given the posterior across combinations, return the distribution across labels.
 * <br>
 * TODO Use recombination!!!
 * @see PSUtils#recombination(double[],int,LabelSet[])
 * @param p the posterior of the super classes (combinations), e.g., P([1,3],[2]) = [1,0]
 * @param L the number of labels
 * @return the distribution across labels, e.g., P(1,2,3) = [1,0,1]
 */
@Deprecated
public static double[] convertDistribution(double p[], int L, Instances iTemplate) {
    double y[] = new double[L];
    int i = Utils.maxIndex(p);
    double d[] = toDoubleArray(iTemplate.classAttribute().value(i), L);
    for (int j = 0; j < d.length; j++) {
        if (d[j] > 0.0)
            y[j] = 1.0;
    }
    return y;
}
From source file:meka.core.PSUtils.java
License:Open Source License
/**
 * Convert Distribution - Given the posterior across combinations, return the distribution across labels.
 * @param p the posterior of the super classes (combinations), e.g., P([1,3],[2]) = [0.3,0.7]
 * @param L the number of labels
 * @return the distribution across labels, e.g., P(1,2,3) = [0.3,0.7,0.3]
 */
public static final double[] recombination_t(double p[], int L, Instances iTemplate) {
    double y[] = new double[L];
    for (int k = 0; k < p.length; k++) {
        String d_string = iTemplate.classAttribute().value(k); // e.g. d_string = "[1,3,5]"
        int d[] = MLUtils.toIntArray(d_string);                // e.g. d = [1,3,5], with p[k] = 0.5
        for (int j : d) {
            y[j] += p[k]; // e.g., y[1] += p[k] = 0.5 for each label j in d
        }
    }
    return y;
}
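To make the arithmetic in the comments concrete, here is a small self-contained toy sketch (all values hypothetical) of the same recombination step, mirroring the javadoc example with zero-based labels:

public class RecombinationToy {
    public static void main(String[] args) {
        // Hypothetical: super-classes {0,2} and {1}, posterior p = [0.3, 0.7], L = 3 labels.
        double[] p = {0.3, 0.7};
        int[][] superClasses = {{0, 2}, {1}}; // what MLUtils.toIntArray would yield for "[0,2]" and "[1]"
        double[] y = new double[3];
        for (int k = 0; k < p.length; k++) {
            for (int j : superClasses[k]) {
                y[j] += p[k]; // each label accumulates the mass of every combination containing it
            }
        }
        System.out.println(java.util.Arrays.toString(y)); // [0.3, 0.7, 0.3]
    }
}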
From source file:meka.core.SuperLabelUtils.java
License:Open Source License
/**
 * Returns a map of values for this multi-class Instances. For example, values <code>{[2,3,1], [1,0,1], [2,0,1]}</code>.
 * @param D_ multi-class Instances
 * @return a map where map[d] returns an int[] array of all values referred to by the d-th classification.
 */
public static int[][] extractValues(Instances D_) {
    int numVals = D_.classAttribute().numValues();
    int vMap[][] = new int[numVals][];
    for (int d = 0; d < numVals; d++) {
        vMap[d] = MLUtils.toIntArray(D_.classAttribute().value(d));
    }
    return vMap;
}
From source file:meka.gui.dataviewer.DataViewerMainPanel.java
License:Open Source License
/**
 * Displays some properties of the instances.
 */
public void showProperties() {
    DataPanel panel;
    ListSelectorDialog dialog;
    Vector<String> props;
    Instances inst;

    panel = getCurrentPanel();
    if (panel == null) {
        return;
    }

    inst = panel.getInstances();
    if (inst == null) {
        return;
    }
    if (inst.classIndex() < 0) {
        inst.setClassIndex(inst.numAttributes() - 1);
    }

    // get some data
    props = new Vector<String>();
    props.add("Filename: " + panel.getFilename());
    props.add("Relation name: " + inst.relationName());
    props.add("# of instances: " + inst.numInstances());
    props.add("# of attributes: " + inst.numAttributes());
    props.add("Class attribute: " + inst.classAttribute().name());
    props.add("# of class labels: " + inst.numClasses());

    dialog = new ListSelectorDialog(getParentFrame(), new JList(props));
    dialog.showDialog();
}
From source file:ml.ann.MultiClassPTR.java
public void initAttributes(Instances instances) {
    numInstance = instances.numInstances();
    numInput = instances.numAttributes(); // including bias
    inputInstances = new double[numInstance][numInput]; // attribute values including the bias value

    Attribute classAttribute = instances.classAttribute();
    if (classAttribute.isNominal()) {
        numOutput = classAttribute.numValues();
    } else {
        numOutput = 1;
    }
    targetInstances = new double[numInstance][numOutput];

    weight = new double[numInput][numOutput];
    double rangeMin = 0.0;
    double rangeMax = 1.0;
    for (int i = 0; i < numInput; ++i) {
        for (int j = 0; j < numOutput; ++j) {
            weight[i][j] = Math.random() * (rangeMax - rangeMin) + rangeMin;
        }
    }
}
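A side note on the API used above: Instances.numClasses() returns classAttribute().numValues() for a nominal class and 1 for a numeric one, so the same output sizing can be written more compactly:

// Equivalent to the isNominal() branch above:
// numClasses() is classAttribute().numValues() for a nominal class, 1 for a numeric class.
int numOutput = instances.numClasses();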
From source file:ml.ann.MultiClassPTR.java
@Override
public void buildClassifier(Instances instances) throws Exception {
    initAttributes(instances);

    // REMEMBER: only works if the class index is in the last position
    for (int instanceIdx = 0; instanceIdx < instances.numInstances(); instanceIdx++) {
        Instance instance = instances.get(instanceIdx);
        double[] inputInstance = inputInstances[instanceIdx];
        inputInstance[0] = 1.0; // initialize bias value
        for (int attrIdx = 0; attrIdx < instance.numAttributes() - 1; attrIdx++) {
            inputInstance[attrIdx + 1] = instance.value(attrIdx); // index 0 of the input instance is reserved for the bias
        }
    }

    // initialize target values
    if (instances.classAttribute().isNominal()) {
        for (int instanceIdx = 0; instanceIdx < instances.numInstances(); instanceIdx++) {
            Instance instance = instances.instance(instanceIdx);
            for (int classIdx = 0; classIdx < instances.numClasses(); classIdx++) {
                targetInstances[instanceIdx][classIdx] = 0.0;
            }
            targetInstances[instanceIdx][(int) instance.classValue()] = 1.0;
        }
    } else {
        for (int instanceIdx = 0; instanceIdx < instances.numInstances(); instanceIdx++) {
            Instance instance = instances.instance(instanceIdx);
            targetInstances[instanceIdx][0] = instance.classValue();
        }
    }

    if (algo == 1) {
        setActFunction();
        buildClassifier();
    } else if (algo == 2) {
        buildClassifier();
    } else if (algo == 3) {
        buildClassifierBatch();
    }
}
From source file:ml.dataprocess.CorrelationAttributeEval.java
License:Open Source License
/**
 * Initializes an information gain attribute evaluator. Replaces missing
 * values with means/modes; deletes instances with missing class values.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the evaluator has not been generated successfully
 */
@Override
public void buildEvaluator(Instances data) throws Exception {
    data = new Instances(data);
    data.deleteWithMissingClass();

    ReplaceMissingValues rmv = new ReplaceMissingValues();
    rmv.setInputFormat(data);
    data = Filter.useFilter(data, rmv);

    int numClasses = data.classAttribute().numValues();
    int classIndex = data.classIndex();
    int numInstances = data.numInstances();
    m_correlations = new double[data.numAttributes()];

    List<Integer> numericIndexes = new ArrayList<Integer>();
    List<Integer> nominalIndexes = new ArrayList<Integer>();
    if (m_detailedOutput) {
        m_detailedOutputBuff = new StringBuffer();
    }

    // TODO for instance weights (folded into computing weighted correlations)
    // add another dimension just before the last [2] (0 for 0/1 binary vector
    // and 1 for corresponding instance weights for the 1's)
    double[][][] nomAtts = new double[data.numAttributes()][][];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (data.attribute(i).isNominal() && i != classIndex) {
            nomAtts[i] = new double[data.attribute(i).numValues()][data.numInstances()];
            Arrays.fill(nomAtts[i][0], 1.0); // set the zero index for this att to all 1's
            nominalIndexes.add(i);
        } else if (data.attribute(i).isNumeric() && i != classIndex) {
            numericIndexes.add(i);
        }
    }

    // do the nominal attributes
    if (nominalIndexes.size() > 0) {
        for (int i = 0; i < data.numInstances(); i++) {
            Instance current = data.instance(i);
            for (int j = 0; j < current.numValues(); j++) {
                if (current.attribute(current.index(j)).isNominal() && current.index(j) != classIndex) {
                    // Will need to check for zero in case this isn't a sparse
                    // instance (unless we add 1 and subtract 1)
                    nomAtts[current.index(j)][(int) current.valueSparse(j)][i] += 1;
                    nomAtts[current.index(j)][0][i] -= 1;
                }
            }
        }
    }

    if (data.classAttribute().isNumeric()) {
        double[] classVals = data.attributeToDoubleArray(classIndex);

        // do the numeric attributes
        for (Integer i : numericIndexes) {
            double[] numAttVals = data.attributeToDoubleArray(i);
            m_correlations[i] = Utils.correlation(numAttVals, classVals, numAttVals.length);
            if (m_correlations[i] == 1.0) {
                // check for zero variance (useless numeric attribute)
                if (Utils.variance(numAttVals) == 0) {
                    m_correlations[i] = 0;
                }
            }
        }

        // do the nominal attributes
        if (nominalIndexes.size() > 0) {
            // now compute the correlations for the binarized nominal attributes
            for (Integer i : nominalIndexes) {
                double sum = 0;
                double corr = 0;
                double sumCorr = 0;
                double sumForValue = 0;
                if (m_detailedOutput) {
                    m_detailedOutputBuff.append("\n\n").append(data.attribute(i).name());
                }
                for (int j = 0; j < data.attribute(i).numValues(); j++) {
                    sumForValue = Utils.sum(nomAtts[i][j]);
                    corr = Utils.correlation(nomAtts[i][j], classVals, classVals.length);
                    // useless attribute - all instances have the same value
                    if (sumForValue == numInstances || sumForValue == 0) {
                        corr = 0;
                    }
                    if (corr < 0.0) {
                        corr = -corr;
                    }
                    sumCorr += sumForValue * corr;
                    sum += sumForValue;
                    if (m_detailedOutput) {
                        m_detailedOutputBuff.append("\n\t").append(data.attribute(i).value(j)).append(": ");
                        m_detailedOutputBuff.append(Utils.doubleToString(corr, 6));
                    }
                }
                m_correlations[i] = (sum > 0) ? sumCorr / sum : 0;
            }
        }
    } else { // class is nominal
        // TODO extra dimension for storing instance weights too
        double[][] binarizedClasses = new double[data.classAttribute().numValues()][data.numInstances()];
        // equal to the number of instances when all instance weights are 1
        double[] classValCounts = new double[data.classAttribute().numValues()];

        for (int i = 0; i < data.numInstances(); i++) {
            Instance current = data.instance(i);
            binarizedClasses[(int) current.classValue()][i] = 1;
        }
        for (int i = 0; i < data.classAttribute().numValues(); i++) {
            classValCounts[i] = Utils.sum(binarizedClasses[i]);
        }
        double sumClass = Utils.sum(classValCounts);

        // do numeric attributes first
        if (numericIndexes.size() > 0) {
            for (Integer i : numericIndexes) {
                double[] numAttVals = data.attributeToDoubleArray(i);
                double corr = 0;
                double sumCorr = 0;
                for (int j = 0; j < data.classAttribute().numValues(); j++) {
                    corr = Utils.correlation(numAttVals, binarizedClasses[j], numAttVals.length);
                    if (corr < 0.0) {
                        corr = -corr;
                    }
                    if (corr == 1.0) {
                        // check for zero variance (useless numeric attribute)
                        if (Utils.variance(numAttVals) == 0) {
                            corr = 0;
                        }
                    }
                    sumCorr += classValCounts[j] * corr;
                }
                m_correlations[i] = sumCorr / sumClass;
            }
        }

        if (nominalIndexes.size() > 0) {
            for (Integer i : nominalIndexes) {
                if (m_detailedOutput) {
                    m_detailedOutputBuff.append("\n\n").append(data.attribute(i).name());
                }
                double sumForAtt = 0;
                double corrForAtt = 0;
                for (int j = 0; j < data.attribute(i).numValues(); j++) {
                    double sumForValue = Utils.sum(nomAtts[i][j]);
                    double corr = 0;
                    double sumCorr = 0;
                    double avgCorrForValue = 0;
                    sumForAtt += sumForValue;
                    for (int k = 0; k < numClasses; k++) {
                        // corr between value j and class k
                        corr = Utils.correlation(nomAtts[i][j], binarizedClasses[k], binarizedClasses[k].length);
                        // useless attribute - all instances have the same value
                        if (sumForValue == numInstances || sumForValue == 0) {
                            corr = 0;
                        }
                        if (corr < 0.0) {
                            corr = -corr;
                        }
                        sumCorr += classValCounts[k] * corr;
                    }
                    avgCorrForValue = sumCorr / sumClass;
                    corrForAtt += sumForValue * avgCorrForValue;
                    if (m_detailedOutput) {
                        m_detailedOutputBuff.append("\n\t").append(data.attribute(i).value(j)).append(": ");
                        m_detailedOutputBuff.append(Utils.doubleToString(avgCorrForValue, 6));
                    }
                }
                // the weighted average corr for att i as a whole (weighted by value frequencies)
                m_correlations[i] = (sumForAtt > 0) ? corrForAtt / sumForAtt : 0;
            }
        }
    }

    if (m_detailedOutputBuff != null && m_detailedOutputBuff.length() > 0) {
        m_detailedOutputBuff.append("\n");
    }
}
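The evaluator above leans on one primitive throughout: weka.core.Utils.correlation between a 0/1 indicator vector for a nominal value and a class vector. A minimal self-contained sketch with hypothetical values:

import weka.core.Utils;

public class CorrelationSketch {
    public static void main(String[] args) {
        // Hypothetical vectors: a 0/1 indicator for one nominal value and a numeric class.
        double[] indicator = {1, 0, 1, 1, 0};
        double[] classVals = {2.0, 1.0, 2.5, 3.0, 0.5};
        double r = Utils.correlation(indicator, classVals, indicator.length);
        System.out.println("corr = " + r); // Pearson correlation; the evaluator drops the sign
    }
}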
From source file:mlpoc.MLPOC.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) {
    try {
        BufferedReader br;
        br = new BufferedReader(
                new FileReader("D:/Extra/B.E Project/agrodeploy/webapp/Data/ClusterAutotrain12.arff"));
        Instances training_data = new Instances(br);
        br.close();
        training_data.setClassIndex(training_data.numAttributes() - 1);

        br = new BufferedReader(new FileReader("D:/Extra/B.E Project/agrodeploy/webapp/Data/TestFinal.arff"));
        Instances testing_data = new Instances(br);
        br.close();
        testing_data.setClassIndex(testing_data.numAttributes() - 1);

        String summary = training_data.toSummaryString();
        int number_samples = training_data.numInstances();
        int number_attributes_per_sample = training_data.numAttributes();
        System.out.println("Number of attributes in model = " + number_attributes_per_sample);
        System.out.println("Number of samples = " + number_samples);
        System.out.println("Summary: " + summary);
        System.out.println();

        J48 j48 = new J48();
        FilteredClassifier fc = new FilteredClassifier();
        fc.setClassifier(j48);
        fc.buildClassifier(training_data);

        System.out.println("Testing instances: " + testing_data.numInstances());
        for (int i = 0; i < testing_data.numInstances(); i++) {
            double pred = fc.classifyInstance(testing_data.instance(i));
            String s1 = testing_data.classAttribute().value((int) pred);
            System.out.println(testing_data.instance(i) + " Predicted value: " + s1);
        }

        Evaluation crossValidate = crossValidate(
                "D:/Extra/B.E Project/agrodeploy/webapp/Data/ClusterAutotrain12.arff");

        DataSource source = new DataSource(
                "D:/Extra/B.E Project/agrodeploy/webapp/Data/ClusterAutotrain12.arff");
        Instances data = source.getDataSet();
        System.out.println(data.numInstances());
        data.setClassIndex(data.numAttributes() - 1);

        // 1. meta-classifier
        useClassifier(data);
        // 2. filter
        useFilter(data);
    } catch (Exception ex) {
        Logger.getLogger(MLPOC.class.getName()).log(Level.SEVERE, null, ex);
    }
}
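The pattern above, where classifyInstance() returns a double that is cast to an index into classAttribute().value(...), is the standard way to recover the predicted label name. A minimal hedged sketch (cls and test are assumed to already exist and be trained/prepared):

// Assumes: a trained Classifier cls and an Instances test with its class index set.
double pred = cls.classifyInstance(test.instance(0));    // index of the predicted nominal value
String label = test.classAttribute().value((int) pred);  // back to the human-readable label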
From source file:mlpoc.MLPOC.java
public static Evaluation crossValidate(String filename) {
    Evaluation eval = null;
    try {
        BufferedReader br = new BufferedReader(new FileReader(filename));
        // loads data and sets the class index
        Instances data = new Instances(br);
        br.close();
        /* File csv = new File(filename);
           CSVLoader loader = new CSVLoader();
           loader.setSource(csv);
           Instances data = loader.getDataSet(); */
        data.setClassIndex(data.numAttributes() - 1);

        // classifier
        String[] tmpOptions;
        String classname = "weka.classifiers.trees.J48 -C 0.25";
        tmpOptions = classname.split(" ");
        classname = "weka.classifiers.trees.J48";
        tmpOptions[0] = "";
        Classifier cls = (Classifier) Utils.forName(Classifier.class, classname, tmpOptions);

        // other options
        int seed = 2;
        int folds = 10;

        // randomize data
        Random rand = new Random(seed);
        Instances randData = new Instances(data);
        randData.randomize(rand);
        if (randData.classAttribute().isNominal())
            randData.stratify(folds);

        // perform cross-validation
        eval = new Evaluation(randData);
        for (int n = 0; n < folds; n++) {
            Instances train = randData.trainCV(folds, n);
            Instances test = randData.testCV(folds, n);
            // the above code is used by the StratifiedRemoveFolds filter, the
            // code below by the Explorer/Experimenter:
            // Instances train = randData.trainCV(folds, n, rand);

            // build and evaluate classifier
            Classifier clsCopy = Classifier.makeCopy(cls);
            clsCopy.buildClassifier(train);
            eval.evaluateModel(clsCopy, test);
        }

        // output evaluation
        System.out.println();
        System.out.println("=== Setup ===");
        System.out.println("Classifier: " + cls.getClass().getName() + " " + Utils.joinOptions(cls.getOptions()));
        System.out.println("Dataset: " + data.relationName());
        System.out.println("Folds: " + folds);
        System.out.println("Seed: " + seed);
        System.out.println();
        System.out.println(eval.toSummaryString("Summary for testing", true));
        System.out.println("Correctly Classified Instances: " + eval.correct());
        System.out.println("Percentage of Correctly Classified Instances: " + eval.pctCorrect());
        System.out.println("Incorrectly Classified Instances: " + eval.incorrect());
        System.out.println("Percentage of Incorrectly Classified Instances: " + eval.pctIncorrect());
    } catch (Exception ex) {
        System.err.println(ex.getMessage());
    }
    return eval;
}
From source file:moa.classifiers.AccuracyWeightedEnsemble.java
License:Open Source License
/**
 * Computes the weight of a candidate classifier.
 * @param candidate Candidate classifier.
 * @param chunk Data chunk of examples.
 * @param numFolds Number of folds in candidate classifier cross-validation.
 * @return Candidate classifier weight.
 */
protected double computeCandidateWeight(Classifier candidate, Instances chunk, int numFolds) {
    double candidateWeight = 0.0;
    Random random = new Random(1);
    Instances randData = new Instances(chunk);
    randData.randomize(random);
    if (randData.classAttribute().isNominal()) {
        randData.stratify(numFolds);
    }

    for (int n = 0; n < numFolds; n++) {
        Instances train = randData.trainCV(numFolds, n, random);
        Instances test = randData.testCV(numFolds, n);

        Classifier learner = candidate.copy();
        for (int num = 0; num < train.numInstances(); num++) {
            learner.trainOnInstance(train.instance(num));
        }
        candidateWeight += computeWeight(learner, test);
    }

    double resultWeight = candidateWeight / numFolds;
    if (Double.isInfinite(resultWeight)) {
        return Double.MAX_VALUE;
    } else {
        return resultWeight;
    }
}