List of usage examples for weka.core Instances instance
public Instance instance(int index)
From source file:fantail.algorithms.BinaryART.java
License:Open Source License
private Instances[] splitData(Instances data, int attIndex, double splitPoint) throws Exception { Instances[] subsets = new Instances[2]; subsets[0] = new Instances(data, 0); subsets[1] = new Instances(data, 0); // changed on 7 Feb 2013, because for some LR datasets, the Alpo returns NaN int halfPoint = (int) (data.numInstances() * 0.50); for (int i = 0; i < data.numInstances(); i++) { Instance inst = data.instance(i); if (inst.value(attIndex) <= splitPoint && subsets[0].numInstances() < halfPoint) { subsets[0].add(inst);//from w ww. ja v a 2 s .c o m } else { subsets[1].add(inst); } } if (subsets[1].numInstances() == 0) { subsets[1].add(subsets[0].instance(0)); } if (subsets[0].numInstances() == 0) { subsets[0].add(subsets[1].instance(0)); } return subsets; // following were used before 7 Feb 2013 // for (int i = 0; i < data.numInstances(); i++) { // Instance inst = data.instance(i); // if (inst.value(attIndex) <= splitPoint) { // subsets[0].add(inst); // } else { // subsets[1].add(inst); // } // } // // if (subsets[1].numInstances() == 0) { // subsets[1].add(subsets[0].instance(0)); // } // // if (subsets[0].numInstances() == 0) { // subsets[0].add(subsets[1].instance(0)); // } // return subsets; }
From source file:fantail.algorithms.BinaryART.java
License:Open Source License
private double getMedian2(Instances data, int attIndex) throws Exception { double[] numArray = new double[data.numInstances()]; for (int i = 0; i < data.numInstances(); i++) { Instance inst = data.instance(i); numArray[i] = inst.value(attIndex); }//from w ww .jav a2 s .co m Arrays.sort(numArray); double median; if (numArray.length % 2 == 0) { median = ((double) numArray[numArray.length / 2] + (double) numArray[numArray.length / 2 + 1]) / 2; } else { median = (double) numArray[numArray.length / 2]; } return median; }
From source file:fantail.algorithms.BinaryART.java
License:Open Source License
private double getMedian(Instances data, int attIndex) throws Exception { if (false) {// ww w.ja va 2 s . com return getMedian2(data, attIndex); // added 07-july 2013; actually they are the same // removed 17/07/2013 } DescriptiveStatistics stats = new DescriptiveStatistics(); for (int i = 0; i < data.numInstances(); i++) { Instance inst = (Instance) data.instance(i); stats.addValue(inst.value(attIndex)); } double median = stats.getPercentile(50); return median; }
From source file:fantail.algorithms.RankingByPairwiseComparison.java
License:Open Source License
@Override public void buildRanker(Instances data) throws Exception { m_Classifiers = new ArrayList<weka.classifiers.AbstractClassifier>(); m_AlgoPairs = new ArrayList<String>(); m_NumLabels = Tools.getNumberTargets(data); // build pb datasets for (int a = 0; a < m_NumLabels; a++) { for (int b = 0; b < m_NumLabels; b++) { String pairStr = a + "|" + b; if (!hasPair(m_AlgoPairs, pairStr) && a != b) { m_AlgoPairs.add(pairStr); Instances d = new Instances(data); d.setClassIndex(-1);//w ww . j a va2 s .co m d.deleteAttributeAt(d.numAttributes() - 1); weka.filters.unsupervised.attribute.Add add = new weka.filters.unsupervised.attribute.Add(); add.setInputFormat(d); add.setOptions(weka.core.Utils .splitOptions("-T NOM -N class -L " + ((int) a) + "," + ((int) b) + " -C last")); d = Filter.useFilter(d, add); d.setClassIndex(d.numAttributes() - 1); for (int i = 0; i < d.numInstances(); i++) { Instance metaInst = (Instance) data.instance(i); Instance inst = d.instance(i); double[] rankVector = Tools.getTargetVector(metaInst); double rank_a = rankVector[a]; double rank_b = rankVector[b]; if (rank_a < rank_b) { inst.setClassValue(0.0); } else { inst.setClassValue(1.0); } } //weka.classifiers.functions.SMO cls = new weka.classifiers.functions.SMO(); //String ops = "weka.classifiers.functions.SMO -C 1.0 -L 0.001 -P 1.0E-12 -N 0 -V -1 -W 1 -K \"weka.classifiers.functions.supportVector.RBFKernel -C 250007 -G 0.01\""; //cls.setOptions(weka.core.Utils.splitOptions(ops)); //cls.buildClassifier(d); //weka.classifiers.functions.Logistic cls = new weka.classifiers.functions.Logistic(); //weka.classifiers.trees.J48 cls = new weka.classifiers.trees.J48(); //weka.classifiers.rules.ZeroR cls = new weka.classifiers.rules.ZeroR(); weka.classifiers.trees.DecisionStump cls = new weka.classifiers.trees.DecisionStump(); cls.buildClassifier(d); m_Classifiers.add(cls); m_BaseClassifierName = cls.getClass().getSimpleName(); m_Add = add; } } } }
From source file:fantail.algorithms.RankingByPairwiseComparison.java
License:Open Source License
@Override public double[] recommendRanking(Instance testInst) throws Exception { Instances tempData = new Instances(testInst.dataset(), 0); tempData.add((Instance) testInst.copy()); // remove the relation att tempData.setClassIndex(-1);//from ww w .j a va2s . c om tempData.deleteAttributeAt(tempData.numAttributes() - 1); tempData = Filter.useFilter(tempData, m_Add); tempData.setClassIndex(tempData.numAttributes() - 1); double predRanking[] = new double[m_NumLabels]; for (int i = 0; i < predRanking.length; i++) { predRanking[i] = m_NumLabels - 1; } for (int i = 0; i < m_Classifiers.size(); i++) { double predIndex = m_Classifiers.get(i).classifyInstance(tempData.instance(0)); String algoPair = m_AlgoPairs.get(i); String[] parts = algoPair.split("\\|"); int trueIndex = Integer.parseInt(parts[(int) predIndex]); predRanking[trueIndex] -= 1; } predRanking = Tools.doubleArrayToRanking(predRanking); return predRanking; }
From source file:fantail.algorithms.RankingByPairwiseComparison.java
License:Open Source License
public double[] recommendRanking2(Instance testInst) throws Exception { Instances tempData = new Instances(testInst.dataset(), 0); tempData.add((Instance) testInst.copy()); // remove the relation att tempData.setClassIndex(-1);/*from w w w .j a v a 2s . c om*/ tempData.deleteAttributeAt(tempData.numAttributes() - 1); tempData = Filter.useFilter(tempData, m_Add); tempData.setClassIndex(tempData.numAttributes() - 1); double predRanking[] = new double[m_NumLabels]; for (int i = 0; i < m_Classifiers.size(); i++) { double predIndex = m_Classifiers.get(i).classifyInstance(tempData.instance(0)); double predProb = m_Classifiers.get(i).distributionForInstance(tempData.instance(0))[0]; String algoPair = m_AlgoPairs.get(i); String[] parts = algoPair.split("\\|"); int trueIndex = Integer.parseInt(parts[(int) predIndex]); predRanking[trueIndex] -= predProb; } return Tools.doubleArrayToRanking(predRanking); }
From source file:fantail.algorithms.RankingViaRegression.java
License:Open Source License
@Override public void buildRanker(Instances data) throws Exception { Instances workingData = new Instances(data); //Instance instTemp = workingData.instance(0); //m_LastFeatureIndex = workingData.numAttributes() - 1; m_NumFeatures = workingData.numAttributes() - 1; m_NumTargets = Tools.getNumberTargets(data); m_Classifiers = new AbstractClassifier[m_NumTargets]; for (int i = 0; i < m_NumTargets; i++) { weka.classifiers.functions.LinearRegression lr = new weka.classifiers.functions.LinearRegression(); m_Classifiers[i] = AbstractClassifier.makeCopy(lr); }/*from w w w . j av a 2 s. com*/ Instances[] trainingSets = new Instances[m_NumTargets]; for (int t = 0; t < m_NumTargets; t++) { ArrayList attributes = new ArrayList(); for (int i = 0; i < m_NumFeatures; i++) { attributes.add(new Attribute(workingData.attribute(i).name())); } String targetName = "att-" + (t + 1); attributes.add(new Attribute(targetName)); trainingSets[t] = new Instances("data-" + targetName, attributes, 0); for (int j = 0; j < workingData.numInstances(); j++) { Instance metaInst = workingData.instance(j); double[] ranking = Tools.getTargetVector(metaInst); double[] values = new double[trainingSets[t].numAttributes()]; for (int m = 0; m < (trainingSets[t].numAttributes() - 1); m++) { values[m] = metaInst.value(m); } values[values.length - 1] = ranking[t]; trainingSets[t].add(new DenseInstance(1.0, values)); } trainingSets[t].setClassIndex(trainingSets[t].numAttributes() - 1); m_Classifiers[t].buildClassifier(trainingSets[t]); } m_TempHeader = new Instances(trainingSets[0], 0); }
From source file:fantail.algorithms.RankingWithBinaryPCT.java
License:Open Source License
/**
 * Averages, over all target labels, the variance of the weighted target values.
 *
 * <p>For each label index below {@code m_NumTargetLabels}, the label's entry in
 * every instance's {@code Tools.getTargetVector} is multiplied by that instance's
 * weight; the variance of those products is computed with
 * {@code weka.core.Utils.variance}. The per-label variances are summed and
 * divided by {@code m_NumTargetLabels}.
 *
 * @param data instances whose target vectors and weights are used
 * @return the sum of per-label variances divided by {@code m_NumTargetLabels}
 * @throws Exception declared for callers; propagated from helper calls if they throw
 */
private double computeVariance(Instances data) throws Exception {
    final int rows = data.numInstances();

    // Fetch every target vector once, up front.
    double[][] rankings = new double[rows][];
    for (int r = 0; r < rows; r++) {
        rankings[r] = Tools.getTargetVector(data.instance(r));
    }

    double total = 0;
    for (int label = 0; label < m_NumTargetLabels; label++) {
        double[] weighted = new double[rows];
        for (int r = 0; r < rows; r++) {
            weighted[r] = rankings[r][label] * data.instance(r).weight();
        }
        total += weka.core.Utils.variance(weighted);
    }

    return total / m_NumTargetLabels;
}
From source file:fantail.algorithms.RankingWithBinaryPCT.java
License:Open Source License
private Instances[] splitData(Instances data, int attIndex, double splitPoint) throws Exception { Instances[] subsets = new Instances[2]; subsets[0] = new Instances(data, 0); subsets[1] = new Instances(data, 0); for (int i = 0; i < data.numInstances(); i++) { Instance inst = data.instance(i); if (inst.value(attIndex) <= splitPoint && (subsets[0].numInstances() <= 0.5 * data.numInstances())) { subsets[0].add(inst);/* w w w .j a v a 2 s . c o m*/ } else { subsets[1].add(inst); } } // TODO: if (subsets[1].numInstances() == 0) { subsets[1].add(subsets[0].instance(0)); } if (subsets[0].numInstances() == 0) { subsets[0].add(subsets[1].instance(0)); } return subsets; }
From source file:fantail.algorithms.RankingWithBinaryPCT.java
License:Open Source License
private Instances[] splitData2(Instances data, int attIndex, double splitPoint) throws Exception { Instances[] subsets = new Instances[2]; subsets[0] = new Instances(data, 0); subsets[1] = new Instances(data, 0); for (int i = 0; i < data.numInstances(); i++) { Instance inst = data.instance(i); if (inst.value(attIndex) <= splitPoint) { subsets[0].add(inst);// ww w . j ava2 s . c om } else { subsets[1].add(inst); } } // TODO: if (subsets[1].numInstances() == 0) { subsets[1].add(subsets[0].instance(0)); } if (subsets[0].numInstances() == 0) { subsets[0].add(subsets[1].instance(0)); } return subsets; }