List of usage examples for weka.core Instances enumerateInstances
public Enumeration<Instance> enumerateInstances()
From source file:Pair.java
License:Open Source License
/**
 * Runs the weighting loop over the source data combined with the given target data and
 * prints one "Iteration n:error" line per model built.
 *
 * The working set {@code data} holds the instances of {@code m_SourceInstances} at indices
 * {@code [0, numSourceInstances)} followed by the instances of {@code targetData}. Every
 * "source vs. target" decision below is made purely by comparing an index against
 * {@code numSourceInstances}.
 *
 * Fields written here: {@code numTargetInstances}, {@code sourceFraction}, and the entries of
 * {@code m_Classifiers} that get trained.
 *
 * @param targetData the target-domain instances appended after the source instances
 * @throws Exception propagated from the helpers called here (buildClassifierWithWeights,
 *         setWeights, getTestError, ...)
 */
private void doCV(Instances targetData) throws Exception {
    System.out.println();
    System.out.flush();
    int numSourceInstances = m_SourceInstances.numInstances();
    int numInstances = targetData.numInstances() + numSourceInstances;
    numTargetInstances = numInstances - numSourceInstances;
    double weightSource, weightTarget;
    double initialSourceFraction;
    // Scratch buffer used to snapshot and restore instance weights (non-fraction branch only).
    double[] weights = new double[numInstances];
    // Fixed seed, so any resampling below is repeatable between runs.
    Random randomInstance = new Random(1);
    Instances data = new Instances(m_SourceInstances, 0, numSourceInstances);
    // Now add the target data, shallow copying the instances as they are added
    // so it doesn't mess up the weights for anyone else
    Enumeration enumer = targetData.enumerateInstances();
    while (enumer.hasMoreElements()) {
        Instance instance = (Instance) enumer.nextElement();
        data.add(instance);
    }
    if (sourceRatio < 0) { // negative ratio: weight all instances equally
        weightSource = weightTarget = 1.0/*/numInstances*/;
        // The source share is then simply the fraction of instances that are source instances.
        initialSourceFraction = numSourceInstances / (double) numInstances;
    } else {
        // Split a unit of weight between source and target in the proportion sourceRatio : 1.
        double totalWeight = 1 + sourceRatio;
        weightSource = sourceRatio / totalWeight/*/numSourceInstances*/;
        weightTarget = 1.0 / totalWeight/*/numTargetInstances*/;
        initialSourceFraction = weightSource;
    }
    // Apply the initial per-instance weights.
    for (int j = 0; j < numInstances; j++) {
        Instance instance = data.instance(j);
        if (j < numSourceInstances)
            instance.setWeight(weightSource);
        else
            instance.setWeight(weightTarget);
    }
    if (doFraction) {
        // Fraction mode: no boosting-style re-weighting. Each iteration assigns one uniform
        // weight to all source instances and weight 1 to all target instances.
        for (int it = 0; it < sourceIterations/*m_NumIterations*/; it++) {
            // Linear decay of the source share with the iteration number [same weights as regular].
            sourceFraction = (1 - (it / (double) m_NumIterations)) * initialSourceFraction;
            // Cap the share so the (1 - sourceFraction) divisor below stays away from zero.
            if (sourceFraction > .995)
                sourceFraction = .995;
            //double sourceWeight = (sourceFraction * numInstances) / numSourceInstances;
            // With target instances at weight 1, this per-instance source weight makes the
            // source instances carry exactly sourceFraction of the total weight.
            double sourceWeight = (sourceFraction * numTargetInstances) / (numSourceInstances * (1 - sourceFraction));
            for (int j = 0; j < numInstances; j++) {
                Instance instance = data.instance(j);
                if (j < numSourceInstances)
                    instance.setWeight(sourceWeight);
                else
                    instance.setWeight(1);
            }
            buildClassifierWithWeights(data);
            System.out.println("Iteration " + it + ":" + getTestError());
        }
    } else {
        // Baseline model on the initial weights, reported as iteration -1. The weights are
        // snapshotted before and restored after the call.
        // NOTE(review): presumably buildClassifierWithWeights modifies instance weights - confirm.
        for (int i = 0; i < numInstances; i++)
            weights[i] = data.instance(i).weight();
        buildClassifierWithWeights(data);
        System.out.println("Iteration -1:" + getTestError());
        for (int i = 0; i < numInstances; i++)
            data.instance(i).setWeight(weights[i]);
        for (int it = 0; it < sourceIterations; it++) {
            // Train on the weighted data directly, or on a weighted resample of it when
            // resampling is enabled and m_NumIterationsPerformed is non-zero.
            Instances sample = null;
            if (!resample || m_NumIterationsPerformed == 0) {
                sample = data;
            } else {
                // Normalise the weights to sum to 1 before resampling.
                double sum = data.sumOfWeights();
                double[] sweights = new double[data.numInstances()];
                for (int i = 0; i < sweights.length; i++) {
                    sweights[i] = data.instance(i).weight() / sum;
                }
                sample = data.resampleWithWeights(randomInstance, sweights);
            }
            try {
                m_Classifiers[it].buildClassifier(sample);
            } catch (Exception e) {
                // A training failure is only reported; the loop carries on and still passes
                // this classifier to setWeights below.
                e.printStackTrace();
                System.out.println("E: " + e);
            }
            // Shrink the source share for the next round, then re-weight using this round's model.
            sourceFraction = initialSourceFraction * (1 - (it + 1) / (double) m_NumIterations);
            setWeights(data, m_Classifiers[it], sourceFraction, numSourceInstances, false);
            // Same snapshot / build / report / restore sequence as for the baseline model.
            for (int i = 0; i < numInstances; i++)
                weights[i] = data.instance(i).weight();
            buildClassifierWithWeights(data);
            System.out.println("Iteration " + it + ":" + getTestError());
            for (int i = 0; i < numInstances; i++)
                data.instance(i).setWeight(weights[i]);
        }
    }
}
From source file:Pair.java
License:Open Source License
/**
 * Sets the weights for the next iteration.
 *
 * Computes each instance's absolute prediction error under {@code cls}, scales the errors by
 * the largest error found among the target instances (indices {@code >= numSourceInstances}),
 * and then rewrites the instance weights of {@code trainData} in place. Also stores the
 * target share of the total weight in the {@code targetWeight} field.
 *
 * @param trainData          the instances to re-weight; indices below
 *                           {@code numSourceInstances} are treated as source instances,
 *                           the rest as target instances
 * @param cls                the classifier whose predictions are compared with the class values
 * @param sourceFraction     used only when {@code isFinal} is false: the source weights are
 *                           scaled towards a total of {@code sourceFraction * trainData.numInstances()}
 * @param numSourceInstances number of leading instances that are source instances
 * @param isFinal            selects the re-weighting scheme (see the two branches below)
 * @return {@code -1} if the largest target error is zero (weights are left untouched),
 *         otherwise the {@code beta} value computed here
 * @throws Exception propagated from {@code cls.classifyInstance}
 */
protected double setWeights(Instances trainData, Classifier cls, double sourceFraction, int numSourceInstances,
        boolean isFinal) throws Exception {
    Enumeration enu = trainData.enumerateInstances();
    // NOTE(review): instNum is never read in this method.
    int instNum = 0;
    double[] errors = new double[trainData.numInstances()];
    double max = 0;
    int i = 0;
    // Absolute error per instance; the maximum is tracked over target instances only.
    while (enu.hasMoreElements()) {
        Instance instance = (Instance) enu.nextElement();
        errors[i] = Math.abs(cls.classifyInstance(instance) - instance.classValue());
        if (i >= numSourceInstances && errors[i] > max)
            max = errors[i];
        i++;
    }
    // No error on any target instance: nothing to scale by, so bail out.
    if (max == 0)
        return -1;
    //get avg loss
    double loss = 0;
    double initialTWeightSum = 0;
    double allWeightSum = 0;
    for (int j = 0; j < errors.length; j++) {
        // Scale by the target maximum; a source error larger than that maximum ends up above 1.
        errors[j] /= max;
        Instance instance = trainData.instance(j);
        loss += instance.weight() * errors[j];
        if (j >= numSourceInstances) {
            //loss += instance.weight() * errors[j];
            initialTWeightSum += instance.weight();
        }
        allWeightSum += instance.weight();
    }
    //loss /= weightSum;
    // Weighted average of the scaled errors over ALL instances (source and target).
    loss /= allWeightSum;
    targetWeight = initialTWeightSum / allWeightSum;
    /*
    if (!isFinal){
        System.out.println("Target weight: " + targetWeight);
        System.out.println("max: " + max);
        System.out.println("avg error: " + loss * max);
        System.out.println("Loss: " + loss);
    }
    */
    double beta;
    if (fixedBeta)
        beta = 0.4 / 0.6;
    else {
        if (isFinal && loss > 0.499)//bad, so quit
            //return -1;
            loss = 0.499; //since we're doing CV, no reason to quit
        beta = loss / (1 - loss); //or just use beta = .4/.6, since beta isn't as meaningful in AdaBoost.R2;
    }
    double tWeightSum = 0;
    if (!isFinal) {
        // Look for a base b in (0.001, 0.999) such that the re-weighted source instances
        // (weight * b^error) sum to roughly sourceFraction * (total number of instances).
        // NOTE(review): the original comment said "sourceFraction*num source", but the code
        // multiplies by errors.length, i.e. all instances - confirm which is intended.
        //do binary search
        double goal = sourceFraction * errors.length;
        double bMin = .001;
        double bMax = .999;
        double b;
        double sourceSum = 0;
        while (bMax - bMin > .001) {
            b = (bMax + bMin) / 2;
            double sum = 0;
            for (int j = 0; j < numSourceInstances; j++) {
                Instance instance = trainData.instance(j);
                sum += Math.pow(b, errors[j]) * instance.weight();
            }
            // Overshooting the goal tightens the upper bound; otherwise raise the lower bound.
            if (sum > goal)
                bMax = b;
            else
                bMin = b;
        }
        // NOTE(review): this midpoint is computed but never used; the update below uses bMin.
        b = (bMax + bMin) / 2;
        //System.out.println(b);
        for (int j = 0; j < numSourceInstances; j++) {
            Instance instance = trainData.instance(j);
            instance.setWeight(instance.weight() * Math.pow(bMin, errors[j]));
            sourceSum += instance.weight();
        }
        //now adjust target weights
        // Rescale the target weights uniformly so that source + target weights sum to the
        // number of instances.
        goal = errors.length - sourceSum;
        double m = goal / initialTWeightSum;
        for (int j = numSourceInstances; j < errors.length; j++) {
            Instance instance = trainData.instance(j);
            instance.setWeight(instance.weight() * m);
        }
    } else {//final
        if (!doUpsource) { //modify only target weights
            // Multiply each target weight by beta^(-error), then renormalise so the target
            // weights keep their original total.
            for (int j = numSourceInstances; j < errors.length; j++) {
                Instance instance = trainData.instance(j);
                instance.setWeight(instance.weight() * Math.pow(beta, -errors[j]));
                tWeightSum += instance.weight();
            }
            double weightSumInverse = initialTWeightSum / tWeightSum;
            for (int j = numSourceInstances; j < errors.length; j++) {
                Instance instance = trainData.instance(j);
                instance.setWeight(instance.weight() * weightSumInverse);
            }
        } else { //modify all weights
            // Same beta^(-error) update applied to every instance, renormalised so that all
            // weights sum to the number of instances.
            for (int j = 0; j < errors.length; j++) {
                Instance instance = trainData.instance(j);
                instance.setWeight(instance.weight() * Math.pow(beta, -errors[j]));
                tWeightSum += instance.weight();
            }
            double weightSumInverse = errors.length / tWeightSum;
            for (int j = 0; j < errors.length; j++) {
                Instance instance = trainData.instance(j);
                instance.setWeight(instance.weight() * weightSumInverse);
            }
        }
    }
    return beta;
}
From source file:GrowTree.java
public boolean homogeneous(Instances D) { distribution = new double[D.numClasses()]; Enumeration eninst = D.enumerateInstances(); while (eninst.hasMoreElements()) { Instance ele = (Instance) eninst.nextElement(); distribution[(int) ele.classValue()]++; }//from w ww . ja v a2 s . com int cnt = 0; for (int i = 0; i < D.numClasses(); i++) { if (distribution[i] > 0) cnt++; } if (cnt <= 1) // if all instances are of single class return true; else return false; }
From source file:GrowTree.java
/**
 * Returns the class value of the first instance produced by {@code D}'s instance enumeration.
 *
 * No emptiness check is made: the enumeration's {@code nextElement()} is called unconditionally.
 *
 * @param D the instances to take the label from
 * @return the class value of the first enumerated instance
 */
double label(Instances D) {
    Instance first = (Instance) D.enumerateInstances().nextElement();
    return first.classValue();
}
From source file:GrowTree.java
Attribute bestSplit(Instances D) { double imin = 1.0; Attribute fbest = null;//from w ww .j a v a 2 s. co m Enumeration enat = D.enumerateAttributes(); while (enat.hasMoreElements()) { Attribute a = (Attribute) enat.nextElement(); //split D into subsets d1 to dn based on values vi based on features Instances[] split = new Instances[a.numValues()]; for (int i = 0; i < a.numValues(); i++) { split[i] = new Instances(D, D.numInstances()); } Enumeration x = D.enumerateInstances(); while (x.hasMoreElements()) { Instance in = (Instance) x.nextElement(); split[(int) in.value(a)].add(in); } for (int i = 0; i < split.length; i++) { split[i].compactify(); } for (int i = 0; i < a.numValues(); i++) { if (imp(split[i]) < imin) { imin = imp(split[i]); fbest = a; //evaluate the best feature to make root } } } return fbest; }
From source file:GrowTree.java
public double imp(Instances data) { double localdistribution[] = new double[data.numClasses()]; Enumeration eninst = data.enumerateInstances(); while (eninst.hasMoreElements()) { Instance ele = (Instance) eninst.nextElement(); localdistribution[(int) ele.classValue()]++; }//from w w w .j a v a2 s.c o m return imp; }
From source file:ID3Chi.java
License:Open Source License
private void MakeALeaf(Instances data) { data.deleteWithMissing(m_Attribute); if (data.numInstances() == 0) { SetNullDistribution(data);//ww w . j a va 2 s. co m return; } m_Distribution = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); m_Distribution[(int) inst.classValue()]++; } Utils.normalize(m_Distribution); m_ClassValue = Utils.maxIndex(m_Distribution); m_ClassAttribute = data.classAttribute(); // set m_Attribute to null to mark this node as a leaf m_Attribute = null; }
From source file:ID3Chi.java
License:Open Source License
private double[] GetClassCounts(Instances data) { double[] classCounts = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); classCounts[(int) inst.classValue()]++; }//from w ww .j a va2s . co m return classCounts; }
From source file:ID3Chi.java
License:Open Source License
/** * Splits a dataset according to the values of a nominal attribute. * * @param data//from w ww.j a v a2 s . c o m * the data which is to be split * @param att * the attribute to be used for splitting * @return the sets of instances produced by the split */ private Instances[] splitData(Instances data, Attribute att) { // [att.numValues()] is location for "unknown" values Instances[] subset = new Instances[att.numValues() + 1]; for (int j = 0; j <= att.numValues(); j++) { subset[j] = new Instances(data, data.numInstances()); } Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); if (inst.isMissing(att)) { subset[att.numValues()].add(inst); } else { subset[(int) inst.value(att)].add(inst); } } for (int i = 0; i < subset.length; i++) { subset[i].compactify(); } return subset; }
From source file:br.com.ufu.lsi.utils.DocumentFrequencyAttributeEval.java
License:Open Source License
/** * Initializes an information gain attribute evaluator. Discretizes all attributes that are * numeric.//ww w. j a v a2 s .co m * * @param data set of instances serving as training data * @throws Exception if the evaluator has not been generated successfully */ public void buildEvaluator(Instances data) throws Exception { // can evaluator handle data? getCapabilities().testWithFail(data); int classIndex = data.classIndex(); int numAttributes = data.numAttributes(); m_DFs = new int[numAttributes]; Enumeration e = data.enumerateInstances(); while (e.hasMoreElements()) { Instance instance = (Instance) e.nextElement(); int numValues = instance.numValues(); for (int valueIndex = 0; valueIndex < numValues; valueIndex++) { int attIndex = instance.index(valueIndex); if (attIndex != classIndex) { double value = instance.valueSparse(valueIndex); //missingvalues werden also 0 betrachtet. if (m_missingAsZero) { if (!Instance.isMissingValue(value) && value != 0.0) { //man knnte auch isMissingSparce(valueIndex) verwenden, oder ineffizienterweise isMissing(attIndex) m_DFs[attIndex]++; //m_DFs[ attIndex ]+=value ; } } else { if (value != 0.0) { m_DFs[attIndex]++; //m_DFs[ attIndex ]+=value ; } } } } } }