List of usage examples for the weka.core.Instance method weight()
public double weight();
From source file:PrincipalComponents.java
License:Open Source License
/** * Convert a pc transformed instance back to the original space * * @param inst the instance to convert/*from w w w.ja v a 2s. c om*/ * @return the processed instance * @throws Exception if something goes wrong */ private Instance convertInstanceToOriginal(Instance inst) throws Exception { double[] newVals = null; if (m_hasClass) { newVals = new double[m_numAttribs + 1]; } else { newVals = new double[m_numAttribs]; } if (m_hasClass) { // class is always appended as the last attribute newVals[m_numAttribs] = inst.value(inst.numAttributes() - 1); } for (int i = 0; i < m_eTranspose[0].length; i++) { double tempval = 0.0; for (int j = 1; j < m_eTranspose.length; j++) { tempval += (m_eTranspose[j][i] * inst.value(j - 1)); } newVals[i] = tempval; if (!m_center) { newVals[i] *= m_stdDevs[i]; } newVals[i] += m_means[i]; } if (inst instanceof SparseInstance) { return new SparseInstance(inst.weight(), newVals); } else { return new DenseInstance(inst.weight(), newVals); } }
From source file:PrincipalComponents.java
License:Open Source License
/** * Transform an instance in original (unormalized) format. Convert back to * the original space if requested./*from w w w .j av a2 s . c o m*/ * * @param instance an instance in the original (unormalized) format * @return a transformed instance * @throws Exception if instance cant be transformed */ @Override public Instance convertInstance(Instance instance) throws Exception { if (m_eigenvalues == null) { throw new Exception("convertInstance: Principal components not " + "built yet"); } double[] newVals = new double[m_outputNumAtts]; Instance tempInst = (Instance) instance.copy(); if (!instance.dataset().equalHeaders(m_trainHeader)) { throw new Exception("Can't convert instance: header's don't match: " + "PrincipalComponents\n" + instance.dataset().equalHeadersMsg(m_trainHeader)); } m_replaceMissingFilter.input(tempInst); m_replaceMissingFilter.batchFinished(); tempInst = m_replaceMissingFilter.output(); /* * if (m_normalize) { m_normalizeFilter.input(tempInst); * m_normalizeFilter.batchFinished(); tempInst = * m_normalizeFilter.output(); } */ m_nominalToBinFilter.input(tempInst); m_nominalToBinFilter.batchFinished(); tempInst = m_nominalToBinFilter.output(); if (m_attributeFilter != null) { m_attributeFilter.input(tempInst); m_attributeFilter.batchFinished(); tempInst = m_attributeFilter.output(); } if (!m_center) { m_standardizeFilter.input(tempInst); m_standardizeFilter.batchFinished(); tempInst = m_standardizeFilter.output(); } else { m_centerFilter.input(tempInst); m_centerFilter.batchFinished(); tempInst = m_centerFilter.output(); } if (m_hasClass) { newVals[m_outputNumAtts - 1] = instance.value(instance.classIndex()); } double cumulative = 0; int numAttAdded = 0; for (int i = m_numAttribs - 1; i >= 0; i--) { double tempval = 0.0; for (int j = 0; j < m_numAttribs; j++) { tempval += (m_eigenvectors[j][m_sortedEigens[i]] * tempInst.value(j)); } newVals[m_numAttribs - i - 1] = tempval; cumulative += m_eigenvalues[m_sortedEigens[i]]; if ((cumulative / 
m_sumOfEigenValues) >= m_coverVariance) { break; } if (numAttAdded > m_maxNumAttr) { break; } numAttAdded++; } if (!m_transBackToOriginal) { if (instance instanceof SparseInstance) { return new SparseInstance(instance.weight(), newVals); } else { return new DenseInstance(instance.weight(), newVals); } } else { if (instance instanceof SparseInstance) { return convertInstanceToOriginal(new SparseInstance(instance.weight(), newVals)); } else { return convertInstanceToOriginal(new DenseInstance(instance.weight(), newVals)); } } }
From source file:REPTree.java
License:Open Source License
/** * Builds classifier.//from w w w . j a va2 s.c om * * @param data the data to train with * @throws Exception if building fails */ public void buildClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); Random random = new Random(m_Seed); m_zeroR = null; if (data.numAttributes() == 1) { m_zeroR = new ZeroR(); m_zeroR.buildClassifier(data); return; } // Randomize and stratify data.randomize(random); if (data.classAttribute().isNominal()) { data.stratify(m_NumFolds); } // Split data into training and pruning set Instances train = null; Instances prune = null; if (!m_NoPruning) { train = data.trainCV(m_NumFolds, 0, random); prune = data.testCV(m_NumFolds, 0); } else { train = data; } // Create array of sorted indices and weights int[][][] sortedIndices = new int[1][train.numAttributes()][0]; double[][][] weights = new double[1][train.numAttributes()][0]; double[] vals = new double[train.numInstances()]; for (int j = 0; j < train.numAttributes(); j++) { if (j != train.classIndex()) { weights[0][j] = new double[train.numInstances()]; if (train.attribute(j).isNominal()) { // Handling nominal attributes. Putting indices of // instances with missing values at the end. 
sortedIndices[0][j] = new int[train.numInstances()]; int count = 0; for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (!inst.isMissing(j)) { sortedIndices[0][j][count] = i; weights[0][j][count] = inst.weight(); count++; } } for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (inst.isMissing(j)) { sortedIndices[0][j][count] = i; weights[0][j][count] = inst.weight(); count++; } } } else { // Sorted indices are computed for numeric attributes for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); vals[i] = inst.value(j); } sortedIndices[0][j] = Utils.sort(vals); for (int i = 0; i < train.numInstances(); i++) { weights[0][j][i] = train.instance(sortedIndices[0][j][i]).weight(); } } } } // Compute initial class counts double[] classProbs = new double[train.numClasses()]; double totalWeight = 0, totalSumSquared = 0; for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (data.classAttribute().isNominal()) { classProbs[(int) inst.classValue()] += inst.weight(); totalWeight += inst.weight(); } else { classProbs[0] += inst.classValue() * inst.weight(); totalSumSquared += inst.classValue() * inst.classValue() * inst.weight(); totalWeight += inst.weight(); } } m_Tree = new Tree(); double trainVariance = 0; if (data.classAttribute().isNumeric()) { trainVariance = m_Tree.singleVariance(classProbs[0], totalSumSquared, totalWeight) / totalWeight; classProbs[0] /= totalWeight; } // Build tree m_Tree.buildTree(sortedIndices, weights, train, totalWeight, classProbs, new Instances(train, 0), m_MinNum, m_MinVarianceProp * trainVariance, 0, m_MaxDepth); // Insert pruning data and perform reduced error pruning if (!m_NoPruning) { m_Tree.insertHoldOutSet(prune); m_Tree.reducedErrorPrune(); m_Tree.backfitHoldOutSet(); } }
From source file:Pair.java
License:Open Source License
/** * Sets the weights for the next iteration. *//* w w w . j a v a 2s .c o m*/ protected double setWeights(Instances trainData, Classifier cls, double sourceFraction, int numSourceInstances, boolean isFinal) throws Exception { Enumeration enu = trainData.enumerateInstances(); int instNum = 0; double[] errors = new double[trainData.numInstances()]; double max = 0; int i = 0; while (enu.hasMoreElements()) { Instance instance = (Instance) enu.nextElement(); errors[i] = Math.abs(cls.classifyInstance(instance) - instance.classValue()); if (i >= numSourceInstances && errors[i] > max) max = errors[i]; i++; } if (max == 0) return -1; //get avg loss double loss = 0; double initialTWeightSum = 0; double allWeightSum = 0; for (int j = 0; j < errors.length; j++) { errors[j] /= max; Instance instance = trainData.instance(j); loss += instance.weight() * errors[j]; if (j >= numSourceInstances) { //loss += instance.weight() * errors[j]; initialTWeightSum += instance.weight(); } allWeightSum += instance.weight(); } //loss /= weightSum; loss /= allWeightSum; targetWeight = initialTWeightSum / allWeightSum; /* if (!isFinal){ System.out.println("Target weight: " + targetWeight); System.out.println("max: " + max); System.out.println("avg error: " + loss * max); System.out.println("Loss: " + loss); } */ double beta; if (fixedBeta) beta = 0.4 / 0.6; else { if (isFinal && loss > 0.499)//bad, so quit //return -1; loss = 0.499; //since we're doing CV, no reason to quit beta = loss / (1 - loss); //or just use beta = .4/.6, since beta isn't as meaningful in AdaBoost.R2; } double tWeightSum = 0; if (!isFinal) { //need to find b so that weight of source be sourceFraction*num source //do binary search double goal = sourceFraction * errors.length; double bMin = .001; double bMax = .999; double b; double sourceSum = 0; while (bMax - bMin > .001) { b = (bMax + bMin) / 2; double sum = 0; for (int j = 0; j < numSourceInstances; j++) { Instance instance = trainData.instance(j); sum += Math.pow(b, 
errors[j]) * instance.weight(); } if (sum > goal) bMax = b; else bMin = b; } b = (bMax + bMin) / 2; //System.out.println(b); for (int j = 0; j < numSourceInstances; j++) { Instance instance = trainData.instance(j); instance.setWeight(instance.weight() * Math.pow(bMin, errors[j])); sourceSum += instance.weight(); } //now adjust target weights goal = errors.length - sourceSum; double m = goal / initialTWeightSum; for (int j = numSourceInstances; j < errors.length; j++) { Instance instance = trainData.instance(j); instance.setWeight(instance.weight() * m); } } else {//final if (!doUpsource) { //modify only target weights for (int j = numSourceInstances; j < errors.length; j++) { Instance instance = trainData.instance(j); instance.setWeight(instance.weight() * Math.pow(beta, -errors[j])); tWeightSum += instance.weight(); } double weightSumInverse = initialTWeightSum / tWeightSum; for (int j = numSourceInstances; j < errors.length; j++) { Instance instance = trainData.instance(j); instance.setWeight(instance.weight() * weightSumInverse); } } else { //modify all weights for (int j = 0; j < errors.length; j++) { Instance instance = trainData.instance(j); instance.setWeight(instance.weight() * Math.pow(beta, -errors[j])); tWeightSum += instance.weight(); } double weightSumInverse = errors.length / tWeightSum; for (int j = 0; j < errors.length; j++) { Instance instance = trainData.instance(j); instance.setWeight(instance.weight() * weightSumInverse); } } } return beta; }
From source file:REPRandomTree.java
License:Open Source License
/** * Builds classifier./*from ww w.j a v a2 s .c om*/ * * @param data the data to train with * @throws Exception if building fails */ public void buildClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); Random random = new Random(m_Seed); m_zeroR = null; if (data.numAttributes() == 1) { m_zeroR = new ZeroR(); m_zeroR.buildClassifier(data); return; } // Randomize and stratify data.randomize(random); if (data.classAttribute().isNominal()) { data.stratify(m_NumFolds); } // Split data into training and pruning set Instances train = null; Instances prune = null; if (!m_NoPruning) { train = data.trainCV(m_NumFolds, 0, random); prune = data.testCV(m_NumFolds, 0); } else { train = data; } // Create array of sorted indices and weights int[][][] sortedIndices = new int[1][train.numAttributes()][0]; double[][][] weights = new double[1][train.numAttributes()][0]; double[] vals = new double[train.numInstances()]; for (int j = 0; j < train.numAttributes(); j++) { if (j != train.classIndex()) { weights[0][j] = new double[train.numInstances()]; if (train.attribute(j).isNominal()) { // Handling nominal attributes. Putting indices of // instances with missing values at the end. 
sortedIndices[0][j] = new int[train.numInstances()]; int count = 0; for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (!inst.isMissing(j)) { sortedIndices[0][j][count] = i; weights[0][j][count] = inst.weight(); count++; } } for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (inst.isMissing(j)) { sortedIndices[0][j][count] = i; weights[0][j][count] = inst.weight(); count++; } } } else { // Sorted indices are computed for numeric attributes for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); vals[i] = inst.value(j); } sortedIndices[0][j] = Utils.sort(vals); for (int i = 0; i < train.numInstances(); i++) { weights[0][j][i] = train.instance(sortedIndices[0][j][i]).weight(); } } } } // Compute initial class counts double[] classProbs = new double[train.numClasses()]; double totalWeight = 0, totalSumSquared = 0; for (int i = 0; i < train.numInstances(); i++) { Instance inst = train.instance(i); if (data.classAttribute().isNominal()) { classProbs[(int) inst.classValue()] += inst.weight(); totalWeight += inst.weight(); } else { classProbs[0] += inst.classValue() * inst.weight(); totalSumSquared += inst.classValue() * inst.classValue() * inst.weight(); totalWeight += inst.weight(); } } m_Tree = new Tree(); double trainVariance = 0; if (data.classAttribute().isNumeric()) { trainVariance = m_Tree.singleVariance(classProbs[0], totalSumSquared, totalWeight) / totalWeight; classProbs[0] /= totalWeight; } // Build tree m_Tree.buildTree(sortedIndices, weights, train, totalWeight, classProbs, new Instances(train, 0), m_MinNum, m_MinVarianceProp * trainVariance, 0, m_MaxDepth, m_FeatureFrac, random); // Insert pruning data and perform reduced error pruning if (!m_NoPruning) { m_Tree.insertHoldOutSet(prune); m_Tree.reducedErrorPrune(); m_Tree.backfitHoldOutSet(); } }
From source file:adams.data.conversion.AbstractMatchWekaInstanceAgainstHeader.java
License:Open Source License
/** * Matches the input instance against the header. * * @param input the Instance to align to the header * @return the aligned Instance/*from w w w . j a va2 s . c o m*/ */ protected Instance match(Instance input) { Instance result; double[] values; int i; values = new double[m_Dataset.numAttributes()]; for (i = 0; i < m_Dataset.numAttributes(); i++) { values[i] = Utils.missingValue(); switch (m_Dataset.attribute(i).type()) { case Attribute.NUMERIC: case Attribute.DATE: values[i] = input.value(i); break; case Attribute.NOMINAL: if (m_Dataset.attribute(i).indexOfValue(input.stringValue(i)) != -1) values[i] = m_Dataset.attribute(i).indexOfValue(input.stringValue(i)); break; case Attribute.STRING: values[i] = m_Dataset.attribute(i).addStringValue(input.stringValue(i)); break; case Attribute.RELATIONAL: values[i] = m_Dataset.attribute(i).addRelation(input.relationalValue(i)); break; default: throw new IllegalStateException( "Unhandled attribute type: " + Attribute.typeToString(m_Dataset.attribute(i).type())); } } if (input instanceof SparseInstance) result = new SparseInstance(input.weight(), values); else result = new DenseInstance(input.weight(), values); result.setDataset(m_Dataset); // fix class index, if necessary if ((input.classIndex() != m_Dataset.classIndex()) && (m_Dataset.classIndex() < 0)) m_Dataset.setClassIndex(input.classIndex()); return result; }
From source file:adams.flow.transformer.WekaInstanceBuffer.java
License:Open Source License
/** * Executes the flow item.//from w w w .j a va2 s . c o m * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; Instance[] insts; Instance inst; double[] values; int i; int n; boolean updated; result = null; if (m_Operation == Operation.INSTANCE_TO_INSTANCES) { if (m_InputToken.getPayload() instanceof Instance) { insts = new Instance[] { (Instance) m_InputToken.getPayload() }; } else { insts = (Instance[]) m_InputToken.getPayload(); } for (n = 0; n < insts.length; n++) { inst = insts[n]; if ((m_Buffer != null) && m_CheckHeader) { if (!m_Buffer.equalHeaders(inst.dataset())) { getLogger().info("Header changed, resetting buffer"); m_Buffer = null; } } // buffer instance if (m_Buffer == null) m_Buffer = new Instances(inst.dataset(), 0); // we need to make sure that string and relational values are in our // buffer header and update the current Instance accordingly before // buffering it values = inst.toDoubleArray(); updated = false; for (i = 0; i < values.length; i++) { if (inst.isMissing(i)) continue; if (inst.attribute(i).isString()) { values[i] = m_Buffer.attribute(i).addStringValue(inst.stringValue(i)); updated = true; } else if (inst.attribute(i).isRelationValued()) { values[i] = m_Buffer.attribute(i).addRelation(inst.relationalValue(i)); updated = true; } } if (updated) { if (inst instanceof SparseInstance) { inst = new SparseInstance(inst.weight(), values); } else if (inst instanceof BinarySparseInstance) { inst = new BinarySparseInstance(inst.weight(), values); } else { if (!(inst instanceof DenseInstance)) { getLogger().severe("Unhandled instance class (" + inst.getClass().getName() + "), " + "defaulting to " + DenseInstance.class.getName()); } inst = new DenseInstance(inst.weight(), values); } } else { inst = (Instance) inst.copy(); } m_Buffer.add(inst); } if (m_Buffer.numInstances() % m_Interval == 0) { m_OutputToken = new Token(m_Buffer); if (m_ClearBuffer) m_Buffer = null; } } else if 
(m_Operation == Operation.INSTANCES_TO_INSTANCE) { m_Buffer = (Instances) m_InputToken.getPayload(); m_Iterator = m_Buffer.iterator(); } else { throw new IllegalStateException("Unhandled operation: " + m_Operation); } return result; }
From source file:adams.flow.transformer.WekaInstancesAppend.java
License:Open Source License
/** * Executes the flow item./*w ww .j a va2s . c om*/ * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; String[] filesStr; File[] files; int i; int n; Instances[] inst; Instances full; String msg; StringBuilder relation; double[] values; result = null; // get filenames files = null; inst = null; if (m_InputToken.getPayload() instanceof String[]) { filesStr = (String[]) m_InputToken.getPayload(); files = new File[filesStr.length]; for (i = 0; i < filesStr.length; i++) files[i] = new PlaceholderFile(filesStr[i]); } else if (m_InputToken.getPayload() instanceof File[]) { files = (File[]) m_InputToken.getPayload(); } else if (m_InputToken.getPayload() instanceof Instances[]) { inst = (Instances[]) m_InputToken.getPayload(); } else { throw new IllegalStateException("Unhandled input type: " + m_InputToken.getPayload().getClass()); } // load data? if (files != null) { inst = new Instances[files.length]; for (i = 0; i < files.length; i++) { try { inst[i] = DataSource.read(files[i].getAbsolutePath()); } catch (Exception e) { result = handleException("Failed to load dataset: " + files[i], e); break; } } } // test compatibility if (result == null) { for (i = 0; i < inst.length - 1; i++) { for (n = i + 1; n < inst.length; n++) { if ((msg = inst[i].equalHeadersMsg(inst[n])) != null) { result = "Dataset #" + (i + 1) + " and #" + (n + 1) + " are not compatible:\n" + msg; break; } } if (result != null) break; } } // append if (result == null) { full = new Instances(inst[0]); relation = new StringBuilder(inst[0].relationName()); for (i = 1; i < inst.length; i++) { relation.append("+" + inst[i].relationName()); for (Instance row : inst[i]) { values = row.toDoubleArray(); for (n = 0; n < values.length; n++) { if (row.attribute(n).isString()) values[n] = full.attribute(n).addStringValue(row.stringValue(n)); else if (row.attribute(n).isRelationValued()) values[n] = 
full.attribute(n).addRelation(row.relationalValue(n)); } if (row instanceof SparseInstance) row = new SparseInstance(row.weight(), values); else row = new DenseInstance(row.weight(), values); full.add(row); } } full.setRelationName(relation.toString()); m_OutputToken = new Token(full); } return result; }
From source file:bme.mace.logicdomain.Evaluation.java
License:Open Source License
/** * Evaluates the classifier on a single instance and records the prediction * (if the class is nominal).//from ww w . j a va2s . c o m * * @param classifier machine learning classifier * @param instance the test instance to be classified * @return the prediction made by the clasifier * @throws Exception if model could not be evaluated successfully or the data * contains string attributes */ public double evaluateModelOnceAndRecordPrediction(List<LibSVM> classifier, List<Double> classifierWeight, Instance instance) throws Exception { Instance classMissing = (Instance) instance.copy(); double pred = 0; classMissing.setDataset(instance.dataset()); classMissing.setClassMissing(); if (m_ClassIsNominal) { if (m_Predictions == null) { m_Predictions = new FastVector(); } List<double[]> prob = new ArrayList<double[]>();// double[] finalProb = new double[instance.numClasses()]; for (int i = 0; i < classifier.size(); i++) { double[] dist = classifier.get(i).distributionForInstance(classMissing);// prob.add(dist); } for (int i = 0; i < finalProb.length; i++) { for (int j = 0; j < classifier.size(); j++) { finalProb[i] += prob.get(j)[i] * classifierWeight.get(j); } } double sum = 0; for (int i = 0; i < finalProb.length; i++) { sum += finalProb[i]; } for (int i = 0; i < finalProb.length; i++) { finalProb[i] = finalProb[i] / sum; } pred = Utils.maxIndex(finalProb); if (finalProb[(int) pred] <= 0) { pred = Instance.missingValue(); } updateStatsForClassifier(finalProb, instance); m_Predictions.addElement(new NominalPrediction(instance.classValue(), finalProb, instance.weight())); } else { pred = classifier.get(0).classifyInstance(classMissing); updateStatsForPredictor(pred, instance); } return pred; }
From source file:bme.mace.logicdomain.Evaluation.java
License:Open Source License
/** * Evaluates the supplied distribution on a single instance. * /*w w w. j a va2 s . co m*/ * @param dist the supplied distribution * @param instance the test instance to be classified * @return the prediction * @throws Exception if model could not be evaluated successfully */ public double evaluateModelOnceAndRecordPrediction(double[] dist, Instance instance) throws Exception { double pred; if (m_ClassIsNominal) { if (m_Predictions == null) { m_Predictions = new FastVector(); } pred = Utils.maxIndex(dist); if (dist[(int) pred] <= 0) { pred = Instance.missingValue(); } updateStatsForClassifier(dist, instance); m_Predictions.addElement(new NominalPrediction(instance.classValue(), dist, instance.weight())); } else { pred = dist[0]; updateStatsForPredictor(pred, instance); } return pred; }