List of usage examples for the weka.core.Instance method classIsMissing()
/**
 * Tests whether the class value of this instance is missing.
 *
 * @return true if the class value is missing, false otherwise
 */
public boolean classIsMissing();
From source file:moa.classifiers.functions.SGDMultiClass.java
License:Open Source License
public void trainOnInstanceImpl(Instance instance, int classLabel) { if (!instance.classIsMissing()) { double wx = dotProd(instance, m_weights[classLabel], instance.classIndex()); double y; double z; if (instance.classAttribute().isNominal()) { y = (instance.classValue() != classLabel) ? -1 : 1; z = y * (wx + m_bias[classLabel]); } else {/*from w w w.j a va2s . c o m*/ y = instance.classValue(); z = y - (wx + m_bias[classLabel]); y = 1; } // Compute multiplier for weight decay double multiplier = 1.0; if (m_numInstances == 0) { multiplier = 1.0 - (m_learningRate * m_lambda) / m_t; } else { multiplier = 1.0 - (m_learningRate * m_lambda) / m_numInstances; } for (int i = 0; i < m_weights[classLabel].numValues(); i++) { m_weights[classLabel].setValue(i, m_weights[classLabel].getValue(i) * multiplier); } // Only need to do the following if the loss is non-zero if (m_loss != HINGE || (z < 1)) { // Compute Factor for updates double factor = m_learningRate * y * dloss(z); // Update coefficients for attributes int n1 = instance.numValues(); for (int p1 = 0; p1 < n1; p1++) { int indS = instance.index(p1); if (indS != instance.classIndex() && !instance.isMissingSparse(p1)) { m_weights[classLabel].addToValue(indS, factor * instance.valueSparse(p1)); } } // update the bias m_bias[classLabel] += factor; } } }
From source file:moa.classifiers.functions.SGDOld.java
License:Open Source License
/** * Trains the classifier with the given instance. * * @param instance the new training instance to include in the model *//* w ww . java 2 s . c o m*/ @Override public void trainOnInstanceImpl(Instance instance) { if (m_weights == null) { m_weights = new double[instance.numAttributes() + 1]; } if (!instance.classIsMissing()) { double wx = dotProd(instance, m_weights, instance.classIndex()); double y; double z; if (instance.classAttribute().isNominal()) { y = (instance.classValue() == 0) ? -1 : 1; z = y * (wx + m_weights[m_weights.length - 1]); } else { y = instance.classValue(); z = y - (wx + m_weights[m_weights.length - 1]); y = 1; } // Compute multiplier for weight decay double multiplier = 1.0; if (m_numInstances == 0) { multiplier = 1.0 - (m_learningRate * m_lambda) / m_t; } else { multiplier = 1.0 - (m_learningRate * m_lambda) / m_numInstances; } for (int i = 0; i < m_weights.length - 1; i++) { m_weights[i] *= multiplier; } // Only need to do the following if the loss is non-zero if (m_loss != HINGE || (z < 1)) { // Compute Factor for updates double factor = m_learningRate * y * dloss(z); // Update coefficients for attributes int n1 = instance.numValues(); for (int p1 = 0; p1 < n1; p1++) { int indS = instance.index(p1); if (indS != instance.classIndex() && !instance.isMissingSparse(p1)) { m_weights[indS] += factor * instance.valueSparse(p1); } } // update the bias m_weights[m_weights.length - 1] += factor; } m_t++; } }
From source file:moa.classifiers.functions.SPegasos.java
License:Open Source License
/** * Trains the classifier with the given instance. * * @param instance the new training instance to include in the model *//*from www.j a v a 2 s.c o m*/ @Override public void trainOnInstanceImpl(Instance instance) { if (m_weights == null) { m_weights = new double[instance.numAttributes() + 1]; } if (!instance.classIsMissing()) { double learningRate = 1.0 / (m_lambda * m_t); //double scale = 1.0 - learningRate * m_lambda; double scale = 1.0 - 1.0 / m_t; double y = (instance.classValue() == 0) ? -1 : 1; double wx = dotProd(instance, m_weights, instance.classIndex()); double z = y * (wx + m_weights[m_weights.length - 1]); for (int j = 0; j < m_weights.length - 1; j++) { if (j != instance.classIndex()) { m_weights[j] *= scale; } } if (m_loss == LOGLOSS || (z < 1)) { double loss = dloss(z); int n1 = instance.numValues(); for (int p1 = 0; p1 < n1; p1++) { int indS = instance.index(p1); if (indS != instance.classIndex() && !instance.isMissingSparse(p1)) { double m = learningRate * loss * (instance.valueSparse(p1) * y); m_weights[indS] += m; } } // update the bias m_weights[m_weights.length - 1] += learningRate * loss * y; } double norm = 0; for (int k = 0; k < m_weights.length - 1; k++) { if (k != instance.classIndex()) { norm += (m_weights[k] * m_weights[k]); } } double scale2 = Math.min(1.0, (1.0 / (m_lambda * norm))); if (scale2 < 1.0) { scale2 = Math.sqrt(scale2); for (int j = 0; j < m_weights.length - 1; j++) { if (j != instance.classIndex()) { m_weights[j] *= scale2; } } } m_t++; } }
From source file:moa.tasks.EvaluateNonStationaryDynamicStream.java
License:Open Source License
/**
 * Trains the learner on every queued instance whose deadline has been reached.
 *
 * <p>An instance is used only when its class value is present and either its weight is
 * positive or the send-zero-weights option is set. Each instance used also adds its
 * weight to the running total kept for its class label.
 *
 * @return number of instances used for training
 */
private int train() {
    final String activity;
    if (this.inWarmupPhase) {
        activity = "Warmup Training";
    } else {
        activity = "Online Training";
    }
    this.monitor.setCurrentActivityDescription(activity);

    int trained = 0;
    while (!this.latentTrainingInstQueue.isEmpty()
            && this.latentTrainingInstQueue.peek().deadline <= this.instancesProcessed) {
        final Instance x = this.latentTrainingInstQueue.pop().inst;

        final boolean weightAccepted = x.weight() > 0.0 || this.sendZeroWeightsOption.isSet();
        if (!weightAccepted || x.classIsMissing()) {
            // Due instances are always removed from the queue, even when not used.
            continue;
        }

        learner.trainOnInstance(x);
        this.knownLabels[(int) x.classValue()] += x.weight();
        trained++;
    }

    assert this.latentTrainingInstQueue.size() < (this.trainingTimeDelayOption.getValue() + 1)
            : "Cache 'latentTrainingInstQueue' is larger than designed.";
    return trained;
}
From source file:moa.tasks.EvaluatePrequential.java
License:Open Source License
@Override protected Object doMainTask(TaskMonitor monitor, ObjectRepository repository) { Classifier learner = (Classifier) getPreparedClassOption(this.learnerOption); InstanceStream stream = (InstanceStream) getPreparedClassOption(this.streamOption); ClassificationPerformanceEvaluator evaluator = (ClassificationPerformanceEvaluator) getPreparedClassOption( this.evaluatorOption); LearningCurve learningCurve = new LearningCurve("learning evaluation instances"); //New for prequential methods if (evaluator instanceof WindowClassificationPerformanceEvaluator) { //((WindowClassificationPerformanceEvaluator) evaluator).setWindowWidth(widthOption.getValue()); if (widthOption.getValue() != 1000) { System.out.println(/*from w ww . ja v a 2 s .c o m*/ "DEPRECATED! Use EvaluatePrequential -e (WindowClassificationPerformanceEvaluator -w " + widthOption.getValue() + ")"); return learningCurve; } } if (evaluator instanceof EWMAClassificationPerformanceEvaluator) { //((EWMAClassificationPerformanceEvaluator) evaluator).setalpha(alphaOption.getValue()); if (alphaOption.getValue() != .01) { System.out.println( "DEPRECATED! Use EvaluatePrequential -e (EWMAClassificationPerformanceEvaluator -a " + alphaOption.getValue() + ")"); return learningCurve; } } if (evaluator instanceof FadingFactorClassificationPerformanceEvaluator) { //((FadingFactorClassificationPerformanceEvaluator) evaluator).setalpha(alphaOption.getValue()); if (alphaOption.getValue() != .01) { System.out.println( "DEPRECATED! 
Use EvaluatePrequential -e (FadingFactorClassificationPerformanceEvaluator -a " + alphaOption.getValue() + ")"); return learningCurve; } } //End New for prequential methods learner.setModelContext(stream.getHeader()); int maxInstances = this.instanceLimitOption.getValue(); long instancesProcessed = 0; int maxSeconds = this.timeLimitOption.getValue(); int secondsElapsed = 0; monitor.setCurrentActivity("Evaluating learner...", -1.0); File dumpFile = this.dumpFileOption.getFile(); PrintStream immediateResultStream = null; if (dumpFile != null) { try { if (dumpFile.exists()) { immediateResultStream = new PrintStream(new FileOutputStream(dumpFile, true), true); } else { immediateResultStream = new PrintStream(new FileOutputStream(dumpFile), true); } } catch (Exception ex) { throw new RuntimeException("Unable to open immediate result file: " + dumpFile, ex); } } //File for output predictions File outputPredictionFile = this.outputPredictionFileOption.getFile(); PrintStream outputPredictionResultStream = null; if (outputPredictionFile != null) { try { if (outputPredictionFile.exists()) { outputPredictionResultStream = new PrintStream(new FileOutputStream(outputPredictionFile, true), true); } else { outputPredictionResultStream = new PrintStream(new FileOutputStream(outputPredictionFile), true); } } catch (Exception ex) { throw new RuntimeException("Unable to open prediction result file: " + outputPredictionFile, ex); } } boolean firstDump = true; boolean preciseCPUTiming = TimingUtils.enablePreciseTiming(); long evaluateStartTime = TimingUtils.getNanoCPUTimeOfCurrentThread(); long lastEvaluateStartTime = evaluateStartTime; double RAMHours = 0.0; while (stream.hasMoreInstances() && ((maxInstances < 0) || (instancesProcessed < maxInstances)) && ((maxSeconds < 0) || (secondsElapsed < maxSeconds))) { Instance trainInst = stream.nextInstance(); Instance testInst = (Instance) trainInst.copy(); if (testInst.classIsMissing() == false) { // Added for semisupervised setting: test 
only if we have the label double[] prediction = learner.getVotesForInstance(testInst); // Output prediction if (outputPredictionFile != null) { outputPredictionResultStream.println(Utils.maxIndex(prediction) + "," + testInst.classValue()); } evaluator.addResult(testInst, prediction); } learner.trainOnInstance(trainInst); instancesProcessed++; if (instancesProcessed % this.sampleFrequencyOption.getValue() == 0 || stream.hasMoreInstances() == false) { long evaluateTime = TimingUtils.getNanoCPUTimeOfCurrentThread(); double time = TimingUtils.nanoTimeToSeconds(evaluateTime - evaluateStartTime); double timeIncrement = TimingUtils.nanoTimeToSeconds(evaluateTime - lastEvaluateStartTime); double RAMHoursIncrement = learner.measureByteSize() / (1024.0 * 1024.0 * 1024.0); //GBs RAMHoursIncrement *= (timeIncrement / 3600.0); //Hours RAMHours += RAMHoursIncrement; lastEvaluateStartTime = evaluateTime; learningCurve.insertEntry(new LearningEvaluation( new Measurement[] { new Measurement("learning evaluation instances", instancesProcessed), new Measurement("evaluation time (" + (preciseCPUTiming ? "cpu " : "") + "seconds)", time), new Measurement("model cost (RAM-Hours)", RAMHours) }, evaluator, learner)); if (immediateResultStream != null) { if (firstDump) { immediateResultStream.println(learningCurve.headerToString()); firstDump = false; } immediateResultStream.println(learningCurve.entryToString(learningCurve.numEntries() - 1)); immediateResultStream.flush(); } } if (instancesProcessed % INSTANCES_BETWEEN_MONITOR_UPDATES == 0) { if (monitor.taskShouldAbort()) { return null; } long estimatedRemainingInstances = stream.estimatedRemainingInstances(); if (maxInstances > 0) { long maxRemaining = maxInstances - instancesProcessed; if ((estimatedRemainingInstances < 0) || (maxRemaining < estimatedRemainingInstances)) { estimatedRemainingInstances = maxRemaining; } } monitor.setCurrentActivityFractionComplete(estimatedRemainingInstances < 0 ? 
-1.0 : (double) instancesProcessed / (double) (instancesProcessed + estimatedRemainingInstances)); if (monitor.resultPreviewRequested()) { monitor.setLatestResultPreview(learningCurve.copy()); } secondsElapsed = (int) TimingUtils .nanoTimeToSeconds(TimingUtils.getNanoCPUTimeOfCurrentThread() - evaluateStartTime); } } if (immediateResultStream != null) { immediateResultStream.close(); } if (outputPredictionResultStream != null) { outputPredictionResultStream.close(); } return learningCurve; }
From source file:net.sf.jclal.sampling.supervised.Resample.java
License:Open Source License
/** * * @param dataSet The dataset to extract the instances. */// w ww . jav a2s . co m @Override public void sampling(IDataset dataSet) { if (!(dataSet instanceof WekaDataset)) { throw new RuntimeException("This sample method only can be used with a single label weka dataset"); } WekaDataset wekaDataSet = (WekaDataset) dataSet; int origSize = wekaDataSet.getNumInstances(); int sampleSize = (int) (origSize * getPercentageInstancesToLabelled() / 100); // Subsample that takes class distribution into consideration // Sort according to class attribute. wekaDataSet.getDataset().sort(wekaDataSet.getClassIndex()); // Create an index of where each class value starts int[] classIndices = new int[wekaDataSet.getNumClasses() + 1]; int currentClass = 0; classIndices[currentClass] = 0; for (int i = 0; i < dataSet.getNumInstances(); i++) { Instance current = dataSet.instance(i); if (current.classIsMissing()) { for (int j = currentClass + 1; j < classIndices.length; j++) { classIndices[j] = i; } break; } else if (current.classValue() != currentClass) { for (int j = currentClass + 1; j <= current.classValue(); j++) { classIndices[j] = i; } currentClass = (int) current.classValue(); } } if (currentClass <= wekaDataSet.getNumClasses()) { for (int j = currentClass + 1; j < classIndices.length; j++) { classIndices[j] = dataSet.getNumInstances(); } } int actualClasses = 0; for (int i = 0; i < classIndices.length - 1; i++) { if (classIndices[i] != classIndices[i + 1]) { actualClasses++; } } // Convert pending input instances if (isNoReplacement()) { createSubsampleWithoutReplacement(wekaDataSet, sampleSize, actualClasses, classIndices); } else { createSubsampleWithReplacement(wekaDataSet, sampleSize, actualClasses, classIndices); } }
From source file:org.esa.nest.gpf.SGD.java
/** * Updates the classifier with the given instance. * * @param instance the new training instance to include in the model * @exception Exception if the instance could not be incorporated in the * model.// www .j a v a 2 s . co m */ @Override public void updateClassifier(Instance instance) throws Exception { if (!instance.classIsMissing()) { double wx = dotProd(instance, m_weights, instance.classIndex()); double y; double z; if (instance.classAttribute().isNominal()) { y = (instance.classValue() == 0) ? -1 : 1; z = y * (wx + m_weights[m_weights.length - 1]); } else { y = instance.classValue(); z = y - (wx + m_weights[m_weights.length - 1]); y = 1; } // Compute multiplier for weight decay double multiplier = 1.0; if (m_numInstances == 0) { multiplier = 1.0 - (m_learningRate * m_lambda) / m_t; } else { multiplier = 1.0 - (m_learningRate * m_lambda) / m_numInstances; } for (int i = 0; i < m_weights.length - 1; i++) { m_weights[i] *= multiplier; } // Only need to do the following if the loss is non-zero if (m_loss != HINGE || (z < 1)) { // Compute Factor for updates double factor = m_learningRate * y * dloss(z); // Update coefficients for attributes int n1 = instance.numValues(); for (int p1 = 0; p1 < n1; p1++) { int indS = instance.index(p1); if (indS != instance.classIndex() && !instance.isMissingSparse(p1)) { m_weights[indS] += factor * instance.valueSparse(p1); } } // update the bias m_weights[m_weights.length - 1] += factor; } m_t++; } }
From source file:resample.OverSubsample.java
License:Open Source License
/** * Creates a subsample of the current set of input instances. The output * instances are pushed onto the output queue for collection. *///from w w w . j av a 2 s.c o m private void createSubsample() { int classI = getInputFormat().classIndex(); // Sort according to class attribute. getInputFormat().sort(classI); // Determine where each class starts in the sorted dataset int[] classIndices = getClassIndices(); // Get the existing class distribution int[] counts = new int[getInputFormat().numClasses()]; double[] weights = new double[getInputFormat().numClasses()]; int max = -1; for (int i = 0; i < getInputFormat().numInstances(); i++) { Instance current = getInputFormat().instance(i); if (current.classIsMissing() == false) { counts[(int) current.classValue()]++; weights[(int) current.classValue()] += current.weight(); } } // Convert from total weight to average weight for (int i = 0; i < counts.length; i++) { if (counts[i] > 0) { weights[i] = weights[i] / counts[i]; } /* System.err.println("Class:" + i + " " + getInputFormat().classAttribute().value(i) + " Count:" + counts[i] + " Total:" + weights[i] * counts[i] + " Avg:" + weights[i]); */ } // find the class with the minimum number of instances int maxIndex = -1; for (int i = 0; i < counts.length; i++) { if ((max < 0) && (counts[i] > 0)) { max = counts[i]; maxIndex = i; } else if ((counts[i] > max) && (counts[i] > 0)) { max = counts[i]; maxIndex = i; } } if (max < 0) { System.err.println("SpreadSubsample: *warning* none of the classes have any values in them."); return; } // determine the new distribution int[] new_counts = new int[getInputFormat().numClasses()]; for (int i = 0; i < counts.length; i++) { new_counts[i] = (int) Math.abs(Math.max(counts[i], max * m_DistributionSpread)); if (i == maxIndex) { if (m_DistributionSpread > 0 && m_DistributionSpread < 1.0) { // don't undersample the majority class! 
new_counts[i] = counts[i]; } } if (m_DistributionSpread == 0) { new_counts[i] = counts[i]; } if (m_MaxCount > 0) { new_counts[i] = Math.min(new_counts[i], m_MaxCount); } } // Sample with replacement Random random = new Random(m_RandomSeed); //Hashtable t = new Hashtable(); for (int j = 0; j < new_counts.length; j++) { double newWeight = 1.0; if (m_AdjustWeights && (new_counts[j] > 0)) { newWeight = weights[j] * counts[j] / new_counts[j]; /* System.err.println("Class:" + j + " " + getInputFormat().classAttribute().value(j) + " Count:" + counts[j] + " Total:" + weights[j] * counts[j] + " Avg:" + weights[j] + " NewCount:" + new_counts[j] + " NewAvg:" + newWeight); */ } int index = -1; for (int k = 0; k < new_counts[j]; k++) { //boolean ok = false; //do { index = classIndices[j] + (Math.abs(random.nextInt()) % (classIndices[j + 1] - classIndices[j])); // Have we used this instance before? //if (t.get("" + index) == null) { // if not, add it to the hashtable and use it //t.put("" + index, ""); //ok = true; if (index >= 0) { Instance newInst = (Instance) getInputFormat().instance(index).copy(); if (m_AdjustWeights) { newInst.setWeight(newWeight); } push(newInst); } //} //} while (!ok); } } }
From source file:resample.OverSubsample.java
License:Open Source License
/** * Creates an index containing the position where each class starts in * the getInputFormat(). m_InputFormat must be sorted on the class attribute. * /* w w w . j ava2s. c o m*/ * @return the positions */ private int[] getClassIndices() { // Create an index of where each class value starts int[] classIndices = new int[getInputFormat().numClasses() + 1]; int currentClass = 0; classIndices[currentClass] = 0; for (int i = 0; i < getInputFormat().numInstances(); i++) { Instance current = getInputFormat().instance(i); if (current.classIsMissing()) { for (int j = currentClass + 1; j < classIndices.length; j++) { classIndices[j] = i; } break; } else if (current.classValue() != currentClass) { for (int j = currentClass + 1; j <= current.classValue(); j++) { classIndices[j] = i; } currentClass = (int) current.classValue(); } } if (currentClass <= getInputFormat().numClasses()) { for (int j = currentClass + 1; j < classIndices.length; j++) { classIndices[j] = getInputFormat().numInstances(); } } return classIndices; }
From source file:test.org.moa.opencl.IBk.java
License:Open Source License
/** * Adds the supplied instance to the training set. * * @param instance the instance to add//from w w w . j a v a2 s. c om * @throws Exception if instance could not be incorporated * successfully */ public void updateClassifier(Instance instance) throws Exception { if (m_Train.equalHeaders(instance.dataset()) == false) { throw new Exception("Incompatible instance types\n" + m_Train.equalHeadersMsg(instance.dataset())); } if (instance.classIsMissing()) { return; } m_Train.add(instance); m_NNSearch.update(instance); m_kNNValid = false; if ((m_WindowSize > 0) && (m_Train.numInstances() > m_WindowSize)) { boolean deletedInstance = false; while (m_Train.numInstances() > m_WindowSize) { m_Train.delete(0); deletedInstance = true; } //rebuild datastructure KDTree currently can't delete if (deletedInstance == true) m_NNSearch.setInstances(m_Train); } }