Example usage for weka.core Instance weight

List of usage examples for weka.core Instance weight

Introduction

On this page you can find example usage for the weka.core Instance weight() method.

Prototype

public double weight();

Document

Returns the instance's weight.
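
Before the project examples below, here is a minimal, self-contained sketch (not taken from any of the listed projects; the class name WeightExample is purely illustrative) showing how weight() is typically read while iterating over a dataset, for example to sum the weights of all instances:

import weka.core.Instance;
import weka.core.Instances;

public class WeightExample {

    /**
     * Sums the weights of all instances in a dataset.
     * Each instance has weight 1.0 unless a different weight was set.
     */
    public static double totalWeight(Instances data) {
        double sum = 0.0;
        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);
            sum += inst.weight();
        }
        return sum;
    }
}

The counterpart setter, setWeight(double), appears in several of the examples below (for instance in OzaBoost) to rescale an instance's weight before training on it.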

Usage

From source file:com.yahoo.labs.samoa.instances.WekaToSamoaInstanceConverter.java

License:Apache License

/**
 * Creates a SAMOA instance from a Weka instance.
 *
 * @param inst the Weka instance to convert
 * @return the converted SAMOA instance
 */
public Instance samoaInstance(weka.core.Instance inst) {
    Instance samoaInstance;
    if (inst instanceof weka.core.SparseInstance) {
        double[] attributeValues = new double[inst.numValues()];
        int[] indexValues = new int[inst.numValues()];
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != inst.classIndex()) {
                attributeValues[i] = inst.valueSparse(i);
                indexValues[i] = inst.index(i);
            }
        }
        samoaInstance = new SparseInstance(inst.weight(), attributeValues, indexValues, inst.numAttributes());
    } else {
        samoaInstance = new DenseInstance(inst.weight(), inst.toDoubleArray());
        //samoaInstance.deleteAttributeAt(inst.classIndex());
    }
    if (this.samoaInstanceInformation == null) {
        this.samoaInstanceInformation = this.samoaInstancesInformation(inst.dataset());
    }
    samoaInstance.setDataset(samoaInstanceInformation);
    samoaInstance.setClassValue(inst.classValue());
    return samoaInstance;
}

From source file:cotraining.copy.Evaluation_D.java

License:Open Source License

/**
 * Evaluates the classifier on a single instance and records the
 * prediction (if the class is nominal).
 *
 * @param classifier machine learning classifier
 * @param instance the test instance to be classified
 * @return the prediction made by the classifier
 * @throws Exception if model could not be evaluated 
 * successfully or the data contains string attributes
 */
public double evaluateModelOnceAndRecordPrediction(Classifier classifier, Instance instance) throws Exception {

    Instance classMissing = (Instance) instance.copy();
    double pred = 0;
    classMissing.setDataset(instance.dataset());
    classMissing.setClassMissing();
    if (m_ClassIsNominal) {
        if (m_Predictions == null) {
            m_Predictions = new FastVector();
        }
        double[] dist = classifier.distributionForInstance(classMissing);
        pred = Utils.maxIndex(dist);
        if (dist[(int) pred] <= 0) {
            pred = Instance.missingValue();
        }
        updateStatsForClassifier(dist, instance);
        m_Predictions.addElement(new NominalPrediction(instance.classValue(), dist, instance.weight()));
    } else {
        pred = classifier.classifyInstance(classMissing);
        updateStatsForPredictor(pred, instance);
    }
    return pred;
}

From source file:cotraining.copy.Evaluation_D.java

License:Open Source License

/**
 * Updates all the statistics about a classifier's performance for
 * the current test instance.
 *
 * @param predictedDistribution the probabilities assigned to 
 * each class
 * @param instance the instance to be classified
 * @throws Exception if the class of the instance is not
 * set
 */
protected void updateStatsForClassifier(double[] predictedDistribution, Instance instance) throws Exception {

    int actualClass = (int) instance.classValue();

    if (!instance.classIsMissing()) {
        updateMargins(predictedDistribution, actualClass, instance.weight());

        // Determine the predicted class (doesn't detect multiple 
        // classifications)
        int predictedClass = -1;
        double bestProb = 0.0;
        for (int i = 0; i < m_NumClasses; i++) {
            if (predictedDistribution[i] > bestProb) {
                predictedClass = i;
                bestProb = predictedDistribution[i];
            }
        }

        m_WithClass += instance.weight();

        // Determine misclassification cost
        if (m_CostMatrix != null) {
            if (predictedClass < 0) {
                // For missing predictions, we assume the worst possible cost.
                // This is pretty harsh.
                // Perhaps we could take the negative of the cost of a correct
                // prediction (-m_CostMatrix.getElement(actualClass,actualClass)),
                // although often this will be zero
                m_TotalCost += instance.weight() * m_CostMatrix.getMaxCost(actualClass, instance);
            } else {
                m_TotalCost += instance.weight()
                        * m_CostMatrix.getElement(actualClass, predictedClass, instance);
            }
        }

        // Update counts when no class was predicted
        if (predictedClass < 0) {
            m_Unclassified += instance.weight();
            return;
        }

        double predictedProb = Math.max(MIN_SF_PROB, predictedDistribution[actualClass]);
        double priorProb = Math.max(MIN_SF_PROB, m_ClassPriors[actualClass] / m_ClassPriorsSum);
        if (predictedProb >= priorProb) {
            m_SumKBInfo += (Utils.log2(predictedProb) - Utils.log2(priorProb)) * instance.weight();
        } else {
            m_SumKBInfo -= (Utils.log2(1.0 - predictedProb) - Utils.log2(1.0 - priorProb)) * instance.weight();
        }

        m_SumSchemeEntropy -= Utils.log2(predictedProb) * instance.weight();
        m_SumPriorEntropy -= Utils.log2(priorProb) * instance.weight();

        updateNumericScores(predictedDistribution, makeDistribution(instance.classValue()), instance.weight());

        // Update other stats
        m_ConfusionMatrix[actualClass][predictedClass] += instance.weight();
        if (predictedClass != actualClass) {
            m_Incorrect += instance.weight();
        } else {
            m_Correct += instance.weight();
        }
    } else {
        m_MissingClass += instance.weight();
    }
}

From source file:cotraining.copy.Evaluation_D.java

License:Open Source License

/**
 * Updates all the statistics about a predictor's performance for
 * the current test instance.
 *
 * @param predictedValue the numeric value the classifier predicts
 * @param instance the instance to be classified
 * @throws Exception if the class of the instance is not
 * set
 */
protected void updateStatsForPredictor(double predictedValue, Instance instance) throws Exception {

    if (!instance.classIsMissing()) {

        // Update stats
        m_WithClass += instance.weight();
        if (Instance.isMissingValue(predictedValue)) {
            m_Unclassified += instance.weight();
            return;
        }
        m_SumClass += instance.weight() * instance.classValue();
        m_SumSqrClass += instance.weight() * instance.classValue() * instance.classValue();
        m_SumClassPredicted += instance.weight() * instance.classValue() * predictedValue;
        m_SumPredicted += instance.weight() * predictedValue;
        m_SumSqrPredicted += instance.weight() * predictedValue * predictedValue;

        if (m_ErrorEstimator == null) {
            setNumericPriorsFromBuffer();
        }
        double predictedProb = Math.max(m_ErrorEstimator.getProbability(predictedValue - instance.classValue()),
                MIN_SF_PROB);
        double priorProb = Math.max(m_PriorErrorEstimator.getProbability(instance.classValue()), MIN_SF_PROB);

        m_SumSchemeEntropy -= Utils.log2(predictedProb) * instance.weight();
        m_SumPriorEntropy -= Utils.log2(priorProb) * instance.weight();
        m_ErrorEstimator.addValue(predictedValue - instance.classValue(), instance.weight());

        updateNumericScores(makeDistribution(predictedValue), makeDistribution(instance.classValue()),
                instance.weight());

    } else
        m_MissingClass += instance.weight();
}

From source file:data.Bag.java

License:Open Source License

/**
 * Constructor.
 * 
 * @param instance
 *            A Weka Instance to be transformed into a Bag.
 * @throws Exception
 *             To be handled in an upper level.
 * 
 */
public Bag(Instance instance) throws Exception {
    super(instance);
    m_AttValues = instance.toDoubleArray();
    m_Weight = instance.weight();
    m_Dataset = instance.dataset();
}

From source file:data.generation.target.utils.PrincipalComponents.java

License:Open Source License

/**
 * Convert a PCA-transformed instance back to the original space
 *
 * @param inst        the instance to convert
 * @return            the processed instance
 * @throws Exception  if something goes wrong
 */
private Instance convertInstanceToOriginal(Instance inst) throws Exception {
    double[] newVals = null;

    if (m_hasClass) {
        newVals = new double[m_numAttribs + 1];
    } else {
        newVals = new double[m_numAttribs];
    }

    if (m_hasClass) {
        // class is always appended as the last attribute
        newVals[m_numAttribs] = inst.value(inst.numAttributes() - 1);
    }

    for (int i = 0; i < m_eTranspose[0].length; i++) {
        double tempval = 0.0;
        for (int j = 1; j < m_eTranspose.length; j++) {
            tempval += (m_eTranspose[j][i] * inst.value(j - 1));
        }
        newVals[i] = tempval;
        if (!m_center) {
            newVals[i] *= m_stdDevs[i];
        }
        newVals[i] += m_means[i];
    }

    if (inst instanceof SparseInstance) {
        return new SparseInstance(inst.weight(), newVals);
    } else {
        return new Instance(inst.weight(), newVals);
    }
}

From source file:data.generation.target.utils.PrincipalComponents.java

License:Open Source License

/**
 * Transform an instance in original (unnormalized) format. Convert back
 * to the original space if requested.
 * @param instance an instance in the original (unnormalized) format
 * @return a transformed instance
 * @throws Exception if the instance can't be transformed
 */
public Instance convertInstance(Instance instance) throws Exception {

    if (m_eigenvalues == null) {
        throw new Exception("convertInstance: Principal components not " + "built yet");
    }

    double[] newVals = new double[m_outputNumAtts];
    Instance tempInst = (Instance) instance.copy();
    if (!instance.dataset().equalHeaders(m_trainHeader)) {
        throw new Exception("Can't convert instance: header's don't match: " + "PrincipalComponents\n"
                + "Can't convert instance: header's don't match.");
    }

    m_replaceMissingFilter.input(tempInst);
    m_replaceMissingFilter.batchFinished();
    tempInst = m_replaceMissingFilter.output();

    /*if (m_normalize) {
      m_normalizeFilter.input(tempInst);
      m_normalizeFilter.batchFinished();
      tempInst = m_normalizeFilter.output();
    }*/

    m_nominalToBinFilter.input(tempInst);
    m_nominalToBinFilter.batchFinished();
    tempInst = m_nominalToBinFilter.output();

    if (m_attributeFilter != null) {
        m_attributeFilter.input(tempInst);
        m_attributeFilter.batchFinished();
        tempInst = m_attributeFilter.output();
    }

    if (!m_center) {
        m_standardizeFilter.input(tempInst);
        m_standardizeFilter.batchFinished();
        tempInst = m_standardizeFilter.output();
    } else {
        m_centerFilter.input(tempInst);
        m_centerFilter.batchFinished();
        tempInst = m_centerFilter.output();
    }

    if (m_hasClass) {
        newVals[m_outputNumAtts - 1] = instance.value(instance.classIndex());
    }

    double cumulative = 0;
    for (int i = m_numAttribs - 1; i >= 0; i--) {
        double tempval = 0.0;
        for (int j = 0; j < m_numAttribs; j++) {
            tempval += (m_eigenvectors[j][m_sortedEigens[i]] * tempInst.value(j));
        }
        newVals[m_numAttribs - i - 1] = tempval;
        cumulative += m_eigenvalues[m_sortedEigens[i]];
        if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) {
            break;
        }
    }

    if (!m_transBackToOriginal) {
        if (instance instanceof SparseInstance) {
            return new SparseInstance(instance.weight(), newVals);
        } else {
            return new Instance(instance.weight(), newVals);
        }
    } else {
        if (instance instanceof SparseInstance) {
            return convertInstanceToOriginal(new SparseInstance(instance.weight(), newVals));
        } else {
            return convertInstanceToOriginal(new Instance(instance.weight(), newVals));
        }
    }
}

From source file:distributedRedditAnalyser.OzaBoost.java

License:Open Source License

@Override
public void trainOnInstanceImpl(Instance inst) {
    try {
        lock.acquire();
        //Get a new classifier
        Classifier newClassifier = ((Classifier) getPreparedClassOption(this.baseLearnerOption)).copy();
        ensemble.add(new ClassifierInstance(newClassifier));

        //If we have too many classifiers
        while (ensemble.size() > ensembleSizeOption.getValue())
            ensemble.pollFirst();

        double lambda_d = 1.0;
        for (ClassifierInstance c : ensemble) {
            double k = this.pureBoostOption.isSet() ? lambda_d
                    : MiscUtils.poisson(lambda_d, this.classifierRandom);
            if (k > 0.0) {
                Instance weightedInst = (Instance) inst.copy();
                weightedInst.setWeight(inst.weight() * k);
                c.getClassifier().trainOnInstance(weightedInst);
            }
            if (c.getClassifier().correctlyClassifies(inst)) {
                c.setScms(c.getScms() + lambda_d);
                lambda_d *= this.trainingWeightSeenByModel / (2 * c.getScms());
            } else {
                c.setSwms(c.getSwms() + lambda_d);
                lambda_d *= this.trainingWeightSeenByModel / (2 * c.getSwms());
            }
        }
    } catch (InterruptedException e) {
        e.printStackTrace();
    } finally {
        lock.release();
    }
}

From source file:edu.columbia.cs.ltrie.sampling.queries.generation.ChiSquaredWithYatesCorrectionAttributeEval.java

License:Open Source License

/**
 * Initializes a chi-squared attribute evaluator.
 * Discretizes all attributes that are numeric.
 *
 * @param data set of instances serving as training data 
 * @throws Exception if the evaluator has not been 
 * generated successfully
 */
public void buildEvaluator(Instances data) throws Exception {

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    int classIndex = data.classIndex();
    int numInstances = data.numInstances();

    if (!m_Binarize) {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    } else {
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    }
    int numClasses = data.attribute(classIndex).numValues();

    // Reserve space and initialize counters
    double[][][] counts = new double[data.numAttributes()][][];
    for (int k = 0; k < data.numAttributes(); k++) {
        if (k != classIndex) {
            int numValues = data.attribute(k).numValues();
            counts[k] = new double[numValues + 1][numClasses + 1];
        }
    }

    // Initialize counters
    double[] temp = new double[numClasses + 1];
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        if (inst.classIsMissing()) {
            temp[numClasses] += inst.weight();
        } else {
            temp[(int) inst.classValue()] += inst.weight();
        }
    }
    for (int k = 0; k < counts.length; k++) {
        if (k != classIndex) {
            for (int i = 0; i < temp.length; i++) {
                counts[k][0][i] = temp[i];
            }
        }
    }

    // Get counts
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != classIndex) {
                if (inst.isMissingSparse(i) || inst.classIsMissing()) {
                    if (!inst.isMissingSparse(i)) {
                        counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    } else if (!inst.classIsMissing()) {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst
                                .classValue()] += inst.weight();
                        counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                    } else {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst
                                .weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    }
                } else {
                    counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight();
                    counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                }
            }
        }
    }

    // distribute missing counts if required
    if (m_missing_merge) {

        for (int k = 0; k < data.numAttributes(); k++) {
            if (k != classIndex) {
                int numValues = data.attribute(k).numValues();

                // Compute marginals
                double[] rowSums = new double[numValues];
                double[] columnSums = new double[numClasses];
                double sum = 0;
                for (int i = 0; i < numValues; i++) {
                    for (int j = 0; j < numClasses; j++) {
                        rowSums[i] += counts[k][i][j];
                        columnSums[j] += counts[k][i][j];
                    }
                    sum += rowSums[i];
                }

                if (Utils.gr(sum, 0)) {
                    double[][] additions = new double[numValues][numClasses];

                    // Compute what needs to be added to each row
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j];
                        }
                    }

                    // Compute what needs to be added to each column
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses];
                        }
                    }

                    // Compute what needs to be added to each cell
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses];
                        }
                    }

                    // Make new contingency table
                    double[][] newTable = new double[numValues][numClasses];
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            newTable[i][j] = counts[k][i][j] + additions[i][j];
                        }
                    }
                    counts[k] = newTable;
                }
            }
        }
    }

    // Compute chi-squared values
    m_ChiSquareds = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (i != classIndex) {
            m_ChiSquareds[i] = chiVal(ContingencyTables.reduceMatrix(counts[i]));
        }
    }
}

From source file:edu.oregonstate.eecs.mcplan.abstraction.EvaluateSimilarityFunction.java

License:Open Source License

public static Instances transformInstances(final Instances src, final CoordinateTransform transform) {
    final ArrayList<Attribute> out_attributes = new ArrayList<Attribute>();
    for (int i = 0; i < transform.outDimension(); ++i) {
        out_attributes.add(new Attribute("x" + i));
    }
    out_attributes.add((Attribute) src.classAttribute().copy());
    final Instances out = new Instances(src.relationName() + "_" + transform.name(), out_attributes, 0);
    for (int i = 0; i < src.size(); ++i) {
        final Instance inst = src.get(i);
        final RealVector flat = new ArrayRealVector(WekaUtil.unlabeledFeatures(inst));
        final RealVector transformed_vector = transform.encode(flat).x;
        final double[] transformed = new double[transformed_vector.getDimension() + 1];
        for (int j = 0; j < transformed_vector.getDimension(); ++j) {
            transformed[j] = transformed_vector.getEntry(j);
        }
        transformed[transformed.length - 1] = inst.classValue();
        final Instance transformed_instance = new DenseInstance(inst.weight(), transformed);
        out.add(transformed_instance);
        transformed_instance.setDataset(out);
    }
    out.setClassIndex(out.numAttributes() - 1);
    return out;
}