Example usage for weka.core Instances enumerateInstances

List of usage examples for weka.core Instances enumerateInstances

Introduction

In this page you can find the example usage for weka.core Instances enumerateInstances.

Prototype

public Enumeration<Instance> enumerateInstances()

Source Link

Document

Returns an enumeration of all instances in the dataset.

Usage

From source file:Pair.java

License:Open Source License

/**
 * Builds one combined dataset from {@code m_SourceInstances} followed by
 * {@code targetData}, assigns initial instance weights, and then runs
 * {@code sourceIterations} rounds, printing {@code getTestError()} after each
 * round.
 *
 * <p>Source instances occupy indices {@code [0, numSourceInstances)} of the
 * combined dataset; target instances follow. Two modes are supported:
 * <ul>
 *   <li>{@code doFraction}: the source weight is set directly each round from a
 *       linearly decreasing source fraction;</li>
 *   <li>otherwise: a classifier is built per round and {@code setWeights} is
 *       used to update the weights.</li>
 * </ul>
 *
 * <p>Side effects visible here: writes the fields {@code numTargetInstances}
 * and {@code sourceFraction}, and builds into {@code m_Classifiers}.
 *
 * @param targetData the target-domain instances appended after the source data
 * @throws Exception propagated from the helper calls made in this method
 */
private void doCV(Instances targetData) throws Exception {
    System.out.println();
    System.out.flush();
    int numSourceInstances = m_SourceInstances.numInstances();
    int numInstances = targetData.numInstances() + numSourceInstances;
    numTargetInstances = numInstances - numSourceInstances;
    double weightSource, weightTarget;
    double initialSourceFraction;
    // Scratch buffer used below to save and restore instance weights.
    double[] weights = new double[numInstances];
    // Fixed seed, so any resampling below is repeatable between runs.
    Random randomInstance = new Random(1);

    Instances data = new Instances(m_SourceInstances, 0, numSourceInstances);
    // Now add the target data, shallow copying the instances as they are added
    // so it doesn't mess up the weights for anyone else
    // NOTE(review): the copy is assumed to happen inside Instances.add — confirm.
    Enumeration enumer = targetData.enumerateInstances();
    while (enumer.hasMoreElements()) {
        Instance instance = (Instance) enumer.nextElement();
        data.add(instance);
    }

    if (sourceRatio < 0) { //weight all equally
        weightSource = weightTarget = 1.0/*/numInstances*/;
        // With equal weights the source fraction is simply the share of source instances.
        initialSourceFraction = numSourceInstances / (double) numInstances;
    } else {
        // Per-instance weights are in the ratio sourceRatio : 1 (source : target).
        double totalWeight = 1 + sourceRatio;
        weightSource = sourceRatio / totalWeight/*/numSourceInstances*/;
        weightTarget = 1.0 / totalWeight/*/numTargetInstances*/;
        initialSourceFraction = weightSource;
    }
    // Apply the initial weights: source instances come first in the combined data.
    for (int j = 0; j < numInstances; j++) {
        Instance instance = data.instance(j);
        if (j < numSourceInstances)
            instance.setWeight(weightSource);
        else
            instance.setWeight(weightTarget);
    }

    if (doFraction) {
        for (int it = 0; it < sourceIterations/*m_NumIterations*/; it++) {

            // Source fraction decays linearly with the iteration number, capped at .995
            // so that (1 - sourceFraction) below never reaches zero.
            sourceFraction = (1 - (it / (double) m_NumIterations)) * initialSourceFraction; //[same weights as regular]
            if (sourceFraction > .995)
                sourceFraction = .995;
            //double sourceWeight = (sourceFraction * numInstances) / numSourceInstances;
            // Per-source-instance weight chosen so that, with every target weight at 1,
            // the source instances carry exactly sourceFraction of the total weight.
            double sourceWeight = (sourceFraction * numTargetInstances)
                    / (numSourceInstances * (1 - sourceFraction));
            for (int j = 0; j < numInstances; j++) {
                Instance instance = data.instance(j);
                if (j < numSourceInstances)
                    instance.setWeight(sourceWeight);
                else
                    instance.setWeight(1);
            }
            buildClassifierWithWeights(data);
            System.out.println("Iteration " + it + ":" + getTestError());
        }
    } else {

        // Baseline round ("Iteration -1") on the initial weights. The weights are saved
        // before and restored after the build — presumably because
        // buildClassifierWithWeights modifies them; verify against that method.
        for (int i = 0; i < numInstances; i++)
            weights[i] = data.instance(i).weight();
        buildClassifierWithWeights(data);
        System.out.println("Iteration -1:" + getTestError());
        for (int i = 0; i < numInstances; i++)
            data.instance(i).setWeight(weights[i]);

        for (int it = 0; it < sourceIterations; it++) {

            // Train on the weighted data directly, or on a weight-proportional resample.
            Instances sample = null;
            if (!resample || m_NumIterationsPerformed == 0) {
                sample = data;
            } else {
                // Normalize the weights to sum to 1 before resampling.
                double sum = data.sumOfWeights();
                double[] sweights = new double[data.numInstances()];
                for (int i = 0; i < sweights.length; i++) {
                    sweights[i] = data.instance(i).weight() / sum;
                }
                sample = data.resampleWithWeights(randomInstance, sweights);
            }

            // NOTE(review): a build failure is only printed; m_Classifiers[it] is still
            // passed to setWeights below regardless of whether the build succeeded.
            try {
                m_Classifiers[it].buildClassifier(sample);
            } catch (Exception e) {
                e.printStackTrace();
                System.out.println("E: " + e);
            }

            // Source fraction for the next round, then reweight using this round's classifier.
            sourceFraction = initialSourceFraction * (1 - (it + 1) / (double) m_NumIterations);
            setWeights(data, m_Classifiers[it], sourceFraction, numSourceInstances, false);

            // Save the updated weights so they can be restored after the evaluation build.
            for (int i = 0; i < numInstances; i++)
                weights[i] = data.instance(i).weight();

            buildClassifierWithWeights(data);

            System.out.println("Iteration " + it + ":" + getTestError());

            for (int i = 0; i < numInstances; i++)
                data.instance(i).setWeight(weights[i]);

        }

    }

}

From source file:Pair.java

License:Open Source License

/**
 * Sets the weights for the next iteration.
 *
 * <p>Instances at indices {@code [0, numSourceInstances)} are treated as source
 * instances and the remainder as target instances. The absolute prediction
 * error of {@code cls} is computed for every instance and divided by the
 * largest error found among the target instances; the instance weights in
 * {@code trainData} are then updated in place. Also writes the field
 * {@code targetWeight}.
 *
 * @param trainData          combined data whose instance weights are updated in place
 * @param cls                classifier used to compute the per-instance errors
 * @param sourceFraction     used only when {@code isFinal} is false: the total source
 *                           weight aimed for is {@code sourceFraction * trainData.numInstances()}
 * @param numSourceInstances number of leading instances that are source instances
 * @param isFinal            selects the update rule (see the branches below)
 * @return {@code -1} if the largest target error is 0 (weights are left untouched),
 *         otherwise the computed {@code beta}
 * @throws Exception propagated from {@code cls.classifyInstance}
 */
protected double setWeights(Instances trainData, Classifier cls, double sourceFraction, int numSourceInstances,
        boolean isFinal) throws Exception {

    Enumeration enu = trainData.enumerateInstances();
    // NOTE(review): instNum is never read in this method.
    int instNum = 0;
    double[] errors = new double[trainData.numInstances()];
    double max = 0;
    int i = 0;
    // Absolute error per instance; max only tracks target instances (i >= numSourceInstances).
    while (enu.hasMoreElements()) {
        Instance instance = (Instance) enu.nextElement();
        errors[i] = Math.abs(cls.classifyInstance(instance) - instance.classValue());
        if (i >= numSourceInstances && errors[i] > max)
            max = errors[i];
        i++;
    }

    // No error on any target instance: nothing to normalize by, so bail out.
    if (max == 0)
        return -1;

    //get avg loss
    double loss = 0;
    double initialTWeightSum = 0;
    double allWeightSum = 0;
    for (int j = 0; j < errors.length; j++) {
        // Normalize by the largest target error; a source error may therefore exceed 1.
        errors[j] /= max;
        Instance instance = trainData.instance(j);
        loss += instance.weight() * errors[j];
        if (j >= numSourceInstances) {
            //loss += instance.weight() * errors[j];
            initialTWeightSum += instance.weight();
        }
        allWeightSum += instance.weight();
    }
    //loss /= weightSum;
    // Weighted mean of the normalized errors over all instances.
    loss /= allWeightSum;

    // Share of the total weight currently held by the target instances.
    targetWeight = initialTWeightSum / allWeightSum;
    /*
    if (!isFinal){
    System.out.println("Target weight: " + targetWeight);
    System.out.println("max: " + max);
    System.out.println("avg error: " + loss * max);
    System.out.println("Loss: " + loss);
    }
    */

    double beta;

    if (fixedBeta)
        beta = 0.4 / 0.6;
    else {
        // The if below guards only the loss assignment: the commented-out return
        // sits between the condition and its statement.
        if (isFinal && loss > 0.499)//bad, so quit
            //return -1;
            loss = 0.499; //since we're doing CV, no reason to quit

        beta = loss / (1 - loss); //or just use beta = .4/.6, since beta isn't as meaningful in AdaBoost.R2;
    }

    double tWeightSum = 0;
    if (!isFinal) {
        //need to find b so that weight of source be sourceFraction*num source
        //do binary search
        double goal = sourceFraction * errors.length;
        double bMin = .001;
        double bMax = .999;
        double b;
        double sourceSum = 0;
        // Bisect on b until the bracket is narrower than .001; sum is the total source
        // weight that would result from scaling each source weight by b^error.
        while (bMax - bMin > .001) {
            b = (bMax + bMin) / 2;
            double sum = 0;
            for (int j = 0; j < numSourceInstances; j++) {
                Instance instance = trainData.instance(j);
                sum += Math.pow(b, errors[j]) * instance.weight();
            }
            if (sum > goal)
                bMax = b;
            else
                bMin = b;
        }
        // NOTE(review): this final midpoint is never used — the update below applies
        // bMin rather than b. Confirm which one is intended.
        b = (bMax + bMin) / 2;
        //System.out.println(b);
        for (int j = 0; j < numSourceInstances; j++) {
            Instance instance = trainData.instance(j);
            instance.setWeight(instance.weight() * Math.pow(bMin, errors[j]));
            sourceSum += instance.weight();
        }

        //now adjust target weights
        // Scale the target weights uniformly so that all weights sum to the number of instances.
        goal = errors.length - sourceSum;
        double m = goal / initialTWeightSum;

        for (int j = numSourceInstances; j < errors.length; j++) {
            Instance instance = trainData.instance(j);
            instance.setWeight(instance.weight() * m);
        }
    } else {//final
        if (!doUpsource) { //modify only target weights
            // Multiply each target weight by beta^(-error) ...
            for (int j = numSourceInstances; j < errors.length; j++) {
                Instance instance = trainData.instance(j);
                instance.setWeight(instance.weight() * Math.pow(beta, -errors[j]));
                tWeightSum += instance.weight();
            }

            // ... then rescale so the target weights keep their original total.
            double weightSumInverse = initialTWeightSum / tWeightSum;
            for (int j = numSourceInstances; j < errors.length; j++) {
                Instance instance = trainData.instance(j);
                instance.setWeight(instance.weight() * weightSumInverse);
            }
        } else { //modify all weights
            // Same multiplicative update, applied to source and target instances alike.
            for (int j = 0; j < errors.length; j++) {
                Instance instance = trainData.instance(j);
                instance.setWeight(instance.weight() * Math.pow(beta, -errors[j]));
                tWeightSum += instance.weight();
            }

            // Rescale so all weights sum to the number of instances.
            double weightSumInverse = errors.length / tWeightSum;
            for (int j = 0; j < errors.length; j++) {
                Instance instance = trainData.instance(j);
                instance.setWeight(instance.weight() * weightSumInverse);
            }
        }

    }

    // beta is returned in both modes, although the !isFinal update does not use it.
    return beta;
}

From source file:GrowTree.java

/**
 * Reports whether the instances in {@code D} occupy at most one class.
 *
 * <p>As a side effect, the {@code distribution} field is replaced with a fresh
 * per-class tally of the instances in {@code D}.
 *
 * @param D the instances to inspect
 * @return {@code true} if zero or one class has a non-zero count
 */
public boolean homogeneous(Instances D) {
    final int classCount = D.numClasses();
    distribution = new double[classCount];

    // Tally how many instances fall into each class index.
    for (int row = 0; row < D.numInstances(); row++) {
        int classIdx = (int) D.instance(row).classValue();
        distribution[classIdx]++;
    }

    // Count the classes that actually occur.
    int occupied = 0;
    for (int c = 0; c < classCount; c++) {
        if (distribution[c] > 0) {
            occupied++;
        }
    }

    // Homogeneous when no more than a single class is represented.
    return occupied <= 1;
}

From source file:GrowTree.java

/**
 * Returns the class value of the first instance yielded by
 * {@code D.enumerateInstances()}.
 *
 * <p>No emptiness check is made; the enumeration is asked for its first
 * element unconditionally.
 *
 * @param D the instances to take the label from
 * @return the class value of the first enumerated instance
 */
double label(Instances D) {
    Enumeration instances = D.enumerateInstances();
    Instance first = (Instance) instances.nextElement();
    double classOfFirst = first.classValue();
    return classOfFirst;
}

From source file:GrowTree.java

/**
 * Chooses the attribute to split {@code D} on.
 *
 * <p>For every attribute yielded by {@code D.enumerateAttributes()}, the
 * instances are partitioned by that attribute's value and {@link #imp} is
 * evaluated on each partition. The attribute owning the single partition with
 * the smallest impurity seen so far (strictly below the starting bound of
 * {@code 1.0}) is returned.
 *
 * @param D the instances to split
 * @return the selected attribute, or {@code null} if no partition of any
 *         attribute has an impurity strictly below {@code 1.0}
 */
Attribute bestSplit(Instances D) {
    double imin = 1.0;
    Attribute fbest = null;
    Enumeration enat = D.enumerateAttributes();
    while (enat.hasMoreElements()) {
        Attribute a = (Attribute) enat.nextElement();

        // Split D into one subset per value of the attribute.
        Instances[] split = new Instances[a.numValues()];
        for (int i = 0; i < a.numValues(); i++) {
            split[i] = new Instances(D, D.numInstances());
        }

        // The attribute value is used directly as the subset index.
        // NOTE(review): assumes nominal attributes without missing values — confirm.
        Enumeration x = D.enumerateInstances();
        while (x.hasMoreElements()) {
            Instance in = (Instance) x.nextElement();
            split[(int) in.value(a)].add(in);
        }
        for (int i = 0; i < split.length; i++) {
            split[i].compactify();
        }

        // Evaluate each subset's impurity exactly once (previously it was
        // recomputed whenever it improved on the current minimum).
        for (int i = 0; i < a.numValues(); i++) {
            double impurity = imp(split[i]);
            if (impurity < imin) {
                imin = impurity;
                fbest = a; // best feature found so far to make root
            }
        }
    }
    return fbest;

}

From source file:GrowTree.java

/**
 * Returns an impurity value for {@code data}.
 *
 * <p>NOTE(review): as shown here the method looks truncated. The per-class
 * counts are tallied into a local array that is never read afterwards, and the
 * returned name {@code imp} is not defined inside this method — presumably it
 * is a field or the computation was lost from this excerpt. Confirm against the
 * complete source before relying on this method.
 *
 * @param data the instances to evaluate
 * @return the value of {@code imp} (not computed within the visible code)
 */
public double imp(Instances data) {
    // Per-class instance counts for this subset (currently unused below).
    double localdistribution[] = new double[data.numClasses()];
    Enumeration eninst = data.enumerateInstances();
    while (eninst.hasMoreElements()) {
        Instance ele = (Instance) eninst.nextElement();
        localdistribution[(int) ele.classValue()]++;
    }

    return imp;
}

From source file:ID3Chi.java

License:Open Source License

/**
 * Turns this node into a leaf using the class distribution of {@code data}.
 *
 * <p>Instances with a missing value for {@code m_Attribute} are first removed
 * from {@code data} (the argument itself is modified). If nothing remains,
 * {@code SetNullDistribution} is called and the method returns. Otherwise the
 * normalized class distribution, the index of its largest entry and the class
 * attribute are stored, and {@code m_Attribute} is cleared.
 *
 * @param data the instances reaching this node
 */
private void MakeALeaf(Instances data) {

    data.deleteWithMissing(m_Attribute);

    final int remaining = data.numInstances();
    if (remaining == 0) {
        SetNullDistribution(data);
        return;
    }

    // Tally the class of every remaining instance.
    m_Distribution = new double[data.numClasses()];
    for (int row = 0; row < remaining; row++) {
        int classIdx = (int) data.instance(row).classValue();
        m_Distribution[classIdx]++;
    }

    // Normalize the tally, then record the winning class and the class attribute.
    Utils.normalize(m_Distribution);
    m_ClassValue = Utils.maxIndex(m_Distribution);
    m_ClassAttribute = data.classAttribute();

    // A null split attribute marks this node as a leaf.
    m_Attribute = null;
}

From source file:ID3Chi.java

License:Open Source License

/**
 * Counts how many instances of {@code data} fall into each class.
 *
 * @param data the instances to tally
 * @return an array of length {@code data.numClasses()} whose entry at index
 *         {@code c} is the number of instances with class index {@code c}
 */
private double[] GetClassCounts(Instances data) {

    final double[] tally = new double[data.numClasses()];
    final int total = data.numInstances();
    for (int row = 0; row < total; row++) {
        int classIdx = (int) data.instance(row).classValue();
        tally[classIdx]++;
    }
    return tally;
}

From source file:ID3Chi.java

License:Open Source License

/**
 * Partitions a dataset by the value of a nominal attribute.
 *
 * <p>The result has one subset per attribute value plus one trailing subset,
 * at index {@code att.numValues()}, that collects the instances whose value
 * for the attribute is missing.
 *
 * @param data
 *            the data which is to be split
 * @param att
 *            the attribute to be used for splitting
 * @return the sets of instances produced by the split
 */
private Instances[] splitData(Instances data, Attribute att) {

    // The slot after the last real value holds the "unknown" instances.
    final int unknownSlot = att.numValues();
    final int capacity = data.numInstances();

    Instances[] partitions = new Instances[unknownSlot + 1];
    for (int slot = 0; slot < partitions.length; slot++) {
        partitions[slot] = new Instances(data, capacity);
    }

    // Route every instance to the slot of its attribute value.
    for (int row = 0; row < capacity; row++) {
        Instance current = data.instance(row);
        int slot = current.isMissing(att) ? unknownSlot : (int) current.value(att);
        partitions[slot].add(current);
    }

    // Trim each partition down to the space it actually uses.
    for (Instances partition : partitions) {
        partition.compactify();
    }
    return partitions;
}

From source file:br.com.ufu.lsi.utils.DocumentFrequencyAttributeEval.java

License:Open Source License

/**
 * Builds the evaluator by counting, for every non-class attribute, the number
 * of instances in which that attribute has a stored non-zero value. The counts
 * are written to {@code m_DFs}, indexed by attribute index.
 *
 * <p>When {@code m_missingAsZero} is set, missing values are treated like zero
 * and are not counted. Otherwise no missing-value check is made.
 * NOTE(review): in that case a missing value presumably compares as non-zero
 * and is counted — confirm against the value Weka uses for "missing".
 *
 * @param data set of instances serving as training data
 * @throws Exception if the capabilities check rejects the data
 */
public void buildEvaluator(Instances data) throws Exception {

    // Can the evaluator handle this data?
    getCapabilities().testWithFail(data);

    final int classIdx = data.classIndex();
    m_DFs = new int[data.numAttributes()];

    for (int row = 0; row < data.numInstances(); row++) {
        Instance current = data.instance(row);
        final int stored = current.numValues();

        // Walk only the values the instance actually stores.
        for (int pos = 0; pos < stored; pos++) {
            int attIdx = current.index(pos);
            if (attIdx == classIdx) {
                continue;
            }

            double value = current.valueSparse(pos);

            // Missing values are regarded as 0 when m_missingAsZero is set.
            // (isMissingSparse(pos) could be used instead, or, less efficiently,
            // isMissing(attIdx).)
            boolean ignoredAsMissing = m_missingAsZero && Instance.isMissingValue(value);
            if (!ignoredAsMissing && value != 0.0) {
                m_DFs[attIdx]++;
            }
        }
    }
}