Example usage for weka.core Instances instance

List of usage examples for weka.core Instances instance

Introduction

On this page you can find example usages of the weka.core Instances instance method.

Prototype



public Instance instance(int index)

Source Link

Document

Returns the instance at the given position.

Usage

From source file:com.spread.experiment.tempuntilofficialrelease.ClassificationViaClustering108.java

License:Open Source License

/**
 * builds the classifier/*w  w  w . j a  va2  s  .  com*/
 * 
 * @param data the training instances
 * @throws Exception if something goes wrong
 */
@Override
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // save original header (needed for clusters to classes output)
    m_OriginalHeader = data.stringFreeStructure();

    // remove class attribute for clusterer
    Instances clusterData = new Instances(data);
    clusterData.setClassIndex(-1);
    clusterData.deleteAttributeAt(data.classIndex());
    m_ClusteringHeader = clusterData.stringFreeStructure();

    if (m_ClusteringHeader.numAttributes() == 0) {
        System.err.println("Data contains only class attribute, defaulting to ZeroR model.");
        m_ZeroR = new ZeroR();
        m_ZeroR.buildClassifier(data);
    } else {
        m_ZeroR = null;

        // build clusterer
        m_ActualClusterer = AbstractClusterer.makeCopy(m_Clusterer);
        m_ActualClusterer.buildClusterer(clusterData);

        if (!getLabelAllClusters()) {

            // determine classes-to-clusters mapping
            ClusterEvaluation eval = new ClusterEvaluation();
            eval.setClusterer(m_ActualClusterer);
            eval.evaluateClusterer(clusterData);
            double[] clusterAssignments = eval.getClusterAssignments();
            int[][] counts = new int[eval.getNumClusters()][m_OriginalHeader.numClasses()];
            int[] clusterTotals = new int[eval.getNumClusters()];
            double[] best = new double[eval.getNumClusters() + 1];
            double[] current = new double[eval.getNumClusters() + 1];
            for (int i = 0; i < data.numInstances(); i++) {
                Instance instance = data.instance(i);
                if (!instance.classIsMissing()) {
                    counts[(int) clusterAssignments[i]][(int) instance.classValue()]++;
                    clusterTotals[(int) clusterAssignments[i]]++;
                }
            }
            best[eval.getNumClusters()] = Double.MAX_VALUE;
            ClusterEvaluation.mapClasses(eval.getNumClusters(), 0, counts, clusterTotals, current, best, 0);
            m_ClustersToClasses = new double[best.length];
            System.arraycopy(best, 0, m_ClustersToClasses, 0, best.length);
        } else {
            m_ClusterClassProbs = new double[m_ActualClusterer.numberOfClusters()][data.numClasses()];
            for (int i = 0; i < data.numInstances(); i++) {
                Instance clusterInstance = clusterData.instance(i);
                Instance originalInstance = data.instance(i);
                if (!originalInstance.classIsMissing()) {
                    double[] probs = m_ActualClusterer.distributionForInstance(clusterInstance);
                    for (int j = 0; j < probs.length; j++) {
                        m_ClusterClassProbs[j][(int) originalInstance.classValue()] += probs[j];
                    }
                }
            }
            for (int i = 0; i < m_ClusterClassProbs.length; i++) {
                Utils.normalize(m_ClusterClassProbs[i]);
            }
        }
    }
}

From source file:com.tum.classifiertest.DataCache.java

License:Open Source License

/**
 * Creates a DataCache by copying data from a weka.core.Instances object.
 * <p>
 * Attribute values are stored attribute-major as floats, with missing values
 * encoded as {@code Float.MAX_VALUE}. Instance weights and class values are
 * copied into flat arrays, and a sort result is stored for every attribute
 * except the class attribute.
 *
 * @param origData the dataset to copy from
 * @throws Exception if an attribute is neither numeric nor nominal
 */
public DataCache(Instances origData) throws Exception {

    classIndex = origData.classIndex();
    numAttributes = origData.numAttributes();
    numClasses = origData.numClasses();
    numInstances = origData.numInstances();

    // 0 marks a numeric attribute; a nominal attribute stores its number of values
    attNumVals = new int[origData.numAttributes()];
    for (int i = 0; i < attNumVals.length; i++) {
        if (origData.attribute(i).isNumeric()) {
            attNumVals[i] = 0;
        } else if (origData.attribute(i).isNominal()) {
            attNumVals[i] = origData.attribute(i).numValues();
        } else {
            throw new Exception("Only numeric and nominal attributes are supported.");
        }
    }

    /* Array is indexed by attribute first, to speed access in RF splitting. */
    vals = new float[numAttributes][numInstances];
    for (int a = 0; a < numAttributes; a++) {
        for (int i = 0; i < numInstances; i++) {
            if (origData.instance(i).isMissing(a)) {
                vals[a][i] = Float.MAX_VALUE; // to make sure missing values go to the end
            } else {
                vals[a][i] = (float) origData.instance(i).value(a); // deep copy
            }
        }
    }

    instWeights = new double[numInstances];
    instClassValues = new int[numInstances];
    for (int i = 0; i < numInstances; i++) {
        instWeights[i] = origData.instance(i).weight();
        instClassValues[i] = (int) origData.instance(i).classValue();
    }

    /* compute the sortedInstances for the whole dataset */
    sortedIndices = new int[numAttributes][];
    for (int a = 0; a < numAttributes; a++) {

        // the class attribute gets no entry; its slot stays null
        if (a == classIndex) {
            continue;
        }

        // Nominal attributes (as of FastRF 0.99) and numeric attributes are
        // handled identically: both are sorted, and missing values, coded as
        // Float.MAX_VALUE, go to the end.
        sortedIndices[a] = FastRfUtils.sort(vals[a]);
    }
}

From source file:com.tum.classifiertest.FastRfBagging.java

License:Open Source License

/**
 * Computes the out-of-bag error for a set of instances.
 * <p>
 * One {@code VotesCollector} task per instance is submitted to the pool; the
 * results are then consumed in instance order. For a numeric class the error
 * is the weighted mean absolute difference between vote and class value; for
 * any other class it is the weighted fraction of votes that differ from the
 * class value.
 *
 * @param data       the instances
 * @param inBag      numTrees x numInstances indicating out-of-bag instances
 * @param threadPool the pool of threads
 *
 * @return the oob error
 * @throws InterruptedException if waiting for a vote is interrupted
 * @throws ExecutionException   if a vote computation failed
 */
private double computeOOBError(Instances data, boolean[][] inBag, ExecutorService threadPool)
        throws InterruptedException, ExecutionException {

    final boolean regression = data.classAttribute().isNumeric();

    // schedule the vote collection for every instance up front
    List<Future<Double>> pending = new ArrayList<Future<Double>>(data.numInstances());
    int submitted = 0;
    while (submitted < data.numInstances()) {
        pending.add(threadPool.submit(new VotesCollector(m_Classifiers, submitted, data, inBag)));
        submitted++;
    }

    double weightTotal = 0.0;
    double weightedError = 0.0;

    for (int idx = 0; idx < data.numInstances(); idx++) {

        final double vote = pending.get(idx).get();
        final double weight = data.instance(idx).weight();

        weightTotal += weight;
        if (regression) {
            weightedError += StrictMath.abs(vote - data.instance(idx).classValue()) * weight;
        } else if (vote != data.instance(idx).classValue()) {
            weightedError += weight;
        }
    }

    return weightedError / weightTotal;
}

From source file:com.tum.classifiertest.FastRfUtils.java

License:Open Source License

/**
 * Produces a random permutation of the values of an attribute in a dataset using Knuth shuffle.
 * <p/>/*from   w  w w. jav  a  2 s  . c om*/
 * Copies back the current values of the previously scrambled attribute and uses the given permutation
 * to scramble the values of the new attribute all by copying from the original dataset.
 *
 * @param src      the source dataset
 * @param dst      the scrambled dataset
 * @param attIndex the attribute index
 * @param perm     the random permutation
 *
 * @return fluent
 */
public static Instances scramble(Instances src, Instances dst, final int attIndex, int[] perm) {

    for (int i = 0; i < src.numInstances(); i++) {

        Instance scrambled = dst.instance(i);

        if (attIndex > 0)
            scrambled.setValue(attIndex - 1, src.instance(i).value(attIndex - 1));
        scrambled.setValue(attIndex, src.instance(perm[i]).value(attIndex));
    }

    return dst;
}

From source file:com.yahoo.labs.samoa.instances.WekaToSamoaInstanceConverter.java

License:Apache License

/**
* Converts a set of weka instances into samoa instances.
* <p>
* The samoa header is created first and remembered in
* {@code samoaInstanceInformation}; each weka instance is then converted and
* appended in its original order.
*
* @param instances the weka instances to convert
* @return the converted samoa instances
*/
public Instances samoaInstances(weka.core.Instances instances) {
    final Instances converted = samoaInstancesInformation(instances);
    //We assume that we have only one samoaInstanceInformation for WekaToSamoaInstanceConverter
    this.samoaInstanceInformation = converted;

    int position = 0;
    while (position < instances.numInstances()) {
        weka.core.Instance wekaInstance = instances.instance(position);
        converted.add(samoaInstance(wekaInstance));
        position++;
    }
    return converted;
}

From source file:com.yimei.core.Discretizer.java

/**
 * Returns a copy of {@code data} in which the values of every numeric
 * attribute are replaced by an equal-width bin number.
 * <p>
 * For each numeric attribute the observed minimum and maximum are determined,
 * the range is divided by {@code numOfIntervals}, and each value is mapped to
 * {@code Math.round((value - min) / interval)}. The input dataset itself is
 * not modified.
 *
 * @param data the dataset to discretize
 * @return a new dataset holding the bin numbers
 */
public static Instances discretize(Instances data) {
    Instances discData = new Instances(data);
    for (int i = 0; i < data.numAttributes(); i++) {
        if (!data.attribute(i).isNumeric()) {
            continue;
        }

        // Double.MIN_VALUE is the smallest POSITIVE double, so seeding the
        // maximum with it gives a wrong result when all values are <= 0.
        double max = Double.NEGATIVE_INFINITY;
        double min = Double.POSITIVE_INFINITY;

        for (int j = 0; j < data.size(); j++) {
            double value = data.instance(j).value(i);
            if (value > max) {
                max = value;
            }
            if (value < min) {
                min = value;
            }
        }

        double interval = (max - min) / numOfIntervals;

        // NOTE(review): if value(i) yields NaN (e.g. for a missing entry) or
        // the attribute is constant (interval == 0), the division produces NaN
        // and Math.round maps it to bin 0 -- confirm this is intended.
        for (int j = 0; j < data.size(); j++) {
            long discValue = Math.round((data.instance(j).value(i) - min) / interval);
            discData.instance(j).setValue(i, discValue);
        }
    }

    return discData;
}

From source file:com.zooclassifier.Model.FileLoader.java

/**
 * Loads an ARFF file and splits it into attribute values, labels and the legal
 * values of each.
 * <p>
 * The last attribute is treated as the class. All values are stored in their
 * string form. The file reader is closed once the data has been read.
 *
 * @param filename path of the ARFF file to load
 * @throws FileNotFoundException if the file cannot be opened
 * @throws IOException           if reading the file fails
 */
public FileLoader(String filename) throws FileNotFoundException, IOException {
    Instances data;
    // try-with-resources: the original never closed the reader
    try (BufferedReader reader = new BufferedReader(new FileReader(filename))) {
        ArffLoader.ArffReader arff = new ArffLoader.ArffReader(reader);
        data = arff.getData();
    }

    final int classIdx = data.numAttributes() - 1;
    data.setClassIndex(classIdx);

    final int numRows = data.numInstances();
    attributes = new String[numRows][classIdx];
    labels = new String[numRows];

    for (int i = 0; i < numRows; i++) {
        Instance instance = data.instance(i);
        for (int j = 0; j < instance.numAttributes() - 1; j++) {
            attributes[i][j] = instance.stringValue(j);
        }
        labels[i] = instance.stringValue(instance.numAttributes() - 1);
    }

    // declared values of every non-class attribute
    attributesLegalValues = new String[classIdx][];
    for (int i = 0; i < classIdx; i++) {
        attributesLegalValues[i] = (String[]) Collections.list(data.attribute(i).enumerateValues())
                .toArray(new String[data.attribute(i).numValues()]);
    }

    // declared values of the class attribute
    labelsLegalValues = (String[]) Collections.list(data.attribute(classIdx).enumerateValues())
            .toArray(new String[data.attribute(classIdx).numValues()]);
}

From source file:control.CosineDistance.java

License:Open Source License

/**
 * Returns the index of the closest point to the current instance.
 * Index is index in Instances object that is the second parameter.
 *
 * @param instance    the instance to assign a cluster to
 * @param allPoints    all points/*from  w w  w .  java 2 s.c  o  m*/
 * @param pointList    the list of points
 * @return       the index of the closest point
 * @throws Exception   if something goes wrong
 */
public int closestPoint(Instance instance, Instances allPoints, int[] pointList) throws Exception {
    double minDist = Integer.MAX_VALUE;
    int bestPoint = 0;
    for (int i = 0; i < pointList.length; i++) {
        double dist = distance(instance, allPoints.instance(pointList[i]), Double.POSITIVE_INFINITY);
        if (dist < minDist) {
            minDist = dist;
            bestPoint = i;
        }
    }
    return pointList[bestPoint];
}

From source file:core.classifier.MyFirstClassifier.java

License:Open Source License

/**
 * Method for building the classifier. Implements a one-against-one
 * wrapper for multi-class problems./*from  ww  w . ja  v  a  2  s . c o m*/
 *
 * @param insts the set of training instances
 * @throws Exception if the classifier can't be built successfully
 */
public void buildClassifier(Instances insts) throws Exception {

    if (!m_checksTurnedOff) {
        // can classifier handle the data?
        getCapabilities().testWithFail(insts);

        // remove instances with missing class
        insts = new Instances(insts);
        insts.deleteWithMissingClass();

        /* Removes all the instances with weight equal to 0.
         MUST be done since condition (8) of Keerthi's paper
         is made with the assertion Ci > 0 (See equation (3a). */
        Instances data = new Instances(insts, insts.numInstances());
        for (int i = 0; i < insts.numInstances(); i++) {
            if (insts.instance(i).weight() > 0)
                data.add(insts.instance(i));
        }
        if (data.numInstances() == 0) {
            throw new Exception("No training instances left after removing " + "instances with weight 0!");
        }
        insts = data;
    }

    if (!m_checksTurnedOff) {
        m_Missing = new ReplaceMissingValues();
        m_Missing.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Missing);
    } else {
        m_Missing = null;
    }

    if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES)) {
        boolean onlyNumeric = true;
        if (!m_checksTurnedOff) {
            for (int i = 0; i < insts.numAttributes(); i++) {
                if (i != insts.classIndex()) {
                    if (!insts.attribute(i).isNumeric()) {
                        onlyNumeric = false;
                        break;
                    }
                }
            }
        }

        if (!onlyNumeric) {
            m_NominalToBinary = new NominalToBinary();
            m_NominalToBinary.setInputFormat(insts);
            insts = Filter.useFilter(insts, m_NominalToBinary);
        } else {
            m_NominalToBinary = null;
        }
    } else {
        m_NominalToBinary = null;
    }

    if (m_filterType == FILTER_STANDARDIZE) {
        m_Filter = new Standardize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else if (m_filterType == FILTER_NORMALIZE) {
        m_Filter = new Normalize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else {
        m_Filter = null;
    }

    m_classIndex = insts.classIndex();
    m_classAttribute = insts.classAttribute();
    m_KernelIsLinear = (m_kernel instanceof PolyKernel) && (((PolyKernel) m_kernel).getExponent() == 1.0);

    // Generate subsets representing each class
    Instances[] subsets = new Instances[insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i] = new Instances(insts, insts.numInstances());
    }
    for (int j = 0; j < insts.numInstances(); j++) {
        Instance inst = insts.instance(j);
        subsets[(int) inst.classValue()].add(inst);
    }
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i].compactify();
    }

    // Build the binary classifiers
    Random rand = new Random(m_randomSeed);
    m_classifiers = new BinarySMO[insts.numClasses()][insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        for (int j = i + 1; j < insts.numClasses(); j++) {
            m_classifiers[i][j] = new BinarySMO();
            m_classifiers[i][j].setKernel(Kernel.makeCopy(getKernel()));
            Instances data = new Instances(insts, insts.numInstances());
            for (int k = 0; k < subsets[i].numInstances(); k++) {
                data.add(subsets[i].instance(k));
            }
            for (int k = 0; k < subsets[j].numInstances(); k++) {
                data.add(subsets[j].instance(k));
            }
            data.compactify();
            data.randomize(rand);
            m_classifiers[i][j].buildClassifier(data, i, j, m_fitLogisticModels, m_numFolds, m_randomSeed);
        }
    }
}

From source file:core.ClusterEvaluationEX.java

License:Open Source License

/**
 * Moves every instance whose attribute at index 1 equals -1 out of
 * {@code data} and into the {@code noise} dataset.
 * <p>
 * {@code noise} is reset to an empty structure copy of {@code data} first.
 * The passed-in dataset is modified in place.
 *
 * @param data the dataset to filter
 * @return the same {@code data} object, with the matching instances removed
 */
public Instances DeleteNoise(Instances data) {
    noise = data.stringFreeStructure();

    int pos = 0;
    while (pos < data.numInstances()) {
        if (data.instance(pos).value(1) == -1) {
            noise.add(data.instance(pos));
            // after the delete the next candidate sits at the same position,
            // so the cursor is not advanced
            data.delete(pos);
        } else {
            pos++;
        }
    }
    return data;
}