Example usage for weka.core Instances instance

List of usage examples for weka.core Instances instance

Introduction

In this page you can find the example usage for weka.core Instances instance.

Prototype



publicInstance instance(int index) 

Source Link

Document

Returns the instance at the given position.

Usage

From source file:br.ufrn.ia.core.clustering.SimpleKMeansIaProject.java

License:Open Source License

public void buildClusterer(Instances data) throws Exception {

    // can clusterer handle the data?
    getCapabilities().testWithFail(data);

    m_Iterations = 0;/*from ww  w .  ja v a2 s .com*/

    m_ReplaceMissingFilter = new ReplaceMissingValues();
    Instances instances = new Instances(data);

    instances.setClassIndex(-1);
    if (!m_dontReplaceMissing) {
        m_ReplaceMissingFilter.setInputFormat(instances);
        instances = Filter.useFilter(instances, m_ReplaceMissingFilter);
    }

    m_FullMissingCounts = new int[instances.numAttributes()];
    if (m_displayStdDevs) {
        m_FullStdDevs = new double[instances.numAttributes()];
    }
    m_FullNominalCounts = new int[instances.numAttributes()][0];

    m_FullMeansOrMediansOrModes = moveCentroid(0, instances, false);
    for (int i = 0; i < instances.numAttributes(); i++) {
        m_FullMissingCounts[i] = instances.attributeStats(i).missingCount;
        if (instances.attribute(i).isNumeric()) {
            if (m_displayStdDevs) {
                m_FullStdDevs[i] = Math.sqrt(instances.variance(i));
            }
            if (m_FullMissingCounts[i] == instances.numInstances()) {
                m_FullMeansOrMediansOrModes[i] = Double.NaN; // mark missing
                // as mean
            }
        } else {
            m_FullNominalCounts[i] = instances.attributeStats(i).nominalCounts;
            if (m_FullMissingCounts[i] > m_FullNominalCounts[i][Utils.maxIndex(m_FullNominalCounts[i])]) {
                m_FullMeansOrMediansOrModes[i] = -1; // mark missing as most
                // common value
            }
        }
    }

    m_ClusterCentroids = new Instances(instances, m_NumClusters);
    int[] clusterAssignments = new int[instances.numInstances()];

    if (m_PreserveOrder)
        m_Assignments = clusterAssignments;

    m_DistanceFunction.setInstances(instances);

    Random RandomO = new Random(getSeed());
    int instIndex;
    HashMap initC = new HashMap();
    DecisionTableHashKey hk = null;

    Instances initInstances = null;
    if (m_PreserveOrder)
        initInstances = new Instances(instances);
    else
        initInstances = instances;

    for (int j = initInstances.numInstances() - 1; j >= 0; j--) {
        instIndex = RandomO.nextInt(j + 1);
        hk = new DecisionTableHashKey(initInstances.instance(instIndex), initInstances.numAttributes(), true);
        if (!initC.containsKey(hk)) {
            m_ClusterCentroids.add(initInstances.instance(instIndex));
            initC.put(hk, null);
        }
        initInstances.swap(j, instIndex);

        if (m_ClusterCentroids.numInstances() == m_NumClusters) {
            break;
        }
    }

    m_NumClusters = m_ClusterCentroids.numInstances();

    // removing reference
    initInstances = null;

    int i;
    boolean converged = false;
    int emptyClusterCount;
    Instances[] tempI = new Instances[m_NumClusters];
    m_squaredErrors = new double[m_NumClusters];
    m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
    m_ClusterMissingCounts = new int[m_NumClusters][instances.numAttributes()];
    while (!converged) {
        emptyClusterCount = 0;
        m_Iterations++;
        converged = true;
        for (i = 0; i < instances.numInstances(); i++) {
            Instance toCluster = instances.instance(i);
            int newC = clusterProcessedInstance(toCluster, true);
            if (newC != clusterAssignments[i]) {
                converged = false;
            }
            clusterAssignments[i] = newC;
        }

        // update centroids
        m_ClusterCentroids = new Instances(instances, m_NumClusters);
        for (i = 0; i < m_NumClusters; i++) {
            tempI[i] = new Instances(instances, 0);
        }
        for (i = 0; i < instances.numInstances(); i++) {
            tempI[clusterAssignments[i]].add(instances.instance(i));
        }
        for (i = 0; i < m_NumClusters; i++) {
            if (tempI[i].numInstances() == 0) {
                // empty cluster
                emptyClusterCount++;
            } else {
                moveCentroid(i, tempI[i], true);
            }
        }

        if (emptyClusterCount > 0) {
            m_NumClusters -= emptyClusterCount;
            if (converged) {
                Instances[] t = new Instances[m_NumClusters];
                int index = 0;
                for (int k = 0; k < tempI.length; k++) {
                    if (tempI[k].numInstances() > 0) {
                        t[index++] = tempI[k];
                    }
                }
                tempI = t;
            } else {
                tempI = new Instances[m_NumClusters];
            }
        }

        if (m_Iterations == m_MaxIterations)
            converged = true;

        if (!converged) {
            m_squaredErrors = new double[m_NumClusters];
            m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
        }
    }

    if (m_displayStdDevs) {
        m_ClusterStdDevs = new Instances(instances, m_NumClusters);
    }
    m_ClusterSizes = new int[m_NumClusters];
    for (i = 0; i < m_NumClusters; i++) {
        if (m_displayStdDevs) {
            double[] vals2 = new double[instances.numAttributes()];
            for (int j = 0; j < instances.numAttributes(); j++) {
                if (instances.attribute(j).isNumeric()) {
                    vals2[j] = Math.sqrt(tempI[i].variance(j));
                } else {
                    vals2[j] = Utils.missingValue();
                }
            }
            m_ClusterStdDevs.add(new DenseInstance(1.0, vals2));
        }
        m_ClusterSizes[i] = tempI[i].numInstances();
    }
}

From source file:br.ufrn.ia.core.clustering.SimpleKMeansIaProject.java

License:Open Source License

protected double[] moveCentroid(int centroidIndex, Instances members, boolean updateClusterInfo) {
    double[] vals = new double[members.numAttributes()];

    // used only for Manhattan Distance
    Instances sortedMembers = null;/*  w  w w .ja v a 2  s  . co  m*/
    int middle = 0;
    boolean dataIsEven = false;

    if (m_DistanceFunction instanceof ManhattanDistance) {
        middle = (members.numInstances() - 1) / 2;
        dataIsEven = ((members.numInstances() % 2) == 0);
        if (m_PreserveOrder) {
            sortedMembers = members;
        } else {
            sortedMembers = new Instances(members);
        }
    }

    for (int j = 0; j < members.numAttributes(); j++) {

        // in case of Euclidian distance the centroid is the mean point
        // in case of Manhattan distance the centroid is the median point
        // in both cases, if the attribute is nominal, the centroid is the
        // mode
        if (m_DistanceFunction instanceof EuclideanDistance || members.attribute(j).isNominal()) {
            vals[j] = members.meanOrMode(j);
        } else if (m_DistanceFunction instanceof ManhattanDistance) {
            // singleton special case
            if (members.numInstances() == 1) {
                vals[j] = members.instance(0).value(j);
            } else {
                sortedMembers.kthSmallestValue(j, middle + 1);
                vals[j] = sortedMembers.instance(middle).value(j);
                if (dataIsEven) {
                    sortedMembers.kthSmallestValue(j, middle + 2);
                    vals[j] = (vals[j] + sortedMembers.instance(middle + 1).value(j)) / 2;
                }
            }
        }

        if (updateClusterInfo) {
            m_ClusterMissingCounts[centroidIndex][j] = members.attributeStats(j).missingCount;
            m_ClusterNominalCounts[centroidIndex][j] = members.attributeStats(j).nominalCounts;
            if (members.attribute(j).isNominal()) {
                if (m_ClusterMissingCounts[centroidIndex][j] > m_ClusterNominalCounts[centroidIndex][j][Utils
                        .maxIndex(m_ClusterNominalCounts[centroidIndex][j])]) {
                    vals[j] = Utils.missingValue(); // mark mode as missing
                }
            } else {
                if (m_ClusterMissingCounts[centroidIndex][j] == members.numInstances()) {
                    vals[j] = Utils.missingValue(); // mark mean as missing
                }
            }
        }
    }
    if (updateClusterInfo)
        m_ClusterCentroids.add(new DenseInstance(1.0, vals));
    return vals;
}

From source file:ca.uqac.florentinth.speakerauthentication.Learning.Learning.java

License:Apache License

public Map<String, String> makePrediction(String username, FileInputStream trainingModel,
        FileReader testingDataset) throws Exception {
    Map<String, String> predictions = new HashMap<>();

    ObjectInputStream inputStream = new ObjectInputStream(trainingModel);
    weka.classifiers.Classifier classifier = (weka.classifiers.Classifier) inputStream.readObject();
    inputStream.close();//from   w w  w  .  jav  a 2  s  . c  om

    Instances instances = new Instances(new BufferedReader(testingDataset));

    if (instances.classIndex() == -1) {
        instances.setClassIndex(instances.numAttributes() - 1);
    }

    int last = instances.numInstances() - 1;

    if (instances.instance(last).stringValue(instances.classIndex()).equals(username)) {
        double label = classifier.classifyInstance(instances.instance(last));
        instances.instance(last).setClassValue(label);
        predictions.put(username, instances.instance(last).stringValue(instances.classIndex()));
    }

    return predictions;
}

From source file:categorization.SpectralWEKA.java

License:Open Source License

/**
 * Generates a clusterer by the mean of spectral clustering algorithm.
 *
 * @param data set of instances serving as training data
 * @exception Exception if the clusterer has not been generated successfully
 *//*w w w  .j  a  v  a2s . com*/
public void buildClusterer(Instances data) throws java.lang.Exception {
    m_Sequences = new Instances(data);
    int n = data.numInstances();
    int k = data.numAttributes();
    DoubleMatrix2D w;
    if (useSparseMatrix)
        w = DoubleFactory2D.sparse.make(n, n);
    else
        w = DoubleFactory2D.dense.make(n, n);
    double[][] v1 = new double[n][];
    for (int i = 0; i < n; i++)
        v1[i] = data.instance(i).toDoubleArray();
    v = DoubleFactory2D.dense.make(v1);
    double sigma_sq = sigma * sigma;
    //Sets up similarity matrix
    for (int i = 0; i < n; i++)
        for (int j = i; j < n; j++) {
            /*double dist = distnorm2(v.viewRow(i), v.viewRow(j));
            if((r == -1) || (dist < r)) {
              double sim = Math.exp(- (dist * dist) / (2 * sigma_sq));
              w.set(i, j, sim);
              w.set(j, i, sim);
            }*/
            /* String [] key = {data.instance(i).stringValue(0), data.instance(j).stringValue(0)};
             System.out.println(key[0]);
             System.out.println(key[1]);
             System.out.println(simScoreMap.containsKey(key));
             Double simValue = simScoreMap.get(key);*/

            double sim = sim_matrix[i][j];
            w.set(i, j, sim);
            w.set(j, i, sim);
        }

    //Partitions points
    int[][] p = partition(w, alpha_star);

    //Deploys results
    numOfClusters = p.length;
    cluster = new int[n];
    for (int i = 0; i < p.length; i++)
        for (int j = 0; j < p[i].length; j++)
            cluster[p[i][j]] = i;

    //System.out.println("Final partition:");
    // UtilsJS.printMatrix(p);
    // System.out.println("Cluster:\n");
    // UtilsJS.printArray(cluster);
    this.numOfClusters = cluster[Utils.maxIndex(cluster)] + 1;
    //  System.out.println("Num clusters:\t"+this.numOfClusters);
}

From source file:cba.ItemSet.java

License:Open Source License

/**
 * Updates counters for a set of item sets and a set of instances.
 *
 * @param itemSets the set of item sets which are to be updated
 * @param instances the instances to be used for updating the counters
 *//*  w  w  w.  j  a  v  a2s. com*/
public static void upDateCounters(FastVector itemSets, Instances instances) {

    for (int i = 0; i < instances.numInstances(); i++) {
        Enumeration enu = itemSets.elements();
        while (enu.hasMoreElements())
            ((ItemSet) enu.nextElement()).upDateCounter(instances.instance(i));
    }
}

From source file:cba.RuleItem.java

License:Open Source License

/**
 * Constructs a new RuleItem if the support of the given rule is above the support threshold.
 * @param premise the premise/*from   www . j a  v  a2  s  .  co m*/
 * @param consequence the consequence
 * @param instances the instances
 * @param genTime the time of generation of the current premise and consequence
 * @param minRuleCount the support threshold
 * @param m_midPoints the mid points of the intervals
 * @param m_priors the estimated priori probabilities (in a hashtable)
 * @return a RuleItem if its support is above the threshold, null otherwise
 */
public RuleItem generateRuleItem(ItemSet premise, ItemSet consequence, Instances instances, int genTime,
        int minRuleCount, double[] m_midPoints, Hashtable m_priors) {
    ItemSet rule = new ItemSet(instances.numInstances());
    rule.m_items = new int[(consequence.m_items).length];
    System.arraycopy(premise.m_items, 0, rule.m_items, 0, (premise.m_items).length);
    for (int k = 0; k < consequence.m_items.length; k++) {
        if (consequence.m_items[k] != -1)
            rule.m_items[k] = consequence.m_items[k];
    }
    for (int i = 0; i < instances.numInstances(); i++)
        rule.upDateCounter(instances.instance(i));
    int ruleSupport = rule.support();
    if (ruleSupport > minRuleCount) {
        RuleItem newRule = new RuleItem(premise, consequence, genTime, ruleSupport, m_midPoints, m_priors);
        return newRule;
    }
    return null;
}

From source file:cezeri.feature.selection.FeatureSelectionInfluence.java

private static double[] getAttributeValues(Instances test) {
    int n = test.numInstances();
    double[] ret = new double[n];
    for (int i = 0; i < n; i++) {
        Instance ins = test.instance(i);
        ret[i] = ins.classValue();/*ww w . ja va  2 s .c o  m*/
    }
    return ret;
}

From source file:cezeri.utils.FactoryInstance.java

public static double[][] getData(Instances m) {
    double[][] ret = new double[m.numInstances()][m.numAttributes()];
    for (int i = 0; i < m.numInstances(); i++) {
        Instance ins = m.instance(i);
        ret[i] = ins.toDoubleArray();/*from  w w  w .j  a v a 2  s .  c o m*/
    }
    return ret;
}

From source file:cezeri.utils.FactoryInstance.java

public static CMatrix toMatrix(Instances m) {
    double[][] ret = new double[m.numInstances()][m.numAttributes()];
    for (int i = 0; i < m.numInstances(); i++) {
        Instance ins = m.instance(i);
        ret[i] = ins.toDoubleArray();//from  w w w. j  av a 2 s.  c om
    }
    return CMatrix.getInstance(ret);
}

From source file:cezeri.utils.FactoryInstance.java

public static CMatrix fromInstances(Instances m) {
    double[][] ret = new double[m.numInstances()][m.numAttributes()];
    for (int i = 0; i < m.numInstances(); i++) {
        Instance ins = m.instance(i);
        ret[i] = ins.toDoubleArray();// w  ww. j a v  a2  s  .c  om
    }
    return CMatrix.getInstance(ret);
}