List of usage examples for the weka.core Instances(Instances, int) constructor
public Instances(Instances dataset, int capacity)
From source file:PrincipalComponents.java
License:Open Source License
/**
 * Returns the header of the training data after all filtering (e.g. missing
 * values and nominal-to-binary conversion) has been applied.
 *
 * @return a header-only (empty) copy of the filtered training data
 */
public Instances getFilteredInputFormat() {
    // Capacity 0 copies only the dataset structure, no instances.
    return new Instances(m_trainInstances, 0);
}
From source file:WekaRegressor.java
License:Open Source License
/**
 * Copy constructor. Deep-copies state via serialization so this regressor
 * shares no mutable objects with {@code toCopy}.
 *
 * @param toCopy the regressor to duplicate
 */
public WekaRegressor(WekaRegressor toCopy) {
    this.wekaClassifier = OtherUtils.serializationCopy(toCopy.wekaClassifier);
    if (toCopy.wekaDataSet != null) {
        // Header-only copy (capacity 0): only the dataset structure is kept.
        this.wekaDataSet = OtherUtils.serializationCopy(new Instances(toCopy.wekaDataSet, 0));
    }
}
From source file:WekaRegressor.java
License:Open Source License
/**
 * Trains the wrapped Weka classifier on the given regression data set.
 * Any failure during conversion or training is rethrown as a
 * {@code FailedToFitException}.
 *
 * @param dataSet the training data
 */
@Override
public void train(RegressionDataSet dataSet) {
    try {
        Instances instances = InstanceHandler.dataSetToInstances(dataSet);
        // Keep a header-only (capacity 0) copy of the dataset structure so
        // compatible instances can be built later for prediction.
        this.wekaDataSet = OtherUtils.serializationCopy(new Instances(instances, 0));
        wekaClassifier.buildClassifier(instances);
    } catch (Exception ex) {
        throw new FailedToFitException(ex);
    }
}
From source file:GrowTree.java
Attribute bestSplit(Instances D) { double imin = 1.0; Attribute fbest = null;/*from www . j av a2 s . c o m*/ Enumeration enat = D.enumerateAttributes(); while (enat.hasMoreElements()) { Attribute a = (Attribute) enat.nextElement(); //split D into subsets d1 to dn based on values vi based on features Instances[] split = new Instances[a.numValues()]; for (int i = 0; i < a.numValues(); i++) { split[i] = new Instances(D, D.numInstances()); } Enumeration x = D.enumerateInstances(); while (x.hasMoreElements()) { Instance in = (Instance) x.nextElement(); split[(int) in.value(a)].add(in); } for (int i = 0; i < split.length; i++) { split[i].compactify(); } for (int i = 0; i < a.numValues(); i++) { if (imp(split[i]) < imin) { imin = imp(split[i]); fbest = a; //evaluate the best feature to make root } } } return fbest; }
From source file:ArffLoader.java
License:Open Source License
/**
 * Determines and returns (if possible) the structure (internally the header)
 * of the data set as an empty set of instances.
 *
 * @return the structure of the data set as an empty set of Instances
 * @throws IOException if no source has been specified or the header cannot
 *           be parsed as ARFF
 */
public Instances getStructure() throws IOException {
    if (m_structure == null) {
        if (m_sourceReader == null) {
            throw new IOException("No source has been specified");
        }
        try {
            m_ArffReader = new ArffReader(m_sourceReader, 1);
            m_structure = m_ArffReader.getStructure();
        } catch (Exception ex) {
            // Preserve the original exception as the cause instead of
            // flattening it into the message only (stack trace was lost).
            throw new IOException(
                "Unable to determine structure as arff (Reason: " + ex.toString() + ").", ex);
        }
    }
    // Capacity 0: return a header-only copy so callers cannot mutate m_structure.
    return new Instances(m_structure, 0);
}
From source file:ArffLoader.java
License:Open Source License
/** * Return the full data set. If the structure hasn't yet been determined by a * call to getStructure then method should do so before processing the rest of * the data set.//from ww w . j a va2 s . co m * * @return the structure of the data set as an empty set of Instances * @throws IOException if there is no source or parsing fails */ public Instances getDataSet() throws IOException { Instances insts = null; try { if (m_sourceReader == null) { throw new IOException("No source has been specified"); } if (m_structure == null) { getStructure(); } // Read all instances Instance inst; insts = new Instances(m_structure, 0); while ((inst = m_ArffReader.readInstance(m_structure)) != null) { insts.add(inst); } // Instances readIn = new Instances(m_structure); } finally { if (m_sourceReader != null) { // close the stream m_sourceReader.close(); } } return insts; }
From source file:WrapperSubset.java
License:Open Source License
@Override public int[] postProcess(int[] attributeSet) { // save memory m_trainInstances = new Instances(m_trainInstances, 0); return attributeSet; }
From source file:ID3Chi.java
License:Open Source License
/** * Splits a dataset according to the values of a nominal attribute. * * @param data/*ww w . jav a 2 s .com*/ * the data which is to be split * @param att * the attribute to be used for splitting * @return the sets of instances produced by the split */ private Instances[] splitData(Instances data, Attribute att) { // [att.numValues()] is location for "unknown" values Instances[] subset = new Instances[att.numValues() + 1]; for (int j = 0; j <= att.numValues(); j++) { subset[j] = new Instances(data, data.numInstances()); } Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); if (inst.isMissing(att)) { subset[att.numValues()].add(inst); } else { subset[(int) inst.value(att)].add(inst); } } for (int i = 0; i < subset.length; i++) { subset[i].compactify(); } return subset; }
From source file:MPCKMeans.java
License:Open Source License
/**
 * Generates a clusterer. Instances in data have to be either all sparse or
 * all non-sparse.
 *
 * Sets up the (possibly per-cluster) learnable metrics, initializes the
 * centroids, optionally converts/smooths them, and then runs K-Means.
 *
 * @param data set of instances serving as training data
 * @exception Exception if the clusterer has not been generated successfully
 */
public void buildClusterer(Instances data) throws Exception {
    System.out.println("ML weight=" + m_MLweight);
    System.out.println("CL weight= " + m_CLweight);
    System.out.println("LOG term weight=" + m_logTermWeight);
    System.out.println("Regularizer weight= " + m_regularizerTermWeight);

    m_RandomNumberGenerator = new Random(m_RandomSeed);

    if (m_metric instanceof OfflineLearnableMetric) {
        m_isOfflineMetric = true;
    } else {
        m_isOfflineMetric = false;
    }

    // Don't rebuild the metric if it was already trained
    if (!m_metricBuilt) {
        m_metric.buildMetric(data);
        m_metricBuilt = true;
        m_metricLearner.setMetric(m_metric);
        m_metricLearner.setClusterer(this);

        m_metrics = new LearnableMetric[m_NumClusters];
        m_metricLearners = new MPCKMeansMetricLearner[m_NumClusters];
        for (int i = 0; i < m_metrics.length; i++) {
            if (m_useMultipleMetrics) {
                // One independent metric (and learner) per cluster.
                m_metrics[i] = (LearnableMetric) m_metric.clone();
                m_metricLearners[i] = (MPCKMeansMetricLearner) m_metricLearner.clone();
                m_metricLearners[i].setMetric(m_metrics[i]);
                m_metricLearners[i].setClusterer(this);
            } else {
                // A single metric/learner shared by all clusters.
                m_metrics[i] = m_metric;
                m_metricLearners[i] = m_metricLearner;
            }
        }
    }

    setInstances(data);
    m_ClusterCentroids = new Instances(m_Instances, m_NumClusters);
    m_ClusterAssignments = new int[m_Instances.numInstances()];

    // NOTE(review): this rejects data only when it has BOTH nominal and
    // string attributes (&&), while the message implies nominal alone is
    // unsupported — possibly intended as ||; confirm before changing.
    if (m_Instances.checkForNominalAttributes() && m_Instances.checkForStringAttributes()) {
        throw new UnsupportedAttributeTypeException("Cannot handle nominal attributes\n");
    }

    m_ClusterCentroids = m_Initializer.initialize();

    // if all instances are smoothed by the metric, the centroids
    // need to be smoothed too (note that this is independent of
    // centroid smoothing performed by K-Means)
    if (m_metric instanceof InstanceConverter) {
        System.out.println("Converting centroids...");
        Instances convertedCentroids = new Instances(m_ClusterCentroids, m_NumClusters);
        for (int i = 0; i < m_ClusterCentroids.numInstances(); i++) {
            Instance centroid = m_ClusterCentroids.instance(i);
            convertedCentroids.add(((InstanceConverter) m_metric).convertInstance(centroid));
        }

        // Replace the centroid set in place with the converted instances.
        m_ClusterCentroids.delete();
        for (int i = 0; i < convertedCentroids.numInstances(); i++) {
            m_ClusterCentroids.add(convertedCentroids.instance(i));
        }
    }

    System.out.println("Done initializing clustering ...");
    getIndexClusters();

    if (m_verbose && m_Seedable) {
        printIndexClusters();
        for (int i = 0; i < m_NumClusters; i++) {
            System.out.println("Centroid " + i + ": " + m_ClusterCentroids.instance(i));
        }
    }

    // Some extra work for smoothing metrics
    if (m_metric instanceof SmoothingMetric && ((SmoothingMetric) m_metric).getUseSmoothing()) {
        SmoothingMetric smoothingMetric = (SmoothingMetric) m_metric;
        Instances smoothedCentroids = new Instances(m_Instances, m_NumClusters);
        for (int i = 0; i < m_ClusterCentroids.numInstances(); i++) {
            Instance smoothedCentroid = smoothingMetric.smoothInstance(m_ClusterCentroids.instance(i));
            smoothedCentroids.add(smoothedCentroid);
        }
        m_ClusterCentroids = smoothedCentroids;
        updateSmoothingMetrics();
    }

    runKMeans();
}
From source file:MPCKMeans.java
License:Open Source License
/** M-step of the KMeans clustering algorithm -- updates cluster centroids *//* w w w. j a v a 2 s. c om*/ protected void updateClusterCentroids() throws Exception { Instances[] tempI = new Instances[m_NumClusters]; Instances tempCentroids = m_ClusterCentroids; Instances tempNewCentroids = new Instances(m_Instances, m_NumClusters); m_ClusterCentroids = new Instances(m_Instances, m_NumClusters); // tempI[i] stores the cluster instances for cluster i for (int i = 0; i < m_NumClusters; i++) { tempI[i] = new Instances(m_Instances, 0); } for (int i = 0; i < m_Instances.numInstances(); i++) { tempI[m_ClusterAssignments[i]].add(m_Instances.instance(i)); } // Calculates cluster centroids for (int i = 0; i < m_NumClusters; i++) { double[] values = new double[m_Instances.numAttributes()]; Instance centroid = null; if (m_isSparseInstance) { // uses fast meanOrMode values = ClusterUtils.meanOrMode(tempI[i]); centroid = new SparseInstance(1.0, values); } else { // non-sparse, go through each attribute for (int j = 0; j < m_Instances.numAttributes(); j++) { values[j] = tempI[i].meanOrMode(j); // uses usual meanOrMode } centroid = new Instance(1.0, values); } // // debugging: compare previous centroid w/current: // double w = 0; // for (int j = 0; j < m_Instances.numAttributes(); j++) w += values[j] * values[j]; // double w1 = 0; // for (int j = 0; j < m_Instances.numAttributes(); j++) w1 += tempCentroids.instance(i).value(j) * tempCentroids.instance(i).value(j); // System.out.println("\tOldCentroid=" + w1); // System.out.println("\tNewCentroid=" + w); // double prevObj = 0, currObj = 0; // for (int j = 0; j < tempI[i].numInstances(); j++) { // Instance instance = tempI[i].instance(j); // double prevPen = m_metrics[i].penalty(instance, tempCentroids.instance(i)); // double currPen = m_metrics[i].penalty(instance, centroid); // prevObj += prevPen; // currObj += currPen; // //System.out.println("\t\t" + j + " " + prevPen + " -> " + currPen + "\t" + prevObj + " -> " + currObj); // } 
// // dump instances out if there is a problem. // System.out.println("\t\t" + prevObj + " -> " + currObj); // if (currObj > prevObj) { // PrintWriter out = new PrintWriter(new BufferedOutputStream(new FileOutputStream("/tmp/INST.arff")), true); // out.println(new Instances(tempI[i], 0)); // out.println(centroid); // out.println(tempCentroids.instance(i)); // for (int j = 0; j < tempI[i].numInstances(); j++) { // out.println(tempI[i].instance(j)); // } // out.close(); // System.out.println(" Updated cluster " + i + "(" // + tempI[i].numInstances()); // System.exit(0); // } // if we are using a smoothing metric, smooth the centroids if (m_metric instanceof SmoothingMetric && ((SmoothingMetric) m_metric).getUseSmoothing()) { System.out.println("\tSmoothing..."); SmoothingMetric smoothingMetric = (SmoothingMetric) m_metric; centroid = smoothingMetric.smoothInstance(centroid); } // DEBUGGING: replaced line under with block below m_ClusterCentroids.add(centroid); // { // tempNewCentroids.add(centroid); // m_ClusterCentroids.delete(); // for (int j = 0; j <= i; j++) { // m_ClusterCentroids.add(tempNewCentroids.instance(j)); // } // for (int j = i+1; j < m_NumClusters; j++) { // m_ClusterCentroids.add(tempCentroids.instance(j)); // } // double objBackup = m_Objective; // System.out.println(" Updated cluster " + i + "(" // + tempI[i].numInstances() + "); obj=" + // calculateObjectiveFunction(false)); // m_Objective = objBackup; // } // in SPKMeans, cluster centroids need to be normalized if (m_metric.doesNormalizeData()) { m_metric.normalizeInstanceWeighted(m_ClusterCentroids.instance(i)); } } if (m_metric instanceof SmoothingMetric && ((SmoothingMetric) m_metric).getUseSmoothing()) updateSmoothingMetrics(); for (int i = 0; i < m_NumClusters; i++) tempI[i] = null; // free memory }