Example usage for weka.core Instances Instances

List of usage examples for weka.core Instances Instances

Introduction

In this page you can find the example usage for weka.core Instances Instances.

Prototype

public Instances(Instances dataset) 

Source Link

Document

Constructor copying all instances and references to the header information from the given set of instances.

Usage

From source file:adams.gui.visualization.debug.objectexport.WekaInstancesExporter.java

License:Open Source License

/**
 * Performs the actual export./*from w ww  . j a  va 2s . co  m*/
 *
 * @param obj      the object to export
 * @param file   the file to export to
 * @return      null if successful, otherwise error message
 */
@Override
protected String doExport(Object obj, File file) {
    Instances data;
    Instance inst;

    try {
        if (obj instanceof Instances) {
            DataSink.write(file.getAbsolutePath(), (Instances) obj);
            return null;
        } else {
            inst = (Instance) obj;
            if (inst.dataset() != null) {
                data = new Instances(inst.dataset());
                data.add((Instance) inst.copy());
                DataSink.write(file.getAbsolutePath(), data);
                return null;
            } else {
                return "Instance has no dataset associated, cannot export as ARFF!";
            }
        }
    } catch (Exception e) {
        return "Failed to write Instances to '" + file + "'!\n" + Utils.throwableToString(e);
    }
}

From source file:adams.gui.visualization.instance.LoadDatasetDialog.java

License:Open Source License

/**
 * Returns the full dataset, can be null if none loaded.
 *
 * @return      the full dataset/*from  w w  w  .  jav  a  2  s. com*/
 */
public Instances getDataset() {
    int index;
    Instances result;

    result = new Instances(m_Instances);
    if (m_ComboBoxSorting.getSelectedIndex() > 0)
        result.sort(m_ComboBoxSorting.getSelectedIndex() - 1);

    index = m_ComboBoxClass.getSelectedIndex();
    if (index > -1)
        index--;
    result.setClassIndex(index);

    return result;
}

From source file:adams.ml.data.InstancesView.java

License:Open Source License

/**
 * Returns a spreadsheet with the same header and comments.
 *
 * @return      the spreadsheet
 */
@Override
public Dataset getHeader() {
    Instances data;

    // NOTE(review): the Instances(Instances) constructor copies all rows,
    // not just the header; a header-only copy would be
    // "new Instances(m_Data, 0)" -- confirm whether the full copy is intended.
    data = new Instances(m_Data);
    return new InstancesView(data);
}

From source file:adams.ml.data.InstancesView.java

License:Open Source License

/**
 * Returns a spreadsheet containing only the input columns, not the class
 * column.
 *
 * @return      the input features; this view itself if no class column is set
 */
@Override
public SpreadSheet getInputs() {
    // without a class attribute, every column is already an input column
    if (m_Data.classIndex() == -1)
        return this;

    Instances inputs = new Instances(m_Data);
    // the class index must be unset before the attribute can be deleted
    inputs.setClassIndex(-1);
    inputs.deleteAttributeAt(m_Data.classIndex());

    return new InstancesView(inputs);
}

From source file:adams.ml.data.InstancesView.java

License:Open Source License

/**
 * Returns a spreadsheet containing only output columns, i.e., the class
 * column.
 *
 * @return      the output features, null if data has no class columns
 */
@Override
public SpreadSheet getOutputs() {
    if (m_Data.classIndex() == -1)
        return null;

    Instances outputs = new Instances(m_Data);
    // the class index must be unset before filtering can touch that attribute
    outputs.setClassIndex(-1);

    // keep only the (former) class attribute: select it, invert the removal
    Remove keepClass = new Remove();
    keepClass.setAttributeIndicesArray(new int[] { m_Data.classIndex() });
    keepClass.setInvertSelection(true);
    try {
        keepClass.setInputFormat(outputs);
        outputs = Filter.useFilter(outputs, keepClass);
        return new InstancesView(outputs);
    } catch (Exception e) {
        throw new IllegalStateException("Failed to apply Remove filter!", e);
    }
}

From source file:adams.opt.optimise.genetic.fitnessfunctions.AbstractWEKAFitnessFunction.java

License:Open Source License

/**
 * Loads the dataset and resolves the class index; only performed once
 * (guarded by m_init). Fix: the reader is now opened in a
 * try-with-resources block, so it is closed even when parsing throws --
 * the original only closed it on the success path and leaked it on error.
 *
 * @throws IllegalStateException if the dataset cannot be loaded
 */
protected synchronized void init() {
    if (!m_init) {
        // try-with-resources guarantees the reader is closed on failure, too
        try (FileReader reader = new FileReader(m_Dataset.getAbsolutePath())) {
            m_Instances = new Instances(reader);
        } catch (Exception e) {
            getLogger().log(Level.SEVERE, "Failed to read instances: " + m_Dataset, e);
            throw new IllegalStateException("Error loading dataset '" + m_Dataset + "': " + e);
        }

        // class index: "first", "last" or an explicit 0-based attribute index
        int classIndex;
        if (m_ClassIndex.equals("first"))
            classIndex = 0;
        else if (m_ClassIndex.equals("last"))
            classIndex = m_Instances.numAttributes() - 1;
        else
            classIndex = Integer.parseInt(m_ClassIndex);
        m_Instances.setClassIndex(classIndex);

        m_init = true;
    }
}

From source file:adams.opt.optimise.genetic.fitnessfunctions.AttributeSelection.java

License:Open Source License

/**
 * Evaluates the attribute subset encoded in the supplied optimization data:
 * attributes with weight 0 are zeroed out in a copy of the data, then the
 * classifier is cross-validated on that copy. Fix: dropped the dead-code
 * else branch "in.setValue(a, in.value(a))", which rewrote each kept value
 * with itself (a no-op).
 *
 * @param opd   the weights to evaluate
 * @return      the (adjusted) measure, 0 in case of an evaluation error
 */
public double evaluate(OptData opd) {
    init();

    // zero out all attributes with weight 0; the class attribute is skipped
    int[] weights = getWeights(opd);
    Instances newInstances = new Instances(getInstances());
    for (int i = 0; i < getInstances().numInstances(); i++) {
        Instance in = newInstances.instance(i);
        int cnt = 0;
        for (int a = 0; a < getInstances().numAttributes(); a++) {
            if (a == getInstances().classIndex())
                continue;
            if (weights[cnt++] == 0)
                in.setValue(a, 0);
            // non-zero weight: the attribute value is kept as-is
        }
    }

    try {
        // evaluate a fresh copy of the classifier via cross-validation
        Classifier newClassifier = (Classifier) OptionUtils.shallowCopy(getClassifier());
        Evaluation evaluation = new Evaluation(newInstances);
        evaluation.crossValidateModel(newClassifier, newInstances, getFolds(),
                new Random(getCrossValidationSeed()));

        // obtain the configured measure
        double measure;
        if (getMeasure() == Measure.ACC)
            measure = evaluation.pctCorrect();
        else if (getMeasure() == Measure.CC)
            measure = evaluation.correlationCoefficient();
        else if (getMeasure() == Measure.MAE)
            measure = evaluation.meanAbsoluteError();
        else if (getMeasure() == Measure.RAE)
            measure = evaluation.relativeAbsoluteError();
        else if (getMeasure() == Measure.RMSE)
            measure = evaluation.rootMeanSquaredError();
        else if (getMeasure() == Measure.RRSE)
            measure = evaluation.rootRelativeSquaredError();
        else
            throw new IllegalStateException("Unhandled measure '" + getMeasure() + "'!");

        return getMeasure().adjust(measure);
    } catch (Exception e) {
        getLogger().log(Level.SEVERE, "Error evaluating", e);
    }

    return 0;
}

From source file:adams.opt.optimise.genetic.fitnessfunctions.AttributeSelection.java

License:Open Source License

/**
 * Callback for best measure so far/*w w  w. j  a va2 s  .c o  m*/
 */
@Override
public void newBest(double val, OptData opd) {
    int cnt = 0;
    int[] weights = getWeights(opd);
    Instances newInstances = new Instances(getInstances());
    for (int i = 0; i < getInstances().numInstances(); i++) {
        Instance in = newInstances.instance(i);
        cnt = 0;
        for (int a = 0; a < getInstances().numAttributes(); a++) {
            if (a == getInstances().classIndex())
                continue;
            if (weights[cnt++] == 0) {
                in.setValue(a, 0);
            } else {
                in.setValue(a, in.value(a));
            }
        }
    }
    try {
        File file = new File(getOutputDirectory().getAbsolutePath() + File.separator
                + Double.toString(getMeasure().adjust(val)) + ".arff");
        file.createNewFile();
        Writer writer = new BufferedWriter(new FileWriter(file));
        Instances header = new Instances(newInstances, 0);

        // remove filter setup
        Remove remove = new Remove();
        remove.setAttributeIndices(getRemoveAsString(weights));
        remove.setInvertSelection(true);

        header.setRelationName(OptionUtils.getCommandLine(remove));

        writer.write(header.toString());
        writer.write("\n");
        for (int i = 0; i < newInstances.numInstances(); i++) {
            writer.write(newInstances.instance(i).toString());
            writer.write("\n");
        }
        writer.flush();
        writer.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:adaptedClusteringAlgorithms.MyFarthestFirst.java

License:Open Source License

/**
 * Generates a clusterer. Has to initialize all fields of the clusterer
 * that are not being set via options.
 *
 * Farthest-first traversal: picks a random first centroid, then repeatedly
 * adds the instance farthest from all centroids chosen so far.
 *
 * @param data set of instances serving as training data 
 * @throws Exception if the clusterer has not been 
 * generated successfully
 */
public void buildClusterer(Instances data) throws Exception {

    if (!SESAME.SESAME_GUI)
        MyFirstClusterer.weka_gui = true;

    // can clusterer handle the data?
    getCapabilities().testWithFail(data);

    //long start = System.currentTimeMillis();

    m_ReplaceMissingFilter = new ReplaceMissingValues();
    // Missing values replacement is not required so this modification is made
    /*m_ReplaceMissingFilter.setInputFormat(data);
    m_instances = Filter.useFilter(data, m_ReplaceMissingFilter);*/
    // NOTE(review): this declares a LOCAL named m_instances, which by its
    // m_ prefix likely shadows a field of the same name (see the commented-out
    // field assignment above) -- confirm the field is not expected to be set.
    Instances m_instances = new Instances(data);

    // To use semantic measurers through DistanceFunction interface
    m_DistanceFunction.setInstances(m_instances);

    initMinMax(m_instances);

    m_ClusterCentroids = new Instances(m_instances, m_NumClusters);

    int n = m_instances.numInstances();
    Random r = new Random(getSeed());
    boolean[] selected = new boolean[n];
    // minDistance[i] = distance from instance i to its nearest chosen centroid
    double[] minDistance = new double[n];

    for (int i = 0; i < n; i++)
        minDistance[i] = Double.MAX_VALUE;

    // seed the traversal with a random instance
    int firstI = r.nextInt(n);
    m_ClusterCentroids.add(m_instances.instance(firstI));
    selected[firstI] = true;

    updateMinDistance(minDistance, selected, m_instances, m_instances.instance(firstI));

    if (m_NumClusters > n)
        m_NumClusters = n;

    // repeatedly add the instance farthest from all centroids so far
    for (int i = 1; i < m_NumClusters; i++) {
        int nextI = farthestAway(minDistance, selected);
        m_ClusterCentroids.add(m_instances.instance(nextI));
        selected[nextI] = true;
        updateMinDistance(minDistance, selected, m_instances, m_instances.instance(nextI));
    }

    // drop the rows, keeping only the header (frees memory; local only)
    m_instances = new Instances(m_instances, 0);
    //long end = System.currentTimeMillis();
    //System.out.println("Clustering Time = " + (end-start));

    // Save memory!!
    m_DistanceFunction.clean();

    if (!SESAME.SESAME_GUI)
        MyFirstClusterer.weka_gui = true;
}

From source file:adaptedClusteringAlgorithms.MySimpleKMeans.java

License:Open Source License

/**
 * Generates a clusterer. Has to initialize all fields of the clusterer that
 * are not being set via options.
 *
 * Standard k-means: random initial centroids (duplicates skipped), then
 * alternate assignment and centroid-update steps until assignments stop
 * changing or the iteration limit is reached.
 * 
 * @param data set of instances serving as training data
 * @throws Exception if the clusterer has not been generated successfully
 */
@Override
public void buildClusterer(Instances data) throws Exception {

    if (!SESAME.SESAME_GUI)
        MyFirstClusterer.weka_gui = true;

    // can clusterer handle the data?
    getCapabilities().testWithFail(data);

    m_Iterations = 0;

    m_ReplaceMissingFilter = new ReplaceMissingValues();
    Instances instances = new Instances(data);

    instances.setClassIndex(-1);
    if (!m_dontReplaceMissing) {
        m_ReplaceMissingFilter.setInputFormat(instances);
        instances = Filter.useFilter(instances, m_ReplaceMissingFilter);
    }

    // per-attribute statistics over the full dataset (for reporting)
    m_FullMissingCounts = new int[instances.numAttributes()];
    if (m_displayStdDevs) {
        m_FullStdDevs = new double[instances.numAttributes()];
    }
    m_FullNominalCounts = new int[instances.numAttributes()][0];

    m_FullMeansOrMediansOrModes = moveCentroid(0, instances, false);
    for (int i = 0; i < instances.numAttributes(); i++) {
        m_FullMissingCounts[i] = instances.attributeStats(i).missingCount;
        if (instances.attribute(i).isNumeric()) {
            if (m_displayStdDevs) {
                m_FullStdDevs[i] = Math.sqrt(instances.variance(i));
            }
            if (m_FullMissingCounts[i] == instances.numInstances()) {
                m_FullMeansOrMediansOrModes[i] = Double.NaN; // mark missing as mean
            }
        } else {
            m_FullNominalCounts[i] = instances.attributeStats(i).nominalCounts;
            if (m_FullMissingCounts[i] > m_FullNominalCounts[i][Utils.maxIndex(m_FullNominalCounts[i])]) {
                m_FullMeansOrMediansOrModes[i] = -1; // mark missing as most common
                                                     // value
            }
        }
    }

    m_ClusterCentroids = new Instances(instances, m_NumClusters);
    int[] clusterAssignments = new int[instances.numInstances()];

    if (m_PreserveOrder) {
        m_Assignments = clusterAssignments;
    }

    m_DistanceFunction.setInstances(instances);

    Random RandomO = new Random(getSeed());
    int instIndex;
    // tracks already-chosen centroids so duplicates are skipped
    HashMap initC = new HashMap();
    DecisionTableHashKey hk = null;

    // work on a copy when order must be preserved, since selection swaps rows
    Instances initInstances = null;
    if (m_PreserveOrder) {
        initInstances = new Instances(instances);
    } else {
        initInstances = instances;
    }

    // random initial centroids via partial Fisher-Yates shuffle
    for (int j = initInstances.numInstances() - 1; j >= 0; j--) {
        instIndex = RandomO.nextInt(j + 1);
        hk = new DecisionTableHashKey(initInstances.instance(instIndex), initInstances.numAttributes(), true);
        if (!initC.containsKey(hk)) {
            m_ClusterCentroids.add(initInstances.instance(instIndex));
            initC.put(hk, null);
        }
        initInstances.swap(j, instIndex);

        if (m_ClusterCentroids.numInstances() == m_NumClusters) {
            break;
        }
    }

    // fewer distinct instances than requested clusters shrinks the count
    m_NumClusters = m_ClusterCentroids.numInstances();

    // removing reference
    initInstances = null;

    int i;
    boolean converged = false;
    int emptyClusterCount;
    Instances[] tempI = new Instances[m_NumClusters];
    m_squaredErrors = new double[m_NumClusters];
    m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
    m_ClusterMissingCounts = new int[m_NumClusters][instances.numAttributes()];
    // main k-means loop: assign, recompute centroids, repeat until stable
    while (!converged) {
        emptyClusterCount = 0;
        m_Iterations++;
        converged = true;
        // assignment step: any changed assignment means not converged yet
        for (i = 0; i < instances.numInstances(); i++) {
            Instance toCluster = instances.instance(i);
            int newC = clusterProcessedInstance(toCluster, true);
            if (newC != clusterAssignments[i]) {
                converged = false;
            }
            clusterAssignments[i] = newC;
        }

        // update centroids
        m_ClusterCentroids = new Instances(instances, m_NumClusters);
        for (i = 0; i < m_NumClusters; i++) {
            tempI[i] = new Instances(instances, 0);
        }
        for (i = 0; i < instances.numInstances(); i++) {
            tempI[clusterAssignments[i]].add(instances.instance(i));
        }
        for (i = 0; i < m_NumClusters; i++) {
            if (tempI[i].numInstances() == 0) {
                // empty cluster
                emptyClusterCount++;
            } else {
                moveCentroid(i, tempI[i], true);
            }
        }

        if (m_Iterations == m_MaxIterations) {
            converged = true;
        }

        // drop empty clusters; compact the bookkeeping arrays on convergence
        if (emptyClusterCount > 0) {
            m_NumClusters -= emptyClusterCount;
            if (converged) {
                Instances[] t = new Instances[m_NumClusters];
                int index = 0;
                for (int k = 0; k < tempI.length; k++) {
                    if (tempI[k].numInstances() > 0) {
                        t[index] = tempI[k];

                        for (i = 0; i < tempI[k].numAttributes(); i++) {
                            m_ClusterNominalCounts[index][i] = m_ClusterNominalCounts[k][i];
                        }
                        index++;
                    }
                }
                tempI = t;
            } else {
                tempI = new Instances[m_NumClusters];
            }
        }

        if (!converged) {
            m_squaredErrors = new double[m_NumClusters];
            m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
        }
    }

    // final per-cluster statistics (sizes, optional standard deviations)
    if (m_displayStdDevs) {
        m_ClusterStdDevs = new Instances(instances, m_NumClusters);
    }
    m_ClusterSizes = new int[m_NumClusters];
    for (i = 0; i < m_NumClusters; i++) {
        if (m_displayStdDevs) {
            double[] vals2 = new double[instances.numAttributes()];
            for (int j = 0; j < instances.numAttributes(); j++) {
                if (instances.attribute(j).isNumeric()) {
                    vals2[j] = Math.sqrt(tempI[i].variance(j));
                } else {
                    vals2[j] = Instance.missingValue();
                }
            }
            m_ClusterStdDevs.add(new Instance(1.0, vals2));
        }
        m_ClusterSizes[i] = tempI[i].numInstances();
    }

    // Save memory!!
    m_DistanceFunction.clean();

    if (!SESAME.SESAME_GUI)
        MyFirstClusterer.weka_gui = true;
}