Example usage for weka.core.Instances.sort

Introduction

On this page you can find example usages of the sort method of weka.core.Instances.

Prototype

public void sort(Attribute att)

Source Link

Document

Sorts the instances based on an attribute.

Usage

From source file:adams.flow.transformer.WekaSubsets.java

License:Open Source License

/**
 * Executes the flow item./*from ww  w  . j  a v a  2 s .  c o  m*/
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    Instances data;
    Double old;
    Double curr;
    int i;
    int index;
    Instance inst;

    result = null;

    m_Queue.clear();

    // copy and sort data
    data = new Instances((Instances) m_InputToken.getPayload());
    m_Index.setData(data);
    ;
    index = m_Index.getIntIndex();
    data.sort(index);

    // create subsets
    old = null;
    i = 0;
    while (i < data.numInstances()) {
        inst = data.instance(i);
        curr = inst.value(index);
        if ((old == null) || !curr.equals(old)) {
            m_Queue.add(new Instances(data, data.numInstances()));
            old = curr;
        }
        m_Queue.get(m_Queue.size() - 1).add(inst);
        i++;
    }

    // compact subsets
    for (Instances sub : m_Queue)
        sub.compactify();

    return result;
}

From source file:adams.gui.visualization.instance.LoadDatasetDialog.java

License:Open Source License

/**
 * Returns a copy of the full dataset, can be null if none loaded.
 * <p>
 * The copy is sorted and gets its class index set according to the current
 * selections of the sorting and class combo boxes.
 *
 * @return      the full dataset, or null if no dataset is loaded
 */
public Instances getDataset() {
    int index;
    Instances result;

    // nothing loaded: honour the documented contract instead of failing
    // inside the copy constructor
    if (m_Instances == null)
        return null;

    result = new Instances(m_Instances);

    // the first combo box entry does not trigger sorting; the remaining
    // entries are offset by one against the attribute indices
    if (m_ComboBoxSorting.getSelectedIndex() > 0)
        result.sort(m_ComboBoxSorting.getSelectedIndex() - 1);

    // same offset for the class combo box: its first entry maps to -1
    // NOTE(review): presumably the first entry means "no class" - confirm
    index = m_ComboBoxClass.getSelectedIndex();
    if (index > -1)
        index--;
    result.setClassIndex(index);

    return result;
}

From source file:de.ugoe.cs.cpdp.dataprocessing.SimulationFilter.java

License:Apache License

/**
 * Rebuilds {@code traindata} in place from a copy of itself that is sorted by
 * the "Artifact.Target.StateID" attribute.
 * <p>
 * Instances are selected per "Artifact.Name" value based on their class value
 * (0.0 is treated as "no bug", 1.0 as "bug"); see the comments in the body for
 * the exact selection rules. {@code testdata} is not used.
 *
 * @param testdata   the test data (ignored)
 * @param traindata  the training data; emptied and refilled by this method
 */
@Override
public void apply(Instances testdata, Instances traindata) {
    Instances newDataSet = new Instances(traindata);
    traindata.delete();

    // Most recent bug-free instance seen so far, keyed by artifact name
    HashMap<Double, Instance> artifactNames = new HashMap<Double, Instance>();

    // Artifacts that were already added because their first occurrence had a bug
    ArrayList<Double> firstOccurenceArtifactNames = new ArrayList<Double>();

    // Sort dataset (StateID is connected to the date of commit: a lower StateID
    // means an earlier commit than a higher StateID)
    Attribute wekaAttribute = newDataSet.attribute("Artifact.Target.StateID");
    newDataSet.sort(wekaAttribute);

    // The attribute lookup does not depend on the instance, so do it once
    Attribute wekaArtifactName = newDataSet.attribute("Artifact.Name");

    /*
     * Logical summary: If there is an instance that does not have a bug, put it into the
     * hashmap (only unique artifact names in there, the latest instance wins).
     *
     * If there is an instance that has a bug, look up whether it is in the hashmap already
     * (this means: it did not have a bug before!): If this is true, add it to the new dataset
     * and remove it from the hashmap, so that new changes from "nonBug" -> "bug" for this file
     * can be found.
     *
     * If the instance has a bug and is not in the hashmap (this means: the file has a bug with
     * its first occurrence, or this file only has bugs and no instance without a bug), then (if
     * it is not in the list above) add it to the new dataset. This way it is possible to get
     * the first occurrence of a file which has a bug.
     */
    for (int i = 0; i < newDataSet.numInstances(); i++) {
        Instance wekaInstance = newDataSet.instance(i);

        double newBugLabel = wekaInstance.classValue();
        Double artifactName = wekaInstance.value(wekaArtifactName);

        if (newBugLabel == 0.0) {
            // insert or overwrite: both cases store the current instance
            artifactNames.put(artifactName, wekaInstance);
        } else if (newBugLabel == 1.0) {
            if (artifactNames.containsKey(artifactName)) {
                traindata.add(wekaInstance);
                artifactNames.remove(artifactName);
            } else if (!firstOccurenceArtifactNames.contains(artifactName)) {
                traindata.add(wekaInstance);
                firstOccurenceArtifactNames.add(artifactName);
            }
        }
        // any other class value is ignored
    }

    // If we have a file that never had a bug (this is, when it is NOT in the
    // newly created dataset, but it is in the HashMap from above), add it to
    // the new dataset

    // NOTE(review): index 0 is hard-coded here while the loop above looks the
    // artifact name up via "Artifact.Name" - confirm that this attribute is
    // always the first one
    double[] artifactNamesinNewDataSet = traindata.attributeToDoubleArray(0);
    HashMap<Double, Instance> artifactNamesCopy = new HashMap<Double, Instance>(artifactNames);

    for (Double artifactName : artifactNames.keySet()) {
        for (int i = 0; i < artifactNamesinNewDataSet.length; i++) {
            // numeric comparison (the Double is unboxed)
            if (artifactNamesinNewDataSet[i] == artifactName) {
                artifactNamesCopy.remove(artifactName);
                break; // already removed, further matches change nothing
            }
        }
    }

    for (Instance neverBuggy : artifactNamesCopy.values()) {
        traindata.add(neverBuggy);
    }

}

From source file:Helper.CustomFilter.java

/**
 * Sorts the dataset on each attribute whose type string is "numeric" and
 * passes it through {@code toRange} for that attribute. The last attribute is
 * never processed.
 *
 * @param structure  the dataset to convert
 * @return           the dataset returned by the last {@code toRange} call, or
 *                   the input itself if no attribute qualified
 * @throws Exception declared by the signature; presumably raised by
 *                   {@code toRange} - confirm against its definition
 */
public Instances convertNumericRange(Instances structure) throws Exception {
    int attIndex = 0;
    // the bound is re-read every round because toRange() hands back a
    // (possibly different) dataset
    while (attIndex < structure.numAttributes() - 1) {
        final String typeName = structure.attribute(attIndex).typeToString(structure.attribute(attIndex));
        if (typeName.equals("numeric")) {
            structure.sort(attIndex);
            structure = toRange(structure, attIndex);
        }
        attIndex++;
    }
    return structure;
}

From source file:j48.BinC45Split.java

License:Open Source License

/**
 * Creates a C4.5-type split on the given data.
 * <p>
 * Resets the split statistics and then delegates to the handler matching the
 * type of the split attribute; for non-nominal attributes the data is sorted
 * on that attribute first.
 *
 * @param trainInstances the data to build the split on; it is sorted in
 *                       place when the split attribute is not nominal
 * @exception Exception if something goes wrong
 */
public void buildClassifier(Instances trainInstances) throws Exception {

    // Start from a clean state.
    m_numSubsets = 0;
    m_splitPoint = Double.MAX_VALUE;
    m_infoGain = 0;
    m_gainRatio = 0;

    final boolean nominalSplit = trainInstances.attribute(m_attIndex).isNominal();
    if (!nominalSplit) {
        // Numeric attribute: the handler is fed data ordered by its value.
        trainInstances.sort(trainInstances.attribute(m_attIndex));
        handleNumericAttribute(trainInstances);
        return;
    }

    // Enumerated attribute: no ordering required.
    handleEnumeratedAttribute(trainInstances);
}

From source file:j48.C45Split.java

License:Open Source License

/**
 * Builds the split on the given data.
 * <p>
 * Nominal attributes go straight to the enumerated handler. For all other
 * attributes the data is sorted on the split attribute, the fields
 * {@code rrrrr} and {@code lllll} are derived from the base-10 logarithm of
 * the attribute's standard deviation, and the numeric handler is invoked.
 *
 * @param trainInstances the data to build the split on; sorted in place for
 *                       non-nominal split attributes
 * @throws Exception if something goes wrong
 */
public void buildClassifier(Instances trainInstances) throws Exception {

    // Reset the per-split state.
    m_numSubsets = 0;
    m_splitPoint = Double.MAX_VALUE;
    m_infoGain = 0;
    m_gainRatio = 0;

    if (trainInstances.attribute(m_attIndex).isNominal()) {
        // Enumerated attribute: one branch per attribute value.
        m_complexityIndex = trainInstances.attribute(m_attIndex).numValues();
        m_index = m_complexityIndex;
        handleEnumeratedAttribute(trainInstances);
        return;
    }

    // Numeric attribute: binary split on sorted data.
    m_complexityIndex = 2;
    m_index = 0;
    trainInstances.sort(trainInstances.attribute(m_attIndex));

    final double stdDev = trainInstances.attributeStats(m_attIndex).numericStats.stdDev;
    // NOTE(review): log10 yields -Infinity for a standard deviation of 0 -
    // confirm that handleNumericAttribute copes with that
    final double logStdDev = Math.log10(stdDev);
    final double logOver1Point2 = logStdDev / 1.2;
    final double logOver8 = logStdDev / 8;

    // The two fields swap roles depending on how spread out the values are.
    if (stdDev > 200) {
        rrrrr = logOver1Point2;
        lllll = logOver8;
    } else {
        lllll = logOver1Point2;
        rrrrr = logOver8;
    }

    handleNumericAttribute(trainInstances);
}

From source file:j48.NBTreeSplit.java

License:Open Source License

/**
 * Creates a NBTree-type split on the given data. Assumes that none of
 * the class values is missing./*from  ww w  .  ja v  a 2s. c o  m*/
 *
 * @exception Exception if something goes wrong
 */
public void buildClassifier(Instances trainInstances) throws Exception {

    // Initialize the remaining instance variables.
    m_numSubsets = 0;
    m_splitPoint = Double.MAX_VALUE;
    m_errors = 0;
    if (m_globalNB != null) {
        m_errors = m_globalNB.getErrors();
    }

    // Different treatment for enumerated and numeric
    // attributes.
    if (trainInstances.attribute(m_attIndex).isNominal()) {
        m_complexityIndex = trainInstances.attribute(m_attIndex).numValues();
        handleEnumeratedAttribute(trainInstances);
    } else {
        m_complexityIndex = 2;
        trainInstances.sort(trainInstances.attribute(m_attIndex));
        handleNumericAttribute(trainInstances);
    }
}

From source file:jjj.asap.sas.ensemble.impl.CrossValidatedEnsemble.java

License:Open Source License

/**
 * Builds the wrapped ensemble on all learners and replaces its kappa with a
 * cross-validated estimate.
 * <p>
 * The ids predicted by the first learner are split into {@code nFolds}
 * stratified folds. For each fold the wrapped ensemble is built on copies of
 * the learners without the fold's ids and then classifies exactly those ids;
 * the kappa of all collected predictions is stored on the returned learner.
 *
 * @param essaySet      the essay set to build for
 * @param ensembleName  the name of the ensemble
 * @param learners      the weak learners; if empty, the call is delegated
 *                      unchanged to the wrapped ensemble
 * @return              the ensemble built on all learners, carrying the
 *                      cross-validated kappa (except in the empty case)
 */
@Override
public StrongLearner build(int essaySet, String ensembleName, List<WeakLearner> learners) {

    // can't handle empty case
    if (learners.isEmpty()) {
        return this.ensemble.build(essaySet, ensembleName, learners);
    }

    // create a dummy dataset.
    DatasetBuilder builder = new DatasetBuilder();
    builder.addVariable("id");
    builder.addNominalVariable("class", Contest.getRubrics(essaySet));
    Instances dummy = builder.getDataset("dummy");

    // add data: one row (id, gold label) per id predicted by the first learner
    Map<Double, Double> groundTruth = Contest.getGoldStandard(essaySet);
    for (double id : learners.get(0).getPreds().keySet()) {
        dummy.add(new DenseInstance(1.0, new double[] { id, groundTruth.get(id) }));
    }

    // stratify; sorting by id before the seeded shuffle makes the fold
    // assignment independent of the key set's iteration order
    dummy.sort(0);
    dummy.randomize(new Random(1));
    dummy.setClassIndex(1);
    dummy.stratify(nFolds);

    // now evaluate each fold
    Map<Double, Double> preds = new HashMap<Double, Double>();
    for (int k = 0; k < nFolds; k++) {
        // only the held-out part is needed; the training part is derived
        // below by removing the held-out ids from each learner copy
        Instances test = dummy.testCV(nFolds, k);

        List<WeakLearner> cvLearners = new ArrayList<WeakLearner>();
        for (WeakLearner learner : learners) {
            WeakLearner copy = learner.copyOf();
            for (int i = 0; i < test.numInstances(); i++) {
                copy.getPreds().remove(test.instance(i).value(0));
                copy.getProbs().remove(test.instance(i).value(0));
            }
            cvLearners.add(copy);
        }

        // train on fold
        StrongLearner cv = this.ensemble.build(essaySet, ensembleName, cvLearners);

        // restrict the learners chosen for this fold to the held-out ids,
        // taking the values from the original (unfiltered) learners
        List<WeakLearner> testLearners = new ArrayList<WeakLearner>();
        for (WeakLearner learner : cv.getLearners()) {
            WeakLearner copy = learner.copyOf();
            copy.getPreds().clear();
            copy.getProbs().clear();
            WeakLearner source = find(copy.getName(), learners);
            for (int i = 0; i < test.numInstances(); i++) {
                double id = test.instance(i).value(0);
                copy.getPreds().put(id, source.getPreds().get(id));
                copy.getProbs().put(id, source.getProbs().get(id));
            }
            testLearners.add(copy);
        }

        preds.putAll(this.ensemble.classify(essaySet, ensembleName, testLearners, cv.getContext()));
    }

    // now prepare final result

    StrongLearner strong = this.ensemble.build(essaySet, ensembleName, learners);

    // only referenced by the disabled log statement below
    double trainingError = strong.getKappa();
    double cvError = Calc.kappa(essaySet, preds, groundTruth);
    //   Job.log(essaySet+"-"+ensembleName, "XVAL: training error = " + trainingError + " cv error = " + cvError);      

    strong.setKappa(cvError);
    return strong;
}

From source file:lu.lippmann.cdb.datasetview.tasks.SortInstancesTask.java

License:Open Source License

/**
 * {@inheritDoc}
 * <p>
 * Asks the user to pick an attribute in a dialog and sorts the dataset in
 * place on it; the dataset is returned untouched when the dialog yields null.
 */
@Override
Instances process0(final Instances dataSet) throws Exception {

    final Object[] attributeNames = WekaDataStatsUtil.getAttributeNames(dataSet).toArray();
    final String chosen = (String) JOptionPane.showInputDialog(null, "Select an attribute:\n",
            "Attribute selection", JOptionPane.PLAIN_MESSAGE, null, attributeNames, "");

    // no selection: nothing to sort
    if (chosen == null) {
        return dataSet;
    }

    dataSet.sort(dataSet.attribute(chosen));
    return dataSet;
}

From source file:lu.lippmann.cdb.ext.hydviga.gaps.GapFiller.java

License:Open Source License

/**
 * Fills the gaps of a copy of the given dataset via {@code fillGaps0}.
 * <p>
 * The first date attribute is replaced by a plain numeric copy while
 * {@code fillGaps0} runs and is rebuilt afterwards as a date attribute named
 * {@code <name>_new} at the last position; the result is sorted on it.
 *
 * @param ds  the dataset to process; it is not modified
 * @return    the processed copy
 * @throws Exception if the dataset has no date attribute, or if the
 *                   processing fails
 */
private Instances fillAllGaps(final Instances ds) throws Exception {
    Instances newds = new Instances(ds);

    // validate the index before using it: looking up attribute -1 would
    // fail before this explicit error could be raised
    final int firstDateIdx = WekaDataStatsUtil.getFirstDateAttributeIdx(newds);
    if (firstDateIdx == -1) {
        throw new Exception("No date attribute in this dataset!");
    }
    final String datename = newds.attribute(firstDateIdx).name();

    /* add a 'fake numerical' time field */
    newds.insertAttributeAt(new Attribute(datename + "_fake"), newds.numAttributes());
    for (int i = 0; i < newds.numInstances(); i++) {
        newds.instance(i).setValue(newds.numAttributes() - 1, newds.instance(i).value(firstDateIdx));
    }

    /* remove the 'true' time field */
    newds.deleteAttributeAt(firstDateIdx);

    /* process the dataset */
    newds = fillGaps0(newds);

    /* re-add the 'true' time field according to the 'fake numerical' time field */
    // the date format is read from the untouched input, whose date attribute still exists
    final String df = ds.attribute(firstDateIdx).getDateFormat();
    newds.insertAttributeAt(new Attribute(datename + "_new", df), newds.numAttributes());
    for (int i = 0; i < newds.numInstances(); i++) {
        // last attribute = new date field, second to last = fake numeric field
        newds.instance(i).setValue(newds.numAttributes() - 1,
                newds.instance(i).value(newds.numAttributes() - 2));
    }

    /* delete the 'fake numerical' time field */
    newds.deleteAttributeAt(newds.numAttributes() - 2);

    /* order the result chronologically on the rebuilt date field */
    newds.sort(newds.numAttributes() - 1);

    return newds;
}