List of usage examples for weka.core.Instances.classIndex()
public int classIndex()
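Every example below follows the same contract: classIndex() returns the zero-based index of the class attribute, or -1 if no class attribute has been set. A minimal sketch of that behavior (the ARFF file name is an assumption, not taken from any example below):

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class ClassIndexDemo {
    public static void main(String[] args) throws Exception {
        Instances data = new DataSource("iris.arff").getDataSet(); // file name assumed
        System.out.println(data.classIndex());                     // -1: no class attribute set yet
        data.setClassIndex(data.numAttributes() - 1);               // conventionally the last attribute
        System.out.println(data.classIndex());                     // now numAttributes() - 1
        System.out.println(data.classAttribute().name());
    }
}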
From source file:entity.NoiseInjectionManager.java
License:Open Source License
/**
 * Increments fp and fn by the specified percentages.
 * Randomizes the order of instances and modifies instances until the noise quota is reached,
 * then randomizes the instances again.
 * NOTE: it modifies the given dataset, because it is passed by reference.
 *
 * @param origDataset
 * @param fpPercentage
 * @param fnPercentage
 * @return Instances noisyDataset
 */
public Instances addNoiseToDataset(Instances origDataset, BigDecimal fpPercentage, BigDecimal fnPercentage) {
    // exit if no noise must be added
    if (fnPercentage.equals(BigDecimal.ZERO) && fpPercentage.equals(BigDecimal.ZERO)) {
        if (verbose)
            System.out.println("[NoiseManager , addNoiseToDataset] no noise to add");
        return origDataset;
    }
    // total instances in dataset
    int numInstances = origDataset.numInstances();
    // count positive (buggy) and negative (non-buggy) instances
    int numOfPositives = 0;
    int numOfNegatives = 0;
    for (int j = 0; j < numInstances; j++) {
        if (origDataset.instance(j).stringValue(origDataset.classIndex()).equals(Settings.buggyLabel)) {
            numOfPositives++;
        }
        // this is a redundant check, but better safe than sorry
        else if (origDataset.instance(j).stringValue(origDataset.classIndex()).equals(Settings.nonbuggyLabel)) {
            numOfNegatives++;
        }
    }
    // calculate the number of false positives to insert
    int fpToInsert = (int) Math.round(numOfNegatives * fpPercentage.doubleValue() / 100);
    int fpInserted = 0;
    if (verbose)
        System.out.println("\n\n[NoiseManager , addNoiseToDataset] fpToInsert= " + fpToInsert + ", totInstances= "
                + origDataset.numInstances() + " true negatives= " + numOfNegatives + " %fp= " + fpPercentage);
    // calculate the number of false negatives to insert
    int fnToInsert = (int) Math.round(numOfPositives * fnPercentage.doubleValue() / 100);
    int fnInserted = 0;
    if (verbose)
        System.out.println("[NoiseManager , addNoiseToDataset] fnToInsert= " + fnToInsert + ", totInstances= "
                + origDataset.numInstances() + " true positives= " + numOfPositives + " %fn= " + fnPercentage);
    if (verbose)
        System.out.println("[NoiseManager , addNoiseToDataset] buggy label: " + Settings.buggyLabel
                + " - nonbuggy label: " + Settings.nonbuggyLabel);
    // randomize order of instances
    origDataset.randomize(RandomizationManager.randomGenerator);
    for (int i = 0; i < origDataset.numInstances(); i++) {
        if (verbose)
            System.out.print("\nORIGINAL VALUES: "
                    + origDataset.instance(i).value(origDataset.attribute(origDataset.classIndex())) + " - "
                    + origDataset.instance(i).stringValue(origDataset.classIndex()));
        // get the class attribute (it HAS to be the last one)
        Attribute att = origDataset.instance(i).attribute(origDataset.classIndex());
        // if there are fn left to add and this is a positive instance, turn it into a negative, making it a fn
        if ((fnInserted < fnToInsert)
                && (origDataset.instance(i).stringValue(origDataset.classIndex()).equals(Settings.buggyLabel))) {
            origDataset.instance(i).setValue(att, Settings.nonbuggyLabel);
            fnInserted++;
            if (verbose)
                System.out.print(" - added FN, added " + fnInserted + " of " + fnToInsert + " ");
        }
        // if there are fp left to add and this is a negative instance, turn it into a positive, making it a fp
        else if ((fpInserted < fpToInsert)
                && (origDataset.instance(i).stringValue(origDataset.classIndex()).equals(Settings.nonbuggyLabel))) {
            origDataset.instance(i).setValue(att, Settings.buggyLabel);
            fpInserted++;
            if (verbose)
                System.out.print(" - added FP, added " + fpInserted + " of " + fpToInsert + " ");
        }
        if (verbose)
            System.out.print(" FINAL ELEMENT VALUES: "
                    + origDataset.instance(i).value(origDataset.attribute(origDataset.classIndex())) + " - "
                    + origDataset.instance(i).stringValue(origDataset.classIndex()));
    }
    // randomize order of instances
    origDataset.randomize(RandomizationManager.randomGenerator);
    return origDataset;
}
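Because this method mutates the dataset it receives, a call site would typically load its own copy first. A hypothetical usage sketch (the no-arg NoiseInjectionManager constructor and the file name are assumptions, not shown in the source above):

// inject 10% false positives and 5% false negatives (percentages assumed for illustration)
Instances data = new weka.core.converters.ConverterUtils.DataSource("defects.arff").getDataSet(); // file name assumed
data.setClassIndex(data.numAttributes() - 1); // class attribute must be last, per the comment above
NoiseInjectionManager manager = new NoiseInjectionManager(); // constructor signature assumed
Instances noisy = manager.addNoiseToDataset(data, new BigDecimal("10"), new BigDecimal("5"));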
From source file:entity.NoiseInjectionManager.java
License:Open Source License
/**
 * Increments fp and fn in combination by a specified percentage.
 * Randomizes the order of instances and modifies instances until the noise quota is reached,
 * then randomizes the instances again.
 * NOTE: it modifies the given dataset, because it is passed by reference.
 *
 * @param origDataset
 * @param combinedFpFnPercentage
 * @return noisyData
 */
public Instances addNoiseToDataset(Instances origDataset, BigDecimal combinedFpFnPercentage) {
    // exit if no noise must be added
    if (combinedFpFnPercentage.equals(BigDecimal.ZERO)) {
        if (verbose)
            System.out.println("[NoiseManager , addNoiseToDataset] no noise to add");
        return origDataset;
    }
    // total instances in dataset
    int numInstances = origDataset.numInstances();
    // number of instances to flip, fp and fn combined
    int fpAndFnToInsert = (int) Math.round(numInstances * combinedFpFnPercentage.doubleValue() / 100);
    int fpAndFnInserted = 0;
    if (verbose)
        System.out.println("\n\n[NoiseManager , addNoiseToDataset] fpAndFnToInsert= " + fpAndFnToInsert
                + ", totInstances= " + origDataset.numInstances());
    if (verbose)
        System.out.println("[NoiseManager , addNoiseToDataset] buggy label: " + Settings.buggyLabel
                + " - nonbuggy label: " + Settings.nonbuggyLabel);
    // randomize order of instances
    origDataset.randomize(RandomizationManager.randomGenerator);
    for (int i = 0; i < origDataset.numInstances(); i++) {
        if (verbose)
            System.out.print("\nORIGINAL VALUES: "
                    + origDataset.instance(i).value(origDataset.attribute(origDataset.classIndex())) + " - "
                    + origDataset.instance(i).stringValue(origDataset.classIndex()));
        // get the class attribute (it HAS to be the last one)
        Attribute att = origDataset.instance(i).attribute(origDataset.classIndex());
        // if there are fn or fp left to add
        if (fpAndFnInserted < fpAndFnToInsert) {
            // if this is a positive instance, turn it into a negative, making it a fn
            if (origDataset.instance(i).stringValue(origDataset.classIndex()).equals(Settings.buggyLabel)) {
                if (verbose)
                    System.out.print(" - added FN, added " + fpAndFnInserted + " of " + fpAndFnToInsert + " ");
                origDataset.instance(i).setValue(att, Settings.nonbuggyLabel);
                fpAndFnInserted++;
            }
            // if this is a negative instance, turn it into a positive, making it a fp
            else if (origDataset.instance(i).stringValue(origDataset.classIndex()).equals(Settings.nonbuggyLabel)) {
                if (verbose)
                    System.out.print(" - added FP, added " + fpAndFnInserted + " of " + fpAndFnToInsert + " ");
                origDataset.instance(i).setValue(att, Settings.buggyLabel);
                fpAndFnInserted++;
            }
        }
        if (verbose)
            System.out.print(" FINAL ELEMENT VALUES: "
                    + origDataset.instance(i).value(origDataset.attribute(origDataset.classIndex())) + " - "
                    + origDataset.instance(i).stringValue(origDataset.classIndex()));
    }
    // randomize order of instances
    origDataset.randomize(RandomizationManager.randomGenerator);
    return origDataset;
}
From source file:etc.aloe.filters.StringToDictionaryVector.java
License:Open Source License
private int[] determineDictionary(Instances instances) {
    if (stringAttributeIndex < 0) {
        throw new IllegalStateException("String attribute index not valid");
    }
    // Operate on a per-class basis if the class attribute is set
    int classInd = instances.classIndex();
    int values = 1;
    if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
        values = instances.attribute(classInd).numValues();
    }
    HashMap<String, Integer> termIndices = new HashMap<String, Integer>();
    for (int i = 0; i < termList.size(); i++) {
        termIndices.put(termList.get(i), i);
    }
    // Create the trie for matching terms
    Trie termTrie = new Trie(termList);
    // Initialize the dictionary/count map, one per class value
    ArrayList<HashMap<Integer, Count>> termCounts = new ArrayList<HashMap<Integer, Count>>();
    for (int z = 0; z < values; z++) {
        termCounts.add(new HashMap<Integer, Count>());
    }
    // Go through all the instances and count the emoticons
    for (int i = 0; i < instances.numInstances(); i++) {
        Instance instance = instances.instance(i);
        int vInd = 0;
        if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
            vInd = (int) instance.classValue();
        }
        // Get the string attribute to examine
        String stringValue = instance.stringValue(stringAttributeIndex);
        HashMap<Integer, Count> termCountsForClass = termCounts.get(vInd);
        HashMap<String, Integer> termMatches = termTrie.countNonoverlappingMatches(stringValue);
        for (Map.Entry<String, Integer> entry : termMatches.entrySet()) {
            String term = entry.getKey();
            int termIdx = termIndices.get(term);
            int matches = entry.getValue();
            Count count = termCountsForClass.get(termIdx);
            if (count == null) {
                count = new Count(0);
                termCountsForClass.put(termIdx, count);
            }
            if (matches > 0) {
                count.docCount += 1;
                count.count += matches;
            }
        }
    }
    // Figure out the minimum required word frequency per class
    int prune[] = new int[values];
    for (int z = 0; z < values; z++) {
        HashMap<Integer, Count> termCountsForClass = termCounts.get(z);
        int array[] = new int[termCountsForClass.size()];
        int pos = 0;
        for (Map.Entry<Integer, Count> entry : termCountsForClass.entrySet()) {
            array[pos] = entry.getValue().count;
            pos++;
        }
        // sort the array
        sortArray(array);
        if (array.length < m_WordsToKeep) {
            // if there aren't enough words, set the threshold to minFreq
            prune[z] = m_minTermFreq;
        } else {
            // otherwise set it to be at least minFreq
            prune[z] = Math.max(m_minTermFreq, array[array.length - m_WordsToKeep]);
        }
    }
    // Add the word vector attributes (eliminating duplicates that occur in multiple classes)
    HashSet<String> selectedTerms = new HashSet<String>();
    for (int z = 0; z < values; z++) {
        HashMap<Integer, Count> termCountsForClass = termCounts.get(z);
        for (Map.Entry<Integer, Count> entry : termCountsForClass.entrySet()) {
            int termIndex = entry.getKey();
            String term = termList.get(termIndex);
            Count count = entry.getValue();
            if (count.count >= prune[z]) {
                selectedTerms.add(term);
            }
        }
    }
    // Save the selected terms as a list
    this.m_selectedTerms = new ArrayList<String>(selectedTerms);
    this.m_selectedTermsTrie = new Trie(this.m_selectedTerms);
    this.m_NumInstances = instances.size();
    // Construct the selected-term-to-index map
    this.m_selectedTermIndices = new HashMap<String, Integer>();
    for (int i = 0; i < m_selectedTerms.size(); i++) {
        m_selectedTermIndices.put(m_selectedTerms.get(i), i);
    }
    // Compute document frequencies, organized by selected term index (not original term index)
    int[] docsCounts = new int[m_selectedTerms.size()];
    for (int i = 0; i < m_selectedTerms.size(); i++) {
        String term = m_selectedTerms.get(i);
        int termIndex = termIndices.get(term);
        int docsCount = 0;
        for (int z = 0; z < values; z++) {
            HashMap<Integer, Count> termCountsForClass = termCounts.get(z);
            Count count = termCountsForClass.get(termIndex);
            if (count != null) {
                docsCount += count.docCount;
            }
        }
        docsCounts[i] = docsCount;
    }
    return docsCounts;
}
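The per-class bookkeeping above rests on two calls: classIndex() locates the class attribute, and classValue() returns the class as a zero-based index into its nominal values, which is why it can be used directly as a bucket index. A self-contained sketch of that grouping idea, separated from the dictionary logic (the ARFF file name is an assumption):

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class PerClassCounts {
    public static void main(String[] args) throws Exception {
        Instances data = new DataSource("messages.arff").getDataSet(); // file name assumed
        data.setClassIndex(data.numAttributes() - 1);
        // one counter slot per nominal class value, mirroring the termCounts list above
        int[] perClass = new int[data.attribute(data.classIndex()).numValues()];
        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);
            perClass[(int) inst.classValue()]++; // classValue() is the nominal value's index
        }
        for (int z = 0; z < perClass.length; z++) {
            System.out.println(data.classAttribute().value(z) + ": " + perClass[z]);
        }
    }
}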
From source file:experimentalclassifier.ExperimentalClassifier.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws Exception {
    DataSource source = new DataSource("data/iris.csv");
    Instances data = source.getDataSet();
    if (data.classIndex() == -1) {
        data.setClassIndex(data.numAttributes() - 1);
    }
    data.randomize(new Random());
    String[] options = weka.core.Utils.splitOptions("-P 30");
    RemovePercentage remove = new RemovePercentage();
    remove.setOptions(options);
    remove.setInputFormat(data);
    Instances train = Filter.useFilter(data, remove);
    remove.setInvertSelection(true);
    remove.setInputFormat(data);
    Instances test = Filter.useFilter(data, remove);
    Classifier classifier = new HardCodedClassifier();
    classifier.buildClassifier(train); // currently, this does nothing
    Evaluation eval = new Evaluation(train);
    eval.evaluateModel(classifier, test);
    System.out.println(eval.toSummaryString("\nResults\n======\n", false));
}
From source file:expshell.ExpShell.java
/**
 * @param args the command line arguments
 * @throws java.lang.Exception
 */
public static void main(String[] args) throws Exception {
    String file = "C:\\Users\\YH Jonathan Kwok\\Documents\\NetBeansProjects\\ExpShell\\src\\expshell\\iris.csv";
    DataSource source = new DataSource(file);
    Instances data = source.getDataSet();
    if (data.classIndex() == -1)
        data.setClassIndex(data.numAttributes() - 1);
    // randomize it
    data.randomize(new Random(1));
    RemovePercentage rp = new RemovePercentage();
    rp.setPercentage(70);
    rp.setInputFormat(data);
    Instances training = Filter.useFilter(data, rp);
    rp.setInvertSelection(true);
    rp.setInputFormat(data);
    Instances test = Filter.useFilter(data, rp);
    // standardize the data
    Standardize filter = new Standardize();
    filter.setInputFormat(training);
    Instances newTest = Filter.useFilter(test, filter);
    Instances newTraining = Filter.useFilter(training, filter);
    // Part 5 - now it's a knn
    Classifier knn = new NeuralClassifier();
    knn.buildClassifier(newTraining);
    Evaluation eval = new Evaluation(newTraining);
    eval.evaluateModel(knn, newTest);
    System.out.println(eval.toSummaryString("***** Overall results: *****", false));
}
From source file:fantail.core.Tools.java
License:Open Source License
public static int getNumberTargets(Instances data) throws Exception {
    if (data == null) {
        throw new Exception("data can't be null.");
    }
    if (data.numInstances() <= 0) {
        throw new Exception("data can't be empty.");
    }
    if (data.classIndex() < 0) {
        throw new Exception("class index is not set.");
    }
    Instance tempInst = data.instance(0);
    Instances targets = tempInst.relationalValue(data.classIndex());
    return targets.numAttributes();
}
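Here the class attribute is relation-valued: relationalValue(data.classIndex()) returns the nested Instances object stored in the first instance's class slot, and its attribute count is the number of ranking targets. A hypothetical call site (the dataset name and the class attribute being last are assumptions):

// count ranking targets in a dataset whose class attribute is relation-valued
Instances data = new weka.core.converters.ConverterUtils.DataSource("rankings.arff").getDataSet(); // file name assumed
data.setClassIndex(data.numAttributes() - 1); // relation-valued class attribute assumed last
int numTargets = Tools.getNumberTargets(data);
System.out.println("Number of ranking targets: " + numTargets);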
From source file:farm_ads.MyClassifier.java
public Instances readIntances(String URL) throws Exception {
    FarmAds fa = new FarmAds(URL);
    FarmAdsVector fav = new FarmAdsVector();
    fav.writeFile("data\\dataVecto.dat", fa);
    DataSource source = new DataSource("data\\dataVecto.dat");
    Instances instances = source.getDataSet();
    if (instances.classIndex() == -1) {
        instances.setClassIndex(instances.numAttributes() - 1);
    }
    return instances;
}
From source file:farm_ads.MyClassifier.java
public Instances readIntances(String URL, Hashtable att, Hashtable numAtt, String iv) throws Exception {
    FarmAds fa = new FarmAds(att, numAtt, iv, URL);
    FarmAdsVector fav = new FarmAdsVector();
    fav.writeFile("data\\dataVecto.dat", fa);
    DataSource source = new DataSource("data\\dataVecto.dat");
    Instances instances = source.getDataSet();
    if (instances.classIndex() == -1) {
        instances.setClassIndex(instances.numAttributes() - 1);
    }
    return instances;
}
From source file:farm_ads.MyClassifier.java
public Instances readIntancesVecto(String URL) throws Exception {
    DataSource source = new DataSource(URL);
    Instances instances = source.getDataSet();
    if (instances.classIndex() == -1) {
        instances.setClassIndex(instances.numAttributes() - 1);
    }
    return instances;
}
From source file:farm_ads.MyClassifier.java
public String ClassifyInstance(Classifier c, String instance) throws Exception {
    String format = "%4s %15s %15s\n";
    FarmAds fa = new FarmAds(instance, 1);
    FarmAdsVector fav = new FarmAdsVector();
    fav.writeFile("data\\dataVecto.dat", fa);
    DataSource source = new DataSource("data\\dataVecto.dat");
    Instances instances = source.getDataSet();
    if (instances.classIndex() == -1) {
        instances.setClassIndex(instances.numAttributes() - 1);
    }
    String s = "";
    s += "======= Ad classification results ========\n";
    s += String.format(format, "No.", "Actual", "Predicted");
    String[] classAds = { "Relevant", "Not relevant" };
    double actValue = instances.firstInstance().classValue();
    Instance newInst = instances.firstInstance();
    double pred = c.classifyInstance(newInst);
    s += String.format(format, Integer.toString(1), classAds[(int) actValue], classAds[(int) pred]);
    if (actValue == pred) {
        s += "\n\n ==> Correct prediction";
    } else {
        s += "\n\n ==> Wrong prediction";
    }
    return s;
}
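Tying the MyClassifier methods together, a hypothetical call site might look like the sketch below; the no-arg MyClassifier constructor, the SMO classifier choice, the vectorized file path, and the ad record content are all assumptions, not part of the source above:

// hypothetical usage sketch for MyClassifier; paths and classifier choice assumed
MyClassifier mc = new MyClassifier();                                  // constructor signature assumed
Instances train = mc.readIntancesVecto("data\\farmAdsVecto.dat");      // path assumed
weka.classifiers.Classifier c = new weka.classifiers.functions.SMO(); // classifier choice assumed
c.buildClassifier(train);
String adLine = "...";                                                 // one raw ad record, format defined by FarmAds
System.out.println(mc.ClassifyInstance(c, adLine));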