Example usage for weka.core Instances attribute

List of usage examples for weka.core Instances attribute

Introduction

In this page you can find the example usage for weka.core Instances attribute.

Prototype

public Attribute attribute(String name)

Source Link

Document

Returns an attribute given its name.

Usage

From source file:entities.WekaBOWFeatureVector.java

/**
 * Builds a Weka instance from a bag-of-words feature vector.
 *
 * @param vSource source vector providing two cosine-similarity scores and a label
 * @param data    dataset whose header defines the attributes; attribute 2 is
 *                assumed to be the nominal class attribute — TODO confirm
 * @return a new DenseInstance (weight 1.0) not yet attached to any dataset
 */
public Instance fillFeatureVector(BOWFeatureVector vSource, Instances data) {
    double[] values = new double[data.numAttributes()];

    // Two cosine-similarity features, then the index of the nominal label value.
    values[0] = vSource.getCosSimilarityArrayAtIndex(0);
    values[1] = vSource.getCosSimilarityArrayAtIndex(1);
    // NOTE(review): indexOfValue returns -1 for an unknown label — verify labels
    // always exist in the attribute's value set.
    values[2] = data.attribute(2).indexOfValue(vSource.getLabel());

    Instance inst = new DenseInstance(1.0, values);

    return inst;
}

From source file:entities.WekaHMMFeatureVector.java

/**
 * Builds a Weka instance from an HMM feature vector, normalizing the two
 * model probabilities so they sum to 1.
 *
 * @param vSource source vector providing two probabilities and a label
 * @param data    dataset whose header defines the attributes; attribute 2 is
 *                assumed to be the nominal class attribute — TODO confirm
 * @return a new DenseInstance (weight 1.0) not yet attached to any dataset
 */
public Instance fillFeatureVector(HMMFeatureVector vSource, Instances data) {
    double[] values = new double[data.numAttributes()];

    // Normalize each probability by their sum.
    // NOTE(review): if both probabilities are 0 this yields NaN — confirm the
    // upstream HMM scores can never both be zero.
    values[0] = vSource.getProbArrayAtIndex(0)
            / (vSource.getProbArrayAtIndex(0) + vSource.getProbArrayAtIndex(1));
    values[1] = vSource.getProbArrayAtIndex(1)
            / (vSource.getProbArrayAtIndex(0) + vSource.getProbArrayAtIndex(1));
    values[2] = data.attribute(2).indexOfValue(vSource.getLabel());

    Instance inst = new DenseInstance(1.0, values);
    return inst;
}

From source file:entities.WekaNGGFeatureVector.java

/**
 * Builds a Weka instance from an n-gram-graph feature vector: four graph
 * similarity measures per comparison (containment, size, value, NVS) for two
 * comparisons, followed by the class label.
 *
 * @param vSource source vector providing the similarity scores and a label
 * @param data    dataset whose header defines the attributes; attribute 8 is
 *                assumed to be the nominal class attribute — TODO confirm
 * @return a new DenseInstance (weight 1.0) not yet attached to any dataset
 */
public Instance fillFeatureVector(NGGFeatureVector vSource, Instances data) {

    double[] values = new double[data.numAttributes()];

    // Comparison 0: containment, size, value similarity and NVS.
    values[0] = vSource.getContainmentSimilarityArrayAtIndex(0);
    values[1] = vSource.getSizeSimilarityArrayAtIndex(0);
    values[2] = vSource.getValueSimilarityArrayAtIndex(0);
    values[3] = vSource.getNVSArrayAtIndex(0);
    // Comparison 1: same four measures.
    values[4] = vSource.getContainmentSimilarityArrayAtIndex(1);
    values[5] = vSource.getSizeSimilarityArrayAtIndex(1);
    values[6] = vSource.getValueSimilarityArrayAtIndex(1);
    values[7] = vSource.getNVSArrayAtIndex(1);
    values[8] = data.attribute(8).indexOfValue(vSource.getLabel());

    Instance inst = new DenseInstance(1.0, values);

    return inst;
}

From source file:entity.NoiseInjectionManager.java

License:Open Source License

/**
 * /*from  ww w. j a  v  a 2  s. co  m*/
 * Increments fp and fn by specified percentages.
 * Randomize order of instances and modifies instances until noise quota is reached.
 * Than randomized instances again.
 * NOTE: It modifies the given dataset, because it is a reference.
 *  
 * @param origDataset
 * @param fpPercentage
 * @param fnPercentage
 * @return Instances noisyDataset
 */
public Instances addNoiseToDataset(Instances origDataset, BigDecimal fpPercentage, BigDecimal fnPercentage) {

    // exits if no noise must be added
    if (fnPercentage.equals(BigDecimal.ZERO) && fpPercentage.equals(BigDecimal.ZERO)) {
        if (verbose)
            System.out.println("[NoiseManager , addNoiseToDataset] nessun errore da aggiungere");
        return origDataset;
    }

    // total instances in dataset
    int numInstances = origDataset.numInstances();

    // finds positive (buggy) and negative (non-buggy) instances numbers
    int numOfPositives = 0;
    int numOfNegatives = 0;

    for (int j = 0; j < numInstances; j++) {

        if (origDataset.instance(j).stringValue(origDataset.classIndex()).equals(Settings.buggyLabel)) {
            numOfPositives++;
        }
        // this is a redundant control, but better safe than sorry
        else if (origDataset.instance(j).stringValue(origDataset.classIndex()).equals(Settings.nonbuggyLabel)) {
            numOfNegatives++;
        }
    }

    // calculates the number of false positives to insert
    int fpToInsert = (int) Math.round(numOfNegatives * fpPercentage.doubleValue() / 100);
    int fpInserted = 0;
    if (verbose)
        System.out.println("\n\n[NoiseManager , addNoiseToDataset] fpToInsert= " + fpToInsert
                + ", totIntances= " + origDataset.numInstances() + " true negatives= " + numOfNegatives
                + " %fp= " + fpPercentage);

    // calculates the number of false negatives to insert
    int fnToInsert = (int) Math.round(numOfPositives * fnPercentage.doubleValue() / 100);
    int fnInserted = 0;
    if (verbose)
        System.out.println("[NoiseManager , addNoiseToDataset] fnToInsert= " + fnToInsert + ", totIntances= "
                + origDataset.numInstances() + " true positives= " + numOfPositives + " %fn= " + fnPercentage);

    if (verbose)
        System.out.println("[NoiseManager , addNoiseToDataset] buggy label: " + Settings.buggyLabel
                + " - nonbuggy label: " + Settings.nonbuggyLabel);

    // randomize order of instances
    origDataset.randomize(RandomizationManager.randomGenerator);

    for (int i = 0; i < origDataset.numInstances(); i++) {
        if (verbose)
            System.out.print("\nORIGINAL VALUES: "
                    + origDataset.instance(i).value(origDataset.attribute(origDataset.classIndex())) + " - "
                    + origDataset.instance(i).stringValue(origDataset.classIndex()));

        // gets the classification attribute (it HAS to be the last)
        Attribute att = origDataset.instance(i).attribute(origDataset.classIndex());

        // if there are fn to add and this is a positive instances it turns it into a negative, making it a fn 
        if ((fnInserted < fnToInsert) && (origDataset.instance(i).stringValue(origDataset.classIndex())
                .equals(Settings.buggyLabel))) {

            origDataset.instance(i).setValue(att, Settings.nonbuggyLabel);
            fnInserted++;
            if (verbose)
                System.out.print(" - added FN, added " + fnInserted + " of " + fnToInsert + " ");
        }

        // if there are fp to add and this is a negative instances it turns it into a positive, making it a fp 
        else if ((fpInserted < fpToInsert) && (origDataset.instance(i).stringValue(origDataset.classIndex())
                .equals(Settings.nonbuggyLabel))) {

            origDataset.instance(i).setValue(att, Settings.buggyLabel);
            fpInserted++;
            if (verbose)
                System.out.print(" - added FP, added " + fpInserted + " of " + fpToInsert + " ");

        }

        if (verbose)
            System.out.print(" FINAL ELEMENT VALUES: "
                    + origDataset.instance(i).value(origDataset.attribute(origDataset.classIndex())) + " - "
                    + origDataset.instance(i).stringValue(origDataset.classIndex()));
    }

    // randomize order of instances
    origDataset.randomize(RandomizationManager.randomGenerator);
    return origDataset;
}

From source file:entity.NoiseInjectionManager.java

License:Open Source License

/**
 * Increments fp and fn in combination by a specified percentages.
 * Randomize order of instances and modifies instances until noise quota is reached.
 * Than randomized instances again./* w w  w.  j ava 2  s  .  co  m*/
 * NOTE: It modifies the given dataset, because it is a reference.
 * 
 * @param origDataset
 * @param combinedFpFnPercentage
 * @return noisydata
 */
public Instances addNoiseToDataset(Instances origDataset, BigDecimal combinedFpFnPercentage) {

    // exits if no noise must be added
    if (combinedFpFnPercentage.equals(BigDecimal.ZERO)) {
        if (verbose)
            System.out.println("[NoiseManager , addNoiseToDataset] nessun errore da aggiungere");
        return origDataset;
    }

    // total instances in dataset
    int numInstances = origDataset.numInstances();

    // finds positive (buggy) and negative (non-buggy) instances numbers
    int fpAndFnToInsert = (int) Math.round(numInstances * combinedFpFnPercentage.doubleValue() / 100);
    int fpAndFnInserted = 0;
    if (verbose)
        System.out.println("\n\n[NoiseManager , addNoiseToDataset] fpAndFnToInsert= " + fpAndFnToInsert
                + ", totIntances= " + origDataset.numInstances());

    if (verbose)
        System.out.println("[NoiseManager , addNoiseToDataset] buggy label: " + Settings.buggyLabel
                + " - nonbuggy label: " + Settings.nonbuggyLabel);

    // randomize order of instances
    origDataset.randomize(RandomizationManager.randomGenerator);

    for (int i = 0; i < origDataset.numInstances(); i++) {
        if (verbose)
            System.out.print("\nORIGINAL VALUES: "
                    + origDataset.instance(i).value(origDataset.attribute(origDataset.classIndex())) + " - "
                    + origDataset.instance(i).stringValue(origDataset.classIndex()));

        // gets the classification attribute (it HAS to be the last)
        Attribute att = origDataset.instance(i).attribute(origDataset.classIndex());

        // if there are fn or fp to add  
        if (fpAndFnInserted < fpAndFnToInsert) {

            // if this is a positive instances it turns it into a negative, making it a fn
            if (origDataset.instance(i).stringValue(origDataset.classIndex()).equals(Settings.buggyLabel)) {

                if (verbose)
                    System.out.print(" - added FN, added " + fpAndFnInserted + " of " + fpAndFnToInsert + " ");
                origDataset.instance(i).setValue(att, Settings.nonbuggyLabel);
                fpAndFnInserted++;
            }

            // if this is a negative instances it turns it into a positive, making it a fp
            else if (origDataset.instance(i).stringValue(origDataset.classIndex())
                    .equals(Settings.nonbuggyLabel)) {

                if (verbose)
                    System.out.print(" - added FP, added " + fpAndFnInserted + " of " + fpAndFnToInsert + " ");
                origDataset.instance(i).setValue(att, Settings.buggyLabel);
                fpAndFnInserted++;
            }
        }

        if (verbose)
            System.out.print(" FINAL ELEMENT VALUES: "
                    + origDataset.instance(i).value(origDataset.attribute(origDataset.classIndex())) + " - "
                    + origDataset.instance(i).stringValue(origDataset.classIndex()));
    }

    // randomize order of instances
    origDataset.randomize(RandomizationManager.randomGenerator);
    return origDataset;
}

From source file:epsi.i5.datamining.Weka.java

/**
 * Loads an ARFF dataset and prints 10-fold cross-validated results for several
 * Weka learners: Naive Bayes, J48, DecisionTable and OneR against class
 * attribute 0, then M5Rules and LinearRegression against attribute 1
 * (polarity, a numeric target, hence correlation coefficient instead of a
 * confusion matrix).
 *
 * @throws FileNotFoundException if the ARFF file is missing
 * @throws IOException           on read failure
 * @throws Exception             propagated from Weka model building/evaluation
 */
public void excutionAlgo() throws FileNotFoundException, IOException, Exception {
    Instances data;
    // try-with-resources: the original explicit close() leaked the reader
    // whenever Instances(reader) threw before reaching it.
    try (BufferedReader reader = new BufferedReader(
            new FileReader("src/epsi/i5/data/" + fileOne + ".arff"))) {
        data = new Instances(reader);
    }
    data.setClass(data.attribute(0));

    NaiveBayes NB = new NaiveBayes();
    NB.buildClassifier(data);
    Evaluation naiveBayes = new Evaluation(data);
    naiveBayes.crossValidateModel(NB, data, 10, new Random(1));
    naiveBayes.evaluateModel(NB, data);
    System.out.println("*****************************");
    System.out.println("******** Naive Bayes ********");
    System.out.println(naiveBayes.toMatrixString());
    System.out.println("*****************************");
    System.out.println("**** Pourcentage Correct ****");
    System.out.println(naiveBayes.pctCorrect());
    System.out.println("");

    J48 j = new J48();
    j.buildClassifier(data);
    Evaluation jeval = new Evaluation(data);
    jeval.crossValidateModel(j, data, 10, new Random(1));
    jeval.evaluateModel(j, data);
    System.out.println("*****************************");
    System.out.println("************ J48 ************");
    System.out.println(jeval.toMatrixString());
    System.out.println("*****************************");
    System.out.println("**** Pourcentage Correct ****");
    System.out.println(jeval.pctCorrect());
    System.out.println("");

    DecisionTable DT = new DecisionTable();
    DT.buildClassifier(data);
    Evaluation decisionTable = new Evaluation(data);
    decisionTable.crossValidateModel(DT, data, 10, new Random(1));
    decisionTable.evaluateModel(DT, data);
    System.out.println("*****************************");
    System.out.println("******* DecisionTable *******");
    System.out.println(decisionTable.toMatrixString());
    System.out.println("*****************************");
    System.out.println("**** Pourcentage Correct ****");
    System.out.println(decisionTable.pctCorrect());
    System.out.println("");

    OneR OR = new OneR();
    OR.buildClassifier(data);
    Evaluation oneR = new Evaluation(data);
    oneR.crossValidateModel(OR, data, 10, new Random(1));
    oneR.evaluateModel(OR, data);
    System.out.println("*****************************");
    System.out.println("************ OneR ***********");
    System.out.println(oneR.toMatrixString());
    System.out.println("*****************************");
    System.out.println("**** Pourcentage Correct ****");
    System.out.println(oneR.pctCorrect());

    // Polarity: switch the class to attribute 1 (numeric target) and run the
    // regression learners.
    data.setClass(data.attribute(1));
    System.out.println("");
    M5Rules MR = new M5Rules();
    MR.buildClassifier(data);
    Evaluation m5rules = new Evaluation(data);
    m5rules.crossValidateModel(MR, data, 10, new Random(1));
    m5rules.evaluateModel(MR, data);
    System.out.println("*****************************");
    System.out.println("********** M5Rules **********");
    System.out.println(m5rules.correlationCoefficient());

    System.out.println("");
    LinearRegression LR = new LinearRegression();
    LR.buildClassifier(data);
    Evaluation linearR = new Evaluation(data);
    linearR.crossValidateModel(LR, data, 10, new Random(1));
    linearR.evaluateModel(LR, data);
    System.out.println("*****************************");
    System.out.println("********** linearR **********");
    System.out.println(linearR.correlationCoefficient());
}

From source file:es.bsc.autonomic.powermodeller.tools.classifiers.BaggingClassifier.java

License:Apache License

@Override
protected Classifier buildClassifier(DataSet training_ds) {
    logger.debug("Building Bagging classifier.");

    Classifier model = null;//from  ww  w. j av  a 2  s. com

    // Get the independent variable index
    String independent = training_ds.getIndependent();

    if (independent == null)
        throw new WekaWrapperException("Independent variable is not set in dataset.");

    try {

        // Read all the instances in the file (ARFF, CSV, XRFF, ...)
        ConverterUtils.DataSource source = new ConverterUtils.DataSource(training_ds.getFilePath());
        Instances instances = source.getDataSet();

        // Set the independent variable (powerWatts).
        instances.setClassIndex(instances.attribute(independent).index());

        // Builds a regression model for the given data.
        model = new weka.classifiers.meta.Bagging();

        // Build Linear Regression
        model.buildClassifier(instances);

    } catch (WekaWrapperException e) {
        logger.error("Error while creating Bagging classifier.", e);
        throw new WekaWrapperException("Error while creating Bagging classifier.");

    } catch (Exception e) {
        logger.error("Error while applying Bagging to data set instances.", e);
        throw new WekaWrapperException("Error while applying Bagging to data set instances.");
    }

    return model;

}

From source file:es.bsc.autonomic.powermodeller.tools.classifiers.LinearRegressionClassifier.java

License:Apache License

@Override
@Override
public Classifier buildClassifier(DataSet training_ds) {

    logger.debug("Building LinearRegression classifier.");

    Classifier model;

    // The independent (target) variable must be declared on the dataset.
    String independent = training_ds.getIndependent();

    if (independent == null)
        throw new WekaWrapperException("Independent variable is not set in dataset.");

    try {

        // Read all the instances in the file (ARFF, CSV, XRFF, ...)
        ConverterUtils.DataSource source = new ConverterUtils.DataSource(training_ds.getFilePath());
        Instances instances = source.getDataSet();

        // Set the independent variable (powerWatts) as the class attribute.
        instances.setClassIndex(instances.attribute(independent).index());

        // Ordinary least-squares linear regression.
        model = new weka.classifiers.functions.LinearRegression();

        // Train the model on the loaded instances.
        model.buildClassifier(instances);

    } catch (WekaWrapperException e) {
        logger.error("Error while creating Linear Regression classifier.", e);
        // Rethrow the original: wrapping in a new exception without a cause
        // (as before) discarded the original stack trace.
        throw e;

    } catch (Exception e) {
        logger.error("Error while applying Linear Regression to data set instances.", e);
        // NOTE(review): the cause e is dropped here — attach it if
        // WekaWrapperException has a (String, Throwable) constructor.
        throw new WekaWrapperException("Error while applying Linear Regression to data set instances.");
    }

    return model;

}

From source file:es.bsc.autonomic.powermodeller.tools.classifiers.MultilayerPerceptronClassifier.java

License:Apache License

@Override
@Override
protected Classifier buildClassifier(DataSet training_ds) {

    logger.debug("Building MultilayerPerceptron classifier.");

    MultilayerPerceptron model;

    // The independent (target) variable must be declared on the dataset.
    String independent = training_ds.getIndependent();

    if (independent == null)
        throw new WekaWrapperException("Independent variable is not set in dataset.");

    try {

        // Read all the instances in the file (ARFF, CSV, XRFF, ...)
        ConverterUtils.DataSource source = new ConverterUtils.DataSource(training_ds.getFilePath());
        Instances instances = source.getDataSet();

        // Set the independent variable (powerWatts) as the class attribute.
        instances.setClassIndex(instances.attribute(independent).index());

        // Small network: one hidden layer of 4 nodes, 20 training epochs.
        model = new weka.classifiers.functions.MultilayerPerceptron();
        model.setHiddenLayers("4");
        model.setTrainingTime(20);

        // Train the model on the loaded instances.
        model.buildClassifier(instances);

    } catch (WekaWrapperException e) {
        // Fixed copy-paste: messages previously said "Linear Regression".
        logger.error("Error while creating MultilayerPerceptron classifier.", e);
        // Rethrow the original: wrapping in a new exception without a cause
        // (as before) discarded the original stack trace.
        throw e;

    } catch (Exception e) {
        logger.error("Error while applying MultilayerPerceptron to data set instances.", e);
        // NOTE(review): the cause e is dropped here — attach it if
        // WekaWrapperException has a (String, Throwable) constructor.
        throw new WekaWrapperException("Error while applying MultilayerPerceptron to data set instances.");
    }

    return model;
}

From source file:es.bsc.autonomic.powermodeller.tools.classifiers.RepTreeClassifier.java

License:Apache License

@Override
@Override
public Classifier buildClassifier(DataSet training_ds) {

    logger.debug("Building RepTree classifier.");

    Classifier model;

    // The independent (target) variable must be declared on the dataset.
    String independent = training_ds.getIndependent();

    if (independent == null)
        throw new WekaWrapperException("Independent variable is not set in dataset.");

    try {

        // Read all the instances in the file (ARFF, CSV, XRFF, ...)
        ConverterUtils.DataSource source = new ConverterUtils.DataSource(training_ds.getFilePath());
        Instances instances = source.getDataSet();

        // Set the independent variable (powerWatts) as the class attribute.
        instances.setClassIndex(instances.attribute(independent).index());

        // Fast decision/regression tree learner with reduced-error pruning.
        model = new weka.classifiers.trees.REPTree();

        // Train the model on the loaded instances.
        model.buildClassifier(instances);

    } catch (WekaWrapperException e) {
        // Fixed copy-paste: messages previously said "Linear Regression".
        logger.error("Error while creating RepTree classifier.", e);
        // Rethrow the original: wrapping in a new exception without a cause
        // (as before) discarded the original stack trace.
        throw e;

    } catch (Exception e) {
        logger.error("Error while applying RepTree to data set instances.", e);
        // NOTE(review): the cause e is dropped here — attach it if
        // WekaWrapperException has a (String, Throwable) constructor.
        throw new WekaWrapperException("Error while applying RepTree to data set instances.");
    }

    return model;

}