Example usage for weka.core Instance stringValue

List of usage examples for weka.core Instance stringValue

Introduction

On this page you can find example usages of weka.core.Instance.stringValue.

Prototype

public String stringValue(Attribute att);

Source Link

Document

Returns the value of a nominal, string, date, or relational attribute for the instance as a string.

Usage

From source file:moa.streams.filters.ReplacingMissingValuesFilter.java

License:Open Source License

/**
 * Returns a copy of the next instance of the input stream in which missing numeric and
 * nominal values have been replaced according to the selected strategies.
 *
 * <p>Numeric strategies (by chosen index): 0 nothing, 1 last known value, 2 running mean,
 * 3 maximum, 4 minimum, 5 user-supplied constant. Nominal strategies: 0 nothing,
 * 1 last known value, 2 mode. Non-missing values update the per-attribute statistics
 * used for later replacements.
 *
 * @return the (possibly modified) copy of the next instance
 */
@Override
public Instance nextInstance() {
    Instance inst = (Instance) this.inputStream.nextInstance().copy();

    // Lazy initialization: the per-attribute state is sized from the first instance seen
    if (numAttributes < 0) {
        numAttributes = inst.numAttributes();
        columnsStatistics = new double[numAttributes];
        numberOfSamples = new long[numAttributes];
        lastNominalValues = new String[numAttributes];
        frequencies = new HashMap[numAttributes];
        for (int i = 0; i < numAttributes; i++) {
            if (inst.attribute(i).isNominal()) {
                frequencies[i] = new HashMap<String, Integer>();
            }
        }

        numericalSelectedStrategy = this.numericReplacementStrategyOption.getChosenIndex();
        nominalSelectedStrategy = this.nominalReplacementStrategyOption.getChosenIndex();
    }

    for (int i = 0; i < numAttributes; i++) {

        // ---- Numerical values ----
        if (inst.attribute(i).isNumeric()) {
            // Handle missing value
            if (inst.isMissing(i)) {
                // NOTE(review): if no value has been observed yet, strategies 1-4 still
                // substitute the initial statistic 0.0 (the nominal branch skips the
                // replacement in that situation) - confirm this is intended.
                switch (numericalSelectedStrategy) {
                case 0: // NOTHING
                    break;
                case 1: // LAST KNOWN VALUE
                case 2: // MEAN
                case 3: // MAX
                case 4: // MIN
                    inst.setValue(i, columnsStatistics[i]);
                    break;
                case 5: // CONSTANT
                    inst.setValue(i, numericalConstantValueOption.getValue());
                    break;
                default:
                    continue;
                }
            }
            // Update statistics with non-missing values
            else {
                double observed = inst.value(i);
                switch (numericalSelectedStrategy) {
                case 1: // LAST KNOWN VALUE
                    columnsStatistics[i] = observed;
                    break;
                case 2: // MEAN (incremental update)
                    numberOfSamples[i]++;
                    columnsStatistics[i] += (observed - columnsStatistics[i]) / numberOfSamples[i];
                    break;
                case 3: // MAX
                    // Seed with the first observation; comparing against the initial 0.0
                    // would never record the maximum of an all-negative column.
                    if (numberOfSamples[i] == 0 || observed > columnsStatistics[i]) {
                        columnsStatistics[i] = observed;
                    }
                    numberOfSamples[i]++;
                    break;
                case 4: // MIN
                    // Seed with the first observation; comparing against the initial 0.0
                    // would never record the minimum of an all-positive column.
                    if (numberOfSamples[i] == 0 || observed < columnsStatistics[i]) {
                        columnsStatistics[i] = observed;
                    }
                    numberOfSamples[i]++;
                    break;
                default:
                    continue;
                }
            }
        }
        // ---- Nominal values ----
        else if (inst.attribute(i).isNominal()) {
            // Handle missing value
            if (inst.isMissing(i)) {
                switch (nominalSelectedStrategy) {
                case 0: // NOTHING
                    break;
                case 1: // LAST KNOWN VALUE
                    if (lastNominalValues[i] != null) {
                        inst.setValue(i, lastNominalValues[i]);
                    }
                    break;
                case 2: // MODE
                    if (!frequencies[i].isEmpty()) {
                        // Sort the map to get the most frequent value
                        // (presumably sortByValue orders by descending count - verify)
                        Map<String, Integer> sortedMap = MapUtil.sortByValue(frequencies[i]);
                        inst.setValue(i, sortedMap.entrySet().iterator().next().getKey());
                    }
                    break;
                default:
                    continue;
                }
            }
            // Update statistics with non-missing values
            else {
                switch (nominalSelectedStrategy) {
                case 1: // LAST KNOWN VALUE
                    lastNominalValues[i] = inst.stringValue(i);
                    break;
                case 2: // MODE
                    Integer previousCounter = frequencies[i].get(inst.stringValue(i));
                    if (previousCounter == null)
                        previousCounter = 0;
                    frequencies[i].put(inst.stringValue(i), ++previousCounter);
                    break;
                default:
                    continue;
                }
            }
        }
    }

    return inst;
}

From source file:mulan.classifier.neural.DataPair.java

License:Open Source License

/**
 * Creates a {@link DataPair} representation for each {@link Instance} contained in
 * {@link MultiLabelInstances} data set. The {@link DataPair} is a light weight representation
 * of instance values (by double values), which is useful when iteration over the data and its
 * values./*  www.  j  a v a2  s . c  o  m*/
 *
 * @param mlDataSet the {@link MultiLabelInstances} which content has to be
 *          converted to list of {@link DataPair}
 * @param bipolarOutput indicates whether output values should be converted
 *          to bipolar values, or left intact as binary
 * @return the list of data pairs
 */
// TODO: this method should be in some kind of "data utils".
public static List<DataPair> createDataPairs(MultiLabelInstances mlDataSet, boolean bipolarOutput) {

    Instances data = mlDataSet.getDataSet();
    int[] featureIndices = mlDataSet.getFeatureIndices();
    int[] labelIndices = mlDataSet.getLabelIndices();
    int numFeatures = featureIndices.length;
    int numLabels = mlDataSet.getNumLabels();

    int numInstances = data.numInstances();
    List<DataPair> dataPairs = new ArrayList<DataPair>(numInstances);
    for (int index = 0; index < numInstances; index++) {
        Instance instance = data.instance(index);
        double[] input = new double[numFeatures];
        for (int i = 0; i < numFeatures; i++) {
            int featureIndex = featureIndices[i];
            Attribute featureAttr = instance.attribute(featureIndex);
            // if attribute is binary, parse the string value ... it is expected to be '0' or '1'
            if (featureAttr.isNominal() && featureAttr.numValues() == 2) {
                input[i] = Double.parseDouble(instance.stringValue(featureIndex));
            } // else :
              // a) the attribute is nominal with multiple values, use indexes as nominal values
              //    do not have to be numbers in general ... this is fall-back ... should be rare case
              // b) is numeric attribute
            else {
                input[i] = instance.value(featureIndex);
            }
        }

        if (mlDataSet.hasMissingLabels(instance))
            continue;

        double[] output = new double[numLabels];
        for (int i = 0; i < numLabels; i++) {
            output[i] = Double
                    .parseDouble(data.attribute(labelIndices[i]).value((int) instance.value(labelIndices[i])));
            if (bipolarOutput && output[i] == 0) {
                output[i] = -1;
            }
        }

        dataPairs.add(new DataPair(input, output));
    }

    return dataPairs;
}

From source file:mulan.data.MultiLabelInstances.java

License:Open Source License

/**
 * Tells whether the given label is set on the instance, i.e. whether the string value of
 * the label's attribute equals "1".
 *
 * @param instance the instance to inspect
 * @param labelName the name of the label to look up in {@code attributesIndex}
 * @param attributesIndex maps label names to their attributes
 * @return {@code true} if the label's value is "1", {@code false} otherwise
 */
private boolean isLabelSet(Instance instance, String labelName, Map<String, Attribute> attributesIndex) {
    Attribute labelAttribute = attributesIndex.get(labelName);
    return instance.stringValue(labelAttribute).equals("1");
}

From source file:mulan.data.Statistics.java

License:Open Source License

/**
 * Calculates the co-occurrence counts of all pairs of labels and prints the resulting
 * matrix to standard output, one tab-separated row per label.
 *
 * <p>Only the cells above the diagonal (first label index smaller than the second) are
 * counted; all other cells stay at zero.
 *
 * @param mdata a multi-label data set
 * @return a matrix of co-occurrences
 */
public double[][] calculateCoocurrence(MultiLabelInstances mdata) {
    Instances data = mdata.getDataSet();
    int labels = mdata.getNumLabels();
    double[][] coocurrenceMatrix = new double[labels][labels];

    // Label attributes are addressed as the last `labels` attributes of the data set
    numPredictors = data.numAttributes() - labels;
    int instanceCount = data.numInstances();
    for (int row = 0; row < instanceCount; row++) {
        Instance current = data.instance(row);
        for (int first = 0; first < labels; first++) {
            for (int second = first + 1; second < labels; second++) {
                boolean bothSet = current.stringValue(numPredictors + first).equals("1")
                        && current.stringValue(numPredictors + second).equals("1");
                if (bothSet) {
                    coocurrenceMatrix[first][second]++;
                }
            }
        }
    }

    for (double[] matrixRow : coocurrenceMatrix) {
        for (double cell : matrixRow) {
            System.out.print(cell + "\t");
        }
        System.out.println();
    }

    return coocurrenceMatrix;
}

From source file:naivebayes.NBTubesAI.java

/**
 * Builds the model from the training data: counts the instances per class value and,
 * for every non-class attribute, accumulates for each attribute value the fraction of
 * each class's instances that carry that value. Both tables are printed to standard output.
 *
 * @param data the training instances; a copy is used and instances without a class
 *          value are removed from it
 * @throws Exception declared by the overridden method
 */
@Override
public void buildClassifier(Instances data) throws Exception {
    distribution = new HashMap<>();
    classCount = new HashMap<>();

    // Work on a copy and drop the instances that have no class value
    data = new Instances(data);
    data.deleteWithMissingClass();
    // (the original comment mentions discretizing attributes to nominal; no filter is applied here)
    m_Instances = new Instances(data);
    numInstance = data.numInstances();

    // Every class value starts with an occurrence count of zero
    final int classValueCount = m_Instances.classAttribute().numValues();
    for (int c = 0; c < classValueCount; c++) {
        classCount.put((double) c, 0);
    }
    // Tally how many instances belong to each class value
    for (Enumeration<Instance> rows = m_Instances.enumerateInstances(); rows.hasMoreElements();) {
        double rowClass = rows.nextElement().classValue();
        classCount.put(rowClass, classCount.get(rowClass) + 1);
    }

    System.out.println("JMLAH KELAS:" + m_Instances.numClasses());
    System.out.println(classCount.toString());

    // Walk over every attribute yielded by enumerateAttributes()
    for (Enumeration<Attribute> attrs = m_Instances.enumerateAttributes(); attrs.hasMoreElements();) {
        Attribute attribute = attrs.nextElement();
        String attrName = attribute.name();

        // Make sure there is a table for this attribute name
        if (distribution.get(attrName) == null) {
            distribution.put(attrName, new HashMap<String, HashMap<Double, Double>>());
        }

        for (Enumeration<Instance> rows = m_Instances.enumerateInstances(); rows.hasMoreElements();) {
            Instance row = rows.nextElement();
            // Value of the current attribute and class of this instance
            String domainValue = row.stringValue(attribute);
            double rowClass = row.classValue();

            // First time this attribute value is seen: create its per-class table
            if (distribution.get(attrName).get(domainValue) == null) {
                distribution.get(attrName).put(domainValue, new HashMap<Double, Double>());
            }
            // First time this (value, class) pair is seen: zero the entry of every class
            if (distribution.get(attrName).get(domainValue).get(rowClass) == null) {
                for (int c = 0; c < m_Instances.numClasses(); c++) {
                    distribution.get(attrName).get(domainValue).put((double) c, 0.0);
                }
            }

            // Each occurrence adds 1 / (instances of this class), so the entry ends up
            // as a relative frequency within the class
            double share = 1.0 / classCount.get(rowClass);
            distribution.get(attrName).get(domainValue).put(rowClass,
                    distribution.get(attrName).get(domainValue).get(rowClass) + share);
        }
    }

    System.out.println(distribution.toString());
    System.out.println(classCount.toString());
}

From source file:naivebayes.NBTubesAI.java

/**
 * Classifies the instance by computing, for every class value, the class prior multiplied
 * by the stored per-attribute factors, and returning the index of the highest score.
 *
 * <p>Missing attribute values are skipped. When a lookup in the distribution table fails
 * with a {@link NullPointerException}, the score of that class is reset to zero.
 *
 * @param instance the instance to classify
 * @return the index of the class value with the highest score (0 if no score is positive)
 * @throws Exception declared by the overridden method
 */
@Override
public double classifyInstance(Instance instance) throws Exception {
    final int classValueCount = instance.classAttribute().numValues();
    final double[] scores = new double[classValueCount];

    // Score every class value
    for (int c = 0; c < classValueCount; c++) {
        // Prior: share of training instances that belong to this class
        scores[c] = (double) classCount.get(c + 0.0) / numInstance;

        for (Enumeration<Attribute> attrs = instance.enumerateAttributes(); attrs.hasMoreElements();) {
            Attribute attribute = attrs.nextElement();
            if (instance.isMissing(attribute)) {
                continue;
            }
            try {
                scores[c] *= distribution.get(attribute.name()).get(instance.stringValue(attribute))
                        .get(c + 0.0);
            } catch (NullPointerException e) {
                // No entry for this attribute / value / class: the class score becomes zero
                scores[c] = 0;
            }
        }
    }

    // Pick the first class with the strictly highest score
    int bestIndex = 0;
    double bestScore = 0;
    for (int c = 0; c < classValueCount; c++) {
        if (scores[c] > bestScore) {
            bestScore = scores[c];
            bestIndex = c;
        }
    }
    return bestIndex;
}

From source file:naivebayes.NBTubesAI.java

/**
 * Computes, for every class value, the class prior multiplied by the stored per-attribute
 * factors of the instance's non-missing attribute values. The scores are returned as
 * computed; no normalization is applied.
 *
 * @param instance the instance to score
 * @return one score per class value, indexed by class value
 * @throws Exception declared by the overridden method
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {
    final int classValueCount = instance.classAttribute().numValues();
    final double[] scores = new double[classValueCount];

    // Score every class value
    for (int c = 0; c < classValueCount; c++) {
        // Prior: share of training instances that belong to this class
        scores[c] = (double) classCount.get(c + 0.0) / numInstance;

        for (Enumeration<Attribute> attrs = instance.enumerateAttributes(); attrs.hasMoreElements();) {
            Attribute attribute = attrs.nextElement();
            if (instance.isMissing(attribute)) {
                continue;
            }
            try {
                scores[c] *= distribution.get(attribute.name()).get(instance.stringValue(attribute))
                        .get(c + 0.0);
            } catch (NullPointerException ignored) {
                // Deliberately ignored: a failed table lookup leaves the running score
                // unchanged for this attribute.
                // NOTE(review): classifyInstance resets the score to 0 in the same
                // situation - confirm which behavior is intended.
            }
        }
    }

    return scores;
}

From source file:NaiveBayesPckge.mushClass.java

/**
 * Classifies the instance: for every class, starts from {@code probabConclusion} and
 * multiplies in the stored factor of each considered attribute value, then returns the
 * result of {@code getIndexBiggestProbability} for those scores.
 *
 * @param instance the instance to classify
 * @return the value returned by {@code getIndexBiggestProbability} for the class scores
 * @throws java.lang.Exception declared by the overridden method
 */
@Override
public double classifyInstance(Instance instance) throws java.lang.Exception {
    // number of possible outcomes, e.g. 2 for T and F
    int numClasses = instance.numClasses();
    double[] out = new double[numClasses];
    // number of attributes
    int num_attributes = instance.numAttributes();

    for (int i = 0; i < numClasses; i++) {
        out[i] = probabConclusion[i];
        // NOTE(review): this loop covers attributes 0..n-2, whereas distributionForInstance
        // in this class covers 1..n-1 - confirm which range matches the class attribute.
        for (int j = 0; j < num_attributes - 1; j++) {
            int indexLabel = searchIndexLabel(j, instance.stringValue(j));
            out[i] *= (double) atribNom[j].getAttribObjectType(indexLabel, i);
        }
    }

    return getIndexBiggestProbability(out);
}

From source file:NaiveBayesPckge.mushClass.java

/**
 * Computes a score for every class: starts from {@code probabConclusion} and multiplies
 * in the stored factor of the instance's value for each attribute from index 1 onwards.
 * The scores are returned as computed; no normalization is applied.
 *
 * @param instance the instance to score
 * @return one score per class
 * @throws Exception declared by the overridden method
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {
    // number of possible outcomes, e.g. 2 for T and F
    int numClasses = instance.numClasses();
    double[] out = new double[numClasses];
    // number of attributes
    int num_attributes = instance.numAttributes();

    for (int i = 0; i < numClasses; i++) {
        out[i] = probabConclusion[i];
        // Attribute 0 is skipped (presumably it is the class attribute - verify)
        for (int j = 1; j < num_attributes; j++) {
            int indexLabel = searchIndexLabel(j, instance.stringValue(j));
            out[i] *= (double) atribNom[j].getAttribObjectType(indexLabel, i);
        }
    }

    return out;
}

From source file:NaiveBayesPckge.NaiveBayesCode.java

/**
 * Computes a score for every class: starts from {@code probabConclusion} and multiplies
 * in the stored factor of the instance's value for every attribute except the last one.
 * The scores are returned as computed; no normalization is applied.
 *
 * @param instance the instance to score
 * @return one score per class
 * @throws Exception declared by the overridden method
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {
    // number of possible outcomes, e.g. 2 for T and F
    int numClasses = instance.numClasses();
    double[] out = new double[numClasses];
    // number of attributes
    int num_attributes = instance.numAttributes();

    for (int i = 0; i < numClasses; i++) {
        out[i] = probabConclusion[i];
        // The last attribute is skipped (presumably it is the class attribute - verify)
        for (int j = 0; j < num_attributes - 1; j++) {
            int indexLabel = searchIndexLabel(j, instance.stringValue(j));
            out[i] *= (double) atribNom[j].getAttribObjectType(indexLabel, i);
        }
    }

    return out;
}