List of usage examples for weka.core Instance attribute
public Attribute attribute(int index);
From source file:affective.core.ArffLexiconWordLabeller.java
License:Open Source License
/** * Processes all the dictionary files.//www . j a va 2 s. com * @throws IOException an IOException will be raised if an invalid file is supplied */ public void processDict() throws IOException { BufferedReader reader = new BufferedReader(new FileReader(this.m_lexiconFile)); Instances lexInstances = new Instances(reader); // set upper value for word index lexiconWordIndex.setUpper(lexInstances.numAttributes() - 1); // checks all numeric and nominal attributes and discards the word attribute for (int i = 0; i < lexInstances.numAttributes(); i++) { if (i != this.lexiconWordIndex.getIndex()) { if (lexInstances.attribute(i).isNumeric() || lexInstances.attribute(i).isNominal()) { this.attributes.add(lexInstances.attribute(i)); } } } // Maps all words with their affective scores discarding missing values for (Instance inst : lexInstances) { if (inst.attribute(this.lexiconWordIndex.getIndex()).isString()) { String word = inst.stringValue(this.lexiconWordIndex.getIndex()); // stems the word word = this.m_stemmer.stem(word); // map numeric scores if (!attributes.isEmpty()) { Map<Attribute, Double> wordVals = new HashMap<Attribute, Double>(); for (Attribute na : attributes) { wordVals.put(na, inst.value(na)); } this.attValMap.put(word, wordVals); } } } }
From source file:ann.MyANN.java
/** * mengubah Instance menjadi Data/* w ww.j a v a2s . c om*/ * @param instance Instance yang akan diubah menjadi kelas Data * @return kelas Data dari input */ private Data instanceToData(Instance instance) { ArrayList<Double> input = new ArrayList<>(); ArrayList<Double> target = new ArrayList<>(); for (int j = 0; j < instance.numAttributes() - 1; j++) { input.add(0.0); } if (instance.classAttribute().isNominal()) { for (int j = 0; j < instance.classAttribute().numValues(); j++) { target.add(0.0); } } else { target.add(0.0); } for (int j = 0; j < instance.numAttributes(); j++) { if (j == instance.classIndex()) { if (instance.attribute(j).isNominal()) target.set((int) instance.value(j), 1.0); else target.add(instance.value(j)); } else { input.set(j, instance.value(j)); } } return new Data(input, target); }
From source file:assign00.KNNClassifier.java
/**
 * Squared Euclidean distance over the shared non-class attributes of two
 * instances. Numeric attributes contribute the squared difference; any other
 * attribute type contributes 0 on a match and 1 on a mismatch.
 * The square root is never taken; the squared form preserves the neighbor
 * ordering needed for kNN.
 */
double EuclideanDistance(Instance instanceLHS, Instance instanceRHS) {
    // iterate only over attributes both instances have, skipping the last (class)
    int shared = Math.min(instanceLHS.numAttributes(), instanceRHS.numAttributes()) - 1;
    double total = 0;
    for (int attr = 0; attr < shared; attr++) {
        boolean bothNumeric = instanceLHS.attribute(attr).isNumeric()
                && instanceRHS.attribute(attr).isNumeric();
        if (bothNumeric) {
            double diff = instanceLHS.value(attr) - instanceRHS.value(attr);
            total += diff * diff;
        } else if (!instanceLHS.stringValue(attr).equals(instanceRHS.stringValue(attr))) {
            // categorical mismatch penalty
            total += 1;
        }
    }
    return total;
}
From source file:assign00.KNNClassifier.java
/**
 * Manhattan (L1) distance over the shared non-class attributes of two
 * instances. Numeric attributes contribute the absolute difference; any other
 * attribute type contributes 0 on a match and 1 on a mismatch.
 */
double ManhattenDistance(Instance instanceLHS, Instance instanceRHS) {
    // iterate only over attributes both instances have, skipping the last (class)
    int shared = Math.min(instanceLHS.numAttributes(), instanceRHS.numAttributes()) - 1;
    double total = 0;
    for (int attr = 0; attr < shared; attr++) {
        boolean bothNumeric = instanceLHS.attribute(attr).isNumeric()
                && instanceRHS.attribute(attr).isNumeric();
        if (bothNumeric) {
            total += Math.abs(instanceLHS.value(attr) - instanceRHS.value(attr));
        } else if (!instanceLHS.stringValue(attr).equals(instanceRHS.stringValue(attr))) {
            // categorical mismatch penalty
            total += 1;
        }
    }
    return total;
}
From source file:boosting.classifiers.DecisionStumpWritable.java
License:Open Source License
/**
 * Returns the subset an instance falls into for this stump's split.
 * Subset 2 means the split attribute is missing; for nominal attributes
 * subset 0 means the value equals the split point, otherwise 1; for numeric
 * attributes subset 0 means the value is at most the split point, otherwise 1.
 *
 * @param instance the instance to check
 * @return the subset the instance falls into (0, 1, or 2)
 * @throws Exception if something goes wrong
 */
private int whichSubset(Instance instance) throws Exception {
    // missing split attribute gets its own branch
    if (instance.isMissing(m_AttIndex)) {
        return 2;
    }
    double val = instance.value(m_AttIndex);
    if (instance.attribute(m_AttIndex).isNominal()) {
        return ((int) val == m_SplitPoint) ? 0 : 1;
    }
    // numeric attribute: left branch when value <= split point
    return (val <= m_SplitPoint) ? 0 : 1;
}
From source file:br.puc_rio.ele.lvc.interimage.datamining.udf.BayesClassifier.java
License:Apache License
@Override public String exec(Tuple input) throws IOException { if (input == null) return null; if (_trainData == null) { //Reads train data try {/*from ww w. j av a2 s . c o m*/ if (!_trainUrl.isEmpty()) { URL url = new URL(_trainUrl); URLConnection urlConn = url.openConnection(); urlConn.connect(); InputStreamReader inStream = new InputStreamReader(urlConn.getInputStream()); BufferedReader buff = new BufferedReader(inStream); _trainData = _dataParser.parseData(buff); } } catch (Exception e) { throw new IOException("Caught exception reading training data file ", e); } } try { Integer numFeatures = input.size(); double[] testData; testData = new double[numFeatures]; for (int i = 0; i < numFeatures; i++) testData[i] = DataType.toDouble(input.get(i)); Classifier csfr = null; csfr = (Classifier) Class.forName("weka.classifiers.bayes.NaiveBayes").newInstance(); csfr.buildClassifier(_trainData); double classification = 0; Instance myinstance = _trainData.instance(0); for (int i = 0; i < numFeatures; i++) myinstance.setValue(i, testData[i]); classification = csfr.classifyInstance(myinstance); return myinstance.attribute(_trainData.classIndex()).value((int) classification); } catch (Exception e) { throw new IOException("Caught exception processing input row ", e); } }
From source file:br.ufrn.ia.core.clustering.EMIaProject.java
License:Open Source License
public double[] logDensityPerClusterForInstance(Instance inst) throws Exception { int i, j;/*w ww . ja v a 2 s. c om*/ double logprob; double[] wghts = new double[m_num_clusters]; m_replaceMissing.input(inst); inst = m_replaceMissing.output(); for (i = 0; i < m_num_clusters; i++) { // System.err.println("Cluster : "+i); logprob = 0.0; for (j = 0; j < m_num_attribs; j++) { if (!inst.isMissing(j)) { if (inst.attribute(j).isNominal()) { logprob += Math.log(m_model[i][j].getProbability(inst.value(j))); } else { // numeric attribute logprob += logNormalDens(inst.value(j), m_modelNormal[i][j][0], m_modelNormal[i][j][1]); /* * System.err.println(logNormalDens(inst.value(j), * m_modelNormal[i][j][0], m_modelNormal[i][j][1]) + * " "); */ } } } // System.err.println(""); wghts[i] = logprob; } return wghts; }
From source file:Classifiers.BRkNN.java
License:Open Source License
/**
 * Selects the best value for k by hold-one-out cross-validation, minimizing
 * Hamming Loss. Evaluates every k from cvMaxK down to 1 for each held-out
 * instance, then stores the best (lowest-loss, lowest-k on ties) value in
 * numOfNeighbors.
 *
 * @throws Exception potential exception thrown; to be handled at an upper level
 */
private void crossValidate() throws Exception {
    try {
        // the Hamming Loss accumulated for each candidate k (index k-1)
        double[] hammingLoss = new double[cvMaxK];
        for (int i = 0; i < cvMaxK; i++) {
            hammingLoss[i] = 0;
        }

        Instances dataSet = train;
        Instance instance; // the hold-out instance
        Instances neighbours; // the neighboring instances
        double[] origDistances, convertedDistances;
        for (int i = 0; i < dataSet.numInstances(); i++) {
            if (getDebug() && (i % 50 == 0)) {
                debug("Cross validating " + i + "/" + dataSet.numInstances() + "\r");
            }
            instance = dataSet.instance(i);
            // fetch the cvMaxK nearest neighbours once; smaller k values are
            // obtained below by pruning this set
            neighbours = lnn.kNearestNeighbours(instance, cvMaxK);
            origDistances = lnn.getDistances();

            // gathering the true labels for the instance
            boolean[] trueLabels = new boolean[numLabels];
            for (int counter = 0; counter < numLabels; counter++) {
                int classIdx = labelIndices[counter];
                String classValue = instance.attribute(classIdx).value((int) instance.value(classIdx));
                trueLabels[counter] = classValue.equals("1");
            }

            // calculate the performance metric for each different k,
            // from the largest down so the neighbour set can be pruned in place
            for (int j = cvMaxK; j > 0; j--) {
                // copy the distances: getConfidences mutates its array argument
                convertedDistances = new double[origDistances.length];
                System.arraycopy(origDistances, 0, convertedDistances, 0, origDistances.length);
                double[] confidences = this.getConfidences(neighbours, convertedDistances);
                boolean[] bipartition = null;
                // derive the predicted label set per the configured extension
                switch (extension) {
                case NONE: // BRknn
                    MultiLabelOutput results;
                    results = new MultiLabelOutput(confidences, 0.5);
                    bipartition = results.getBipartition();
                    break;
                case EXTA: // BRknn-a
                    bipartition = labelsFromConfidences2(confidences);
                    break;
                case EXTB: // BRknn-b
                    bipartition = labelsFromConfidences3(confidences);
                    break;
                }
                double symmetricDifference = 0; // |Y xor Z|
                for (int labelIndex = 0; labelIndex < numLabels; labelIndex++) {
                    boolean actual = trueLabels[labelIndex];
                    boolean predicted = bipartition[labelIndex];
                    if (predicted != actual) {
                        symmetricDifference++;
                    }
                }
                hammingLoss[j - 1] += (symmetricDifference / numLabels);

                // shrink the neighbour set to j-1 for the next iteration
                neighbours = new IBk().pruneToK(neighbours, convertedDistances, j - 1);
            }
        }

        // Display the results of the cross-validation
        if (getDebug()) {
            for (int i = cvMaxK; i > 0; i--) {
                debug("Hold-one-out performance of " + (i) + " neighbors ");
                debug("(Hamming Loss) = " + hammingLoss[i - 1] / dataSet.numInstances());
            }
        }

        // Check through the performance stats and select the best
        // k value (or the lowest k if more than one best)
        double[] searchStats = hammingLoss;
        double bestPerformance = Double.NaN;
        int bestK = 1;
        for (int i = 0; i < cvMaxK; i++) {
            if (Double.isNaN(bestPerformance) || (bestPerformance > searchStats[i])) {
                bestPerformance = searchStats[i];
                bestK = i + 1;
            }
        }
        numOfNeighbors = bestK;
        if (getDebug()) {
            System.err.println("Selected k = " + bestK);
        }
    } catch (Exception ex) {
        // NOTE(review): the original cause is dropped here (only getMessage()
        // is kept), which loses the stack trace — consider chaining ex.
        throw new Error("Couldn't optimize by cross-validation: " + ex.getMessage());
    }
}
From source file:Classifiers.BRkNN.java
License:Open Source License
/**
 * Calculates the confidences of the labels based on the neighboring
 * instances, using weight-based voting with a Laplace-style correction of
 * 1/|train| per label, then normalising to a distribution.
 *
 * NOTE(review): this method mutates the distances array in place (squares and
 * re-scales each entry); the caller crossValidate relies on the mutated
 * values when pruning the neighbour set.
 *
 * @param neighbours the list of nearest neighboring instances
 * @param distances the distances of the neighbors (modified in place)
 * @return the confidences of the labels
 */
private double[] getConfidences(Instances neighbours, double[] distances) {
    double total, weight;
    double neighborLabels = 0;
    double[] confidences = new double[numLabels];

    // Set up a correction to the estimator (Laplace-style smoothing)
    for (int i = 0; i < numLabels; i++) {
        confidences[i] = 1.0 / Math.max(1, train.numInstances());
    }
    total = (double) numLabels / Math.max(1, train.numInstances());

    for (int i = 0; i < neighbours.numInstances(); i++) {
        // Collect class counts
        Instance current = neighbours.instance(i);
        // re-scale the distance over the feature (non-label) attributes;
        // note this writes back into the caller's array
        distances[i] = distances[i] * distances[i];
        distances[i] = Math.sqrt(distances[i] / (train.numAttributes() - numLabels));
        weight = 1.0;
        weight *= current.weight();
        for (int j = 0; j < numLabels; j++) {
            // label attributes are nominal "0"/"1"; parse the value string
            double value = Double.parseDouble(
                    current.attribute(labelIndices[j]).value((int) current.value(labelIndices[j])));
            if (Utils.eq(value, 1.0)) {
                confidences[j] += weight;
                neighborLabels += weight;
            }
        }
        total += weight;
    }

    avgPredictedLabels = (int) Math.round(neighborLabels / total);
    // Normalise distribution
    if (total > 0) {
        Utils.normalize(confidences, total);
    }
    return confidences;
}
From source file:classify.Classifier.java
public static void missingValuesRows(Instances data) { int[] missingValues = new int[data.numInstances()]; for (int i = 0; i < data.numInstances(); i++) { missingValues[i] = 0;//from w w w . ja v a 2 s. c o m } Instance example; String value = ""; //get number of missing attributes per row int missValues = 0; for (int i = 0; i < data.numInstances(); i++) { example = data.instance(i); for (int j = 0; j < 15; j++) { if (example.attribute(j).isNominal()) { value = example.stringValue(j); } else if (example.attribute(j).isNumeric()) { value = Double.toString(example.value(j)); } if (value.equals("?") || value.equals("NaN")) { missingValues[i]++; missValues++; } } } System.out.println("Number of Missing Values: " + missValues); //get how many times i attributes are missing int[] frequency = new int[15]; for (int i = 0; i < data.numInstances(); i++) { frequency[missingValues[i]]++; } int numRows = 0; for (int i = 0; i < data.numInstances(); i++) { if (missingValues[i] > 0) { numRows++; } } System.out.println("Number of rows with missing values: " + numRows); System.out.println("Number of missing attributes per row:"); for (int i = 0; i < 15; i++) { System.out.println(i + ": " + frequency[i]); } }