Example usage for weka.core Instance attribute

Introduction

This page lists example usages of the weka.core Instance.attribute(int) method, collected from open source projects.

Prototype

public Attribute attribute(int index);

Document

Returns the attribute with the given index.
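
For orientation before the project examples below, here is a minimal, self-contained sketch of calling attribute(int) on an Instance; the file name "data.arff" is a placeholder for any ARFF dataset.

import java.io.BufferedReader;
import java.io.FileReader;

import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;

public class AttributeExample {
    public static void main(String[] args) throws Exception {
        // "data.arff" is a placeholder path for any ARFF dataset.
        BufferedReader reader = new BufferedReader(new FileReader("data.arff"));
        Instances data = new Instances(reader);
        reader.close();

        // attribute(int) returns the Attribute (name, type, values) at that index.
        Instance first = data.instance(0);
        Attribute att = first.attribute(0);
        System.out.println("name:    " + att.name());
        System.out.println("nominal: " + att.isNominal());
        System.out.println("numeric: " + att.isNumeric());
    }
}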

Usage

From source file:affective.core.ArffLexiconWordLabeller.java

License:Open Source License

/**
 * Processes all the dictionary files.
 * @throws IOException  an IOException will be raised if an invalid file is supplied
 */
public void processDict() throws IOException {
    BufferedReader reader = new BufferedReader(new FileReader(this.m_lexiconFile));
    Instances lexInstances = new Instances(reader); // reads the whole ARFF file
    reader.close();

    // set upper value for word index
    lexiconWordIndex.setUpper(lexInstances.numAttributes() - 1);

    // checks all numeric and nominal attributes and discards the word attribute
    for (int i = 0; i < lexInstances.numAttributes(); i++) {

        if (i != this.lexiconWordIndex.getIndex()) {
            if (lexInstances.attribute(i).isNumeric() || lexInstances.attribute(i).isNominal()) {
                this.attributes.add(lexInstances.attribute(i));
            }

        }

    }

    // Maps all words with their affective scores discarding missing values
    for (Instance inst : lexInstances) {
        if (inst.attribute(this.lexiconWordIndex.getIndex()).isString()) {
            String word = inst.stringValue(this.lexiconWordIndex.getIndex());
            // stems the word
            word = this.m_stemmer.stem(word);

            // map numeric scores
            if (!attributes.isEmpty()) {
                Map<Attribute, Double> wordVals = new HashMap<Attribute, Double>();
                for (Attribute na : attributes) {
                    wordVals.put(na, inst.value(na));
                }
                this.attValMap.put(word, wordVals);
            }

        }

    }

}

From source file:ann.MyANN.java

/**
 * Converts an Instance into a Data object.
 * @param instance the Instance to be converted into a Data object
 * @return the Data representation of the input
 */
private Data instanceToData(Instance instance) {
    ArrayList<Double> input = new ArrayList<>();
    ArrayList<Double> target = new ArrayList<>();
    for (int j = 0; j < instance.numAttributes() - 1; j++) {
        input.add(0.0);
    }
    if (instance.classAttribute().isNominal()) {
        for (int j = 0; j < instance.classAttribute().numValues(); j++) {
            target.add(0.0);
        }
    } else {
        target.add(0.0);
    }
    // Note: the indexing below assumes the class attribute is the last
    // attribute, so every non-class index fits into the input list of
    // size numAttributes() - 1.
    for (int j = 0; j < instance.numAttributes(); j++) {
        if (j == instance.classIndex()) {
            if (instance.attribute(j).isNominal())
                target.set((int) instance.value(j), 1.0); // one-hot target for a nominal class
            else
                target.set(0, instance.value(j)); // single numeric target
        } else {
            input.set(j, instance.value(j));
        }
    }
    return new Data(input, target);
}

From source file:assign00.KNNClassifier.java

// Returns the squared Euclidean distance over numeric attributes, plus a
// 0/1 mismatch penalty for non-numeric ones; the square root is omitted,
// which preserves nearest-neighbor ordering.
double EuclideanDistance(Instance instanceLHS, Instance instanceRHS) {
    double distance = 0;
    for (int i = 0; i < instanceLHS.numAttributes() - 1 && i < instanceRHS.numAttributes() - 1; i++) {
        if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) {
            distance += pow(instanceLHS.value(i) - instanceRHS.value(i), 2);
        } else {
            if (instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) {
                distance += 0;
            } else {
                distance += 1;
            }
        }
    }

    return distance;
}

From source file:assign00.KNNClassifier.java

// Returns the Manhattan (L1) distance over numeric attributes, with a
// 0/1 mismatch penalty for non-numeric ones.
double ManhattanDistance(Instance instanceLHS, Instance instanceRHS) {
    double distance = 0;
    for (int i = 0; i < instanceLHS.numAttributes() - 1 && i < instanceRHS.numAttributes() - 1; i++) {
        if (instanceLHS.attribute(i).isNumeric() && instanceRHS.attribute(i).isNumeric()) {
            distance += abs(instanceLHS.value(i) - instanceRHS.value(i));
        } else {
            if (instanceLHS.stringValue(i).equals(instanceRHS.stringValue(i))) {
                distance += 0;
            } else {
                distance += 1;
            }
        }
    }

    return distance;
}

From source file:boosting.classifiers.DecisionStumpWritable.java

License:Open Source License

/**
 * Returns the subset an instance falls into.
 *
 * @param instance the instance to check
 * @return the subset the instance falls into
 * @throws Exception if something goes wrong
 */
private int whichSubset(Instance instance) throws Exception {

    if (instance.isMissing(m_AttIndex)) {
        return 2;
    } else if (instance.attribute(m_AttIndex).isNominal()) {
        if ((int) instance.value(m_AttIndex) == m_SplitPoint) {
            return 0;
        } else {
            return 1;
        }
    } else {
        if (instance.value(m_AttIndex) <= m_SplitPoint) {
            return 0;
        } else {
            return 1;
        }
    }
}

From source file:br.puc_rio.ele.lvc.interimage.datamining.udf.BayesClassifier.java

License:Apache License

@Override
public String exec(Tuple input) throws IOException {
    if (input == null)
        return null;

    if (_trainData == null) {

        //Reads train data
        try {

            if (!_trainUrl.isEmpty()) {

                URL url = new URL(_trainUrl);
                URLConnection urlConn = url.openConnection();
                urlConn.connect();
                InputStreamReader inStream = new InputStreamReader(urlConn.getInputStream());
                BufferedReader buff = new BufferedReader(inStream);

                _trainData = _dataParser.parseData(buff);

            }
        } catch (Exception e) {
            throw new IOException("Caught exception reading training data file ", e);
        }

    }

    try {
        int numFeatures = input.size();
        double[] testData = new double[numFeatures];

        for (int i = 0; i < numFeatures; i++)
            testData[i] = DataType.toDouble(input.get(i));

        Classifier csfr = (Classifier) Class.forName("weka.classifiers.bayes.NaiveBayes").newInstance();
        csfr.buildClassifier(_trainData);
        double classification = 0;

        // Copy the first training instance as a template so that repeated
        // calls do not overwrite the training data itself.
        Instance myinstance = (Instance) _trainData.instance(0).copy();
        for (int i = 0; i < numFeatures; i++)
            myinstance.setValue(i, testData[i]);
        classification = csfr.classifyInstance(myinstance);

        return myinstance.attribute(_trainData.classIndex()).value((int) classification);

    } catch (Exception e) {
        throw new IOException("Caught exception processing input row ", e);
    }
}

From source file:br.ufrn.ia.core.clustering.EMIaProject.java

License:Open Source License

public double[] logDensityPerClusterForInstance(Instance inst) throws Exception {

    int i, j;
    double logprob;
    double[] wghts = new double[m_num_clusters];

    m_replaceMissing.input(inst);
    inst = m_replaceMissing.output();

    for (i = 0; i < m_num_clusters; i++) {
        // System.err.println("Cluster : "+i);
        logprob = 0.0;

        for (j = 0; j < m_num_attribs; j++) {
            if (!inst.isMissing(j)) {
                if (inst.attribute(j).isNominal()) {
                    logprob += Math.log(m_model[i][j].getProbability(inst.value(j)));
                } else { // numeric attribute
                    logprob += logNormalDens(inst.value(j), m_modelNormal[i][j][0], m_modelNormal[i][j][1]);
                    /*
                     * System.err.println(logNormalDens(inst.value(j),
                     * m_modelNormal[i][j][0], m_modelNormal[i][j][1]) +
                     * " ");
                     */
                }
            }
        }
        // System.err.println("");

        wghts[i] = logprob;
    }
    return wghts;
}

From source file:Classifiers.BRkNN.java

License:Open Source License

/**
 * Select the best value for k by hold-one-out cross-validation. Hamming
 * Loss is minimized.
 *
 * @throws Exception if an error occurs; to be handled by the caller
 */
private void crossValidate() throws Exception {
    try {
        // the performance for each different k
        double[] hammingLoss = new double[cvMaxK];

        for (int i = 0; i < cvMaxK; i++) {
            hammingLoss[i] = 0;
        }

        Instances dataSet = train;
        Instance instance; // the hold out instance
        Instances neighbours; // the neighboring instances
        double[] origDistances, convertedDistances;
        for (int i = 0; i < dataSet.numInstances(); i++) {
            if (getDebug() && (i % 50 == 0)) {
                debug("Cross validating " + i + "/" + dataSet.numInstances() + "\r");
            }
            instance = dataSet.instance(i);
            neighbours = lnn.kNearestNeighbours(instance, cvMaxK);
            origDistances = lnn.getDistances();

            // gathering the true labels for the instance
            boolean[] trueLabels = new boolean[numLabels];
            for (int counter = 0; counter < numLabels; counter++) {
                int classIdx = labelIndices[counter];
                String classValue = instance.attribute(classIdx).value((int) instance.value(classIdx));
                trueLabels[counter] = classValue.equals("1");
            }
            // calculate the performance metric for each different k
            for (int j = cvMaxK; j > 0; j--) {
                convertedDistances = new double[origDistances.length];
                System.arraycopy(origDistances, 0, convertedDistances, 0, origDistances.length);
                double[] confidences = this.getConfidences(neighbours, convertedDistances);
                boolean[] bipartition = null;

                switch (extension) {
                case NONE: // BRknn
                    MultiLabelOutput results;
                    results = new MultiLabelOutput(confidences, 0.5);
                    bipartition = results.getBipartition();
                    break;
                case EXTA: // BRknn-a
                    bipartition = labelsFromConfidences2(confidences);
                    break;
                case EXTB: // BRknn-b
                    bipartition = labelsFromConfidences3(confidences);
                    break;
                }

                double symmetricDifference = 0; // |Y xor Z|
                for (int labelIndex = 0; labelIndex < numLabels; labelIndex++) {
                    boolean actual = trueLabels[labelIndex];
                    boolean predicted = bipartition[labelIndex];

                    if (predicted != actual) {
                        symmetricDifference++;
                    }
                }
                hammingLoss[j - 1] += (symmetricDifference / numLabels);

                neighbours = new IBk().pruneToK(neighbours, convertedDistances, j - 1);
            }
        }

        // Display the results of the cross-validation
        if (getDebug()) {
            for (int i = cvMaxK; i > 0; i--) {
                debug("Hold-one-out performance of " + (i) + " neighbors ");
                debug("(Hamming Loss) = " + hammingLoss[i - 1] / dataSet.numInstances());
            }
        }

        // Check through the performance stats and select the best
        // k value (or the lowest k if more than one best)
        double[] searchStats = hammingLoss;

        double bestPerformance = Double.NaN;
        int bestK = 1;
        for (int i = 0; i < cvMaxK; i++) {
            if (Double.isNaN(bestPerformance) || (bestPerformance > searchStats[i])) {
                bestPerformance = searchStats[i];
                bestK = i + 1;
            }
        }
        numOfNeighbors = bestK;
        if (getDebug()) {
            System.err.println("Selected k = " + bestK);
        }

    } catch (Exception ex) {
        throw new Error("Couldn't optimize by cross-validation: " + ex.getMessage());
    }
}

From source file:Classifiers.BRkNN.java

License:Open Source License

/**
 * Calculates the confidences of the labels, based on the neighboring
 * instances.
 *
 * @param neighbours
 *            the list of nearest neighboring instances
 * @param distances
 *            the distances of the neighbors
 * @return the confidences of the labels
 */
private double[] getConfidences(Instances neighbours, double[] distances) {
    double total, weight;
    double neighborLabels = 0;
    double[] confidences = new double[numLabels];

    // Set up a correction to the estimator
    for (int i = 0; i < numLabels; i++) {
        confidences[i] = 1.0 / Math.max(1, train.numInstances());
    }
    total = (double) numLabels / Math.max(1, train.numInstances());

    for (int i = 0; i < neighbours.numInstances(); i++) {
        // Collect class counts
        Instance current = neighbours.instance(i);
        distances[i] = distances[i] * distances[i];
        distances[i] = Math.sqrt(distances[i] / (train.numAttributes() - numLabels));
        weight = current.weight();

        for (int j = 0; j < numLabels; j++) {
            double value = Double.parseDouble(
                    current.attribute(labelIndices[j]).value((int) current.value(labelIndices[j])));
            if (Utils.eq(value, 1.0)) {
                confidences[j] += weight;
                neighborLabels += weight;
            }
        }
        total += weight;
    }

    avgPredictedLabels = (int) Math.round(neighborLabels / total);
    // Normalise distribution
    if (total > 0) {
        Utils.normalize(confidences, total);
    }
    return confidences;
}

From source file:classify.Classifier.java

public static void missingValuesRows(Instances data) {
    int[] missingValues = new int[data.numInstances()]; // elements default to 0
    Instance example;
    String value = "";
    //get number of missing attributes per row
    int missValues = 0;
    for (int i = 0; i < data.numInstances(); i++) {
        example = data.instance(i);
        for (int j = 0; j < 15; j++) { // hardcoded: assumes the dataset has 15 attributes
            if (example.attribute(j).isNominal()) {
                value = example.stringValue(j);
            } else if (example.attribute(j).isNumeric()) {
                value = Double.toString(example.value(j));
            }
            if (value.equals("?") || value.equals("NaN")) {
                missingValues[i]++;
                missValues++;
            }
        }
    }
    System.out.println("Number of Missing Values: " + missValues);
    //get how many times i attributes are missing
    int[] frequency = new int[16]; // 16 bins: a row can have 0..15 missing values
    for (int i = 0; i < data.numInstances(); i++) {
        frequency[missingValues[i]]++;
    }
    int numRows = 0;
    for (int i = 0; i < data.numInstances(); i++) {
        if (missingValues[i] > 0) {
            numRows++;
        }
    }
    System.out.println("Number of rows with missing values: " + numRows);
    System.out.println("Number of missing attributes per row:");
    for (int i = 0; i <= 15; i++) {
        System.out.println(i + ": " + frequency[i]);
    }
}