Example usage for weka.core Instance value

Introduction

On this page you can find example usages of weka.core Instance.value.

Prototype

public double value(Attribute att);

Document

Returns an instance's attribute value in internal format.
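
Most of the examples below use the index-based overload, value(int attIndex), which returns the same internal-format value (for a nominal attribute, the internal value is the zero-based index of the selected label, stored as a double). A minimal, self-contained sketch of both overloads, assuming a hypothetical ARFF file named data.arff:

import java.io.FileReader;

import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;

public class ValueDemo {
    public static void main(String[] args) throws Exception {
        Instances data = new Instances(new FileReader("data.arff")); // hypothetical file
        data.setClassIndex(data.numAttributes() - 1);

        Instance first = data.instance(0);

        // access by Attribute object, as in the prototype above
        Attribute att = data.attribute(0);
        double byAttribute = first.value(att);

        // access by index; returns the same internal-format value
        double byIndex = first.value(0);

        System.out.println(byAttribute + " == " + byIndex);
    }
}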

Usage

From source file:cs.man.ac.uk.moawrappers.WekaClassifier.java

License:Open Source License

/**
 * Obtains the features (attributes) belonging to an instance within the ARFF
 * file being classified. Returns these in a comma-delimited string.
 * @param instance the instance whose score values should be obtained.
 * @param attributes number of feature attributes to expect.
 * @return a comma-delimited string containing the scores.
 */
public String getFeatures(Instance instance, int attributes) {
    StringBuilder data = new StringBuilder();
    for (int i = 0; i < attributes; i++)
        data.append(instance.value(i)).append(",");

    return data.toString();
}
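
A hypothetical call site for this method (the data Instances object and the attribute count are assumptions, not part of the original source):

// e.g., pass all attributes except the class attribute
String csv = getFeatures(data.instance(0), data.numAttributes() - 1);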

From source file:cyber009.udal.functions.LinearFunction.java

public void syntacticLabelFunction(Instance set) {
    double sum = 0.0D;
    for (int n = 0; n < set.numAttributes() - 1; n++) {
        sum += set.value(n) * coefficients[n];
    }
    if (sum < 0.0D) {
        set.setClassValue("1");
    } else {
        set.setClassValue("0");
    }
}
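
This assigns a class label from the sign of a linear combination of the non-class attribute values; the coefficients array is a field of the surrounding class and is not shown in this listing.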

From source file:cyber009.udal.functions.StatisticalAnalysis.java

public static boolean instanceCMPWithoutClass(Instance a, Instance b) {
    if (a.numAttributes() == b.numAttributes()) {
        for (int i = 0; i < a.numAttributes(); i++) {
            // "without class": skip the class attribute when comparing
            if (i == a.classIndex()) {
                continue;
            }
            if (a.value(i) != b.value(i)) {
                return false;
            }
        }
    } else {
        return false;
    }
    return true;
}

From source file:data.generation.target.utils.PrincipalComponents.java

License:Open Source License

/**
 * Convert a pc transformed instance back to the original space
 *
 * @param inst        the instance to convert
 * @return            the processed instance
 * @throws Exception  if something goes wrong
 */
private Instance convertInstanceToOriginal(Instance inst) throws Exception {
    double[] newVals = null;

    if (m_hasClass) {
        newVals = new double[m_numAttribs + 1];
    } else {
        newVals = new double[m_numAttribs];
    }

    if (m_hasClass) {
        // class is always appended as the last attribute
        newVals[m_numAttribs] = inst.value(inst.numAttributes() - 1);
    }

    for (int i = 0; i < m_eTranspose[0].length; i++) {
        double tempval = 0.0;
        for (int j = 1; j < m_eTranspose.length; j++) {
            tempval += (m_eTranspose[j][i] * inst.value(j - 1));
        }
        newVals[i] = tempval;
        if (!m_center) {
            newVals[i] *= m_stdDevs[i];
        }
        newVals[i] += m_means[i];
    }

    if (inst instanceof SparseInstance) {
        return new SparseInstance(inst.weight(), newVals);
    } else {
        return new Instance(inst.weight(), newVals);
    }
}
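
Note that new Instance(weight, vals) is the Weka 3.6 constructor; since Weka 3.7, Instance is an interface and the equivalent dense class is weka.core.DenseInstance. A sketch of the same return statement for the newer API:

// Weka >= 3.7 equivalent (requires import weka.core.DenseInstance)
if (inst instanceof SparseInstance) {
    return new SparseInstance(inst.weight(), newVals);
} else {
    return new DenseInstance(inst.weight(), newVals);
}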

From source file:data.generation.target.utils.PrincipalComponents.java

License:Open Source License

/**
 * Transform an instance in original (unnormalized) format. Convert back
 * to the original space if requested.
 * @param instance an instance in the original (unnormalized) format
 * @return a transformed instance
 * @throws Exception if the instance can't be transformed
 */
public Instance convertInstance(Instance instance) throws Exception {

    if (m_eigenvalues == null) {
        throw new Exception("convertInstance: Principal components not " + "built yet");
    }

    double[] newVals = new double[m_outputNumAtts];
    Instance tempInst = (Instance) instance.copy();
    if (!instance.dataset().equalHeaders(m_trainHeader)) {
        throw new Exception("PrincipalComponents: can't convert instance, headers don't match.");
    }

    m_replaceMissingFilter.input(tempInst);
    m_replaceMissingFilter.batchFinished();
    tempInst = m_replaceMissingFilter.output();

    /*if (m_normalize) {
      m_normalizeFilter.input(tempInst);
      m_normalizeFilter.batchFinished();
      tempInst = m_normalizeFilter.output();
    }*/

    m_nominalToBinFilter.input(tempInst);
    m_nominalToBinFilter.batchFinished();
    tempInst = m_nominalToBinFilter.output();

    if (m_attributeFilter != null) {
        m_attributeFilter.input(tempInst);
        m_attributeFilter.batchFinished();
        tempInst = m_attributeFilter.output();
    }

    if (!m_center) {
        m_standardizeFilter.input(tempInst);
        m_standardizeFilter.batchFinished();
        tempInst = m_standardizeFilter.output();
    } else {
        m_centerFilter.input(tempInst);
        m_centerFilter.batchFinished();
        tempInst = m_centerFilter.output();
    }

    if (m_hasClass) {
        newVals[m_outputNumAtts - 1] = instance.value(instance.classIndex());
    }

    double cumulative = 0;
    for (int i = m_numAttribs - 1; i >= 0; i--) {
        double tempval = 0.0;
        for (int j = 0; j < m_numAttribs; j++) {
            tempval += (m_eigenvectors[j][m_sortedEigens[i]] * tempInst.value(j));
        }
        newVals[m_numAttribs - i - 1] = tempval;
        cumulative += m_eigenvalues[m_sortedEigens[i]];
        if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) {
            break;
        }
    }

    if (!m_transBackToOriginal) {
        if (instance instanceof SparseInstance) {
            return new SparseInstance(instance.weight(), newVals);
        } else {
            return new Instance(instance.weight(), newVals);
        }
    } else {
        if (instance instanceof SparseInstance) {
            return convertInstanceToOriginal(new SparseInstance(instance.weight(), newVals));
        } else {
            return convertInstanceToOriginal(new Instance(instance.weight(), newVals));
        }
    }
}

From source file:dataHandlers.DataClusterHandler.java

private void saveClusterInformation() {
    this.clusters = new String[dataGraph.getNumClusters()];

    for (int index = 0; index < userPoints.numInstances(); index++) {
        Instance i = userPoints.instance(index);
        int clusterID = getRelativeCluster(i);
        if (clusters[clusterID] == null) {
            clusters[clusterID] = "" + (int) i.value(0);
        } else {
            clusters[clusterID] = clusters[clusterID] + "," + (int) i.value(0);
        }
    }
}
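
Here value(0) reads the first attribute, which evidently stores a numeric user ID, and getRelativeCluster (not shown) presumably maps an instance to the index of its cluster.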

From source file:de.tudarmstadt.ukp.similarity.experiments.coling2012.util.Evaluator.java

License:Open Source License

public static void runClassifierCV(WekaClassifier wekaClassifier, Dataset dataset) throws Exception {
    // Set parameters
    int folds = 10;
    Classifier baseClassifier = getClassifier(wekaClassifier);

    // Set up the random number generator
    long seed = new Date().getTime();
    Random random = new Random(seed);

    // Add IDs to the instances
    AddID.main(new String[] { "-i", MODELS_DIR + "/" + dataset.toString() + ".arff", "-o",
            MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff" });
    Instances data = DataSource.read(MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff");
    data.setClassIndex(data.numAttributes() - 1);

    // Instantiate the Remove filter
    Remove removeIDFilter = new Remove();
    removeIDFilter.setAttributeIndices("first");

    // Randomize the data
    data.randomize(random);

    // Perform cross-validation
    Instances predictedData = null;
    Evaluation eval = new Evaluation(data);

    for (int n = 0; n < folds; n++) {
        Instances train = data.trainCV(folds, n, random);
        Instances test = data.testCV(folds, n);

        // Apply log filter
        //          Filter logFilter = new LogFilter();
        //           logFilter.setInputFormat(train);
        //           train = Filter.useFilter(train, logFilter);        
        //           logFilter.setInputFormat(test);
        //           test = Filter.useFilter(test, logFilter);

        // Copy the classifier
        Classifier classifier = AbstractClassifier.makeCopy(baseClassifier);

        // Instantiate the FilteredClassifier
        FilteredClassifier filteredClassifier = new FilteredClassifier();
        filteredClassifier.setFilter(removeIDFilter);
        filteredClassifier.setClassifier(classifier);

        // Build the classifier
        filteredClassifier.buildClassifier(train);

        // Evaluate
        eval.evaluateModel(filteredClassifier, test);

        // Add predictions
        AddClassification filter = new AddClassification();
        filter.setClassifier(filteredClassifier);
        filter.setOutputClassification(true);
        filter.setOutputDistribution(false);
        filter.setOutputErrorFlag(true);
        filter.setInputFormat(train);
        Filter.useFilter(train, filter); // trains the classifier

        Instances pred = Filter.useFilter(test, filter); // performs predictions on test set
        if (predictedData == null)
            predictedData = new Instances(pred, 0);
        for (int j = 0; j < pred.numInstances(); j++)
            predictedData.add(pred.instance(j));
    }

    // Prepare output classification
    String[] scores = new String[predictedData.numInstances()];

    for (Instance predInst : predictedData) {
        int id = (int) predInst.value(predInst.attribute(0)) - 1;

        int valueIdx = predictedData.numAttributes() - 2;

        String value = predInst.stringValue(predInst.attribute(valueIdx));

        scores[id] = value;
    }

    // Output
    StringBuilder sb = new StringBuilder();
    for (String score : scores)
        sb.append(score).append(LF);

    FileUtils.writeStringToFile(
            new File(OUTPUT_DIR + "/" + dataset.toString() + "/" + wekaClassifier.toString() + "/output.csv"),
            sb.toString());
}

From source file:de.ugoe.cs.cpdp.dataprocessing.CLAMIProcessor.java

License:Apache License

/**
 * <p>
 * Applies the CLAMI processor to the data. The test data is also required, in order to
 * guarantee a consistent metric set.
 * </p>
 *
 * @param testdata
 *            test data; the data is not modified, only metrics are dropped
 * @param data
 *            data to which the CLAMI processor is applied
 */
public void applyCLAMI(Instances testdata, Instances data) {

    // first determine medians
    double[] medians = new double[data.numAttributes()];
    // get medians
    for (int j = 0; j < data.numAttributes(); j++) {
        if (j != data.classIndex()) {
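            // (numInstances + 1) >> 1 is the 1-based rank of the median for kthSmallestValue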
            medians[j] = data.kthSmallestValue(j, (data.numInstances() + 1) >> 1);
        }
    }
    // now determine cluster number for each instance
    double[] clusterNumber = new double[data.numInstances()];
    for (int i = 0; i < data.numInstances(); i++) {
        int countHighValues = 0;
        Instance currentInstance = data.get(i);
        for (int j = 0; j < data.numAttributes(); j++) {
            if (j != data.classIndex()) {
                if (currentInstance.value(j) > medians[j]) {
                    countHighValues++;
                }
            }
        }
        clusterNumber[i] = countHighValues;
    }

    // determine median of cluster number
    Median m = new Median();
    double medianClusterNumber = m.evaluate(clusterNumber);

    // now we filter the metrics
    int[] numMetricViolations = new int[data.numAttributes()];
    for (int j = 0; j < data.numAttributes(); j++) {
        int currentViolations = 0;
        for (int i = 0; i < data.numInstances(); i++) {
            Instance currentInstance = data.get(i);
            if (j != data.classIndex()) {
                if (clusterNumber[i] > medianClusterNumber) {
                    // "buggy"
                    if (currentInstance.value(j) <= medians[j]) {
                        currentViolations++;
                    }
                } else {
                    // "not buggy"
                    if (currentInstance.value(j) > medians[j]) {
                        currentViolations++;
                    }
                }
            }
        }
        numMetricViolations[j] = currentViolations;
    }

    SortedSet<Integer> distinctViolationCounts = new TreeSet<>();
    for (int currentViolations : numMetricViolations) {
        distinctViolationCounts.add(currentViolations);
    }
    Iterator<Integer> violationCountIterator = distinctViolationCounts.iterator();

    // the smallest violation count is always 0 (contributed by the class
    // attribute itself) and is consumed here, before the search loop below
    int violationCutoff = violationCountIterator.next();
    // now we filter the data;
    // this is first tried with the metrics with fewest violations. if no buggy/bugfree
    // instances remain, this is repeated with the next metrics with second fewest violations,
    // and so on.
    // this part is a bit unclear from the description in the paper, but I confirmed with the
    // author that this is how they implemented it
    boolean[] cleanInstances = new boolean[data.numInstances()];
    int numCleanBuggyInstances = 0;
    int numCleanBugfreeInstances = 0;
    do {
        violationCutoff = violationCountIterator.next();
        cleanInstances = new boolean[data.numInstances()];
        numCleanBuggyInstances = 0;
        numCleanBugfreeInstances = 0;
        for (int i = 0; i < data.numInstances(); i++) {
            int currentViolations = 0;
            Instance currentInstance = data.get(i);
            for (int j = 0; j < data.numAttributes(); j++) {
                if (j != data.classIndex() && numMetricViolations[j] == violationCutoff) {
                    if (clusterNumber[i] > medianClusterNumber) {
                        // "buggy"
                        if (currentInstance.value(j) <= medians[j]) {
                            currentViolations++;
                        }
                    } else {
                        // "not buggy"
                        if (currentInstance.value(j) > medians[j]) {
                            currentViolations++;
                        }
                    }
                }
            }
            if (currentViolations == 0) {
                cleanInstances[i] = true;
                if (clusterNumber[i] > medianClusterNumber) {
                    numCleanBuggyInstances++;
                } else {
                    numCleanBugfreeInstances++;
                }
            } else {
                cleanInstances[i] = false;
            }
        }
    } while (numCleanBuggyInstances == 0 || numCleanBugfreeInstances == 0);

    // output some interesting information to provide insights into the CLAMI model
    Console.traceln(Level.FINE, "Selected Metrics and Median-threshold: ");
    for (int j = 0; j < data.numAttributes(); j++) {
        if (j != data.classIndex() && numMetricViolations[j] == violationCutoff) {
            Console.traceln(Level.FINE, "\t" + data.attribute(j).name() + ": " + medians[j]);
        }
    }

    // finally modify the instances
    // drop the metrics (also from the testdata)
    for (int j = data.numAttributes() - 1; j >= 0; j--) {
        if (j != data.classIndex() && numMetricViolations[j] != violationCutoff) {
            data.deleteAttributeAt(j);
            testdata.deleteAttributeAt(j);
        }
    }
    // drop the unclean instances
    for (int i = data.numInstances() - 1; i >= 0; i--) {
        if (!cleanInstances[i]) {
            data.delete(i);
        } else {
            // set the classification
            if (clusterNumber[i] > medianClusterNumber) {
                data.get(i).setClassValue(1.0d);
            } else {
                data.get(i).setClassValue(0.0d);
            }
        }
    }
}

From source file:de.ugoe.cs.cpdp.dataprocessing.CLAProcessor.java

License:Apache License

/**
 * Applies the CLA processor to the data.
 *
 * @param data
 *            data to which the processor is applied
 */
public void applyCLA(Instances data) {
    // first determine medians
    double[] medians = new double[data.numAttributes()];
    // get medians
    for (int j = 0; j < data.numAttributes(); j++) {
        if (j != data.classIndex()) {
            medians[j] = data.kthSmallestValue(j, (data.numInstances() + 1) >> 1);
        }
    }
    // now determine cluster number for each instance
    double[] clusterNumber = new double[data.numInstances()];
    for (int i = 0; i < data.numInstances(); i++) {
        int countHighValues = 0;
        Instance currentInstance = data.get(i);
        for (int j = 0; j < data.numAttributes(); j++) {
            if (j != data.classIndex()) {
                if (currentInstance.value(j) > medians[j]) {
                    countHighValues++;
                }
            }
        }
        clusterNumber[i] = countHighValues;
    }

    // determine median of cluster number
    Median m = new Median();
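    // note: unlike CLAMI, the median here is computed over the distinct cluster numbers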
    double medianClusterNumber = m.evaluate(Arrays.stream(clusterNumber).distinct().toArray());

    // finally modify the instances: set the classification based on the
    // cluster number (unlike CLAMI, no instances are dropped here)
    for (int i = data.numInstances() - 1; i >= 0; i--) {
        if (clusterNumber[i] > medianClusterNumber) {
            data.get(i).setClassValue(1.0d);
        } else {
            data.get(i).setClassValue(0.0d);
        }
    }
}

From source file:de.ugoe.cs.cpdp.dataprocessing.MORPH.java

License:Apache License

/**
 * <p>
 * Applies MORPH to a single instance
 * </p>
 *
 * @param instance
 *            instance that is morphed
 * @param data
 *            data based on which the instance is morphed
 */
public void morphInstance(Instance instance, Instances data) {
    Instance nearestUnlikeNeighbor = getNearestUnlikeNeighbor(instance, data);
    if (nearestUnlikeNeighbor == null) {
        throw new RuntimeException(
                "could not find nearest unlike neighbor within the data: " + data.relationName());
    }
    for (int j = 0; j < data.numAttributes(); j++) {
        if (data.attribute(j) != data.classAttribute() && data.attribute(j).isNumeric()) {
            double randVal = rand.nextDouble() * (beta - alpha) + alpha;
            instance.setValue(j,
                    instance.value(j) + randVal * (instance.value(j) - nearestUnlikeNeighbor.value(j)));
        }
    }
}
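
The helper getNearestUnlikeNeighbor is not shown in this listing. A minimal sketch of what such a helper might look like, assuming plain squared Euclidean distance over the numeric attributes (the repository's actual implementation may differ, e.g. in its distance function):

// hypothetical sketch, not the repository's implementation
private Instance getNearestUnlikeNeighbor(Instance instance, Instances data) {
    Instance nearest = null;
    double minDistance = Double.MAX_VALUE;
    for (int i = 0; i < data.numInstances(); i++) {
        Instance candidate = data.get(i);
        // "unlike" neighbor: only consider instances with a different class label
        if (candidate.classValue() == instance.classValue()) {
            continue;
        }
        double distance = 0.0;
        for (int j = 0; j < data.numAttributes(); j++) {
            if (j != data.classIndex() && data.attribute(j).isNumeric()) {
                double diff = instance.value(j) - candidate.value(j);
                distance += diff * diff;
            }
        }
        if (distance < minDistance) {
            minDistance = distance;
            nearest = candidate;
        }
    }
    return nearest; // null if no instance has a different class, as checked by the caller
}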