Example usage for weka.core Instances instance

List of usage examples for weka.core Instances instance

Introduction

On this page you can find example usage for the weka.core Instances instance method.

Prototype



public Instance instance(int index)

Document

Returns the instance at the given position.
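
A minimal sketch of how instance(int) is typically called (the ARFF path and class name here are placeholders, and the dataset is assumed to have its class as the last attribute):

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class InstanceAccessSketch {
    public static void main(String[] args) throws Exception {
        // Load a dataset (the path is a placeholder).
        Instances data = DataSource.read("data.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // Access each row by its position in the dataset.
        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);
            System.out.println(i + ": " + inst);
        }
    }
}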

Usage

From source file: dkpro.similarity.experiments.sts2013baseline.util.Evaluator.java

License: Open Source License

public static void runLinearRegressionCV(Mode mode, Dataset... datasets) throws Exception {
    for (Dataset dataset : datasets) {
        // Set parameters
        int folds = 10;
        Classifier baseClassifier = new LinearRegression();

        // Set up the random number generator
        long seed = new Date().getTime();
        Random random = new Random(seed);

        // Add IDs to the instances
        AddID.main(new String[] { "-i",
                MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + ".arff", "-o",
                MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString()
                        + "-plusIDs.arff" });

        String location = MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString()
                + "-plusIDs.arff";

        Instances data = DataSource.read(location);

        if (data == null) {
            throw new IOException("Could not load data from: " + location);
        }

        data.setClassIndex(data.numAttributes() - 1);

        // Instantiate the Remove filter
        Remove removeIDFilter = new Remove();
        removeIDFilter.setAttributeIndices("first");

        // Randomize the data
        data.randomize(random);

        // Perform cross-validation
        Instances predictedData = null;
        Evaluation eval = new Evaluation(data);

        for (int n = 0; n < folds; n++) {
            Instances train = data.trainCV(folds, n, random);
            Instances test = data.testCV(folds, n);

            // Apply log filter
            Filter logFilter = new LogFilter();
            logFilter.setInputFormat(train);
            train = Filter.useFilter(train, logFilter);
            logFilter.setInputFormat(test);
            test = Filter.useFilter(test, logFilter);

            // Copy the classifier
            Classifier classifier = AbstractClassifier.makeCopy(baseClassifier);

            // Instantiate the FilteredClassifier
            FilteredClassifier filteredClassifier = new FilteredClassifier();
            filteredClassifier.setFilter(removeIDFilter);
            filteredClassifier.setClassifier(classifier);

            // Build the classifier
            filteredClassifier.buildClassifier(train);

            // Evaluate
            eval.evaluateModel(classifier, test);

            // Add predictions
            AddClassification filter = new AddClassification();
            filter.setClassifier(classifier);
            filter.setOutputClassification(true);
            filter.setOutputDistribution(false);
            filter.setOutputErrorFlag(true);
            filter.setInputFormat(train);
            Filter.useFilter(train, filter); // trains the classifier

            Instances pred = Filter.useFilter(test, filter); // performs predictions on test set
            if (predictedData == null) {
                predictedData = new Instances(pred, 0);
            }
            for (int j = 0; j < pred.numInstances(); j++) {
                predictedData.add(pred.instance(j));
            }
        }

        // Prepare output scores
        double[] scores = new double[predictedData.numInstances()];

        for (Instance predInst : predictedData) {
            int id = new Double(predInst.value(predInst.attribute(0))).intValue() - 1;

            int valueIdx = predictedData.numAttributes() - 2;

            double value = predInst.value(predInst.attribute(valueIdx));

            scores[id] = value;

            // Limit to interval [0;5]
            if (scores[id] > 5.0) {
                scores[id] = 5.0;
            }
            if (scores[id] < 0.0) {
                scores[id] = 0.0;
            }
        }

        // Output
        StringBuilder sb = new StringBuilder();
        for (Double score : scores) {
            sb.append(score.toString() + LF);
        }

        FileUtils.writeStringToFile(
                new File(OUTPUT_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + ".csv"),
                sb.toString());
    }
}

From source file: edu.columbia.cs.ltrie.sampling.queries.generation.ChiSquaredWithYatesCorrectionAttributeEval.java

License: Open Source License

/**
 * Initializes a chi-squared attribute evaluator.
 * Discretizes all attributes that are numeric.
 *
 * @param data set of instances serving as training data 
 * @throws Exception if the evaluator has not been 
 * generated successfully
 */
public void buildEvaluator(Instances data) throws Exception {

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    int classIndex = data.classIndex();
    int numInstances = data.numInstances();

    if (!m_Binarize) {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    } else {
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    }
    int numClasses = data.attribute(classIndex).numValues();

    // Reserve space and initialize counters
    double[][][] counts = new double[data.numAttributes()][][];
    for (int k = 0; k < data.numAttributes(); k++) {
        if (k != classIndex) {
            int numValues = data.attribute(k).numValues();
            counts[k] = new double[numValues + 1][numClasses + 1];
        }
    }

    // Initialize counters
    double[] temp = new double[numClasses + 1];
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        if (inst.classIsMissing()) {
            temp[numClasses] += inst.weight();
        } else {
            temp[(int) inst.classValue()] += inst.weight();
        }
    }
    for (int k = 0; k < counts.length; k++) {
        if (k != classIndex) {
            for (int i = 0; i < temp.length; i++) {
                counts[k][0][i] = temp[i];
            }
        }
    }

    // Get counts
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != classIndex) {
                if (inst.isMissingSparse(i) || inst.classIsMissing()) {
                    if (!inst.isMissingSparse(i)) {
                        counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    } else if (!inst.classIsMissing()) {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst
                                .classValue()] += inst.weight();
                        counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                    } else {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst
                                .weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    }
                } else {
                    counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight();
                    counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                }
            }
        }
    }

    // distribute missing counts if required
    if (m_missing_merge) {

        for (int k = 0; k < data.numAttributes(); k++) {
            if (k != classIndex) {
                int numValues = data.attribute(k).numValues();

                // Compute marginals
                double[] rowSums = new double[numValues];
                double[] columnSums = new double[numClasses];
                double sum = 0;
                for (int i = 0; i < numValues; i++) {
                    for (int j = 0; j < numClasses; j++) {
                        rowSums[i] += counts[k][i][j];
                        columnSums[j] += counts[k][i][j];
                    }
                    sum += rowSums[i];
                }

                if (Utils.gr(sum, 0)) {
                    double[][] additions = new double[numValues][numClasses];

                    // Compute what needs to be added to each row
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j];
                        }
                    }

                    // Compute what needs to be added to each column
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses];
                        }
                    }

                    // Compute what needs to be added to each cell
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses];
                        }
                    }

                    // Make new contingency table
                    double[][] newTable = new double[numValues][numClasses];
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            newTable[i][j] = counts[k][i][j] + additions[i][j];
                        }
                    }
                    counts[k] = newTable;
                }
            }
        }
    }

    // Compute chi-squared values
    m_ChiSquareds = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (i != classIndex) {
            m_ChiSquareds[i] = chiVal(ContingencyTables.reduceMatrix(counts[i]));
        }
    }
}

From source file: edu.drexel.psal.jstylo.verifiers.WLSVM.java

License: Open Source License

/**
 * converts an ARFF dataset into sparse format
 *
 * @param instances
 * @return
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
protected Vector DataToSparse(Instances data) {
    Vector sparse = new Vector(data.numInstances() + 1);

    for (int i = 0; i < data.numInstances(); i++) { // for each instance
        sparse.add(InstanceToSparse(data.instance(i)));
    }
    return sparse;
}

From source file: edu.stanford.rsl.conrad.segmentation.GridFeatureExtractor.java

License: Open Source License

public void saveInstances(String s) throws IOException {
    if (Configuration.getGlobalConfiguration().getRegistryEntry(RegKeys.CLASSIFIER_DATA_LOCATION) != null) {
        BufferedWriter bw = new BufferedWriter(new FileWriter(
                Configuration.getGlobalConfiguration().getRegistryEntry(RegKeys.CLASSIFIER_DATA_LOCATION) + "_"
                        + s));
        System.out.println("Saving: " + s);

        //bw.write(getInstances().toString());

        Instances inst = getInstances();
        StringBuffer text = new StringBuffer();

        text.append("@relation").append(" ").append(Utils.quote("testing")).append("\n\n");
        for (int i = 0; i < inst.numAttributes(); i++) {
            text.append(inst.attribute(i)).append("\n");
        }
        text.append("\n").append("@data").append("\n");
        bw.write(text.toString());

        for (int i = 0; i < inst.numInstances(); i++) {
            text = new StringBuffer();
            text.append(inst.instance(i));
            if (i < inst.numInstances() - 1) {
                text.append('\n');
            }
            bw.write(text.toString());
        }
        bw.flush();
        bw.close();
        System.out.println("Done.");
    }
}

From source file: edu.uga.cs.fluxbuster.classification.Classifier.java

License: Open Source License

/**
 * Executes the classifier.
 * 
 * @param prepfeatures the prepared features in arff format
 * @param modelfile the path to the serialized model
 * @param clusters the clusters to classify
 * @return a map of the classified clusters, the keys are the classes
 *       and the values are lists of cluster id's belonging to those classes
 */
private Map<ClusterClass, List<StoredDomainCluster>> executeClassifier(String prepfeatures, String modelfile,
        List<StoredDomainCluster> clusters) {
    Map<ClusterClass, List<StoredDomainCluster>> retval = new HashMap<ClusterClass, List<StoredDomainCluster>>();
    try {
        DataSource source = new DataSource(new ByteArrayInputStream(prepfeatures.getBytes()));
        Instances data = source.getDataSet();
        if (data.classIndex() == -1) {
            data.setClassIndex(data.numAttributes() - 1);
        }
        String[] options = weka.core.Utils.splitOptions("-p 0");
        J48 cls = (J48) weka.core.SerializationHelper.read(modelfile);
        cls.setOptions(options);
        for (int i = 0; i < data.numInstances(); i++) {
            double pred = cls.classifyInstance(data.instance(i));
            ClusterClass clusClass = ClusterClass
                    .valueOf(data.classAttribute().value((int) pred).toUpperCase());
            if (!retval.containsKey(clusClass)) {
                retval.put(clusClass, new ArrayList<StoredDomainCluster>());
            }
            retval.get(clusClass).add(clusters.get(i));
        }
    } catch (Exception e) {
        if (log.isErrorEnabled()) {
            log.error("Error executing classifier.", e);
        }
    }
    return retval;
}

From source file: edu.umbc.cs.maple.utils.WekaUtils.java

License: Open Source License

/** Merge two instance sets.
 * @param instances1
 * @param instances2
 * @return the merged instance sets
 */
public static Instances mergeInstances(Instances instances1, Instances instances2) {
    if (instances1 == null)
        return instances2;
    if (instances2 == null)
        return instances1;
    if (!instances1.checkInstance(instances2.firstInstance()))
        throw new IllegalArgumentException("The instance sets are incompatible.");
    Instances mergedInstances = new Instances(instances1);
    Instances tempInstances = new Instances(instances2);
    for (int i = 0; i < tempInstances.numInstances(); i++) {
        mergedInstances.add(tempInstances.instance(i));
    }
    return mergedInstances;
}

From source file: edu.umbc.cs.maple.utils.WekaUtils.java

License: Open Source License

/**
 * Converts a set of instances to an array of vectors
 * @param instances The set of instances.
 * @return The array of feature vectors.
 */
public static double[][] instancesToDoubleArrays(Instances instances) {
    double[][] vectors = new double[instances.numInstances()][];
    for (int instIdx = 0; instIdx < instances.numInstances(); instIdx++) {
        vectors[instIdx] = instanceToDoubleArray(instances.instance(instIdx));
    }
    return vectors;
}

From source file: edu.umbc.cs.maple.utils.WekaUtils.java

License: Open Source License

/** Uses the given model to predict the classes of the data.
 * @param model
 * @param data
 * @return An array of the class predictions.
 */
public static int[] predictClasses(Classifier model, Instances data) {
    int[] predictions = new int[data.numInstances()];
    int numInstances = data.numInstances();
    for (int instIdx = 0; instIdx < numInstances; instIdx++) {
        try {
            predictions[instIdx] = (int) model.classifyInstance(data.instance(instIdx));
        } catch (Exception e) {
            predictions[instIdx] = -1;
        }
    }
    return predictions;
}

From source file: edu.umbc.cs.maple.utils.WekaUtils.java

License: Open Source License

/** Gets the class labels for a set of instances.
 * @param data
 * @return a vector of the class labels for the data set, with one entry per instance
 */
public static int[] getLabels(Instances data) {
    int[] classLabels = new int[data.numInstances()];
    for (int instIdx = 0; instIdx < classLabels.length; instIdx++) {
        classLabels[instIdx] = (int) data.instance(instIdx).classValue();
    }
    return classLabels;
}

From source file: edu.umbc.cs.maple.utils.WekaUtils.java

License: Open Source License

/** Gets the class values for a set of instances.
 * @param data
 * @return a vector of the class values for the data set, with one entry per instance
 */
public static double[] getClassValues(Instances data) {
    double[] classLabels = new double[data.numInstances()];
    for (int instIdx = 0; instIdx < classLabels.length; instIdx++) {
        classLabels[instIdx] = data.instance(instIdx).classValue();
    }
    return classLabels;
}