List of usage examples for weka.core.Instances.instance(int)
public Instance instance(int index)
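For orientation, here is a minimal sketch of the typical access pattern before the collected examples: load an ARFF file, set the class index, and walk the data with instance(int). The file name "iris.arff" and the class-is-last-attribute convention are illustrative assumptions, not part of any example below.

import java.io.BufferedReader;
import java.io.FileReader;
import weka.core.Instance;
import weka.core.Instances;

public class InstanceAccessSketch {
    public static void main(String[] args) throws Exception {
        // "iris.arff" is a placeholder path; any ARFF file with a nominal class works.
        Instances data = new Instances(new BufferedReader(new FileReader("iris.arff")));
        data.setClassIndex(data.numAttributes() - 1); // assume the class is the last attribute

        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.instance(i); // random access by position
            System.out.println(i + ": " + inst + " (class = " + inst.stringValue(data.classIndex()) + ")");
        }
    }
}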
From source file: GClass.EvaluationInternal.java
License: Open Source License

/**
 * Sets the class prior probabilities.
 *
 * @param train the training instances used to determine the prior probabilities
 * @exception Exception if the class attribute of the instances is not set
 */
public void setPriors(Instances train) throws Exception {
    if (!m_ClassIsNominal) {
        m_NumTrainClassVals = 0;
        m_TrainClassVals = null;
        m_TrainClassWeights = null;
        m_PriorErrorEstimator = null;
        m_ErrorEstimator = null;
        for (int i = 0; i < train.numInstances(); i++) {
            Instance currentInst = train.instance(i);
            if (!currentInst.classIsMissing()) {
                addNumericTrainClass(currentInst.classValue(), currentInst.weight());
            }
        }
    } else {
        for (int i = 0; i < m_NumClasses; i++) {
            m_ClassPriors[i] = 1;
        }
        m_ClassPriorsSum = m_NumClasses;
        for (int i = 0; i < train.numInstances(); i++) {
            if (!train.instance(i).classIsMissing()) {
                m_ClassPriors[(int) train.instance(i).classValue()] += train.instance(i).weight();
                m_ClassPriorsSum += train.instance(i).weight();
            }
        }
    }
}
From source file: GClass.EvaluationInternal.java
License: Open Source License

/**
 * Prints the predictions for the given dataset into a String variable.
 */
protected static String printClassifications(Classifier classifier, Instances train, String testFileName,
        int classIndex, Range attributesToOutput) throws Exception {

    StringBuffer text = new StringBuffer();
    if (testFileName.length() != 0) {
        BufferedReader testReader = null;
        try {
            testReader = new BufferedReader(new FileReader(testFileName));
        } catch (Exception e) {
            throw new Exception("Can't open file " + e.getMessage() + '.');
        }
        Instances test = new Instances(testReader, 1);
        if (classIndex != -1) {
            test.setClassIndex(classIndex - 1);
        } else {
            test.setClassIndex(test.numAttributes() - 1);
        }
        int i = 0;
        while (test.readInstance(testReader)) {
            Instance instance = test.instance(0);
            Instance withMissing = (Instance) instance.copy();
            withMissing.setDataset(test);
            double predValue = ((Classifier) classifier).classifyInstance(withMissing);
            if (test.classAttribute().isNumeric()) {
                if (Instance.isMissingValue(predValue)) {
                    text.append(i + " missing ");
                } else {
                    text.append(i + " " + predValue + " ");
                }
                if (instance.classIsMissing()) {
                    text.append("missing");
                } else {
                    text.append(instance.classValue());
                }
                text.append(" " + attributeValuesString(withMissing, attributesToOutput) + "\n");
            } else {
                if (Instance.isMissingValue(predValue)) {
                    text.append(i + " missing ");
                } else {
                    text.append(i + " " + test.classAttribute().value((int) predValue) + " ");
                }
                if (Instance.isMissingValue(predValue)) {
                    text.append("missing ");
                } else {
                    text.append(classifier.distributionForInstance(withMissing)[(int) predValue] + " ");
                }
                text.append(instance.toString(instance.classIndex()) + " "
                        + attributeValuesString(withMissing, attributesToOutput) + "\n");
            }
            test.delete(0);
            i++;
        }
        testReader.close();
    }
    return text.toString();
}
From source file: general.Util.java

/**
 * Show learning statistics by percentage split.
 *
 * @param data         the training data
 * @param trainPercent percentage of the data used for training
 * @param Classifier   name of the classifier to build (naivebayes, j48-prune, j48-unprune, id3)
 */
public static void PercentageSplit(Instances data, double trainPercent, String Classifier) {
    try {
        int trainSize = (int) Math.round(data.numInstances() * trainPercent / 100);
        int testSize = data.numInstances() - trainSize;
        data.randomize(new Random(1));

        Instances train = new Instances(data, 0, trainSize);
        Instances test = new Instances(data, trainSize, testSize);
        train.setClassIndex(train.numAttributes() - 1);
        test.setClassIndex(test.numAttributes() - 1);

        switch (Classifier.toLowerCase()) {
        case "naivebayes":
            classifier = new NaiveBayes();
            break;
        case "j48-prune":
            classifier = new MyJ48(true, 0.25f);
            break;
        case "j48-unprune":
            classifier = new MyJ48(false, 0f);
            break;
        case "id3":
            classifier = new MyID3();
            break;
        default:
            break;
        }
        classifier.buildClassifier(train);

        for (int i = 0; i < test.numInstances(); i++) {
            try {
                double pred = classifier.classifyInstance(test.instance(i));
                System.out.print("ID: " + test.instance(i));
                System.out.print(", actual: " + test.classAttribute().value((int) test.instance(i).classValue()));
                System.out.println(", predicted: " + test.classAttribute().value((int) pred));
            } catch (Exception ex) {
                Logger.getLogger(Util.class.getName()).log(Level.SEVERE, null, ex);
            }
        }

        // Evaluate the model on the test instances and print the results
        try {
            Evaluation eval = new Evaluation(train);
            eval.evaluateModel(classifier, test);
            System.out.println(eval.toSummaryString("\nResults\n\n", false));
        } catch (Exception e) {
            e.printStackTrace();
        }
    } catch (Exception ex) {
        Logger.getLogger(Util.class.getName()).log(Level.SEVERE, null, ex);
    }
}
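For comparison, a minimal sketch of letting Weka's Evaluation class perform a 10-fold cross-validation on the same data instead of a manual percentage split; it assumes the classifier and data variables are already configured as in the example above.

// Sketch only: 10-fold cross-validation with Weka's Evaluation class.
data.setClassIndex(data.numAttributes() - 1);
Evaluation eval = new Evaluation(data);
eval.crossValidateModel(classifier, data, 10, new Random(1));
System.out.println(eval.toSummaryString("\nCross-validation results\n\n", false));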
From source file: general.Util.java

/**
 * Classify a test set using a pre-built model.
 *
 * @param model the pre-built classifier model
 * @param test  the test instances
 */
public static void doClassify(Classifier model, Instances test) {
    test.setClassIndex(test.numAttributes() - 1);
    for (int i = 0; i < test.numInstances(); i++) {
        try {
            double pred = model.classifyInstance(test.instance(i));
            System.out.print("ID: " + test.instance(i));
            System.out.print(", actual: " + test.classAttribute().value((int) test.instance(i).classValue()));
            System.out.println(", predicted: " + test.classAttribute().value((int) pred));
        } catch (Exception ex) {
            Logger.getLogger(Util.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
}
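A hedged sketch of how doClassify above might be driven: deserialize a previously saved model and load a test ARFF. The paths "model.bin" and "test.arff" are placeholders, and SerializationHelper is only one way to restore a saved Weka classifier.

// Sketch only; file paths are placeholders and the model is assumed to match the test header.
Classifier model = (Classifier) weka.core.SerializationHelper.read("model.bin");
Instances test = new Instances(new java.io.BufferedReader(new java.io.FileReader("test.arff")));
Util.doClassify(model, test); // doClassify sets the class index to the last attribute itself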
From source file: gr.auth.ee.lcs.AbstractLearningClassifierSystem.java
License: Open Source License

/**
 * Initialize the rule population by clustering the train set and producing rules based upon
 * the clusters. The train set is initially divided into as many partitions as there are
 * distinct label combinations.
 *
 * @param file the .arff file
 * @throws Exception
 */
public ClassifierSet initializePopulation(final String file) throws Exception {

    final double gamma = SettingsLoader.getNumericSetting("CLUSTER_GAMMA", .2);
    int numberOfLabels = (int) SettingsLoader.getNumericSetting("numberOfLabels", 1);

    final Instances set = InstancesUtility.openInstance(file);

    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setSeed(10);
    kmeans.setPreserveInstancesOrder(true);

    /*
     * Table partitions will hold instances only with attributes.
     * On the contrary, table partitionsWithCLasses will hold only the labels.
     */
    Instances[] partitions = InstancesUtility.partitionInstances(this, file);
    Instances[] partitionsWithCLasses = InstancesUtility.partitionInstances(this, file);

    /*
     * Instead of having multiple positions for the same label combination, use only one.
     * This is the one that will be used to "cover" the centroids.
     */
    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Instance temp = partitionsWithCLasses[i].instance(0);
        partitionsWithCLasses[i].delete();
        partitionsWithCLasses[i].add(temp);
    }

    /*
     * Delete the labels from the partitions.
     */
    String attributesIndicesForDeletion = "";
    for (int k = set.numAttributes() - numberOfLabels + 1; k <= set.numAttributes(); k++) {
        if (k != set.numAttributes())
            attributesIndicesForDeletion += k + ",";
        else
            attributesIndicesForDeletion += k;
    }
    /*
     * attributesIndicesForDeletion = 8,9,10,11,12,13,14 e.g. for 7 attributes and 7 labels.
     * It does not start from 7 because Remove expects 1-based attribute indices. See the API.
     */
    for (int i = 0; i < partitions.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(attributesIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitions[i]);
        partitions[i] = Filter.useFilter(partitions[i], remove);
    }
    // partitions now contains only attributes

    /*
     * Delete the attributes from partitionsWithCLasses.
     */
    String labelsIndicesForDeletion = "";
    for (int k = 1; k <= set.numAttributes() - numberOfLabels; k++) {
        if (k != set.numAttributes() - numberOfLabels)
            labelsIndicesForDeletion += k + ",";
        else
            labelsIndicesForDeletion += k;
    }
    /*
     * labelsIndicesForDeletion = 1,2,3,4,5,6,7 e.g. for 7 attributes and 7 labels.
     */
    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(labelsIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitionsWithCLasses[i]);
        partitionsWithCLasses[i] = Filter.useFilter(partitionsWithCLasses[i], remove);
    }
    // partitionsWithCLasses now contains only labels

    int populationSize = (int) SettingsLoader.getNumericSetting("populationSize", 1500);

    // the set used to store the rules from all the clusters
    ClassifierSet initialClassifiers = new ClassifierSet(new FixedSizeSetWorstFitnessDeletion(this,
            populationSize,
            new RouletteWheelSelector(AbstractUpdateStrategy.COMPARISON_MODE_DELETION, true)));

    for (int i = 0; i < partitions.length; i++) {
        try {
            kmeans.setNumClusters((int) Math.ceil(gamma * partitions[i].numInstances()));
            kmeans.buildClusterer(partitions[i]);
            int[] assignments = kmeans.getAssignments();

            Instances centroids = kmeans.getClusterCentroids();
            int numOfCentroidAttributes = centroids.numAttributes();

            /*
             * The centroids in this stage hold only attributes. To continue, we need to provide
             * them the labels. These are the ones we removed earlier.
             * But first, open up positions for the labels.
             */
            for (int j = 0; j < numberOfLabels; j++) {
                Attribute label = new Attribute("label" + j);
                centroids.insertAttributeAt(label, numOfCentroidAttributes + j);
            }

            for (int centroidInstances = 0; centroidInstances < centroids.numInstances(); centroidInstances++) {
                for (int labels = 0; labels < numberOfLabels; labels++) {
                    centroids.instance(centroidInstances).setValue(numOfCentroidAttributes + labels,
                            partitionsWithCLasses[i].instance(0).value(labels));
                }
            }

            double[][] centroidsArray = InstancesUtility.convertIntancesToDouble(centroids);

            for (int j = 0; j < centroidsArray.length; j++) {
                final Classifier coveringClassifier = this.getClassifierTransformBridge()
                        .createRandomClusteringClassifier(centroidsArray[j]);
                coveringClassifier.setClassifierOrigin(Classifier.CLASSIFIER_ORIGIN_INIT);
                initialClassifiers.addClassifier(new Macroclassifier(coveringClassifier, 1), false);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    System.out.println(initialClassifiers);
    return initialClassifiers;
}
From source file: gr.auth.ee.lcs.AbstractLearningClassifierSystem.java
License: Open Source License

/**
 * Initialize the rule population by clustering the train set and producing rules based upon
 * the clusters. The train set is initially divided into as many partitions as there are
 * distinct label combinations.
 *
 * @param trainset the train set as Instances
 * @throws Exception
 */
public ClassifierSet initializePopulation(final Instances trainset) throws Exception {

    final double gamma = SettingsLoader.getNumericSetting("CLUSTER_GAMMA", .2);
    int numberOfLabels = (int) SettingsLoader.getNumericSetting("numberOfLabels", 1);

    final Instances set = trainset;

    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setSeed(10);
    kmeans.setPreserveInstancesOrder(true);

    /*
     * Table partitions will hold instances only with attributes.
     * On the contrary, table partitionsWithCLasses will hold only the labels.
     */
    Instances[] partitions = InstancesUtility.partitionInstances(this, trainset);
    Instances[] partitionsWithCLasses = InstancesUtility.partitionInstances(this, trainset);

    /*
     * Instead of having multiple positions for the same label combination, use only one.
     * This is the one that will be used to "cover" the centroids.
     */
    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Instance temp = partitionsWithCLasses[i].instance(0);
        partitionsWithCLasses[i].delete();
        partitionsWithCLasses[i].add(temp);
    }

    /*
     * Delete the labels from the partitions.
     */
    String attributesIndicesForDeletion = "";
    for (int k = set.numAttributes() - numberOfLabels + 1; k <= set.numAttributes(); k++) {
        if (k != set.numAttributes())
            attributesIndicesForDeletion += k + ",";
        else
            attributesIndicesForDeletion += k;
    }
    /*
     * attributesIndicesForDeletion = 8,9,10,11,12,13,14 e.g. for 7 attributes and 7 labels.
     * It does not start from 7 because Remove expects 1-based attribute indices. See the API.
     */
    for (int i = 0; i < partitions.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(attributesIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitions[i]);
        partitions[i] = Filter.useFilter(partitions[i], remove);
    }
    // partitions now contains only attributes

    /*
     * Delete the attributes from partitionsWithCLasses.
     */
    String labelsIndicesForDeletion = "";
    for (int k = 1; k <= set.numAttributes() - numberOfLabels; k++) {
        if (k != set.numAttributes() - numberOfLabels)
            labelsIndicesForDeletion += k + ",";
        else
            labelsIndicesForDeletion += k;
    }
    /*
     * labelsIndicesForDeletion = 1,2,3,4,5,6,7 e.g. for 7 attributes and 7 labels.
     */
    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(labelsIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitionsWithCLasses[i]);
        partitionsWithCLasses[i] = Filter.useFilter(partitionsWithCLasses[i], remove);
    }
    // partitionsWithCLasses now contains only labels

    int populationSize = (int) SettingsLoader.getNumericSetting("populationSize", 1500);

    // the set used to store the rules from all the clusters
    ClassifierSet initialClassifiers = new ClassifierSet(new FixedSizeSetWorstFitnessDeletion(this,
            populationSize,
            new RouletteWheelSelector(AbstractUpdateStrategy.COMPARISON_MODE_DELETION, true)));

    for (int i = 0; i < partitions.length; i++) {
        try {
            kmeans.setNumClusters((int) Math.ceil(gamma * partitions[i].numInstances()));
            kmeans.buildClusterer(partitions[i]);
            int[] assignments = kmeans.getAssignments();

            Instances centroids = kmeans.getClusterCentroids();
            int numOfCentroidAttributes = centroids.numAttributes();

            /*
             * The centroids in this stage hold only attributes. To continue, we need to provide
             * them the labels. These are the ones we removed earlier.
             * But first, open up positions for the labels.
             */
            for (int j = 0; j < numberOfLabels; j++) {
                Attribute label = new Attribute("label" + j);
                centroids.insertAttributeAt(label, numOfCentroidAttributes + j);
            }

            for (int centroidInstances = 0; centroidInstances < centroids.numInstances(); centroidInstances++) {
                for (int labels = 0; labels < numberOfLabels; labels++) {
                    centroids.instance(centroidInstances).setValue(numOfCentroidAttributes + labels,
                            partitionsWithCLasses[i].instance(0).value(labels));
                }
            }

            double[][] centroidsArray = InstancesUtility.convertIntancesToDouble(centroids);

            for (int j = 0; j < centroidsArray.length; j++) {
                final Classifier coveringClassifier = this.getClassifierTransformBridge()
                        .createRandomCoveringClassifier(centroidsArray[j]);
                coveringClassifier.setClassifierOrigin(Classifier.CLASSIFIER_ORIGIN_INIT);
                initialClassifiers.addClassifier(new Macroclassifier(coveringClassifier, 1), false);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    return initialClassifiers;
}
From source file: gr.auth.ee.lcs.data.representations.complex.ComplexRepresentation.java
License: Open Source License

/**
 * Build the representation for some instances.
 *
 * @param instances the instances
 */
protected void buildRepresentationFromInstance(final Instances instances) {
    for (int i = 0; i < (instances.numAttributes() - numberOfLabels); i++) {
        final String attributeName = instances.attribute(i).name();

        if (instances.attribute(i).isNominal()) {
            String[] attributeNames = new String[instances.attribute(i).numValues()];
            final Enumeration<?> values = instances.attribute(i).enumerateValues();
            for (int j = 0; j < attributeNames.length; j++) {
                attributeNames[j] = (String) values.nextElement();
            }

            // Create boolean or generic nominal
            if (attributeNames.length > 2)
                attributeList[i] = new ComplexRepresentation.NominalAttribute(this.chromosomeSize,
                        attributeName, attributeNames, attributeGeneralizationRate);
            else
                attributeList[i] = new ComplexRepresentation.BooleanAttribute(chromosomeSize, attributeName,
                        attributeGeneralizationRate);
        } else if (instances.attribute(i).isNumeric()) {
            float minValue, maxValue;
            minValue = (float) instances.instance(0).toDoubleArray()[i];
            maxValue = minValue;
            for (int sample = 0; sample < instances.numInstances(); sample++) {
                final float currentVal = (float) instances.instance(sample).toDoubleArray()[i];
                if (currentVal > maxValue)
                    maxValue = currentVal;
                if (currentVal < minValue)
                    minValue = currentVal;
            }

            attributeList[i] = new ComplexRepresentation.IntervalAttribute(this.chromosomeSize, attributeName,
                    minValue, maxValue, precision, attributeGeneralizationRate);
        }
    }
    createClassRepresentation(instances);
}
From source file: gr.auth.ee.lcs.utilities.InstancesUtility.java
License: Open Source License

/**
 * Perform the conversion.
 *
 * @param set the set containing the instances
 * @return a double[][] containing the instances and their respective attribute values
 */
public static double[][] convertIntancesToDouble(final Instances set) {
    if (set == null)
        return null;
    final double[][] result = new double[set.numInstances()][set.numAttributes()];
    for (int i = 0; i < set.numInstances(); i++) {
        for (int j = 0; j < set.numAttributes(); j++) {
            result[i][j] = set.instance(i).value(j);
        }
    }
    return result;
}
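As a design note, each row can also be copied in one call with Instance.toDoubleArray(), which returns a fresh double[] per instance. A sketch of that variant follows; the helper name toMatrix is an illustrative assumption, not part of InstancesUtility.

// Equivalent sketch using Instance.toDoubleArray(), which copies a whole row at once.
public static double[][] toMatrix(final Instances set) {
    if (set == null)
        return null;
    final double[][] result = new double[set.numInstances()][];
    for (int i = 0; i < set.numInstances(); i++) {
        result[i] = set.instance(i).toDoubleArray();
    }
    return result;
}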
From source file: gr.auth.ee.lcs.utilities.InstancesUtility.java
License: Open Source License

/**
 * Returns the label cardinality of the specified set.
 */
public static double getLabelCardinality(final Instances set) {
    if (set == null)
        return -1;
    int numberOfLabels = (int) SettingsLoader.getNumericSetting("numberOfLabels", 1);
    double sumOfLabels = 0;

    for (int i = 0; i < set.numInstances(); i++) {
        for (int j = set.numAttributes() - numberOfLabels; j < set.numAttributes(); j++) {
            sumOfLabels += set.instance(i).value(j);
        }
    }
    if (set.numInstances() != 0) {
        return sumOfLabels / set.numInstances();
    }
    return 0;
}
From source file: gr.auth.ee.lcs.utilities.InstancesUtility.java
License: Open Source License

/**
 * The number of instances must be a multiple of the number of folds.
 * From a set of instances, it returns a chunk whose length is
 * instances.numInstances() / numberOfFolds, at the given index. Index starts at zero.
 *
 * In essence, this is used when splitting a partition of instances into a train and test set.
 *
 * One chunk is the test set and the rest is the train set.
 * We provide the index for the test set and the rest automatically becomes the train set;
 * see splitPartitionIntoFolds.
 *
 *  _____
 * |_6_| index = 0
 * |_6_| 1
 * |_6_| 2
 * |_6_| 3
 * |_6_| 4
 * |_6_| 5
 * |_6_| 6
 * |_6_| 7
 * |_6_| 8
 * |_6_| 9
 *
 */
public static Instances getPartitionSegment(Instances instances, int index, int numberOfFolds) {
    if (instances.numInstances() % numberOfFolds != 0) {
        System.out.println("Number of instances not a multiple of " + numberOfFolds);
        return null;
    }
    int numberOfInstancesToGet = instances.numInstances() / numberOfFolds;
    Instances segment = new Instances(instances, numberOfInstancesToGet);

    for (int i = index * numberOfInstancesToGet; i < (index + 1) * numberOfInstancesToGet; i++) {
        segment.add(instances.instance(i));
    }
    return segment;
}
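A hedged sketch of how getPartitionSegment might be combined into the train/test split its comment describes: the chunk at index f becomes the test set and the remaining chunks are concatenated into the train set. The helper name buildFold is an illustrative assumption, not part of InstancesUtility.

// Sketch: use chunk f as the test set and merge the remaining chunks into the train set.
public static Instances[] buildFold(Instances partition, int f, int numberOfFolds) {
    Instances test = getPartitionSegment(partition, f, numberOfFolds);
    Instances train = new Instances(partition, 0); // empty set sharing the same header
    for (int k = 0; k < numberOfFolds; k++) {
        if (k == f)
            continue;
        Instances chunk = getPartitionSegment(partition, k, numberOfFolds);
        for (int i = 0; i < chunk.numInstances(); i++) {
            train.add(chunk.instance(i));
        }
    }
    return new Instances[] { train, test };
}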