Example usage for weka.core Instances setClassIndex

Introduction

On this page you can find example usage for weka.core Instances setClassIndex.

Prototype

public void setClassIndex(int classIndex) 

Document

Sets the class index of the set.
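A minimal sketch of the typical call pattern, assuming an ARFF file at a placeholder path whose last attribute is the class:

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class SetClassIndexExample {
    public static void main(String[] args) throws Exception {
        // Load a dataset; "data.arff" is a placeholder path.
        Instances data = DataSource.read("data.arff");

        // Weka does not infer the class attribute, so set it explicitly.
        // The index is 0-based; the last attribute is numAttributes() - 1.
        if (data.classIndex() < 0) {
            data.setClassIndex(data.numAttributes() - 1);
        }
        System.out.println("Class attribute: " + data.classAttribute().name());
    }
}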

Usage

From source file:GClass.EvaluationInternal.java

License:Open Source License

/**
 * Prints the predictions for the given dataset into a String variable.
 */
protected static String printClassifications(Classifier classifier, Instances train, String testFileName,
        int classIndex, Range attributesToOutput) throws Exception {

    StringBuffer text = new StringBuffer();
    if (testFileName.length() != 0) {
        BufferedReader testReader = null;
        try {
            testReader = new BufferedReader(new FileReader(testFileName));
        } catch (Exception e) {
            throw new Exception("Can't open file " + e.getMessage() + '.');
        }
        Instances test = new Instances(testReader, 1);
        // classIndex follows Weka's 1-based command-line convention (-c);
        // -1 means "use the last attribute as the class".
        if (classIndex != -1) {
            test.setClassIndex(classIndex - 1);
        } else {
            test.setClassIndex(test.numAttributes() - 1);
        }
        int i = 0;
        while (test.readInstance(testReader)) {
            Instance instance = test.instance(0);
            Instance withMissing = (Instance) instance.copy();
            withMissing.setDataset(test);
            double predValue = classifier.classifyInstance(withMissing);
            if (test.classAttribute().isNumeric()) {
                if (Instance.isMissingValue(predValue)) {
                    text.append(i + " missing ");
                } else {
                    text.append(i + " " + predValue + " ");
                }
                if (instance.classIsMissing()) {
                    text.append("missing");
                } else {
                    text.append(instance.classValue());
                }
                text.append(" " + attributeValuesString(withMissing, attributesToOutput) + "\n");
            } else {
                if (Instance.isMissingValue(predValue)) {
                    text.append(i + " missing ");
                } else {
                    text.append(i + " " + test.classAttribute().value((int) predValue) + " ");
                }
                if (Instance.isMissingValue(predValue)) {
                    text.append("missing ");
                } else {
                    text.append(classifier.distributionForInstance(withMissing)[(int) predValue] + " ");
                }
                text.append(instance.toString(instance.classIndex()) + " "
                        + attributeValuesString(withMissing, attributesToOutput) + "\n");
            }
            test.delete(0);
            i++;
        }
        testReader.close();
    }
    return text.toString();
}

From source file:general.Util.java

/**
 * Show learning statistic results using a test set.
 * @param testPath path to the test file
 * @param typeTestFile type of the test file ("arff" or "csv")
 */
public static void TestSchema(String testPath, String typeTestFile) {
    Instances testsets = null;
    // Load test instances based on file type and path
    if (typeTestFile.equals("arff")) {
        FileReader file = null;
        try {
            file = new FileReader(testPath);
            try (BufferedReader reader = new BufferedReader(file)) {
                testsets = new Instances(reader);
            }
            // setting class attribute
            testsets.setClassIndex(testsets.numAttributes() - 1);
        } catch (IOException ex) {
            Logger.getLogger(Util.class.getName()).log(Level.SEVERE, null, ex);
        } finally {
            try {
                if (file != null) {
                    file.close();
                }
            } catch (IOException ex) {
                Logger.getLogger(Util.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
    } else if (typeTestFile.equals("csv")) {
        try {
            CSVLoader csv = new CSVLoader();
            csv.setFile(new File(testPath));
            testsets = csv.getDataSet();

            // setting class attribute
            testsets.setClassIndex(testsets.numAttributes() - 1);
        } catch (IOException ex) {
            Logger.getLogger(Util.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    // Evaluate the model on the test instances and print the results
    try {
        Evaluation eval = new Evaluation(Util.getData());
        eval.evaluateModel(Util.getClassifier(), testsets);
        System.out.println(eval.toSummaryString("\nResults\n\n", false));
    } catch (Exception e) {
        e.printStackTrace();
    }
}
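
A hypothetical call, assuming a classifier and training data have already been set up in Util:

// Hypothetical usage: evaluate the previously built classifier on an ARFF test set.
Util.TestSchema("test.arff", "arff");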

From source file:general.Util.java

/**
 * Show learning statistic results by percentage split.
 * @param data training data
 * @param trainPercent percentage of the data used for training
 * @param Classifier name of the classifier model to build
 */
public static void PercentageSplit(Instances data, double trainPercent, String Classifier) {
    try {
        int trainSize = (int) Math.round(data.numInstances() * trainPercent / 100);
        int testSize = data.numInstances() - trainSize;

        data.randomize(new Random(1));

        Instances train = new Instances(data, 0, trainSize);
        Instances test = new Instances(data, trainSize, testSize);
        train.setClassIndex(train.numAttributes() - 1);
        test.setClassIndex(test.numAttributes() - 1);

        switch (Classifier.toLowerCase()) {
        case "naivebayes":
            classifier = new NaiveBayes();
            break;
        case "j48-prune":
            classifier = new MyJ48(true, 0.25f);
            break;
        case "j48-unprune":
            classifier = new MyJ48(false, 0f);
            break;
        case "id3":
            classifier = new MyID3();
            break;
        default:
            break;
        }
        classifier.buildClassifier(train);

        for (int i = 0; i < test.numInstances(); i++) {
            try {
                double pred = classifier.classifyInstance(test.instance(i));
                System.out.print("ID: " + test.instance(i));
                System.out
                        .print(", actual: " + test.classAttribute().value((int) test.instance(i).classValue()));
                System.out.println(", predicted: " + test.classAttribute().value((int) pred));
            } catch (Exception ex) {
                Logger.getLogger(Util.class.getName()).log(Level.SEVERE, null, ex);
            }
        }

        // Evaluate the model on the test instances and print the results
        try {
            Evaluation eval = new Evaluation(train);
            eval.evaluateModel(classifier, test);
            System.out.println(eval.toSummaryString("\nResults\n\n", false));
        } catch (Exception e) {
            e.printStackTrace();
        }

    } catch (Exception ex) {
        Logger.getLogger(Util.class.getName()).log(Level.SEVERE, null, ex);
    }

}
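
A hypothetical call, assuming a dataset has already been loaded into an Instances object named data:

// Hypothetical usage: train NaiveBayes on 80% of the data, test on the remaining 20%.
Util.PercentageSplit(data, 80.0, "naivebayes");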

From source file:general.Util.java

/**
 * Classify a test set using a pre-built model.
 * @param model the trained classifier
 * @param test test instances
 */
public static void doClassify(Classifier model, Instances test) {
    test.setClassIndex(test.numAttributes() - 1);
    for (int i = 0; i < test.numInstances(); i++) {
        try {
            double pred = model.classifyInstance(test.instance(i));
            System.out.print("ID: " + test.instance(i));
            System.out.print(", actual: " + test.classAttribute().value((int) test.instance(i).classValue()));
            System.out.println(", predicted: " + test.classAttribute().value((int) pred));
        } catch (Exception ex) {
            Logger.getLogger(Util.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
}
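
A sketch of how this might be called with a model deserialized from disk; the model and ARFF paths are placeholders:

// Hypothetical usage: load a serialized classifier and a test set, then classify.
Classifier model = (Classifier) weka.core.SerializationHelper.read("j48.model");
Instances test = new Instances(new BufferedReader(new FileReader("test.arff")));
Util.doClassify(model, test);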

From source file:gov.va.chir.tagline.dao.DatasetUtil.java

License:Open Source License

public static Instances createDataset(final Collection<Document> documents) {

    // Key = feature name | Value = number representing NUMERIC, NOMINAL, etc.
    final Map<String, Integer> featureType = new TreeMap<String, Integer>();

    // Key = feature name | Values = distinct values for NOMINAL values
    final Map<String, Set<String>> nominalFeatureMap = new HashMap<String, Set<String>>();

    final Set<String> labels = new TreeSet<String>();
    final Set<String> docIds = new TreeSet<String>();

    // First scan -- determine attribute values
    for (Document document : documents) {
        processFeatures(document.getFeatures(), featureType, nominalFeatureMap);
        docIds.add(document.getName());

        for (Line line : document.getLines()) {
            processFeatures(line.getFeatures(), featureType, nominalFeatureMap);

            labels.add(line.getLabel());
        }
    }

    final ArrayList<Attribute> attributes = new ArrayList<Attribute>();

    // Add Document and Line IDs as first two attributes
    //final Attribute docId = new Attribute(DOC_ID, (ArrayList<String>) null);
    final Attribute docId = new Attribute(DOC_ID, new ArrayList<String>(docIds));
    final Attribute lineId = new Attribute(LINE_ID);

    attributes.add(docId);
    attributes.add(lineId);

    // Build attributes
    for (String feature : featureType.keySet()) {
        final int type = featureType.get(feature);

        if (type == Attribute.NUMERIC) {
            attributes.add(new Attribute(feature));
        } else {
            if (nominalFeatureMap.containsKey(feature)) {
                attributes.add(new Attribute(feature, new ArrayList<String>(nominalFeatureMap.get(feature))));
            }
        }
    }

    // Add class attribute
    Attribute classAttr = new Attribute(LABEL, new ArrayList<String>(labels));
    attributes.add(classAttr);

    final Instances instances = new Instances("train", attributes, documents.size());

    // Second scan -- add data
    for (Document document : documents) {
        final Map<String, Object> docFeatures = document.getFeatures();

        for (Line line : document.getLines()) {
            final Instance instance = new DenseInstance(attributes.size());

            final Map<String, Object> lineFeatures = line.getFeatures();
            lineFeatures.putAll(docFeatures);

            instance.setValue(docId, document.getName());
            instance.setValue(lineId, line.getLineId());
            instance.setValue(classAttr, line.getLabel());

            for (Attribute attribute : attributes) {
                if (!attribute.equals(docId) && !attribute.equals(lineId) && !attribute.equals(classAttr)) {
                    final String name = attribute.name();
                    final Object obj = lineFeatures.get(name);

                    if (obj instanceof Double) {
                        instance.setValue(attribute, ((Double) obj).doubleValue());
                    } else if (obj instanceof Integer) {
                        instance.setValue(attribute, ((Integer) obj).doubleValue());
                    } else {
                        instance.setValue(attribute, obj.toString());
                    }
                }
            }

            instances.add(instance);
        }
    }

    // Set last attribute as class
    instances.setClassIndex(attributes.size() - 1);

    return instances;
}

From source file:gov.va.chir.tagline.dao.DatasetUtil.java

License:Open Source License

@SuppressWarnings("unchecked")
public static Instances createDataset(final Instances header, final Collection<Document> documents)
        throws Exception {

    // Update header to include all docIDs from the passed in documents
    // (Weka requires all values for nominal features)
    final Set<String> docIds = new TreeSet<String>();

    for (Document document : documents) {
        docIds.add(document.getName());
    }

    final AddValues avf = new AddValues();
    avf.setLabels(StringUtils.join(docIds, ","));

    // Have to add 1 because SingleIndex.setValue() has a bug, expecting
    // the passed in index to be 1-based rather than 0-based. Why? I have 
    // no idea.
    // Calling path: AddValues.setInputFormat() -->
    //               SingleIndex.setUpper() -->
    //               SingleIndex.setValue()
    avf.setAttributeIndex(String.valueOf(header.attribute(DOC_ID).index() + 1));

    avf.setInputFormat(header);
    final Instances newHeader = Filter.useFilter(header, avf);

    final Instances instances = new Instances(newHeader, documents.size());

    // Map attributes
    final Map<String, Attribute> attrMap = new HashMap<String, Attribute>();

    final Enumeration<Attribute> en = newHeader.enumerateAttributes();

    while (en.hasMoreElements()) {
        final Attribute attr = en.nextElement();

        attrMap.put(attr.name(), attr);
    }

    attrMap.put(newHeader.classAttribute().name(), newHeader.classAttribute());

    final Attribute docId = attrMap.get(DOC_ID);
    final Attribute lineId = attrMap.get(LINE_ID);
    final Attribute classAttr = attrMap.get(LABEL);

    // Add data
    for (Document document : documents) {
        final Map<String, Object> docFeatures = document.getFeatures();

        for (Line line : document.getLines()) {
            final Instance instance = new DenseInstance(attrMap.size());

            final Map<String, Object> lineFeatures = line.getFeatures();
            lineFeatures.putAll(docFeatures);

            instance.setValue(docId, document.getName());
            instance.setValue(lineId, line.getLineId());

            if (line.getLabel() == null) {
                instance.setMissing(classAttr);
            } else {
                instance.setValue(classAttr, line.getLabel());
            }

            for (Attribute attribute : attrMap.values()) {
                if (!attribute.equals(docId) && !attribute.equals(lineId) && !attribute.equals(classAttr)) {
                    final String name = attribute.name();
                    final Object obj = lineFeatures.get(name);

                    if (obj instanceof Double) {
                        instance.setValue(attribute, ((Double) obj).doubleValue());
                    } else if (obj instanceof Integer) {
                        instance.setValue(attribute, ((Integer) obj).doubleValue());
                    } else {
                        instance.setValue(attribute, obj.toString());
                    }
                }
            }

            instances.add(instance);
        }
    }

    // Set last attribute as class
    instances.setClassIndex(attrMap.size() - 1);

    return instances;
}

From source file:gr.auth.ee.lcs.ArffTrainTestLoader.java

License:Open Source License

/**
 * Load instances into the global train store and create test set.
 * 
 * @param filename
 *            the .arff filename to be used
 * @param generateTestSet
 *            true if a test set is going to be generated
 * @throws IOException
 *             if the input file is not found
 */
public final void loadInstances(final String filename, final boolean generateTestSet) throws IOException {
    // Open .arff
    final Instances set = InstancesUtility.openInstance(filename);
    if (set.classIndex() < 0) {
        set.setClassIndex(set.numAttributes() - 1);
    }
    set.randomize(new Random());

    if (generateTestSet) {
        final int numOfFolds = (int) SettingsLoader.getNumericSetting("NumberOfFolds", 10);
        final int fold = (int) Math.floor(Math.random() * numOfFolds);
        trainSet = set.trainCV(numOfFolds, fold);
        testSet = set.testCV(numOfFolds, fold);
    } else {
        trainSet = set;
    }

    myLcs.instances = InstancesUtility.convertIntancesToDouble(trainSet);
    myLcs.labelCardinality = InstancesUtility.getLabelCardinality(trainSet);

}

From source file:gr.auth.ee.lcs.ArffTrainTestLoader.java

License:Open Source License

/**
 * Load instances into the global train store and create test set.
 * 
 * @param filename
 *            the .arff filename to be used
 * @param testFile
 *            the test file to be loaded
 * @throws IOException
 *             if the input file is not found
 */
public final void loadInstancesWithTest(final String filename, final String testFile) throws IOException {

    // Open .arff
    final Instances set = InstancesUtility.openInstance(filename);

    if (set.classIndex() < 0)
        set.setClassIndex(set.numAttributes() - 1);
    set.randomize(new Random());
    trainSet = set;

    myLcs.instances = InstancesUtility.convertIntancesToDouble(trainSet);
    myLcs.labelCardinality = InstancesUtility.getLabelCardinality(trainSet);
    testSet = InstancesUtility.openInstance(testFile);

    myLcs.trainSet = trainSet;
    myLcs.testSet = testSet;

    myLcs.testInstances = InstancesUtility.convertIntancesToDouble(testSet);

    System.out.println("Label cardinality: " + myLcs.labelCardinality);

}

From source file:gr.auth.ee.lcs.data.representations.complex.SingleClassRepresentation.java

License:Open Source License

@Override
protected void createClassRepresentation(final Instances instances) {

    if (instances.classIndex() < 0)
        instances.setClassIndex(instances.numAttributes() - 1);

    // Rule Consequents
    final Enumeration<?> classNames = instances.classAttribute().enumerateValues();
    final String[] ruleConsequents = new String[instances.numClasses()];
    this.ruleConsequents = ruleConsequents;
    for (int i = 0; i < instances.numClasses(); i++)
        ruleConsequents[i] = (String) classNames.nextElement();

    attributeList[attributeList.length - 1] = new UniLabel(chromosomeSize, "class", ruleConsequents);

}

From source file:gr.auth.ee.lcs.utilities.InstancesUtility.java

License:Open Source License

/**
 * Splits the .arff input dataset to |number-of-distinct-label-combinations| Instances which are stored in the partitions[] array. 
 * Called by initializePopulation() as a preparatory step to clustering.
 * @throws Exception
 */

public static Instances[] partitionInstances(final AbstractLearningClassifierSystem lcs, final String filename)
        throws Exception {

    // Open .arff
    final Instances set = InstancesUtility.openInstance(filename);
    if (set.classIndex() < 0) {
        set.setClassIndex(set.numAttributes() - 1);
    }
    //set.randomize(new Random());
    int numberOfLabels = (int) SettingsLoader.getNumericSetting("numberOfLabels", 1);

    // the partitions vector holds the indices      
    String[] stringsArray = new String[lcs.instances.length];
    int[] indicesArray = new int[lcs.instances.length];

    // convert each instance's labelset into a string and store it in the stringsArray array
    for (int i = 0; i < set.numInstances(); i++) {
        stringsArray[i] = "";
        indicesArray[i] = i;

        for (int j = set.numAttributes() - numberOfLabels; j < set.numAttributes(); j++) {
            stringsArray[i] += (int) set.instance(i).value(j);
        }
    }

    // contains the indicesVector(s)
    Vector<Vector<Integer>> mothershipVector = new Vector<Vector<Integer>>();

    String baseString = "";
    for (int i = 0; i < set.numInstances(); i++) {

        baseString = stringsArray[i];
        if (baseString.equals(""))
            continue;
        Vector<Integer> indicesVector = new Vector<Integer>();

        for (int j = 0; j < set.numInstances(); j++) {
            if (baseString.equals(stringsArray[j])) {
                stringsArray[j] = "";
                indicesVector.add(j);
            }
        }
        mothershipVector.add(indicesVector);
    }

    Instances[] partitions = new Instances[mothershipVector.size()];

    for (int i = 0; i < mothershipVector.size(); i++) {
        partitions[i] = new Instances(set, mothershipVector.elementAt(i).size());
        for (int j = 0; j < mothershipVector.elementAt(i).size(); j++) {
            Instance instanceToAdd = set.instance((Integer) mothershipVector.elementAt(i).elementAt(j));
            partitions[i].add(instanceToAdd);
        }
    }
    /*
     * Up to here, the partitions array has been formed. It contains the dataset
     * split by label combination. It holds both the attributes and the labels,
     * but for clustering the input should only be the attributes, so the labels
     * need to be deleted. This is taken care of by initializePopulation().
     */
    return partitions;
}
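
A minimal sketch of how the label columns might be stripped before clustering, assuming the labels occupy the last numberOfLabels attributes as above. This mirrors what initializePopulation() is said to do; it is a hypothetical helper, not the project's actual code:

import weka.core.Instances;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;

// Hypothetical helper: remove the trailing label attributes so that
// only the input attributes remain for clustering.
public static Instances stripLabels(Instances partition, int numberOfLabels) throws Exception {
    Instances copy = new Instances(partition);
    copy.setClassIndex(-1); // clear the class so label attributes can be removed

    Remove remove = new Remove();
    // Attribute ranges in Weka filter options are 1-based.
    int first = copy.numAttributes() - numberOfLabels + 1;
    remove.setAttributeIndices(first + "-" + copy.numAttributes());
    remove.setInputFormat(copy);
    return Filter.useFilter(copy, remove);
}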