List of usage examples for weka.core Instances classAttribute
public Attribute classAttribute()
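Before the collected examples below, here is a minimal self-contained sketch of the basic call pattern; the ARFF path is a placeholder and not part of any example on this page:

import java.io.FileReader;
import weka.core.Attribute;
import weka.core.Instances;

public class ClassAttributeDemo {
    public static void main(String[] args) throws Exception {
        // Load a dataset and declare the last attribute as the class.
        Instances data = new Instances(new FileReader("iris.arff")); // placeholder path
        data.setClassIndex(data.numAttributes() - 1);

        // classAttribute() returns the Attribute at the class index; it throws
        // UnassignedClassException if no class index has been set beforehand.
        Attribute classAttr = data.classAttribute();
        System.out.println("Class attribute: " + classAttr.name());
        System.out.println("Nominal class:   " + classAttr.isNominal());
    }
}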
From source file: cotraining.copy.Evaluation_D.java
License: Open Source License
/**
 * Performs a (stratified if class is nominal) cross-validation
 * for a classifier on a set of instances. Now performs
 * a deep copy of the classifier before each call to
 * buildClassifier() (just in case the classifier is not
 * initialized properly).
 *
 * @param classifier the classifier with any options set.
 * @param data the data on which the cross-validation is to be
 *          performed
 * @param numFolds the number of folds for the cross-validation
 * @param random random number generator for randomization
 * @param forPredictionsPrinting varargs parameter that, if supplied, is
 *          expected to hold a StringBuffer to print predictions to,
 *          a Range of attributes to output and a Boolean (true if the
 *          distribution is to be printed)
 * @throws Exception if a classifier could not be generated
 *           successfully or the class is not defined
 */
public void crossValidateModel(Classifier classifier, Instances data, int numFolds, Random random,
        Object... forPredictionsPrinting) throws Exception {
    // Make a copy of the data we can reorder
    data = new Instances(data);
    data.randomize(random);
    if (data.classAttribute().isNominal()) {
        data.stratify(numFolds);
    }

    // We assume that the first element is a StringBuffer, the second a Range (attributes
    // to output) and the third a Boolean (whether or not to output a distribution instead
    // of just a classification)
    if (forPredictionsPrinting.length > 0) {
        // print the header first
        StringBuffer buff = (StringBuffer) forPredictionsPrinting[0];
        Range attsToOutput = (Range) forPredictionsPrinting[1];
        boolean printDist = ((Boolean) forPredictionsPrinting[2]).booleanValue();
        printClassificationsHeader(data, attsToOutput, printDist, buff);
    }

    // Do the folds
    for (int i = 0; i < numFolds; i++) {
        Instances train = data.trainCV(numFolds, i, random);
        setPriors(train);
        Classifier copiedClassifier = Classifier.makeCopy(classifier);
        copiedClassifier.buildClassifier(train);
        Instances test = data.testCV(numFolds, i);
        evaluateModel(copiedClassifier, test, forPredictionsPrinting);
    }
    m_NumFolds = numFolds;
}
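A sketch of how the three-element varargs contract documented above might be exercised. The Evaluation_D constructor is an assumption here, taken to mirror weka.classifiers.Evaluation(Instances), which this class copies; the ARFF path is a placeholder:

import java.io.FileReader;
import java.util.Random;
import cotraining.copy.Evaluation_D;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.Range;

public class CrossValidateDemo {
    public static void main(String[] args) throws Exception {
        Instances data = new Instances(new FileReader("labor.arff")); // placeholder path
        data.setClassIndex(data.numAttributes() - 1);

        StringBuffer predictions = new StringBuffer();
        Range attsToOutput = new Range("first"); // echo the first attribute with each prediction
        Boolean printDistribution = Boolean.TRUE;

        // Assumption: Evaluation_D is constructed like weka.classifiers.Evaluation.
        Evaluation_D eval = new Evaluation_D(data);
        eval.crossValidateModel(new J48(), data, 10, new Random(1),
                predictions, attsToOutput, printDistribution);
        System.out.println(predictions);
    }
}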
From source file: cotraining.copy.Evaluation_D.java
License: Open Source License
/**
 * Prints the header for the predictions output into a supplied StringBuffer
 *
 * @param test structure of the test set to print predictions for
 * @param attributesToOutput indices of the attributes to output
 * @param printDistribution prints the complete distribution for nominal
 *          attributes, not just the predicted value
 * @param text the StringBuffer to print to
 */
protected static void printClassificationsHeader(Instances test, Range attributesToOutput,
        boolean printDistribution, StringBuffer text) {
    // print header
    if (test.classAttribute().isNominal()) {
        if (printDistribution) {
            text.append(" inst#     actual  predicted error distribution");
        } else {
            text.append(" inst#     actual  predicted error prediction");
        }
    } else {
        text.append(" inst#     actual  predicted      error");
    }
    if (attributesToOutput != null) {
        attributesToOutput.setUpper(test.numAttributes() - 1);
        text.append(" (");
        boolean first = true;
        for (int i = 0; i < test.numAttributes(); i++) {
            if (i == test.classIndex()) {
                continue;
            }
            if (attributesToOutput.isInRange(i)) {
                if (!first) {
                    text.append(",");
                }
                text.append(test.attribute(i).name());
                first = false;
            }
        }
        text.append(")");
    }
    text.append("\n");
}
From source file: cs.man.ac.uk.classifiers.GetAUC.java
License: Open Source License
/**
 * Computes the AUC for the supplied stream learner.
 *
 * @return the AUC as a double value.
 */
private static double validate5x2CVStream() {
    try {
        // Other options
        int runs = 5;
        int folds = 2;
        double AUC_SUM = 0;

        // perform cross-validation
        for (int i = 0; i < runs; i++) {
            // randomize data
            int seed = i + 1;
            Random rand = new Random(seed);
            Instances randData = new Instances(data);
            randData.randomize(rand);

            if (randData.classAttribute().isNominal()) {
                System.out.println("Stratifying...");
                randData.stratify(folds);
            }

            for (int n = 0; n < folds; n++) {
                Instances train = randData.trainCV(folds, n);
                Instances test = randData.testCV(folds, n);

                Distribution testDistribution = new Distribution(test);

                ArffSaver trainSaver = new ArffSaver();
                trainSaver.setInstances(train);
                trainSaver.setFile(new File(trainPath));
                trainSaver.writeBatch();

                ArffSaver testSaver = new ArffSaver();
                testSaver.setInstances(test);

                double[][] dist = testDistribution.matrix();
                int negativeClassSize = (int) dist[0][0];
                int positiveClassSize = (int) dist[0][1];
                double balance = (double) positiveClassSize / (double) negativeClassSize;

                // [Test-n-Set-n]_[+]_[-]_[K]_[L]
                String tempTestPath = testPath.replace(".arff",
                        "_" + positiveClassSize + "_" + negativeClassSize + "_" + balance + "_1.0.arff");

                testSaver.setFile(new File(tempTestPath));
                testSaver.writeBatch();

                ARFFFile file = new ARFFFile(tempTestPath, CLASS_INDEX, new DebugLogger(false));
                file.createMetaData();

                HoeffdingTreeTester streamClassifier = new HoeffdingTreeTester(trainPath, tempTestPath,
                        CLASS_INDEX, new String[] { "0", "1" }, new DebugLogger(true));

                streamClassifier.train();

                System.in.read(); // blocks until input is received (debugging pause in the original source)

                //AUC_SUM += streamClassifier.getROCExternalData("", (int) testDistribution.perClass(1),
                //        (int) testDistribution.perClass(0));
                streamClassifier.testStatic(homeDirectory + "/FuckSakeTest.txt");

                String[] files = Common.getFilePaths(scratch);
                for (int j = 0; j < files.length; j++) {
                    Common.fileDelete(files[j]);
                }
            }
        }

        return AUC_SUM / ((double) runs * (double) folds);
    } catch (Exception e) {
        System.out.println("Exception validating data!");
        e.printStackTrace();
        return 0;
    }
}
From source file: cs.man.ac.uk.classifiers.GetAUC.java
License: Open Source License
/**
 * Computes the AUC for the supplied learner.
 *
 * @return the AUC as a double value.
 */
@SuppressWarnings("unused")
private static double validate5x2CV() {
    try {
        // other options
        int runs = 5;
        int folds = 2;
        double AUC_SUM = 0;

        // perform cross-validation
        for (int i = 0; i < runs; i++) {
            // randomize data
            int seed = i + 1;
            Random rand = new Random(seed);
            Instances randData = new Instances(data);
            randData.randomize(rand);

            if (randData.classAttribute().isNominal()) {
                System.out.println("Stratifying...");
                randData.stratify(folds);
            }

            Evaluation eval = new Evaluation(randData);

            for (int n = 0; n < folds; n++) {
                Instances train = randData.trainCV(folds, n);
                Instances test = randData.testCV(folds, n);

                // the above code is used by the StratifiedRemoveFolds filter, the
                // code below by the Explorer/Experimenter:
                // Instances train = randData.trainCV(folds, n, rand);

                // build and evaluate classifier
                String[] options = { "-U", "-A" };
                J48 classifier = new J48();
                //HTree classifier = new HTree();

                classifier.setOptions(options);
                classifier.buildClassifier(train);

                eval.evaluateModel(classifier, test);

                // generate curve
                ThresholdCurve tc = new ThresholdCurve();
                int classIndex = 0;
                Instances result = tc.getCurve(eval.predictions(), classIndex);

                // plot curve
                vmc = new ThresholdVisualizePanel();
                AUC_SUM += ThresholdCurve.getROCArea(result);
                System.out.println("AUC: " + ThresholdCurve.getROCArea(result) + " \tAUC SUM: " + AUC_SUM);
            }
        }

        return AUC_SUM / ((double) runs * (double) folds);
    } catch (Exception e) {
        System.out.println("Exception validating data!");
        return 0;
    }
}
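For comparison, stock Weka can report the same quantity without building the curve by hand. A minimal sketch using weka.classifiers.Evaluation.areaUnderROC(int); the dataset path is a placeholder:

import java.io.FileReader;
import java.util.Random;
import weka.classifiers.Evaluation;
import weka.classifiers.trees.J48;
import weka.core.Instances;

public class AucDemo {
    public static void main(String[] args) throws Exception {
        Instances data = new Instances(new FileReader("data.arff")); // placeholder path
        data.setClassIndex(data.numAttributes() - 1);

        Evaluation eval = new Evaluation(data);
        eval.crossValidateModel(new J48(), data, 10, new Random(1));

        // areaUnderROC(classIndex) yields the value the example above derives
        // via ThresholdCurve.getROCArea(tc.getCurve(eval.predictions(), 0)).
        System.out.println("AUC (class 0): " + eval.areaUnderROC(0));
    }
}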
From source file: de.fub.maps.project.detector.model.inference.processhandler.InferenceDataProcessHandler.java
License: Open Source License
@Override
protected void handle() {
    clearResults();
    Classifier classifier = getInferenceModel().getClassifier();
    HashSet<TrackSegment> inferenceDataSet = getInferenceDataSet();
    Collection<Attribute> attributeList = getInferenceModel().getAttributes();
    if (!attributeList.isEmpty()) {
        Set<String> keySet = getInferenceModel().getInput().getTrainingsSet().keySet();
        setClassesToView(keySet);

        Instances unlabeledInstances = new Instances("Unlabeld Tracks",
                new ArrayList<Attribute>(attributeList), 0); //NOI18N
        unlabeledInstances.setClassIndex(0);

        ArrayList<TrackSegment> segmentList = new ArrayList<TrackSegment>();
        for (TrackSegment segment : inferenceDataSet) {
            Instance instance = getInstance(segment);
            unlabeledInstances.add(instance);
            segmentList.add(segment);
        }

        // create copy
        Instances labeledInstances = new Instances(unlabeledInstances);

        for (int index = 0; index < labeledInstances.numInstances(); index++) {
            try {
                Instance instance = labeledInstances.instance(index);

                // classify instance
                double classifyed = classifier.classifyInstance(instance);
                instance.setClassValue(classifyed);

                // get class label
                String value = unlabeledInstances.classAttribute().value((int) classifyed);

                if (index < segmentList.size()) {
                    instanceToTrackSegmentMap.put(instance, segmentList.get(index));
                }

                // put label and instance to result map
                put(value, instance);
            } catch (Exception ex) {
                Exceptions.printStackTrace(ex);
            }
        }

        // update view
        updateVisualRepresentation();

        // update result set of the inferenceModel
        for (Entry<String, List<Instance>> entry : resultMap.entrySet()) {
            HashSet<TrackSegment> trackSegmentList = new HashSet<TrackSegment>();
            for (Instance instance : entry.getValue()) {
                TrackSegment trackSegment = instanceToTrackSegmentMap.get(instance);
                if (trackSegment != null) {
                    trackSegmentList.add(trackSegment);
                }
            }

            // only those classes are put into the result data set, which are not empty
            if (!trackSegmentList.isEmpty()) {
                getInferenceModel().getResult().put(entry.getKey(), trackSegmentList);
            }
        }
    } else {
        throw new InferenceModelClassifyException(MessageFormat
                .format("No attributes available. Attribute list length == {0}", attributeList.size()));
    }
    resultMap.clear();
    instanceToTrackSegmentMap.clear();
}
From source file: de.fub.maps.project.detector.model.inference.processhandler.SpecialInferenceDataProcessHandler.java
License: Open Source License
@Override
protected void handle() {
    clearResults();
    Classifier classifier = getInferenceModel().getClassifier();
    Collection<Attribute> attributeList = getInferenceModel().getAttributes();
    if (!attributeList.isEmpty()) {
        Set<String> keySet = getInferenceModel().getInput().getTrainingsSet().keySet();
        setClassesToView(keySet);

        Instances unlabeledInstances = new Instances("Unlabeld Tracks",
                new ArrayList<Attribute>(attributeList), 0); //NOI18N
        unlabeledInstances.setClassIndex(0);

        ArrayList<TrackSegment> segmentList = new ArrayList<TrackSegment>();
        for (Entry<String, HashSet<TrackSegment>> entry : getInferenceModel().getInput().getTrainingsSet()
                .entrySet()) {
            for (TrackSegment segment : entry.getValue()) {
                segment.setLabel(entry.getKey());
                Instance instance = getInstance(segment);
                unlabeledInstances.add(instance);
                segmentList.add(segment);
            }
        }

        // create copy
        Instances labeledInstances = new Instances(unlabeledInstances);

        for (int index = 0; index < labeledInstances.numInstances(); index++) {
            try {
                Instance instance = labeledInstances.instance(index);

                // classify instance
                double classifyed = classifier.classifyInstance(instance);
                instance.setClassValue(classifyed);

                // get class label
                String value = unlabeledInstances.classAttribute().value((int) classifyed);

                if (index < segmentList.size()) {
                    instanceToTrackSegmentMap.put(instance, segmentList.get(index));
                }

                // put label and instance to result map
                put(value, instance);
            } catch (Exception ex) {
                Exceptions.printStackTrace(ex);
            }
        }

        // update view
        updateVisualRepresentation();

        // update result set of the inferenceModel
        for (Map.Entry<String, List<Instance>> entry : resultMap.entrySet()) {
            HashSet<TrackSegment> trackSegmentList = new HashSet<TrackSegment>();
            for (Instance instance : entry.getValue()) {
                TrackSegment trackSegment = instanceToTrackSegmentMap.get(instance);
                if (trackSegment != null) {
                    trackSegmentList.add(trackSegment);
                }
            }

            // only those classes are put into the result data set, which are not empty
            if (!trackSegmentList.isEmpty()) {
                getInferenceModel().getResult().put(entry.getKey(), trackSegmentList);
            }
        }
    } else {
        throw new InferenceModelClassifyException(MessageFormat
                .format("No attributes available. Attribute list length == {0}", attributeList.size()));
    }
    resultMap.clear();
    instanceToTrackSegmentMap.clear();
}
From source file: de.tudarmstadt.ukp.dkpro.spelling.experiments.hoo2012.featureextraction.AllFeaturesExtractor.java
License: Apache License
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    Collection<Token> tokens = JCasUtil.select(jcas, Token.class);

    if (isTest) {
        Instances trainData = null;
        Classifier cl = null;
        try {
            trainData = getInstances(trainingArff);
            cl = getClassifier();

            // SpreadSubsample spread = new SpreadSubsample();
            // spread.setDistributionSpread(1.0);
            //
            // FilteredClassifier fc = new FilteredClassifier();
            // fc.setFilter(spread);
            // fc.setClassifier(cl);

            cl.buildClassifier(trainData);
        } catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        }

        for (Token token : tokens) {
            String tokenString = token.getCoveredText();
            if (tokenString.length() > 0 && confusionSet.contains(tokenString)) {
                Instance<String> instance = new Instance<String>();
                for (SimpleFeatureExtractor featExt : featureExtractors) {
                    instance.addAll(featExt.extract(jcas, token));
                }
                instance.setOutcome(tokenString);

                List<String> classValues = new ArrayList<String>();
                for (Enumeration e = trainData.classAttribute().enumerateValues(); e.hasMoreElements();) {
                    classValues.add(e.nextElement().toString());
                }

                // build classifier from training arff and classify
                try {
                    weka.core.Instance wekaInstance = CleartkInstanceConverter.toWekaInstance(instance,
                            classValues);
                    System.out.println(wekaInstance);
                    double prediction = cl.classifyInstance(wekaInstance);

                    // prediction is the index in the class labels, not the class label itself!
                    String outcome = trainData.classAttribute().value(new Double(prediction).intValue());

                    if (!tokenString.equals(outcome)) {
                        SpellingAnomaly ann = new SpellingAnomaly(jcas, token.getBegin(), token.getEnd());
                        ann.setCategory(errorClass);
                        ann.setSuggestions(SpellingUtils.getSuggestedActionArray(jcas, outcome));
                        ann.addToIndexes();
                    }
                } catch (Exception e) {
                    throw new AnalysisEngineProcessException(e);
                }
            }
        }
    } else {
        for (Token token : tokens) {
            String tokenString = token.getCoveredText();
            if (tokenString.length() > 0 && confusionSet.contains(tokenString)) {
                Instance<String> instance = new Instance<String>();
                for (SimpleFeatureExtractor featExt : featureExtractors) {
                    instance.addAll(featExt.extract(jcas, token));
                }
                instance.setOutcome(tokenString);

                // we also need to add a negative example
                // choose it randomly from the confusion set without the actual token
                // TODO implement negative examples

                this.dataWriter.write(instance);
            }
        }
    }
}
From source file: de.ugoe.cs.cpdp.dataprocessing.MORPH.java
License: Apache License
/**
 * <p>
 * Applies MORPH to a single instance
 * </p>
 *
 * @param instance
 *            instance that is morphed
 * @param data
 *            data based on which the instance is morphed
 */
public void morphInstance(Instance instance, Instances data) {
    Instance nearestUnlikeNeighbor = getNearestUnlikeNeighbor(instance, data);
    if (nearestUnlikeNeighbor == null) {
        throw new RuntimeException(
                "could not find nearest unlike neighbor within the data: " + data.relationName());
    }
    for (int j = 0; j < data.numAttributes(); j++) {
        if (data.attribute(j) != data.classAttribute() && data.attribute(j).isNumeric()) {
            double randVal = rand.nextDouble() * (beta - alpha) + alpha;
            instance.setValue(j,
                    instance.value(j) + randVal * (instance.value(j) - nearestUnlikeNeighbor.value(j)));
        }
    }
}
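A sketch of driving the per-instance method over a whole training set. The MORPH default constructor and its alpha/beta defaults are assumptions, as is the placeholder ARFF path:

import java.io.FileReader;
import de.ugoe.cs.cpdp.dataprocessing.MORPH;
import weka.core.Instances;

public class MorphDemo {
    public static void main(String[] args) throws Exception {
        Instances data = new Instances(new FileReader("defects.arff")); // placeholder path
        data.setClassIndex(data.numAttributes() - 1);

        MORPH morph = new MORPH(); // assumption: default constructor with default alpha/beta
        // Morph every instance in place against the full dataset.
        for (int i = 0; i < data.numInstances(); i++) {
            morph.morphInstance(data.instance(i), data);
        }
    }
}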
From source file: de.ugoe.cs.cpdp.dataprocessing.MORPH.java
License: Apache License
/**
 * <p>
 * Determines the nearest unlike neighbor of an instance.
 * </p>
 *
 * @param instance
 *            instance to which the nearest unlike neighbor is determined
 * @param data
 *            data where the nearest unlike neighbor is determined from
 * @return nearest unlike instance
 */
public Instance getNearestUnlikeNeighbor(Instance instance, Instances data) {
    Instance nearestUnlikeNeighbor = null;

    double[] instanceVector = new double[data.numAttributes() - 1];
    int tmp = 0;
    for (int j = 0; j < data.numAttributes(); j++) {
        if (data.attribute(j) != data.classAttribute() && data.attribute(j).isNumeric()) {
            // tmp must be incremented here; otherwise every numeric value
            // would overwrite the first vector component
            instanceVector[tmp++] = instance.value(j);
        }
    }

    double minDistance = Double.MAX_VALUE;
    for (int i = 0; i < data.numInstances(); i++) {
        if (instance.classValue() != data.instance(i).classValue()) {
            double[] otherVector = new double[data.numAttributes() - 1];
            tmp = 0;
            for (int j = 0; j < data.numAttributes(); j++) {
                if (data.attribute(j) != data.classAttribute() && data.attribute(j).isNumeric()) {
                    otherVector[tmp++] = data.instance(i).value(j);
                }
            }
            double distance = MathArrays.distance(instanceVector, otherVector);
            if (distance < minDistance) {
                minDistance = distance;
                nearestUnlikeNeighbor = data.instance(i);
            }
        }
    }
    return nearestUnlikeNeighbor;
}
From source file: de.ugoe.cs.cpdp.dataselection.CLIFF.java
License: Apache License
/**
 * <p>
 * Applies the CLIFF relevancy filter to the data.
 * </p>
 *
 * @param data
 *            the data
 * @return CLIFF-filtered data
 */
protected Instances applyCLIFF(Instances data) {
    final double[][] powerAttributes = new double[data.size()][data.numAttributes()];
    final double[] powerEntity = new double[data.size()];

    final int[] counts = data.attributeStats(data.classIndex()).nominalCounts;
    final double probDefect = data.numInstances() / (double) counts[1];

    for (int j = 0; j < data.numAttributes(); j++) {
        if (data.attribute(j) != data.classAttribute()) {
            final double[] ranges = getRanges(data, j);
            final double[] probDefectRange = getRangeProbabilities(data, j, ranges);

            for (int i = 0; i < data.numInstances(); i++) {
                final double value = data.instance(i).value(j);
                final int range = determineRange(ranges, value);
                double probClass, probNotClass, probRangeClass, probRangeNotClass;
                if (data.instance(i).classValue() == 1) {
                    probClass = probDefect;
                    probNotClass = 1.0 - probDefect;
                    probRangeClass = probDefectRange[range];
                    probRangeNotClass = 1.0 - probDefectRange[range];
                } else {
                    probClass = 1.0 - probDefect;
                    probNotClass = probDefect;
                    probRangeClass = 1.0 - probDefectRange[range];
                    probRangeNotClass = probDefectRange[range];
                }
                powerAttributes[i][j] = Math.pow(probRangeClass, 2.0)
                        / (probRangeClass * probClass + probRangeNotClass * probNotClass);
            }
        }
    }

    for (int i = 0; i < data.numInstances(); i++) {
        powerEntity[i] = 1.0;
        for (int j = 0; j < data.numAttributes(); j++) {
            // skip the class attribute; its column is never assigned a power
            // value and would otherwise zero out the whole product
            if (data.attribute(j) != data.classAttribute()) {
                powerEntity[i] *= powerAttributes[i][j];
            }
        }
    }

    double[] sortedPower = powerEntity.clone();
    Arrays.sort(sortedPower);
    double cutOff = sortedPower[(int) (data.numInstances() * (1 - percentage))];

    final Instances selected = new Instances(data);
    selected.delete();
    for (int i = 0; i < data.numInstances(); i++) {
        if (powerEntity[i] >= cutOff) {
            selected.add(data.instance(i));
        }
    }
    return selected;
}
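Since applyCLIFF is protected, a caller outside the package would need a subclass. A minimal sketch under that assumption; the percentage field is taken at whatever default the class provides, and the ARFF path is a placeholder:

import java.io.FileReader;
import de.ugoe.cs.cpdp.dataselection.CLIFF;
import weka.core.Instances;

public class CliffDemo extends CLIFF {
    public static void main(String[] args) throws Exception {
        Instances data = new Instances(new FileReader("defects.arff")); // placeholder path
        data.setClassIndex(data.numAttributes() - 1);

        // Reach the protected applyCLIFF through this subclass.
        Instances filtered = new CliffDemo().applyCLIFF(data);
        System.out.println("Kept " + filtered.numInstances() + " of "
                + data.numInstances() + " instances");
    }
}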