List of usage examples for weka.core Instances instance
public Instance instance(int index)
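The method returns the Instance stored at the given zero-based index of an Instances dataset. Before the collected examples below, a minimal self-contained sketch of the typical index-based access pattern (the ARFF path "data.arff" is a placeholder):

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class InstanceAccessExample {
    public static void main(String[] args) throws Exception {
        // Load a dataset from an ARFF file; the path is a placeholder.
        Instances data = DataSource.read("data.arff");
        // Walk the dataset by index; instance(i) returns the i-th row.
        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);
            System.out.println(inst);
        }
    }
}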
From source file:data.generation.target.utils.PrincipalComponents.java
License:Open Source License
/**
 * Gets the transformed training data.
 *
 * @param data the data to transform
 * @return the transformed training data
 * @throws Exception if transformed data can't be returned
 */
public Instances transformedData(Instances data) throws Exception {
    if (m_eigenvalues == null) {
        throw new Exception("Principal components hasn't been built yet");
    }
    Instances output = null;
    if (m_transBackToOriginal) {
        output = new Instances(m_originalSpaceFormat);
    } else {
        output = new Instances(m_transformedFormat);
    }
    for (int i = 0; i < data.numInstances(); i++) {
        Instance converted = convertInstance(data.instance(i));
        output.add(converted);
    }
    return output;
}
From source file:data.statistics.MILStatistics.java
License:Open Source License
/**
 * Calculates various MIML statistics, such as instancesPerBag and
 * attributesPerBag.
 *
 * @param dataSet a MIL dataset
 */
public void calculateStats(Instances dataSet) {
    numBags = dataSet.numInstances();
    attributesPerBag = dataSet.instance(0).relationalValue(1).numAttributes();
    minInstancesPerBag = Integer.MAX_VALUE;
    maxInstancesPerBag = Integer.MIN_VALUE;
    // Each pair <Integer, Integer> stores <numberOfInstances, numberOfBags>
    distributionBags = new HashMap<Integer, Integer>();
    for (int i = 0; i < numBags; i++) {
        int nInstances = dataSet.instance(i).relationalValue(1).numInstances();
        if (nInstances < minInstancesPerBag) {
            minInstancesPerBag = nInstances;
        }
        if (nInstances > maxInstancesPerBag) {
            maxInstancesPerBag = nInstances;
        }
        if (distributionBags.containsKey(nInstances)) {
            distributionBags.put(nInstances, distributionBags.get(nInstances) + 1);
        } else {
            distributionBags.put(nInstances, 1);
        }
    }
    avgInstancesPerBag = 0.0;
    for (Integer size : distributionBags.keySet()) {
        avgInstancesPerBag += size * distributionBags.get(size);
    }
    avgInstancesPerBag = avgInstancesPerBag / numBags;
}
From source file:data.statistics.MLStatistics.java
License:Open Source License
/**
 * Calculates the Phi and Chi-square correlation matrices.
 *
 * @param dataSet a multi-label dataset
 * @throws java.lang.Exception to be handled at an upper level
 */
public void calculatePhiChi2(MultiLabelInstances dataSet) throws Exception {
    numLabels = dataSet.getNumLabels();
    // The indices of the label attributes
    int[] labelIndices = dataSet.getLabelIndices();
    phi = new double[numLabels][numLabels];
    chi2 = new double[numLabels][numLabels];
    // Keep only the label attributes
    Remove remove = new Remove();
    remove.setInvertSelection(true);
    remove.setAttributeIndicesArray(labelIndices);
    remove.setInputFormat(dataSet.getDataSet());
    Instances result = Filter.useFilter(dataSet.getDataSet(), remove);
    result.setClassIndex(result.numAttributes() - 1);
    for (int i = 0; i < numLabels; i++) {
        // 2x2 contingency table counts for the label pair (i, l):
        // a = both "0", d = both "1", b and c = the mixed cases
        int a[] = new int[numLabels];
        int b[] = new int[numLabels];
        int c[] = new int[numLabels];
        int d[] = new int[numLabels];
        double e[] = new double[numLabels];
        double f[] = new double[numLabels];
        double g[] = new double[numLabels];
        double h[] = new double[numLabels];
        for (int j = 0; j < result.numInstances(); j++) {
            for (int l = 0; l < numLabels; l++) {
                if (result.instance(j).stringValue(i).equals("0")) {
                    if (result.instance(j).stringValue(l).equals("0")) {
                        a[l]++;
                    } else {
                        c[l]++;
                    }
                } else {
                    if (result.instance(j).stringValue(l).equals("0")) {
                        b[l]++;
                    } else {
                        d[l]++;
                    }
                }
            }
        }
        for (int l = 0; l < numLabels; l++) {
            // Marginal totals of the contingency table
            e[l] = a[l] + b[l];
            f[l] = c[l] + d[l];
            g[l] = a[l] + c[l];
            h[l] = b[l] + d[l];
            double denominator = Math.sqrt(e[l] * f[l] * g[l] * h[l]);
            double numerator = a[l] * d[l] - b[l] * c[l];
            phi[i][l] = numerator / denominator;
            chi2[i][l] = phi[i][l] * phi[i][l] * (a[l] + b[l] + c[l] + d[l]);
        }
    }
}
From source file:DataMiningLogHistoriKIRI.DecisionTree.java
public String[] id3(Instances arff) {
    // Build a J48 (WEKA's C4.5) decision tree on the dataset
    J48 tree = new J48();
    try {
        tree.buildClassifier(arff);
    } catch (Exception ex) {
        Logger.getLogger(Controller.class.getName()).log(Level.SEVERE, null, ex);
    }
    System.out.println(tree.toString());
    int nilaiBenar = 0, resultInt;
    float result = 0;
    for (int i = 0; i < arff.numInstances(); i++) {
        try {
            result = (float) tree.classifyInstance(arff.instance(i));
            resultInt = Math.round(result);
            // Compare against the class label stored in attribute 6
            // System.out.println(dataAfterPreprocessing.get(i)[6] + " " + arff.instance(i).stringValue(6));
            if (resultInt == Integer.parseInt(arff.instance(i).stringValue(6))) {
                nilaiBenar++;
            }
        } catch (Exception ex) {
            Logger.getLogger(Controller.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    System.out.println("nilai: " + nilaiBenar + " " + arff.numInstances());
    double confident = nilaiBenar * 1.0 / arff.numInstances() * 100;
    System.out.println("Confident = " + confident + "%");
    // Placeholder result; the array is returned unpopulated
    String[] result2 = new String[5];
    return result2;
}
From source file:DataMiningLogHistoriKIRIPercobaan2.DecisionTree.java
public double calculateConfiden(Instances arff) {
    // Compute the classification confidence (accuracy in percent) on the dataset
    int nilaiBenar = 0, resultInt;
    float result = 0;
    for (int i = 0; i < arff.numInstances(); i++) {
        try {
            result = (float) tree.classifyInstance(arff.instance(i));
            resultInt = Math.round(result);
            if (resultInt == Integer.parseInt(arff.instance(i).stringValue(6))) {
                nilaiBenar++;
            }
        } catch (Exception ex) {
            Logger.getLogger(Controller.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    double confident = nilaiBenar * 1.0 / arff.numInstances() * 100;
    return confident;
}
From source file:de.fub.maps.project.detector.model.inference.processhandler.InferenceDataProcessHandler.java
License:Open Source License
@Override
protected void handle() {
    clearResults();
    Classifier classifier = getInferenceModel().getClassifier();
    HashSet<TrackSegment> inferenceDataSet = getInferenceDataSet();
    Collection<Attribute> attributeList = getInferenceModel().getAttributes();
    if (!attributeList.isEmpty()) {
        Set<String> keySet = getInferenceModel().getInput().getTrainingsSet().keySet();
        setClassesToView(keySet);
        Instances unlabeledInstances = new Instances("Unlabeled Tracks",
                new ArrayList<Attribute>(attributeList), 0); // NOI18N
        unlabeledInstances.setClassIndex(0);
        ArrayList<TrackSegment> segmentList = new ArrayList<TrackSegment>();
        for (TrackSegment segment : inferenceDataSet) {
            Instance instance = getInstance(segment);
            unlabeledInstances.add(instance);
            segmentList.add(segment);
        }
        // create copy
        Instances labeledInstances = new Instances(unlabeledInstances);
        for (int index = 0; index < labeledInstances.numInstances(); index++) {
            try {
                Instance instance = labeledInstances.instance(index);
                // classify instance
                double classified = classifier.classifyInstance(instance);
                instance.setClassValue(classified);
                // get class label
                String value = unlabeledInstances.classAttribute().value((int) classified);
                if (index < segmentList.size()) {
                    instanceToTrackSegmentMap.put(instance, segmentList.get(index));
                }
                // put label and instance into the result map
                put(value, instance);
            } catch (Exception ex) {
                Exceptions.printStackTrace(ex);
            }
        }
        // update view
        updateVisualRepresentation();
        // update the result set of the inference model
        for (Entry<String, List<Instance>> entry : resultMap.entrySet()) {
            HashSet<TrackSegment> trackSegmentList = new HashSet<TrackSegment>();
            for (Instance instance : entry.getValue()) {
                TrackSegment trackSegment = instanceToTrackSegmentMap.get(instance);
                if (trackSegment != null) {
                    trackSegmentList.add(trackSegment);
                }
            }
            // only non-empty classes are put into the result data set
            if (!trackSegmentList.isEmpty()) {
                getInferenceModel().getResult().put(entry.getKey(), trackSegmentList);
            }
        }
    } else {
        throw new InferenceModelClassifyException(MessageFormat
                .format("No attributes available. Attribute list length == {0}", attributeList.size()));
    }
    resultMap.clear();
    instanceToTrackSegmentMap.clear();
}
From source file:de.fub.maps.project.detector.model.inference.processhandler.SpecialInferenceDataProcessHandler.java
License:Open Source License
@Override
protected void handle() {
    clearResults();
    Classifier classifier = getInferenceModel().getClassifier();
    Collection<Attribute> attributeList = getInferenceModel().getAttributes();
    if (!attributeList.isEmpty()) {
        Set<String> keySet = getInferenceModel().getInput().getTrainingsSet().keySet();
        setClassesToView(keySet);
        Instances unlabeledInstances = new Instances("Unlabeled Tracks",
                new ArrayList<Attribute>(attributeList), 0); // NOI18N
        unlabeledInstances.setClassIndex(0);
        ArrayList<TrackSegment> segmentList = new ArrayList<TrackSegment>();
        for (Entry<String, HashSet<TrackSegment>> entry : getInferenceModel().getInput().getTrainingsSet()
                .entrySet()) {
            for (TrackSegment segment : entry.getValue()) {
                segment.setLabel(entry.getKey());
                Instance instance = getInstance(segment);
                unlabeledInstances.add(instance);
                segmentList.add(segment);
            }
        }
        // create copy
        Instances labeledInstances = new Instances(unlabeledInstances);
        for (int index = 0; index < labeledInstances.numInstances(); index++) {
            try {
                Instance instance = labeledInstances.instance(index);
                // classify instance
                double classified = classifier.classifyInstance(instance);
                instance.setClassValue(classified);
                // get class label
                String value = unlabeledInstances.classAttribute().value((int) classified);
                if (index < segmentList.size()) {
                    instanceToTrackSegmentMap.put(instance, segmentList.get(index));
                }
                // put label and instance into the result map
                put(value, instance);
            } catch (Exception ex) {
                Exceptions.printStackTrace(ex);
            }
        }
        // update view
        updateVisualRepresentation();
        // update the result set of the inference model
        for (Map.Entry<String, List<Instance>> entry : resultMap.entrySet()) {
            HashSet<TrackSegment> trackSegmentList = new HashSet<TrackSegment>();
            for (Instance instance : entry.getValue()) {
                TrackSegment trackSegment = instanceToTrackSegmentMap.get(instance);
                if (trackSegment != null) {
                    trackSegmentList.add(trackSegment);
                }
            }
            // only non-empty classes are put into the result data set
            if (!trackSegmentList.isEmpty()) {
                getInferenceModel().getResult().put(entry.getKey(), trackSegmentList);
            }
        }
    } else {
        throw new InferenceModelClassifyException(MessageFormat
                .format("No attributes available. Attribute list length == {0}", attributeList.size()));
    }
    resultMap.clear();
    instanceToTrackSegmentMap.clear();
}
From source file:de.tudarmstadt.ukp.alignment.framework.combined.WekaMachineLearning.java
License:Apache License
/**
 * Applies a serialized WEKA model file to an unlabeled .arff file for classification.
 *
 * @param input_arff the unlabeled instances in an .arff file
 * @param model the serialized model file of the trained classifier
 * @param output output file for the labeled instances
 * @throws Exception
 */
public static void applyModelToUnlabeledArff(String input_arff, String model, String output)
        throws Exception {
    DataSource source = new DataSource(input_arff);
    Instances unlabeled = source.getDataSet();
    if (unlabeled.classIndex() == -1) {
        unlabeled.setClassIndex(unlabeled.numAttributes() - 1);
    }
    Remove rm = new Remove();
    rm.setAttributeIndices("1"); // remove ID attribute (declared here but never applied below)
    // deserialize the trained classifier
    ObjectInputStream ois = new ObjectInputStream(new FileInputStream(model));
    Classifier cls = (Classifier) ois.readObject();
    ois.close();
    // create copy
    Instances labeled = new Instances(unlabeled);
    // label instances
    for (int i = 0; i < unlabeled.numInstances(); i++) {
        double clsLabel = cls.classifyInstance(unlabeled.instance(i));
        labeled.instance(i).setClassValue(clsLabel);
    }
    // save labeled data
    BufferedWriter writer = new BufferedWriter(new FileWriter(output));
    writer.write(labeled.toString());
    writer.newLine();
    writer.flush();
    writer.close();
}
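A hypothetical invocation of this static utility, assuming the enclosing class name WekaMachineLearning from the source file above; all three file paths are placeholders:

WekaMachineLearning.applyModelToUnlabeledArff("unlabeled.arff", "classifier.model", "labeled.arff");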
From source file:de.tudarmstadt.ukp.similarity.experiments.coling2012.util.Evaluator.java
License:Open Source License
public static void runClassifierCV(WekaClassifier wekaClassifier, Dataset dataset) throws Exception {
    // Set parameters
    int folds = 10;
    Classifier baseClassifier = getClassifier(wekaClassifier);

    // Set up the random number generator
    long seed = new Date().getTime();
    Random random = new Random(seed);

    // Add IDs to the instances
    AddID.main(new String[] { "-i", MODELS_DIR + "/" + dataset.toString() + ".arff", "-o",
            MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff" });
    Instances data = DataSource.read(MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff");
    data.setClassIndex(data.numAttributes() - 1);

    // Instantiate the Remove filter
    Remove removeIDFilter = new Remove();
    removeIDFilter.setAttributeIndices("first");

    // Randomize the data
    data.randomize(random);

    // Perform cross-validation
    Instances predictedData = null;
    Evaluation eval = new Evaluation(data);

    for (int n = 0; n < folds; n++) {
        Instances train = data.trainCV(folds, n, random);
        Instances test = data.testCV(folds, n);

        // Apply log filter
        // Filter logFilter = new LogFilter();
        // logFilter.setInputFormat(train);
        // train = Filter.useFilter(train, logFilter);
        // logFilter.setInputFormat(test);
        // test = Filter.useFilter(test, logFilter);

        // Copy the classifier
        Classifier classifier = AbstractClassifier.makeCopy(baseClassifier);

        // Instantiate the FilteredClassifier
        FilteredClassifier filteredClassifier = new FilteredClassifier();
        filteredClassifier.setFilter(removeIDFilter);
        filteredClassifier.setClassifier(classifier);

        // Build the classifier
        filteredClassifier.buildClassifier(train);

        // Evaluate
        eval.evaluateModel(filteredClassifier, test);

        // Add predictions
        AddClassification filter = new AddClassification();
        filter.setClassifier(filteredClassifier);
        filter.setOutputClassification(true);
        filter.setOutputDistribution(false);
        filter.setOutputErrorFlag(true);
        filter.setInputFormat(train);
        Filter.useFilter(train, filter); // trains the classifier
        Instances pred = Filter.useFilter(test, filter); // performs predictions on the test set
        if (predictedData == null) {
            predictedData = new Instances(pred, 0);
        }
        for (int j = 0; j < pred.numInstances(); j++) {
            predictedData.add(pred.instance(j));
        }
    }

    // Prepare output classification
    String[] scores = new String[predictedData.numInstances()];
    for (Instance predInst : predictedData) {
        int id = (int) predInst.value(predInst.attribute(0)) - 1;
        int valueIdx = predictedData.numAttributes() - 2;
        String value = predInst.stringValue(predInst.attribute(valueIdx));
        scores[id] = value;
    }

    // Output
    StringBuilder sb = new StringBuilder();
    for (String score : scores) {
        sb.append(score + LF);
    }
    FileUtils.writeStringToFile(
            new File(OUTPUT_DIR + "/" + dataset.toString() + "/" + wekaClassifier.toString() + "/output.csv"),
            sb.toString());
}
From source file:de.ugoe.cs.cpdp.dataprocessing.MORPH.java
License:Apache License
/**
 * <p>
 * Determines the nearest unlike neighbor of an instance, i.e., the closest
 * instance (by Euclidean distance over the numeric attributes) that has a
 * different class value.
 * </p>
 *
 * @param instance
 *            instance to which the nearest unlike neighbor is determined
 * @param data
 *            data where the nearest unlike neighbor is determined from
 * @return nearest unlike instance
 */
public Instance getNearestUnlikeNeighbor(Instance instance, Instances data) {
    Instance nearestUnlikeNeighbor = null;
    double[] instanceVector = new double[data.numAttributes() - 1];
    int tmp = 0;
    for (int j = 0; j < data.numAttributes(); j++) {
        if (data.attribute(j) != data.classAttribute() && data.attribute(j).isNumeric()) {
            // copy the numeric, non-class attribute values, advancing the index
            instanceVector[tmp++] = instance.value(j);
        }
    }
    double minDistance = Double.MAX_VALUE;
    for (int i = 0; i < data.numInstances(); i++) {
        if (instance.classValue() != data.instance(i).classValue()) {
            double[] otherVector = new double[data.numAttributes() - 1];
            tmp = 0;
            for (int j = 0; j < data.numAttributes(); j++) {
                if (data.attribute(j) != data.classAttribute() && data.attribute(j).isNumeric()) {
                    otherVector[tmp++] = data.instance(i).value(j);
                }
            }
            double distance = MathArrays.distance(instanceVector, otherVector);
            if (distance < minDistance) {
                minDistance = distance;
                nearestUnlikeNeighbor = data.instance(i);
            }
        }
    }
    return nearestUnlikeNeighbor;
}
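A hedged usage sketch for this method, assuming MORPH has a default constructor and that "defects.arff" is a placeholder for a dataset whose last attribute is the class:

MORPH morph = new MORPH();
Instances data = DataSource.read("defects.arff"); // placeholder path
data.setClassIndex(data.numAttributes() - 1);
// Nearest unlike neighbor of the first instance
Instance nun = morph.getNearestUnlikeNeighbor(data.instance(0), data);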