Example usage for weka.core Instance setValue

List of usage examples for weka.core Instance setValue

Introduction

On this page you can find example usage for weka.core Instance setValue.

Prototype

public void setValue(Attribute att, String value);

Document

Sets the value of a nominal or string attribute to the given value.
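
Before the longer examples, here is a minimal, self-contained sketch of this overload against the Weka 3.7+ API (the wrapper class name SetValueSketch is ours, not part of Weka). For a nominal attribute the string is looked up in the attribute's value list; for a string attribute it is added to the attribute; in both cases the resulting index is what the instance actually stores, as a double:

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class SetValueSketch {
    public static void main(String[] args) {
        ArrayList<String> labels = new ArrayList<String>();
        labels.add("yes");
        labels.add("no");

        ArrayList<Attribute> attrs = new ArrayList<Attribute>();
        attrs.add(new Attribute("label", labels)); // nominal attribute
        attrs.add(new Attribute("text", (ArrayList<String>) null)); // string attribute

        Instances data = new Instances("demo", attrs, 0);

        Instance inst = new DenseInstance(data.numAttributes());
        // Nominal: "yes" is resolved to its index in the value list.
        inst.setValue(data.attribute("label"), "yes");
        // String: the text is added to the attribute and its index stored.
        inst.setValue(data.attribute("text"), "free-form text");
        data.add(inst);

        System.out.println(data);
    }
}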

Usage

From source file:etc.aloe.filters.StringToDictionaryVector.java

License:Open Source License

public static void main(String[] args) {

    //Create a test dataset
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    attributes.add(new Attribute("message", (ArrayList<String>) null));
    attributes.add(new Attribute("id"));
    {
        ArrayList<String> classValues = new ArrayList<String>();
        classValues.add("0");
        classValues.add("1");
        attributes.add(new Attribute("class", classValues));
    }

    Instances instances = new Instances("test", attributes, 0);
    instances.setClassIndex(2);

    String[] messages = new String[] { "No emoticons here", "I have a smiley :)",
            "Two smileys and a frownie :) :) :(", "Several emoticons :( :-( :) :-) ;-) 8-) :-/ :-P" };

    for (int i = 0; i < messages.length; i++) {
        Instance instance = new DenseInstance(instances.numAttributes());
        instance.setValue(instances.attribute(0), messages[i]);
        instance.setValue(instances.attribute(1), i);
        instance.setValue(instances.attribute(2), Integer.toString(i % 2));
        instances.add(instance);
    }

    System.out.println("Before filter:");
    for (int i = 0; i < instances.size(); i++) {
        System.out.println(instances.instance(i).toString());
    }

    try {
        String dictionaryName = "emoticons.txt";
        StringToDictionaryVector filter = new StringToDictionaryVector();
        List<String> termList = StringToDictionaryVector.readDictionaryFile(new File(dictionaryName));
        filter.setTermList(termList);
        filter.setMinTermFreq(1);
        filter.setTFTransform(true);
        filter.setIDFTransform(true);
        filter.setNormalizeDocLength(new SelectedTag(FILTER_NORMALIZE_TEST_ONLY, TAGS_FILTER));
        filter.setOutputWordCounts(true);
        filter.setStringAttribute("message");

        filter.setInputFormat(instances);
        Instances trans1 = Filter.useFilter(instances, filter);
        Instances trans2 = Filter.useFilter(instances, filter);

        System.out.println("\nFirst application:");
        System.out.println(trans1.toString());

        System.out.println("\nSecond application:");
        System.out.println(trans2.toString());

    } catch (Exception e) {
        e.printStackTrace();
    }
}
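
Note the batch-filtering idiom at the end: setInputFormat(instances) initializes the filter, and Filter.useFilter is then applied twice to the same data. Printing both results is apparently meant to show that the second pass reuses the dictionary and attribute space established on the first batch, so both outputs are compatible.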

From source file:eu.cassandra.appliance.IsolatedApplianceExtractor.java

License:Apache License

/**
 * This is an auxiliary function that prepares the clustering data set. The
 * events must be translated to instances of the data set that can be used for
 * clustering.
 * 
 * @param isolated
 *          The list of the events containing an isolated appliance.
 * @return The instances of the data
 * @throws Exception
 */
private Instances createInstances(ArrayList<Event> isolated) throws Exception {
    // Initializing auxiliary variables namely the attributes of the data set
    Attribute id = new Attribute("id");
    Attribute pDiffRise = new Attribute("pDiffRise");
    Attribute qDiffRise = new Attribute("qDiffRise");
    Attribute pDiffReduce = new Attribute("pDiffReduce");
    Attribute qDiffReduce = new Attribute("qDiffReduce");

    ArrayList<Attribute> attr = new ArrayList<Attribute>();
    attr.add(id);
    attr.add(pDiffRise);
    attr.add(qDiffRise);
    attr.add(pDiffReduce);
    attr.add(qDiffReduce);

    Instances instances = new Instances("Isolated", attr, 0);

    // Each event is translated to an instance with the above attributes
    for (Event event : isolated) {

        Instance inst = new DenseInstance(5);
        inst.setValue(id, event.getId());
        inst.setValue(pDiffRise, event.getRisingPoints().get(0).getPDiff());
        inst.setValue(qDiffRise, event.getRisingPoints().get(0).getQDiff());
        inst.setValue(pDiffReduce, event.getReductionPoints().get(0).getPDiff());
        inst.setValue(qDiffReduce, event.getReductionPoints().get(0).getQDiff());

        instances.add(inst);

    }

    int n = Constants.MAX_CLUSTERS_NUMBER;
    Instances newInst = null;

    System.out.println("Instances: " + instances.toSummaryString());
    System.out.println("Max Clusters: " + n);

    // Create Weka's AddCluster filter and set up the clusterer: hierarchical
    // for large (or empty) data sets, k-means otherwise.
    AddCluster addcluster = new AddCluster();

    if (instances.size() > Constants.KMEANS_LIMIT_NUMBER || instances.size() == 0) {

        HierarchicalClusterer clusterer = new HierarchicalClusterer();

        String[] opt = { "-N", "" + n + "", "-P", "-D", "-L", "AVERAGE" };

        clusterer.setDistanceFunction(new EuclideanDistance());
        clusterer.setNumClusters(n);
        clusterer.setOptions(opt);
        clusterer.setPrintNewick(true);
        clusterer.setDebug(true);

        // clusterer.getOptions();

        addcluster.setClusterer(clusterer);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster data set
        newInst = Filter.useFilter(instances, addcluster);

    } else {

        SimpleKMeans kmeans = new SimpleKMeans();

        kmeans.setSeed(10);

        // This is the important parameter to set
        kmeans.setPreserveInstancesOrder(true);
        kmeans.setNumClusters(n);
        kmeans.buildClusterer(instances);

        addcluster.setClusterer(kmeans);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster data set
        newInst = Filter.useFilter(instances, addcluster);

    }

    return newInst;

}

From source file:eu.cassandra.appliance.IsolatedEventsExtractor.java

License:Apache License

/**
 * This is an auxiliary function that prepares the clustering data set. The
 * events must be translated to instances of the data set that can be used for
 * clustering.
 * 
 * @param isolated
 *          The list of the events containing an isolated appliance.
 * @return The instances of the data
 * @throws Exception
 */
private Instances createInstances(ArrayList<Event> isolated) throws Exception {
    // Initializing auxiliary variables namely the attributes of the data set
    Attribute id = new Attribute("id");
    Attribute pDiffRise = new Attribute("pDiffRise");
    Attribute qDiffRise = new Attribute("qDiffRise");
    Attribute pDiffReduce = new Attribute("pDiffReduce");
    Attribute qDiffReduce = new Attribute("qDiffReduce");
    Attribute duration = new Attribute("duration");

    ArrayList<Attribute> attr = new ArrayList<Attribute>();
    attr.add(id);
    attr.add(pDiffRise);
    attr.add(qDiffRise);
    attr.add(pDiffReduce);
    attr.add(qDiffReduce);
    attr.add(duration);

    Instances instances = new Instances("Isolated", attr, 0);

    // Each event is translated to an instance with the above attributes
    for (Event event : isolated) {

        Instance inst = new DenseInstance(6);
        inst.setValue(id, event.getId());
        inst.setValue(pDiffRise, event.getRisingPoints().get(0).getPDiff());
        inst.setValue(qDiffRise, event.getRisingPoints().get(0).getQDiff());
        inst.setValue(pDiffReduce, event.getReductionPoints().get(0).getPDiff());
        inst.setValue(qDiffReduce, event.getReductionPoints().get(0).getQDiff());
        inst.setValue(duration, event.getEndMinute() - event.getStartMinute());
        instances.add(inst);

    }

    int n = Constants.MAX_CLUSTERS_NUMBER;
    Instances newInst = null;

    log.info("Instances: " + instances.toSummaryString());
    log.info("Max Clusters: " + n);

    // Create Weka's AddCluster filter and set up the clusterer: hierarchical
    // for large (or empty) data sets, k-means otherwise.
    AddCluster addcluster = new AddCluster();

    if (instances.size() > Constants.KMEANS_LIMIT_NUMBER || instances.size() == 0) {

        HierarchicalClusterer clusterer = new HierarchicalClusterer();

        String[] opt = { "-N", "" + n + "", "-P", "-D", "-L", "AVERAGE" };

        clusterer.setDistanceFunction(new EuclideanDistance());
        clusterer.setNumClusters(n);
        clusterer.setOptions(opt);
        clusterer.setPrintNewick(true);
        clusterer.setDebug(true);

        // clusterer.getOptions();

        addcluster.setClusterer(clusterer);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster data set
        newInst = Filter.useFilter(instances, addcluster);

    } else {

        SimpleKMeans kmeans = new SimpleKMeans();

        kmeans.setSeed(10);

        // This is the important parameter to set
        kmeans.setPreserveInstancesOrder(true);
        kmeans.setNumClusters(n);
        kmeans.buildClusterer(instances);

        addcluster.setClusterer(kmeans);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster data set
        newInst = Filter.useFilter(instances, addcluster);

    }

    return newInst;

}

From source file:eu.cassandra.utils.Utils.java

License:Apache License

/**
 * This function is used in order to create clusters of points of interest
 * based on the active power difference they have.
 *
 * @param pois
 *          The list of points of interest that will be clustered.
 * @return The newly created clusters with the points that are comprising
 *         them.
 * @throws Exception
 */
public static ArrayList<ArrayList<PointOfInterest>> clusterPoints(ArrayList<PointOfInterest> pois, int bias)
        throws Exception {
    // Initialize the auxiliary variables
    ArrayList<ArrayList<PointOfInterest>> result = new ArrayList<ArrayList<PointOfInterest>>();

    // Estimating the number of clusters that will be created
    int numberOfClusters = (int) (Math.ceil((double) pois.size() / (double) Constants.MAX_POINTS_OF_INTEREST))
            + bias;

    log.info("Clusters: " + pois.size() + " / " + Constants.MAX_POINTS_OF_INTEREST + " + " + bias + " = "
            + numberOfClusters);

    // Create a new empty list of points for each cluster
    for (int i = 0; i < numberOfClusters; i++)
        result.add(new ArrayList<PointOfInterest>());

    // Initializing auxiliary variables namely the attributes of the data set
    Attribute id = new Attribute("id");
    Attribute pDiffRise = new Attribute("pDiff");

    ArrayList<Attribute> attr = new ArrayList<Attribute>();
    attr.add(id);
    attr.add(pDiffRise);

    Instances instances = new Instances("Points of Interest", attr, 0);

    // Each point of interest is translated to an instance with the above attributes
    for (int i = 0; i < pois.size(); i++) {

        Instance inst = new DenseInstance(2);
        inst.setValue(id, i);
        inst.setValue(pDiffRise, Math.abs(pois.get(i).getPDiff()));

        instances.add(inst);

    }

    // System.out.println(instances.toString());

    Instances newInst = null;

    log.debug("Instances: " + instances.toSummaryString());

    // Create Weka's AddCluster filter and set up the k-means clusterer.
    AddCluster addcluster = new AddCluster();

    SimpleKMeans kmeans = new SimpleKMeans();

    kmeans.setSeed(numberOfClusters);

    // This is the important parameter to set
    kmeans.setPreserveInstancesOrder(true);
    kmeans.setNumClusters(numberOfClusters);
    kmeans.buildClusterer(instances);

    addcluster.setClusterer(kmeans);
    addcluster.setInputFormat(instances);
    addcluster.setIgnoredAttributeIndices("1");

    // Cluster data set
    newInst = Filter.useFilter(instances, addcluster);

    // System.out.println(newInst.toString());

    // Parse through the dataset to see where each point is placed in the
    // clusters.
    for (int i = 0; i < newInst.size(); i++) {

        String cluster = newInst.get(i).stringValue(newInst.attribute(2));

        cluster = cluster.replace("cluster", "");

        log.debug("Point of Interest: " + i + " Cluster: " + cluster);

        result.get(Integer.parseInt(cluster) - 1).add(pois.get(i));
    }

    // Sort each cluster's points by their minutes.
    for (int i = result.size() - 1; i >= 0; i--) {
        if (result.get(i).size() == 0)
            result.remove(i);
        else
            Collections.sort(result.get(i), Constants.comp);
    }

    // Sort all the clusters by their active power.

    Collections.sort(result, Constants.comp5);

    return result;
}
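
A note on the parsing step above: AddCluster appends a nominal attribute whose labels are "cluster1", "cluster2", and so on, which is why the code strips the "cluster" prefix and subtracts one to obtain a zero-based index into result.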

From source file:examples.TrainerFrame.java

private void jButtonTrainActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButtonTrainActionPerformed
    //This is a temporary fix to make it appear like it's finished

    pBar.setMaximum(7);
    pBar.setValue(0);
    pBar.repaint();
    jLabelTrainerStatus.setText("Extracting Target Features");
    //Generate Target Features
    String featuresTarget = null;
    new Thread(new TrainerFrame.thread1()).start();
    try {
        featuresTarget = GlobalData.getFeatures(jTextFieldCallDirectory.getText());
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    pBar.setValue(1);
    pBar.repaint();
    jLabelTrainerStatus.setText("Extracting Other Features");

    //Generate non-target features
    String featuresOther = null;
    new Thread(new TrainerFrame.thread1()).start();
    try {
        featuresOther = GlobalData.getFeatures(jTextFieldOtherSoundDirectory.getText());
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    pBar.setValue(2);
    pBar.repaint();
    jLabelTrainerStatus.setText("Parsing Features");

    //Load Target ARFF File
    BufferedReader readerTarget;
    Instances dataTarget = null;
    try {
        readerTarget = new BufferedReader(new FileReader(featuresTarget));
        dataTarget = new Instances(readerTarget);
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    pBar.setValue(3);
    pBar.repaint();
    //Load Other ARFF File
    BufferedReader readerOther;
    Instances dataOther = null;
    try {
        readerOther = new BufferedReader(new FileReader(featuresOther));
        dataOther = new Instances(readerOther);
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    pBar.setValue(4);
    pBar.repaint();
    jLabelTrainerStatus.setText("Training Classifier");

    Instances newData = new Instances(dataTarget);
    FastVector typeList = new FastVector();
    typeList.add("target");
    typeList.add("other");
    newData.insertAttributeAt(new Attribute("NewNominal", (java.util.List<String>) typeList),
            newData.numAttributes());
    for (Instance instance : newData) {
        instance.setValue(newData.numAttributes() - 1, "target");
    }

    dataOther.insertAttributeAt(new Attribute("NewNominal", (java.util.List<String>) typeList),
            dataOther.numAttributes());
    for (Instance instance : dataOther) {
        instance.setValue(newData.numAttributes() - 1, "other");
        newData.add(instance);
    }

    newData.setClassIndex(newData.numAttributes() - 1);
    pBar.setValue(5);
    pBar.repaint();
    ArffSaver saver = new ArffSaver();
    saver.setInstances(newData);
    try {
        saver.setFile(new File("AnimalCallTrainingFile.arff"));
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    try {
        saver.writeBatch();
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    pBar.setValue(6);
    pBar.repaint();
    //Train a classifier
    String[] options = new String[1];
    options[0] = "-U";
    J48 tree = new J48();
    try {
        tree.setOptions(options);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    try {
        tree.buildClassifier(newData);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    Debug.saveToFile("Classifiers/" + jTextFieldClassifierName.getText(), tree);
    System.out.println("classifier saved");
    MyClassifier tempClass = new MyClassifier(jTextFieldClassifierName.getText());
    GlobalData.classifierList.addElement(tempClass.name);
    pBar.setValue(7);
    pBar.repaint();
    jLabelTrainerStatus.setText("Finished");

}

From source file:expshell.NeuralClass.java

public static void main(String[] args) throws Exception {
    Instance testInst = new Instance(5);
    testInst.setValue(0, 2.1);
    testInst.setValue(1, 3.1);
    testInst.setValue(2, 4.1);
    testInst.setValue(3, 5.1);
    //the class
    testInst.setValue(4, 0.0);

    Layer l = new Layer(5, testInst.numAttributes() - 1);

    l.computeNode(testInst);

}
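
This example targets the pre-3.7 Weka API, in which weka.core.Instance was still a concrete class; the ReliefFAttributeEval and MauiFilter examples below rely on the same old API (including the deprecated FastVector). Against Weka 3.7+, where Instance is an interface, the construction would look roughly as follows (Layer and computeNode belong to the example's own project and are left out; the wrapper class name is ours):

import weka.core.DenseInstance;
import weka.core.Instance;

public class NeuralClassSketch {
    public static void main(String[] args) {
        // Weka 3.7+: DenseInstance replaces the old `new Instance(int)`.
        Instance testInst = new DenseInstance(5);
        testInst.setValue(0, 2.1);
        testInst.setValue(1, 3.1);
        testInst.setValue(2, 4.1);
        testInst.setValue(3, 5.1);
        // the class value
        testInst.setValue(4, 0.0);

        System.out.println(testInst);
    }
}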

From source file:eyetracker.ServerCommunicator.java

public Instance getInput() {
    // For all the attribute, initialize them.
    int totalAttribute = MLPProcessor.inst.firstInstance().numAttributes();
    Instance instance = new SparseInstance(totalAttribute);
    instance.setDataset(MLPProcessor.inst);
    String[] attributes = unifiedData.split(",");
    //String[] attributes = examData.split(",");
    for (int i = 0; i < totalAttribute - 1; i++) {
        instance.setValue(i, Double.valueOf(attributes[i]));
    }
    return instance;
}

From source file:FeatureSelection.ReliefFAttributeEval.java

License:Open Source License

public Instances createReliefInput(ArrayList<ArrayList<Double>> dataset, String[] featureNames_Arr,
        ArrayList<Double> labels) {

    this.featureNames_Arr = featureNames_Arr;
    // create attributes
    FastVector fv = new FastVector();
    for (int i = 0; i <= featureNames_Arr.length; i++) {
        if (i == featureNames_Arr.length) {
            fv.addElement(new Attribute("@@class@@"));
            continue;
        }
        fv.addElement(new Attribute(featureNames_Arr[i]));
    }

    // transform the dataset so that each row represents one window, and
    // append the class label
    ArrayList<ArrayList<Double>> ReliefInput = new ArrayList<ArrayList<Double>>();
    for (int i = 0; i < dataset.get(0).size(); i++) {
        ArrayList<Double> featT = new ArrayList<Double>();
        for (int j = 0; j < dataset.size(); j++) {
            featT.add(dataset.get(j).get(i));
        }
        featT.add(labels.get(i));
        ReliefInput.add(featT);
    }

    // transform dataset into Instances type
    Instances ReliefInstances = new Instances("Features", fv, dataset.size());
    for (int i = 0; i < ReliefInput.size(); i++) {
        double[] vals = CollectionUtilities.listToArray(ReliefInput.get(i));

        Instance instWeka = new Instance(vals.length);

        for (int j = 0; j < vals.length; j++) {
            instWeka.setValue(j, vals[j]);
        }
        ReliefInstances.add(instWeka);
    }
    ReliefInstances.setClassIndex(ReliefInstances.numAttributes() - 1);

    return ReliefInstances;

}

From source file:ffnn.TucilWeka.java

public static Instances createInstances(int max) {
    //List of attributes and list of class values for the header
    //Number of attributes: 4 without the class, 5 with it
    ArrayList<Attribute> attrs = new ArrayList<Attribute>(5);
    ArrayList<String> classVal = new ArrayList<String>();

    //Add the possible class values to the list
    classVal.add("Iris-setosa");
    classVal.add("Iris-versicolor");
    classVal.add("Iris-virginica");

    //Add the attributes to the list
    Attribute sepallength = new Attribute("sepallength");
    attrs.add(sepallength); //Numeric Attributes
    Attribute sepalwidth = new Attribute("sepalwidth");
    attrs.add(sepalwidth); //Numeric Attributes
    Attribute petallength = new Attribute("petallength");
    attrs.add(petallength); //Numeric Attributes
    Attribute petalwidth = new Attribute("petalwidth");
    attrs.add(petalwidth); //Numeric Attributes
    Attribute classValue = new Attribute("@@class@@", classVal);
    attrs.add(classValue); //String Attributes

    //Construction
    //Constructor parameters: name, list of attributes, initial capacity
    Instances dataRaw = new Instances("irisNew", attrs, 0); //empty Instances
    dataRaw.setClassIndex(dataRaw.numAttributes() - 1);
    Scanner scan = new Scanner(System.in);

    for (int i = 0; i < max; i++) {
        //Weka stores instance values as doubles
        double temp;
        Instance inst = new DenseInstance(dataRaw.numAttributes());
        System.out.println("Sepallength:");
        temp = scan.nextDouble();
        inst.setValue(sepallength, temp);
        System.out.println("Sepalwidth:");
        temp = scan.nextDouble();
        inst.setValue(sepalwidth, temp);
        System.out.println("Petallegth:");
        temp = scan.nextDouble();
        inst.setValue(petallength, temp);
        System.out.println("Petalwidth:");
        temp = scan.nextDouble();
        inst.setValue(petalwidth, temp);

        //System.out.println("Masukan kelima:");
        //temp = scan.nextDouble();
        //0 -> setosa, 1 -> vesicolor, 2-> virginica
        //instS.setValue(classValue, temp); //tidak dibutuhkan sebenarnya

        //Menambahkan instance ke instances
        dataRaw.add(inst);
    }
    return dataRaw;
}

From source file:filters.MauiFilter.java

License:Open Source License

/**
 * Converts an instance.
 */
private FastVector convertInstance(Instance instance, boolean training) throws Exception {

    FastVector vector = new FastVector();

    String fileName = instance.stringValue(fileNameAtt);

    if (debugMode) {
        System.err.println("-- Converting instance for document " + fileName);
    }

    // Get the key phrases for the document
    HashMap<String, Counter> hashKeyphrases = null;

    if (!instance.isMissing(keyphrasesAtt)) {
        String keyphrases = instance.stringValue(keyphrasesAtt);
        hashKeyphrases = getGivenKeyphrases(keyphrases);
    }

    // Get the document text
    String documentText = instance.stringValue(documentAtt);

    // Compute the candidate topics
    HashMap<String, Candidate> candidateList;
    if (allCandidates != null && allCandidates.containsKey(instance)) {
        candidateList = allCandidates.get(instance);
    } else {
        candidateList = getCandidates(documentText);
    }
    if (debugMode) {
        System.err.println(candidateList.size() + " candidates ");
    }

    // Set indices for key attributes
    int tfidfAttIndex = documentAtt + 2;
    int distAttIndex = documentAtt + 3;
    int probsAttIndex = documentAtt + numFeatures;

    int countPos = 0;
    int countNeg = 0;

    // Go through the phrases and convert them into instances
    for (Candidate candidate : candidateList.values()) {

        if (candidate.getFrequency() < minOccurFrequency) {
            continue;
        }

        String name = candidate.getName();
        String orig = candidate.getBestFullForm();
        if (!vocabularyName.equals("none")) {
            orig = candidate.getTitle();
        }

        double[] vals = computeFeatureValues(candidate, training, hashKeyphrases, candidateList);

        Instance inst = new Instance(instance.weight(), vals);

        inst.setDataset(classifierData);

        // Get probability of a phrase being key phrase
        double[] probs = classifier.distributionForInstance(inst);

        double prob = probs[0];
        if (nominalClassValue) {
            prob = probs[1];
        }

        // Compute attribute values for final instance
        double[] newInst = new double[instance.numAttributes() + numFeatures + 2];

        int pos = 0;
        for (int i = 1; i < instance.numAttributes(); i++) {

            if (i == documentAtt) {

                // output of values for a given phrase:

                // Add phrase
                int index = outputFormatPeek().attribute(pos).addStringValue(name);
                newInst[pos++] = index;

                // Add original version
                if (orig != null) {
                    index = outputFormatPeek().attribute(pos).addStringValue(orig);
                } else {
                    index = outputFormatPeek().attribute(pos).addStringValue(name);
                }

                newInst[pos++] = index;

                // Add features
                newInst[pos++] = inst.value(tfIndex);
                newInst[pos++] = inst.value(idfIndex);
                newInst[pos++] = inst.value(tfidfIndex);
                newInst[pos++] = inst.value(firstOccurIndex);
                newInst[pos++] = inst.value(lastOccurIndex);
                newInst[pos++] = inst.value(spreadOccurIndex);
                newInst[pos++] = inst.value(domainKeyphIndex);
                newInst[pos++] = inst.value(lengthIndex);
                newInst[pos++] = inst.value(generalityIndex);
                newInst[pos++] = inst.value(nodeDegreeIndex);
                newInst[pos++] = inst.value(semRelIndex);
                newInst[pos++] = inst.value(wikipKeyphrIndex);
                newInst[pos++] = inst.value(invWikipFreqIndex);
                newInst[pos++] = inst.value(totalWikipKeyphrIndex);

                // Add probability
                probsAttIndex = pos;
                newInst[pos++] = prob;

                // Set rank to missing (computed below)
                newInst[pos++] = Instance.missingValue();

            } else if (i == keyphrasesAtt) {
                newInst[pos++] = inst.classValue();
            } else {
                newInst[pos++] = instance.value(i);
            }
        }

        Instance ins = new Instance(instance.weight(), newInst);
        ins.setDataset(outputFormatPeek());
        vector.addElement(ins);

        if (inst.classValue() == 0) {
            countNeg++;
        } else {
            countPos++;
        }
    }
    if (debugMode) {
        System.err.println(countPos + " positive; " + countNeg + " negative instances");
    }

    // Sort phrases according to their distance (stable sort)
    double[] vals = new double[vector.size()];
    for (int i = 0; i < vals.length; i++) {
        vals[i] = ((Instance) vector.elementAt(i)).value(distAttIndex);
    }
    FastVector newVector = new FastVector(vector.size());
    int[] sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their tfxidf value (stable sort)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = -((Instance) vector.elementAt(i)).value(tfidfAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their probability (stable sort)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = 1 - ((Instance) vector.elementAt(i)).value(probsAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Compute rank of phrases. Check for subphrases that are ranked
    // lower than superphrases and assign probability -1 and set the
    // rank to Integer.MAX_VALUE
    int rank = 1;
    for (int i = 0; i < vals.length; i++) {
        Instance currentInstance = (Instance) vector.elementAt(i);
        // Short cut: if phrase very unlikely make rank very low and
        // continue
        if (Utils.grOrEq(vals[i], 1.0)) {
            currentInstance.setValue(probsAttIndex + 1, Integer.MAX_VALUE);
            continue;
        }

        // Otherwise look for super phrase starting with first phrase
        // in list that has same probability, TFxIDF value, and distance as
        // current phrase. We do this to catch all superphrases
        // that have same probability, TFxIDF value and distance as current
        // phrase.
        int startInd = i;
        while (startInd < vals.length) {
            Instance inst = (Instance) vector.elementAt(startInd);
            if ((inst.value(tfidfAttIndex) != currentInstance.value(tfidfAttIndex))
                    || (inst.value(probsAttIndex) != currentInstance.value(probsAttIndex))
                    || (inst.value(distAttIndex) != currentInstance.value(distAttIndex))) {
                break;
            }
            startInd++;
        }
        currentInstance.setValue(probsAttIndex + 1, rank++);

    }

    return vector;
}