List of usage examples for weka.core Instance setValue
public void setValue(Attribute att, String value);
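The examples below exercise several overloads of setValue: the setValue(Attribute, String) form declared above, plus setValue(Attribute, double) and setValue(int, double). As a quick orientation, here is a minimal, self-contained sketch against the Weka 3.7+ API (DenseInstance); the dataset and attribute names are invented for illustration:

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class SetValueSketch {
    public static void main(String[] args) {
        // Hypothetical header: one numeric and one nominal attribute
        ArrayList<String> colors = new ArrayList<String>();
        colors.add("red");
        colors.add("blue");

        ArrayList<Attribute> attrs = new ArrayList<Attribute>();
        attrs.add(new Attribute("weight"));        // numeric
        attrs.add(new Attribute("color", colors)); // nominal

        Instances data = new Instances("sketch", attrs, 0);

        Instance inst = new DenseInstance(data.numAttributes());
        inst.setValue(data.attribute(0), 1.5);    // setValue(Attribute, double)
        inst.setValue(data.attribute(1), "blue"); // setValue(Attribute, String): looks up the nominal label
        data.add(inst);

        System.out.println(data);
    }
}

Note that the String overload only applies to nominal or string attributes; for a nominal attribute, passing a label that is not in the attribute's value list throws an exception.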
From source file:etc.aloe.filters.StringToDictionaryVector.java
License:Open Source License
public static void main(String[] args) {
    // Create a test dataset
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    attributes.add(new Attribute("message", (ArrayList<String>) null));
    attributes.add(new Attribute("id"));
    {
        ArrayList<String> classValues = new ArrayList<String>();
        classValues.add("0");
        classValues.add("1");
        attributes.add(new Attribute("class", classValues));
    }

    Instances instances = new Instances("test", attributes, 0);
    instances.setClassIndex(2);

    String[] messages = new String[] { "No emoticons here", "I have a smiley :)",
            "Two smileys and a frownie :) :) :(", "Several emoticons :( :-( :) :-) ;-) 8-) :-/ :-P" };

    for (int i = 0; i < messages.length; i++) {
        Instance instance = new DenseInstance(instances.numAttributes());
        instance.setValue(instances.attribute(0), messages[i]);
        instance.setValue(instances.attribute(1), i);
        instance.setValue(instances.attribute(2), Integer.toString(i % 2));
        instances.add(instance);
    }

    System.out.println("Before filter:");
    for (int i = 0; i < instances.size(); i++) {
        System.out.println(instances.instance(i).toString());
    }

    try {
        String dictionaryName = "emoticons.txt";
        StringToDictionaryVector filter = new StringToDictionaryVector();
        List<String> termList = StringToDictionaryVector.readDictionaryFile(new File(dictionaryName));
        filter.setTermList(termList);
        filter.setMinTermFreq(1);
        filter.setTFTransform(true);
        filter.setIDFTransform(true);
        filter.setNormalizeDocLength(new SelectedTag(FILTER_NORMALIZE_TEST_ONLY, TAGS_FILTER));
        filter.setOutputWordCounts(true);
        filter.setStringAttribute("message");

        filter.setInputFormat(instances);
        Instances trans1 = Filter.useFilter(instances, filter);
        Instances trans2 = Filter.useFilter(instances, filter);

        System.out.println("\nFirst application:");
        System.out.println(trans1.toString());
        System.out.println("\nSecond application:");
        System.out.println(trans2.toString());
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:eu.cassandra.appliance.IsolatedApplianceExtractor.java
License:Apache License
/**
 * This is an auxiliary function that prepares the clustering data set. The
 * events must be translated to instances of the data set that can be used for
 * clustering.
 *
 * @param isolated
 *          The list of the events containing an isolated appliance.
 * @return The instances of the data
 * @throws Exception
 */
private Instances createInstances(ArrayList<Event> isolated) throws Exception {
    // Initializing auxiliary variables, namely the attributes of the data set
    Attribute id = new Attribute("id");
    Attribute pDiffRise = new Attribute("pDiffRise");
    Attribute qDiffRise = new Attribute("qDiffRise");
    Attribute pDiffReduce = new Attribute("pDiffReduce");
    Attribute qDiffReduce = new Attribute("qDiffReduce");

    ArrayList<Attribute> attr = new ArrayList<Attribute>();
    attr.add(id);
    attr.add(pDiffRise);
    attr.add(qDiffRise);
    attr.add(pDiffReduce);
    attr.add(qDiffReduce);

    Instances instances = new Instances("Isolated", attr, 0);

    // Each event is translated to an instance with the above attributes
    for (Event event : isolated) {
        Instance inst = new DenseInstance(5);
        inst.setValue(id, event.getId());
        inst.setValue(pDiffRise, event.getRisingPoints().get(0).getPDiff());
        inst.setValue(qDiffRise, event.getRisingPoints().get(0).getQDiff());
        inst.setValue(pDiffReduce, event.getReductionPoints().get(0).getPDiff());
        inst.setValue(qDiffReduce, event.getReductionPoints().get(0).getQDiff());
        instances.add(inst);
    }

    int n = Constants.MAX_CLUSTERS_NUMBER;
    Instances newInst = null;

    System.out.println("Instances: " + instances.toSummaryString());
    System.out.println("Max Clusters: " + n);

    // Create the AddCluster filter of Weka and set up the clusterer.
    AddCluster addcluster = new AddCluster();

    if (instances.size() > Constants.KMEANS_LIMIT_NUMBER || instances.size() == 0) {
        HierarchicalClusterer clusterer = new HierarchicalClusterer();
        String[] opt = { "-N", "" + n + "", "-P", "-D", "-L", "AVERAGE" };
        clusterer.setDistanceFunction(new EuclideanDistance());
        clusterer.setNumClusters(n);
        clusterer.setOptions(opt);
        clusterer.setPrintNewick(true);
        clusterer.setDebug(true);
        // clusterer.getOptions();

        addcluster.setClusterer(clusterer);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster the data set
        newInst = Filter.useFilter(instances, addcluster);
    } else {
        SimpleKMeans kmeans = new SimpleKMeans();
        kmeans.setSeed(10);

        // This is the important parameter to set
        kmeans.setPreserveInstancesOrder(true);
        kmeans.setNumClusters(n);
        kmeans.buildClusterer(instances);

        addcluster.setClusterer(kmeans);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster the data set
        newInst = Filter.useFilter(instances, addcluster);
    }

    return newInst;
}
From source file:eu.cassandra.appliance.IsolatedEventsExtractor.java
License:Apache License
/**
 * This is an auxiliary function that prepares the clustering data set. The
 * events must be translated to instances of the data set that can be used for
 * clustering.
 *
 * @param isolated
 *          The list of the events containing an isolated appliance.
 * @return The instances of the data
 * @throws Exception
 */
private Instances createInstances(ArrayList<Event> isolated) throws Exception {
    // Initializing auxiliary variables, namely the attributes of the data set
    Attribute id = new Attribute("id");
    Attribute pDiffRise = new Attribute("pDiffRise");
    Attribute qDiffRise = new Attribute("qDiffRise");
    Attribute pDiffReduce = new Attribute("pDiffReduce");
    Attribute qDiffReduce = new Attribute("qDiffReduce");
    Attribute duration = new Attribute("duration");

    ArrayList<Attribute> attr = new ArrayList<Attribute>();
    attr.add(id);
    attr.add(pDiffRise);
    attr.add(qDiffRise);
    attr.add(pDiffReduce);
    attr.add(qDiffReduce);
    attr.add(duration);

    Instances instances = new Instances("Isolated", attr, 0);

    // Each event is translated to an instance with the above attributes
    for (Event event : isolated) {
        Instance inst = new DenseInstance(6);
        inst.setValue(id, event.getId());
        inst.setValue(pDiffRise, event.getRisingPoints().get(0).getPDiff());
        inst.setValue(qDiffRise, event.getRisingPoints().get(0).getQDiff());
        inst.setValue(pDiffReduce, event.getReductionPoints().get(0).getPDiff());
        inst.setValue(qDiffReduce, event.getReductionPoints().get(0).getQDiff());
        inst.setValue(duration, event.getEndMinute() - event.getStartMinute());
        instances.add(inst);
    }

    int n = Constants.MAX_CLUSTERS_NUMBER;
    Instances newInst = null;

    log.info("Instances: " + instances.toSummaryString());
    log.info("Max Clusters: " + n);

    // Create the AddCluster filter of Weka and set up the clusterer.
    AddCluster addcluster = new AddCluster();

    if (instances.size() > Constants.KMEANS_LIMIT_NUMBER || instances.size() == 0) {
        HierarchicalClusterer clusterer = new HierarchicalClusterer();
        String[] opt = { "-N", "" + n + "", "-P", "-D", "-L", "AVERAGE" };
        clusterer.setDistanceFunction(new EuclideanDistance());
        clusterer.setNumClusters(n);
        clusterer.setOptions(opt);
        clusterer.setPrintNewick(true);
        clusterer.setDebug(true);
        // clusterer.getOptions();

        addcluster.setClusterer(clusterer);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster the data set
        newInst = Filter.useFilter(instances, addcluster);
    } else {
        SimpleKMeans kmeans = new SimpleKMeans();
        kmeans.setSeed(10);

        // This is the important parameter to set
        kmeans.setPreserveInstancesOrder(true);
        kmeans.setNumClusters(n);
        kmeans.buildClusterer(instances);

        addcluster.setClusterer(kmeans);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster the data set
        newInst = Filter.useFilter(instances, addcluster);
    }

    return newInst;
}
From source file:eu.cassandra.utils.Utils.java
License:Apache License
/**
 * This function is used in order to create clusters of points of interest
 * based on the active power difference they have.
 *
 * @param pois
 *          The list of points of interest that will be clustered.
 * @return The newly created clusters with the points that are comprising them.
 * @throws Exception
 */
public static ArrayList<ArrayList<PointOfInterest>> clusterPoints(ArrayList<PointOfInterest> pois, int bias)
        throws Exception {
    // Initialize the auxiliary variables
    ArrayList<ArrayList<PointOfInterest>> result = new ArrayList<ArrayList<PointOfInterest>>();

    // Estimating the number of clusters that will be created
    int numberOfClusters = (int) (Math.ceil((double) pois.size() / (double) Constants.MAX_POINTS_OF_INTEREST))
            + bias;

    log.info("Clusters: " + pois.size() + " / " + Constants.MAX_POINTS_OF_INTEREST + " + " + bias + " = "
            + numberOfClusters);

    // Create a new empty list of points for each cluster
    for (int i = 0; i < numberOfClusters; i++)
        result.add(new ArrayList<PointOfInterest>());

    // Initializing auxiliary variables, namely the attributes of the data set
    Attribute id = new Attribute("id");
    Attribute pDiffRise = new Attribute("pDiff");

    ArrayList<Attribute> attr = new ArrayList<Attribute>();
    attr.add(id);
    attr.add(pDiffRise);

    Instances instances = new Instances("Points of Interest", attr, 0);

    // Each point of interest is translated to an instance with the above attributes
    for (int i = 0; i < pois.size(); i++) {
        Instance inst = new DenseInstance(2);
        inst.setValue(id, i);
        inst.setValue(pDiffRise, Math.abs(pois.get(i).getPDiff()));
        instances.add(inst);
    }

    // System.out.println(instances.toString());

    Instances newInst = null;

    log.debug("Instances: " + instances.toSummaryString());

    // Create the AddCluster filter of Weka and set up the k-means clusterer.
    AddCluster addcluster = new AddCluster();
    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setSeed(numberOfClusters);

    // This is the important parameter to set
    kmeans.setPreserveInstancesOrder(true);
    kmeans.setNumClusters(numberOfClusters);
    kmeans.buildClusterer(instances);

    addcluster.setClusterer(kmeans);
    addcluster.setInputFormat(instances);
    addcluster.setIgnoredAttributeIndices("1");

    // Cluster the data set
    newInst = Filter.useFilter(instances, addcluster);

    // System.out.println(newInst.toString());

    // Parse through the data set to see where each point is placed in the clusters.
    for (int i = 0; i < newInst.size(); i++) {
        String cluster = newInst.get(i).stringValue(newInst.attribute(2));
        cluster = cluster.replace("cluster", "");
        log.debug("Point of Interest: " + i + " Cluster: " + cluster);
        result.get(Integer.parseInt(cluster) - 1).add(pois.get(i));
    }

    // Sorting each cluster's points by their minutes.
    for (int i = result.size() - 1; i >= 0; i--) {
        if (result.get(i).size() == 0)
            result.remove(i);
        else
            Collections.sort(result.get(i), Constants.comp);
    }

    // Sorting all the clusters by their active power.
    Collections.sort(result, Constants.comp5);

    return result;
}
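The string parsing at the end works because AddCluster appends a nominal attribute whose labels are "cluster1", "cluster2", and so on. Since setPreserveInstancesOrder(true) is enabled above, an alternative sketch is to ask SimpleKMeans for the assignments directly and skip the string round-trip (variable names as in the method above; getAssignments() returns 0-based cluster indices):

// Alternative to parsing the "clusterN" labels, assuming the kmeans
// object above was built with setPreserveInstancesOrder(true):
int[] assignments = kmeans.getAssignments();
for (int i = 0; i < assignments.length; i++) {
    result.get(assignments[i]).add(pois.get(i));
}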
From source file:examples.TrainerFrame.java
private void jButtonTrainActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButtonTrainActionPerformed
    // This is a temporary fix to make it appear like it's finished
    pBar.setMaximum(7);
    pBar.setValue(0);
    pBar.repaint();
    jLabelTrainerStatus.setText("Extracting Target Features");

    // Generate target features
    String featuresTarget = null;
    new Thread(new TrainerFrame.thread1()).start();
    try {
        featuresTarget = GlobalData.getFeatures(jTextFieldCallDirectory.getText());
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    pBar.setValue(1);
    pBar.repaint();
    jLabelTrainerStatus.setText("Extracting Other Features");

    // Generate non-target features
    String featuresOther = null;
    new Thread(new TrainerFrame.thread1()).start();
    try {
        featuresOther = GlobalData.getFeatures(jTextFieldOtherSoundDirectory.getText());
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    pBar.setValue(2);
    pBar.repaint();
    jLabelTrainerStatus.setText("Parsing Features");

    // Load the target ARFF file
    BufferedReader readerTarget;
    Instances dataTarget = null;
    try {
        readerTarget = new BufferedReader(new FileReader(featuresTarget));
        dataTarget = new Instances(readerTarget);
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    pBar.setValue(3);
    pBar.repaint();

    // Load the other ARFF file
    BufferedReader readerOther;
    Instances dataOther = null;
    try {
        readerOther = new BufferedReader(new FileReader(featuresOther));
        dataOther = new Instances(readerOther);
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    pBar.setValue(4);
    pBar.repaint();
    jLabelTrainerStatus.setText("Training Classifier");

    // Append a nominal class attribute ("target"/"other") and merge the two sets
    Instances newData = new Instances(dataTarget);
    FastVector typeList = new FastVector() { };
    typeList.add("target");
    typeList.add("other");
    newData.insertAttributeAt(new Attribute("NewNominal", (java.util.List<String>) typeList),
            newData.numAttributes());
    for (Instance instance : newData) {
        instance.setValue(newData.numAttributes() - 1, "target");
    }

    dataOther.insertAttributeAt(new Attribute("NewNominal", (java.util.List<String>) typeList),
            dataOther.numAttributes());
    for (Instance instance : dataOther) {
        instance.setValue(newData.numAttributes() - 1, "other");
        newData.add(instance);
    }

    newData.setClassIndex(newData.numAttributes() - 1);
    pBar.setValue(5);
    pBar.repaint();

    // Save the combined training data
    ArffSaver saver = new ArffSaver();
    saver.setInstances(newData);
    try {
        saver.setFile(new File("AnimalCallTrainingFile.arff"));
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    try {
        saver.writeBatch();
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    pBar.setValue(6);
    pBar.repaint();

    // Train an unpruned J48 decision tree
    String[] options = new String[1];
    options[0] = "-U";
    J48 tree = new J48();
    try {
        tree.setOptions(options);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    try {
        tree.buildClassifier(newData);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    Debug.saveToFile("Classifiers/" + jTextFieldClassifierName.getText(), tree);
    System.out.println("classifier saved");
    MyClassifier tempClass = new MyClassifier(jTextFieldClassifierName.getText());
    GlobalData.classifierList.addElement(tempClass.name);

    pBar.setValue(7);
    pBar.repaint();
    jLabelTrainerStatus.setText("Finished");
}
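FastVector is deprecated as of Weka 3.7; it now extends ArrayList, which is why the cast to java.util.List<String> above compiles. A sketch of the idiomatic 3.7+ replacement for building the nominal attribute:

// Idiomatic Weka 3.7+ equivalent of the FastVector block above
java.util.ArrayList<String> typeList = new java.util.ArrayList<String>();
typeList.add("target");
typeList.add("other");
newData.insertAttributeAt(new Attribute("NewNominal", typeList), newData.numAttributes());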
From source file:expshell.NeuralClass.java
public static void main(String[] args) throws Exception {
    Instance testInst = new Instance(5);
    testInst.setValue(0, 2.1);
    testInst.setValue(1, 3.1);
    testInst.setValue(2, 4.1);
    testInst.setValue(3, 5.1);
    // the class
    testInst.setValue(4, 0.0);
    Layer l = new Layer(5, testInst.numAttributes() - 1);
    l.computeNode(testInst);
}
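This example (like the FeatureSelection.ReliefFAttributeEval one below) is written against the pre-3.7.1 Weka API, in which weka.core.Instance was a concrete class. From 3.7.1 on, Instance is an interface, so the equivalent construction would look roughly like:

// Weka 3.7.1+ equivalent: Instance is an interface, DenseInstance the concrete class
Instance testInst = new DenseInstance(5);
testInst.setValue(0, 2.1);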
From source file:eyetracker.ServerCommunicator.java
public Instance getInput() {
    // Initialize all the attributes.
    int totalAttribute = MLPProcessor.inst.firstInstance().numAttributes();
    Instance instance = new SparseInstance(totalAttribute);
    instance.setDataset(MLPProcessor.inst);

    String[] attributes = unifiedData.split(",");
    // String[] attributes = examData.split(",");
    for (int i = 0; i < totalAttribute - 1; i++) {
        instance.setValue(i, Double.valueOf(attributes[i]));
    }
    return instance;
}
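The loop stops at totalAttribute - 1, leaving the class attribute missing, presumably so the instance can be handed straight to a classifier. A hypothetical follow-up (the MLPProcessor.mlp classifier field is invented here for illustration) might be:

// Hypothetical usage: classify the sparse instance built above
double predicted = MLPProcessor.mlp.classifyInstance(getInput());
System.out.println("Predicted class index: " + predicted);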
From source file:FeatureSelection.ReliefFAttributeEval.java
License:Open Source License
public Instances createReliefInput(ArrayList<ArrayList<Double>> dataset, String[] featureNames_Arr,
        ArrayList<Double> labels) {
    this.featureNames_Arr = featureNames_Arr;

    // create attributes
    FastVector fv = new FastVector();
    for (int i = 0; i <= featureNames_Arr.length; i++) {
        if (i == featureNames_Arr.length) {
            fv.addElement(new Attribute("@@class@@"));
            continue;
        }
        fv.addElement(new Attribute(featureNames_Arr[i]));
    }

    // transpose the dataset so that each row represents one window, and append the class label
    ArrayList<ArrayList<Double>> ReliefInput = new ArrayList<ArrayList<Double>>();
    for (int i = 0; i < dataset.get(0).size(); i++) {
        ArrayList<Double> featT = new ArrayList<Double>();
        for (int j = 0; j < dataset.size(); j++) {
            featT.add(dataset.get(j).get(i));
        }
        featT.add(labels.get(i));
        ReliefInput.add(featT);
    }

    // transform the dataset into the Instances type
    Instances ReliefInstances = new Instances("Features", fv, dataset.size());
    for (int i = 0; i < ReliefInput.size(); i++) {
        double[] vals = CollectionUtilities.listToArray(ReliefInput.get(i));
        Instance instWeka = new Instance(vals.length);
        for (int j = 0; j < vals.length; j++) {
            instWeka.setValue(j, vals[j]);
        }
        ReliefInstances.add(instWeka);
    }
    ReliefInstances.setClassIndex(ReliefInstances.numAttributes() - 1);

    return ReliefInstances;
}
From source file:ffnn.TucilWeka.java
public static Instances createInstances(int max) {
    // Lists of attributes and possible class values for the header.
    // Number of attributes: 4 without the class, 5 with it.
    ArrayList<Attribute> attrs = new ArrayList<Attribute>(5);
    ArrayList<String> classVal = new ArrayList<String>();

    // Add the possible class values to the list
    classVal.add("Iris-setosa");
    classVal.add("Iris-versicolor");
    classVal.add("Iris-virginica");

    // Add the attributes to the list
    Attribute sepallength = new Attribute("sepallength");
    attrs.add(sepallength); // numeric attribute
    Attribute sepalwidth = new Attribute("sepalwidth");
    attrs.add(sepalwidth); // numeric attribute
    Attribute petallength = new Attribute("petallength");
    attrs.add(petallength); // numeric attribute
    Attribute petalwidth = new Attribute("petalwidth");
    attrs.add(petalwidth); // numeric attribute
    Attribute classValue = new Attribute("@@class@@", classVal);
    attrs.add(classValue); // nominal class attribute

    // Construction: the constructor takes the name, the list of attributes and the initial size
    Instances dataRaw = new Instances("irisNew", attrs, 0); // empty Instances
    dataRaw.setClassIndex(dataRaw.numAttributes() - 1);

    Scanner scan = new Scanner(System.in);
    for (int i = 0; i < max; i++) {
        // Weka stores instance values as doubles
        double temp;
        Instance inst = new DenseInstance(dataRaw.numAttributes());
        System.out.println("Sepallength:");
        temp = scan.nextDouble();
        inst.setValue(sepallength, temp);
        System.out.println("Sepalwidth:");
        temp = scan.nextDouble();
        inst.setValue(sepalwidth, temp);
        System.out.println("Petallength:");
        temp = scan.nextDouble();
        inst.setValue(petallength, temp);
        System.out.println("Petalwidth:");
        temp = scan.nextDouble();
        inst.setValue(petalwidth, temp);
        // The class value (0 -> setosa, 1 -> versicolor, 2 -> virginica) is not
        // actually needed here, so it is left unset:
        // System.out.println("Fifth input:");
        // temp = scan.nextDouble();
        // inst.setValue(classValue, temp);

        // Add the instance to the Instances set
        dataRaw.add(inst);
    }
    return dataRaw;
}
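For context, a minimal driver for the method above could read three flowers from standard input and print the resulting dataset (this main method is a hypothetical addition, not part of the original class):

// Hypothetical driver for createInstances
public static void main(String[] args) {
    Instances data = createInstances(3);
    System.out.println(data); // prints the ARFF header plus the three typed-in rows
}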
From source file:filters.MauiFilter.java
License:Open Source License
/**
 * Converts an instance.
 */
private FastVector convertInstance(Instance instance, boolean training) throws Exception {

    FastVector vector = new FastVector();

    String fileName = instance.stringValue(fileNameAtt);

    if (debugMode) {
        System.err.println("-- Converting instance for document " + fileName);
    }

    // Get the key phrases for the document
    HashMap<String, Counter> hashKeyphrases = null;

    if (!instance.isMissing(keyphrasesAtt)) {
        String keyphrases = instance.stringValue(keyphrasesAtt);
        hashKeyphrases = getGivenKeyphrases(keyphrases);
    }

    // Get the document text
    String documentText = instance.stringValue(documentAtt);

    // Compute the candidate topics
    HashMap<String, Candidate> candidateList;
    if (allCandidates != null && allCandidates.containsKey(instance)) {
        candidateList = allCandidates.get(instance);
    } else {
        candidateList = getCandidates(documentText);
    }
    if (debugMode) {
        System.err.println(candidateList.size() + " candidates ");
    }

    // Set indices for key attributes
    int tfidfAttIndex = documentAtt + 2;
    int distAttIndex = documentAtt + 3;
    int probsAttIndex = documentAtt + numFeatures;

    int countPos = 0;
    int countNeg = 0;

    // Go through the phrases and convert them into instances
    for (Candidate candidate : candidateList.values()) {

        if (candidate.getFrequency() < minOccurFrequency) {
            continue;
        }

        String name = candidate.getName();
        String orig = candidate.getBestFullForm();
        if (!vocabularyName.equals("none")) {
            orig = candidate.getTitle();
        }

        double[] vals = computeFeatureValues(candidate, training, hashKeyphrases, candidateList);

        Instance inst = new Instance(instance.weight(), vals);
        inst.setDataset(classifierData);

        // Get probability of a phrase being key phrase
        double[] probs = classifier.distributionForInstance(inst);
        double prob = probs[0];
        if (nominalClassValue) {
            prob = probs[1];
        }

        // Compute attribute values for final instance
        double[] newInst = new double[instance.numAttributes() + numFeatures + 2];

        int pos = 0;
        for (int i = 1; i < instance.numAttributes(); i++) {
            if (i == documentAtt) {
                // output of values for a given phrase:

                // Add phrase
                int index = outputFormatPeek().attribute(pos).addStringValue(name);
                newInst[pos++] = index;

                // Add original version
                if (orig != null) {
                    index = outputFormatPeek().attribute(pos).addStringValue(orig);
                } else {
                    index = outputFormatPeek().attribute(pos).addStringValue(name);
                }
                newInst[pos++] = index;

                // Add features
                newInst[pos++] = inst.value(tfIndex);
                newInst[pos++] = inst.value(idfIndex);
                newInst[pos++] = inst.value(tfidfIndex);
                newInst[pos++] = inst.value(firstOccurIndex);
                newInst[pos++] = inst.value(lastOccurIndex);
                newInst[pos++] = inst.value(spreadOccurIndex);
                newInst[pos++] = inst.value(domainKeyphIndex);
                newInst[pos++] = inst.value(lengthIndex);
                newInst[pos++] = inst.value(generalityIndex);
                newInst[pos++] = inst.value(nodeDegreeIndex);
                newInst[pos++] = inst.value(semRelIndex);
                newInst[pos++] = inst.value(wikipKeyphrIndex);
                newInst[pos++] = inst.value(invWikipFreqIndex);
                newInst[pos++] = inst.value(totalWikipKeyphrIndex);

                // Add probability
                probsAttIndex = pos;
                newInst[pos++] = prob;

                // Set rank to missing (computed below)
                newInst[pos++] = Instance.missingValue();

            } else if (i == keyphrasesAtt) {
                newInst[pos++] = inst.classValue();
            } else {
                newInst[pos++] = instance.value(i);
            }
        }

        Instance ins = new Instance(instance.weight(), newInst);
        ins.setDataset(outputFormatPeek());
        vector.addElement(ins);

        if (inst.classValue() == 0) {
            countNeg++;
        } else {
            countPos++;
        }
    }
    if (debugMode) {
        System.err.println(countPos + " positive; " + countNeg + " negative instances");
    }

    // Sort phrases according to their distance (stable sort)
    double[] vals = new double[vector.size()];
    for (int i = 0; i < vals.length; i++) {
        vals[i] = ((Instance) vector.elementAt(i)).value(distAttIndex);
    }
    FastVector newVector = new FastVector(vector.size());
    int[] sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their TFxIDF value (stable sort)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = -((Instance) vector.elementAt(i)).value(tfidfAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their probability (stable sort)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = 1 - ((Instance) vector.elementAt(i)).value(probsAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Compute the rank of phrases. Check for subphrases that are ranked
    // lower than superphrases and assign probability -1 and set the
    // rank to Integer.MAX_VALUE
    int rank = 1;
    for (int i = 0; i < vals.length; i++) {
        Instance currentInstance = (Instance) vector.elementAt(i);

        // Shortcut: if the phrase is very unlikely, make the rank very low and continue
        if (Utils.grOrEq(vals[i], 1.0)) {
            currentInstance.setValue(probsAttIndex + 1, Integer.MAX_VALUE);
            continue;
        }

        // Otherwise look for a superphrase, starting with the first phrase in the
        // list that has the same probability, TFxIDF value, and distance as the
        // current phrase. We do this to catch all superphrases that have the same
        // probability, TFxIDF value, and distance as the current phrase.
        int startInd = i;
        while (startInd < vals.length) {
            Instance inst = (Instance) vector.elementAt(startInd);
            if ((inst.value(tfidfAttIndex) != currentInstance.value(tfidfAttIndex))
                    || (inst.value(probsAttIndex) != currentInstance.value(probsAttIndex))
                    || (inst.value(distAttIndex) != currentInstance.value(distAttIndex))) {
                break;
            }
            startInd++;
        }
        currentInstance.setValue(probsAttIndex + 1, rank++);
    }
    return vector;
}