List of usage examples for weka.core Instance setValue
public void setValue(Attribute att, String value);
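The examples below exercise several overloads of setValue: the setValue(Attribute, String) form declared above, plus setValue(Attribute, double) and setValue(int, double). As a quick orientation, here is a minimal, self-contained sketch against the Weka 3.7+ API (DenseInstance); the dataset and attribute names are invented for illustration:

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class SetValueSketch {
    public static void main(String[] args) {
        // Hypothetical header: one numeric and one nominal attribute
        ArrayList<String> colors = new ArrayList<String>();
        colors.add("red");
        colors.add("blue");

        ArrayList<Attribute> attrs = new ArrayList<Attribute>();
        attrs.add(new Attribute("weight"));        // numeric
        attrs.add(new Attribute("color", colors)); // nominal

        Instances data = new Instances("sketch", attrs, 0);

        Instance inst = new DenseInstance(data.numAttributes());
        inst.setValue(data.attribute(0), 1.5);    // setValue(Attribute, double)
        inst.setValue(data.attribute(1), "blue"); // setValue(Attribute, String): looks up the nominal label
        data.add(inst);

        System.out.println(data);
    }
}

Note that the String overload only applies to nominal or string attributes; for a nominal attribute, passing a label that is not in the attribute's value list throws an exception.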
From source file:etc.aloe.filters.StringToDictionaryVector.java
License:Open Source License
public static void main(String[] args) {
    // Create a test dataset
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    attributes.add(new Attribute("message", (ArrayList<String>) null));
    attributes.add(new Attribute("id"));
    {
        ArrayList<String> classValues = new ArrayList<String>();
        classValues.add("0");
        classValues.add("1");
        attributes.add(new Attribute("class", classValues));
    }

    Instances instances = new Instances("test", attributes, 0);
    instances.setClassIndex(2);

    String[] messages = new String[] { "No emoticons here", "I have a smiley :)",
            "Two smileys and a frownie :) :) :(", "Several emoticons :( :-( :) :-) ;-) 8-) :-/ :-P" };

    for (int i = 0; i < messages.length; i++) {
        Instance instance = new DenseInstance(instances.numAttributes());
        instance.setValue(instances.attribute(0), messages[i]);
        instance.setValue(instances.attribute(1), i);
        instance.setValue(instances.attribute(2), Integer.toString(i % 2));
        instances.add(instance);
    }

    System.out.println("Before filter:");
    for (int i = 0; i < instances.size(); i++) {
        System.out.println(instances.instance(i).toString());
    }

    try {
        String dictionaryName = "emoticons.txt";
        StringToDictionaryVector filter = new StringToDictionaryVector();
        List<String> termList = StringToDictionaryVector.readDictionaryFile(new File(dictionaryName));
        filter.setTermList(termList);
        filter.setMinTermFreq(1);
        filter.setTFTransform(true);
        filter.setIDFTransform(true);
        filter.setNormalizeDocLength(new SelectedTag(FILTER_NORMALIZE_TEST_ONLY, TAGS_FILTER));
        filter.setOutputWordCounts(true);
        filter.setStringAttribute("message");

        filter.setInputFormat(instances);
        Instances trans1 = Filter.useFilter(instances, filter);
        Instances trans2 = Filter.useFilter(instances, filter);

        System.out.println("\nFirst application:");
        System.out.println(trans1.toString());
        System.out.println("\nSecond application:");
        System.out.println(trans2.toString());
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:eu.cassandra.appliance.IsolatedApplianceExtractor.java
License:Apache License
/**
 * This is an auxiliary function that prepares the clustering data set. The
 * events must be translated to instances of the data set that can be used for
 * clustering.
 *
 * @param isolated
 *          The list of the events containing an isolated appliance.
 * @return The instances of the data
 * @throws Exception
 */
private Instances createInstances(ArrayList<Event> isolated) throws Exception {
    // Initializing auxiliary variables, namely the attributes of the data set
    Attribute id = new Attribute("id");
    Attribute pDiffRise = new Attribute("pDiffRise");
    Attribute qDiffRise = new Attribute("qDiffRise");
    Attribute pDiffReduce = new Attribute("pDiffReduce");
    Attribute qDiffReduce = new Attribute("qDiffReduce");

    ArrayList<Attribute> attr = new ArrayList<Attribute>();
    attr.add(id);
    attr.add(pDiffRise);
    attr.add(qDiffRise);
    attr.add(pDiffReduce);
    attr.add(qDiffReduce);

    Instances instances = new Instances("Isolated", attr, 0);

    // Each event is translated to an instance with the above attributes
    for (Event event : isolated) {
        Instance inst = new DenseInstance(5);
        inst.setValue(id, event.getId());
        inst.setValue(pDiffRise, event.getRisingPoints().get(0).getPDiff());
        inst.setValue(qDiffRise, event.getRisingPoints().get(0).getQDiff());
        inst.setValue(pDiffReduce, event.getReductionPoints().get(0).getPDiff());
        inst.setValue(qDiffReduce, event.getReductionPoints().get(0).getQDiff());
        instances.add(inst);
    }

    int n = Constants.MAX_CLUSTERS_NUMBER;
    Instances newInst = null;

    System.out.println("Instances: " + instances.toSummaryString());
    System.out.println("Max Clusters: " + n);

    // Create the AddCluster filter of Weka and set up the clusterer.
    AddCluster addcluster = new AddCluster();

    if (instances.size() > Constants.KMEANS_LIMIT_NUMBER || instances.size() == 0) {
        HierarchicalClusterer clusterer = new HierarchicalClusterer();
        String[] opt = { "-N", "" + n + "", "-P", "-D", "-L", "AVERAGE" };
        clusterer.setDistanceFunction(new EuclideanDistance());
        clusterer.setNumClusters(n);
        clusterer.setOptions(opt);
        clusterer.setPrintNewick(true);
        clusterer.setDebug(true);
        // clusterer.getOptions();

        addcluster.setClusterer(clusterer);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster the data set
        newInst = Filter.useFilter(instances, addcluster);
    } else {
        SimpleKMeans kmeans = new SimpleKMeans();
        kmeans.setSeed(10);

        // This is the important parameter to set
        kmeans.setPreserveInstancesOrder(true);
        kmeans.setNumClusters(n);
        kmeans.buildClusterer(instances);

        addcluster.setClusterer(kmeans);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster the data set
        newInst = Filter.useFilter(instances, addcluster);
    }

    return newInst;
}
From source file:eu.cassandra.appliance.IsolatedEventsExtractor.java
License:Apache License
/**
 * This is an auxiliary function that prepares the clustering data set. The
 * events must be translated to instances of the data set that can be used for
 * clustering.
 *
 * @param isolated
 *          The list of the events containing an isolated appliance.
 * @return The instances of the data
 * @throws Exception
 */
private Instances createInstances(ArrayList<Event> isolated) throws Exception {
    // Initializing auxiliary variables, namely the attributes of the data set
    Attribute id = new Attribute("id");
    Attribute pDiffRise = new Attribute("pDiffRise");
    Attribute qDiffRise = new Attribute("qDiffRise");
    Attribute pDiffReduce = new Attribute("pDiffReduce");
    Attribute qDiffReduce = new Attribute("qDiffReduce");
    Attribute duration = new Attribute("duration");

    ArrayList<Attribute> attr = new ArrayList<Attribute>();
    attr.add(id);
    attr.add(pDiffRise);
    attr.add(qDiffRise);
    attr.add(pDiffReduce);
    attr.add(qDiffReduce);
    attr.add(duration);

    Instances instances = new Instances("Isolated", attr, 0);

    // Each event is translated to an instance with the above attributes
    for (Event event : isolated) {
        Instance inst = new DenseInstance(6);
        inst.setValue(id, event.getId());
        inst.setValue(pDiffRise, event.getRisingPoints().get(0).getPDiff());
        inst.setValue(qDiffRise, event.getRisingPoints().get(0).getQDiff());
        inst.setValue(pDiffReduce, event.getReductionPoints().get(0).getPDiff());
        inst.setValue(qDiffReduce, event.getReductionPoints().get(0).getQDiff());
        inst.setValue(duration, event.getEndMinute() - event.getStartMinute());
        instances.add(inst);
    }

    int n = Constants.MAX_CLUSTERS_NUMBER;
    Instances newInst = null;

    log.info("Instances: " + instances.toSummaryString());
    log.info("Max Clusters: " + n);

    // Create the AddCluster filter of Weka and set up the clusterer.
    AddCluster addcluster = new AddCluster();

    if (instances.size() > Constants.KMEANS_LIMIT_NUMBER || instances.size() == 0) {
        HierarchicalClusterer clusterer = new HierarchicalClusterer();
        String[] opt = { "-N", "" + n + "", "-P", "-D", "-L", "AVERAGE" };
        clusterer.setDistanceFunction(new EuclideanDistance());
        clusterer.setNumClusters(n);
        clusterer.setOptions(opt);
        clusterer.setPrintNewick(true);
        clusterer.setDebug(true);
        // clusterer.getOptions();

        addcluster.setClusterer(clusterer);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster the data set
        newInst = Filter.useFilter(instances, addcluster);
    } else {
        SimpleKMeans kmeans = new SimpleKMeans();
        kmeans.setSeed(10);

        // This is the important parameter to set
        kmeans.setPreserveInstancesOrder(true);
        kmeans.setNumClusters(n);
        kmeans.buildClusterer(instances);

        addcluster.setClusterer(kmeans);
        addcluster.setInputFormat(instances);
        addcluster.setIgnoredAttributeIndices("1");

        // Cluster the data set
        newInst = Filter.useFilter(instances, addcluster);
    }

    return newInst;
}
From source file:eu.cassandra.utils.Utils.java
License:Apache License
/**
 * This function is used in order to create clusters of points of interest
 * based on the active power difference they have.
 *
 * @param pois
 *          The list of points of interest that will be clustered.
 * @return The newly created clusters with the points that are comprising them.
 * @throws Exception
 */
public static ArrayList<ArrayList<PointOfInterest>> clusterPoints(ArrayList<PointOfInterest> pois, int bias)
        throws Exception {
    // Initialize the auxiliary variables
    ArrayList<ArrayList<PointOfInterest>> result = new ArrayList<ArrayList<PointOfInterest>>();

    // Estimating the number of clusters that will be created
    int numberOfClusters = (int) (Math.ceil((double) pois.size() / (double) Constants.MAX_POINTS_OF_INTEREST))
            + bias;

    log.info("Clusters: " + pois.size() + " / " + Constants.MAX_POINTS_OF_INTEREST + " + " + bias + " = "
            + numberOfClusters);

    // Create a new empty list of points for each cluster
    for (int i = 0; i < numberOfClusters; i++)
        result.add(new ArrayList<PointOfInterest>());

    // Initializing auxiliary variables, namely the attributes of the data set
    Attribute id = new Attribute("id");
    Attribute pDiffRise = new Attribute("pDiff");

    ArrayList<Attribute> attr = new ArrayList<Attribute>();
    attr.add(id);
    attr.add(pDiffRise);

    Instances instances = new Instances("Points of Interest", attr, 0);

    // Each point of interest is translated to an instance with the above attributes
    for (int i = 0; i < pois.size(); i++) {
        Instance inst = new DenseInstance(2);
        inst.setValue(id, i);
        inst.setValue(pDiffRise, Math.abs(pois.get(i).getPDiff()));
        instances.add(inst);
    }

    // System.out.println(instances.toString());

    Instances newInst = null;

    log.debug("Instances: " + instances.toSummaryString());

    // Create the AddCluster filter of Weka and set up the k-means clusterer.
    AddCluster addcluster = new AddCluster();
    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setSeed(numberOfClusters);

    // This is the important parameter to set
    kmeans.setPreserveInstancesOrder(true);
    kmeans.setNumClusters(numberOfClusters);
    kmeans.buildClusterer(instances);

    addcluster.setClusterer(kmeans);
    addcluster.setInputFormat(instances);
    addcluster.setIgnoredAttributeIndices("1");

    // Cluster the data set
    newInst = Filter.useFilter(instances, addcluster);

    // System.out.println(newInst.toString());

    // Parse through the data set to see where each point is placed in the clusters.
    for (int i = 0; i < newInst.size(); i++) {
        String cluster = newInst.get(i).stringValue(newInst.attribute(2));
        cluster = cluster.replace("cluster", "");
        log.debug("Point of Interest: " + i + " Cluster: " + cluster);
        result.get(Integer.parseInt(cluster) - 1).add(pois.get(i));
    }

    // Sorting each cluster's points by their minutes.
    for (int i = result.size() - 1; i >= 0; i--) {
        if (result.get(i).size() == 0)
            result.remove(i);
        else
            Collections.sort(result.get(i), Constants.comp);
    }

    // Sorting all the clusters by their active power.
    Collections.sort(result, Constants.comp5);

    return result;
}
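The string parsing at the end works because AddCluster appends a nominal attribute whose labels are "cluster1", "cluster2", and so on. Since setPreserveInstancesOrder(true) is enabled above, an alternative sketch is to ask SimpleKMeans for the assignments directly and skip the string round-trip (variable names as in the method above; getAssignments() returns 0-based cluster indices):

// Alternative to parsing the "clusterN" labels, assuming the kmeans
// object above was built with setPreserveInstancesOrder(true):
int[] assignments = kmeans.getAssignments();
for (int i = 0; i < assignments.length; i++) {
    result.get(assignments[i]).add(pois.get(i));
}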
From source file:examples.TrainerFrame.java
private void jButtonTrainActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButtonTrainActionPerformed
    // This is a temporary fix to make it appear like it's finished
    pBar.setMaximum(7);
    pBar.setValue(0);
    pBar.repaint();
    jLabelTrainerStatus.setText("Extracting Target Features");

    // Generate target features
    String featuresTarget = null;
    new Thread(new TrainerFrame.thread1()).start();
    try {
        featuresTarget = GlobalData.getFeatures(jTextFieldCallDirectory.getText());
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    pBar.setValue(1);
    pBar.repaint();
    jLabelTrainerStatus.setText("Extracting Other Features");

    // Generate non-target features
    String featuresOther = null;
    new Thread(new TrainerFrame.thread1()).start();
    try {
        featuresOther = GlobalData.getFeatures(jTextFieldOtherSoundDirectory.getText());
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    pBar.setValue(2);
    pBar.repaint();
    jLabelTrainerStatus.setText("Parsing Features");

    // Load the target ARFF file
    BufferedReader readerTarget;
    Instances dataTarget = null;
    try {
        readerTarget = new BufferedReader(new FileReader(featuresTarget));
        dataTarget = new Instances(readerTarget);
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    pBar.setValue(3);
    pBar.repaint();

    // Load the other ARFF file
    BufferedReader readerOther;
    Instances dataOther = null;
    try {
        readerOther = new BufferedReader(new FileReader(featuresOther));
        dataOther = new Instances(readerOther);
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    pBar.setValue(4);
    pBar.repaint();
    jLabelTrainerStatus.setText("Training Classifier");

    // Append a nominal class attribute ("target"/"other") and merge the two sets
    Instances newData = new Instances(dataTarget);
    FastVector typeList = new FastVector() { };
    typeList.add("target");
    typeList.add("other");
    newData.insertAttributeAt(new Attribute("NewNominal", (java.util.List<String>) typeList),
            newData.numAttributes());
    for (Instance instance : newData) {
        instance.setValue(newData.numAttributes() - 1, "target");
    }

    dataOther.insertAttributeAt(new Attribute("NewNominal", (java.util.List<String>) typeList),
            dataOther.numAttributes());
    for (Instance instance : dataOther) {
        instance.setValue(newData.numAttributes() - 1, "other");
        newData.add(instance);
    }

    newData.setClassIndex(newData.numAttributes() - 1);
    pBar.setValue(5);
    pBar.repaint();

    // Save the combined training data
    ArffSaver saver = new ArffSaver();
    saver.setInstances(newData);
    try {
        saver.setFile(new File("AnimalCallTrainingFile.arff"));
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    try {
        saver.writeBatch();
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    pBar.setValue(6);
    pBar.repaint();

    // Train an unpruned J48 decision tree
    String[] options = new String[1];
    options[0] = "-U";
    J48 tree = new J48();
    try {
        tree.setOptions(options);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    try {
        tree.buildClassifier(newData);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }

    Debug.saveToFile("Classifiers/" + jTextFieldClassifierName.getText(), tree);
    System.out.println("classifier saved");
    MyClassifier tempClass = new MyClassifier(jTextFieldClassifierName.getText());
    GlobalData.classifierList.addElement(tempClass.name);

    pBar.setValue(7);
    pBar.repaint();
    jLabelTrainerStatus.setText("Finished");
}
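FastVector is deprecated as of Weka 3.7; it now extends ArrayList, which is why the cast to java.util.List<String> above compiles. A sketch of the idiomatic 3.7+ replacement for building the nominal attribute:

// Idiomatic Weka 3.7+ equivalent of the FastVector block above
java.util.ArrayList<String> typeList = new java.util.ArrayList<String>();
typeList.add("target");
typeList.add("other");
newData.insertAttributeAt(new Attribute("NewNominal", typeList), newData.numAttributes());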
From source file:expshell.NeuralClass.java
public static void main(String[] args) throws Exception {
    Instance testInst = new Instance(5);
    testInst.setValue(0, 2.1);
    testInst.setValue(1, 3.1);
    testInst.setValue(2, 4.1);
    testInst.setValue(3, 5.1);
    // the class
    testInst.setValue(4, 0.0);
    Layer l = new Layer(5, testInst.numAttributes() - 1);
    l.computeNode(testInst);
}
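This example (like the FeatureSelection.ReliefFAttributeEval one below) is written against the pre-3.7.1 Weka API, in which weka.core.Instance was a concrete class. From 3.7.1 on, Instance is an interface, so the equivalent construction would look roughly like:

// Weka 3.7.1+ equivalent: Instance is an interface, DenseInstance the concrete class
Instance testInst = new DenseInstance(5);
testInst.setValue(0, 2.1);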
From source file:eyetracker.ServerCommunicator.java
public Instance getInput() {
    // Initialize all the attributes.
    int totalAttribute = MLPProcessor.inst.firstInstance().numAttributes();
    Instance instance = new SparseInstance(totalAttribute);
    instance.setDataset(MLPProcessor.inst);

    String[] attributes = unifiedData.split(",");
    // String[] attributes = examData.split(",");
    for (int i = 0; i < totalAttribute - 1; i++) {
        instance.setValue(i, Double.valueOf(attributes[i]));
    }
    return instance;
}
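The loop stops at totalAttribute - 1, leaving the class attribute missing, presumably so the instance can be handed straight to a classifier. A hypothetical follow-up (the MLPProcessor.mlp classifier field is invented here for illustration) might be:

// Hypothetical usage: classify the sparse instance built above
double predicted = MLPProcessor.mlp.classifyInstance(getInput());
System.out.println("Predicted class index: " + predicted);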
From source file:FeatureSelection.ReliefFAttributeEval.java
License:Open Source License
public Instances createReliefInput(ArrayList<ArrayList<Double>> dataset, String[] featureNames_Arr,
        ArrayList<Double> labels) {
    this.featureNames_Arr = featureNames_Arr;

    // create attributes
    FastVector fv = new FastVector();
    for (int i = 0; i <= featureNames_Arr.length; i++) {
        if (i == featureNames_Arr.length) {
            fv.addElement(new Attribute("@@class@@"));
            continue;
        }
        fv.addElement(new Attribute(featureNames_Arr[i]));
    }

    // transpose the dataset so that each row represents one window, and append the class label
    ArrayList<ArrayList<Double>> ReliefInput = new ArrayList<ArrayList<Double>>();
    for (int i = 0; i < dataset.get(0).size(); i++) {
        ArrayList<Double> featT = new ArrayList<Double>();
        for (int j = 0; j < dataset.size(); j++) {
            featT.add(dataset.get(j).get(i));
        }
        featT.add(labels.get(i));
        ReliefInput.add(featT);
    }

    // transform the dataset into the Instances type
    Instances ReliefInstances = new Instances("Features", fv, dataset.size());
    for (int i = 0; i < ReliefInput.size(); i++) {
        double[] vals = CollectionUtilities.listToArray(ReliefInput.get(i));
        Instance instWeka = new Instance(vals.length);
        for (int j = 0; j < vals.length; j++) {
            instWeka.setValue(j, vals[j]);
        }
        ReliefInstances.add(instWeka);
    }
    ReliefInstances.setClassIndex(ReliefInstances.numAttributes() - 1);

    return ReliefInstances;
}
From source file:ffnn.TucilWeka.java
public static Instances createInstances(int max) {
    // Lists of attributes and possible class values for the header.
    // Number of attributes: 4 without the class, 5 with it.
    ArrayList<Attribute> attrs = new ArrayList<Attribute>(5);
    ArrayList<String> classVal = new ArrayList<String>();

    // Add the possible class values to the list
    classVal.add("Iris-setosa");
    classVal.add("Iris-versicolor");
    classVal.add("Iris-virginica");

    // Add the attributes to the list
    Attribute sepallength = new Attribute("sepallength");
    attrs.add(sepallength); // numeric attribute
    Attribute sepalwidth = new Attribute("sepalwidth");
    attrs.add(sepalwidth); // numeric attribute
    Attribute petallength = new Attribute("petallength");
    attrs.add(petallength); // numeric attribute
    Attribute petalwidth = new Attribute("petalwidth");
    attrs.add(petalwidth); // numeric attribute
    Attribute classValue = new Attribute("@@class@@", classVal);
    attrs.add(classValue); // nominal class attribute

    // Construction: the constructor takes the name, the list of attributes and the initial size
    Instances dataRaw = new Instances("irisNew", attrs, 0); // empty Instances
    dataRaw.setClassIndex(dataRaw.numAttributes() - 1);

    Scanner scan = new Scanner(System.in);
    for (int i = 0; i < max; i++) {
        // Weka stores instance values as doubles
        double temp;
        Instance inst = new DenseInstance(dataRaw.numAttributes());
        System.out.println("Sepallength:");
        temp = scan.nextDouble();
        inst.setValue(sepallength, temp);
        System.out.println("Sepalwidth:");
        temp = scan.nextDouble();
        inst.setValue(sepalwidth, temp);
        System.out.println("Petallength:");
        temp = scan.nextDouble();
        inst.setValue(petallength, temp);
        System.out.println("Petalwidth:");
        temp = scan.nextDouble();
        inst.setValue(petalwidth, temp);
        // The class value (0 -> setosa, 1 -> versicolor, 2 -> virginica) is not
        // actually needed here, so it is left unset:
        // System.out.println("Fifth input:");
        // temp = scan.nextDouble();
        // inst.setValue(classValue, temp);

        // Add the instance to the Instances set
        dataRaw.add(inst);
    }
    return dataRaw;
}
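For context, a minimal driver for the method above could read three flowers from standard input and print the resulting dataset (this main method is a hypothetical addition, not part of the original class):

// Hypothetical driver for createInstances
public static void main(String[] args) {
    Instances data = createInstances(3);
    System.out.println(data); // prints the ARFF header plus the three typed-in rows
}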
From source file:filters.MauiFilter.java
License:Open Source License
/**
 * Converts an instance.
 */
private FastVector convertInstance(Instance instance, boolean training) throws Exception {

    FastVector vector = new FastVector();

    String fileName = instance.stringValue(fileNameAtt);

    if (debugMode) {
        System.err.println("-- Converting instance for document " + fileName);
    }

    // Get the key phrases for the document
    HashMap<String, Counter> hashKeyphrases = null;

    if (!instance.isMissing(keyphrasesAtt)) {
        String keyphrases = instance.stringValue(keyphrasesAtt);
        hashKeyphrases = getGivenKeyphrases(keyphrases);
    }

    // Get the document text
    String documentText = instance.stringValue(documentAtt);

    // Compute the candidate topics
    HashMap<String, Candidate> candidateList;
    if (allCandidates != null && allCandidates.containsKey(instance)) {
        candidateList = allCandidates.get(instance);
    } else {
        candidateList = getCandidates(documentText);
    }
    if (debugMode) {
        System.err.println(candidateList.size() + " candidates ");
    }

    // Set indices for key attributes
    int tfidfAttIndex = documentAtt + 2;
    int distAttIndex = documentAtt + 3;
    int probsAttIndex = documentAtt + numFeatures;

    int countPos = 0;
    int countNeg = 0;

    // Go through the phrases and convert them into instances
    for (Candidate candidate : candidateList.values()) {

        if (candidate.getFrequency() < minOccurFrequency) {
            continue;
        }

        String name = candidate.getName();
        String orig = candidate.getBestFullForm();
        if (!vocabularyName.equals("none")) {
            orig = candidate.getTitle();
        }

        double[] vals = computeFeatureValues(candidate, training, hashKeyphrases, candidateList);

        Instance inst = new Instance(instance.weight(), vals);
        inst.setDataset(classifierData);

        // Get probability of a phrase being key phrase
        double[] probs = classifier.distributionForInstance(inst);
        double prob = probs[0];
        if (nominalClassValue) {
            prob = probs[1];
        }

        // Compute attribute values for final instance
        double[] newInst = new double[instance.numAttributes() + numFeatures + 2];

        int pos = 0;
        for (int i = 1; i < instance.numAttributes(); i++) {
            if (i == documentAtt) {
                // output of values for a given phrase:

                // Add phrase
                int index = outputFormatPeek().attribute(pos).addStringValue(name);
                newInst[pos++] = index;

                // Add original version
                if (orig != null) {
                    index = outputFormatPeek().attribute(pos).addStringValue(orig);
                } else {
                    index = outputFormatPeek().attribute(pos).addStringValue(name);
                }
                newInst[pos++] = index;

                // Add features
                newInst[pos++] = inst.value(tfIndex);
                newInst[pos++] = inst.value(idfIndex);
                newInst[pos++] = inst.value(tfidfIndex);
                newInst[pos++] = inst.value(firstOccurIndex);
                newInst[pos++] = inst.value(lastOccurIndex);
                newInst[pos++] = inst.value(spreadOccurIndex);
                newInst[pos++] = inst.value(domainKeyphIndex);
                newInst[pos++] = inst.value(lengthIndex);
                newInst[pos++] = inst.value(generalityIndex);
                newInst[pos++] = inst.value(nodeDegreeIndex);
                newInst[pos++] = inst.value(semRelIndex);
                newInst[pos++] = inst.value(wikipKeyphrIndex);
                newInst[pos++] = inst.value(invWikipFreqIndex);
                newInst[pos++] = inst.value(totalWikipKeyphrIndex);

                // Add probability
                probsAttIndex = pos;
                newInst[pos++] = prob;

                // Set rank to missing (computed below)
                newInst[pos++] = Instance.missingValue();

            } else if (i == keyphrasesAtt) {
                newInst[pos++] = inst.classValue();
            } else {
                newInst[pos++] = instance.value(i);
            }
        }

        Instance ins = new Instance(instance.weight(), newInst);
        ins.setDataset(outputFormatPeek());
        vector.addElement(ins);

        if (inst.classValue() == 0) {
            countNeg++;
        } else {
            countPos++;
        }
    }
    if (debugMode) {
        System.err.println(countPos + " positive; " + countNeg + " negative instances");
    }

    // Sort phrases according to their distance (stable sort)
    double[] vals = new double[vector.size()];
    for (int i = 0; i < vals.length; i++) {
        vals[i] = ((Instance) vector.elementAt(i)).value(distAttIndex);
    }
    FastVector newVector = new FastVector(vector.size());
    int[] sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their TFxIDF value (stable sort)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = -((Instance) vector.elementAt(i)).value(tfidfAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their probability (stable sort)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = 1 - ((Instance) vector.elementAt(i)).value(probsAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Compute the rank of phrases. Check for subphrases that are ranked
    // lower than superphrases and assign probability -1 and set the
    // rank to Integer.MAX_VALUE
    int rank = 1;
    for (int i = 0; i < vals.length; i++) {
        Instance currentInstance = (Instance) vector.elementAt(i);

        // Shortcut: if the phrase is very unlikely, make the rank very low and continue
        if (Utils.grOrEq(vals[i], 1.0)) {
            currentInstance.setValue(probsAttIndex + 1, Integer.MAX_VALUE);
            continue;
        }

        // Otherwise look for a superphrase, starting with the first phrase in the
        // list that has the same probability, TFxIDF value, and distance as the
        // current phrase. We do this to catch all superphrases that have the same
        // probability, TFxIDF value, and distance as the current phrase.
        int startInd = i;
        while (startInd < vals.length) {
            Instance inst = (Instance) vector.elementAt(startInd);
            if ((inst.value(tfidfAttIndex) != currentInstance.value(tfidfAttIndex))
                    || (inst.value(probsAttIndex) != currentInstance.value(probsAttIndex))
                    || (inst.value(distAttIndex) != currentInstance.value(distAttIndex))) {
                break;
            }
            startInd++;
        }
        currentInstance.setValue(probsAttIndex + 1, rank++);
    }
    return vector;
}