List of usage examples for weka.core Instances classIndex
public int classIndex()
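Before the source-file examples below, here is a minimal, self-contained sketch of the usual pattern: classIndex() returns -1 until a class attribute has been chosen, and by convention the last attribute is set as the class. The file name iris.arff is only a placeholder for illustration, not part of the examples that follow.

import java.io.FileReader;
import weka.core.Instances;

public class ClassIndexDemo {
    public static void main(String[] args) throws Exception {
        // Load a dataset; "iris.arff" is a hypothetical file name.
        Instances data = new Instances(new FileReader("iris.arff"));
        // classIndex() returns -1 until a class attribute has been set.
        System.out.println("Class index before setting: " + data.classIndex());
        // Common convention: treat the last attribute as the class.
        data.setClassIndex(data.numAttributes() - 1);
        System.out.println("Class index after setting: " + data.classIndex());
    }
}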
From source file: sg.edu.nus.comp.nlp.ims.classifiers.CMultiClassesSVM.java
License: Open Source License
/**
 * get output format
 *
 * @param p_Instances
 *            input format
 */
protected void getOutputFormat(Instances p_Instances) {
    FastVector newAtts, newVals;
    // Compute new attributes
    newAtts = new FastVector(p_Instances.numAttributes());
    for (int j = 0; j < p_Instances.numAttributes(); j++) {
        Attribute att = p_Instances.attribute(j);
        if (j != p_Instances.classIndex()) {
            newAtts.addElement(att.copy());
        } else {
            if (p_Instances.classAttribute().isNumeric()) {
                newAtts.addElement(new Attribute(att.name()));
            } else {
                newVals = new FastVector(2);
                newVals.addElement("negative");
                newVals.addElement("positive");
                newAtts.addElement(new Attribute(att.name(), newVals));
            }
        }
    }
    // Construct new header
    this.m_OutputFormat = new Instances(p_Instances.relationName(), newAtts, 0);
    this.m_OutputFormat.setClassIndex(p_Instances.classIndex());
    if (this.m_IndexOfID >= 0) {
        this.m_OutputFormat.deleteAttributeAt(this.m_IndexOfID);
    }
}
From source file: sg.edu.nus.comp.nlp.ims.classifiers.CMultiClassesSVM.java
License: Open Source License
/**
 * generate instances for classifier classIdx
 *
 * @param p_Instances
 *            input instances
 * @param p_ClassIndex
 *            class index
 * @param p_ID2Classes
 *            instance ids
 * @return new instances
 */
protected Instances genInstances(Instances p_Instances, double p_ClassIndex,
        Hashtable<String, ArrayList<Double>> p_ID2Classes) {
    Instances newInsts = new Instances(this.m_OutputFormat, 0);
    for (int i = 0; i < p_Instances.numInstances(); i++) {
        Instance inst = p_Instances.instance(i);
        Instance newInst = null;
        if (SparseInstance.class.isInstance(inst)) {
            newInst = new SparseInstance(inst);
        } else {
            newInst = new Instance(inst);
        }
        if (newInst.value(p_Instances.classIndex()) == p_ClassIndex) {
            newInst.setValue(inst.classIndex(), 1);
        } else {
            if (p_ID2Classes == null || !p_ID2Classes.get(inst.stringValue(this.m_IndexOfID))
                    .contains(new Double(p_ClassIndex))) {
                newInst.setValue(inst.classIndex(), 0);
            } else {
                continue;
            }
        }
        newInst.deleteAttributeAt(this.m_IndexOfID);
        newInst.setDataset(newInsts);
        newInsts.add(newInst);
    }
    return newInsts;
}
From source file: sirius.clustering.main.ClustererClassificationPane.java
License: Open Source License
private void start() { // Run Classifier
    if (this.inputDirectoryTextField.getText().length() == 0) {
        JOptionPane.showMessageDialog(parent, "Please set Input Directory to where the clusterer output are!",
                "Evaluate Classifier", JOptionPane.ERROR_MESSAGE);
        return;
    }
    if (m_ClassifierEditor.getValue() == null) {
        JOptionPane.showMessageDialog(parent, "Please choose Classifier!", "Evaluate Classifier",
                JOptionPane.ERROR_MESSAGE);
        return;
    }
    if (validateStatsSettings(1) == false) {
        return;
    }
    if (this.clusteringClassificationThread == null) {
        startButton.setEnabled(false);
        stopButton.setEnabled(true);
        tabbedClassifierPane.setSelectedIndex(0);
        this.clusteringClassificationThread = (new Thread() {
            public void run() {
                // Clear the output text area
                levelOneClassifierOutputTextArea.setText("");
                resultsTableModel.reset();
                //double threshold = Double.parseDouble(classifierOneThresholdTextField.getText());
                // cross-validation
                int numFolds;
                if (jackKnifeRadioButton.isSelected())
                    numFolds = -1;
                else
                    numFolds = Integer.parseInt(foldsField.getText());
                StringTokenizer st = new StringTokenizer(inputDirectoryTextField.getText(), File.separator);
                String filename = "";
                while (st.hasMoreTokens()) {
                    filename = st.nextToken();
                }
                StringTokenizer st2 = new StringTokenizer(filename, "_.");
                numOfCluster = 0;
                if (st2.countTokens() >= 2) {
                    st2.nextToken();
                    String numOfClusterString = st2.nextToken().replaceAll("cluster", "");
                    try {
                        numOfCluster = Integer.parseInt(numOfClusterString);
                    } catch (NumberFormatException e) {
                        JOptionPane.showMessageDialog(parent,
                                "Please choose the correct file! (Output from Utilize Clusterer)", "ERROR",
                                JOptionPane.ERROR_MESSAGE);
                    }
                }
                Classifier template = (Classifier) m_ClassifierEditor.getValue();
                for (int x = 0; x <= numOfCluster && clusteringClassificationThread != null; x++) { // Test each cluster
                    try {
                        long totalTimeStart = 0, totalTimeElapsed = 0;
                        totalTimeStart = System.currentTimeMillis();
                        statusLabel.setText("Reading in cluster" + x + " file..");
                        String inputFilename = inputDirectoryTextField.getText()
                                .replaceAll("_cluster" + numOfCluster + ".arff", "_cluster" + x + ".arff");
                        String outputScoreFilename = inputDirectoryTextField.getText()
                                .replaceAll("_cluster" + numOfCluster + ".arff", "_cluster" + x + ".score");
                        BufferedWriter output = new BufferedWriter(new FileWriter(outputScoreFilename));
                        Instances inst = new Instances(new FileReader(inputFilename));
                        // Assume that the class attribute is the last attribute -
                        // this should be the case for all Sirius-produced ARFF files
                        inst.setClassIndex(inst.numAttributes() - 1);
                        Random random = new Random(1); // Simply set to 1; shall implement the random seed option later
                        inst.randomize(random);
                        if (inst.attribute(inst.classIndex()).isNominal())
                            inst.stratify(numFolds);
                        // for timing
                        ClassifierResults classifierResults = new ClassifierResults(false, 0);
                        String classifierName = m_ClassifierEditor.getValue().getClass().getName();
                        classifierResults.updateList(classifierResults.getClassifierList(), "Classifier: ",
                                classifierName);
                        classifierResults.updateList(classifierResults.getClassifierList(), "Training Data: ",
                                inputFilename);
                        classifierResults.updateList(classifierResults.getClassifierList(), "Time Used: ", "NA");
                        //ArrayList<Double> resultList = new ArrayList<Double>();
                        if (jackKnifeRadioButton.isSelected() || numFolds > inst.numInstances() - 1)
                            numFolds = inst.numInstances() - 1;
                        for (int fold = 0; fold < numFolds && clusteringClassificationThread != null; fold++) {
                            // Doing cross-validation
                            statusLabel.setText("Cluster: " + x + " - Training Fold " + (fold + 1) + "..");
                            Instances train = inst.trainCV(numFolds, fold, random);
                            Classifier current = null;
                            try {
                                current = Classifier.makeCopy(template);
                                current.buildClassifier(train);
                                Instances test = inst.testCV(numFolds, fold);
                                statusLabel.setText("Cluster: " + x + " - Testing Fold " + (fold + 1) + "..");
                                for (int jj = 0; jj < test.numInstances(); jj++) {
                                    double[] result = current.distributionForInstance(test.instance(jj));
                                    output.write("Cluster: " + x);
                                    output.newLine();
                                    output.newLine();
                                    output.write(test.instance(jj).stringValue(test.classAttribute()) + ",0="
                                            + result[0]);
                                    output.newLine();
                                }
                            } catch (Exception ex) {
                                ex.printStackTrace();
                                statusLabel.setText("Error in cross-validation!");
                                startButton.setEnabled(true);
                                stopButton.setEnabled(false);
                            }
                        }
                        output.close();
                        totalTimeElapsed = System.currentTimeMillis() - totalTimeStart;
                        classifierResults.updateList(classifierResults.getResultsList(), "Total Time Used: ",
                                Utils.doubleToString(totalTimeElapsed / 60000, 2) + " minutes "
                                        + Utils.doubleToString((totalTimeElapsed / 1000.0) % 60.0, 2) + " seconds");
                        double threshold = validateFieldAsThreshold(classifierOneThresholdTextField.getText(),
                                "Threshold Field", classifierOneThresholdTextField);
                        String filename2 = inputDirectoryTextField.getText()
                                .replaceAll("_cluster" + numOfCluster + ".arff", "_cluster" + x + ".score");
                        PredictionStats classifierStats = new PredictionStats(filename2, 0, threshold);
                        resultsTableModel.add("Cluster " + x, classifierResults, classifierStats);
                        resultsTable.setRowSelectionInterval(x, x);
                        computeStats(numFolds); // compute and display the results
                    } catch (Exception e) {
                        e.printStackTrace();
                        statusLabel.setText("Error in reading file!");
                        startButton.setEnabled(true);
                        stopButton.setEnabled(false);
                    }
                } // end of cluster for loop
                resultsTableModel.add("Summary - Equal Weightage", null, null);
                resultsTable.setRowSelectionInterval(numOfCluster + 1, numOfCluster + 1);
                computeStats(numFolds);
                resultsTableModel.add("Summary - Weighted Average", null, null);
                resultsTable.setRowSelectionInterval(numOfCluster + 2, numOfCluster + 2);
                computeStats(numFolds);
                if (clusteringClassificationThread != null)
                    statusLabel.setText("Done!");
                else
                    statusLabel.setText("Interrupted..");
                startButton.setEnabled(true);
                stopButton.setEnabled(false);
                if (classifierOne != null) {
                    levelOneClassifierOutputScrollPane.getVerticalScrollBar()
                            .setValue(levelOneClassifierOutputScrollPane.getVerticalScrollBar().getMaximum());
                }
                clusteringClassificationThread = null;
            }
        });
        this.clusteringClassificationThread.setPriority(Thread.MIN_PRIORITY);
        this.clusteringClassificationThread.start();
    } else {
        JOptionPane.showMessageDialog(parent,
                "Cannot start new job as previous job still running. Click stop to terminate previous job",
                "ERROR", JOptionPane.ERROR_MESSAGE);
    }
}
From source file: sirius.clustering.main.TrainClustererPane.java
License: Open Source License
private Instances removeClass(Instances inst) { // Copied directly from Weka's ClustererPanel.java
    Remove af = new Remove();
    Instances retI = null;
    try {
        if (inst.classIndex() < 0) {
            // do nothing since the class index is not set
            retI = inst;
        } else {
            // remove the class attribute
            af.setAttributeIndices("" + (inst.classIndex() + 1));
            af.setInvertSelection(false);
            af.setInputFormat(inst);
            retI = Filter.useFilter(inst, af);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return retI;
}
From source file: sirius.clustering.main.TrainClustererPane.java
License: Open Source License
public void setInstances(Instances inst) { // Directly copied with slight modification from Weka's ClustererPanel.java
    String[] attribNames = new String[inst.numAttributes()];
    for (int i = 0; i < inst.numAttributes(); i++) {
        String type = "";
        switch (inst.attribute(i).type()) {
            case Attribute.NOMINAL:
                type = "(Nom) ";
                break;
            case Attribute.NUMERIC:
                type = "(Num) ";
                break;
            case Attribute.STRING:
                type = "(Str) ";
                break;
            case Attribute.DATE:
                type = "(Dat) ";
                break;
            case Attribute.RELATIONAL:
                type = "(Rel) ";
                break;
            default:
                type = "(???) ";
        }
        String attnm = inst.attribute(i).name();
        attribNames[i] = type + attnm;
    }
    m_ClassCombo.setModel(new DefaultComboBoxModel(attribNames));
    if (inst.classIndex() == -1)
        m_ClassCombo.setSelectedIndex(attribNames.length - 1);
    else
        m_ClassCombo.setSelectedIndex(inst.classIndex());
}
From source file: smo2.SMO.java
License: Open Source License
/**
 * Method for building the classifier. Implements a one-against-one wrapper
 * for multi-class problems.
 *
 * @param insts
 *            the set of training instances
 * @exception Exception
 *                if the classifier can't be built successfully
 */
public void buildClassifier(Instances insts) throws Exception {
    if (!m_checksTurnedOff) {
        if (insts.checkForStringAttributes()) {
            throw new UnsupportedAttributeTypeException("Cannot handle string attributes!");
        }
        if (insts.classAttribute().isNumeric()) {
            throw new UnsupportedClassTypeException(
                    "mySMO can't handle a numeric class! Use " + "SMOreg for performing regression.");
        }
        insts = new Instances(insts);
        insts.deleteWithMissingClass();
        if (insts.numInstances() == 0) {
            throw new Exception("No training instances without a missing class!");
        }
        /*
         * Removes all the instances with weight equal to 0. MUST be done
         * since condition (8) of Keerthi's paper is made with the assertion
         * Ci > 0 (see equation (3a)).
         */
        Instances data = new Instances(insts, insts.numInstances());
        for (int i = 0; i < insts.numInstances(); i++) {
            if (insts.instance(i).weight() > 0)
                data.add(insts.instance(i));
        }
        if (data.numInstances() == 0) {
            throw new Exception("No training instances left after removing "
                    + "instance with either a weight null or a missing class!");
        }
        insts = data;
    }
    m_onlyNumeric = true;
    if (!m_checksTurnedOff) {
        for (int i = 0; i < insts.numAttributes(); i++) {
            if (i != insts.classIndex()) {
                if (!insts.attribute(i).isNumeric()) {
                    m_onlyNumeric = false;
                    break;
                }
            }
        }
    }
    if (!m_checksTurnedOff) {
        m_Missing = new ReplaceMissingValues();
        m_Missing.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Missing);
    } else {
        m_Missing = null;
    }
    if (!m_onlyNumeric) {
        m_NominalToBinary = new NominalToBinary();
        m_NominalToBinary.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_NominalToBinary);
    } else {
        m_NominalToBinary = null;
    }
    if (m_filterType == FILTER_STANDARDIZE) {
        m_Filter = new Standardize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else if (m_filterType == FILTER_NORMALIZE) {
        m_Filter = new Normalize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else {
        m_Filter = null;
    }
    m_classIndex = insts.classIndex();
    m_classAttribute = insts.classAttribute();
    // Generate subsets representing each class
    Instances[] subsets = new Instances[insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i] = new Instances(insts, insts.numInstances());
    }
    for (int j = 0; j < insts.numInstances(); j++) {
        Instance inst = insts.instance(j);
        subsets[(int) inst.classValue()].add(inst);
    }
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i].compactify();
    }
    // Build the binary classifiers
    Random rand = new Random(m_randomSeed);
    m_classifiers = new BinarymySMO[insts.numClasses()][insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        for (int j = i + 1; j < insts.numClasses(); j++) {
            m_classifiers[i][j] = new BinarymySMO();
            Instances data = new Instances(insts, insts.numInstances());
            for (int k = 0; k < subsets[i].numInstances(); k++) {
                data.add(subsets[i].instance(k));
            }
            for (int k = 0; k < subsets[j].numInstances(); k++) {
                data.add(subsets[j].instance(k));
            }
            data.compactify();
            data.randomize(rand);
            m_classifiers[i][j].buildClassifier(data, i, j, m_fitLogisticModels, m_numFolds, m_randomSeed);
        }
    }
}
From source file: svmal.SVMStrategymulti.java
public static Instances PatternsToInstances2(Pattern[] patts, int positiveLabel) {
    Instances result = new Instances(patts[0].dataset(), 0, 0);
    int classIndex = result.classIndex();
    for (Pattern orig : patts) {
        double[] vals = orig.toDoubleArray();
        Instance2 inst2;
        if (vals[classIndex] == positiveLabel)
            vals[classIndex] = 1;
        else
            vals[classIndex] = 0;
        inst2 = new Instance2(orig.weight(), vals);
        inst2.setIndex(orig.id());
        inst2.setDataset(result);
        result.add(inst2);
    }
    return result;
}
From source file: tr.gov.ulakbim.jDenetX.experiments.wrappers.EvalActiveBoostingID.java
License: Open Source License
public Instances clusteredInstances(Instances data) {
    if (data == null) {
        throw new NullPointerException("Data is null at clusteredInstances method");
    }
    Instances sampled_data = data;
    for (int i = 0; i < sampled_data.numInstances(); i++) {
        sampled_data.remove(i);
    }
    SimpleKMeans sKmeans = new SimpleKMeans();
    data.setClassIndex(data.numAttributes() - 1);
    Remove filter = new Remove();
    filter.setAttributeIndices("" + (data.classIndex() + 1));
    List assignments = new ArrayList();
    try {
        filter.setInputFormat(data);
        Instances dataClusterer = Filter.useFilter(data, filter);
        String[] options = new String[3];
        options[0] = "-I"; // max. iterations
        options[1] = "500";
        options[2] = "-O";
        sKmeans.setNumClusters(data.numClasses());
        sKmeans.setOptions(options);
        sKmeans.buildClusterer(dataClusterer);
        System.out.println("Kmeans\n:" + sKmeans);
        System.out.println(Arrays.toString(sKmeans.getAssignments()));
        assignments = Arrays.asList(sKmeans.getAssignments());
    } catch (Exception e) {
        e.printStackTrace();
    }
    System.out.println("Assignments\n: " + assignments);
    ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(sKmeans);
    try {
        eval.evaluateClusterer(data);
    } catch (Exception e) {
        e.printStackTrace();
    }
    int classesToClustersMap[] = eval.getClassesToClusters();
    for (int i = 0; i < classesToClustersMap.length; i++) {
        if (assignments.get(i).equals(((Integer) classesToClustersMap[(int) data.get(i).classValue()]))) {
            ((Instances) sampled_data).add(data.get(i));
        }
    }
    return ((Instances) sampled_data);
}
From source file: tr.gov.ulakbim.jDenetX.experiments.wrappers.EvalActiveBoostingID.java
License: Open Source License
public static Instances clusterInstances(Instances data) {
    XMeans xmeans = new XMeans();
    Remove filter = new Remove();
    Instances dataClusterer = null;
    if (data == null) {
        throw new NullPointerException("Data is null at clusteredInstances method");
    }
    // Get the attributes from the data for creating the sampled_data object
    ArrayList<Attribute> attrList = new ArrayList<Attribute>();
    Enumeration attributes = data.enumerateAttributes();
    while (attributes.hasMoreElements()) {
        attrList.add((Attribute) attributes.nextElement());
    }
    Instances sampled_data = new Instances(data.relationName(), attrList, 0);
    data.setClassIndex(data.numAttributes() - 1);
    sampled_data.setClassIndex(data.numAttributes() - 1);
    filter.setAttributeIndices("" + (data.classIndex() + 1));
    data.remove(0); // In the Wavelet Stream of MOA the first element always comes without a class
    try {
        filter.setInputFormat(data);
        dataClusterer = Filter.useFilter(data, filter);
        String[] options = new String[4];
        options[0] = "-L"; // max. iterations
        options[1] = Integer.toString(noOfClassesInPool - 1);
        if (noOfClassesInPool > 2) {
            options[1] = Integer.toString(noOfClassesInPool - 1);
            xmeans.setMinNumClusters(noOfClassesInPool - 1);
        } else {
            options[1] = Integer.toString(noOfClassesInPool);
            xmeans.setMinNumClusters(noOfClassesInPool);
        }
        xmeans.setMaxNumClusters(data.numClasses() + 1);
        System.out.println("No of classes in the pool: " + noOfClassesInPool);
        xmeans.setUseKDTree(true);
        //xmeans.setOptions(options);
        xmeans.buildClusterer(dataClusterer);
        System.out.println("Xmeans\n:" + xmeans);
    } catch (Exception e) {
        e.printStackTrace();
    }
    ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(xmeans);
    try {
        eval.evaluateClusterer(data);
        int classesToClustersMap[] = eval.getClassesToClusters();
        // Check the classes-to-clusters map
        int clusterNo = 0;
        for (int i = 0; i < data.size(); i++) {
            clusterNo = xmeans.clusterInstance(dataClusterer.get(i));
            // Check if the class value of the instance matches the class value of the cluster
            if ((int) data.get(i).classValue() == classesToClustersMap[clusterNo]) {
                sampled_data.add(data.get(i));
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ((Instances) sampled_data);
}
From source file: tubes2ai.AIJKNaiveBayes.java
@Override
public void buildClassifier(Instances i) throws Exception {
    getCapabilities().testWithFail(i);
    nAttribute = i.numAttributes();
    classIndex = i.classIndex();
    Instance inst;
    Attribute att;
    int n_instance = i.numInstances();
    // Initialise the frequency and probability matrices;
    // first, find how many values the class attribute has
    nClassValue = i.attribute(classIndex).numValues();
    freq = new int[nAttribute][][];
    prob = new double[nAttribute][][];
    int a = 0;
    while (a < nAttribute) {
        int nValue = i.attribute(a).numValues();
        if (a != classIndex) {
            freq[a] = new int[nValue][nClassValue];
            prob[a] = new double[nValue][nClassValue];
        } else {
            freq[a] = new int[1][nClassValue];
            prob[a] = new double[1][nClassValue];
        }
        a++;
    }
    // Initialise the matrices with zeros
    a = 0;
    int b;
    int c;
    while (a < nAttribute) { // attributes (outlook, etc.)
        b = 0;
        int nValue = i.attribute(a).numValues();
        while (b < nValue) {
            c = 0;
            if (a == classIndex) {
                freq[a][0][b] = 0;
            } else {
                while (c < nClassValue) {
                    freq[a][b][c] = 0;
                    c++;
                }
            }
            b++;
        }
        a++;
    }
    // Count the frequencies
    a = 0;
    int val;
    int classValue;
    while (a < n_instance) {
        inst = i.get(a);
        b = 0;
        classValue = (int) inst.value(classIndex);
        while (b < nAttribute) {
            val = (int) inst.value(b);
            if (b == classIndex) {
                freq[b][0][classValue]++;
            } else {
                freq[b][val][classValue]++;
            }
            b++;
        }
        a++;
    }
    // Compute the conditional and prior probabilities
    a = 0;
    while (a < nAttribute) {
        b = 0;
        int nValue = i.attribute(a).numValues();
        while (b < nValue) {
            if (a != classIndex) {
                c = 0;
                while (c < nClassValue) {
                    prob[a][b][c] = (double) (freq[a][b][c]) / (double) (freq[classIndex][0][c]);
                    c++;
                }
            } else {
                prob[a][0][b] = (double) freq[a][0][b] / i.numInstances();
            }
            b++;
        }
        a++;
    }
}