List of usage examples for the weka.core.Instances copy constructor Instances
public Instances(Instances dataset)
From source file:ca.uqac.florentinth.speakerauthentication.Learning.Learning.java
License:Apache License
/**
 * Classifies the last instance of the testing dataset with a previously serialized classifier.
 *
 * <p>When the dataset does not declare a class attribute, the last attribute is used. A
 * prediction is only made when the last instance is currently labelled with {@code username};
 * in that case the returned map holds a single entry mapping {@code username} to the predicted
 * label. An empty dataset yields an empty map.
 *
 * @param username label the last instance must carry for a prediction to be made
 * @param trainingModel stream holding the serialized classifier; closed by this method
 * @param testingDataset reader over the testing dataset; closed once the dataset has been read
 * @return predictions keyed by user name, empty when no prediction was made
 * @throws Exception if the model cannot be deserialized, the dataset cannot be parsed or the
 *     classification fails
 */
public Map<String, String> makePrediction(String username, FileInputStream trainingModel,
        FileReader testingDataset) throws Exception {
    Map<String, String> predictions = new HashMap<>();

    // NOTE(review): this is native Java deserialization; only load model files from a trusted
    // source.
    weka.classifiers.Classifier classifier;
    // Both streams are declared as resources so the underlying file is released even when the
    // serialization header or the object itself cannot be read.
    try (FileInputStream modelStream = trainingModel;
            ObjectInputStream inputStream = new ObjectInputStream(modelStream)) {
        classifier = (weka.classifiers.Classifier) inputStream.readObject();
    }

    Instances instances;
    try (BufferedReader reader = new BufferedReader(testingDataset)) {
        instances = new Instances(reader);
    }

    if (instances.classIndex() == -1) {
        instances.setClassIndex(instances.numAttributes() - 1);
    }

    // Nothing to classify: avoid asking for the instance at index -1.
    if (instances.numInstances() == 0) {
        return predictions;
    }

    int last = instances.numInstances() - 1;
    if (instances.instance(last).stringValue(instances.classIndex()).equals(username)) {
        double label = classifier.classifyInstance(instances.instance(last));
        instances.instance(last).setClassValue(label);
        predictions.put(username, instances.instance(last).stringValue(instances.classIndex()));
    }

    return predictions;
}
From source file:categorization.SpectralWEKA.java
License:Open Source License
/** * Generates a clusterer by the mean of spectral clustering algorithm. * * @param data set of instances serving as training data * @exception Exception if the clusterer has not been generated successfully *///from www . j a va 2 s.co m public void buildClusterer(Instances data) throws java.lang.Exception { m_Sequences = new Instances(data); int n = data.numInstances(); int k = data.numAttributes(); DoubleMatrix2D w; if (useSparseMatrix) w = DoubleFactory2D.sparse.make(n, n); else w = DoubleFactory2D.dense.make(n, n); double[][] v1 = new double[n][]; for (int i = 0; i < n; i++) v1[i] = data.instance(i).toDoubleArray(); v = DoubleFactory2D.dense.make(v1); double sigma_sq = sigma * sigma; //Sets up similarity matrix for (int i = 0; i < n; i++) for (int j = i; j < n; j++) { /*double dist = distnorm2(v.viewRow(i), v.viewRow(j)); if((r == -1) || (dist < r)) { double sim = Math.exp(- (dist * dist) / (2 * sigma_sq)); w.set(i, j, sim); w.set(j, i, sim); }*/ /* String [] key = {data.instance(i).stringValue(0), data.instance(j).stringValue(0)}; System.out.println(key[0]); System.out.println(key[1]); System.out.println(simScoreMap.containsKey(key)); Double simValue = simScoreMap.get(key);*/ double sim = sim_matrix[i][j]; w.set(i, j, sim); w.set(j, i, sim); } //Partitions points int[][] p = partition(w, alpha_star); //Deploys results numOfClusters = p.length; cluster = new int[n]; for (int i = 0; i < p.length; i++) for (int j = 0; j < p[i].length; j++) cluster[p[i][j]] = i; //System.out.println("Final partition:"); // UtilsJS.printMatrix(p); // System.out.println("Cluster:\n"); // UtilsJS.printArray(cluster); this.numOfClusters = cluster[Utils.maxIndex(cluster)] + 1; // System.out.println("Num clusters:\t"+this.numOfClusters); }
From source file:cba.Apriori.java
License:Open Source License
/**
 * Method that generates all large itemsets with a minimum support, and from
 * these all association rules with a minimum confidence.
 *
 * <p>The method works on a private copy of {@code instances}. The minimum support is lowered
 * cycle by cycle (by {@code m_delta}) until the requested number of rules has been found, the
 * lower bound for the minimum support has been passed, or the required number of supporting
 * instances drops below one. The rules found are left in {@code m_allTheRules}.
 *
 * @param instances the instances to be used for generating the associations
 * @throws Exception if rules can't be built successfully, if class association rules are
 *     requested with a metric type other than confidence, or if the class index is invalid
 */
public void buildAssociations(Instances instances) throws Exception {

    double[] confidences, supports;
    int[] indices;
    FastVector[] sortedRuleSet;
    int necSupport = 0;

    // Work on a copy so the caller's dataset is not affected by the steps below.
    instances = new Instances(instances);

    if (m_removeMissingCols) {
        instances = removeMissingColumns(instances);
    }
    if (m_car && m_metricType != CONFIDENCE)
        throw new Exception("For CAR-Mining metric type has to be confidence!");

    // only set class index if CAR is requested
    if (m_car) {
        if (m_classIndex == -1) {
            // -1 selects the last attribute
            instances.setClassIndex(instances.numAttributes() - 1);
        } else if (m_classIndex <= instances.numAttributes() && m_classIndex > 0) {
            // m_classIndex is 1-based, the dataset index is 0-based
            instances.setClassIndex(m_classIndex - 1);
        } else {
            throw new Exception("Invalid class index.");
        }
    }

    // can associator handle the data?
    getCapabilities().testWithFail(instances);

    m_cycles = 0;
    if (m_car) {
        //m_instances does not contain the class attribute
        m_instances = LabeledItemSet.divide(instances, false);

        //m_onlyClass contains only the class attribute
        m_onlyClass = LabeledItemSet.divide(instances, true);
    } else
        m_instances = instances;

    if (m_car && m_numRules == Integer.MAX_VALUE) {
        // Set desired minimum support
        m_minSupport = m_lowerBoundMinSupport;
    } else {
        // Decrease minimum support until desired number of rules found.
        m_minSupport = m_upperBoundMinSupport - m_delta;
        // never start below the lower bound
        m_minSupport = (m_minSupport < m_lowerBoundMinSupport) ? m_lowerBoundMinSupport : m_minSupport;
    }

    do {

        // Reserve space for variables
        m_Ls = new FastVector();
        m_hashtables = new FastVector();
        m_allTheRules = new FastVector[6];
        m_allTheRules[0] = new FastVector();
        m_allTheRules[1] = new FastVector();
        m_allTheRules[2] = new FastVector();
        // slots 3-5 are only allocated when a metric other than confidence or a
        // significance level is in use
        if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
            m_allTheRules[3] = new FastVector();
            m_allTheRules[4] = new FastVector();
            m_allTheRules[5] = new FastVector();
        }
        sortedRuleSet = new FastVector[6];
        sortedRuleSet[0] = new FastVector();
        sortedRuleSet[1] = new FastVector();
        sortedRuleSet[2] = new FastVector();
        if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
            sortedRuleSet[3] = new FastVector();
            sortedRuleSet[4] = new FastVector();
            sortedRuleSet[5] = new FastVector();
        }
        if (!m_car) {
            // Find large itemsets and rules
            findLargeItemSets();
            if (m_significanceLevel != -1 || m_metricType != CONFIDENCE)
                findRulesBruteForce();
            else
                findRulesQuickly();
        } else {
            findLargeCarItemSets();
            findCarRulesQuickly();
        }

        // Sort rules according to their support.
        // The supports are negated before the stable sort and the resulting index array is
        // then read from its end (indices[j - i]); the item sets in slot 1 provide the
        // support values.
        int j = m_allTheRules[2].size() - 1;
        supports = new double[m_allTheRules[2].size()];
        for (int i = 0; i < (j + 1); i++)
            supports[j - i] = ((double) ((ItemSet) m_allTheRules[1].elementAt(j - i)).support()) * (-1);
        indices = Utils.stableSort(supports);
        for (int i = 0; i < (j + 1); i++) {
            sortedRuleSet[0].addElement(m_allTheRules[0].elementAt(indices[j - i]));
            sortedRuleSet[1].addElement(m_allTheRules[1].elementAt(indices[j - i]));
            sortedRuleSet[2].addElement(m_allTheRules[2].elementAt(indices[j - i]));
            if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
                sortedRuleSet[3].addElement(m_allTheRules[3].elementAt(indices[j - i]));
                sortedRuleSet[4].addElement(m_allTheRules[4].elementAt(indices[j - i]));
                sortedRuleSet[5].addElement(m_allTheRules[5].elementAt(indices[j - i]));
            }
        }

        // Sort rules according to their confidence
        // (m_allTheRules is emptied first and refilled from the support-sorted copy)
        m_allTheRules[0].removeAllElements();
        m_allTheRules[1].removeAllElements();
        m_allTheRules[2].removeAllElements();
        if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
            m_allTheRules[3].removeAllElements();
            m_allTheRules[4].removeAllElements();
            m_allTheRules[5].removeAllElements();
        }
        confidences = new double[sortedRuleSet[2].size()];
        // slot holding the values of the selected metric
        // NOTE(review): presumably CONFIDENCE == 0 so that slot 2 is the confidence - confirm
        int sortType = 2 + m_metricType;

        for (int i = 0; i < sortedRuleSet[2].size(); i++)
            confidences[i] = ((Double) sortedRuleSet[sortType].elementAt(i)).doubleValue();
        indices = Utils.stableSort(confidences);
        // Walk the sorted indices from the end and keep at most m_numRules rules.
        for (int i = sortedRuleSet[0].size() - 1; (i >= (sortedRuleSet[0].size() - m_numRules)) && (i >= 0); i--) {
            m_allTheRules[0].addElement(sortedRuleSet[0].elementAt(indices[i]));
            m_allTheRules[1].addElement(sortedRuleSet[1].elementAt(indices[i]));
            m_allTheRules[2].addElement(sortedRuleSet[2].elementAt(indices[i]));
            if (m_metricType != CONFIDENCE || m_significanceLevel != -1) {
                m_allTheRules[3].addElement(sortedRuleSet[3].elementAt(indices[i]));
                m_allTheRules[4].addElement(sortedRuleSet[4].elementAt(indices[i]));
                m_allTheRules[5].addElement(sortedRuleSet[5].elementAt(indices[i]));
            }
        }

        if (m_verbose) {
            if (m_Ls.size() > 1) {
                System.out.println(toString());
            }
        }

        // Lower the minimum support for the next cycle. The step is clamped to the lower
        // bound, except when the lower bound has already been reached: then the support is
        // pushed below it, which ends the loop through the grOrEq test.
        if (m_minSupport == m_lowerBoundMinSupport || m_minSupport - m_delta > m_lowerBoundMinSupport)
            m_minSupport -= m_delta;
        else
            m_minSupport = m_lowerBoundMinSupport;

        // number of instances needed to reach the new minimum support
        necSupport = Math.round((float) ((m_minSupport * (double) m_instances.numInstances()) + 0.5));

        m_cycles++;
    } while ((m_allTheRules[0].size() < m_numRules) && (Utils.grOrEq(m_minSupport, m_lowerBoundMinSupport))
            && (necSupport >= 1));
    // Compensates for the decrement applied at the end of the final cycle.
    m_minSupport += m_delta;
}
From source file:cba.AssociatorEvaluation.java
License:Open Source License
/** * Evaluates the associator with the given commandline options and returns * the evaluation string.//from w w w.j a v a 2s . c o m * * @param associator the Associator to evaluate * @param options the commandline options * @return the generated output string * @throws Exception if evaluation fails */ public static String evaluate(Associator associator, String[] options) throws Exception { String trainFileString = ""; String graphFileName = ""; AssociatorEvaluation eval; DataSource loader; // help? if (Utils.getFlag('h', options)) throw new Exception("\nHelp requested.\n" + makeOptionString(associator)); try { // general options trainFileString = Utils.getOption('t', options); if (trainFileString.length() == 0) throw new Exception("No training file given!"); loader = new DataSource(trainFileString); graphFileName = Utils.getOption('g', options); // associator specific options if (associator instanceof OptionHandler) { ((OptionHandler) associator).setOptions(options); } // left-over options? Utils.checkForRemainingOptions(options); } catch (Exception e) { throw new Exception("\nWeka exception: " + e.getMessage() + "\n" + makeOptionString(associator)); } // load file and build associations eval = new AssociatorEvaluation(); String results = eval.evaluate(associator, new Instances(loader.getDataSet())); // If associator is drawable output string describing graph if ((associator instanceof Drawable) && (graphFileName.length() != 0)) { BufferedWriter writer = new BufferedWriter(new FileWriter(graphFileName)); writer.write(((Drawable) associator).graph()); writer.newLine(); writer.flush(); writer.close(); } return results; }
From source file:CEP.GenerateStream.java
public void MakeStream() { File file = new File("C:\\Users\\Weary\\Documents\\w4ndata\\w4ndata.arff"); String pc = System.getProperty("user.dir").toString(); if (pc.contains("gs023850")) { file = new File("C:\\Users\\gs023850\\Documents\\w4ndata\\w4ndata.arff"); }//from ww w . j av a2 s.c om try { ArffLoader loader = new ArffLoader(); loader.setFile(file); Instances structure = loader.getStructure(); int j = structure.numAttributes(); HeaderManager.SetStructure(new Instances(structure)); Instance current; long previousTimeStamp = 0; String timeStamp = "0"; long wait = 0; while ((current = loader.getNextInstance(structure)) != null) { timeStamp = current.stringValue(0); cepRT.sendEvent(current); System.out.println("Sending event"); previousTimeStamp = WaitTime(timeStamp, previousTimeStamp, wait); } } catch (Exception e) { if (e.equals(new FileNotFoundException())) { System.out.println("File not found - could not generate stream"); return; } else if (e.equals(new IOException())) { System.out.println("Unable to read file"); } else if (e.equals(new NumberFormatException())) { System.out.println("Unable to convert to time to number - bad time"); } else { System.out.println(e.toString()); } } }
From source file:cezeri.evaluater.FactoryEvaluation.java
public static Evaluation performCrossValidate(Classifier model, Instances datax, int folds, boolean show_text, boolean show_plot, TFigureAttribute attr) { Random rand = new Random(1); Instances randData = new Instances(datax); randData.randomize(rand);// w w w . j av a2 s . c o m if (randData.classAttribute().isNominal()) { randData.stratify(folds); } Evaluation eval = null; try { // perform cross-validation eval = new Evaluation(randData); // double[] simulated = new double[0]; // double[] observed = new double[0]; // double[] sim = new double[0]; // double[] obs = new double[0]; for (int n = 0; n < folds; n++) { Instances train = randData.trainCV(folds, n, rand); Instances validation = randData.testCV(folds, n); // build and evaluate classifier Classifier clsCopy = Classifier.makeCopy(model); clsCopy.buildClassifier(train); // sim = eval.evaluateModel(clsCopy, validation); // obs = validation.attributeToDoubleArray(validation.classIndex()); // if (show_plot) { // double[][] d = new double[2][sim.length]; // d[0] = obs; // d[1] = sim; // CMatrix f1 = CMatrix.getInstance(d); // f1.transpose().plot(attr); // } // if (show_text) { // // output evaluation // System.out.println(); // System.out.println("=== Setup for each Cross Validation fold==="); // System.out.println("Classifier: " + model.getClass().getName() + " " + Utils.joinOptions(model.getOptions())); // System.out.println("Dataset: " + randData.relationName()); // System.out.println("Folds: " + folds); // System.out.println("Seed: " + 1); // System.out.println(); // System.out.println(eval.toSummaryString("=== " + folds + "-fold Cross-validation ===", false)); // } simulated = FactoryUtils.concatenate(simulated, eval.evaluateModel(clsCopy, validation)); observed = FactoryUtils.concatenate(observed, validation.attributeToDoubleArray(validation.classIndex())); // simulated = FactoryUtils.mean(simulated,eval.evaluateModel(clsCopy, validation)); // observed = 
FactoryUtils.mean(observed,validation.attributeToDoubleArray(validation.classIndex())); } if (show_plot) { double[][] d = new double[2][simulated.length]; d[0] = observed; d[1] = simulated; CMatrix f1 = CMatrix.getInstance(d); attr.figureCaption = "overall performance"; f1.transpose().plot(attr); } if (show_text) { // output evaluation System.out.println(); System.out.println("=== Setup for Overall Cross Validation==="); System.out.println( "Classifier: " + model.getClass().getName() + " " + Utils.joinOptions(model.getOptions())); System.out.println("Dataset: " + randData.relationName()); System.out.println("Folds: " + folds); System.out.println("Seed: " + 1); System.out.println(); System.out.println(eval.toSummaryString("=== " + folds + "-fold Cross-validation ===", false)); } } catch (Exception ex) { Logger.getLogger(FactoryEvaluation.class.getName()).log(Level.SEVERE, null, ex); } return eval; }
From source file:cezeri.evaluater.FactoryEvaluation.java
public static Evaluation performCrossValidateTestAlso(Classifier model, Instances datax, Instances test, boolean show_text, boolean show_plot) { TFigureAttribute attr = new TFigureAttribute(); Random rand = new Random(1); Instances randData = new Instances(datax); randData.randomize(rand);/* w ww . j a v a 2 s . co m*/ Evaluation eval = null; int folds = randData.numInstances(); try { eval = new Evaluation(randData); for (int n = 0; n < folds; n++) { // randData.randomize(rand); // Instances train = randData; Instances train = randData.trainCV(folds, n); // Instances train = randData.trainCV(folds, n, rand); Classifier clsCopy = Classifier.makeCopy(model); clsCopy.buildClassifier(train); Instances validation = randData.testCV(folds, n); // Instances validation = test.testCV(test.numInstances(), n%test.numInstances()); // CMatrix.fromInstances(train).showDataGrid(); // CMatrix.fromInstances(validation).showDataGrid(); simulated = FactoryUtils.concatenate(simulated, eval.evaluateModel(clsCopy, validation)); observed = FactoryUtils.concatenate(observed, validation.attributeToDoubleArray(validation.classIndex())); } if (show_plot) { double[][] d = new double[2][simulated.length]; d[0] = observed; d[1] = simulated; CMatrix f1 = CMatrix.getInstance(d); attr.figureCaption = "overall performance"; f1.transpose().plot(attr); } if (show_text) { // output evaluation System.out.println(); System.out.println("=== Setup for Overall Cross Validation==="); System.out.println( "Classifier: " + model.getClass().getName() + " " + Utils.joinOptions(model.getOptions())); System.out.println("Dataset: " + randData.relationName()); System.out.println("Folds: " + folds); System.out.println("Seed: " + 1); System.out.println(); System.out.println(eval.toSummaryString("=== " + folds + "-fold Cross-validation ===", false)); } } catch (Exception ex) { Logger.getLogger(FactoryEvaluation.class.getName()).log(Level.SEVERE, null, ex); } return eval; }
From source file:cezeri.evaluater.FactoryEvaluation.java
private static Evaluation doTest(boolean isTrained, Classifier model, Instances train, Instances test, boolean show_text, boolean show_plot, TFigureAttribute attr) { Instances data = new Instances(train); Random rand = new Random(1); data.randomize(rand);// www. j av a2s . co m Evaluation eval = null; try { // double[] simulated = null; eval = new Evaluation(train); if (isTrained) { simulated = eval.evaluateModel(model, test); } else { Classifier clsCopy = Classifier.makeCopy(model); clsCopy.buildClassifier(train); simulated = eval.evaluateModel(clsCopy, test); } if (show_plot) { observed = test.attributeToDoubleArray(test.classIndex()); double[][] d = new double[2][simulated.length]; d[0] = observed; d[1] = simulated; CMatrix f1 = CMatrix.getInstance(d); String[] items = { "Observed", "Simulated" }; attr.items = items; attr.figureCaption = model.getClass().getCanonicalName(); f1.transpose().plot(attr); // if (attr.axis[0].isEmpty() && attr.axis[1].isEmpty()) { // f1.transpose().plot(attr); // } else { // f1.transpose().plot(model.getClass().getCanonicalName(), attr.items, attr.axis); // } } if (show_text) { System.out.println(); System.out.println("=== Setup for Test ==="); System.out.println( "Classifier: " + model.getClass().getName() + " " + Utils.joinOptions(model.getOptions())); System.out.println("Dataset: " + test.relationName()); System.out.println(); System.out.println(eval.toSummaryString("=== Test Results ===", false)); } } catch (Exception ex) { Logger.getLogger(FactoryEvaluation.class.getName()).log(Level.SEVERE, null, ex); } return eval; }
From source file:cezeri.feature.selection.FeatureSelectionInfluence.java
public static Influence[] getMostDiscriminativeFeature(String filePath, Classifier model) { Influence[] ret = null;//from w w w . j a va 2 s . com try { Instances data = DataSource.read(filePath); ret = new Influence[data.numAttributes() - 1]; data.setClassIndex(data.numAttributes() - 1); // other options int seed = 1; int folds = 10; // randomize data Instances randData = new Instances(data); Random rand = new Random(seed); randData.randomize(rand); Evaluation evalBase = getEvaluation(randData, model, folds); double accBase = evalBase.correct() / evalBase.numInstances() * 100; double nf = randData.numAttributes(); for (int j = 0; j < nf - 1; j++) { ret[j] = new Influence(); String str = randData.attribute(j).name(); Attribute att = randData.attribute(j); randData.deleteAttributeAt(j); Evaluation evalTemp = getEvaluation(randData, model, folds); double accTemp = evalTemp.correct() / evalTemp.numInstances() * 100; double tempInfluence = accBase - accTemp; ret[j].attributeName = str; ret[j].infVal = tempInfluence; randData.insertAttributeAt(att, j); } sortInfluenceArray(ret); } catch (Exception ex) { Logger.getLogger(FeatureSelectionInfluence.class.getName()).log(Level.SEVERE, null, ex); } return ret; }
From source file:cezeri.utils.FactoryInstance.java
public static Instances getSubsetData(Instances data, String[] attList) { Instances temp = new Instances(data); for (int i = 0; i < data.numAttributes(); i++) { if (!temp.attribute(0).equals(temp.classAttribute())) { temp.deleteAttributeAt(0);//www. j av a 2 s . com } } double[][] m = new double[attList.length + 1][data.numInstances()]; for (int i = 0; i < attList.length; i++) { int n = attList.length - 1 - i; String str = attList[n]; Attribute t = data.attribute(str); double[] d = data.attributeToDoubleArray(t.index()); m[n] = d; temp.insertAttributeAt(t, 0); } m[attList.length] = data.attributeToDoubleArray(data.classIndex()); m = CMatrix.getInstance(m).transpose().get2DArrayDouble(); FastVector att = new FastVector(); for (int i = 0; i < temp.numAttributes(); i++) { att.addElement(temp.attribute(i)); } Instances ret = new Instances(temp.relationName(), att, m.length); for (int i = 0; i < m.length; i++) { Instance ins = new Instance(m[0].length); for (int j = 0; j < m[0].length; j++) { ins.setValue(j, m[i][j]); } ret.add(ins); } ret.setClassIndex(temp.classIndex()); return ret; }