List of usage examples for weka.core Instances numAttributes
public int numAttributes()
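For reference, numAttributes() returns the number of attributes in the dataset, counting the class attribute if one is set. A minimal sketch of the idiom that recurs throughout the examples below (the file name data.arff is a placeholder, not from any of the listed projects):

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class NumAttributesDemo {
    public static void main(String[] args) throws Exception {
        // load any ARFF file (placeholder path)
        Instances data = DataSource.read("data.arff");
        // numAttributes() counts every attribute, including the class attribute,
        // so the last attribute sits at index numAttributes() - 1
        data.setClassIndex(data.numAttributes() - 1);
        // iterate over the attribute indices, e.g. to print their names
        for (int i = 0; i < data.numAttributes(); i++)
            System.out.println(i + ": " + data.attribute(i).name());
    }
}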
From source file: cn.ict.zyq.bestConf.bestConf.BestConf.java
License: Open Source License
public static int findBestPerfIndex(Instances data) {
    int idx = data.numAttributes() - 1;
    double bestPerf = data.attributeStats(idx).numericStats.max;
    for (int i = 0; i < data.numInstances(); i++)
        if (data.get(i).value(idx) == bestPerf)
            return i;
    return -1; // should never return -1
}
From source file: cn.ict.zyq.bestConf.bestConf.BestConf.java
License: Open Source License
public static void testCOMT2() throws Exception {
    BestConf bestconf = new BestConf();
    Instances trainingSet = DataIOFile.loadDataFromArffFile("data/trainingBestConf0.arff");
    trainingSet.setClassIndex(trainingSet.numAttributes() - 1);

    Instances samplePoints = LHSInitializer.getMultiDimContinuous(bestconf.getAttributes(),
            InitialSampleSetSize, false);
    samplePoints.insertAttributeAt(trainingSet.classAttribute(), samplePoints.numAttributes());
    samplePoints.setClassIndex(samplePoints.numAttributes() - 1);

    COMT2 comt = new COMT2(samplePoints, COMT2Iteration);
    comt.buildClassifier(trainingSet);

    Evaluation eval = new Evaluation(trainingSet);
    eval.evaluateModel(comt, trainingSet);
    System.err.println(eval.toSummaryString());

    Instance best = comt.getInstanceWithPossibleMaxY(samplePoints.firstInstance());
    Instances bestInstances = new Instances(trainingSet, 2);
    bestInstances.add(best);
    DataIOFile.saveDataToXrffFile("data/trainingBestConf_COMT2.arff", bestInstances);

    // now we output the training set with the class value updated as the predicted value
    Instances output = new Instances(trainingSet, trainingSet.numInstances());
    Enumeration<Instance> enu = trainingSet.enumerateInstances();
    while (enu.hasMoreElements()) {
        Instance ins = enu.nextElement();
        double[] values = ins.toDoubleArray();
        values[values.length - 1] = comt.classifyInstance(ins);
        output.add(ins.copy(values));
    }
    DataIOFile.saveDataToXrffFile("data/trainingBestConf0_predict.xrff", output);
}
From source file: cn.ict.zyq.bestConf.cluster.Main.AutoTestAdjust.java
License: Open Source License
public Instances runExp(Instances samplePoints, String perfAttName) {
    Instances retVal = null;
    if (samplePoints.attribute(perfAttName) == null) {
        Attribute performance = new Attribute(perfAttName);
        samplePoints.insertAttributeAt(performance, samplePoints.numAttributes());
    }
    int pos = samplePoints.numInstances();
    int count = 0;
    for (int i = 0; i < pos; i++) {
        Instance ins = samplePoints.get(i);
        HashMap<String, Double> hm = new HashMap<String, Double>();
        for (int j = 0; j < ins.numAttributes(); j++) {
            hm.put(ins.attribute(j).name(), ins.value(ins.attribute(j)));
        }
        if (Double.isNaN(ins.value(ins.attribute(ins.numAttributes() - 1)))) {
            boolean testRet = this.startTest(hm, i, isInterrupt);
            double y = 0;
            if (!testRet) { // the setting does not work, we skip it
                y = -1;
                count++;
                if (count >= targetTestErrorNum) {
                    System.out.println(
                            "There must be something wrong with the system. Please check and restart.....");
                    System.exit(1);
                }
            } else {
                y = getPerformanceByType(performanceType);
                count = 0;
            }
            ins.setValue(samplePoints.numAttributes() - 1, y);
            writePerfstoFile(ins);
        }
    }
    retVal = samplePoints;
    retVal.setClassIndex(retVal.numAttributes() - 1);
    return retVal;
}
From source file: cn.ict.zyq.bestConf.cluster.Main.AutoTestAdjust.java
License: Open Source License
@Override
public Instances collectPerfs(Instances samplePoints, String perfAttName) {
    Instances retVal = null;
    if (samplePoints.attribute(perfAttName) == null) {
        Attribute performance = new Attribute(perfAttName);
        samplePoints.insertAttributeAt(performance, samplePoints.numAttributes());
    }
    File perfFolder = new File(perfsfilepath);
    int tot = 0;
    if (perfFolder.exists()) {
        // let's get all the name set for the sample points
        Iterator<Instance> itr = samplePoints.iterator();
        TreeSet<String> insNameSet = new TreeSet<String>();
        HashMap<String, Integer> mapping = new HashMap<String, Integer>();
        int pos = 0;
        while (itr.hasNext()) {
            String mdstr = getMD5(itr.next());
            insNameSet.add(mdstr);
            mapping.put(mdstr, pos++);
        }
        // now we collect
        File[] perfFiles = perfFolder.listFiles(new PerfsFileFilter(insNameSet));
        tot = perfFiles.length;
        if (tot > 0)
            isInterrupt = true;
        for (int i = 0; i < tot; i++) {
            Instance ins = samplePoints.get(mapping.get(perfFiles[i].getName()));
            double[] results = getPerf(perfFiles[i].getAbsolutePath());
            if (results != null) {
                ins.setValue(samplePoints.numAttributes() - 1, results[0]);
            }
        }
    }
    retVal = samplePoints;
    retVal.setClassIndex(retVal.numAttributes() - 1);
    System.out.println("Total number of collected performances is: " + tot);
    return retVal;
}
From source file: cn.ict.zyq.bestConf.COMT2.Branch2.java
License: Open Source License
public Instance maxPoint(Instances dataset) throws Exception {
    Instance max = new DenseInstance(dataset.numAttributes());
    max.setDataset(dataset);

    // sum the coefficients of all linear models
    double[] combinedCoefs = null;
    int len = 0;
    for (PreConstructedLinearModel model : linearModelList) {
        if (combinedCoefs == null) {
            len = model.coefficients().length;
            combinedCoefs = new double[len]; // elements default to 0
        }
        for (int i = 0; i < len; i++)
            combinedCoefs[i] += model.coefficients()[i];
    }

    // the max value is obtained at the ends of a range
    for (Map.Entry<Attribute, Range<Double>> ent : rangeMap.entrySet()) {
        int attIdx = ent.getKey().index();
        if (combinedCoefs[attIdx] > 0) { // use the upper bound
            if (ent.getValue().hasUpperBound())
                max.setValue(attIdx, ent.getValue().upperEndpoint());
        } else if (combinedCoefs[attIdx] < 0) { // use the lower bound
            if (ent.getValue().hasLowerBound())
                max.setValue(attIdx, ent.getValue().lowerEndpoint());
        }
    }

    // now we set the predicted value to the average prediction of the models
    double y = 0;
    for (PreConstructedLinearModel model : linearModelList) {
        y += model.classifyInstance(max);
    }
    y /= linearModelList.size();
    max.setClassValue(y);
    return max;
}
From source file: com.daniel.convert.IncrementalClassifier.java
License: Open Source License
/**
 * Expects an ARFF file as first argument (the class attribute is assumed
 * to be the last attribute).
 *
 * @param args the command-line arguments
 * @throws Exception if something goes wrong
 */
public static BayesNet treinar(String[] args) throws Exception {
    // load data
    ArffLoader loader = new ArffLoader();
    loader.setFile(new File(args[0]));
    Instances structure = loader.getStructure();
    structure.setClassIndex(structure.numAttributes() - 1);

    // train the BayesNet incrementally
    BayesNet bayesNet = new BayesNet();
    Instance current;
    while ((current = loader.getNextInstance(structure)) != null) {
        structure.add(current);
    }
    bayesNet.buildClassifier(structure);

    // output generated model
    // System.out.println(bayesNet);

    // test-set BayesNet
    BayesNet bayesNetTest = new BayesNet();

    // test the model
    Evaluation eTest = new Evaluation(structure);
    // eTest.evaluateModel(bayesNet, structure);
    eTest.crossValidateModel(bayesNetTest, structure, 15, new Random(1));

    // print the result as in the Weka Explorer
    String strSummary = eTest.toSummaryString();
    System.out.println(strSummary);

    return bayesNet;
}
From source file: com.dhamacher.sentimentanalysis4tweets.preprocessing.TweetFeatureExtractor.java
License: Apache License
/**
 * Method which constructs the ARFF file for Weka with the training data.
 */
public static void constructModel() {
    Instances instdata = null;
    try {
        FastVector atts = new FastVector();
        atts.addElement(new Attribute("content", (FastVector) null));

        FastVector fvClassVal = new FastVector(4);
        fvClassVal.addElement("");
        fvClassVal.addElement("neutral");
        fvClassVal.addElement("negative");
        fvClassVal.addElement("positive");
        Attribute ClassAttribute = new Attribute("Class", fvClassVal);
        atts.addElement(ClassAttribute);

        instdata = new Instances("tweetData", atts, 0);

        CsvReader data = new CsvReader("../classified data/traindata.csv");
        int i = 0;
        while (data.readRecord()) {
            double[] vals = new double[instdata.numAttributes()];
            String class_id = data.get(0);
            switch (Integer.parseInt(class_id)) {
            case 0:
                class_id = "negative";
                break;
            case 2:
                class_id = "neutral";
                break;
            case 4:
                class_id = "positive";
                break;
            }
            String tweet_content = data.get(5);
            Instance iInst = new Instance(2);
            iInst.setValue((Attribute) atts.elementAt(0), tweet_content);
            iInst.setValue((Attribute) atts.elementAt(1), class_id);
            instdata.add(iInst);
            System.out.println("[" + i + "] " + class_id + ":" + tweet_content);
            i++;
        }
        data.close();

        StringToWordVector filter = new StringToWordVector();
        instdata.setClassIndex(instdata.numAttributes() - 1);
        filter.setInputFormat(instdata);
        Instances newdata = Filter.useFilter(instdata, filter);

        ArffSaver saver = new ArffSaver();
        saver.setInstances(newdata);
        saver.setFile(new File("./data/train2data.arff"));
        saver.writeBatch();
    } catch (Exception ex) {
        Logger.getLogger(TweetFeatureExtractor.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file: com.edwardraff.WekaMNIST.java
License: Open Source License
public static void main(String[] args) throws IOException, Exception {
    String folder = args[0];
    String trainPath = folder + "MNISTtrain.arff";
    String testPath = folder + "MNISTtest.arff";

    System.out.println("Weka Timings");
    Instances mnistTrainWeka = new Instances(new BufferedReader(new FileReader(new File(trainPath))));
    mnistTrainWeka.setClassIndex(mnistTrainWeka.numAttributes() - 1);
    Instances mnistTestWeka = new Instances(new BufferedReader(new FileReader(new File(testPath))));
    mnistTestWeka.setClassIndex(mnistTestWeka.numAttributes() - 1);

    // normalize range into [0, 1]
    Normalize normalizeFilter = new Normalize();
    normalizeFilter.setInputFormat(mnistTrainWeka);
    mnistTestWeka = Normalize.useFilter(mnistTestWeka, normalizeFilter);
    mnistTrainWeka = Normalize.useFilter(mnistTrainWeka, normalizeFilter);

    long start, end;

    System.out.println("RBF SVM (Full Cache)");
    SMO smo = new SMO();
    smo.setKernel(new RBFKernel(mnistTrainWeka, 0 /* 0 causes Weka to cache the whole matrix... */, 0.015625));
    smo.setC(8.0);
    smo.setBuildLogisticModels(false);
    evalModel(smo, mnistTrainWeka, mnistTestWeka);

    System.out.println("RBF SVM (No Cache)");
    smo = new SMO();
    smo.setKernel(new RBFKernel(mnistTrainWeka, 1, 0.015625));
    smo.setC(8.0);
    smo.setBuildLogisticModels(false);
    evalModel(smo, mnistTrainWeka, mnistTestWeka);

    System.out.println("Decision Tree C45");
    J48 wekaC45 = new J48();
    wekaC45.setUseLaplace(false);
    wekaC45.setCollapseTree(false);
    wekaC45.setUnpruned(true);
    wekaC45.setMinNumObj(2);
    wekaC45.setUseMDLcorrection(true);
    evalModel(wekaC45, mnistTrainWeka, mnistTestWeka);

    System.out.println("Random Forest 50 trees");
    // Weka uses different defaults, so let's make sure both use the published way
    int featuresToUse = (int) Math.sqrt(28 * 28);
    RandomForest wekaRF = new RandomForest();
    wekaRF.setNumExecutionSlots(1);
    wekaRF.setMaxDepth(0 /* 0 for unlimited */);
    wekaRF.setNumFeatures(featuresToUse);
    wekaRF.setNumTrees(50);
    evalModel(wekaRF, mnistTrainWeka, mnistTestWeka);

    System.out.println("1-NN (brute)");
    IBk wekaNN = new IBk(1);
    wekaNN.setNearestNeighbourSearchAlgorithm(new LinearNNSearch());
    wekaNN.setCrossValidate(false);
    evalModel(wekaNN, mnistTrainWeka, mnistTestWeka);

    System.out.println("1-NN (Ball Tree)");
    wekaNN = new IBk(1);
    wekaNN.setNearestNeighbourSearchAlgorithm(new BallTree());
    wekaNN.setCrossValidate(false);
    evalModel(wekaNN, mnistTrainWeka, mnistTestWeka);

    System.out.println("1-NN (Cover Tree)");
    wekaNN = new IBk(1);
    wekaNN.setNearestNeighbourSearchAlgorithm(new CoverTree());
    wekaNN.setCrossValidate(false);
    evalModel(wekaNN, mnistTrainWeka, mnistTestWeka);

    System.out.println("Logistic Regression LBFGS lambda = 1e-4");
    Logistic logisticLBFGS = new Logistic();
    logisticLBFGS.setRidge(1e-4);
    logisticLBFGS.setMaxIts(500);
    evalModel(logisticLBFGS, mnistTrainWeka, mnistTestWeka);

    System.out.println("k-means (Lloyd)");
    int origClassIndex = mnistTrainWeka.classIndex();
    mnistTrainWeka.setClassIndex(-1);
    mnistTrainWeka.deleteAttributeAt(origClassIndex);
    {
        long totalTime = 0;
        for (int i = 0; i < 10; i++) {
            SimpleKMeans wekaKMeans = new SimpleKMeans();
            wekaKMeans.setNumClusters(10);
            wekaKMeans.setNumExecutionSlots(1);
            wekaKMeans.setFastDistanceCalc(true);
            start = System.currentTimeMillis();
            wekaKMeans.buildClusterer(mnistTrainWeka);
            end = System.currentTimeMillis();
            totalTime += (end - start);
        }
        System.out.println("\tClustering took: " + (totalTime / 10.0) / 1000.0 + " on average");
    }
}
From source file: com.github.fracpete.multisearch.ExampleHelper.java
License: Open Source License
/**
 * Loads the dataset from disk.
 *
 * @param filename the file to load
 * @param classIndex the 1-based class index ("first" and "last" accepted as well);
 *                   uses the last attribute if null
 * @return the dataset
 * @throws Exception if loading of data fails
 */
public static Instances loadData(String filename, String classIndex) throws Exception {
    Instances data = DataSource.read(filename);
    if (classIndex != null) {
        if (classIndex.equals("first"))
            data.setClassIndex(0);
        else if (classIndex.equals("last"))
            data.setClassIndex(data.numAttributes() - 1);
        else
            data.setClassIndex(Integer.parseInt(classIndex) - 1);
    } else {
        data.setClassIndex(data.numAttributes() - 1);
    }
    return data;
}
From source file: com.guidefreitas.locator.services.PredictionService.java
public Evaluation train() {
    try {
        String arffData = this.generateTrainData();
        InputStream stream = new ByteArrayInputStream(arffData.getBytes(StandardCharsets.UTF_8));
        DataSource source = new DataSource(stream);
        Instances data = source.getDataSet();
        data.setClassIndex(data.numAttributes() - 1);

        this.classifier = new LibSVM();
        this.classifier.setKernelType(new SelectedTag(LibSVM.KERNELTYPE_POLYNOMIAL, LibSVM.TAGS_KERNELTYPE));
        this.classifier.setSVMType(new SelectedTag(LibSVM.SVMTYPE_C_SVC, LibSVM.TAGS_SVMTYPE));

        Evaluation eval = new Evaluation(data);
        eval.crossValidateModel(this.classifier, data, 10, new Random(1));
        this.classifier.buildClassifier(data);
        return eval;
    } catch (Exception ex) {
        Logger.getLogger(PredictionService.class.getName()).log(Level.SEVERE, null, ex);
    }
    return null;
}