List of usage examples for weka.core.Instances.setClassIndex
public void setClassIndex(int classIndex)
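Before the examples, a minimal sketch of the canonical call. ARFF files do not mark a class attribute, so callers must set one explicitly (most commonly the last attribute) before training or evaluating; the file name iris.arff below is a placeholder.

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class SetClassIndexBasics {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("iris.arff"); // placeholder file name
        data.setClassIndex(data.numAttributes() - 1);  // mark the last attribute as the class
        System.out.println("Class attribute: " + data.classAttribute().name());
    }
}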
From source file:cn.ict.zyq.bestConf.cluster.Main.AutoTestAdjust.java
License:Open Source License
public Instances runExp(Instances samplePoints, String perfAttName) {
    Instances retVal = null;
    if (samplePoints.attribute(perfAttName) == null) {
        Attribute performance = new Attribute(perfAttName);
        samplePoints.insertAttributeAt(performance, samplePoints.numAttributes());
    }
    int pos = samplePoints.numInstances();
    int count = 0;
    for (int i = 0; i < pos; i++) {
        Instance ins = samplePoints.get(i);
        HashMap hm = new HashMap();
        for (int j = 0; j < ins.numAttributes(); j++) {
            hm.put(ins.attribute(j).name(), ins.value(ins.attribute(j)));
        }
        boolean testRet;
        if (Double.isNaN(ins.value(ins.attribute(ins.numAttributes() - 1)))) {
            testRet = this.startTest(hm, i, isInterrupt);
            double y = 0;
            if (!testRet) { // the setting does not work, we skip it
                y = -1;
                count++;
                if (count >= targetTestErrorNum) {
                    System.out.println(
                            "There must be something wrong with the system. Please check and restart.....");
                    System.exit(1);
                }
            } else {
                y = getPerformanceByType(performanceType);
                count = 0;
            }
            ins.setValue(samplePoints.numAttributes() - 1, y);
            writePerfstoFile(ins);
        } else {
            continue;
        }
    }
    retVal = samplePoints;
    retVal.setClassIndex(retVal.numAttributes() - 1);
    return retVal;
}
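The example above appends a performance attribute and then marks it as the class. A stripped-down sketch of that append-then-set pattern (the attribute name is illustrative):

import weka.core.Attribute;
import weka.core.Instances;

public final class AppendPerformance {
    // Appends a numeric attribute as the last column and marks it as the class.
    static Instances appendAsClass(Instances data, String attName) {
        if (data.attribute(attName) == null) {
            data.insertAttributeAt(new Attribute(attName), data.numAttributes());
        }
        data.setClassIndex(data.numAttributes() - 1); // the new attribute becomes the class
        return data;
    }
}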
From source file:cn.ict.zyq.bestConf.cluster.Main.AutoTestAdjust.java
License:Open Source License
@Override
public Instances collectPerfs(Instances samplePoints, String perfAttName) {
    Instances retVal = null;
    if (samplePoints.attribute(perfAttName) == null) {
        Attribute performance = new Attribute(perfAttName);
        samplePoints.insertAttributeAt(performance, samplePoints.numAttributes());
    }
    File perfFolder = new File(perfsfilepath);
    int tot = 0;
    if (perfFolder.exists()) {
        // let's get all the name set for the sample points
        Iterator<Instance> itr = samplePoints.iterator();
        TreeSet<String> insNameSet = new TreeSet<String>();
        HashMap<String, Integer> mapping = new HashMap<String, Integer>();
        int pos = 0;
        while (itr.hasNext()) {
            String mdstr = getMD5(itr.next());
            insNameSet.add(mdstr);
            mapping.put(mdstr, new Integer(pos++));
        }
        // now we collect
        File[] perfFiles = perfFolder.listFiles(new PerfsFileFilter(insNameSet));
        tot = perfFiles.length;
        if (tot > 0)
            isInterrupt = true;
        for (int i = 0; i < tot; i++) {
            Instance ins = samplePoints.get(mapping.get(perfFiles[i].getName()));
            double[] results = getPerf(perfFiles[i].getAbsolutePath());
            if (results != null) {
                ins.setValue(samplePoints.numAttributes() - 1, results[0]);
            }
        }
    }
    retVal = samplePoints;
    retVal.setClassIndex(retVal.numAttributes() - 1);
    System.out.println("Total number of collected performances is : " + tot);
    return retVal;
}
From source file:com.actelion.research.orbit.imageAnalysis.models.OrbitModel.java
License:Open Source License
/**
 * Converts models from an old Weka version.
 *
 * @param model the model to fix (may be null)
 */
public static void fixOldModelVersion(final OrbitModel model) {
    if (model == null)
        return; // nothing to fix
    boolean oldWekaVersion = false;
    try {
        model.getStructure().classAttribute().numValues();
    } catch (NullPointerException ne) {
        oldWekaVersion = true;
    }
    // apply old model fix?
    if (oldWekaVersion) {
        logger.info("model from old weka version (< 3.7.11) detected, trying to apply fixes");
        int numClasses = model.getClassShapes().size();
        TissueFeatures tf = new TissueFeatures(model.getFeatureDescription(), null);
        int numFeatures = tf.getFeaturesPerSample() * model.getFeatureDescription().getSampleSize() + 1;
        ArrayList<Attribute> attrInfo = new ArrayList<Attribute>(numFeatures);
        for (int a = 0; a < numFeatures - 1; a++) {
            Attribute attr = new Attribute("a" + a);
            attrInfo.add(attr);
        }
        List<String> classValues = new ArrayList<String>(numClasses);
        for (int i = 0; i < numClasses; i++) {
            classValues.add((i + 1) + ".0"); // "1.0", "2.0", ...
        }
        Attribute classAttr = new Attribute("class", classValues);
        attrInfo.add(classAttr);
        Instances structure = new Instances("trainSet pattern classes", attrInfo, 0);
        structure.setClassIndex(numFeatures - 1);
        model.setStructure(structure);
        try {
            if (model.getClassifier() != null && model.getClassifier().getClassifier() != null
                    && model.getClassifier().getClassifier() instanceof SMO) {
                SMO smo = ((SMO) model.getClassifier().getClassifier());
                Field field = smo.getClass().getDeclaredField("m_classAttribute");
                field.setAccessible(true);
                field.set(smo, classAttr);
                // missing values
                ReplaceMissingValues rmv = new ReplaceMissingValues();
                rmv.setInputFormat(structure);
                Field missing = smo.getClass().getDeclaredField("m_Missing");
                missing.setAccessible(true);
                missing.set(smo, rmv);
                // filter
                Field filter = smo.getClass().getDeclaredField("m_Filter");
                filter.setAccessible(true);
                Filter normalize = (Filter) filter.get(smo);
                RelationalLocator relLoc = new RelationalLocator(structure);
                StringLocator strLoc = new StringLocator(structure);
                Field outputRelAtts = normalize.getClass().getSuperclass().getSuperclass()
                        .getDeclaredField("m_OutputRelAtts");
                outputRelAtts.setAccessible(true);
                outputRelAtts.set(normalize, relLoc);
                Field inputRelAtts = normalize.getClass().getSuperclass().getSuperclass()
                        .getDeclaredField("m_InputRelAtts");
                inputRelAtts.setAccessible(true);
                inputRelAtts.set(normalize, relLoc);
                Field outputStrAtts = normalize.getClass().getSuperclass().getSuperclass()
                        .getDeclaredField("m_OutputStringAtts");
                outputStrAtts.setAccessible(true);
                outputStrAtts.set(normalize, strLoc);
                Field inputStrAtts = normalize.getClass().getSuperclass().getSuperclass()
                        .getDeclaredField("m_InputStringAtts");
                inputStrAtts.setAccessible(true);
                inputStrAtts.set(normalize, strLoc);
                Field outputFormat = normalize.getClass().getSuperclass().getSuperclass()
                        .getDeclaredField("m_OutputFormat");
                outputFormat.setAccessible(true);
                outputFormat.set(normalize, structure);
                logger.info("fixes applied, the model should work with a weka version >= 3.7.11 now");
            } // else: good luck...
        } catch (Exception e) {
            e.printStackTrace();
            logger.error("new weka version fixes could not be applied: " + e.getMessage());
        }
    } // old weka version
    fixOldModelVersion(model.getSegmentationModel());          // fixOldModelVersion can handle null
    fixOldModelVersion(model.getSecondarySegmentationModel()); // fixOldModelVersion can handle null
    fixOldModelVersion(model.getExclusionModel());             // fixOldModelVersion can handle null
}
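The setClassIndex-relevant part of the fix above is the construction of an empty "header" dataset whose last attribute is a nominal class. A minimal sketch of that idiom, with the relation name, attribute names, and class labels assumed for illustration:

import java.util.ArrayList;
import java.util.Arrays;
import weka.core.Attribute;
import weka.core.Instances;

public final class HeaderOnlyStructure {
    // Builds a header-only Instances (capacity 0) with a trailing nominal class.
    static Instances buildHeader(int numNumericAtts) {
        ArrayList<Attribute> atts = new ArrayList<>();
        for (int i = 0; i < numNumericAtts; i++) {
            atts.add(new Attribute("a" + i));                          // numeric feature
        }
        atts.add(new Attribute("class", Arrays.asList("1.0", "2.0"))); // nominal class
        Instances header = new Instances("structure", atts, 0);       // no rows, header only
        header.setClassIndex(header.numAttributes() - 1);
        return header;
    }
}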
From source file:com.daniel.convert.IncrementalClassifier.java
License:Open Source License
/**
 * Expects an ARFF file as the first argument (the class attribute is assumed
 * to be the last attribute).
 *
 * @param args the command-line arguments
 * @throws Exception if something goes wrong
 */
public static BayesNet treinar(String[] args) throws Exception {
    // load data
    ArffLoader loader = new ArffLoader();
    loader.setFile(new File(args[0]));
    Instances structure = loader.getStructure();
    structure.setClassIndex(structure.numAttributes() - 1);

    // train BayesNet
    BayesNet BayesNet = new BayesNet();
    Instance current;
    while ((current = loader.getNextInstance(structure)) != null) {
        structure.add(current);
    }
    BayesNet.buildClassifier(structure);
    // output generated model
    // System.out.println(nb);

    // test set BayesNet
    BayesNet BayesNetTest = new BayesNet();
    // test the model
    Evaluation eTest = new Evaluation(structure);
    // eTest.evaluateModel(nb, structure);
    eTest.crossValidateModel(BayesNetTest, structure, 15, new Random(1));

    // print the result as in the Weka Explorer
    String strSummary = eTest.toSummaryString();
    System.out.println(strSummary);

    return BayesNet;
}
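Note that despite the class name, the method above accumulates every row in structure before calling buildClassifier. A sketch of the genuinely incremental idiom, assuming a classifier that implements UpdateableClassifier (NaiveBayesUpdateable here):

import java.io.File;
import weka.classifiers.bayes.NaiveBayesUpdateable;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ArffLoader;

public class TrueIncremental {
    public static void main(String[] args) throws Exception {
        ArffLoader loader = new ArffLoader();
        loader.setFile(new File(args[0]));
        Instances structure = loader.getStructure();
        structure.setClassIndex(structure.numAttributes() - 1);

        NaiveBayesUpdateable nb = new NaiveBayesUpdateable();
        nb.buildClassifier(structure); // initialize from the header only
        Instance current;
        while ((current = loader.getNextInstance(structure)) != null) {
            nb.updateClassifier(current); // train one row at a time
        }
        System.out.println(nb);
    }
}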
From source file:com.dhamacher.sentimentanalysis4tweets.preprocessing.TweetFeatureExtractor.java
License:Apache License
/**
 * Method which constructs the ARFF file for Weka with the training data.
 */
public static void constructModel() {
    Instances instdata = null;
    try {
        FastVector atts = new FastVector();
        atts.addElement(new Attribute("content", (FastVector) null));
        FastVector fvClassVal = new FastVector(4);
        fvClassVal.addElement("");
        fvClassVal.addElement("neutral");
        fvClassVal.addElement("negative");
        fvClassVal.addElement("positive");
        Attribute ClassAttribute = new Attribute("Class", fvClassVal);
        atts.addElement(ClassAttribute);
        instdata = new Instances("tweetData", atts, 0);
        CsvReader data = new CsvReader("../classified data/traindata.csv");
        int i = 0;
        while (data.readRecord()) {
            double[] vals = new double[instdata.numAttributes()];
            String class_id = data.get(0);
            switch (Integer.parseInt(class_id)) {
            case 0:
                class_id = "negative";
                break;
            case 2:
                class_id = "neutral";
                break;
            case 4:
                class_id = "positive";
                break;
            }
            String tweet_content = data.get(5);
            Instance iInst = new Instance(2);
            iInst.setValue((Attribute) atts.elementAt(0), tweet_content);
            iInst.setValue((Attribute) atts.elementAt(1), class_id);
            instdata.add(iInst);
            System.out.println("[" + i + "] " + class_id + ":" + tweet_content);
            i++;
        }
        data.close();
        StringToWordVector filter = new StringToWordVector();
        instdata.setClassIndex(instdata.numAttributes() - 1);
        filter.setInputFormat(instdata);
        Instances newdata = Filter.useFilter(instdata, filter);
        ArffSaver saver = new ArffSaver();
        saver.setInstances(newdata);
        saver.setFile(new File("./data/train2data.arff"));
        saver.writeBatch();
    } catch (Exception ex) {
        Logger.getLogger(TweetFeatureExtractor.class.getName()).log(Level.SEVERE, null, ex);
    }
}
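Note the ordering here: instdata.setClassIndex(...) is called before filter.setInputFormat(instdata), so StringToWordVector records which attribute is the class, keeps it in the filtered output, and by default builds its dictionary on a per-class basis (compare determineDictionary in the com.esda.util.StringToWordVector example below).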
From source file:com.edwardraff.WekaMNIST.java
License:Open Source License
public static void main(String[] args) throws IOException, Exception {
    String folder = args[0];
    String trainPath = folder + "MNISTtrain.arff";
    String testPath = folder + "MNISTtest.arff";

    System.out.println("Weka Timings");
    Instances mnistTrainWeka = new Instances(new BufferedReader(new FileReader(new File(trainPath))));
    mnistTrainWeka.setClassIndex(mnistTrainWeka.numAttributes() - 1);
    Instances mnistTestWeka = new Instances(new BufferedReader(new FileReader(new File(testPath))));
    mnistTestWeka.setClassIndex(mnistTestWeka.numAttributes() - 1);

    // normalize the feature range into [0, 1]
    Normalize normalizeFilter = new Normalize();
    normalizeFilter.setInputFormat(mnistTrainWeka);
    mnistTestWeka = Normalize.useFilter(mnistTestWeka, normalizeFilter);
    mnistTrainWeka = Normalize.useFilter(mnistTrainWeka, normalizeFilter);

    long start, end;

    System.out.println("RBF SVM (Full Cache)");
    SMO smo = new SMO();
    smo.setKernel(new RBFKernel(mnistTrainWeka, 0 /* 0 causes Weka to cache the whole matrix... */, 0.015625));
    smo.setC(8.0);
    smo.setBuildLogisticModels(false);
    evalModel(smo, mnistTrainWeka, mnistTestWeka);

    System.out.println("RBF SVM (No Cache)");
    smo = new SMO();
    smo.setKernel(new RBFKernel(mnistTrainWeka, 1, 0.015625));
    smo.setC(8.0);
    smo.setBuildLogisticModels(false);
    evalModel(smo, mnistTrainWeka, mnistTestWeka);

    System.out.println("Decision Tree C45");
    J48 wekaC45 = new J48();
    wekaC45.setUseLaplace(false);
    wekaC45.setCollapseTree(false);
    wekaC45.setUnpruned(true);
    wekaC45.setMinNumObj(2);
    wekaC45.setUseMDLcorrection(true);
    evalModel(wekaC45, mnistTrainWeka, mnistTestWeka);

    System.out.println("Random Forest 50 trees");
    // Weka uses different defaults, so let's make sure they both use the published way
    int featuresToUse = (int) Math.sqrt(28 * 28);
    RandomForest wekaRF = new RandomForest();
    wekaRF.setNumExecutionSlots(1);
    wekaRF.setMaxDepth(0 /* 0 for unlimited */);
    wekaRF.setNumFeatures(featuresToUse);
    wekaRF.setNumTrees(50);
    evalModel(wekaRF, mnistTrainWeka, mnistTestWeka);

    System.out.println("1-NN (brute)");
    IBk wekaNN = new IBk(1);
    wekaNN.setNearestNeighbourSearchAlgorithm(new LinearNNSearch());
    wekaNN.setCrossValidate(false);
    evalModel(wekaNN, mnistTrainWeka, mnistTestWeka);

    System.out.println("1-NN (Ball Tree)");
    wekaNN = new IBk(1);
    wekaNN.setNearestNeighbourSearchAlgorithm(new BallTree());
    wekaNN.setCrossValidate(false);
    evalModel(wekaNN, mnistTrainWeka, mnistTestWeka);

    System.out.println("1-NN (Cover Tree)");
    wekaNN = new IBk(1);
    wekaNN.setNearestNeighbourSearchAlgorithm(new CoverTree());
    wekaNN.setCrossValidate(false);
    evalModel(wekaNN, mnistTrainWeka, mnistTestWeka);

    System.out.println("Logistic Regression LBFGS lambda = 1e-4");
    Logistic logisticLBFGS = new Logistic();
    logisticLBFGS.setRidge(1e-4);
    logisticLBFGS.setMaxIts(500);
    evalModel(logisticLBFGS, mnistTrainWeka, mnistTestWeka);

    System.out.println("k-means (Loyd)");
    int origClassIndex = mnistTrainWeka.classIndex();
    mnistTrainWeka.setClassIndex(-1);
    mnistTrainWeka.deleteAttributeAt(origClassIndex);
    {
        long totalTime = 0;
        for (int i = 0; i < 10; i++) {
            SimpleKMeans wekaKMeans = new SimpleKMeans();
            wekaKMeans.setNumClusters(10);
            wekaKMeans.setNumExecutionSlots(1);
            wekaKMeans.setFastDistanceCalc(true);
            start = System.currentTimeMillis();
            wekaKMeans.buildClusterer(mnistTrainWeka);
            end = System.currentTimeMillis();
            totalTime += (end - start);
        }
        System.out.println("\tClustering took: " + (totalTime / 10.0) / 1000.0 + " on average");
    }
}
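The k-means block above calls setClassIndex(-1) before deleteAttributeAt: Instances refuses to delete the attribute currently set as the class, so the class designation has to be cleared first. The idiom in isolation (a sketch):

import weka.core.Instances;

public final class DropClassForClustering {
    // Removes the class attribute prior to clustering; deleteAttributeAt
    // throws if asked to delete the current class attribute.
    static void dropClass(Instances data) {
        int classIdx = data.classIndex();
        if (classIdx >= 0) {
            data.setClassIndex(-1);           // -1 means "no class attribute"
            data.deleteAttributeAt(classIdx);
        }
    }
}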
From source file:com.esda.util.StringToWordVector.java
License:Open Source License
/**
 * Determines the dictionary.
 */
private void determineDictionary() {
    // initialize stopwords
    Stopwords stopwords = new Stopwords();
    if (getUseStoplist()) {
        try {
            if (getStopwords().exists() && !getStopwords().isDirectory())
                stopwords.read(getStopwords());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    // Operate on a per-class basis if class attribute is set
    int classInd = getInputFormat().classIndex();
    int values = 1;
    if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
        values = getInputFormat().attribute(classInd).numValues();
    }

    TreeMap[] dictionaryArr = new TreeMap[values];
    for (int i = 0; i < values; i++) {
        dictionaryArr[i] = new TreeMap();
    }

    // Make sure we know which fields to convert
    determineSelectedRange();

    // Tokenize all training text into an ordered map of "words".
    long pruneRate = Math.round((m_PeriodicPruningRate / 100.0) * getInputFormat().numInstances());
    for (int i = 0; i < getInputFormat().numInstances(); i++) {
        Instance instance = getInputFormat().instance(i);
        int vInd = 0;
        if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
            vInd = (int) instance.classValue();
        }

        // Iterate through all relevant string attributes of the current instance
        Hashtable h = new Hashtable();
        for (int j = 0; j < instance.numAttributes(); j++) {
            if (m_SelectedRange.isInRange(j) && (instance.isMissing(j) == false)) {
                // Get tokenizer
                m_Tokenizer.tokenize(instance.stringValue(j));

                // Iterate through tokens, perform stemming, and remove stopwords (if required)
                while (m_Tokenizer.hasMoreElements()) {
                    String word = ((String) m_Tokenizer.nextElement()).intern();
                    if (this.m_lowerCaseTokens == true)
                        word = word.toLowerCase();
                    String[] wordsArr = word.split(" ");
                    StringBuilder stemmedStr = new StringBuilder();
                    for (String wordStr : wordsArr) {
                        if (!this.m_useStoplist || !stopwords.is(wordStr)) {
                            stemmedStr.append(m_Stemmer.stem(wordStr));
                            stemmedStr.append(" ");
                        }
                    }
                    word = stemmedStr.toString().trim();
                    if (!(h.containsKey(word)))
                        h.put(word, new Integer(0));
                    Count count = (Count) dictionaryArr[vInd].get(word);
                    if (count == null) {
                        dictionaryArr[vInd].put(word, new Count(1));
                    } else {
                        count.count++;
                    }
                }
            }
        }

        // Update the docCount for the words that have occurred in this instance (document).
        Enumeration e = h.keys();
        while (e.hasMoreElements()) {
            String word = (String) e.nextElement();
            Count c = (Count) dictionaryArr[vInd].get(word);
            if (c != null) {
                c.docCount++;
            } else
                System.err.println(
                        "Warning: A word should definitely be in the dictionary. Please check the code");
        }

        if (pruneRate > 0) {
            if (i % pruneRate == 0 && i > 0) {
                for (int z = 0; z < values; z++) {
                    Vector d = new Vector(1000);
                    Iterator it = dictionaryArr[z].keySet().iterator();
                    while (it.hasNext()) {
                        String word = (String) it.next();
                        Count count = (Count) dictionaryArr[z].get(word);
                        if (count.count <= 1) {
                            d.add(word);
                        }
                    }
                    Iterator iter = d.iterator();
                    while (iter.hasNext()) {
                        String word = (String) iter.next();
                        dictionaryArr[z].remove(word);
                    }
                }
            }
        }
    }

    // Figure out the minimum required word frequency
    int totalsize = 0;
    int prune[] = new int[values];
    for (int z = 0; z < values; z++) {
        totalsize += dictionaryArr[z].size();
        int array[] = new int[dictionaryArr[z].size()];
        int pos = 0;
        Iterator it = dictionaryArr[z].keySet().iterator();
        while (it.hasNext()) {
            String word = (String) it.next();
            Count count = (Count) dictionaryArr[z].get(word);
            array[pos] = count.count;
            pos++;
        }
        // sort the array
        sortArray(array);
        if (array.length < m_WordsToKeep) {
            // if there aren't enough words, set the threshold to minFreq
            prune[z] = m_minTermFreq;
        } else {
            // otherwise set it to be at least minFreq
            prune[z] = Math.max(m_minTermFreq, array[array.length - m_WordsToKeep]);
        }
    }

    // Convert the dictionary into an attribute index and create one attribute per word
    FastVector attributes = new FastVector(totalsize + getInputFormat().numAttributes());

    // Add the non-converted attributes
    int classIndex = -1;
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (!m_SelectedRange.isInRange(i)) {
            if (getInputFormat().classIndex() == i) {
                classIndex = attributes.size();
            }
            attributes.addElement(getInputFormat().attribute(i).copy());
        }
    }

    // Add the word vector attributes (eliminating duplicates that occur in multiple classes)
    TreeMap newDictionary = new TreeMap();
    int index = attributes.size();
    for (int z = 0; z < values; z++) {
        Iterator it = dictionaryArr[z].keySet().iterator();
        while (it.hasNext()) {
            String word = (String) it.next();
            Count count = (Count) dictionaryArr[z].get(word);
            if (count.count >= prune[z]) {
                if (newDictionary.get(word) == null) {
                    newDictionary.put(word, new Integer(index++));
                    attributes.addElement(new Attribute(m_Prefix + word));
                }
            }
        }
    }

    // Compute document frequencies
    m_DocsCounts = new int[attributes.size()];
    Iterator it = newDictionary.keySet().iterator();
    while (it.hasNext()) {
        String word = (String) it.next();
        int idx = ((Integer) newDictionary.get(word)).intValue();
        int docsCount = 0;
        for (int j = 0; j < values; j++) {
            Count c = (Count) dictionaryArr[j].get(word);
            if (c != null)
                docsCount += c.docCount;
        }
        m_DocsCounts[idx] = docsCount;
    }

    // Trim vector and set instance variables
    attributes.trimToSize();
    m_Dictionary = newDictionary;
    m_NumInstances = getInputFormat().numInstances();

    // Set the filter's output format
    Instances outputFormat = new Instances(getInputFormat().relationName(), attributes, 0);
    outputFormat.setClassIndex(classIndex);
    setOutputFormat(outputFormat);
}
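Two details worth noting at the end of this method: classIndex is recomputed as the class attribute's position among the retained (non-converted) attributes, because the generated word attributes are appended after them and the class can therefore sit at a different index in the output than in the input; and if the input had no class set, classIndex stays -1, which outputFormat.setClassIndex(-1) interprets as "no class attribute".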
From source file:com.gamerecommendation.Weatherconditions.Clasificacion.java
public String clasificar(String[] testCases) throws Exception {
    String ruta = "model.model";
    InputStream classModelStream = getClass().getResourceAsStream(ruta);
    Classifier clasify = (Classifier) SerializationHelper.read(classModelStream);

    FastVector condition = new FastVector();
    condition.addElement("Cloudy");
    condition.addElement("Clear");
    condition.addElement("Sunny");
    condition.addElement("Fair");
    condition.addElement("Partly_Cloudy");
    condition.addElement("Mostly_Cloudy");
    condition.addElement("Showers");
    condition.addElement("Haze");
    condition.addElement("Dust");
    condition.addElement("Other");
    Attribute _condition = new Attribute("contition", condition);

    FastVector temperature = new FastVector();
    temperature.addElement("Hot");
    temperature.addElement("Mild");
    temperature.addElement("Cool");
    Attribute _temperature = new Attribute("temperature", temperature);

    FastVector chill = new FastVector();
    chill.addElement("Regrettable");
    chill.addElement("Mint");
    Attribute _chill = new Attribute("chill", chill);

    FastVector direction = new FastVector();
    direction.addElement("Mint");
    direction.addElement("Fair");
    direction.addElement("Regular");
    Attribute _direction = new Attribute("direction", direction);

    FastVector speed = new FastVector();
    speed.addElement("Mint");
    speed.addElement("Fair");
    speed.addElement("Regular");
    Attribute _speed = new Attribute("speed", speed);

    FastVector humidity = new FastVector();
    humidity.addElement("High");
    humidity.addElement("Normal");
    humidity.addElement("Low");
    Attribute _humidity = new Attribute("humidity", humidity);

    FastVector visibility = new FastVector();
    visibility.addElement("Recommended");
    visibility.addElement("Not_Recommended");
    Attribute _visibility = new Attribute("visibility", visibility);

    FastVector preassure = new FastVector();
    preassure.addElement("Fair");
    preassure.addElement("Mint");
    Attribute _preassure = new Attribute("preassure", preassure);

    FastVector Class = new FastVector();
    Class.addElement("Recommended");
    Class.addElement("Not_Recommended");
    Attribute _Class = new Attribute("class", Class);

    FastVector atributos = new FastVector(9);
    atributos.addElement(_condition);
    atributos.addElement(_temperature);
    atributos.addElement(_chill);
    atributos.addElement(_direction);
    atributos.addElement(_speed);
    atributos.addElement(_humidity);
    atributos.addElement(_visibility);
    atributos.addElement(_preassure);
    atributos.addElement(_Class);

    ArrayList<Attribute> atributs = new ArrayList<>();
    atributs.add(_condition);
    atributs.add(_temperature);
    atributs.add(_chill);
    atributs.add(_direction);
    atributs.add(_speed);
    atributs.add(_humidity);
    atributs.add(_visibility);
    atributs.add(_preassure);
    atributs.add(_Class);

    // Here we create the instance set, which has all the attributes of the model
    Instances dataTest = new Instances("TestCases", atributos, 1);
    dataTest.setClassIndex(8);

    Instance setPrueba = new Instance(9);
    int index = -1;
    for (int i = 0; i < 8; i++) {
        index = atributs.get(i).indexOfValue(testCases[i]);
        // System.out.println(i + " " + atributs.get(i) + " " + index + " " + testCases[i]);
        setPrueba.setValue(atributs.get(i), index);
    }

    // Add the set we want to evaluate
    dataTest.add(setPrueba);

    // Perform the prediction; instance 0 is used because it is the only one present
    double valorP = clasify.classifyInstance(dataTest.instance(0));

    // get the name of the class value
    String prediccion = dataTest.classAttribute().value((int) valorP);

    return prediccion;
}
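This example uses the Weka 3.6 API, where FastVector and the concrete Instance class still exist. A sketch of the same single-instance setup against the 3.7+ API, trimmed to one feature attribute for brevity (DenseInstance replaces Instance, and a plain java.util.List of attributes replaces FastVector):

import java.util.ArrayList;
import java.util.Arrays;
import weka.classifiers.Classifier;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class SingleInstanceNewApi {
    static String classify(Classifier model, String conditionValue) throws Exception {
        ArrayList<Attribute> atts = new ArrayList<>();
        atts.add(new Attribute("condition", Arrays.asList("Cloudy", "Clear")));
        atts.add(new Attribute("class", Arrays.asList("Recommended", "Not_Recommended")));
        Instances dataTest = new Instances("TestCases", atts, 1);
        dataTest.setClassIndex(dataTest.numAttributes() - 1); // required before classifyInstance

        Instance row = new DenseInstance(dataTest.numAttributes()); // all values start missing
        row.setDataset(dataTest);                  // attach the header before attribute-based setters
        row.setValue(atts.get(0), conditionValue); // the class value is left missing
        dataTest.add(row);

        double p = model.classifyInstance(dataTest.instance(0));
        return dataTest.classAttribute().value((int) p);
    }
}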
From source file:com.github.fracpete.multisearch.ExampleHelper.java
License:Open Source License
/**
 * Loads the dataset from disk.
 *
 * @param filename the file to load
 * @param classIndex the 1-based class index ("first" and "last" accepted as well),
 *                   uses last attribute if null
 * @return the dataset
 * @throws Exception if loading of data fails
 */
public static Instances loadData(String filename, String classIndex) throws Exception {
    Instances data = DataSource.read(filename);
    if (classIndex != null) {
        if (classIndex.equals("first"))
            data.setClassIndex(0);
        else if (classIndex.equals("last"))
            data.setClassIndex(data.numAttributes() - 1);
        else
            data.setClassIndex(Integer.parseInt(classIndex) - 1);
    } else {
        data.setClassIndex(data.numAttributes() - 1);
    }
    return data;
}
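Hypothetical invocations of the helper above (the file path is a placeholder); note that numeric indices are 1-based on the command line but 0-based in setClassIndex, hence the - 1 in the helper:

import com.github.fracpete.multisearch.ExampleHelper;
import weka.core.Instances;

public class LoadDataDemo {
    public static void main(String[] args) throws Exception {
        Instances a = ExampleHelper.loadData("data/anneal.arff", "first"); // class = first attribute
        Instances b = ExampleHelper.loadData("data/anneal.arff", "5");     // 1-based 5 -> index 4
        Instances c = ExampleHelper.loadData("data/anneal.arff", null);    // defaults to last attribute
        System.out.println(a.classIndex() + " " + b.classIndex() + " " + c.classIndex());
    }
}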
From source file:com.github.r351574nc3.amex.assignment2.App.java
License:Open Source License
/**
 * Loads the MPG data file from UCI.
 *
 * @param mpgFile the path of the dataset
 * @return {@link Instances} instance containing all records of the dataset
 */
public Instances load(final String mpgFile) throws Exception {
    try {
        final Instances retval = DataSource.read(mpgFile);
        retval.setClassIndex(0);
        retval.deleteStringAttributes();
        return retval;
    } catch (Exception e) {
        error("%s:%s", UNABLE_TO_LOAD_DATA_MESSAGE, e.getClass().getSimpleName());
        e.printStackTrace();
    }
    return null;
}
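Here the class (MPG) is the first attribute, so setClassIndex(0) replaces the usual last-attribute idiom. The order of the two calls also matters: setting the class index before deleteStringAttributes() should protect the class attribute, since Instances.deleteAttributeType (which backs deleteStringAttributes) skips the attribute currently set as the class.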