List of usage examples for weka.core.Instances.numAttributes()
public int numAttributes()
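Before the project examples, here is a minimal self-contained sketch of the most common numAttributes() idiom: counting the columns of a dataset and marking the last attribute as the class. The file name "data.arff" and the class name are placeholder assumptions.

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class NumAttributesDemo {
    public static void main(String[] args) throws Exception {
        // Load a dataset; "data.arff" is a placeholder path.
        Instances data = new DataSource("data.arff").getDataSet();

        // numAttributes() counts all columns, including the class attribute.
        System.out.println("Attributes: " + data.numAttributes());

        // Common idiom: treat the last attribute as the class.
        data.setClassIndex(data.numAttributes() - 1);
    }
}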
From source file:assign00.ExperimentShell.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws Exception {
    // 'file' is a field of the enclosing class holding the dataset path.
    DataSource source = new DataSource(file);
    Instances dataSet = source.getDataSet();

    // Set up the data: the last attribute is the class.
    dataSet.setClassIndex(dataSet.numAttributes() - 1);
    dataSet.randomize(new Random(1));

    // Determine split sizes: 70% training, 30% test.
    int trainingSize = (int) Math.round(dataSet.numInstances() * .7);
    int testSize = dataSet.numInstances() - trainingSize;
    Instances training = new Instances(dataSet, 0, trainingSize);
    Instances test = new Instances(dataSet, trainingSize, testSize);

    // Standardize both splits using statistics from the training set.
    Standardize standardizedData = new Standardize();
    standardizedData.setInputFormat(training);
    Instances newTest = Filter.useFilter(test, standardizedData);
    Instances newTraining = Filter.useFilter(training, standardizedData);

    NeuralNetworkClassifier NWC = new NeuralNetworkClassifier();
    NWC.buildClassifier(newTraining);

    Evaluation eval = new Evaluation(newTraining);
    eval.evaluateModel(NWC, newTest);
    System.out.println(eval.toSummaryString("\nResults\n======\n", false));
}
From source file:at.aictopic1.sentimentanalysis.machinelearning.impl.TwitterClassifer.java
public Integer classify(Tweet[] tweets) {
    // TEST: generate example tweets
    Tweet exOne = new Tweet("This is good and fantastic");
    exOne.setPreprocessedText("This is good and fantastic");
    Tweet exTwo = new Tweet("Horribly, terribly bad and more");
    exTwo.setPreprocessedText("Horribly, terribly bad and more");
    Tweet exThree = new Tweet(
            "I want to update lj and read my friends list, but I\\'m groggy and sick and blargh.");
    exThree.setPreprocessedText(
            "I want to update lj and read my friends list, but I\\'m groggy and sick and blargh.");
    Tweet exFour = new Tweet("bad hate worst sick");
    exFour.setPreprocessedText("bad hate worst sick");
    tweets = new Tweet[] { exOne, exTwo, exThree, exFour };
    // TEST

    // Load model
    // loadModel();

    // Convert Tweet to Instance type:
    // create attributes for the Instances set
    Attribute twitter_id = new Attribute("twitter_id");
    // Attribute body = new Attribute("body");
    FastVector classVal = new FastVector(2);
    classVal.addElement("pos");
    classVal.addElement("neg");
    Attribute class_attr = new Attribute("class_attr", classVal);

    // Add them to a list
    FastVector attrVector = new FastVector(3);
    // attrVector.addElement(twitter_id);
    // attrVector.addElement(new Attribute("body", (FastVector) null));
    // attrVector.addElement(class_attr);

    // Get the number of tweets, then create the prediction set
    int numTweets = tweets.length;
    Enumeration structAttrs = dataStructure.enumerateAttributes();
    // ArrayList<Attribute> attrList = new ArrayList<Attribute>(dataStructure.numAttributes());
    while (structAttrs.hasMoreElements()) {
        attrVector.addElement((Attribute) structAttrs.nextElement());
    }

    Instances predictSet = new Instances("predictInstances", attrVector, numTweets);
    // Instances predictSet = new Instances(dataStructure);
    predictSet.setClassIndex(2);

    // init prediction
    double prediction = -1;

    System.out.println("PredictSet matches source structure: " + predictSet.equalHeaders(dataStructure));
    System.out.println("PredSet struct: " + predictSet.attribute(0));
    System.out.println("PredSet struct: " + predictSet.attribute(1));
    System.out.println("PredSet struct: " + predictSet.attribute(2));

    // Array to return predictions
    // double[] tweetsClassified = new double[2][numTweets];
    // List<Integer, Double> tweetsClass = new ArrayList<Integer, Double>(numTweets);
    for (int i = 0; i < numTweets; i++) {
        String content = (String) tweets[i].getPreprocessedText();
        System.out.println("Tweet content: " + content);

        // Size the new instance by the number of attributes in the prediction set.
        Instance tweetInstance = new Instance(predictSet.numAttributes());
        tweetInstance.setDataset(predictSet);
        tweetInstance.setValue(predictSet.attribute(0), i);
        tweetInstance.setValue(predictSet.attribute(1), content);
        tweetInstance.setClassMissing();
        predictSet.add(tweetInstance);

        try {
            // Apply string filter
            StringToWordVector filter = new StringToWordVector();
            filter.setInputFormat(predictSet);
            Instances filteredPredictSet = Filter.useFilter(predictSet, filter);

            // Apply model
            prediction = trainedModel.classifyInstance(filteredPredictSet.instance(i));
            filteredPredictSet.instance(i).setClassValue(prediction);
            System.out.println("Classification: " + filteredPredictSet.instance(i).toString());
            System.out.println("Prediction: " + prediction);
        } catch (Exception ex) {
            Logger.getLogger(TwitterClassifer.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    return 0;
}
From source file:at.tuflowgraphy.semanticapps.semdroid.DalvikBaseAnalyzer.java
License:Apache License
public Instances getWekaInstances() {
    Instances instances = null;
    List<DActivationPatternPackage> dActivationPatternPackages = mAnalysisChain.getFinalLayers().get(0)
            .getResultAnalysisPackage().getActivationPatternPackages();
    int counter = 0;
    for (DActivationPatternPackage dActivationPatternPackage : dActivationPatternPackages) {
        if (counter > 0) {
            throw new RuntimeException("More than one DActivationPatternPackage found!");
        }
        counter++;

        DActivationPattern activationPatternTemp = dActivationPatternPackage.getActivationPatterns().get(0);
        FastVector fvWekaAttributes = new FastVector(activationPatternTemp.getRawPattern().length);
        for (int j = 0; j < activationPatternTemp.getRawPattern().length; j++) {
            Attribute attribute = new Attribute(j + "");
            fvWekaAttributes.addElement(attribute);
        }

        Set<String> labelSet = getLabelSet(dActivationPatternPackage);
        FastVector classValues = new FastVector(labelSet.size());
        for (String label : labelSet) {
            classValues.addElement(label);
        }
        Attribute classAttribute = new Attribute("Class", classValues);
        fvWekaAttributes.addElement(classAttribute);

        instances = new Instances(mAnalysisConfig.getApplicationAnalysisName(), fvWekaAttributes,
                dActivationPatternPackage.getActivationPatterns().size());
        instances.setClassIndex(instances.numAttributes() - 1);

        for (int i = 0; i < dActivationPatternPackage.getActivationPatterns().size(); i++) {
            DActivationPattern activationPattern = dActivationPatternPackage.getActivationPatterns().get(i);
            Instance instance = new Instance(fvWekaAttributes.size());
            for (int j = 0; j < activationPattern.getRawPattern().length; j++) {
                instance.setValue((Attribute) fvWekaAttributes.elementAt(j),
                        activationPattern.getRawPattern()[j]);
            }
            instance.setDataset(instances);

            DSimpleStringMetaData metadata = (DSimpleStringMetaData) activationPattern.getMetaData();
            List<String> keys = metadata.getMetaDataKeys();
            for (int k = 0; k < keys.size(); k++) {
                if (keys.get(k).equals(DalvikInputPlugin.TAG_LABEL)) {
                    String label = metadata.getMetaDataEntries().get(k);
                    instance.setClassValue(label);
                    break;
                }
            }
            instances.add(instance);
        }
    }
    return instances;
}
From source file:at.tuflowgraphy.semanticapps.semdroid.DalvikBaseAnalyzer.java
License:Apache License
protected Instances generateInstancesHeader(DActivationPatternPackage dActivationPatternPackage) {
    DActivationPattern activationPatternTemp = dActivationPatternPackage.getActivationPatterns().get(0);
    FastVector fvWekaAttributes = new FastVector(activationPatternTemp.getRawPattern().length);
    for (int j = 0; j < activationPatternTemp.getRawPattern().length; j++) {
        Attribute attribute = new Attribute(j + "");
        fvWekaAttributes.addElement(attribute);
    }

    Set<String> labelSet = getLabelSet(dActivationPatternPackage);
    FastVector classValues = new FastVector(labelSet.size());
    for (String label : labelSet) {
        classValues.addElement(label);
    }
    Attribute classAttribute = new Attribute("Class", classValues);
    fvWekaAttributes.addElement(classAttribute);

    Instances instances = new Instances(mAnalysisConfig.getApplicationAnalysisName(), fvWekaAttributes,
            dActivationPatternPackage.getActivationPatterns().size());
    instances.setClassIndex(instances.numAttributes() - 1);
    return instances;
}
From source file:at.tuflowgraphy.semanticapps.semdroid.utils.ArffHelper.java
License:Apache License
public Instances getWekaInstances(AnalysisChain analysisChain, String name) {
    Instances instances = null;
    List<DActivationPatternPackage> dActivationPatternPackages = analysisChain.getFinalLayers().get(0)
            .getResultAnalysisPackage().getActivationPatternPackages();
    int counter = 0;
    for (DActivationPatternPackage dActivationPatternPackage : dActivationPatternPackages) {
        if (counter > 0) {
            // String resultFileName = arffFile.getName();
            // String newName = resultFileName.split("_")[0];
            // int index = resultFileName.indexOf("_");
            // newName += "-MISSING-" + counter + "-" + resultFileName.substring(index);
            // arffFileToWriteTo = new File(arffFile.getParentFile(), newName);
            System.err.println("ERROR: Multiple activation pattern packages found! Should not happen...");
        }
        counter++;

        DActivationPattern activationPatternTemp = dActivationPatternPackage.getActivationPatterns().get(0);
        FastVector fvWekaAttributes = new FastVector(activationPatternTemp.getRawPattern().length);
        for (int j = 0; j < activationPatternTemp.getRawPattern().length; j++) {
            Attribute attribute = new Attribute(j + "");
            fvWekaAttributes.addElement(attribute);
        }

        Set<String> labelSet = getLabelSet(dActivationPatternPackage);
        FastVector classValues = new FastVector(labelSet.size());
        for (String label : labelSet) {
            classValues.addElement(label);
        }
        Attribute classAttribute = new Attribute("Class", classValues);
        fvWekaAttributes.addElement(classAttribute);

        instances = new Instances(name, fvWekaAttributes,
                dActivationPatternPackage.getActivationPatterns().size());
        instances.setClassIndex(instances.numAttributes() - 1);

        for (int i = 0; i < dActivationPatternPackage.getActivationPatterns().size(); i++) {
            DActivationPattern activationPattern = dActivationPatternPackage.getActivationPatterns().get(i);
            Instance instance = new Instance(fvWekaAttributes.size());
            for (int j = 0; j < activationPattern.getRawPattern().length; j++) {
                instance.setValue((Attribute) fvWekaAttributes.elementAt(j),
                        activationPattern.getRawPattern()[j]);
            }
            instance.setDataset(instances);

            DSimpleStringMetaData metadata = (DSimpleStringMetaData) activationPattern.getMetaData();
            List<String> keys = metadata.getMetaDataKeys();
            for (int k = 0; k < keys.size(); k++) {
                if (keys.get(k).equals(DalvikInputPlugin.TAG_LABEL)) {
                    String label = metadata.getMetaDataEntries().get(k);
                    // TODO: dynamically add new labels to instances so that the getLabelSet pass
                    // is not required.
                    instance.setClassValue(label); // TODO: only the first class value is used
                    break;
                }
            }
            instances.add(instance);
        }
    }
    return instances;
}
From source file:au.edu.usyd.it.yangpy.sampling.BPSO.java
License:Open Source License
/**
 * The target function in fitness form.
 *
 * @return classification accuracy
 */
public double ensembleClassify() {
    double fitnessValue = 0.0;
    double classifiersScore = 0.0;

    try {
        // Load the modified data set; the class is the last attribute.
        Instances reducedSet = new Instances(new BufferedReader(new FileReader("reduced.arff")));
        reducedSet.setClassIndex(reducedSet.numAttributes() - 1);

        // Evaluate each member classifier and accumulate its score.
        if (verbose) {
            System.out.println();
            System.out.println(" |----------J4.8-----------|");
            System.out.println(" | | |");
        }
        J48 tree = new J48();
        classifiersScore = classify(tree, reducedSet, internalTest);
        fitnessValue += classifiersScore;

        if (verbose) {
            System.out.println();
            System.out.println(" |-----3NearestNeighbor----|");
            System.out.println(" | | |");
        }
        IBk nn3 = new IBk(3);
        classifiersScore = classify(nn3, reducedSet, internalTest);
        fitnessValue += classifiersScore;

        if (verbose) {
            System.out.println();
            System.out.println(" |--------NaiveBayes-------|");
            System.out.println(" | | |");
        }
        NaiveBayes nb = new NaiveBayes();
        classifiersScore = classify(nb, reducedSet, internalTest);
        fitnessValue += classifiersScore;

        if (verbose) {
            System.out.println();
            System.out.println(" |-------RandomForest------|");
            System.out.println(" | | |");
        }
        RandomForest rf5 = new RandomForest();
        rf5.setNumTrees(5);
        classifiersScore = classify(rf5, reducedSet, internalTest);
        fitnessValue += classifiersScore;

        if (verbose) {
            System.out.println();
            System.out.println(" |---------Logistic--------|");
            System.out.println(" | | |");
        }
        Logistic log = new Logistic();
        classifiersScore = classify(log, reducedSet, internalTest);
        fitnessValue += classifiersScore;
    } catch (IOException ioe) {
        ioe.printStackTrace();
    }

    // Average over the five classifiers.
    fitnessValue /= 5;

    if (verbose) {
        System.out.println();
        System.out.println("Fitness: " + fitnessValue);
        System.out.println("---------------------------------------------------");
    }
    return fitnessValue;
}
From source file:au.edu.usyd.it.yangpy.snp.GEsnpxPara.java
License:Open Source License
/**
 * Performs the genetic operations.
 *
 * @param saveFlag append/write to the output file
 */
public void performGeneticOperation(int saveFlag) throws Exception {
    // Initialize processing components: load the raw data,
    // with the class as the last attribute.
    Instances rawData = new Instances(new BufferedReader(new FileReader(file)));
    rawData.setClassIndex(rawData.numAttributes() - 1);

    ParallelGenetic genetic = new ParallelGenetic(rawData, chroLen, popSize, terGener, mode, balance,
            diversity, numThread);
    genetic.initializeParameters();
    genetic.initializeChromosomes();
    genetic.evaluate();

    for (int i = 1; i < genetic.getTerimateGeneration(); i++) {
        genetic.selectElitism();
        genetic.selectUsingTournament();
        genetic.crossover();
        genetic.mutate();
        genetic.generateNewGeneration();
        genetic.evaluate();
    }

    if (saveFlag == 0)
        genetic.saveBestChro(false);
    else
        genetic.saveBestChro(true);
}
From source file:aw_cluster.myKMeans.java
protected double[] moveCentroid(Instances members) {
    // One value per attribute: the mean (numeric) or mode (nominal) over the cluster members.
    double[] vals = new double[members.numAttributes()];
    for (int j = 0; j < members.numAttributes(); j++) {
        vals[j] = members.meanOrMode(j);
    }
    centroid.add(new Instance(1.0, vals));
    return vals;
}
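moveCentroid() above is the standard per-column loop bounded by numAttributes(). A standalone sketch of the same idiom, assuming 'data' is an already-loaded Instances object:

// Sketch: print each attribute's name and its mean (numeric) or mode (nominal).
for (int j = 0; j < data.numAttributes(); j++) {
    System.out.println(data.attribute(j).name() + ": " + data.meanOrMode(j));
}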
From source file:binarizer.LayoutAnalysis.java
public double crossValidation(String arffFile) throws Exception {
    DataSource source = new DataSource(arffFile);
    Instances trainingData = source.getDataSet();
    if (trainingData.classIndex() == -1)
        trainingData.setClassIndex(trainingData.numAttributes() - 1);

    NaiveBayes nb = new NaiveBayes();
    nb.setUseSupervisedDiscretization(true);

    Evaluation evaluation = new Evaluation(trainingData);
    evaluation.crossValidateModel(nb, trainingData, 10, new Random(1));
    System.out.println(evaluation.toSummaryString());
    return evaluation.errorRate();
}
From source file:binarytreesom.clustering.BinaryTreeSOMClustering.java
/**
 * Initialize the tree configuration. This implementation considers a complete binary tree of depth h.
 */
private void initialize() throws IOException {
    // The number of neurons N depends on the depth h: N = 2^h, i.e. h = log2(N).
    Instances instances = readArff(getFilenameARFF());
    instances.setClassIndex(-1); // clustering: no class attribute

    numberOfInstances = instances.numInstances();
    dimensionality = instances.numAttributes();
    data = new double[getNumberOfInstances()][getDimensionality()];
    weight = new double[getNumberOfNeurons()][getDimensionality()];

    // Randomly select instances and assign them to the weight vectors.
    for (int k = 0; k < getNumberOfNeurons(); k++) {
        // Hard copy of the double array.
        weight[k] = instances.instance(r.nextInt(getNumberOfInstances())).toDoubleArray();
    }
    for (int k = 0; k < getNumberOfInstances(); k++) {
        data[k] = instances.instance(k).toDoubleArray(); // hard copy of the double array
    }
}