List of usage examples for the weka.core.Instances constructor
public Instances(String name, ArrayList<Attribute> attInfo, int capacity)
From source file:br.fapesp.myutils.MyUtils.java
License:Open Source License
/** * Generates a Gaussian data set with K clusters and m dimensions * //from ww w.j av a 2 s . c o m * @param centers * K x m matrix * @param sigmas * K x m matrix * @param pointsPerCluster * number of points per cluster * @param seed * for the RNG * @param randomize * should the order of the instances be randomized? * @param supervised * should class label be present? if true, the class is the m+1 * attribute * * @return */ public static Instances genGaussianDataset(double[][] centers, double[][] sigmas, int pointsPerCluster, long seed, boolean randomize, boolean supervised) { Random r = new Random(seed); int K = centers.length; // number of clusters int m = centers[0].length; // number of dimensions FastVector atts = new FastVector(m); for (int i = 0; i < m; i++) atts.addElement(new Attribute("at" + i)); if (supervised) { FastVector cls = new FastVector(K); for (int i = 0; i < K; i++) cls.addElement("Gauss-" + i); atts.addElement(new Attribute("Class", cls)); } Instances data; if (supervised) data = new Instances(K + "-Gaussians-supervised", atts, K * pointsPerCluster); else data = new Instances(K + "-Gaussians", atts, K * pointsPerCluster); if (supervised) data.setClassIndex(m); Instance ith; for (int i = 0; i < K; i++) { for (int j = 0; j < pointsPerCluster; j++) { if (!supervised) ith = new DenseInstance(m); else ith = new DenseInstance(m + 1); ith.setDataset(data); for (int k = 0; k < m; k++) ith.setValue(k, centers[i][k] + (r.nextGaussian() * sigmas[i][k])); if (supervised) ith.setValue(m, "Gauss-" + i); data.add(ith); } } // run randomization filter if desired if (randomize) data.randomize(r); return data; }
From source file:br.puc_rio.ele.lvc.interimage.datamining.DataParser.java
License:Apache License
@SuppressWarnings({ "unchecked", "rawtypes" }) public Instances parseData(Object objData) { try {/* w ww .ja v a 2 s . c o m*/ Instances dataInstance; DataBag values = (DataBag) objData; int numAttributes = values.iterator().next().size(); // N_Features + 1 Class int bagSize = 0; // To set the number of train samples // To find the number of samples (instances in a bag) for (Iterator<Tuple> it = values.iterator(); it.hasNext();) { it.next(); bagSize = bagSize + 1; } // Code for find the different classes names in the input String[] inputClass = new String[bagSize]; // String vector with the samples class's names int index = 0; for (Iterator<Tuple> it = values.iterator(); it.hasNext();) { Tuple tuple = it.next(); inputClass[index] = DataType.toString(tuple.get(numAttributes - 1)); index = index + 1; } HashSet classSet = new HashSet(Arrays.asList(inputClass)); String[] classValue = (String[]) classSet.toArray(new String[0]); // To set the classes names in the attribute for the instance FastVector classNames = new FastVector(); for (int i = 0; i < classValue.length; i++) classNames.addElement(classValue[i]); // Creating the instance model N_Features + 1_ClassNames FastVector atts = new FastVector(); for (int i = 0; i < numAttributes - 1; i++) atts.addElement(new Attribute("att" + i)); dataInstance = new Instances("MyRelation", atts, numAttributes); dataInstance.insertAttributeAt(new Attribute("ClassNames", classNames), numAttributes - 1); // To set the instance values for the dataInstance model created Instance tmpData = new DenseInstance(numAttributes); index = 0; for (Iterator<Tuple> it = values.iterator(); it.hasNext();) { Tuple tuple = it.next(); for (int i = 0; i < numAttributes - 1; i++) tmpData.setValue((weka.core.Attribute) atts.elementAt(i), DataType.toDouble(tuple.get(i))); //tmpData.setValue((weka.core.Attribute) atts.elementAt(numAttributes-1), DataType.toString(tuple.get(numAttributes-1))); dataInstance.add(tmpData); 
dataInstance.instance(index).setValue(numAttributes - 1, DataType.toString(tuple.get(numAttributes - 1))); index = index + 1; } // Setting the class index dataInstance.setClassIndex(dataInstance.numAttributes() - 1); return dataInstance; } catch (Exception e) { System.err.println("Failed to process input; error - " + e.getMessage()); return null; } }
From source file:br.puc_rio.ele.lvc.interimage.datamining.DataParser.java
License:Apache License
@SuppressWarnings({ "unchecked", "rawtypes" }) public Instances parseData(BufferedReader buff) { try {/*from ww w . ja v a 2 s.co m*/ Instances dataInstance; //DataBag values = (DataBag)objData; int numAttributes = 0; // N_Features + 1 Class List<String> inputClass = new ArrayList<String>(); List<String[]> dataset = new ArrayList<String[]>(); // To find the number of samples (instances in a bag) String line; while ((line = buff.readLine()) != null) { if (!line.isEmpty()) { String[] data = line.split(","); if (numAttributes == 0) numAttributes = data.length; inputClass.add(data[data.length - 1]); dataset.add(data); } } HashSet classSet = new HashSet(inputClass); String[] classValue = (String[]) classSet.toArray(new String[0]); // To set the classes names in the attribute for the instance FastVector classNames = new FastVector(); for (int i = 0; i < classValue.length; i++) classNames.addElement(classValue[i]); // Creating the instance model N_Features + 1_ClassNames FastVector atts = new FastVector(); for (int i = 0; i < numAttributes - 1; i++) atts.addElement(new Attribute("att" + i)); dataInstance = new Instances("MyRelation", atts, numAttributes); dataInstance.insertAttributeAt(new Attribute("ClassNames", classNames), numAttributes - 1); // To set the instance values for the dataInstance model created Instance tmpData = new DenseInstance(numAttributes); int index = 0; for (int k = 0; k < dataset.size(); k++) { for (int i = 0; i < numAttributes - 1; i++) tmpData.setValue((weka.core.Attribute) atts.elementAt(i), DataType.toDouble(dataset.get(k)[i])); //tmpData.setValue((weka.core.Attribute) atts.elementAt(numAttributes-1), DataType.toString(tuple.get(numAttributes-1))); dataInstance.add(tmpData); dataInstance.instance(index).setValue(numAttributes - 1, DataType.toString(dataset.get(k)[numAttributes - 1])); index = index + 1; } // Setting the class index dataInstance.setClassIndex(dataInstance.numAttributes() - 1); return dataInstance; } catch (Exception e) { 
System.err.println("Failed to process input; error - " + e.getMessage()); return null; } }
From source file:br.ufpe.cin.mpos.offload.DynamicDecisionSystem.java
License:Apache License
public synchronized boolean isRemoteAdvantage(int InputSize, Remotable.Classifier classifierRemotable) { boolean resp = false; try {/* www . j av a 2 s .c o m*/ if ((!(this.classifierModel.equals(classifierRemotable.toString()))) || this.classifier == null) { Log.d("classificacao", "classificador=" + classifierRemotable.toString()); this.classifierModel = classifierRemotable.toString(); loadClassifier(classifierRemotable); } Cursor c = dc.getData(); int colunas = c.getColumnCount(); Instance instance = new DenseInstance(colunas - 2); ArrayList<String> values = new ArrayList<String>(); ArrayList<Attribute> atts = new ArrayList<Attribute>(); if (c.moveToFirst()) { for (int i = 1; i <= colunas - 2; i++) { String feature = c.getColumnName(i); String value = c.getString(i); Attribute attribute; if (feature.equals(DatabaseManager.InputSize)) { values.add("" + InputSize); attribute = new Attribute(DatabaseManager.InputSize); } else { String[] strings = populateAttributes(i); ArrayList<String> attValues = new ArrayList<String>(Arrays.asList(strings)); attribute = new Attribute(feature, attValues); if (value != null) { values.add(value); } } atts.add(attribute); } Instances instances = new Instances("header", atts, atts.size()); instances.setClassIndex(instances.numAttributes() - 1); instance.setDataset(instances); for (int i = 0; i < atts.size(); i++) { if (i == 9) { instance.setMissing(atts.get(9)); } else if (atts.get(i).name().equals(DatabaseManager.InputSize)) { instance.setValue(atts.get(i), InputSize); } else { instance.setValue(atts.get(i), values.get(i)); } } double value = -1; value = classifier.distributionForInstance(instance)[0]; Log.d("classificacao", instance.toString() + " classifiquei com o seguinte valor" + value); resp = (0.7 <= value); if (resp) { Log.d("classificacao", "sim"); Log.d("Finalizado", "classifiquei " + instance.toString() + " com sim"); } else { Log.d("classificacao", "nao"); Log.d("Finalizado", "classifiquei " + instance.toString() + " com 
nao"); } } } catch (Exception e) { e.printStackTrace(); Log.e("sqlLite", e.getMessage()); Log.e("sqlLite", "Causa: " + e.getCause()); } return resp; }
From source file:br.unicamp.ic.recod.gpsi.gp.gpsiJGAPRoiFitnessFunction.java
/**
 * Computes the fitness of a GP program as the mean accuracy, in [0, 1], of a
 * {@code SimpleLogistic} classifier over a 5-fold cross-validation of the
 * training entities loaded into a {@code gpsiMLDataset}.
 *
 * Failures while loading the data set or while running the cross-validation
 * are logged at SEVERE level and do not propagate; the accumulated accuracy
 * (possibly 0) is still divided by the number of folds and returned.
 *
 * @param igpp the program being evaluated
 * @return sum of the per-fold percentage of correct predictions divided by
 *         {@code folds * 100}
 */
@Override
protected double evaluate(IGPProgram igpp) {

    double mean_accuracy = 0.0;

    // Not used further in this method; construction is kept because the
    // combination step below is still pending.
    gpsiRoiBandCombiner roiBandCombinator = new gpsiRoiBandCombiner(new gpsiJGAPVoxelCombiner(super.b, igpp));
    // TODO: The ROI descriptors must combine the images first
    //roiBandCombinator.combineEntity(this.dataset.getTrainingEntities());

    gpsiMLDataset mlDataset = new gpsiMLDataset(this.descriptor);
    try {
        mlDataset.loadWholeDataset(this.dataset, true);
    } catch (Exception ex) {
        Logger.getLogger(gpsiJGAPRoiFitnessFunction.class.getName()).log(Level.SEVERE, null, ex);
    }

    int dimensionality = mlDataset.getDimensionality();
    int n_classes = mlDataset.getTrainingEntities().keySet().size();
    int n_entities = mlDataset.getNumberOfTrainingEntities();
    ArrayList<Byte> listOfClasses = new ArrayList<>(mlDataset.getTrainingEntities().keySet());

    Attribute[] attributes = new Attribute[dimensionality];
    FastVector fvClassVal = new FastVector(n_classes);

    int i, j;
    for (i = 0; i < dimensionality; i++) {
        attributes[i] = new Attribute("f" + Integer.toString(i));
    }
    // The nominal class values follow the order of listOfClasses.
    for (i = 0; i < n_classes; i++) {
        fvClassVal.addElement(Integer.toString(listOfClasses.get(i)));
    }

    Attribute classes = new Attribute("class", fvClassVal);

    FastVector fvWekaAttributes = new FastVector(dimensionality + 1);
    for (i = 0; i < dimensionality; i++) {
        fvWekaAttributes.addElement(attributes[i]);
    }
    fvWekaAttributes.addElement(classes);

    Instances instances = new Instances("Rel", fvWekaAttributes, n_entities);
    instances.setClassIndex(dimensionality);

    Instance iExample;
    for (int classIdx = 0; classIdx < n_classes; classIdx++) {
        Byte label = listOfClasses.get(classIdx);
        for (double[] featureVector : mlDataset.getTrainingEntities().get(label)) {
            iExample = new Instance(dimensionality + 1);
            // Copy each feature into its own slot (index j, not the stale outer i).
            for (j = 0; j < dimensionality; j++) {
                iExample.setValue(j, featureVector[j]);
            }
            // A nominal value is stored as the index of its label in the
            // attribute's value list, which was filled in listOfClasses order;
            // the raw byte label is not necessarily that index.
            iExample.setValue(dimensionality, classIdx);
            instances.add(iExample);
        }
    }

    int folds = 5;
    Random rand = new Random();
    Instances randData = new Instances(instances);
    randData.randomize(rand);

    Instances trainingSet, testingSet;
    Classifier cModel;
    Evaluation eTest;

    try {
        for (i = 0; i < folds; i++) {
            cModel = (Classifier) new SimpleLogistic();
            trainingSet = randData.trainCV(folds, i);
            testingSet = randData.testCV(folds, i);

            cModel.buildClassifier(trainingSet);

            eTest = new Evaluation(trainingSet);
            eTest.evaluateModel(cModel, testingSet);

            mean_accuracy += eTest.pctCorrect();
        }
    } catch (Exception ex) {
        Logger.getLogger(gpsiJGAPRoiFitnessFunction.class.getName()).log(Level.SEVERE, null, ex);
    }

    // pctCorrect() is a percentage, so divide by 100 as well as by the fold count.
    mean_accuracy /= (folds * 100);

    return mean_accuracy;
}
From source file:c4.pkg5crossv.Classifier.java
public static void trainAndTest() throws FileNotFoundException, IOException, Exception { Instances data = DataLoad.loadData("./src/data/irysy.arff"); data.setClassIndex(data.numAttributes() - 1); //Losowy podzial tablicy data.randomize(new Random()); double percent = 60.0; int trainSize = (int) Math.round(data.numInstances() * percent / 100); int testSize = data.numInstances() - trainSize; Instances trainData = new Instances(data, 0, trainSize); Instances testData = new Instances(data, trainSize, testSize); String[] options = Utils.splitOptions("-U -M 10"); J48 tree = new J48(); tree.setOptions(options);//from ww w. j ava2 s .c o m tree.buildClassifier(trainData); Evaluation eval2 = new Evaluation(trainData); eval2.crossValidateModel(tree, testData, 10, new Random(1)); // 5 - fold System.out.println(eval2.toSummaryString("Wyniki:", false)); //Wypisanie testovania cross validation }
From source file:ca.uottawa.balie.WekaLearner.java
License:Open Source License
/** * Creates a new classification algorithm. * //from w w w.j a v a 2 s . co m * @param pi_Attributes Array of attributes * @param pi_ClassAttributes Class attribute */ public WekaLearner(WekaAttribute[] pi_Attributes, String[] pi_ClassAttributes) { m_ClassAttributes = pi_ClassAttributes; m_AttributeLabels = new String[pi_Attributes.length]; // Create the Weka Attributes m_WekaAttributes = new FastVector(pi_Attributes.length + 1); for (int i = 0; i != pi_Attributes.length; ++i) { if (pi_Attributes[i].Label().equals(CLASS_LABEL)) { throw new Error("Attribute cannot be named \"Class\" (reserved)"); } m_AttributeLabels[i] = pi_Attributes[i].Label(); if (pi_Attributes[i].IsNumeric()) { Attribute anAttribute = new Attribute(pi_Attributes[i].Label()); m_WekaAttributes.addElement(anAttribute); } else { Attribute anAttribute = new Attribute(pi_Attributes[i].Label(), pi_Attributes[i].Values()); m_WekaAttributes.addElement(anAttribute); } } // Create The class attribute FastVector fvClass = new FastVector(pi_ClassAttributes.length); for (int i = 0; i != pi_ClassAttributes.length; ++i) { fvClass.addElement(pi_ClassAttributes[i]); } Attribute aClass = new Attribute(CLASS_LABEL, fvClass); m_WekaAttributes.addElement(aClass); m_TrainingSet = new Instances("", m_WekaAttributes, 0); m_TrainingSet.setClassIndex(aClass.index()); m_TestingSet = new Instances("", m_WekaAttributes, 0); m_TestingSet.setClassIndex(aClass.index()); // create an empty confusion matrix.. will be populated at evaluation time m_ConfusionMatrix = new double[0][0]; m_bDoubleOnly = false; }
From source file:ca.uottawa.balie.WekaLearner.java
License:Open Source License
public WekaLearner(FastVector attrsMerged, String[] attrlblMerged, String[] classList, Instances trainMerged) { m_AttributeLabels = attrlblMerged;// w w w . java 2 s .c o m m_ClassAttributes = classList; m_WekaAttributes = attrsMerged; m_TrainingSet = trainMerged; m_TestingSet = new Instances("", m_WekaAttributes, 0); m_TestingSet.setClassIndex(m_WekaAttributes.size() - 1); }
From source file:categorization.SpectralWEKA.java
License:Open Source License
public void buildClusterer(ArrayList<String> seqDB, double[][] sm) { seqList = seqDB;/*ww w .j a v a 2 s.c om*/ this.setSimMatrix(sm); Attribute seqString = new Attribute("sequence", (FastVector) null); FastVector attrInfo = new FastVector(); attrInfo.addElement(seqString); Instances data = new Instances("data", attrInfo, 0); for (int i = 0; i < seqList.size(); i++) { Instance currentInst = new Instance(1); currentInst.setDataset(data); currentInst.setValue(0, seqList.get(i)); data.add(currentInst); } try { buildClusterer(data); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } }
From source file:cezeri.utils.FactoryInstance.java
/**
 * Creates a data set from a matrix obtained via
 * {@code CMatrix.getInstance().zeros(1, nCols)}: one numeric attribute per
 * matrix column, named "f1", "f2", ..., and one instance per matrix row. The
 * last attribute is set as the class.
 *
 * @param relationName name of the resulting relation
 * @param nCols        column count passed to {@code zeros}
 * @return the populated data set
 */
public static Instances generateInstances(String relationName, int nCols) {
    CMatrix cm = CMatrix.getInstance().zeros(1, nCols);

    // Header: attribute names are 1-based.
    FastVector att = new FastVector();
    int col = 0;
    while (col < cm.getColumnNumber()) {
        att.addElement(new Attribute("f" + (col + 1)));
        col++;
    }

    Instances ret = new Instances(relationName, att, cm.getRowNumber());

    // One instance per matrix row, copied cell by cell.
    int row = 0;
    while (row < cm.getRowNumber()) {
        Instance ins = new Instance(cm.getColumnNumber());
        for (int c = 0; c < cm.getColumnNumber(); c++) {
            ins.setValue(c, cm.get2DArrayDouble()[row][c]);
        }
        ret.add(ins);
        row++;
    }

    ret.setClassIndex(ret.numAttributes() - 1);
    return ret;
}