List of usage examples for weka.core.Instances renameAttribute

public void renameAttribute(Attribute att, String name)
public void renameAttribute(int att, String name)

Both overloads rename an attribute of the dataset in place: one takes the Attribute object itself, the other its index. The examples below use both forms.
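Before the harvested examples, a minimal self-contained sketch of the call (assuming Weka 3.7+, where Instances accepts an ArrayList of attributes; all names here are illustrative):

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.Instances;

public class RenameAttributeDemo {
    public static void main(String[] args) {
        // Build an empty two-attribute dataset header.
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        atts.add(new Attribute("old_name"));   // numeric attribute
        atts.add(new Attribute("other"));      // numeric attribute
        Instances data = new Instances("demo", atts, 0);

        // Rename by Attribute reference (the first overload above) ...
        data.renameAttribute(data.attribute("old_name"), "new_name");
        // ... or equivalently by index (the second overload).
        data.renameAttribute(1, "other_renamed");

        // The printed header now shows the new names.
        System.out.println(data);
    }
}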
From source file:app.RunApp.java
License:Open Source License
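Here renameAttribute(int, String) gives each binary-relevance dataset's class attribute the name of the label it was built from: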
/**
 * Transform multi-label dataset into one or more multi-class or binary datasets
 *
 * @return Positive number if successful and negative otherwise
 */
private int transform() {
    if (dataset == null) {
        JOptionPane.showMessageDialog(null, "You must load a dataset.", "alert", JOptionPane.ERROR_MESSAGE);
        return -1;
    }
    transformedDatasets.clear();
    if (radioBRTrans.isSelected()) {
        BinaryRelevanceTransformation BRTrans = new BinaryRelevanceTransformation(dataset);
        for (int i = 0; i < dataset.getNumLabels(); i++) {
            try {
                LabelsMetaDataImpl newLMD = (LabelsMetaDataImpl) dataset.getLabelsMetaData().clone();
                for (int j = 0; j < dataset.getNumLabels(); j++) {
                    if (i != j) {
                        newLMD.removeLabelNode(dataset.getLabelNames()[j]);
                    }
                }
                Instances inst = BRTrans.transformInstances(i);
                inst.renameAttribute(inst.classIndex(), dataset.getLabelNames()[i]);
                transformedDatasets.add(inst);
            } catch (Exception ex) {
                Logger.getLogger(RunApp.class.getName()).log(Level.SEVERE, null, ex);
                return -1;
            }
        }
    } else if (radioLPTrans.isSelected()) {
        try {
            LabelPowersetTransformation LPTrans = new LabelPowersetTransformation();
            Instances inst = LPTrans.transformInstances(dataset);
            transformedDatasets.add(inst);
        } catch (Exception ex) {
            Logger.getLogger(RunApp.class.getName()).log(Level.SEVERE, null, ex);
            return -1;
        }
    } else if (radioRemoveLabelsTrans.isSelected()) {
        try {
            Instances inst = RemoveAllLabels.transformInstances(dataset);
            transformedDatasets.add(inst);
        } catch (Exception ex) {
            Logger.getLogger(RunApp.class.getName()).log(Level.SEVERE, null, ex);
            return -1;
        }
    } else if (radioIncludeLabelsTrans.isSelected()) {
        try {
            IncludeLabelsTransformation includeTrans = new IncludeLabelsTransformation();
            Instances inst = includeTrans.transformInstances(dataset);
            transformedDatasets.add(inst);
        } catch (Exception ex) {
            Logger.getLogger(RunApp.class.getName()).log(Level.SEVERE, null, ex);
            return -1;
        }
    }
    jButtonSaveDatasetsTrans.setEnabled(true);
    return 1;
}
From source file:at.aictopic1.sentimentanalysis.machinelearning.impl.TwitterClassifer.java
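This snippet renames two attributes by index so they have unique, non-clashing names before StringToWordVector introduces word attributes: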
public Instances loadTrainingData() {
    try {
        //DataSource source = new DataSource("C:\\Users\\David\\Documents\\Datalogi\\TU Wien\\2014W_Advanced Internet Computing\\Labs\\aic_group2_topic1\\Other Stuff\\training_dataset.arff");
        DataSource source = new DataSource(
                "C:\\Users\\David\\Documents\\Datalogi\\TU Wien\\2014W_Advanced Internet Computing\\Labs\\Data sets\\labelled.arff");
        // System.out.println("Data Structure pre processing: " + source.getStructure());
        Instances data = source.getDataSet();
        // Get and save the dataStructure of the dataset
        dataStructure = source.getStructure();
        try {
            // Save the datastructure to file
            // serialize dataStructure
            weka.core.SerializationHelper.write(modelDir + algorithm + ".dataStruct", dataStructure);
        } catch (Exception ex) {
            Logger.getLogger(TwitterClassifer.class.getName()).log(Level.SEVERE, null, ex);
        }
        // Set class index
        data.setClassIndex(2);
        // Giving attributes unique names before converting strings
        data.renameAttribute(2, "class_attr");
        data.renameAttribute(0, "twitter_id");
        // Convert String attribute to Words using filter
        StringToWordVector filter = new StringToWordVector();
        filter.setInputFormat(data);
        Instances filteredData = Filter.useFilter(data, filter);
        System.out.println("filteredData struct: " + filteredData.attribute(0));
        System.out.println("filteredData struct: " + filteredData.attribute(1));
        System.out.println("filteredData struct: " + filteredData.attribute(2));
        return filteredData;
    } catch (Exception ex) {
        System.out.println("Error loading training set: " + ex.toString());
        return null;
        //Logger.getLogger(Trainer.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:de.uni_potsdam.hpi.bpt.promnicat.analysisModules.clustering.ProcessInstances.java
License:Open Source License
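A long self-test method; the renameAttribute call (together with renameAttributeValue) appears near the end, renaming the class attribute of a copied dataset: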
/**
 * Method for testing this class.
 *
 * @param argv
 *            should contain one element: the name of an ARFF file
 */
// @ requires argv != null;
// @ requires argv.length == 1;
// @ requires argv[0] != null;
public static void test(String[] argv) {
    ProcessInstances instances, secondInstances, train, test, empty;
    Random random = new Random(2);
    Reader reader;
    int start, num;
    FastVector testAtts, testVals;
    int i, j;
    try {
        if (argv.length > 1) {
            throw (new Exception("Usage: ProcessInstances [<filename>]"));
        }
        // Creating set of instances from scratch
        testVals = new FastVector(2);
        testVals.addElement("first_value");
        testVals.addElement("second_value");
        testAtts = new FastVector(2);
        testAtts.addElement(new Attribute("nominal_attribute", testVals));
        testAtts.addElement(new Attribute("numeric_attribute"));
        instances = new ProcessInstances("test_set", testAtts, new FastVector(), 10);
        instances.addInstance(new ProcessInstance(instances.numAttributes()));
        instances.addInstance(new ProcessInstance(instances.numAttributes()));
        instances.addInstance(new ProcessInstance(instances.numAttributes()));
        instances.setClassIndex(0);
        System.out.println("\nSet of instances created from scratch:\n");
        System.out.println(instances);
        if (argv.length == 1) {
            String filename = argv[0];
            reader = new FileReader(filename);
            // Read first five instances and print them
            System.out.println("\nFirst five instances from file:\n");
            instances = new ProcessInstances(reader, 1);
            instances.setClassIndex(instances.numAttributes() - 1);
            i = 0;
            while ((i < 5) && (instances.readInstance(reader))) {
                i++;
            }
            System.out.println(instances);
            // Read all the instances in the file
            reader = new FileReader(filename);
            instances = new ProcessInstances(reader);
            // Make the last attribute be the class
            instances.setClassIndex(instances.numAttributes() - 1);
            // Print header and instances.
            System.out.println("\nDataset:\n");
            System.out.println(instances);
            System.out.println("\nClass index: " + instances.classIndex());
        }
        // Test basic methods based on class index.
        System.out.println("\nClass name: " + instances.classAttribute().name());
        System.out.println("\nClass index: " + instances.classIndex());
        System.out.println("\nClass is nominal: " + instances.classAttribute().isNominal());
        System.out.println("\nClass is numeric: " + instances.classAttribute().isNumeric());
        System.out.println("\nClasses:\n");
        for (i = 0; i < instances.numClasses(); i++) {
            System.out.println(instances.classAttribute().value(i));
        }
        System.out.println("\nClass values and labels of instances:\n");
        for (i = 0; i < instances.numInstances(); i++) {
            ProcessInstance inst = instances.getInstance(i);
            System.out.print(inst.classValue() + "\t");
            System.out.print(inst.toString(inst.classIndex()));
            if (instances.getInstance(i).classIsMissing()) {
                System.out.println("\tis missing");
            } else {
                System.out.println();
            }
        }
        // Create random weights.
        System.out.println("\nCreating random weights for instances.");
        for (i = 0; i < instances.numInstances(); i++) {
            instances.getInstance(i).setWeight(random.nextDouble());
        }
        // Print all instances and their weights (and the sum of weights).
        System.out.println("\nInstances and their weights:\n");
        System.out.println(instances.instancesAndWeights());
        System.out.print("\nSum of weights: ");
        System.out.println(instances.sumOfWeights());
        // Insert an attribute
        secondInstances = new ProcessInstances(instances);
        Attribute testAtt = new Attribute("Inserted");
        secondInstances.insertAttributeAt(testAtt, 0);
        System.out.println("\nSet with inserted attribute:\n");
        System.out.println(secondInstances);
        System.out.println("\nClass name: " + secondInstances.classAttribute().name());
        // Delete the attribute
        secondInstances.deleteAttributeAt(0);
        System.out.println("\nSet with attribute deleted:\n");
        System.out.println(secondInstances);
        System.out.println("\nClass name: " + secondInstances.classAttribute().name());
        // Test if headers are equal
        System.out.println("\nHeaders equal: " + instances.equalHeaders(secondInstances) + "\n");
        // Print data in internal format.
        System.out.println("\nData (internal values):\n");
        for (i = 0; i < instances.numInstances(); i++) {
            for (j = 0; j < instances.numAttributes(); j++) {
                if (instances.getInstance(i).isMissing(j)) {
                    System.out.print("? ");
                } else {
                    System.out.print(instances.getInstance(i).value(j) + " ");
                }
            }
            System.out.println();
        }
        // Just print header
        System.out.println("\nEmpty dataset:\n");
        empty = new ProcessInstances(instances, 0);
        System.out.println(empty);
        System.out.println("\nClass name: " + empty.classAttribute().name());
        // Create copy and rename an attribute and a value (if possible)
        if (empty.classAttribute().isNominal()) {
            Instances copy = new ProcessInstances(empty, 0);
            copy.renameAttribute(copy.classAttribute(), "new_name");
            copy.renameAttributeValue(copy.classAttribute(), copy.classAttribute().value(0), "new_val_name");
            System.out.println("\nDataset with names changed:\n" + copy);
            System.out.println("\nOriginal dataset:\n" + empty);
        }
        // Create and print subset of instances.
        start = instances.numInstances() / 4;
        num = instances.numInstances() / 2;
        System.out.print("\nSubset of dataset: ");
        System.out.println(num + " instances from " + (start + 1) + ". instance");
        secondInstances = new ProcessInstances(instances, start, num);
        System.out.println("\nClass name: " + secondInstances.classAttribute().name());
        // Print all instances and their weights (and the sum of weights).
        System.out.println("\nInstances and their weights:\n");
        System.out.println(secondInstances.instancesAndWeights());
        System.out.print("\nSum of weights: ");
        System.out.println(secondInstances.sumOfWeights());
        // Create and print training and test sets for 3-fold cross-validation.
        System.out.println("\nTrain and test folds for 3-fold CV:");
        if (instances.classAttribute().isNominal()) {
            instances.stratify(3);
        }
        for (j = 0; j < 3; j++) {
            train = instances.trainCV(3, j, new Random(1));
            test = instances.testCV(3, j);
            // Print all instances and their weights (and the sum of weights).
            System.out.println("\nTrain: ");
            System.out.println("\nInstances and their weights:\n");
            System.out.println(train.instancesAndWeights());
            System.out.print("\nSum of weights: ");
            System.out.println(train.sumOfWeights());
            System.out.println("\nClass name: " + train.classAttribute().name());
            System.out.println("\nTest: ");
            System.out.println("\nInstances and their weights:\n");
            System.out.println(test.instancesAndWeights());
            System.out.print("\nSum of weights: ");
            System.out.println(test.sumOfWeights());
            System.out.println("\nClass name: " + test.classAttribute().name());
        }
        // Randomize instances and print them.
        System.out.println("\nRandomized dataset:");
        instances.randomize(random);
        // Print all instances and their weights (and the sum of weights).
        System.out.println("\nInstances and their weights:\n");
        System.out.println(instances.instancesAndWeights());
        System.out.print("\nSum of weights: ");
        System.out.println(instances.sumOfWeights());
        // Sort instances according to first attribute and print them.
        System.out.print("\nInstances sorted according to first attribute:\n ");
        instances.sort(0);
        // Print all instances and their weights (and the sum of weights).
        System.out.println("\nInstances and their weights:\n");
        System.out.println(instances.instancesAndWeights());
        System.out.print("\nSum of weights: ");
        System.out.println(instances.sumOfWeights());
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:elh.eus.absa.WekaWrapper.java
License:Open Source License
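One-vs-all training: a replacement class attribute is inserted, the original is deleted, and renameAttribute restores the original class attribute name: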
/**
 * Train one vs. all models over the given training data.
 *
 * @param modelpath directory to store each model for the one vs. all method
 * @param prefix prefix the models should have (each model will have the name of its class appended)
 * @throws Exception
 */
public void trainOneVsAll(String modelpath, String prefix) throws Exception {
    Instances orig = new Instances(traindata);
    Enumeration<Object> classValues = traindata.classAttribute().enumerateValues();
    String classAtt = traindata.classAttribute().name();
    while (classValues.hasMoreElements()) {
        String v = (String) classValues.nextElement();
        System.err.println("trainer onevsall for class " + v + " classifier");
        // needed because of weka's sparse data format problems THIS IS TROUBLE! ...
        if (v.equalsIgnoreCase("dummy")) {
            continue;
        }
        // copy instances and set the same class value
        Instances ovsa = new Instances(orig);
        // create a new class attribute and declare it along with its values
        ArrayList<String> classVal = new ArrayList<String>();
        classVal.add("dummy"); // needed because of weka's sparse data format problems...
        classVal.add(v);
        classVal.add("UNKNOWN");
        ovsa.insertAttributeAt(new Attribute(classAtt + "2", classVal), ovsa.numAttributes());
        // relabel all instances that do not have the current class value as "UNKNOWN"
        for (int i = 0; i < ovsa.numInstances(); i++) {
            Instance inst = ovsa.instance(i);
            String instClass = inst.stringValue(ovsa.attribute(classAtt).index());
            if (instClass.equalsIgnoreCase(v)) {
                inst.setValue(ovsa.attribute(classAtt + "2").index(), v);
            } else {
                inst.setValue(ovsa.attribute(classAtt + "2").index(), "UNKNOWN");
            }
        }
        // delete the old class attribute and set the new one
        ovsa.setClassIndex(ovsa.attribute(classAtt + "2").index());
        ovsa.deleteAttributeAt(ovsa.attribute(classAtt).index());
        ovsa.renameAttribute(ovsa.attribute(classAtt + "2").index(), classAtt);
        ovsa.setClassIndex(ovsa.attribute(classAtt).index());
        // build the classifier, cross-validate and store the model
        setTraindata(ovsa);
        saveModel(modelpath + File.separator + prefix + "_" + v + ".model");
        setTestdata(ovsa);
        testModel(modelpath + File.separator + prefix + "_" + v + ".model");
        System.err.println("trained onevsall " + v + " classifier");
    }
    setTraindata(orig);
}
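The insert/delete/rename sequence above is a recurring Weka idiom for replacing a class attribute in place while keeping its name. A condensed sketch of just that pattern (a hypothetical helper, assuming Weka 3.7+ and that the caller fills in the new attribute's per-instance values):

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.Instances;

public class ClassAttributeSwap {
    /**
     * Replaces the class attribute of `data` (named `classAtt`) with a fresh
     * nominal attribute carrying `labels`, keeping the original name.
     */
    public static void swapClassAttribute(Instances data, String classAtt, ArrayList<String> labels) {
        // Insert the replacement attribute at the end, under a temporary name.
        data.insertAttributeAt(new Attribute(classAtt + "2", labels), data.numAttributes());
        // ... per-instance values for the new attribute would be set here ...
        // Make the new attribute the class, drop the old one, and rename back.
        data.setClassIndex(data.attribute(classAtt + "2").index());
        data.deleteAttributeAt(data.attribute(classAtt).index());
        data.renameAttribute(data.attribute(classAtt + "2").index(), classAtt);
        data.setClassIndex(data.attribute(classAtt).index());
    }
}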
From source file:meka.classifiers.multilabel.MULAN.java
License:Open Source License
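MULAN is sensitive to characters such as hyphens in attribute names, so all non-target attributes are renamed to safe generated names before conversion: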
@Override
public void buildClassifier(Instances instances) throws Exception {
    testCapabilities(instances);
    long before = System.currentTimeMillis();
    if (getDebug())
        System.err.print(" moving target attributes to the beginning ... ");
    Random r = instances.getRandomNumberGenerator(0);
    String name = "temp_" + MLUtils.getDatasetName(instances) + "_" + r.nextLong() + ".arff";
    System.err.println("Using temporary file: " + name);
    int L = instances.classIndex();
    // rename attributes, because MULAN doesn't deal well with hyphens etc.
    for (int i = L; i < instances.numAttributes(); i++) {
        instances.renameAttribute(i, "a_" + i);
    }
    BufferedWriter writer = new BufferedWriter(new FileWriter(name));
    m_InstancesTemplate = F.meka2mulan(new Instances(instances), L);
    writer.write(m_InstancesTemplate.toString());
    writer.flush();
    writer.close();
    MultiLabelInstances train = new MultiLabelInstances(name, L);
    try {
        new File(name).delete();
    } catch (Exception e) {
        System.err.println(
                "[Error] Failed to delete temporary file: " + name + ". You may want to delete it manually.");
    }
    if (getDebug())
        System.out.println(" done ");
    long after = System.currentTimeMillis();
    System.err.println("[Note] Discount " + ((after - before) / 1000.0) + " seconds from this build time");
    m_InstancesTemplate = new Instances(train.getDataSet(), 0);
    System.out.println("CLASSIFIER " + m_Classifier);
    //m_InstancesTemplate.delete();
    if (m_MethodString.equals("BR"))
        m_MULAN = new BinaryRelevance(m_Classifier);
    else if (m_MethodString.equals("LP"))
        m_MULAN = new LabelPowerset(m_Classifier);
    else if (m_MethodString.equals("CLR"))
        m_MULAN = new CalibratedLabelRanking(m_Classifier);
    else if (m_MethodString.equals("RAkEL1")) {
        m_MULAN = new RAkEL(new LabelPowerset(m_Classifier), 10, L / 2);
        System.out.println("m=10,k=" + (L / 2));
    } else if (m_MethodString.equals("RAkEL2")) {
        m_MULAN = new RAkEL(new LabelPowerset(m_Classifier), 2 * L, 3);
        System.out.println("m=" + (L * 2) + ",k=3");
    } else if (m_MethodString.equals("MLkNN"))
        m_MULAN = new MLkNN(10, 1.0);
    else if (m_MethodString.equals("IBLR_ML"))
        m_MULAN = new IBLR_ML(10);
    else if (m_MethodString.equals("BPMLL")) {
        // BPMLL is run with the number of hidden units equal to 20% of the input units.
        m_MULAN = new BPMLL();
        ((BPMLL) m_MULAN).setLearningRate(0.01);
        ((BPMLL) m_MULAN).setHiddenLayers(new int[] { 30 });
        ((BPMLL) m_MULAN).setTrainingEpochs(100);
    } else if (m_MethodString.startsWith("HOMER")) {
        //Class m = Class.forName("HierarchyBuilder.Method.Random");
        //Class w = Class.forName("mulan.classifier.LabelPowerset");
        //Constructor c = new h.getConstructor(new Class[]{MultiLabelLearner.class, Integer.TYPE, HierarchyBuilder.Method.class});
        //Object obj = h.newInstance();
        String ops[] = m_MethodString.split("\\.");
        // number of clusters
        int n = 3;
        try {
            n = Integer.parseInt(ops[2]);
        } catch (Exception e) {
            System.err.println("[Warning] Could not parse number of clusters, using default: " + n);
        }
        // learner
        // @TODO use reflection here
        MultiLabelLearner mll = new LabelPowerset(m_Classifier);
        if (ops[3].equalsIgnoreCase("BinaryRelevance")) {
            mll = new BinaryRelevance(m_Classifier);
        } else if (ops[3].equalsIgnoreCase("ClassifierChain")) {
            mll = new ClassifierChain(m_Classifier);
        } else if (ops[3].equalsIgnoreCase("LabelPowerset")) {
            // already set
        } else {
            System.err.println("[Warning] Did not recognise classifier type String, using default: LabelPowerset");
        }
        if (getDebug()) {
            System.out.println("HOMER(" + mll + "," + n + "," + ops[1] + ")");
        }
        m_MULAN = new HOMER(mll, n, HierarchyBuilder.Method.valueOf(ops[1]));
    } else
        throw new Exception("Could not find MULAN Classifier by that name: " + m_MethodString);
    m_MULAN.setDebug(getDebug());
    m_MULAN.build(train);
}
From source file:meka.filters.multilabel.SuperNodeFilter.java
License:Open Source License
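Each class attribute is renamed to an encoded name before labels are merged into super-nodes: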
@Override
public Instances process(Instances D) throws Exception {
    //System.out.println("PROCESS! = " + D.numInstances());
    int L = D.classIndex();
    D = new Instances(D); // D_
    // rename classes
    for (int j = 0; j < L; j++) {
        D.renameAttribute(j, encodeClass(j));
    }
    // merge labels
    D = mergeLabels(D, indices, m_P, m_N);
    // templates
    x_template = D.firstInstance();
    setOutputFormat(D);
    //System.out.println("PROCESS! => " + D);
    return D;
}
From source file:myclassifier.Util.java
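An attribute is discretised into a thresholded copy via the Add filter, the original is removed, and renameAttribute gives the copy the original attribute's name: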
public static Instances setAttributeThreshold(Instances data, Attribute att, int threshold) throws Exception {
    Instances temp = new Instances(data);
    Add filter = new Add();
    filter.setAttributeName("thresholded " + att.name());
    filter.setAttributeIndex(String.valueOf(att.index() + 2));
    filter.setNominalLabels("<=" + threshold + ",>" + threshold);
    filter.setInputFormat(temp);
    Instances thresholdedData = Filter.useFilter(data, filter);
    for (int i = 0; i < thresholdedData.numInstances(); i++) {
        if ((int) thresholdedData.instance(i).value(thresholdedData.attribute(att.name())) <= threshold)
            thresholdedData.instance(i).setValue(thresholdedData.attribute("thresholded " + att.name()),
                    "<=" + threshold);
        else
            thresholdedData.instance(i).setValue(thresholdedData.attribute("thresholded " + att.name()),
                    ">" + threshold);
    }
    thresholdedData = wekaCode.removeAttributes(thresholdedData, String.valueOf(att.index() + 1));
    thresholdedData.renameAttribute(thresholdedData.attribute("thresholded " + att.name()), att.name());
    return thresholdedData;
}
From source file:org.opentox.jaqpot3.qsar.predictor.MissingValueFilterPredictor.java
License:Open Source License
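After dropping ignored attributes and replacing missing values, each remaining feature attribute is renamed according to the featureToMVH mapping: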
@Override
public Instances predict(Instances data) throws JaqpotException {
    HashSet<String> ignoredUris = (HashSet<String>) model.getActualModel().getSerializableActualModel();
    for (String attribute2Bignored : ignoredUris) {
        Attribute attr = data.attribute(attribute2Bignored);
        if (attr != null) {
            data.deleteAttributeAt(attr.index());
        }
    }
    updateFeatureMap(model);
    weka.filters.unsupervised.attribute.ReplaceMissingValues replacer = new ReplaceMissingValues();
    try {
        replacer.setInputFormat(data);
    } catch (Exception ex) {
        Logger.getLogger(MissingValueFilterPredictor.class.getName()).log(Level.SEVERE, null, ex);
        throw new JaqpotException(ex);
    }
    Iterator<String> features = featureToMVH.keySet().iterator();
    String nextFeature = null;
    Attribute currentAttribute = null;
    while (features.hasNext()) {
        nextFeature = features.next();
        currentAttribute = data.attribute(nextFeature);
        if (currentAttribute == null) {
            throw new JaqpotException("The dataset you provided does not contain the necessary "
                    + "feature : " + nextFeature);
        }
        data.renameAttribute(currentAttribute, featureToMVH.get(nextFeature));
    }
    return data;
}
From source file:org.opentox.jaqpot3.qsar.predictor.PLSPredictor.java
License:Open Source License
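Predicted attributes are renamed by index to feature URIs, and the PLS output attribute "Class" is renamed to the configured target: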
@Override
public Instances predict(Instances input) throws JaqpotException {
    PLSModel actual = (PLSModel) model.getActualModel().getSerializableActualModel();
    PLSFilter plsFilter = actual.getPls();
    Instances newData = InstancesUtil.sortForPMMLModel(model.getIndependentFeatures(), trFieldsAttrIndex,
            input, -1);
    try {
        newData = Filter.useFilter(newData, plsFilter);
    } catch (Exception ex) {
        Logger.getLogger(PLSPredictor.class.getName()).log(Level.SEVERE, null, ex);
    }
    AttributeCleanup justCompounds = new AttributeCleanup(true, nominal, numeric, string);
    Instances compounds = null;
    try {
        compounds = justCompounds.filter(input);
    } catch (QSARException ex) {
        // logger.debug(null, ex);
    }
    int i = 0;
    for (Feature f : model.getPredictedFeatures()) {
        newData.renameAttribute(i++, f.getUri().toString());
    }
    String target = null;
    for (Parameter p : model.getParameters()) {
        if ("target".equals(p.getName().getValueAsString())) {
            target = p.getValue().toString();
        }
    }
    newData.renameAttribute(newData.attribute("Class"), target);
    List<Integer> trFieldsIndex = WekaInstancesProcess.getTransformationFieldsAttrIndex(newData, pmmlObject);
    newData = WekaInstancesProcess.removeInstancesAttributes(newData, trFieldsIndex);
    newData = Instances.mergeInstances(compounds, newData);
    return newData;
}
From source file:org.opentox.jaqpot3.qsar.predictor.ScalingPredictor.java
License:Open Source License
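Feature values are min-max scaled in place, then each attribute is renamed according to the featureToScaled mapping: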
@Override
public Instances predict(Instances inputData) throws JaqpotException {
    try {
        ScalingModel actualModel = (ScalingModel) model.getActualModel();
        Map<String, Double> mins = actualModel.getMinVals2();
        Map<String, Double> maxs = actualModel.getMaxVals2();
        Attribute attr;
        for (String attrName : actualModel.getExcludeAttributesDoA()) {
            attr = inputData.attribute(attrName);
            if (attr != null) {
                inputData.deleteAttributeAt(attr.index());
            }
        }
        updateFeatureMap(model);
        //int Nattr = inputData.numAttributes();
        int Ninst = inputData.numInstances();
        Iterator<String> features = featureToScaled.keySet().iterator();
        String nextFeature = null;
        Attribute currentAttribute = null;
        double currentMin = 0;
        double currentMax = 1;
        double currentValue = 0;
        while (features.hasNext()) {
            nextFeature = features.next();
            currentMin = mins.get(nextFeature);
            currentMax = maxs.get(nextFeature);
            currentAttribute = inputData.attribute(nextFeature);
            for (int iInst = 0; iInst < Ninst; iInst++) {
                currentValue = inputData.instance(iInst).value(currentAttribute);
                currentValue = (currentValue - currentMin) / (currentMax - currentMin);
                inputData.instance(iInst).setValue(currentAttribute, currentValue);
            }
        }
        /* Rename attributes in `inputData` */
        features = featureToScaled.keySet().iterator();
        while (features.hasNext()) {
            nextFeature = features.next();
            currentAttribute = inputData.attribute(nextFeature);
            if (currentAttribute == null) {
                throw new JaqpotException("The dataset you provided does not contain the necessary "
                        + "feature : " + nextFeature);
            }
            inputData.renameAttribute(currentAttribute, featureToScaled.get(nextFeature));
        }
        return inputData;
    } catch (Throwable thr) {
        thr.printStackTrace();
    }
    return null;
}