List of usage examples for weka.core Instances instance
public Instance instance(int index)
From source file:elh.eus.absa.CLI.java
License:Open Source License
/**
 * Main access to the train-atc functionalities. Train ATC using a double one vs. all classifier
 * (E and A) for E#A aspect categories.
 *
 * Pipeline as implemented below:
 *   1. train/predict a one-vs-all classifier over the entity category ("entCat");
 *   2. relabel/duplicate instances for every entity class whose score exceeds {@code threshold};
 *   3. train/predict a second one-vs-all classifier over the attribute category ("attCat");
 *   4. emit one Opinion per accepted entity#attribute pair and print SemEval-2015 format XML.
 *
 * @param inputStream corpus to train on (format given by the "corpusFormat" CLI argument)
 * @throws IOException if the corpus or the optional test set cannot be read
 */
public final void trainATC2(final InputStream inputStream) throws IOException {
    // load training parameters file
    String paramFile = parsedArguments.getString("params");
    String testFile = parsedArguments.getString("testset");
    String paramFile2 = parsedArguments.getString("params2");
    String corpusFormat = parsedArguments.getString("corpusFormat");
    String lang = parsedArguments.getString("language");
    boolean nullSentenceOpinions = parsedArguments.getBoolean("nullSentences");
    boolean onlyTest = parsedArguments.getBoolean("testOnly");
    // acceptance thresholds for the first (entCat) and second (attCat) one-vs-all passes
    double threshold = 0.5;
    double threshold2 = 0.5;
    // NOTE(review): hard-coded, machine-specific model directory — should come from the CLI/params
    String modelsPath = "/home/inaki/elixa-atp/ovsaModels";
    CorpusReader reader = new CorpusReader(inputStream, corpusFormat, nullSentenceOpinions, lang);
    Features atcTrain = new Features(reader, paramFile, "3");
    Instances traindata = atcTrain.loadInstances(true, "atc");
    // in test-only mode replace the training corpus with the given test set
    if (onlyTest) {
        if (FileUtilsElh.checkFile(testFile)) {
            System.err.println("read from test file");
            reader = new CorpusReader(new FileInputStream(new File(testFile)), corpusFormat,
                    nullSentenceOpinions, lang);
            atcTrain.setCorpus(reader);
            traindata = atcTrain.loadInstances(true, "atc");
        }
    }
    // setting class attribute (entCat|attCat|entAttCat|polarityCat)
    WekaWrapper onevsall;
    try {
        // ---- first pass: one vs. all over the entity category ("entCat") ----
        Instances entdata = new Instances(traindata);
        // drop the competing class attributes so only "entCat" remains as candidate class
        entdata.deleteAttributeAt(entdata.attribute("attCat").index());
        entdata.deleteAttributeAt(entdata.attribute("entAttCat").index());
        entdata.setClassIndex(entdata.attribute("entCat").index());
        onevsall = new WekaWrapper(entdata, true);
        if (!onlyTest) {
            onevsall.trainOneVsAll(modelsPath, paramFile + "entCat");
            System.out.println("trainATC: one vs all models ready");
        }
        onevsall.setTestdata(entdata);
        // instance index -> (class value -> score)
        HashMap<Integer, HashMap<String, Double>> ovsaRes =
                onevsall.predictOneVsAll(modelsPath, paramFile + "entCat");
        System.out.println("trainATC: one vs all predictions ready");
        // invert the opinionId -> instance-index map so instances can be traced back to opinions
        HashMap<Integer, String> instOps = new HashMap<Integer, String>();
        for (String oId : atcTrain.getOpinInst().keySet()) {
            instOps.put(atcTrain.getOpinInst().get(oId), oId);
        }
        // reload features for the second (attribute) pass
        atcTrain = new Features(reader, paramFile2, "3");
        entdata = atcTrain.loadInstances(true, "attTrain2_data");
        entdata.deleteAttributeAt(entdata.attribute("entAttCat").index());
        Attribute insAtt = entdata.attribute("instanceId");
        // highest instanceId currently present; duplicated instances are numbered after it
        double maxInstId = entdata.kthSmallestValue(insAtt, entdata.numDistinctValues(insAtt) - 1);
        System.err.println("last instance has index: " + maxInstId);
        for (int ins = 0; ins < entdata.numInstances(); ins++) {
            System.err.println("ins" + ins);
            int i = (int) entdata.instance(ins).value(insAtt);
            Instance currentInst = entdata.instance(ins);
            String sId = reader.getOpinion(instOps.get(i)).getsId();
            String oId = instOps.get(i);
            reader.removeSentenceOpinions(sId);
            int oSubId = 0;
            for (String cl : ovsaRes.get(i).keySet()) {
                if (ovsaRes.get(i).get(cl) > threshold) {
                    // first accepted class relabels the existing instance; every further
                    // accepted class gets a fresh copy appended to the dataset
                    if (oSubId >= 1) {
                        Instance newIns = new SparseInstance(currentInst);
                        newIns.setDataset(entdata);
                        entdata.add(newIns);
                        newIns.setValue(insAtt, maxInstId + oSubId);
                        newIns.setClassValue(cl);
                        instOps.put((int) maxInstId + oSubId, oId);
                    } else {
                        currentInst.setClassValue(cl);
                    }
                    oSubId++;
                }
            }
        } // finished updating instances data
        // ---- second pass: one vs. all over the attribute category ("attCat") ----
        entdata.setClass(entdata.attribute("attCat"));
        onevsall = new WekaWrapper(entdata, true);
        // second classifier (original comment was in Basque: "Bigarren sailkatzailea")
        if (!onlyTest) {
            onevsall.trainOneVsAll(modelsPath, paramFile + "attCat");
            System.out.println("trainATC: one vs all attcat models ready");
        }
        // NOTE(review): models above are stored under the "...attCat" prefix but predicted with
        // "...entAttCat" — confirm this prefix mismatch is intentional
        ovsaRes = onevsall.predictOneVsAll(modelsPath, paramFile + "entAttCat");
        insAtt = entdata.attribute("instanceId");
        // NOTE(review): the first pass computed this as numDistinctValues(insAtt) - 1;
        // verify which of the two variants is the intended one
        maxInstId = entdata.kthSmallestValue(insAtt, insAtt.numValues());
        System.err.println("last instance has index: " + maxInstId);
        for (int ins = 0; ins < entdata.numInstances(); ins++) {
            System.err.println("ins: " + ins);
            int i = (int) entdata.instance(ins).value(insAtt);
            Instance currentInst = entdata.instance(ins);
            String sId = reader.getOpinion(instOps.get(i)).getsId();
            String oId = instOps.get(i);
            reader.removeSentenceOpinions(sId);
            int oSubId = 0;
            for (String cl : ovsaRes.get(i).keySet()) {
                // NOTE(review): threshold2 == threshold, so the inner check is redundant
                if (ovsaRes.get(i).get(cl) > threshold2) {
                    if (ovsaRes.get(i).get(cl) > threshold) {
                        // first accepted attribute replaces the original opinion; later
                        // ones are added as extra opinions for the same target
                        if (oSubId >= 1) {
                            String label = currentInst.stringValue(entdata.attribute("entAtt")) + "#" + cl;
                            // trgt, offsetFrom, offsetTo, polarity, cat, sId
                            Opinion op = new Opinion(oId + "_" + oSubId, "", 0, 0, "", label, sId);
                            reader.addOpinion(op);
                        } else {
                            String label = currentInst.stringValue(entdata.attribute("entAtt")) + "#" + cl;
                            reader.removeOpinion(oId);
                            Opinion op = new Opinion(oId + "_" + oSubId, "", 0, 0, "", label, sId);
                            reader.addOpinion(op);
                        }
                        oSubId++;
                    }
                }
            }
        } // finished updating instances data
        reader.print2Semeval2015format(paramFile + "entAttCat.xml");
    } catch (Exception e) {
        e.printStackTrace();
    }
    System.err.println("DONE CLI train-atc2 (oneVsAll)");
}
From source file:elh.eus.absa.WekaWrapper.java
License:Open Source License
/** * Train one vs all models over the given training data. * /* w ww . j a v a 2 s . c om*/ * @param modelpath directory to store each model for the one vs. all method * @param prefix prefix the models should have (each model will have the name of its class appended * @throws Exception */ public void trainOneVsAll(String modelpath, String prefix) throws Exception { Instances orig = new Instances(traindata); Enumeration<Object> classValues = traindata.classAttribute().enumerateValues(); String classAtt = traindata.classAttribute().name(); while (classValues.hasMoreElements()) { String v = (String) classValues.nextElement(); System.err.println("trainer onevsall for class " + v + " classifier"); //needed because of weka's sparse data format problems THIS IS TROUBLE! ... if (v.equalsIgnoreCase("dummy")) { continue; } // copy instances and set the same class value Instances ovsa = new Instances(orig); //create a new class attribute // // Declare the class attribute along with its values ArrayList<String> classVal = new ArrayList<String>(); classVal.add("dummy"); //needed because of weka's sparse data format problems... classVal.add(v); classVal.add("UNKNOWN"); ovsa.insertAttributeAt(new Attribute(classAtt + "2", classVal), ovsa.numAttributes()); //change all instance labels that have not the current class value to "other" for (int i = 0; i < ovsa.numInstances(); i++) { Instance inst = ovsa.instance(i); String instClass = inst.stringValue(ovsa.attribute(classAtt).index()); if (instClass.equalsIgnoreCase(v)) { inst.setValue(ovsa.attribute(classAtt + "2").index(), v); } else { inst.setValue(ovsa.attribute(classAtt + "2").index(), "UNKNOWN"); } } //delete the old class attribute and set the new. 
ovsa.setClassIndex(ovsa.attribute(classAtt + "2").index()); ovsa.deleteAttributeAt(ovsa.attribute(classAtt).index()); ovsa.renameAttribute(ovsa.attribute(classAtt + "2").index(), classAtt); ovsa.setClassIndex(ovsa.attribute(classAtt).index()); //build the classifier, crossvalidate and store the model setTraindata(ovsa); saveModel(modelpath + File.separator + prefix + "_" + v + ".model"); setTestdata(ovsa); testModel(modelpath + File.separator + prefix + "_" + v + ".model"); System.err.println("trained onevsall " + v + " classifier"); } setTraindata(orig); }
From source file:entities.ArffFile.java
/** * Dada una lista de parametros, se ejecuta el filtro de microagregacion. * Todos estos parametros son entrada del usuario. * @param df Puede ser Euclidian o Manhattan distance, se especifica en la entrada. * @param numCluster//from w ww . j a v a2s . c o m * @param seed * @param maxIterations * @param replaceMissingValues * @param preserveInstancesOrder * @param attributes lista de los atributos que se desean generalizar con cluster */ public void microAgregacion(DistanceFunction df, int numCluster, int seed, int maxIterations, boolean replaceMissingValues, boolean preserveInstancesOrder, List<Integer> attributes) throws Exception { //instancesFilter = new Instances(instances); SimpleKMeans kMeans; kMeans = new SimpleKMeans(); Instances uniqueAttributes; uniqueAttributes = new Instances(instancesFilter); List<String> names = new ArrayList<>(); int i = 0; for (Integer attribute : attributes) { String name = new String(instancesFilter.attribute(attribute).name()); if (instancesFilter.attribute(attribute).isDate() || instancesFilter.attribute(attribute).isString()) throw new Exception("No se puede hacer cluster con atributos de tipo DATE o STRING"); names.add(name); } while (uniqueAttributes.numAttributes() != attributes.size()) { if (!names.contains(uniqueAttributes.attribute(i).name())) uniqueAttributes.deleteAttributeAt(i); else i++; } try { kMeans.setNumClusters(numCluster); kMeans.setMaxIterations(maxIterations); kMeans.setSeed(seed); kMeans.setDisplayStdDevs(false); kMeans.setDistanceFunction(df); kMeans.setDontReplaceMissingValues(replaceMissingValues); kMeans.setPreserveInstancesOrder(preserveInstancesOrder); kMeans.buildClusterer(uniqueAttributes); //System.out.println(kMeans); for (int j = 0; j < uniqueAttributes.numInstances(); j++) { int cluster = kMeans.clusterInstance(uniqueAttributes.instance(j)); for (int k = 0; k < uniqueAttributes.numAttributes(); k++) { if (uniqueAttributes.attribute(k).isNumeric()) uniqueAttributes.instance(j).setValue(k, 
Double.parseDouble(kMeans.getClusterCentroids().instance(cluster).toString(k))); else uniqueAttributes.instance(j).setValue(k, kMeans.getClusterCentroids().instance(cluster).toString(k)); } } replaceValues(uniqueAttributes, attributes); } catch (Exception ex) { Logger.getLogger(ArffFile.class.getName()).log(Level.SEVERE, null, ex); } //saveToFile("4"); }
From source file:entities.ArffFile.java
/** * Agrega los nuevos valores que se encuentran en uniqueAttribute * A instancesFilter para luego ser exportado en archivo arff * @param uniqueAttribute /*from w w w. jav a 2s.c o m*/ */ public void replaceValues(Instances uniqueAttribute, List<Integer> attributes) { for (int i = 0; i < instancesFilter.numInstances(); i++) { for (int j = 0; j < attributes.size(); j++) { if (instancesFilter.attribute(attributes.get(j)).isNumeric()) instancesFilter.instance(i).setValue(attributes.get(j), Double.parseDouble(uniqueAttribute.instance(i).toString(j))); else instancesFilter.instance(i).setValue(attributes.get(j), uniqueAttribute.instance(i).toString(j)); } } }
From source file:entity.DifficultyResamplingManager.java
License:Open Source License
/**
 * Return max dimensions of subdataset for a PR (total, p, n).
 *
 * Starting from the dataset's actual positive/negative counts, decrements p (when the
 * positive rate is too high) or n (when it is too low) until the ceiling of the positive
 * percentage matches the desired proportion, rolling back one step if the previous
 * ratio was closer to the target.
 *
 * @param originalDataset dataset whose class values are compared against Settings.buggyLabel
 * @param positiveExamplePercentProportion desired positive percentage (only intValue() is used)
 * @return the largest (p, n) pair whose ceiled positive percentage matches the target
 */
public SubdatasetDimensions calculateSubdatasetDimensionsForProportion(Instances originalDataset,
        BigDecimal positiveExamplePercentProportion) {
    // size of subdataset, initialized to original size
    int total = originalDataset.numInstances();
    // number of positive instances
    int p = 0;
    // number of negative instances
    int n = 0;
    // current PR
    int pp = 0;
    // count positives
    for (int i = 0; i < total; i++) {
        if (originalDataset.instance(i).stringValue(originalDataset.classIndex()).equals(Settings.buggyLabel)) {
            p++;
        }
    }
    n = total - p;
    // finds actual PR
    pp = calculatePositivePercentCeil(p + n, p);
    if (verbose)
        System.out.println(
                "[DifficultyResamplingManager , calculateSubdatasetDimensionsForProportion] attuale: p=" + p
                        + " n=" + n + " pp = " + pp);
    // if current PR equals desired one, return current dimensions
    if (pp == positiveExamplePercentProportion.intValue())
        return new SubdatasetDimensions(p, n);
    // if current PR is greater than the desired one
    // decrements p until ceiling of current PR is no longer greater than the desired one
    if (pp > positiveExamplePercentProportion.intValue()) {
        while (pp > positiveExamplePercentProportion.intValue()) {
            p--;
            pp = calculatePositivePercentCeil(p + n, p);
            if (verbose)
                System.out
                        .println("[DifficultyResamplingManager , calculateSubdatasetDimensionsForProportion] p="
                                + p + " n=" + n + " pp = " + pp);
        }
        // goes back if the previous PR was "nearer" to the desired than the current one
        if (isPPPNearerThanPPToDesiredPercent(calculatePositivePercentCeil(p + 1 + n, p + 1), pp,
                positiveExamplePercentProportion.intValue())) {
            p++;
            pp = calculatePositivePercentCeil(p + n, p);
        }
    }
    // if current PR is less than the desired one
    // decrements n until ceiling of current PR is no longer less than the desired one
    // NOTE(review): this is deliberately NOT an else-if — the rollback above can leave
    // pp below the target, in which case this branch refines n as well; confirm intended
    if (pp < positiveExamplePercentProportion.intValue()) {
        while (pp < positiveExamplePercentProportion.intValue()) {
            n--;
            pp = calculatePositivePercentCeil(p + n, p);
            if (verbose)
                System.out
                        .println("[DifficultyResamplingManager , calculateSubdatasetDimensionsForProportion] p="
                                + p + " n=" + n + " pp = " + pp);
        }
        // goes back if the previous PR was "nearer" to the desired than the current one
        if (isPPPNearerThanPPToDesiredPercent(calculatePositivePercentCeil(p + n + 1, p), pp,
                positiveExamplePercentProportion.intValue())) {
            n++;
            pp = calculatePositivePercentCeil(p + n, p);
        }
    }
    if (verbose)
        System.out
                .println("[DifficultyResamplingManager , calculateSubdatasetDimensionsForProportion] finale p="
                        + p + " n=" + n + " pp = " + pp);
    return new SubdatasetDimensions(p, n);
}
From source file:entity.DifficultyResamplingManager.java
License:Open Source License
/** * called by generateResampledSubdataset * //from w ww . j a v a 2 s. co m * @param originalDataset * @param subdatasetDimensions * @return */ private Instances generateResampledSubdataset(Instances originalDataset, SubdatasetDimensions subdatasetDimensions) { // creates an empty dataset Instances resampledSubdataset = new Instances(originalDataset); resampledSubdataset.delete(); // randomize dataset instances order originalDataset.randomize(RandomizationManager.randomGenerator); // calc number of positives to insert int positivesToInsert = subdatasetDimensions.getP(); if (verbose) System.out.println("[DifficultyResamplingManager, generateResampledSubdataset] positivesToInsert = " + positivesToInsert); // calc number of negatives to insert int negativesToInsert = subdatasetDimensions.getN(); // iterates over the original dataset instances for (int i = 0; i < originalDataset.numInstances(); i++) { // if instance is positive and more are needed in the new dataset, inserts into new dataset if ((positivesToInsert > 0) && (originalDataset.instance(i).stringValue(originalDataset.classIndex()) .equals(Settings.buggyLabel))) { resampledSubdataset.add(originalDataset.instance(i)); positivesToInsert--; } // if instance is negative and more are needed in the new dataset, inserts into new dataset if ((negativesToInsert > 0) && (originalDataset.instance(i).stringValue(originalDataset.classIndex()) .equals(Settings.nonbuggyLabel))) { resampledSubdataset.add(originalDataset.instance(i)); negativesToInsert--; } } if (verbose) System.out.println("[DifficultyResamplingManager, generateResampledSubdataset] resampling terminato: " + this.printDatasetInfo(resampledSubdataset)); return resampledSubdataset; }
From source file:entity.DifficultyResamplingManager.java
License:Open Source License
/**
 * prints number of positive and negative instances and respective percentages
 *
 * @param dataset dataset whose class values are compared to Settings.buggyLabel /
 *        Settings.nonbuggyLabel
 * @return summary string with total instances, p+n, p, n and the positive percentage
 */
public String printDatasetInfo(Instances dataset) {
    int positives = 0;
    int negatives = 0;
    for (int i = 0; i < dataset.numInstances(); i++) {
        // hoist the label lookup; both checks kept separate (not else-if) to preserve
        // the original counting behavior exactly
        String label = dataset.instance(i).stringValue(dataset.classIndex());
        if (label.equals(Settings.buggyLabel)) {
            positives++;
        }
        if (label.equals(Settings.nonbuggyLabel)) {
            negatives++;
        }
    }
    // NOTE: for an empty dataset this is 0/0 = NaN — kept for backward compatibility
    double percent = ((double) positives / (double) dataset.numInstances()) * 100;
    // redundant new String(...) wrapper removed: concatenation already yields a new String
    return "totale istanze: " + dataset.numInstances() + ", p+n=" + (positives + negatives)
            + ", p: " + positives + ", n: " + negatives + ", %p : " + percent;
}
From source file:entity.NoiseInjectionManager.java
License:Open Source License
/** * //from ww w .j a va 2 s. co m * Increments fp and fn by specified percentages. * Randomize order of instances and modifies instances until noise quota is reached. * Than randomized instances again. * NOTE: It modifies the given dataset, because it is a reference. * * @param origDataset * @param fpPercentage * @param fnPercentage * @return Instances noisyDataset */ public Instances addNoiseToDataset(Instances origDataset, BigDecimal fpPercentage, BigDecimal fnPercentage) { // exits if no noise must be added if (fnPercentage.equals(BigDecimal.ZERO) && fpPercentage.equals(BigDecimal.ZERO)) { if (verbose) System.out.println("[NoiseManager , addNoiseToDataset] nessun errore da aggiungere"); return origDataset; } // total instances in dataset int numInstances = origDataset.numInstances(); // finds positive (buggy) and negative (non-buggy) instances numbers int numOfPositives = 0; int numOfNegatives = 0; for (int j = 0; j < numInstances; j++) { if (origDataset.instance(j).stringValue(origDataset.classIndex()).equals(Settings.buggyLabel)) { numOfPositives++; } // this is a redundant control, but better safe than sorry else if (origDataset.instance(j).stringValue(origDataset.classIndex()).equals(Settings.nonbuggyLabel)) { numOfNegatives++; } } // calculates the number of false positives to insert int fpToInsert = (int) Math.round(numOfNegatives * fpPercentage.doubleValue() / 100); int fpInserted = 0; if (verbose) System.out.println("\n\n[NoiseManager , addNoiseToDataset] fpToInsert= " + fpToInsert + ", totIntances= " + origDataset.numInstances() + " true negatives= " + numOfNegatives + " %fp= " + fpPercentage); // calculates the number of false negatives to insert int fnToInsert = (int) Math.round(numOfPositives * fnPercentage.doubleValue() / 100); int fnInserted = 0; if (verbose) System.out.println("[NoiseManager , addNoiseToDataset] fnToInsert= " + fnToInsert + ", totIntances= " + origDataset.numInstances() + " true positives= " + numOfPositives + " %fn= " + 
fnPercentage); if (verbose) System.out.println("[NoiseManager , addNoiseToDataset] buggy label: " + Settings.buggyLabel + " - nonbuggy label: " + Settings.nonbuggyLabel); // randomize order of instances origDataset.randomize(RandomizationManager.randomGenerator); for (int i = 0; i < origDataset.numInstances(); i++) { if (verbose) System.out.print("\nORIGINAL VALUES: " + origDataset.instance(i).value(origDataset.attribute(origDataset.classIndex())) + " - " + origDataset.instance(i).stringValue(origDataset.classIndex())); // gets the classification attribute (it HAS to be the last) Attribute att = origDataset.instance(i).attribute(origDataset.classIndex()); // if there are fn to add and this is a positive instances it turns it into a negative, making it a fn if ((fnInserted < fnToInsert) && (origDataset.instance(i).stringValue(origDataset.classIndex()) .equals(Settings.buggyLabel))) { origDataset.instance(i).setValue(att, Settings.nonbuggyLabel); fnInserted++; if (verbose) System.out.print(" - added FN, added " + fnInserted + " of " + fnToInsert + " "); } // if there are fp to add and this is a negative instances it turns it into a positive, making it a fp else if ((fpInserted < fpToInsert) && (origDataset.instance(i).stringValue(origDataset.classIndex()) .equals(Settings.nonbuggyLabel))) { origDataset.instance(i).setValue(att, Settings.buggyLabel); fpInserted++; if (verbose) System.out.print(" - added FP, added " + fpInserted + " of " + fpToInsert + " "); } if (verbose) System.out.print(" FINAL ELEMENT VALUES: " + origDataset.instance(i).value(origDataset.attribute(origDataset.classIndex())) + " - " + origDataset.instance(i).stringValue(origDataset.classIndex())); } // randomize order of instances origDataset.randomize(RandomizationManager.randomGenerator); return origDataset; }
From source file:entity.NoiseInjectionManager.java
License:Open Source License
/** * Increments fp and fn in combination by a specified percentages. * Randomize order of instances and modifies instances until noise quota is reached. * Than randomized instances again.// w w w. j av a2 s.com * NOTE: It modifies the given dataset, because it is a reference. * * @param origDataset * @param combinedFpFnPercentage * @return noisydata */ public Instances addNoiseToDataset(Instances origDataset, BigDecimal combinedFpFnPercentage) { // exits if no noise must be added if (combinedFpFnPercentage.equals(BigDecimal.ZERO)) { if (verbose) System.out.println("[NoiseManager , addNoiseToDataset] nessun errore da aggiungere"); return origDataset; } // total instances in dataset int numInstances = origDataset.numInstances(); // finds positive (buggy) and negative (non-buggy) instances numbers int fpAndFnToInsert = (int) Math.round(numInstances * combinedFpFnPercentage.doubleValue() / 100); int fpAndFnInserted = 0; if (verbose) System.out.println("\n\n[NoiseManager , addNoiseToDataset] fpAndFnToInsert= " + fpAndFnToInsert + ", totIntances= " + origDataset.numInstances()); if (verbose) System.out.println("[NoiseManager , addNoiseToDataset] buggy label: " + Settings.buggyLabel + " - nonbuggy label: " + Settings.nonbuggyLabel); // randomize order of instances origDataset.randomize(RandomizationManager.randomGenerator); for (int i = 0; i < origDataset.numInstances(); i++) { if (verbose) System.out.print("\nORIGINAL VALUES: " + origDataset.instance(i).value(origDataset.attribute(origDataset.classIndex())) + " - " + origDataset.instance(i).stringValue(origDataset.classIndex())); // gets the classification attribute (it HAS to be the last) Attribute att = origDataset.instance(i).attribute(origDataset.classIndex()); // if there are fn or fp to add if (fpAndFnInserted < fpAndFnToInsert) { // if this is a positive instances it turns it into a negative, making it a fn if (origDataset.instance(i).stringValue(origDataset.classIndex()).equals(Settings.buggyLabel)) { if 
(verbose) System.out.print(" - added FN, added " + fpAndFnInserted + " of " + fpAndFnToInsert + " "); origDataset.instance(i).setValue(att, Settings.nonbuggyLabel); fpAndFnInserted++; } // if this is a negative instances it turns it into a positive, making it a fp else if (origDataset.instance(i).stringValue(origDataset.classIndex()) .equals(Settings.nonbuggyLabel)) { if (verbose) System.out.print(" - added FP, added " + fpAndFnInserted + " of " + fpAndFnToInsert + " "); origDataset.instance(i).setValue(att, Settings.buggyLabel); fpAndFnInserted++; } } if (verbose) System.out.print(" FINAL ELEMENT VALUES: " + origDataset.instance(i).value(origDataset.attribute(origDataset.classIndex())) + " - " + origDataset.instance(i).stringValue(origDataset.classIndex())); } // randomize order of instances origDataset.randomize(RandomizationManager.randomGenerator); return origDataset; }
From source file:es.jarias.FMC.ClassCompoundTransformation.java
License:Open Source License
/**
 * Transforms a multi-label dataset into a single-label one using the label-powerset
 * approach: every possible binary label combination becomes one value of a new
 * nominal "class" attribute.
 *
 * @param mlData the multi-label input data
 * @return the transformed instances
 * @throws Exception if removing the labels or building the new dataset fails
 */
public Instances transformInstances(MultiLabelInstances mlData) throws Exception {
    data = mlData.getDataSet();
    numLabels = mlData.getNumLabels();
    labelIndices = mlData.getLabelIndices();
    Instances newData = null;

    // This must be different in order to combine ALL class states, not only existing ones.
    // gather distinct label combinations
    // ASSUME CLASSES ARE BINARY
    ArrayList<LabelSet> labelSets = new ArrayList<LabelSet>();
    double[] dblLabels = new double[numLabels];
    double nCombinations = Math.pow(2, numLabels);
    // enumerate every 0/1 assignment over the labels; bit l of i decides label l's value
    for (int i = 0; i < nCombinations; i++) {
        for (int l = 0; l < numLabels; l++) {
            int digit = (int) Math.pow(2, numLabels - 1 - l);
            // integer division: (digit & i) is either 0 or digit, so this is 0 or 1
            dblLabels[l] = (digit & i) / digit;
        }
        LabelSet labelSet = new LabelSet(dblLabels);
        labelSets.add(labelSet);
    }

    // create class attribute — one nominal value (a bit string) per label combination
    ArrayList<String> classValues = new ArrayList<String>(labelSets.size());
    for (LabelSet subset : labelSets) {
        classValues.add(subset.toBitString());
    }
    newClass = new Attribute("class", classValues);

    // remove all labels
    newData = RemoveAllLabels.transformInstances(data, labelIndices);

    // add new class attribute
    newData.insertAttributeAt(newClass, newData.numAttributes());
    newData.setClassIndex(newData.numAttributes() - 1);

    // add class values: concatenate each instance's original label values into its bit string
    for (int i = 0; i < newData.numInstances(); i++) {
        String strClass = "";
        for (int j = 0; j < numLabels; j++) {
            int index = labelIndices[j];
            strClass = strClass + data.attribute(index).value((int) data.instance(i).value(index));
        }
        newData.instance(i).setClassValue(strClass);
    }
    transformedFormat = new Instances(newData, 0);
    return newData;
}