List of usage examples for weka.core.Instances.toString()
@Override
public String toString()
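Instances.toString() returns the whole dataset in ARFF format (the @relation/@attribute header followed by the @data section), which is why most of the examples below simply write its result to a file in order to persist a dataset. Below is a minimal sketch of that pattern; the file names are placeholders, not paths used by any of the projects listed here.

import java.io.BufferedWriter;
import java.io.FileWriter;

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class InstancesToStringDemo {
    public static void main(String[] args) throws Exception {
        // "input.arff" and "output.arff" are placeholder paths for this sketch.
        Instances data = DataSource.read("input.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // toString() renders the complete dataset in ARFF format (header plus @data),
        // so writing it out produces a valid .arff file.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter("output.arff"))) {
            writer.write(data.toString());
        }
    }
}

For very large datasets, weka.core.converters.ArffSaver can write the file incrementally instead of building the entire dataset string in memory.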
From source file:mao.datamining.DataSetPair.java
private void createSampleDataSets() {
    try {
        // reload the new data from new arff file: Main.OrangeProcessedDSHome + "/afterRemoveRows.arff"
        Instances newData = ConverterUtils.DataSource
                .read(Main.OrangeProcessedDSHome + "/afterRemoveRows2.arff");
        newData.setClassIndex(newData.numAttributes() - 1);

        // create none sample file
        // Main.logging("== New Data After Doing Nothing, waiting for CostMatrix: ===\n" + newData.toSummaryString());
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(Main.OrangeProcessedDSHome + "/afterNoneSampling.arff")))) {
            writer.write(newData.toString());
        }

        // create under sample file
        SpreadSubsample underSampleFilter = new weka.filters.supervised.instance.SpreadSubsample();
        underSampleFilter.setInputFormat(newData);
        String underOptionsClone[] = new String[underSampleFilterOptions.length];
        System.arraycopy(underSampleFilterOptions, 0, underOptionsClone, 0, underSampleFilterOptions.length);
        underSampleFilter.setOptions(underOptionsClone);
        Instances underNewData = Filter.useFilter(newData, underSampleFilter);
        // Main.logging("== New Data After Under Sampling: ===\n" + underNewData.toSummaryString());
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(Main.OrangeProcessedDSHome + "/afterUnderSampling.arff")))) {
            writer.write(underNewData.toString());
        }

        // create over sample file
        // weka.filters.supervised.instance.SMOTE -C 0 -K 5 -P 1000.0 -S 1 smoteOptions
        SMOTE smote = new weka.filters.supervised.instance.SMOTE();
        smote.setInputFormat(newData);
        String overOptionsClone[] = new String[overSampleSmoteOptions.length];
        System.arraycopy(overSampleSmoteOptions, 0, overOptionsClone, 0, overSampleSmoteOptions.length);
        smote.setOptions(overOptionsClone);
        Instances overNewData = Filter.useFilter(newData, smote);
        // Main.logging("== New Data After Over Sampling: ===\n" + overNewData.toSummaryString());
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(Main.OrangeProcessedDSHome + "/afterOverSampling.arff")))) {
            writer.write(overNewData.toString());
        }
    } catch (Exception ex) {
        Logger.getLogger(DataSetPair.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:mao.datamining.DataSetPair.java
/**
 * Pre-Process the training data set with:
 * RemoveUselessColumnsByMissingValues filter
 * SpreadSubsample filter to shrink the majority class instances
 * AttributeSelection filter with CfsSubsetEval and LinearForwardSelection
 */
private void processTrainRawData() {
    System.out.println("====================" + this.trainFileName + "====================");
    System.out.println("====================" + this.trainFileName + "====================");
    System.out.println("====================" + this.trainFileName + "====================");
    finalTrainAttrList.clear();
    try {
        doItOnce4All();
        String sampleFilePath = null;

        // step 2: either over sample, or under sample
        // weka.filters.supervised.instance.SpreadSubsample
        if (this.resampleMethod.equalsIgnoreCase(resampleUnder)) {
            System.out.println("Under Sampling");
            sampleFilePath = Main.OrangeProcessedDSHome + "/afterUnderSampling.arff";
        } else if (resampleMethod.equalsIgnoreCase(resampleOver)) {
            System.out.println("Over Sampling");
            sampleFilePath = Main.OrangeProcessedDSHome + "/afterOverSampling.arff";
        } else if (resampleMethod.equalsIgnoreCase(resampleNone)) {
            // do nothing
            System.out.println("None Sampling");
            sampleFilePath = Main.OrangeProcessedDSHome + "/afterNoneSampling.arff";
        } else if (resampleMethod.equalsIgnoreCase(resampleMatrix)) {
            // do nothing
            System.out.println("Matrix Sampling");
            sampleFilePath = Main.OrangeProcessedDSHome + "/afterNoneSampling.arff";
        } else {
            doNotSupport();
        }
        Instances newData = ConverterUtils.DataSource.read(sampleFilePath);
        newData.setClassIndex(newData.numAttributes() - 1);
        // Main.logging("== New Data After Resampling class instances: ===\n" + newData.toSummaryString());

        // step 3: select features
        AttributeSelection attrSelectionFilter = new AttributeSelection();
        ASEvaluation eval = null;
        ASSearch search = null;

        // ranker
        if (this.featureSelectionMode.equalsIgnoreCase(featureSelectionA)) {
            System.out.println("Ranker");
            eval = new weka.attributeSelection.InfoGainAttributeEval();
            // weka.attributeSelection.Ranker -T 0.02 -N -1
            search = new Ranker();
            String rankerOptios[] = { "-T", "0.01", "-N", "-1" };
            if (resampleMethod.equalsIgnoreCase(resampleOver)) {
                rankerOptios[1] = "0.1";
            }
            ((Ranker) search).setOptions(rankerOptios);
            Main.logging("== Start to Select Features with InfoGainAttributeEval and Ranker");
        }
        // weka.attributeSelection.LinearForwardSelection -D 0 -N 5 -I -K 50 -T 0
        else if (this.featureSelectionMode.equalsIgnoreCase(featureSelectionB)) {
            System.out.println("CfsSubset");
            eval = new CfsSubsetEval();
            search = new LinearForwardSelection();
            String linearOptios[] = { "-D", "0", "-N", "5", "-I", "-K", "50", "-T", "0" };
            ((LinearForwardSelection) search).setOptions(linearOptios);
            Main.logging("== Start to Select Features with CfsSubsetEval and LinearForwardSelection");
        } else if (this.featureSelectionMode.equalsIgnoreCase(featureSelectionNo)) {
            System.out.println("None Selection");
            Main.logging("No Feature Selection Method");
        } else {
            doNotSupport();
        }

        if (eval != null) {
            attrSelectionFilter.setEvaluator(eval);
            attrSelectionFilter.setSearch(search);
            attrSelectionFilter.setInputFormat(newData);
            newData = Filter.useFilter(newData, attrSelectionFilter);
        }
        Main.logging("== New Data After Selecting Features: ===\n" + newData.toSummaryString());

        // finally, write the final dataset to the file system
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(this.trainFileName)))) {
            writer.write(newData.toString());
        }

        int numAttributes = newData.numAttributes();
        for (int i = 0; i < numAttributes; i++) {
            String attrName = newData.attribute(i).name();
            finalTrainAttrList.add(attrName);
        }
        Main.logging(finalTrainAttrList.toString());

        // set the final train dataset
        finalTrainDataSet = newData;
        finalTrainDataSet.setClassIndex(finalTrainDataSet.numAttributes() - 1);
        Main.logging("train dataset class attr: " + finalTrainDataSet.classAttribute().toString());
    } catch (Exception ex) {
        Main.logging(null, ex);
    }
}
From source file:mao.datamining.DataSetPair.java
/**
 * To drop the useless columns accordingly on the test dataset, if it exists.
 */
private void processTestDataSet() {
    if (!new File(testSourceFileName).exists())
        return;
    try {
        Instances orangeTestDataSet = ConverterUtils.DataSource.read(testSourceFileName);
        Remove remove = new Remove();
        StringBuilder indexBuffer = new StringBuilder();
        for (String attrName : finalTrainAttrList) {
            int attrIndex = orangeTestDataSet.attribute(attrName).index();
            indexBuffer.append(attrIndex + 1).append(",");
        }
        Main.logging("Attribute Indices: \n" + indexBuffer.toString());
        remove.setAttributeIndices(indexBuffer.toString());
        remove.setInvertSelection(true);
        remove.setInputFormat(orangeTestDataSet);
        Instances testNewDataSet = Filter.useFilter(orangeTestDataSet, remove);
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(this.testFileName)))) {
            writer.write(testNewDataSet.toString());
        }
        // set the final test dataset
        finalTestDataSet = testNewDataSet;
        finalTestDataSet.setClassIndex(finalTestDataSet.numAttributes() - 1);
        Main.logging("test dataset class attr: " + finalTestDataSet.classAttribute().toString());
    } catch (Exception e) {
        Main.logging(null, e);
    }
}
From source file:mao.datamining.DataSetPair.java
private static void generateDataSetPairs() {
    String matrix[][] = {
            { resampleNone, featureSelectionNo }, { resampleNone, featureSelectionA },
            { resampleNone, featureSelectionB }, { resampleUnder, featureSelectionNo },
            { resampleUnder, featureSelectionA }, { resampleUnder, featureSelectionB },
            { resampleOver, featureSelectionNo }, { resampleOver, featureSelectionA },
            { resampleOver, featureSelectionB }, { resampleMatrix, featureSelectionNo },
            { resampleMatrix, featureSelectionA }, { resampleMatrix, featureSelectionB } };
    String mergeFilePath = Main.OrangeProcessedDSHome + "/mergedFile.arff";
    DataSetPair ds1 = new DataSetPair();
    for (String[] row : matrix) {
        try {
            ds1.setFeatures(row[0], row[1]);
            ds1.processTrainRawData();
            ds1.processTestDataSet();
            Util.DataSetFiles dsFiles = new Util.DataSetFiles(ds1.getTrainFileName(), ds1.getTestFileName(),
                    mergeFilePath);
            // merge the 2 files
            Util.mergeTrainTestFiles(dsFiles);

            Instances mergeData = null;
            // // numeric to nominal, to be deleted ???
            // mergeData = ConverterUtils.DataSource.read(mergeFilePath);
            // Instances transformedDS = Util.transformNum2Nominal(mergeData, columns2Nominal);
            // try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(mergeFilePath)))) {
            //     writer.write(transformedDS.toString());
            // }

            mergeData = ConverterUtils.DataSource.read(mergeFilePath);
            // normalize the 2 files together
            Instances normalizeData = Util.normalizeDS(mergeData);
            try (BufferedWriter writer = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(mergeFilePath)))) {
                writer.write(normalizeData.toString());
            }
            // split them
            Util.splitTrainTestFiles(dsFiles);
        } catch (Exception ex) {
            Logger.getLogger(DataSetPair.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
}
From source file:meddle.TrainModelByDomainOS.java
License:Open Source License
public static Instances populateArff(Info info, Map<String, Integer> wordCount,
        ArrayList<Map<String, Integer>> trainMatrix, ArrayList<Integer> PIILabels, int numSamples, int theta) {
    // System.out.println(info);
    // Mapping feature_name_index
    Map<String, Integer> fi = new HashMap<String, Integer>();
    int index = 0;
    // Populate Features
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    int high_freq = trainMatrix.size();
    if (high_freq - theta < 30)
        theta = 0;
    for (Map.Entry<String, Integer> entry : wordCount.entrySet()) {
        // filter low frequency word
        String currentWord = entry.getKey();
        int currentWordFreq = entry.getValue();
        if (currentWordFreq < theta) {
            if (!SharedMem.wildKeys.get("android").containsKey(currentWord)
                    && !SharedMem.wildKeys.get("ios").containsKey(currentWord)
                    && !SharedMem.wildKeys.get("windows").containsKey(currentWord))
                continue;
        }
        Attribute attribute = new Attribute(currentWord);
        attributes.add(attribute);
        fi.put(currentWord, index);
        index++;
    }
    ArrayList<String> classVals = new ArrayList<String>();
    classVals.add("" + LABEL_NEGATIVE);
    classVals.add("" + LABEL_POSITIVE);
    attributes.add(new Attribute("PIILabel", classVals));

    // Populate Data Points
    Iterator<Map<String, Integer>> all = trainMatrix.iterator();
    int count = 0;
    Instances trainingInstances = new Instances("Rel", attributes, 0);
    trainingInstances.setClassIndex(trainingInstances.numAttributes() - 1);
    while (all.hasNext()) {
        Map<String, Integer> dataMap = all.next();
        double[] instanceValue = new double[attributes.size()];
        for (int i = 0; i < attributes.size() - 1; i++) {
            instanceValue[i] = 0;
        }
        int label = PIILabels.get(count);
        instanceValue[attributes.size() - 1] = label;
        for (Map.Entry<String, Integer> entry : dataMap.entrySet()) {
            if (fi.containsKey(entry.getKey())) {
                int i = fi.get(entry.getKey());
                int val = entry.getValue();
                instanceValue[i] = val;
            }
        }
        Instance data = new SparseInstance(1.0, instanceValue);
        trainingInstances.add(data);
        count++;
    }
    // Write into .arff file for persistence
    try {
        BufferedWriter bw = new BufferedWriter(new FileWriter(RConfig.arffFolder + info.domainOS + ".arff"));
        bw.write(trainingInstances.toString());
        bw.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return trainingInstances;
}
From source file:miRdup.WekaModule.java
License:Open Source License
public static void trainModel(File arff, String keyword) {
    dec.setMaximumFractionDigits(3);
    System.out.println("\nTraining model on file " + arff);
    try {
        // load data
        DataSource source = new DataSource(arff.toString());
        Instances data = source.getDataSet();
        if (data.classIndex() == -1) {
            data.setClassIndex(data.numAttributes() - 1);
        }
        PrintWriter pwout = new PrintWriter(new FileWriter(keyword + Main.modelExtension + "Output"));
        PrintWriter pwroc = new PrintWriter(new FileWriter(keyword + Main.modelExtension + "roc.arff"));

        // remove ID row
        Remove rm = new Remove();
        rm.setAttributeIndices("1");
        FilteredClassifier fc = new FilteredClassifier();
        fc.setFilter(rm);

        // // train model svm
        // weka.classifiers.functions.LibSVM model = new weka.classifiers.functions.LibSVM();
        // model.setOptions(weka.core.Utils.splitOptions("-S 0 -K 2 -D 3 -G 0.0 -R 0.0 -N 0.5 -M 40.0 -C 1.0 -E 0.0010 -P 0.1 -B"));

        // // train model MultilayerPerceptron
        // weka.classifiers.functions.MultilayerPerceptron model = new weka.classifiers.functions.MultilayerPerceptron();
        // model.setOptions(weka.core.Utils.splitOptions("-L 0.3 -M 0.2 -N 500 -V 0 -S 0 -E 20 -H a"));

        // // train model Adaboost on RIPPER
        // weka.classifiers.meta.AdaBoostM1 model = new weka.classifiers.meta.AdaBoostM1();
        // model.setOptions(weka.core.Utils.splitOptions("weka.classifiers.meta.AdaBoostM1 -P 100 -S 1 -I 10 -W weka.classifiers.rules.JRip -- -F 10 -N 2.0 -O 5 -S 1"));

        // // train model Adaboost on FURIA
        // weka.classifiers.meta.AdaBoostM1 model = new weka.classifiers.meta.AdaBoostM1();
        // model.setOptions(weka.core.Utils.splitOptions("weka.classifiers.meta.AdaBoostM1 -P 100 -S 1 -I 10 -W weka.classifiers.rules.FURIA -- -F 10 -N 2.0 -O 5 -S 1 -p 0 -s 0"));

        // // train model Adaboost on J48 trees
        // weka.classifiers.meta.AdaBoostM1 model = new weka.classifiers.meta.AdaBoostM1();
        // model.setOptions(weka.core.Utils.splitOptions("-P 100 -S 1 -I 10 -W weka.classifiers.trees.J48 -- -C 0.25 -M 2"));

        // train model Adaboost on Random Forest trees
        weka.classifiers.meta.AdaBoostM1 model = new weka.classifiers.meta.AdaBoostM1();
        model.setOptions(weka.core.Utils
                .splitOptions("-P 100 -S 1 -I 10 -W weka.classifiers.trees.RandomForest -- -I 50 -K 0 -S 1"));

        if (Main.debug) {
            System.out.print("Model options: " + model.getClass().getName().trim() + " ");
        }
        System.out.print(model.getClass() + " ");
        for (String s : model.getOptions()) {
            System.out.print(s + " ");
        }
        pwout.print("Model options: " + model.getClass().getName().trim() + " ");
        for (String s : model.getOptions()) {
            pwout.print(s + " ");
        }

        // build model
        // model.buildClassifier(data);
        fc.setClassifier(model);
        fc.buildClassifier(data);

        // 10-fold cross validation on the model
        Evaluation eval = new Evaluation(data);
        // eval.crossValidateModel(model, data, 10, new Random(1));
        StringBuffer sb = new StringBuffer();
        eval.crossValidateModel(fc, data, 10, new Random(1), sb, new Range("first,last"), false);
        // System.out.println(sb);
        pwout.println(sb);
        pwout.flush();

        // output
        pwout.println("\n" + eval.toSummaryString());
        System.out.println(eval.toSummaryString());
        pwout.println(eval.toClassDetailsString());
        System.out.println(eval.toClassDetailsString());

        // calculate important values
        String ev[] = eval.toClassDetailsString().split("\n");
        String ptmp[] = ev[3].trim().split(" ");
        String ntmp[] = ev[4].trim().split(" ");
        String avgtmp[] = ev[5].trim().split(" ");
        ArrayList<String> p = new ArrayList<String>();
        ArrayList<String> n = new ArrayList<String>();
        ArrayList<String> avg = new ArrayList<String>();
        for (String s : ptmp) {
            if (!s.trim().isEmpty()) {
                p.add(s);
            }
        }
        for (String s : ntmp) {
            if (!s.trim().isEmpty()) {
                n.add(s);
            }
        }
        for (String s : avgtmp) {
            if (!s.trim().isEmpty()) {
                avg.add(s);
            }
        }
        double tp = Double.parseDouble(p.get(0));
        double fp = Double.parseDouble(p.get(1));
        double tn = Double.parseDouble(n.get(0));
        double fn = Double.parseDouble(n.get(1));
        double auc = Double.parseDouble(avg.get(7));
        pwout.println("\nTP=" + tp + "\nFP=" + fp + "\nTN=" + tn + "\nFN=" + fn);
        System.out.println("\nTP=" + tp + "\nFP=" + fp + "\nTN=" + tn + "\nFN=" + fn);

        // specificity, sensitivity, Matthews correlation, prediction accuracy
        double sp = ((tn) / (tn + fp));
        double se = ((tp) / (tp + fn));
        double acc = ((tp + tn) / (tp + tn + fp + fn));
        // Matthews correlation coefficient: (tp*tn - fp*fn) / sqrt((tp+fp)(tp+fn)(tn+fp)(tn+fn))
        double mcc = ((tp * tn) - (fp * fn)) / Math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn));
        String output = "\nse=" + dec.format(se).replace(",", ".") + "\nsp=" + dec.format(sp).replace(",", ".")
                + "\nACC=" + dec.format(acc).replace(",", ".") + "\nMCC=" + dec.format(mcc).replace(",", ".")
                + "\nAUC=" + dec.format(auc).replace(",", ".");
        pwout.println(output);
        System.out.println(output);

        pwout.println(eval.toMatrixString());
        System.out.println(eval.toMatrixString());
        pwout.flush();
        pwout.close();

        // Saving model
        System.out.println("Model saved: " + keyword + Main.modelExtension);
        weka.core.SerializationHelper.write(keyword + Main.modelExtension, fc.getClassifier() /* model */);

        // get curve
        ThresholdCurve tc = new ThresholdCurve();
        int classIndex = 0;
        Instances result = tc.getCurve(eval.predictions(), classIndex);
        pwroc.print(result.toString());
        pwroc.flush();
        pwroc.close();

        // draw curve
        // rocCurve(eval);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:miRdup.WekaModule.java
License:Open Source License
public static void rocCurve(Evaluation eval) {
    try {
        // generate curve
        ThresholdCurve tc = new ThresholdCurve();
        int classIndex = 0;
        Instances result = tc.getCurve(eval.predictions(), classIndex);
        result.toString();

        // plot curve
        ThresholdVisualizePanel vmc = new ThresholdVisualizePanel();
        vmc.setROCString("(Area under ROC = " + Utils.doubleToString(tc.getROCArea(result), 4) + ")");
        vmc.setName(result.relationName());
        PlotData2D tempd = new PlotData2D(result);
        tempd.setPlotName(result.relationName());
        tempd.addInstanceNumberAttribute();
        // specify which points are connected
        boolean[] cp = new boolean[result.numInstances()];
        for (int n = 1; n < cp.length; n++) {
            cp[n] = true;
        }
        tempd.setConnectPoints(cp);
        // add plot
        vmc.addPlot(tempd);
        // result.toString();

        // display curve
        String plotName = vmc.getName();
        final javax.swing.JFrame jf = new javax.swing.JFrame("Weka Classifier Visualize: " + plotName);
        jf.setSize(500, 400);
        jf.getContentPane().setLayout(new BorderLayout());
        jf.getContentPane().add(vmc, BorderLayout.CENTER);
        jf.addWindowListener(new java.awt.event.WindowAdapter() {
            public void windowClosing(java.awt.event.WindowEvent e) {
                jf.dispose();
            }
        });
        jf.setVisible(true);
        System.out.println("");
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:moa.tud.ke.patching.AdaptivePatchingAdwin.java
static void writeArff(String filename, Instances data) {
    try {
        BufferedWriter writer = new BufferedWriter(new FileWriter(filename));
        writer.write(data.toString());
        writer.flush();
        writer.close();
    } catch (Exception e) {
        System.err.println("Error writing arff file.");
    }
}
From source file:model.clustering.Clustering.java
public String filledFile(Instances data, int numOfClusters, String remove) throws Exception {
    String mainData = data.toString();
    int index = mainData.indexOf("@data");
    String clusterData = mainData.substring(0, index + 6);

    Remove removeFilter = new Remove();
    removeFilter.setAttributeIndices(remove);

    kMeansCLusterer = new SimpleKMeans();
    kMeansCLusterer.setNumClusters(numOfClusters);

    FilteredClusterer filteredClusterer = new FilteredClusterer();
    filteredClusterer.setClusterer(kMeansCLusterer);
    filteredClusterer.setFilter(removeFilter);
    filteredClusterer.buildClusterer(data);

    Enumeration<Instance> newData = data.enumerateInstances();

    eval = new ClusterEvaluation();
    eval.setClusterer(filteredClusterer);
    eval.evaluateClusterer(data);

    while (newData.hasMoreElements()) {
        Instance i = (Instance) newData.nextElement();
        int kluster = filteredClusterer.clusterInstance(i);
        String instanceString = i.toString() + "," + kluster;
        clusterData = clusterData + instanceString + "\n";
    }
    return clusterData;
}
From source file:mulan.data.ConverterCLUS.java
License:Open Source License
/**
 * Converts the original dataset to a Mulan compatible dataset.
 *
 * @param sourceFilename the source file name
 * @param arffFilename the converted arff name
 * @param xmlFilename the xml name
 * @throws java.lang.Exception
 */
public static void convert(String sourceFilename, String arffFilename, String xmlFilename) throws Exception {
    String line;
    try {
        BufferedReader brInput = new BufferedReader(new FileReader(sourceFilename));
        String relationName = null;
        ArrayList<Attribute> attInfo = new ArrayList<Attribute>();
        Instances data = null;
        int numAttributes = 0;
        String[] labelNames = null;
        while ((line = brInput.readLine()) != null) {
            if (line.startsWith("@RELATION")) {
                relationName = line.replace("@RELATION ", "").replaceAll("'", "").trim();
                continue;
            }
            if (line.startsWith("@ATTRIBUTE ")) {
                String tokens[] = line.split("\\s+");
                Attribute att;
                if (line.startsWith("@ATTRIBUTE class")) {
                    labelNames = tokens[3].split(",");
                    for (int i = 0; i < labelNames.length; i++) {
                        ArrayList<String> labelValues = new ArrayList<String>();
                        labelValues.add("0");
                        labelValues.add("1");
                        att = new Attribute(labelNames[i], labelValues);
                        attInfo.add(att);
                    }
                } else {
                    numAttributes++;
                    if (tokens[2].equals("numeric")) {
                        att = new Attribute(tokens[1]);
                    } else {
                        ArrayList<String> nominalValues = new ArrayList<String>();
                        String[] nominalTokens = tokens[2].substring(1, tokens[2].length() - 1).split(",");
                        for (int i = 0; i < nominalTokens.length; i++) {
                            nominalValues.add(nominalTokens[i]);
                        }
                        att = new Attribute(tokens[1], nominalValues);
                    }
                    attInfo.add(att);
                }
                continue;
            }
            if (line.toLowerCase().startsWith("@data")) {
                data = new Instances(relationName, attInfo, 0);
                while ((line = brInput.readLine()) != null) {
                    // fill data
                    String[] tokens = line.split(",");
                    double[] values = new double[attInfo.size()];
                    for (int i = 0; i < numAttributes; i++) {
                        Attribute att = (Attribute) attInfo.get(i);
                        if (att.isNumeric()) {
                            values[i] = Double.parseDouble(tokens[i]);
                        } else {
                            values[i] = att.indexOfValue(tokens[i]);
                        }
                    }
                    // fill class values
                    String[] labels = tokens[numAttributes].split("@");
                    for (int j = 0; j < labels.length; j++) {
                        String[] splitedLabels = labels[j].split("/");
                        String attrName = splitedLabels[0];
                        Attribute att = data.attribute(attrName);
                        values[attInfo.indexOf(att)] = 1;
                        for (int k = 1; k < splitedLabels.length; k++) {
                            attrName = attrName + "/" + splitedLabels[k];
                            att = data.attribute(attrName);
                            values[attInfo.indexOf(att)] = 1;
                        }
                    }
                    Instance instance = new DenseInstance(1, values);
                    data.add(instance);
                }
            }
        }

        BufferedWriter writer;
        writer = new BufferedWriter(new FileWriter(arffFilename));
        writer.write(data.toString());
        writer.close();

        // write xml file
        writer = new BufferedWriter(new FileWriter(xmlFilename));
        writer.write("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n");
        writer.write("<labels xmlns=\"http://mulan.sourceforge.net/labels\">\n");
        writer.write("<label name=\"" + labelNames[0] + "\">");
        int depth = 0;
        for (int i = 1; i < labelNames.length; i++) {
            int difSlashes = countSlashes(labelNames[i]) - countSlashes(labelNames[i - 1]);
            // child
            if (difSlashes == 1) {
                depth++;
                writer.write("\n");
                for (int j = 0; j < depth; j++) {
                    writer.write("\t");
                }
                writer.write("<label name=\"" + labelNames[i] + "\">");
            }
            // sibling
            if (difSlashes == 0) {
                writer.write("</label>\n");
                for (int j = 0; j < depth; j++) {
                    writer.write("\t");
                }
                writer.write("<label name=\"" + labelNames[i] + "\">");
            }
            // ancestor
            if (difSlashes < 0) {
                writer.write("</label>\n");
                for (int j = 0; j < Math.abs(difSlashes); j++) {
                    depth--;
                    for (int k = 0; k < depth; k++) {
                        writer.write("\t");
                    }
                    writer.write("</label>\n");
                }
                for (int j = 0; j < depth; j++) {
                    writer.write("\t");
                }
                writer.write("<label name=\"" + labelNames[i] + "\">");
            }
        }
        writer.write("</label>\n");
        while (depth > 0) {
            for (int k = 0; k < depth; k++) {
                writer.write("\t");
            }
            writer.write("</label>\n");
            depth--;
        }
        writer.write("</labels>");
        writer.close();
    } catch (IOException ioEx) {
        ioEx.printStackTrace();
    }
}