List of usage examples for weka.core Instances deleteAttributeAt
public void deleteAttributeAt(int position)
From source file:edu.umbc.cs.maple.utils.WekaUtils.java
License:Open Source License
/** Converts the instances in the given dataset to binary, setting the specified labels to positive. * Note this method is destructive to data, directly modifying its contents. * @param data the multiclass dataset to be converted to binary. * @param positiveClassValue the class value to treat as positive. */// w ww. j ava 2 s. c o m public static void convertMulticlassToBinary(Instances data, String positiveClassValue) { // ensure that data is nominal if (!data.classAttribute().isNominal()) throw new IllegalArgumentException("Instances must have a nominal class."); // create the new class attribute FastVector newClasses = new FastVector(2); newClasses.addElement("Y"); newClasses.addElement("N"); Attribute newClassAttribute = new Attribute("class", newClasses); // alter the class attribute to be binary int newClassAttIdx = data.classIndex(); data.insertAttributeAt(newClassAttribute, newClassAttIdx); int classAttIdx = data.classIndex(); // set the instances classes to be binary, with the labels [Y,N] (indices 0 and 1 respectively) int numInstances = data.numInstances(); for (int instIdx = 0; instIdx < numInstances; instIdx++) { Instance inst = data.instance(instIdx); if (inst.stringValue(classAttIdx).equals(positiveClassValue)) { inst.setValue(newClassAttIdx, 0); // set it to the first class, which will be Y } else { inst.setValue(newClassAttIdx, 1); // set it to the second class, which will be 0 } } // switch the class index to the new class and delete the old class data.setClassIndex(newClassAttIdx); data.deleteAttributeAt(classAttIdx); // alter the dataset name data.setRelationName(data.relationName() + "-" + positiveClassValue); }
From source file:elh.eus.absa.CLI.java
License:Open Source License
/** * Main access to the train-atc functionalities. Train ATC using a double one vs. all classifier * (E and A) for E#A aspect categories//from w ww . ja va2 s .com * @throws Exception */ public final void trainATC2(final InputStream inputStream) throws IOException { // load training parameters file String paramFile = parsedArguments.getString("params"); String testFile = parsedArguments.getString("testset"); String paramFile2 = parsedArguments.getString("params2"); String corpusFormat = parsedArguments.getString("corpusFormat"); //String validation = parsedArguments.getString("validation"); String lang = parsedArguments.getString("language"); //int foldNum = Integer.parseInt(parsedArguments.getString("foldNum")); //boolean printPreds = parsedArguments.getBoolean("printPreds"); boolean nullSentenceOpinions = parsedArguments.getBoolean("nullSentences"); boolean onlyTest = parsedArguments.getBoolean("testOnly"); double threshold = 0.5; double threshold2 = 0.5; String modelsPath = "/home/inaki/elixa-atp/ovsaModels"; CorpusReader reader = new CorpusReader(inputStream, corpusFormat, nullSentenceOpinions, lang); Features atcTrain = new Features(reader, paramFile, "3"); Instances traindata = atcTrain.loadInstances(true, "atc"); if (onlyTest) { if (FileUtilsElh.checkFile(testFile)) { System.err.println("read from test file"); reader = new CorpusReader(new FileInputStream(new File(testFile)), corpusFormat, nullSentenceOpinions, lang); atcTrain.setCorpus(reader); traindata = atcTrain.loadInstances(true, "atc"); } } //setting class attribute (entCat|attCat|entAttCat|polarityCat) //HashMap<String, Integer> opInst = atcTrain.getOpinInst(); //WekaWrapper classifyAtts; WekaWrapper onevsall; try { //classify.printMultilabelPredictions(classify.multiLabelPrediction()); */ //onevsall Instances entdata = new Instances(traindata); entdata.deleteAttributeAt(entdata.attribute("attCat").index()); entdata.deleteAttributeAt(entdata.attribute("entAttCat").index()); entdata.setClassIndex(entdata.attribute("entCat").index()); onevsall = new WekaWrapper(entdata, true); if (!onlyTest) { onevsall.trainOneVsAll(modelsPath, paramFile + "entCat"); System.out.println("trainATC: one vs all models ready"); } onevsall.setTestdata(entdata); HashMap<Integer, HashMap<String, Double>> ovsaRes = onevsall.predictOneVsAll(modelsPath, paramFile + "entCat"); System.out.println("trainATC: one vs all predictions ready"); HashMap<Integer, String> instOps = new HashMap<Integer, String>(); for (String oId : atcTrain.getOpinInst().keySet()) { instOps.put(atcTrain.getOpinInst().get(oId), oId); } atcTrain = new Features(reader, paramFile2, "3"); entdata = atcTrain.loadInstances(true, "attTrain2_data"); entdata.deleteAttributeAt(entdata.attribute("entAttCat").index()); //entdata.setClassIndex(entdata.attribute("entCat").index()); Attribute insAtt = entdata.attribute("instanceId"); double maxInstId = entdata.kthSmallestValue(insAtt, entdata.numDistinctValues(insAtt) - 1); System.err.println("last instance has index: " + maxInstId); for (int ins = 0; ins < entdata.numInstances(); ins++) { System.err.println("ins" + ins); int i = (int) entdata.instance(ins).value(insAtt); Instance currentInst = entdata.instance(ins); //System.err.println("instance "+i+" oid "+kk.get(i+1)+"kk contains key i?"+kk.containsKey(i)); String sId = reader.getOpinion(instOps.get(i)).getsId(); String oId = instOps.get(i); reader.removeSentenceOpinions(sId); int oSubId = 0; for (String cl : ovsaRes.get(i).keySet()) { //System.err.println("instance: "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl)); if (ovsaRes.get(i).get(cl) > threshold) { //System.err.println("one got through ! instance "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl)); // for the first one update the instances if (oSubId >= 1) { Instance newIns = new SparseInstance(currentInst); newIns.setDataset(entdata); entdata.add(newIns); newIns.setValue(insAtt, maxInstId + oSubId); newIns.setClassValue(cl); instOps.put((int) maxInstId + oSubId, oId); } // if the are more create new instances else { currentInst.setClassValue(cl); //create and add opinion to the structure // trgt, offsetFrom, offsetTo, polarity, cat, sId); //Opinion op = new Opinion(instOps.get(i)+"_"+oSubId, "", 0, 0, "", cl, sId); //reader.addOpinion(op); } oSubId++; } } //finished updating instances data } entdata.setClass(entdata.attribute("attCat")); onevsall = new WekaWrapper(entdata, true); /** * Bigarren sailkatzailea * * */ if (!onlyTest) { onevsall.trainOneVsAll(modelsPath, paramFile + "attCat"); System.out.println("trainATC: one vs all attcat models ready"); } ovsaRes = onevsall.predictOneVsAll(modelsPath, paramFile + "entAttCat"); insAtt = entdata.attribute("instanceId"); maxInstId = entdata.kthSmallestValue(insAtt, insAtt.numValues()); System.err.println("last instance has index: " + maxInstId); for (int ins = 0; ins < entdata.numInstances(); ins++) { System.err.println("ins: " + ins); int i = (int) entdata.instance(ins).value(insAtt); Instance currentInst = entdata.instance(ins); //System.err.println("instance "+i+" oid "+kk.get(i+1)+"kk contains key i?"+kk.containsKey(i)); String sId = reader.getOpinion(instOps.get(i)).getsId(); String oId = instOps.get(i); reader.removeSentenceOpinions(sId); int oSubId = 0; for (String cl : ovsaRes.get(i).keySet()) { //System.err.println("instance: "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl)); if (ovsaRes.get(i).get(cl) > threshold2) { ///System.err.println("instance: "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl)); if (ovsaRes.get(i).get(cl) > threshold) { //System.err.println("one got through ! instance "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl)); // for the first one update the instances if (oSubId >= 1) { String label = currentInst.stringValue(entdata.attribute("entAtt")) + "#" + cl; //create and add opinion to the structure // trgt, offsetFrom, offsetTo, polarity, cat, sId); Opinion op = new Opinion(oId + "_" + oSubId, "", 0, 0, "", label, sId); reader.addOpinion(op); } // if the are more create new instances else { String label = currentInst.stringValue(entdata.attribute("entAtt")) + "#" + cl; //create and add opinion to the structure // trgt, offsetFrom, offsetTo, polarity, cat, sId); reader.removeOpinion(oId); Opinion op = new Opinion(oId + "_" + oSubId, "", 0, 0, "", label, sId); reader.addOpinion(op); } oSubId++; } } //finished updating instances data } } reader.print2Semeval2015format(paramFile + "entAttCat.xml"); } catch (Exception e) { e.printStackTrace(); } //traindata.setClass(traindata.attribute("entAttCat")); System.err.println("DONE CLI train-atc2 (oneVsAll)"); }
From source file:elh.eus.absa.CLI.java
License:Open Source License
/** * train ATC using a single classifier (one vs. all) for E#A aspect categories. * //from w ww. j a v a2s .c o m * @param inputStream * @throws IOException */ public final void trainATCsingleCategory(final InputStream inputStream) throws IOException { // load training parameters file String paramFile = parsedArguments.getString("params"); String testFile = parsedArguments.getString("testset"); String corpusFormat = parsedArguments.getString("corpusFormat"); //String validation = parsedArguments.getString("validation"); String lang = parsedArguments.getString("language"); //int foldNum = Integer.parseInt(parsedArguments.getString("foldNum")); //boolean printPreds = parsedArguments.getBoolean("printPreds"); boolean nullSentenceOpinions = parsedArguments.getBoolean("nullSentences"); boolean onlyTest = parsedArguments.getBoolean("testOnly"); double threshold = 0.5; String modelsPath = "/home/inaki/Proiektuak/BOM/SEMEVAL2015/ovsaModels"; CorpusReader reader = new CorpusReader(inputStream, corpusFormat, nullSentenceOpinions, lang); Features atcTrain = new Features(reader, paramFile, "3"); Instances traindata = atcTrain.loadInstances(true, "atc"); if (onlyTest) { if (FileUtilsElh.checkFile(testFile)) { System.err.println("read from test file"); reader = new CorpusReader(new FileInputStream(new File(testFile)), corpusFormat, nullSentenceOpinions, lang); atcTrain.setCorpus(reader); traindata = atcTrain.loadInstances(true, "atc"); } } //setting class attribute (entCat|attCat|entAttCat|polarityCat) //HashMap<String, Integer> opInst = atcTrain.getOpinInst(); //WekaWrapper classifyEnts; //WekaWrapper classifyAtts; WekaWrapper onevsall; try { //classify.printMultilabelPredictions(classify.multiLabelPrediction()); */ //onevsall //Instances entdata = new Instances(traindata); traindata.deleteAttributeAt(traindata.attribute("attCat").index()); traindata.deleteAttributeAt(traindata.attribute("entCat").index()); traindata.setClassIndex(traindata.attribute("entAttCat").index()); onevsall = new WekaWrapper(traindata, true); if (!onlyTest) { onevsall.trainOneVsAll(modelsPath, paramFile + "entAttCat"); System.out.println("trainATC: one vs all models ready"); } onevsall.setTestdata(traindata); HashMap<Integer, HashMap<String, Double>> ovsaRes = onevsall.predictOneVsAll(modelsPath, paramFile + "entAttCat"); System.out.println("trainATC: one vs all predictions ready"); HashMap<Integer, String> kk = new HashMap<Integer, String>(); for (String oId : atcTrain.getOpinInst().keySet()) { kk.put(atcTrain.getOpinInst().get(oId), oId); } Object[] ll = ovsaRes.get(1).keySet().toArray(); for (Object l : ll) { System.err.print((String) l + " - "); } System.err.print("\n"); for (int i : ovsaRes.keySet()) { //System.err.println("instance "+i+" oid "+kk.get(i+1)+"kk contains key i?"+kk.containsKey(i)); String sId = reader.getOpinion(kk.get(i)).getsId(); reader.removeSentenceOpinions(sId); int oSubId = 0; for (String cl : ovsaRes.get(i).keySet()) { //System.err.println("instance: "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl)); if (ovsaRes.get(i).get(cl) > threshold) { //System.err.println("one got through ! instance "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl)); oSubId++; //create and add opinion to the structure //trgt, offsetFrom, offsetTo, polarity, cat, sId); Opinion op = new Opinion(kk.get(i) + "_" + oSubId, "", 0, 0, "", cl, sId); reader.addOpinion(op); } } } reader.print2Semeval2015format(paramFile + "entAttCat.xml"); } catch (Exception e) { e.printStackTrace(); } //traindata.setClass(traindata.attribute("entAttCat")); System.err.println("DONE CLI train-atc2 (oneVsAll)"); }
From source file:elh.eus.absa.WekaWrapper.java
License:Open Source License
/** * Train one vs all models over the given training data. * /*from w w w . j ava 2s .com*/ * @param modelpath directory to store each model for the one vs. all method * @param prefix prefix the models should have (each model will have the name of its class appended * @throws Exception */ public void trainOneVsAll(String modelpath, String prefix) throws Exception { Instances orig = new Instances(traindata); Enumeration<Object> classValues = traindata.classAttribute().enumerateValues(); String classAtt = traindata.classAttribute().name(); while (classValues.hasMoreElements()) { String v = (String) classValues.nextElement(); System.err.println("trainer onevsall for class " + v + " classifier"); //needed because of weka's sparse data format problems THIS IS TROUBLE! ... if (v.equalsIgnoreCase("dummy")) { continue; } // copy instances and set the same class value Instances ovsa = new Instances(orig); //create a new class attribute // // Declare the class attribute along with its values ArrayList<String> classVal = new ArrayList<String>(); classVal.add("dummy"); //needed because of weka's sparse data format problems... classVal.add(v); classVal.add("UNKNOWN"); ovsa.insertAttributeAt(new Attribute(classAtt + "2", classVal), ovsa.numAttributes()); //change all instance labels that have not the current class value to "other" for (int i = 0; i < ovsa.numInstances(); i++) { Instance inst = ovsa.instance(i); String instClass = inst.stringValue(ovsa.attribute(classAtt).index()); if (instClass.equalsIgnoreCase(v)) { inst.setValue(ovsa.attribute(classAtt + "2").index(), v); } else { inst.setValue(ovsa.attribute(classAtt + "2").index(), "UNKNOWN"); } } //delete the old class attribute and set the new. ovsa.setClassIndex(ovsa.attribute(classAtt + "2").index()); ovsa.deleteAttributeAt(ovsa.attribute(classAtt).index()); ovsa.renameAttribute(ovsa.attribute(classAtt + "2").index(), classAtt); ovsa.setClassIndex(ovsa.attribute(classAtt).index()); //build the classifier, crossvalidate and store the model setTraindata(ovsa); saveModel(modelpath + File.separator + prefix + "_" + v + ".model"); setTestdata(ovsa); testModel(modelpath + File.separator + prefix + "_" + v + ".model"); System.err.println("trained onevsall " + v + " classifier"); } setTraindata(orig); }
From source file:entities.ArffFile.java
/** * Dada una lista de parametros, se ejecuta el filtro de microagregacion. * Todos estos parametros son entrada del usuario. * @param df Puede ser Euclidian o Manhattan distance, se especifica en la entrada. * @param numCluster/*from w ww. j a v a 2 s.c om*/ * @param seed * @param maxIterations * @param replaceMissingValues * @param preserveInstancesOrder * @param attributes lista de los atributos que se desean generalizar con cluster */ public void microAgregacion(DistanceFunction df, int numCluster, int seed, int maxIterations, boolean replaceMissingValues, boolean preserveInstancesOrder, List<Integer> attributes) throws Exception { //instancesFilter = new Instances(instances); SimpleKMeans kMeans; kMeans = new SimpleKMeans(); Instances uniqueAttributes; uniqueAttributes = new Instances(instancesFilter); List<String> names = new ArrayList<>(); int i = 0; for (Integer attribute : attributes) { String name = new String(instancesFilter.attribute(attribute).name()); if (instancesFilter.attribute(attribute).isDate() || instancesFilter.attribute(attribute).isString()) throw new Exception("No se puede hacer cluster con atributos de tipo DATE o STRING"); names.add(name); } while (uniqueAttributes.numAttributes() != attributes.size()) { if (!names.contains(uniqueAttributes.attribute(i).name())) uniqueAttributes.deleteAttributeAt(i); else i++; } try { kMeans.setNumClusters(numCluster); kMeans.setMaxIterations(maxIterations); kMeans.setSeed(seed); kMeans.setDisplayStdDevs(false); kMeans.setDistanceFunction(df); kMeans.setDontReplaceMissingValues(replaceMissingValues); kMeans.setPreserveInstancesOrder(preserveInstancesOrder); kMeans.buildClusterer(uniqueAttributes); //System.out.println(kMeans); for (int j = 0; j < uniqueAttributes.numInstances(); j++) { int cluster = kMeans.clusterInstance(uniqueAttributes.instance(j)); for (int k = 0; k < uniqueAttributes.numAttributes(); k++) { if (uniqueAttributes.attribute(k).isNumeric()) uniqueAttributes.instance(j).setValue(k, Double.parseDouble(kMeans.getClusterCentroids().instance(cluster).toString(k))); else uniqueAttributes.instance(j).setValue(k, kMeans.getClusterCentroids().instance(cluster).toString(k)); } } replaceValues(uniqueAttributes, attributes); } catch (Exception ex) { Logger.getLogger(ArffFile.class.getName()).log(Level.SEVERE, null, ex); } //saveToFile("4"); }
From source file:es.bsc.autonomic.powermodeller.tools.classifiers.WekaWrapper.java
License:Apache License
public static DataSet processDataSet(DataSet ds, VariableParser parser) { String independent = ds.getIndependent(); if (independent == null) throw new WekaWrapperException("Independent variable is not set in dataset."); HashMap<String, String> expression_list = parser.getNewMetrics(); Instances data = convertDataSetToInstances(ds); try {// w w w.java2 s. com // Apply filters for all the new variables for (Map.Entry<String, String> entry : expression_list.entrySet()) { String key = entry.getKey(); String value = entry.getValue(); logger.debug("Generating new variable " + key + " as " + value); AddExpression add_filter = new AddExpression(); add_filter.setName(key); add_filter.setExpression(value); add_filter.setInputFormat(data); data = useFilter(data, add_filter); } } catch (Exception e) { logger.error("Error while processing new variables", e); throw new WekaWrapperException("Error while processing new variables"); } // Iterate over all the columns and keep only the ones contained in variables list List<String> variables = parser.getColumns(); // Append independent variable to the list of variables to keep variables.add(independent); // Remove unneeded attributes try { // it's important to iterate from last to first, because when we remove // an instance, the rest shifts by one position. for (int i = data.numAttributes() - 1; i >= 0; i--) { String n = data.attribute(i).name(); if (!variables.contains(data.attribute(i).name())) { logger.trace("Deleting unnecessary attribute " + data.attribute(i).name()); data.deleteAttributeAt(i); } } data.toString(); } catch (Exception e) { logger.error("Error while removing unneeded variables", e); throw new WekaWrapperException("Error while removing unneeded variables"); } // Convert Instances in csv and return the new DataSet String new_path = CoreConfiguration.getNewCSVFileName(); try { CSVSaver saver = new CSVSaver(); saver.setInstances(data); saver.setFile(new File(new_path)); saver.writeBatch(); } catch (Exception e) { logger.error("Error while removing unneeded variables", e); throw new WekaWrapperException("Error while removing unneeded variables"); } DataSet ret = new DataSet(new_path); ret.setIndependent(independent); return ret; }
From source file:fantail.algorithms.RankingByPairwiseComparison.java
License:Open Source License
@Override public void buildRanker(Instances data) throws Exception { m_Classifiers = new ArrayList<weka.classifiers.AbstractClassifier>(); m_AlgoPairs = new ArrayList<String>(); m_NumLabels = Tools.getNumberTargets(data); // build pb datasets for (int a = 0; a < m_NumLabels; a++) { for (int b = 0; b < m_NumLabels; b++) { String pairStr = a + "|" + b; if (!hasPair(m_AlgoPairs, pairStr) && a != b) { m_AlgoPairs.add(pairStr); Instances d = new Instances(data); d.setClassIndex(-1);//from ww w . j ava2 s . co m d.deleteAttributeAt(d.numAttributes() - 1); weka.filters.unsupervised.attribute.Add add = new weka.filters.unsupervised.attribute.Add(); add.setInputFormat(d); add.setOptions(weka.core.Utils .splitOptions("-T NOM -N class -L " + ((int) a) + "," + ((int) b) + " -C last")); d = Filter.useFilter(d, add); d.setClassIndex(d.numAttributes() - 1); for (int i = 0; i < d.numInstances(); i++) { Instance metaInst = (Instance) data.instance(i); Instance inst = d.instance(i); double[] rankVector = Tools.getTargetVector(metaInst); double rank_a = rankVector[a]; double rank_b = rankVector[b]; if (rank_a < rank_b) { inst.setClassValue(0.0); } else { inst.setClassValue(1.0); } } //weka.classifiers.functions.SMO cls = new weka.classifiers.functions.SMO(); //String ops = "weka.classifiers.functions.SMO -C 1.0 -L 0.001 -P 1.0E-12 -N 0 -V -1 -W 1 -K \"weka.classifiers.functions.supportVector.RBFKernel -C 250007 -G 0.01\""; //cls.setOptions(weka.core.Utils.splitOptions(ops)); //cls.buildClassifier(d); //weka.classifiers.functions.Logistic cls = new weka.classifiers.functions.Logistic(); //weka.classifiers.trees.J48 cls = new weka.classifiers.trees.J48(); //weka.classifiers.rules.ZeroR cls = new weka.classifiers.rules.ZeroR(); weka.classifiers.trees.DecisionStump cls = new weka.classifiers.trees.DecisionStump(); cls.buildClassifier(d); m_Classifiers.add(cls); m_BaseClassifierName = cls.getClass().getSimpleName(); m_Add = add; } } } }
From source file:fantail.algorithms.RankingByPairwiseComparison.java
License:Open Source License
@Override public double[] recommendRanking(Instance testInst) throws Exception { Instances tempData = new Instances(testInst.dataset(), 0); tempData.add((Instance) testInst.copy()); // remove the relation att tempData.setClassIndex(-1);//from w w w .j av a 2 s . c o m tempData.deleteAttributeAt(tempData.numAttributes() - 1); tempData = Filter.useFilter(tempData, m_Add); tempData.setClassIndex(tempData.numAttributes() - 1); double predRanking[] = new double[m_NumLabels]; for (int i = 0; i < predRanking.length; i++) { predRanking[i] = m_NumLabels - 1; } for (int i = 0; i < m_Classifiers.size(); i++) { double predIndex = m_Classifiers.get(i).classifyInstance(tempData.instance(0)); String algoPair = m_AlgoPairs.get(i); String[] parts = algoPair.split("\\|"); int trueIndex = Integer.parseInt(parts[(int) predIndex]); predRanking[trueIndex] -= 1; } predRanking = Tools.doubleArrayToRanking(predRanking); return predRanking; }
From source file:fantail.algorithms.RankingByPairwiseComparison.java
License:Open Source License
public double[] recommendRanking2(Instance testInst) throws Exception { Instances tempData = new Instances(testInst.dataset(), 0); tempData.add((Instance) testInst.copy()); // remove the relation att tempData.setClassIndex(-1);/*from w w w . jav a 2 s . co m*/ tempData.deleteAttributeAt(tempData.numAttributes() - 1); tempData = Filter.useFilter(tempData, m_Add); tempData.setClassIndex(tempData.numAttributes() - 1); double predRanking[] = new double[m_NumLabels]; for (int i = 0; i < m_Classifiers.size(); i++) { double predIndex = m_Classifiers.get(i).classifyInstance(tempData.instance(0)); double predProb = m_Classifiers.get(i).distributionForInstance(tempData.instance(0))[0]; String algoPair = m_AlgoPairs.get(i); String[] parts = algoPair.split("\\|"); int trueIndex = Integer.parseInt(parts[(int) predIndex]); predRanking[trueIndex] -= predProb; } return Tools.doubleArrayToRanking(predRanking); }
From source file:lu.lippmann.cdb.datasetview.tabs.StatsTabView.java
License:Open Source License
public static Instances buildStatsForNumericalAttributes(final Instances dataset) throws Exception { final StringBuilder sb = new StringBuilder("@relation blabla\n"); sb.append("@attribute 'name' string\n"); sb.append("@attribute 'min' string\n"); sb.append("@attribute 'max' string\n"); sb.append("@attribute 'mean' string\n"); sb.append("@attribute 'stdDev' string\n"); sb.append("@attribute 'missing values count' string\n"); sb.append("@attribute 'missing values %' string\n"); sb.append("@attribute 'values repartition' string\n"); sb.append("@data\n"); for (int i = 0; i < dataset.numAttributes(); i++) { if (dataset.attribute(i).isNumeric() && !dataset.attribute(i).isDate()) { sb.append(dataset.attribute(i).name()).append(",") .append(FormatterUtil.DECIMAL_FORMAT.format(dataset.attributeStats(i).numericStats.min)) .append(",") .append(FormatterUtil.DECIMAL_FORMAT.format(dataset.attributeStats(i).numericStats.max)) .append(",") .append(FormatterUtil.DECIMAL_FORMAT.format(dataset.attributeStats(i).numericStats.mean)) .append(",") .append(FormatterUtil.DECIMAL_FORMAT.format(dataset.attributeStats(i).numericStats.stdDev)) .append(",").append(dataset.attributeStats(i).missingCount).append(",") .append(FormatterUtil.DECIMAL_FORMAT .format(100d * dataset.attributeStats(i).missingCount / dataset.numInstances())) .append(",").append("''").append("\n"); } else if (dataset.attribute(i).isNominal()) { sb.append(dataset.attribute(i).name()).append(",'','','','','','','"); final Map<Object, String> nominalRep = WekaDataStatsUtil .getNominalRepartitionForDescription(dataset, i); for (Map.Entry<Object, String> e : nominalRep.entrySet()) { sb.append(e.getKey()).append("=").append(e.getValue()).append(" "); }/* w ww . j a v a 2 s. com*/ sb.append("'\n"); } } final Instances newds = WekaDataAccessUtil.loadInstancesFromARFFString(sb.toString(), false, false); if (WekaDataStatsUtil.getNominalAttributesIndexes(dataset).length == 0) { newds.deleteAttributeAt(newds.numAttributes() - 1); } return newds; }