List of usage examples for weka.core Instances classIndex
public int classIndex()
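classIndex() returns the zero-based index of the dataset's class attribute, or -1 if no class attribute has been set. Most of the examples below follow the same pattern: load a dataset, check whether classIndex() is -1, and if so designate the last attribute as the class. A minimal sketch of that pattern (the file name is a placeholder, not taken from any example below):

    import weka.core.Instances;
    import weka.core.converters.ConverterUtils.DataSource;

    public class ClassIndexDemo {
        public static void main(String[] args) throws Exception {
            // "iris.arff" is a hypothetical path; any ARFF/CSV readable by DataSource works.
            Instances data = new DataSource("iris.arff").getDataSet();

            // classIndex() stays -1 until a class attribute is chosen explicitly.
            if (data.classIndex() == -1) {
                data.setClassIndex(data.numAttributes() - 1); // use the last attribute as the class
            }

            System.out.println("Class index: " + data.classIndex());
            System.out.println("Class attribute: " + data.classAttribute().name());
        }
    }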
From source file:id3classifier.ID3Classifiers.java
@Override
public void buildClassifier(Instances instances) throws Exception {
    // create a list of instances sized to the number of instances,
    // and a list of attributes sized to the number of attributes
    List<Instance> instanceList = new ArrayList<>(instances.numInstances());
    List<Attribute> attributeList = new ArrayList<>(instances.numAttributes());

    // copy every instance into the instance list
    for (int i = 0; i < instances.numInstances(); i++) {
        instanceList.add(instances.instance(i));
    }

    // copy every attribute except the class attribute into the attribute list
    for (int i = 0; i < instances.numAttributes(); i++) {
        if (i != instances.classIndex()) {
            attributeList.add(instances.attribute(i));
        }
    }

    // build the decision tree from the instance list and the attribute list
    tree = buildTree(instanceList, attributeList);
}
From source file:id3j48.WekaAccess.java
public static Instances readArff(String filename) throws Exception {
    ConverterUtils.DataSource source =
            new ConverterUtils.DataSource(datasetFolder + File.separator + filename);
    Instances data = source.getDataSet();
    if (data.classIndex() == -1) {
        data.setClassIndex(data.numAttributes() - 1);
    }
    return data;
}
From source file:id3j48.WekaAccess.java
public static Instances readCsv(String filename) throws Exception {
    CSVLoader csvLoader = new CSVLoader();
    csvLoader.setSource(new File(datasetFolder + File.separator + filename));
    Instances data = csvLoader.getDataSet();
    if (data.classIndex() == -1) {
        data.setClassIndex(data.numAttributes() - 1);
    }
    return data;
}
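Both readArff and readCsv return an Instances object whose class index is guaranteed to be set, which is the precondition for training and evaluating classifiers. A minimal follow-on sketch, assuming the WekaAccess helpers above are available and using a hypothetical file name and an arbitrary classifier choice:

    // requires weka.classifiers.trees.J48, weka.classifiers.Evaluation and java.util.Random
    Instances data = readArff("weather.nominal.arff");   // hypothetical file inside datasetFolder
    J48 tree = new J48();
    Evaluation eval = new Evaluation(data);              // relies on the class index set above
    eval.crossValidateModel(tree, data, 10, new Random(1));
    System.out.println(eval.toSummaryString());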
From source file:imba.classifier.NBTubes.java
@Override
public void buildClassifier(Instances data) {
    dataClassifier = new ArrayList<>();
    infoClassifier = new ArrayList<>();
    validAttribute = new ArrayList<>();
    dataset = null;
    sumClass = null;
    dataSize = 0;
    header_Instances = data;

    Filter f;
    int i, j, k, l, m;
    int sumVal;
    int numAttr = data.numAttributes(); // this includes the class, so attributes + 1

    i = 0;
    while (i < numAttr && wasNumeric == false) {
        if (i == classIdx) {
            i++;
        }
        if (i != numAttr && data.attribute(i).isNumeric()) {
            wasNumeric = true;
        }
        i++;
    }

    Instance p;

    // apply a filter
    if (wasNumeric) {
        f = new Normalize();
        //Filter f = new NumericToNominal();
        try {
            f.setInputFormat(data);
            for (Instance i1 : data) {
                f.input(i1);
            }
            f.batchFinished();
        } catch (Exception ex) {
            Logger.getLogger(NBTubes.class.getName()).log(Level.SEVERE, null, ex);
        }

        dataset = f.getOutputFormat();
        while ((p = f.output()) != null) {
            dataset.add(p);
        }
    }

    //f = new NumericToNominal();
    if (filter.equals("Discretize")) {
        f = new Discretize();
    } else {
        f = new NumericToNominal();
    }

    try {
        if (wasNumeric) {
            f.setInputFormat(dataset);
            for (Instance i1 : dataset) {
                f.input(i1);
            }
        } else {
            f.setInputFormat(data);
            for (Instance i1 : data) {
                f.input(i1);
            }
        }
        f.batchFinished();
    } catch (Exception ex) {
        Logger.getLogger(NBTubes.class.getName()).log(Level.SEVERE, null, ex);
    }

    dataset = null;
    dataset = f.getOutputFormat();
    while ((p = f.output()) != null) {
        dataset.add(p);
    }

    // building the data structure
    classIdx = data.classIndex();
    dataSize = data.size();

    // fill dataClassifier and infoClassifier with empty arrays
    i = 0;
    j = i;
    while (j < numAttr) {
        if (i == classIdx) {
            i++;
        } else {
            dataClassifier.add(new ArrayList<>());
            infoClassifier.add(new ArrayList<>());

            if (j < i) {
                m = j - 1;
            } else {
                m = j;
            }

            k = 0;
            while (k < dataset.attribute(j).numValues()) {
                dataClassifier.get(m).add(new ArrayList<>());
                infoClassifier.get(m).add(new ArrayList<>());

                l = 0;
                while (l < dataset.attribute(classIdx).numValues()) {
                    dataClassifier.get(m).get(k).add(0);
                    infoClassifier.get(m).get(k).add(0.0);
                    l++;
                }
                k++;
            }
        }
        i++;
        j++;
    }

    // fill the data classifier from the dataset
    sumClass = new int[data.numClasses()];
    i = 0;
    while (i < dataset.size()) {
        j = 0;
        k = j;
        while (k < dataset.numAttributes()) {
            if (j == classIdx) {
                j++;
            } else {
                if (k < j) {
                    m = k - 1;
                } else {
                    m = k;
                }

                dataClassifier.get(m).get((int) dataset.get(i).value(k)).set(
                        (int) dataset.get(i).value(classIdx),
                        dataClassifier.get(m).get((int) dataset.get(i).value(k))
                                .get((int) dataset.get(i).value(classIdx)) + 1);

                if (m == 0) {
                    sumClass[(int) dataset.get(i).value(classIdx)]++;
                }
            }
            k++;
            j++;
        }
        i++;
    }

    // convert the counts to double-valued relative frequencies
    i = 0;
    while (i < dataClassifier.size()) {
        j = 0;
        while (j < dataClassifier.get(i).size()) {
            k = 0;
            while (k < dataClassifier.get(i).get(j).size()) {
                infoClassifier.get(i).get(j).set(k,
                        (double) dataClassifier.get(i).get(j).get(k) / sumClass[k]);
                k++;
            }
            j++;
        }
        i++;
    }

    /*
    // check whether any attribute has a value
    // that represents more than 80% of the data
    i = 0;
    while (i < dataClassifier.size()) {
        j = 0;
        while (j < dataClassifier.get(i).size()) {
            j++;
        }
        i++;
    }
    */
}
From source file:irisdata.IrisData.java
/**
 * @param args the command line arguments
 * @throws java.lang.Exception
 */
public static void main(String[] args) throws Exception {

    String file = "/Users/paul/Desktop/BYU-Idaho/Spring2015/CS450/iris.csv";
    DataSource source = new DataSource(file);
    Instances data = source.getDataSet();

    if (data.classIndex() == -1) {
        data.setClassIndex(data.numAttributes() - 1);
    }

    data.randomize(new Random(1));

    // set training set to 70%
    RemovePercentage remove = new RemovePercentage();
    remove.setPercentage(30);
    remove.setInputFormat(data);
    Instances trainingSet = Filter.useFilter(data, remove);

    // set the rest for the testing set
    remove.setInvertSelection(true);
    Instances testSet = Filter.useFilter(data, remove);

    // train classifier - kind of
    HardCodedClassifier classifier = new HardCodedClassifier();
    classifier.buildClassifier(trainingSet); // this does nothing right now

    // Evaluate classifier
    Evaluation eval = new Evaluation(trainingSet);
    eval.evaluateModel(classifier, testSet);
    //eval.crossValidateModel(classifier, data, 10, new Random(1));

    // Print some statistics
    System.out.println("Results: " + eval.toSummaryString());
}
From source file:irisdriver.IrisDriver.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) {
    // Example arguments: sepallength=5.1 sepalwidth=3.5 petallength=1.4 petalwidth=0.2
    try {
        Hashtable<String, String> values = new Hashtable<String, String>();
        /*Iris irisModel = new Iris();
        for (int i = 0; i < args.length; i++) {
            String[] tokens = args[i].split("=");
            values.put(tokens[0], tokens[1]);
        }
        System.out.println("Classification: " + irisModel.classifySpecies(values));*/

        // Loading the model
        String pathModel = "";
        String pathTestSet = "";

        JFileChooser chooserModel = new JFileChooser();
        chooserModel.setCurrentDirectory(new java.io.File("."));
        chooserModel.setDialogTitle("Choose the model");
        chooserModel.setFileSelectionMode(JFileChooser.FILES_AND_DIRECTORIES);
        chooserModel.setAcceptAllFileFilterUsed(true);

        if (chooserModel.showOpenDialog(null) == JFileChooser.APPROVE_OPTION) {
            File filePathModel = chooserModel.getSelectedFile();
            pathModel = filePathModel.getPath();
            Iris irisModel = new Iris(pathModel);

            // Loading the testing dataset
            JFileChooser chooserTestSet = new JFileChooser();
            chooserTestSet.setDialogTitle("Choose TEST SET");
            chooserTestSet.setFileSelectionMode(JFileChooser.FILES_AND_DIRECTORIES);
            chooserTestSet.setAcceptAllFileFilterUsed(true);

            if (chooserTestSet.showOpenDialog(null) == JFileChooser.APPROVE_OPTION) {
                File filePathTestSet = chooserTestSet.getSelectedFile();
                pathTestSet = filePathTestSet.getPath();

                // WRITING THE OUTPUT:
                BufferedWriter writer = new BufferedWriter(new FileWriter("D:\\output_file.txt"));

                // Transforming the data set into attribute-value pairs
                ConverterUtils.DataSource unlabeledSource = new ConverterUtils.DataSource(pathTestSet);
                Instances unlabeledData = unlabeledSource.getDataSet();
                if (unlabeledData.classIndex() == -1) {
                    unlabeledData.setClassIndex(unlabeledData.numAttributes() - 1);
                }

                for (int i = 0; i < unlabeledData.numInstances(); i++) {
                    Instance ins = unlabeledData.instance(i);

                    // ins.numAttributes() - 1 --> not to include the label
                    for (int j = 0; j < ins.numAttributes() - 1; j++) {
                        String attrib = ins.attribute(j).name();
                        double val = ins.value(ins.attribute(j));
                        values.put(attrib, String.valueOf(val));
                    }

                    String predictedLabel = irisModel.classifySpecies(values);
                    System.out.println("Classification: " + predictedLabel);
                    values.clear();

                    // Writing the results to a txt file
                    writer.write("The label is: " + predictedLabel);
                    //writer.newLine();
                    //writer.write("The error rate of the prediction is : " + eval.errorRate());
                    //writer.newLine();
                }
                writer.flush();
                writer.close();
            }
        }
    } catch (Exception ex) {
        Logger.getLogger(IrisDriver.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:j48.C45PruneableClassifierTreeG.java
License:Open Source License
/**
 * finds new nodes that improve accuracy and grafts them onto the tree
 *
 * @param fulldata the instances in whole trainset
 * @param iindex records num tests each instance has failed up to this node
 * @param limits the upper/lower limits for numeric attributes
 * @param parent the node immediately before the current one
 * @param pLaplace laplace for leaf, calculated by parent (in case leaf empty)
 * @param pLeafClass class of leaf, determined by parent (in case leaf empty)
 */
private void findGraft(Instances fulldata, double[][] iindex, double[][] limits,
        ClassifierTree parent, double pLaplace, int pLeafClass) throws Exception {

    // get the class for this leaf
    int leafClass = (m_isEmpty) ? pLeafClass : localModel().distribution().maxClass();

    // get the laplace value for this leaf
    double leafLaplace = (m_isEmpty) ? pLaplace : laplaceLeaf(leafClass);

    // sort the instances into those at the leaf, those in atbop, and discarded
    Instances l = new Instances(fulldata, fulldata.numInstances());
    Instances n = new Instances(fulldata, fulldata.numInstances());
    int lcount = 0;
    int acount = 0;
    for (int x = 0; x < fulldata.numInstances(); x++) {
        if (iindex[0][x] <= 0 && iindex[1][x] <= 0)
            continue;
        if (iindex[0][x] != 0) {
            l.add(fulldata.instance(x));
            l.instance(lcount).setWeight(iindex[0][x]);
            // move instance's weight in iindex to same index as in l
            iindex[0][lcount++] = iindex[0][x];
        }
        if (iindex[1][x] > 0) {
            n.add(fulldata.instance(x));
            n.instance(acount).setWeight(iindex[1][x]);
            // move instance's weight in iindex to same index as in n
            iindex[1][acount++] = iindex[1][x];
        }
    }

    boolean graftPossible = false;
    double[] classDist = new double[n.numClasses()];
    for (int x = 0; x < n.numInstances(); x++) {
        if (iindex[1][x] > 0 && !n.instance(x).classIsMissing())
            classDist[(int) n.instance(x).classValue()] += iindex[1][x];
    }

    for (int cVal = 0; cVal < n.numClasses(); cVal++) {
        double theLaplace = (classDist[cVal] + 1.0) / (classDist[cVal] + 2.0);
        if (cVal != leafClass && (theLaplace > leafLaplace)
                && (biprob(classDist[cVal], classDist[cVal], leafLaplace) > m_BiProbCrit)) {
            graftPossible = true;
            break;
        }
    }

    if (!graftPossible) {
        return;
    }

    // 1. Initialize to {} a set of tuples t containing potential tests
    ArrayList t = new ArrayList();

    // go through each attribute
    for (int a = 0; a < n.numAttributes(); a++) {
        if (a == n.classIndex())
            continue; // skip the class

        // sort instances in atbop by $a
        int[] sorted = sortByAttribute(n, a);

        // 2. For each continuous attribute $a:
        if (n.attribute(a).isNumeric()) {

            // find min and max values for this attribute at the leaf
            boolean prohibited = false;
            double minLeaf = Double.POSITIVE_INFINITY;
            double maxLeaf = Double.NEGATIVE_INFINITY;
            for (int i = 0; i < l.numInstances(); i++) {
                if (l.instance(i).isMissing(a)) {
                    if (l.instance(i).classValue() == leafClass) {
                        prohibited = true;
                        break;
                    }
                }
                double value = l.instance(i).value(a);
                if (!m_relabel || l.instance(i).classValue() == leafClass) {
                    if (value < minLeaf)
                        minLeaf = value;
                    if (value > maxLeaf)
                        maxLeaf = value;
                }
            }
            if (prohibited) {
                continue;
            }

            // (a) find values of
            //  $n: instances in atbop (already have that, actually)
            //  $v: a value for $a that exists for a case in the atbop, where
            //      $v is < the min value for $a for a case at the leaf which
            //      has the class $c, and $v is > the lowerlimit of $a at
            //      the leaf.
            //      (note: error in original paper stated that $v must be
            //       smaller OR EQUAL TO the min value).
            //  $k: $k is a class
            //      that maximize L' = Laplace({$x: $x contained in cases($n)
            //      & value($a,$x) <= $v & value($a,$x) > lowerlim($l,$a)}, $k).
            double minBestClass = Double.NaN;
            double minBestLaplace = leafLaplace;
            double minBestVal = Double.NaN;
            double minBestPos = Double.NaN;
            double minBestTotal = Double.NaN;
            double[][] minBestCounts = null;
            double[][] counts = new double[2][n.numClasses()];
            for (int x = 0; x < n.numInstances(); x++) {
                if (n.instance(sorted[x]).isMissing(a))
                    break; // missing are sorted to end: no more valid vals

                double theval = n.instance(sorted[x]).value(a);
                if (m_Debug)
                    System.out.println("\t " + theval);

                if (theval <= limits[a][0]) {
                    if (m_Debug)
                        System.out.println("\t <= lowerlim: continuing...");
                    continue;
                }
                // note: error in paper would have this read "theVal > minLeaf)
                if (theval >= minLeaf) {
                    if (m_Debug)
                        System.out.println("\t >= minLeaf; breaking...");
                    break;
                }

                counts[0][(int) n.instance(sorted[x]).classValue()] += iindex[1][sorted[x]];

                if (x != n.numInstances() - 1) {
                    int z = x + 1;
                    while (z < n.numInstances() && n.instance(sorted[z]).value(a) == theval) {
                        z++;
                        x++;
                        counts[0][(int) n.instance(sorted[x]).classValue()] += iindex[1][sorted[x]];
                    }
                }

                // work out the best laplace/class (for <= theval)
                double total = Utils.sum(counts[0]);
                for (int c = 0; c < n.numClasses(); c++) {
                    double temp = (counts[0][c] + 1.0) / (total + 2.0);
                    if (temp > minBestLaplace) {
                        minBestPos = counts[0][c];
                        minBestTotal = total;
                        minBestLaplace = temp;
                        minBestClass = c;
                        minBestCounts = copyCounts(counts);
                        minBestVal = (x == n.numInstances() - 1) ? theval
                                : ((theval + n.instance(sorted[x + 1]).value(a)) / 2.0);
                    }
                }
            }

            // (b) add to t tuple <n,a,v,k,L',"<=">
            if (!Double.isNaN(minBestVal)
                    && biprob(minBestPos, minBestTotal, leafLaplace) > m_BiProbCrit) {
                GraftSplit gsplit = null;
                try {
                    gsplit = new GraftSplit(a, minBestVal, 0, leafClass, minBestCounts);
                } catch (Exception e) {
                    System.err.println("graftsplit error: " + e.getMessage());
                    System.exit(1);
                }
                t.add(gsplit);
            }
            // free space
            minBestCounts = null;

            // (c) find values of
            //  n: instances in atbop (already have that, actually)
            //  $v: a value for $a that exists for a case in the atbop, where
            //      $v is > the max value for $a for a case at the leaf which
            //      has the class $c, and $v is <= the upperlimit of $a at
            //      the leaf.
            //  k: k is a class
            //      that maximize L' = Laplace({x: x contained in cases(n)
            //      & value(a,x) > v & value(a,x) <= upperlim(l,a)}, k).
            double maxBestClass = -1;
            double maxBestLaplace = leafLaplace;
            double maxBestVal = Double.NaN;
            double maxBestPos = Double.NaN;
            double maxBestTotal = Double.NaN;
            double[][] maxBestCounts = null;
            for (int c = 0; c < n.numClasses(); c++) {
                // zero the counts
                counts[0][c] = 0;
                counts[1][c] = 0; // shouldn't need to do this ...
            }

            // check smallest val for a in atbop is < upper limit
            if (n.numInstances() >= 1 && n.instance(sorted[0]).value(a) < limits[a][1]) {
                for (int x = n.numInstances() - 1; x >= 0; x--) {
                    if (n.instance(sorted[x]).isMissing(a))
                        continue;

                    double theval = n.instance(sorted[x]).value(a);
                    if (m_Debug)
                        System.out.println("\t " + theval);

                    if (theval > limits[a][1]) {
                        if (m_Debug)
                            System.out.println("\t >= upperlim; continuing...");
                        continue;
                    }
                    if (theval <= maxLeaf) {
                        if (m_Debug)
                            System.out.println("\t < maxLeaf; breaking...");
                        break;
                    }

                    // increment counts
                    counts[1][(int) n.instance(sorted[x]).classValue()] += iindex[1][sorted[x]];

                    if (x != 0 && !n.instance(sorted[x - 1]).isMissing(a)) {
                        int z = x - 1;
                        while (z >= 0 && n.instance(sorted[z]).value(a) == theval) {
                            z--;
                            x--;
                            counts[1][(int) n.instance(sorted[x]).classValue()] += iindex[1][sorted[x]];
                        }
                    }

                    // work out best laplace for > theval
                    double total = Utils.sum(counts[1]);
                    for (int c = 0; c < n.numClasses(); c++) {
                        double temp = (counts[1][c] + 1.0) / (total + 2.0);
                        if (temp > maxBestLaplace) {
                            maxBestPos = counts[1][c];
                            maxBestTotal = total;
                            maxBestLaplace = temp;
                            maxBestClass = c;
                            maxBestCounts = copyCounts(counts);
                            maxBestVal = (x == 0) ? theval
                                    : ((theval + n.instance(sorted[x - 1]).value(a)) / 2.0);
                        }
                    }
                }

                // (d) add to t tuple <n,a,v,k,L',">">
                if (!Double.isNaN(maxBestVal)
                        && biprob(maxBestPos, maxBestTotal, leafLaplace) > m_BiProbCrit) {
                    GraftSplit gsplit = null;
                    try {
                        gsplit = new GraftSplit(a, maxBestVal, 1, leafClass, maxBestCounts);
                    } catch (Exception e) {
                        System.err.println("graftsplit error:" + e.getMessage());
                        System.exit(1);
                    }
                    t.add(gsplit);
                }
            }
        } else { // must be a nominal attribute

            // 3. for each discrete attribute a for which there is no
            //    test at an ancestor of l

            // skip if this attribute has already been used
            if (limits[a][1] == 1) {
                continue;
            }

            boolean[] prohibit = new boolean[l.attribute(a).numValues()];
            for (int aval = 0; aval < n.attribute(a).numValues(); aval++) {
                for (int x = 0; x < l.numInstances(); x++) {
                    if ((l.instance(x).isMissing(a) || l.instance(x).value(a) == aval)
                            && (!m_relabel || (l.instance(x).classValue() == leafClass))) {
                        prohibit[aval] = true;
                        break;
                    }
                }
            }

            // (a) find values of
            //  $n: instances in atbop (already have that, actually)
            //  $v: $v is a value for $a
            //  $k: $k is a class
            //      that maximize L' = Laplace({$x: $x contained in cases($n)
            //      & value($a,$x) = $v}, $k).
            double bestVal = Double.NaN;
            double bestClass = Double.NaN;
            double bestLaplace = leafLaplace;
            double[][] bestCounts = null;
            double[][] counts = new double[2][n.numClasses()];

            for (int x = 0; x < n.numInstances(); x++) {
                if (n.instance(sorted[x]).isMissing(a))
                    continue;

                // zero the counts
                for (int c = 0; c < n.numClasses(); c++)
                    counts[0][c] = 0;

                double theval = n.instance(sorted[x]).value(a);
                counts[0][(int) n.instance(sorted[x]).classValue()] += iindex[1][sorted[x]];

                if (x != n.numInstances() - 1) {
                    int z = x + 1;
                    while (z < n.numInstances() && n.instance(sorted[z]).value(a) == theval) {
                        z++;
                        x++;
                        counts[0][(int) n.instance(sorted[x]).classValue()] += iindex[1][sorted[x]];
                    }
                }

                if (!prohibit[(int) theval]) {
                    // work out best laplace for > theval
                    double total = Utils.sum(counts[0]);
                    bestLaplace = leafLaplace;
                    bestClass = Double.NaN;
                    for (int c = 0; c < n.numClasses(); c++) {
                        double temp = (counts[0][c] + 1.0) / (total + 2.0);
                        if (temp > bestLaplace
                                && biprob(counts[0][c], total, leafLaplace) > m_BiProbCrit) {
                            bestLaplace = temp;
                            bestClass = c;
                            bestVal = theval;
                            bestCounts = copyCounts(counts);
                        }
                    }
                    // add to graft list
                    if (!Double.isNaN(bestClass)) {
                        GraftSplit gsplit = null;
                        try {
                            gsplit = new GraftSplit(a, bestVal, 2, leafClass, bestCounts);
                        } catch (Exception e) {
                            System.err.println("graftsplit error: " + e.getMessage());
                            System.exit(1);
                        }
                        t.add(gsplit);
                    }
                }
            }
            // (b) add to t tuple <n,a,v,k,L',"=">
            // done this already
        }
    }

    // 4. remove from t all tuples <n,a,v,c,L,x> such that L <=
    //    Laplace(cases(l),c) or prob(x,n,Laplace(cases(l),c) <= 0.05
    //    -- checked this constraint prior to adding a tuple --

    // *** step six done before step five for efficiency ***
    // 6. for each <n,a,v,k,L,x> in t ordered on L from highest to lowest
    // order the tuples from highest to lowest laplace
    // (this actually orders lowest to highest)
    Collections.sort(t);

    // 5. remove from t all tuples <n,a,v,c,L,x> such that there is
    //    no tuple <n',a',v',k',L',x'> such that k' != c & L' < L.
    for (int x = 0; x < t.size(); x++) {
        GraftSplit gs = (GraftSplit) t.get(x);
        if (gs.maxClassForSubsetOfInterest() != leafClass) {
            break; // reached a graft with class != leafClass, so stop deleting
        } else {
            t.remove(x);
            x--;
        }
    }

    // if no potential grafts were found, do nothing and return
    if (t.size() < 1) {
        return;
    }

    // create the distributions for each graft
    for (int x = t.size() - 1; x >= 0; x--) {
        GraftSplit gs = (GraftSplit) t.get(x);
        try {
            gs.buildClassifier(l);
            gs.deleteGraftedCases(l); // so they don't go down the other branch
        } catch (Exception e) {
            System.err.println("graftsplit build error: " + e.getMessage());
        }
    }

    // add this stuff to the tree
    ((C45PruneableClassifierTreeG) parent).setDescendents(t, this);
}
From source file:j48.GraftSplit.java
License:Open Source License
/**
 * method for returning information about this GraftSplit
 * @param data instances for determining names of attributes and values
 * @return a string showing this GraftSplit's information
 */
public String toString(Instances data) {

    String theTest;
    if (m_testType == 0)
        theTest = " <= ";
    else if (m_testType == 1)
        theTest = " > ";
    else if (m_testType == 2)
        theTest = " = ";
    else
        theTest = " != ";

    if (data.attribute(m_attIndex).isNominal())
        theTest += data.attribute(m_attIndex).value((int) m_splitPoint);
    else
        theTest += Double.toString(m_splitPoint);

    return data.attribute(m_attIndex).name() + theTest + " (" + Double.toString(m_laplace)
            + ") --> " + data.attribute(data.classIndex()).value(m_maxClass);
}
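Here classIndex() is used to look up the class attribute so that the numeric class value m_maxClass can be turned back into its label. A short sketch of that lookup in isolation, assuming data is an Instances object with a nominal class and a set class index, and predictedIndex is a hypothetical prediction:

    // data.attribute(data.classIndex()) and data.classAttribute() refer to the same attribute
    int predictedIndex = 0; // hypothetical class value index returned by a classifier
    String viaClassIndex = data.attribute(data.classIndex()).value(predictedIndex);
    String viaClassAttribute = data.classAttribute().value(predictedIndex);
    // both strings hold the same class label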
From source file:kea.KEAFilter.java
License:Open Source License
/**
 * Sets the format of the input instances.
 *
 * @param instanceInfo an Instances object containing the input
 * instance structure (any instances contained in the object are
 * ignored - only the structure is required).
 * @return true if the outputFormat may be collected immediately
 */
public boolean setInputFormat(Instances instanceInfo) throws Exception {

    if (instanceInfo.classIndex() >= 0) {
        throw new Exception("Don't know what do to if class index set!");
    }

    if (!instanceInfo.attribute(m_KeyphrasesAtt).isString()
            || !instanceInfo.attribute(m_DocumentAtt).isString()) {
        throw new Exception("Keyphrase attribute and document attribute "
                + "need to be string attributes.");
    }

    m_PunctFilter = new KEAPhraseFilter();
    int[] arr = new int[1];
    arr[0] = m_DocumentAtt;
    m_PunctFilter.setAttributeIndicesArray(arr);
    m_PunctFilter.setInputFormat(instanceInfo);
    m_PunctFilter.setDisallowInternalPeriods(getDisallowInternalPeriods());

    m_NumbersFilter = new NumbersFilter();
    m_NumbersFilter.setInputFormat(m_PunctFilter.getOutputFormat());

    super.setInputFormat(m_NumbersFilter.getOutputFormat());

    return false;
}
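Unlike the earlier examples, this filter requires the class index to be unset, and setInputFormat() throws if classIndex() >= 0. A minimal sketch of the usual calling pattern, assuming a hypothetical ARFF file with string document/keyphrase attributes and leaving any KEAFilter-specific options at their defaults:

    // requires weka.core.Instances, weka.core.converters.ConverterUtils.DataSource,
    // weka.filters.Filter and kea.KEAFilter
    Instances input = new ConverterUtils.DataSource("documents.arff").getDataSet();
    // deliberately do NOT call input.setClassIndex(...): setInputFormat() above
    // rejects input whose class index is already set
    KEAFilter keaFilter = new KEAFilter();
    keaFilter.setInputFormat(input);                       // configure from the input structure
    Instances output = Filter.useFilter(input, keaFilter); // push every instance through the filter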
From source file:learn.Classification.Chinese.TextDirectoryLoader.java
License:Open Source License
/**
 * Return the full data set. If the structure hasn't yet been determined by
 * a call to getStructure then method should do so before processing the
 * rest of the data set.
 *
 * @return the structure of the data set as an empty set of Instances
 * @throws IOException if there is no source or parsing fails
 */
public Instances getDataSet() throws IOException {
    if (getDirectory() == null)
        throw new IOException("No directory/source has been specified");

    String directoryPath = getDirectory().getAbsolutePath();
    ArrayList<String> classes = new ArrayList<String>();
    Enumeration enm = getStructure().classAttribute().enumerateValues();
    while (enm.hasMoreElements())
        classes.add((String) enm.nextElement());

    Instances data = getStructure();
    int fileCount = 0;
    for (int k = 0; k < classes.size(); k++) {
        String subdirPath = (String) classes.get(k);
        File subdir = new File(directoryPath + File.separator + subdirPath);
        String[] files = subdir.list();
        for (int j = 0; j < files.length; j++) {
            try {
                fileCount++;
                if (getDebug())
                    System.err.println("processing " + fileCount + " : " + subdirPath + " : " + files[j]);

                double[] newInst = null;
                if (m_OutputFilename)
                    newInst = new double[3];
                else
                    newInst = new double[2];

                File txt = new File(directoryPath + File.separator + subdirPath + File.separator + files[j]);
                BufferedInputStream is;
                is = new BufferedInputStream(new FileInputStream(txt));
                StringBuffer txtStr = new StringBuffer();
                int c;
                /*
                 * while ((c = is.read()) != -1) { txtStr.append((char) c); }
                 */
                //FileReader fr = new FileReader(txt);
                BufferedReader br = new BufferedReader(
                        new InputStreamReader(new FileInputStream(txt), "UTF-8"));
                String line;
                while ((line = br.readLine()) != null) {
                    txtStr.append(line + "\n");
                }

                newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString());
                if (m_OutputFilename)
                    newInst[1] = (double) data.attribute(1)
                            .addStringValue(subdirPath + File.separator + files[j]);
                newInst[data.classIndex()] = (double) k;
                data.add(new DenseInstance(1.0, newInst));
                is.close();
            } catch (Exception e) {
                System.err.println("failed to convert file: " + directoryPath + File.separator
                        + subdirPath + File.separator + files[j]);
            }
        }
    }

    return data;
}