List of usage examples for weka.core Instances lastInstance
public Instance lastInstance()
From source file:com.hack23.cia.service.impl.action.user.wordcount.WordCounterImpl.java
License:Apache License
/**
 * Counts word occurrences in the content of the given document.
 *
 * The content is placed in a one-row Weka dataset with a single string
 * attribute and passed through a {@code StringToWordVector} filter configured
 * for unigrams, lower-casing and word counts. Words shorter than five
 * characters are treated as stop words.
 *
 * @param documentContentData document whose content is analysed
 * @param maxResult value passed to {@code setWordsToKeep} on the filter
 * @return attribute name to count, read from the last filtered row; empty if
 *         filtering failed (the failure is logged, not rethrown)
 */
@Override
public Map<String, Integer> calculateWordCount(final DocumentContentData documentContentData,
        final int maxResult) {
    final String content = documentContentData.getContent();

    // One string attribute named "html"; the null value list marks it as a string attribute.
    final ArrayList<Attribute> attributes = new ArrayList<>();
    attributes.add(new Attribute("html", (ArrayList<String>) null));

    final Instances source = new Instances("html", attributes, 1);
    source.add(new DenseInstance(1));
    source.instance(0).setValue(0, content);

    final StopwordsHandler shortWordHandler = new StopwordsHandler() {
        @Override
        public boolean isStopword(final String word) {
            return word.length() < 5;
        }
    };

    final NGramTokenizer unigramTokenizer = new NGramTokenizer();
    unigramTokenizer.setNGramMinSize(1);
    unigramTokenizer.setNGramMaxSize(1);
    unigramTokenizer.setDelimiters(" \r\n\t.,;:'\"()?!'");

    final StringToWordVector wordVector = new StringToWordVector();
    wordVector.setTokenizer(unigramTokenizer);
    wordVector.setStopwordsHandler(shortWordHandler);
    wordVector.setLowerCaseTokens(true);
    wordVector.setOutputWordCounts(true);
    wordVector.setWordsToKeep(maxResult);

    final Map<String, Integer> counts = new HashMap<>();
    try {
        wordVector.setInputFormat(source);
        final Instance row = Filter.useFilter(source, wordVector).lastInstance();

        int index = 0;
        final int total = row.numAttributes();
        while (index < total) {
            final String word = row.attribute(index).name();
            final Integer count = Integer.valueOf(row.toString(index));
            counts.put(word, count);
            index++;
        }
    } catch (final Exception e) {
        LOGGER.warn("Problem calculating wordcount for : {} , exception:{}", documentContentData.getId(), e);
    }
    return counts;
}
From source file:controller.MineroControler.java
public String regresionLineal() { BufferedReader breader = null; Instances datos = null; breader = new BufferedReader(fuente_arff); try {/*from w w w .j a v a2 s . com*/ datos = new Instances(breader); datos.setClassIndex(datos.numAttributes() - 1); // clase principal, ltima en atributos } catch (IOException ex) { System.err.println("Problemas al intentar cargar los datos"); } LinearRegression regresionL = new LinearRegression(); try { regresionL.buildClassifier(datos); Instance nuevaCal = datos.lastInstance(); double calif = regresionL.classifyInstance(nuevaCal); setValorCalculado(new Double(calif)); } catch (Exception ex) { System.err.println("Problemas al clasificar instancia"); } return regresionL.toString(); }
From source file:data.Regression.java
public int regression(String fileName) { String arffName = FileTransfer.transfer(fileName); try {/* www. ja v a 2 s .c om*/ //load data Instances data = new Instances(new BufferedReader(new FileReader(arffName))); data.setClassIndex(data.numAttributes() - 1); //build model LinearRegression model = new LinearRegression(); model.buildClassifier(data); //the last instance with missing class is not used System.out.println(model); //classify the last instance Instance num = data.lastInstance(); int people = (int) model.classifyInstance(num); System.out.println("NumOfEnrolled (" + num + "): " + people); return people; } catch (Exception e) { e.printStackTrace(); System.out.println("Regression fail"); } return 0; }
From source file:data.RegressionDrop.java
public void regression() throws Exception { //public static void main(String[] args) throws Exception{ //load data//from w ww . java 2 s. co m Instances data = new Instances(new BufferedReader(new FileReader("NumOfDroppedByYear.arff"))); data.setClassIndex(data.numAttributes() - 1); //build model LinearRegression model = new LinearRegression(); model.buildClassifier(data); //the last instance with missing class is not used System.out.println(model); //classify the last instance Instance num = data.lastInstance(); int people = (int) model.classifyInstance(num); System.out.println("NumOfDropped (" + num + "): " + people); }
From source file:eksploracja.Eksploracja.java
/** * @param args the command line arguments *///from www . j a v a2 s . c o m public static void main(String[] args) throws Exception { // TODO code application logic here //sout +tabualcja System.out.println("Hello world - tu eksploracja"); //Pobieranie danych String filename = "C:\\Program Files\\Weka-3-8\\data\\weather.numeric.arff"; DataSource source = new DataSource(filename); Instances mojeDane = source.getDataSet(); //Wywietlanie danych System.out.println("Dane: "); // System.out.println(mojeDane); //cao danych Instance wiersz0 = mojeDane.firstInstance(); System.out.println("Pocztek " + mojeDane.firstInstance()); //pierwszy wiersz System.out.println("Koniec " + mojeDane.lastInstance()); //ostatni wiersz System.out.println("\nLiczba danych: " + mojeDane.numInstances()); System.out.println("\nAtrybuty w liczbie: " + mojeDane.numAttributes()); for (int i = 0; i < mojeDane.numAttributes(); i++) { System.out.println(i + ". " + mojeDane.attribute(i)); Attribute atr = mojeDane.attribute(i); System.out.println(i + " " + atr.name()); if (atr.isNominal()) { System.out.println("Typ danych nominalne"); } else { System.out.println("Typ danych numeryczne"); } } //Zapisywanie danych w posataci liczbowej System.out.println("Dane - jako liczby: "); System.out.println(Arrays.toString(wiersz0.toDoubleArray())); }
From source file:GroupProject.DMChartUI.java
/** * gets the user input from the string and turns it into a weka instance * the instance is then returned to be classified * *///from www . j a v a 2s.c o m private Instance getInstance(boolean rem) { String fileName = "tempInstance.csv"; FileWriter fileWriter = null; try { fileWriter = new FileWriter(fileName); System.out.println("i found the tempInstance files"); fileWriter.append(header.toString()); //a string with commas in between each word //the first word should be: "0" System.out.println("Im goint to get the user string"); String input_instance = getUserString(); System.out.println("The user input:" + input_instance); fileWriter.append(input_instance); //add new line fileWriter.append("\n"); System.out.println("i got user input"); } catch (Exception e) { System.out.println("Error in CsvFileWriter !!!"); e.printStackTrace(); } finally { try { fileWriter.flush(); fileWriter.close(); } catch (IOException e) { System.out.println("Error while flushing/closing fileWriter !!!"); e.printStackTrace(); } } String fileNameArff = "tempInstance.arff"; CSVtoArff converter = new CSVtoArff(); Instances students = null; Instances students2 = null; //load data into Instances try { System.out.println("im going to convert the csv file "); converter.convert(fileName, fileNameArff); System.out.println("I converted the csv file"); } catch (IOException ex) { Logger.getLogger(DMChartUI.class.getName()).log(Level.SEVERE, null, ex); } try { students = new Instances(new BufferedReader(new FileReader(fileNameArff))); //students2= new Instances(new BufferedReader(new FileReader(fileNameArff))); } catch (IOException ex) { Logger.getLogger(DMChartUI.class.getName()).log(Level.SEVERE, null, ex); } Remove remove = new Remove(); int[] toremove = { 0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 16, 17 }; remove.setAttributeIndicesArray(toremove); try { remove.setInputFormat(students); } catch (Exception ex) { Logger.getLogger(DMChartUI.class.getName()).log(Level.SEVERE, null, ex); } Instances 
instNew = null; try { instNew = Filter.useFilter(students, remove); } catch (Exception ex) { Logger.getLogger(DMChartUI.class.getName()).log(Level.SEVERE, null, ex); } Instance result; if (rem) { result = instNew.lastInstance(); } else result = students.lastInstance(); System.out.println("this was the instance"); System.out.println(result); return result; }
From source file:imba.classifier.NBTubes.java
/**
 * Computes one score per class for the given instance.
 *
 * Each entry starts as {@code sumClass[i] / dataSize} and is then multiplied
 * by one {@code infoClassifier} entry for every outer index {@code j}. The
 * array is returned as computed; this method does not normalise it.
 *
 * @param instance the instance to score; it is replaced internally by its
 *        filtered counterpart before the scores are computed
 * @return array with {@code infoClassifier.get(0).get(0).size()} entries
 * @throws Exception from the filter calls in the non-numeric branch, which
 *         are not wrapped in a try/catch
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {
    Instances temp = null;
    Instance p;
    Filter f;
    double[] a = new double[infoClassifier.get(0).get(0).size()];
    int i, j, k, l, x, c;
    double t, prev;
    Enumeration n;
    boolean big;
    String val;
    String[] valMinMax;
    if (wasNumeric) {
        // Temporarily append the instance to header_Instances and run the whole
        // set through Normalize. Filter failures are logged, not rethrown.
        header_Instances.add(instance);
        f = new Normalize();
        try {
            f.setInputFormat(header_Instances);
            for (Instance i1 : header_Instances) {
                f.input(i1);
            }
            f.batchFinished();
        } catch (Exception ex) {
            Logger.getLogger(NBTubes.class.getName()).log(Level.SEVERE, null, ex);
        }
        // Collect the filter output into a dataset with the filter's output format.
        temp = f.getOutputFormat();
        while ((p = f.output()) != null) {
            temp.add(p);
        }
    }
    f = new NumericToNominal();
    if (wasNumeric) {
        // Second pass: convert the normalised rows with NumericToNominal.
        try {
            f.setInputFormat(temp);
            for (Instance i1 : temp) {
                f.input(i1);
            }
            f.batchFinished();
        } catch (Exception ex) {
            Logger.getLogger(NBTubes.class.getName()).log(Level.SEVERE, null, ex);
        }
        temp = null;
        temp = f.getOutputFormat();
        p = null;
        while ((p = f.output()) != null) {
            temp.add(p);
        }
        // The instance was appended last, so its converted form is the last row;
        // the temporary row is then removed from header_Instances again.
        instance = temp.lastInstance();
        header_Instances.remove(header_Instances.size() - 1);
    } else {
        // Only the single instance is pushed through NumericToNominal.
        f.setInputFormat(header_Instances);
        f.input(instance);
        f.batchFinished();
        instance = f.output();
    }
    // Compute the score of the instance for each class.
    i = 0;
    while (i < (a.length)) {
        a[i] = (double) sumClass[i] / dataSize;
        j = 0;
        k = 0;
        while (j < infoClassifier.size()) {
            // NOTE(review): k looks like the dataset attribute index that steps
            // over the class attribute while j indexes infoClassifier - confirm.
            if (j == classIdx) {
                k++;
            }
            if (wasNumeric) {
                if (filter.equals("Discretize")) {
                    l = 0;
                    big = false;
                    while (l < dataset.attribute(k).numValues() && big == false) {
                        // Parse the label: strip quotes, parentheses and ']' and
                        // split on '-'.
                        // NOTE(review): presumably labels are interval strings such as
                        // '(-inf-0.5]' - confirm against the discretised dataset.
                        val = String.valueOf(dataset.attribute(k).value(l));
                        val = val.replaceAll("'", "");
                        val = val.replaceAll("\\(", "");
                        val = val.replaceAll("\\)", "");
                        val = val.replaceAll("]", "");
                        valMinMax = val.split("-");
                        // Match the instance value against the parsed bounds.
                        if (valMinMax.length == 3) {
                            // Three parts: bounds are taken from indices 1 and 2, and an
                            // "inf" lower bound is replaced by "0.0".
                            if (valMinMax[1].equals("inf")) {
                                valMinMax[1] = "0.0";
                            }
                            if (Double.valueOf(instance.stringValue(k)) > Double.valueOf(valMinMax[1]) && Double
                                    .valueOf(instance.stringValue(k)) <= Double.valueOf(valMinMax[2])) {
                                big = true;
                            }
                        } else {
                            if (valMinMax.length == 2) {
                                // Two parts: bounds are indices 0 and 1, and an "inf"
                                // upper bound is replaced by "1.0".
                                if (valMinMax[1].equals("inf")) {
                                    valMinMax[1] = "1.0";
                                }
                                if (Double.valueOf(instance.stringValue(k)) > Double.valueOf(valMinMax[0])
                                        && Double.valueOf(instance.stringValue(k)) <= Double
                                                .valueOf(valMinMax[1])) {
                                    big = true;
                                }
                            } else {
                                // Any other shape: look the value up directly. The l++
                                // below followed by x = l - 1 recovers this index.
                                l = dataset.attribute(k).indexOfValue(instance.stringValue(k));
                                big = true;
                            }
                        }
                        l++;
                    }
                    x = l - 1;
                } else {
                    // Walk the attribute values as doubles until the first one that
                    // is >= the instance value; prev holds the value before it.
                    big = false;
                    l = 0;
                    n = dataset.attribute(k).enumerateValues();
                    t = 0;
                    prev = 0;
                    while (l < dataset.attribute(k).numValues() && big == false) {
                        t = Double.valueOf(n.nextElement().toString());
                        if (Double.valueOf(instance.stringValue(k)) <= t) {
                            big = true;
                        } else {
                            prev = t;
                        }
                        l++;
                    }
                    // Prints the neighbouring values when there is no exact match.
                    if (big == true && t != Double.valueOf(instance.stringValue(k))) {
                        System.out.println(prev + " " + Double.valueOf(instance.stringValue(k)) + " " + t);
                    }
                    // NOTE(review): the reason the threshold c depends on classIdx is
                    // not visible in this block - confirm with the author.
                    if (classIdx < 2) {
                        c = 2;
                    } else {
                        c = 1;
                    }
                    if (big == true && l > c) {
                        // Choose whichever neighbour (prev or t) is closer to the
                        // instance value; ties go to prev.
                        if ((Double.valueOf(instance.stringValue(k)) - prev) <= (t
                                - Double.valueOf(instance.stringValue(k)))) {
                            x = l - 2;
                        } else {
                            x = l - 1;
                        }
                    } else {
                        x = l - 1;
                    }
                }
            } else {
                // Nominal data: the value index is looked up directly.
                x = dataset.attribute(k).indexOfValue(instance.stringValue(k));
            }
            a[i] *= infoClassifier.get(j).get(x).get(i);
            k++;
            j++;
        }
        i++;
    }
    return a;
}
From source file:jjj.asap.sas.datasets.job.Import.java
License:Open Source License
private void buildDataset(int k, String input, String output) { if (IOUtils.exists(output)) { Job.log("NOTE", output + " already exists - nothing to do."); return;//from w w w. j a v a 2 s . com } // create empty dataset final DatasetBuilder builder = new DatasetBuilder(); builder.addVariable("id"); if (Contest.isMultiChoice(k)) { builder.addNominalVariable("color", Contest.COLORS); } builder.addStringVariable("text"); builder.addNominalVariable("score", Contest.getRubrics(k)); Instances dataset = builder.getDataset(IOUtils.getName(output)); // now add obs Iterator<String> it = new FileIterator(input); while (it.hasNext()) { // parse data String[] data = StringUtils.safeSplit(it.next(), "\t", 6); double id = Double.parseDouble(data[0]); String score = data[2]; String color = data[4]; String text = data[5]; // add to dataset dataset.add(new DenseInstance(dataset.numAttributes())); Instance ob = dataset.lastInstance(); ob.setValue(dataset.attribute("id"), id); if (Contest.isMultiChoice(k)) { ob.setValue(dataset.attribute("color"), color); } ob.setValue(dataset.attribute("text"), text); if ("?".equals(score)) { ob.setValue(dataset.attribute("score"), Utils.missingValue()); } else { ob.setValue(dataset.attribute("score"), score); } } Dataset.save(output, dataset); }
From source file:jjj.asap.sas.parser.job.ImportParserData.java
License:Open Source License
private void process(final String parent, int essaySet, Map<Double, List<String>> tags, Map<Double, List<String>> parseTrees, Map<Double, List<String>> depends) { // check if output exists boolean any = false; if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-extra-stats.arff")) any = true;/* w w w . j av a 2 s. c om*/ if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-pos-tags.arff")) any = true; if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-parse-tree.arff")) any = true; if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-depends0.arff")) any = true; if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-depends1.arff")) any = true; if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-depends2.arff")) any = true; if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-depends3.arff")) any = true; if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-depends4.arff")) any = true; if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-depends5.arff")) any = true; if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-depends6.arff")) any = true; if (!any) { Job.log("NOTE", "work/datasets/" + parent + "/" + essaySet + "-*.arff returns all required datasets - nothing to do"); return; } // Load an existing dataset to use as a template. Instances dataset = Dataset.load("work/datasets/" + parent + "/" + essaySet + "-spell-checked.arff"); // create the output datasets here. except for the extra statistics, // the format is the same as 'dataset'. 
Instances tagsData = new Instances(dataset, 0); tagsData.setRelationName(essaySet + "-pos-tags.arff"); Instances treeData = new Instances(dataset, 0); treeData.setRelationName(essaySet + "-parse-tree.arff"); Instances dependsData[] = new Instances[7]; for (int j = 0; j < 7; j++) { dependsData[j] = new Instances(dataset, 0); dependsData[j].setRelationName(essaySet + "-depends" + j + ".arff"); } // extra stats DatasetBuilder builder = new DatasetBuilder(); builder.addVariable("id"); if (Contest.isMultiChoice(essaySet)) { builder.addNominalVariable("color", Contest.COLORS); } builder.addVariable("x_sent"); builder.addVariable("x_para"); builder.addVariable("x_length"); builder.addVariable("x_words"); builder.addVariable("x_unique_words"); builder.addNominalVariable("score", Contest.getRubrics(essaySet)); Instances extraStats = builder.getDataset(essaySet + "-extra-stats.arff"); // now add rows for each instance for (int i = 0; i < dataset.numInstances(); i++) { // common variables Instance ob = dataset.instance(i); double id = ob.value(0); String y = ob.isMissing(dataset.numAttributes() - 1) ? null : ob.stringValue(dataset.numAttributes() - 1); String color = Contest.isMultiChoice(essaySet) ? ob.stringValue(dataset.attribute("color")) : null; String str = ob.stringValue(dataset.attribute("text")); // // Extra stats // int nSent = tags.containsKey(id) ? 
tags.get(id).size() : 0; int nPara = 0; for (int a = 0; a < str.length(); a++) { if (str.charAt(a) == '^') nPara++; } int nLength = str.length(); int nWords = 0; int nUniqueWords = 0; String[] words = str.toLowerCase().split(" "); nWords = words.length; Set<String> u = new HashSet<String>(); for (String w : words) { u.add(w); } nUniqueWords = u.size(); extraStats.add(new DenseInstance(extraStats.numAttributes())); Instance extra = extraStats.lastInstance(); extra.setValue(0, id); if (Contest.isMultiChoice(essaySet)) { extra.setValue(1, color); } extra.setValue(extraStats.attribute("x_sent"), nSent); extra.setValue(extraStats.attribute("x_para"), nPara); extra.setValue(extraStats.attribute("x_length"), nLength); extra.setValue(extraStats.attribute("x_words"), nWords); extra.setValue(extraStats.attribute("x_unique_words"), nUniqueWords); if (y == null) extra.setValue(extraStats.numAttributes() - 1, Utils.missingValue()); else extra.setValue(extraStats.numAttributes() - 1, y); // // POS tags // String tagsText = ""; List<String> tagsList = tags.get(id); if (tagsList == null || tagsList.isEmpty()) { Job.log("WARNING", "no tags for " + id); tagsText = "x"; } else { for (String tagsItem : tagsList) { tagsText += tagsItem; } } tagsData.add(new DenseInstance(ob.numAttributes())); Instance tagsOb = tagsData.lastInstance(); tagsOb.setValue(0, id); if (Contest.isMultiChoice(essaySet)) { tagsOb.setValue(1, color); tagsOb.setValue(2, tagsText.trim()); if (y == null) { tagsOb.setValue(3, Utils.missingValue()); } else { tagsOb.setValue(3, y); } } else { tagsOb.setValue(1, tagsText.trim()); if (y == null) { tagsOb.setValue(2, Utils.missingValue()); } else { tagsOb.setValue(2, y); } } // // Parse Tree // String treeText = ""; List<String> treeList = parseTrees.get(id); if (treeList == null || treeList.isEmpty()) { Job.log("WARNING", "no parse tree for " + id); treeText = "x"; } else { for (String treeItem : treeList) { treeText += treeItem; } } treeData.add(new 
DenseInstance(ob.numAttributes())); Instance treeOb = treeData.lastInstance(); treeOb.setValue(0, id); if (Contest.isMultiChoice(essaySet)) { treeOb.setValue(1, color); treeOb.setValue(2, treeText.trim()); if (y == null) { treeOb.setValue(3, Utils.missingValue()); } else { treeOb.setValue(3, y); } } else { treeOb.setValue(1, treeText.trim()); if (y == null) { treeOb.setValue(2, Utils.missingValue()); } else { treeOb.setValue(2, y); } } // // Depends data // for (int j = 0; j < 7; j++) { String text = ""; List<String> list = depends.get(id); if (list == null || list.isEmpty()) { Job.log("WARNING", "no depends for " + id); text = "x"; } else { for (String item : list) { String[] term = StringUtils.safeSplit(item, "/", 3); switch (j) { case 0: text += item; break; case 1: text += term[1] + "/" + term[2]; break; case 2: text += term[0] + "/" + term[2]; break; case 3: text += term[0] + "/" + term[1]; break; case 4: text += term[0]; break; case 5: text += term[1]; break; case 6: text += term[2]; break; } text += " "; } } dependsData[j].add(new DenseInstance(ob.numAttributes())); Instance dependsOb = dependsData[j].lastInstance(); dependsOb.setValue(0, id); if (Contest.isMultiChoice(essaySet)) { dependsOb.setValue(1, color); dependsOb.setValue(2, text.trim()); if (y == null) { dependsOb.setValue(3, Utils.missingValue()); } else { dependsOb.setValue(3, y); } } else { dependsOb.setValue(1, text.trim()); if (y == null) { dependsOb.setValue(2, Utils.missingValue()); } else { dependsOb.setValue(2, y); } } } // j } // dataset // Now save the new datasets Dataset.save("work/datasets/" + parent + "/" + tagsData.relationName(), tagsData); Dataset.save("work/datasets/" + parent + "/" + treeData.relationName(), treeData); for (int j = 0; j < 7; j++) { Dataset.save("work/datasets/" + parent + "/" + dependsData[j].relationName(), dependsData[j]); } Dataset.save("work/datasets/" + parent + "/" + extraStats.relationName(), extraStats); }
From source file:meka.classifiers.multilabel.cc.CNode.java
License:Open Source License
/** * Main - run some tests.// w w w. j a v a 2 s . c o m */ public static void main(String args[]) throws Exception { Instances D = new Instances(new FileReader(args[0])); Instance x = D.lastInstance(); D.remove(D.numInstances() - 1); int L = Integer.parseInt(args[1]); D.setClassIndex(L); double y[] = new double[L]; Random r = new Random(); int s[] = new int[] { 1, 0, 2 }; int PA_J[][] = new int[][] { {}, {}, { 0, 1 }, }; //MLUtils.randomize(s,r); // MUST GO IN TREE ORDER !! for (int j : s) { int pa_j[] = PA_J[j]; System.out.println("PARENTS = " + Arrays.toString(pa_j)); //MLUtils.randomize(pa_j,r); System.out.println("**** TRAINING ***"); CNode n = new CNode(j, null, pa_j); n.build(D, new SMO()); /* */ //Instances D_ = n.transform(D); //n.T = D_; System.out.println("============== D_" + j + " / class = " + n.T.classIndex() + " ="); System.out.println("" + n.T); System.out.println("**** TESTING ****"); /* Instance x_ = MLUtils.setTemplate(x,(Instance)D_.firstInstance().copy(),D_); for(int pa : pa_j) { //System.out.println(""+map[pa]); x_.setValue(n.map[pa],y[pa]); } //x_.setDataset(T); x_.setClassMissing(); */ //n.T = D_; Instance x_ = n.transform(x, y); System.out.println("" + x_); y[j] = 1; } }