List of usage examples for weka.classifiers.functions LinearRegression classifyInstance
@Override public double classifyInstance(Instance instance) throws Exception
From source file:controller.MineroControler.java
public String regresionLineal() { BufferedReader breader = null; Instances datos = null;// w ww . j a v a 2 s . co m breader = new BufferedReader(fuente_arff); try { datos = new Instances(breader); datos.setClassIndex(datos.numAttributes() - 1); // clase principal, ltima en atributos } catch (IOException ex) { System.err.println("Problemas al intentar cargar los datos"); } LinearRegression regresionL = new LinearRegression(); try { regresionL.buildClassifier(datos); Instance nuevaCal = datos.lastInstance(); double calif = regresionL.classifyInstance(nuevaCal); setValorCalculado(new Double(calif)); } catch (Exception ex) { System.err.println("Problemas al clasificar instancia"); } return regresionL.toString(); }
From source file:data.Regression.java
public int regression(String fileName) { String arffName = FileTransfer.transfer(fileName); try {/* w w w.j a v a 2 s . c om*/ //load data Instances data = new Instances(new BufferedReader(new FileReader(arffName))); data.setClassIndex(data.numAttributes() - 1); //build model LinearRegression model = new LinearRegression(); model.buildClassifier(data); //the last instance with missing class is not used System.out.println(model); //classify the last instance Instance num = data.lastInstance(); int people = (int) model.classifyInstance(num); System.out.println("NumOfEnrolled (" + num + "): " + people); return people; } catch (Exception e) { e.printStackTrace(); System.out.println("Regression fail"); } return 0; }
From source file:data.RegressionDrop.java
public void regression() throws Exception { //public static void main(String[] args) throws Exception{ //load data//from ww w.j a v a 2 s.co m Instances data = new Instances(new BufferedReader(new FileReader("NumOfDroppedByYear.arff"))); data.setClassIndex(data.numAttributes() - 1); //build model LinearRegression model = new LinearRegression(); model.buildClassifier(data); //the last instance with missing class is not used System.out.println(model); //classify the last instance Instance num = data.lastInstance(); int people = (int) model.classifyInstance(num); System.out.println("NumOfDropped (" + num + "): " + people); }
From source file:edu.utexas.cs.tactex.utils.RegressionUtils.java
License:Open Source License
public static Double leaveOneOutErrorLinRegLambda(double lambda, Instances data) { // MANUAL /* w w w. ja va 2 s.com*/ // create a linear regression classifier with Xy_polynorm data LinearRegression linreg = createLinearRegression(); linreg.setRidge(lambda); double mse = 0; for (int i = 0; i < data.numInstances(); ++i) { log.info("fold " + i); Instances train = data.trainCV(data.numInstances(), i); log.info("train"); Instances test = data.testCV(data.numInstances(), i); log.info("test"); double actualY = data.instance(i).classValue(); log.info("actualY"); try { linreg.buildClassifier(train); log.info("buildClassifier"); } catch (Exception e) { log.error("failed to build classifier in cross validation", e); return null; } double predictedY = 0; try { predictedY = linreg.classifyInstance(test.instance(0)); log.info("predictedY"); } catch (Exception e) { log.error("failed to classify in cross validation", e); return null; } double error = predictedY - actualY; log.info("error " + error); mse += error * error; log.info("mse " + mse); } if (data.numInstances() == 0) { log.error("no instances in leave-one-out data"); return null; } mse /= data.numInstances(); log.info("mse " + mse); return mse; // // USING WEKA // // // create evaluation object // Evaluation eval = null; // try { // eval = new Evaluation(data); // } catch (Exception e) { // log.error("weka Evaluation() creation threw exception", e); // //e.printStackTrace(); // return null; // } // // // create a linear regression classifier with Xy_polynorm data // LinearRegression linreg = createLinearRegression(); // linreg.setRidge(lambda); // // try { // // linreg.buildClassifier(data); // // } catch (Exception e) { // // log.error("FAILED: linear regression threw exception", e); // // //e.printStackTrace(); // // return null; // // } // // // initialize the evaluation object // Classifier classifier = linreg; // int numFolds = data.numInstances(); // Random random = new Random(0); // try { // 
eval.crossValidateModel(classifier , data , numFolds , random); // } catch (Exception e) { // log.error("crossvalidation threw exception", e); // //e.printStackTrace(); // return null; // } // // double mse = eval.errorRate(); // return mse; }
From source file:es.ua.dlsi.experiments.id3.CheckCorrectCandidatePositionLeaveOneOutScoresMaximumEntropy.java
License:Open Source License
/**
 * Command-line entry point. Loads an Apertium-style dictionary, a vocabulary
 * and a PMML linear-regression model; then, for every single-word dictionary
 * entry, temporarily removes the entry, generates candidate stem/paradigm
 * analyses for its most frequent surface form, scores the candidates with the
 * regression model, builds an ID3 tree over them, and reports how many
 * questions the tree needs to reach the correct paradigm (leave-one-out
 * evaluation, per the class name).
 *
 * @param args the command line arguments (-d dictionary, -o output,
 *             --tree-output, -v vocabulary, -p plf-tmp-path,
 *             -m linear-regression-model, --remove-1entrypars,
 *             --remove-closedcats)
 */
public static void main(String[] args) {
    // --- command-line parsing ---
    CmdLineParser parser = new CmdLineParser();
    CmdLineParser.Option odictionary = parser.addStringOption('d', "dictionary");
    CmdLineParser.Option oremove1entry = parser.addBooleanOption("remove-1entrypars");
    CmdLineParser.Option ooutput = parser.addStringOption('o', "output");
    CmdLineParser.Option otreeoutput = parser.addStringOption("tree-output");
    CmdLineParser.Option onotclosedcats = parser.addBooleanOption("remove-closedcats");
    CmdLineParser.Option ovocabularypath = parser.addStringOption('v', "vocabulary");
    CmdLineParser.Option oplf_tmp = parser.addStringOption('p', "plf-tmp-path");
    CmdLineParser.Option olrm = parser.addStringOption('m', "linear-regression-model");
    try {
        parser.parse(args);
    } catch (CmdLineParser.IllegalOptionValueException e) {
        System.err.println(e);
        System.exit(-1);
    } catch (CmdLineParser.UnknownOptionException e) {
        System.err.println(e);
        System.exit(-1);
    }
    String dictionary = (String) parser.getOptionValue(odictionary, null);
    String output = (String) parser.getOptionValue(ooutput, null);
    String treeoutput = (String) parser.getOptionValue(otreeoutput, null);
    String vocabularypath = (String) parser.getOptionValue(ovocabularypath, null);
    String plf_tmp = (String) parser.getOptionValue(oplf_tmp, null);
    String lrm = (String) parser.getOptionValue(olrm, null);
    boolean remove1entry = (Boolean) parser.getOptionValue(oremove1entry, false);
    boolean notclosedcats = (Boolean) parser.getOptionValue(onotclosedcats, false);
    // Preparing output stream (falls back to stdout on failure)
    PrintWriter pw;
    if (output != null) {
        try {
            pw = new PrintWriter(output);
        } catch (FileNotFoundException ex) {
            System.err.println("Error while traying to write output file '" + output + "'.");
            pw = new PrintWriter(System.out);
        }
    } else {
        System.err.println("Warning: output file not defined. Output redirected to standard output.");
        pw = new PrintWriter(System.out);
    }
    // Preparing output stream for the ID3 tree dump.
    // NOTE(review): if --tree-output is not given, treepw stays null, yet
    // tree.Print(treepw)/treepw.flush() below are still called — likely NPE;
    // verify whether a tree output file is effectively mandatory.
    PrintWriter treepw = null;
    if (treeoutput != null) {
        try {
            treepw = new PrintWriter(treeoutput);
        } catch (FileNotFoundException ex) {
            System.err.println("Error while traying to write output file for the tree '" + treeoutput + "'.");
            treepw = new PrintWriter(System.out);
        }
    }
    // Reading the vocabulary
    Vocabulary vocabulary = null;
    try {
        vocabulary = new Vocabulary(vocabularypath);
    } catch (FileNotFoundException ex) {
        System.err.println("ERROR: File '" + vocabularypath + "' could not be found.");
        System.exit(-1);
    } catch (IOException ex) {
        System.err.println("Error while reading file '" + vocabularypath + "' could not be found.");
        System.exit(-1);
    }
    // Reading the dictionary and generating the set of lexical forms
    DictionaryReader dicReader = new DictionaryReader(dictionary);
    Dictionary dic = dicReader.readDic();
    // Building the suffix tree
    Dix2suffixtree d2s;
    d2s = new Dix2suffixtree(dic);
    FeatureExtractor featextractor = new FeatureExtractor(dic, vocabulary, d2s, plf_tmp);
    // Loading the linear-regression scoring model from PMML
    LinearRegression lrmodel = null;
    try {
        PMMLModel pmmlModel = PMMLFactory.getPMMLModel(lrm);
        if (pmmlModel instanceof PMMLClassifier) {
            Classifier classifier = ((PMMLClassifier) pmmlModel);
            lrmodel = (LinearRegression) classifier;
        }
    } catch (Exception ex) {
        ex.printStackTrace(System.err);
        System.exit(-1);
    }
    // Loop that goes all over the entries of the dictionary
    for (Section s : dic.sections) {
        for (int i = 0; i < s.elements.size(); i++) {
            // Leave-one-out: the entry is removed here and re-inserted below.
            // NOTE(review): e is only re-added in the successful tree-query
            // path (s.elements.add(i, e)); entries hitting the warning/error
            // branches stay removed — confirm this is intentional.
            E e = s.elements.remove(i);
            // If the entry is a multiword it is discarded
            if (e.isMultiWord()) {
                System.err.println("Multiword: " + e.toString());
            } else {
                // Getting the stem and paradigm of the entry
                Candidate candidate = DicEntry.GetStemParadigm(e);
                if (candidate != null) {
                    Pardef pardef = dic.pardefs.getParadigmDefinition(candidate.getParadigm());
                    if (pardef != null) {
                        ParadigmProfiler pp = new ParadigmProfiler(new Paradigm(pardef, dic), dic);
                        // optionally skip paradigms with a single entry
                        if (!remove1entry || pp.NumberOfWords() > 1) {
                            String stem = candidate.getStem();
                            String bestsurfaceform;
                            Pardef p = dic.pardefs.getParadigmDefinition(candidate.getParadigm());
                            Paradigm paradigm = new Paradigm(p, dic);
                            // If indicated, entries generating forms from a closed category may be discarded
                            if (!notclosedcats || !paradigm.isClosedCategory()) {
                                // Choosing the most frequent surface form in the vocabulary
                                bestsurfaceform = vocabulary.GetMostFrequentSurfaceForm(stem, paradigm);
                                // If no one of the surface forms appears in the vocabulary:
                                if (bestsurfaceform == null) {
                                    System.err.println("Warning: no occurrence for word with stem " + stem
                                            + " and paradigm " + paradigm.getName());
                                    // Random form (first suffix of the paradigm)
                                    bestsurfaceform = stem + paradigm.getSuffixes().iterator().next().getSuffix();
                                }
                                // If the lemma cannot be found, the system stops working
                                if (candidate.GetLemma(dic) == null) {
                                    System.err.println("Error: lemma cannot be generated for stem " + stem
                                            + " and paradigm " + paradigm.getName());
                                    System.exit(-1);
                                }
                                // Generating the list of candidates for the most common surface form
                                //Set<Candidate> candidates=d2s.getSuffixTree().
                                // SegmentWord(bestsurfaceform);
                                SortedSetOfCandidates candidates = d2s.CheckNewWord(bestsurfaceform, vocabulary,
                                        plf_tmp, null, notclosedcats);
                                // Fallback: try the other surface forms of the paradigm
                                if (candidates.GetNumberOfDifferentCandidates() == 0) {
                                    String newsurfaceform;
                                    for (Suffix suf : paradigm.getSuffixes()) {
                                        newsurfaceform = stem + suf;
                                        if (!newsurfaceform.equals(bestsurfaceform)) {
                                            candidates = d2s.CheckNewWord(newsurfaceform, vocabulary, null, null,
                                                    notclosedcats);
                                            if (candidates.GetNumberOfDifferentCandidates() > 0) {
                                                bestsurfaceform = newsurfaceform;
                                                break;
                                            }
                                        }
                                    }
                                }
                                if (candidates.GetNumberOfDifferentCandidates() == 0) {
                                    System.err.println("Warning: no candidates for candidate " + stem + "/"
                                            + paradigm.getName());
                                } else {
                                    Set<String> possiblesurfaceforms = new LinkedHashSet<String>();
                                    // the key of this map is the set of surface forms and the value is the set of paradigms generating them
                                    Set<EquivalentCandidates> sf_candidate = new LinkedHashSet<EquivalentCandidates>();
                                    for (RankedCandidate qc : candidates.getCandidates()) {
                                        possiblesurfaceforms.addAll(qc.getSurfaceForms(dic));
                                        sf_candidate.add(qc);
                                    }
                                    // Score each candidate with the linear-regression model
                                    for (EquivalentCandidates ec : sf_candidate) {
                                        RankedCandidate qc = (RankedCandidate) ec;
                                        FeatureSet featset = featextractor.GetFeatureSet(qc, notclosedcats);
                                        try {
                                            double probability = lrmodel
                                                    .classifyInstance(featset.toWekaInstance());
                                            qc.setScore(probability);
                                        } catch (Exception ex) {
                                            ex.printStackTrace(System.err);
                                        }
                                    }
                                    InstanceCollection records;
                                    // read in all our data
                                    records = new InstanceCollection();
                                    records.buildInstances(possiblesurfaceforms, sf_candidate, dic);
                                    Tree tree = new Tree(records);
                                    tree.Print(treepw);
                                    treepw.flush();
                                    try {
                                        int numberofquestions = tree.QuestionsToParadigm(candidate);
                                        // Printing the output
                                        pw.println(bestsurfaceform + ";" + stem + ";" + paradigm.getName() + ";"
                                                + numberofquestions);
                                        pw.flush();
                                        s.elements.add(i, e);
                                    } catch (NotInTreeException ex) {
                                        System.out.println("Error: correct candidate for " + stem + ";"
                                                + paradigm.getName() + " is not in the ID3 tree.");
                                    }
                                }
                            } else {
                                System.err.println("Closed category: " + e.toString());
                            }
                        } else {
                            System.err.println("Candidate " + candidate.toString()
                                    + " not processed: it is the only word in the paradigm");
                        }
                    } else {
                        System.err.println(
                                "Paradigm " + candidate.getParadigm() + " does not appear in the dictionary");
                    }
                } else {
                    System.err.println("Entry " + e.toString() + " does not contain any paradigm");
                }
            }
        }
    }
    pw.close();
    if (treepw != null) {
        treepw.close();
    }
}
From source file:GroupProject.DMChartUI.java
/**
 * Action for the generate button.
 * Reads the user input from the table and the selected options and performs
 * a classification of that input. The user can choose linear regression,
 * naive Bayes, or a J48 tree; the prediction is written into the
 * predictValue field (and, for linear regression, the model equation into
 * equationDisplayArea).
 *
 * @param evt the Swing action event from the generate button
 */
private void generateButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_generateButtonActionPerformed
    // Convert the working CSV to ARFF and load it twice (students2 is a
    // second independent copy of the same data).
    //File file = new File("studentTemp.csv");
    CSVtoArff converter = new CSVtoArff();
    Instances students = null;
    Instances students2 = null;
    try {
        converter.convert("studentTemp.csv", "studentTemp.arff");
    } catch (IOException ex) {
        Logger.getLogger(DMChartUI.class.getName()).log(Level.SEVERE, null, ex);
    }
    // NOTE(review): if this load fails, students stays null and
    // students.setClassIndex below throws NPE — the IOException is only logged.
    try {
        students = new Instances(new BufferedReader(new FileReader("studentTemp.arff")));
        students2 = new Instances(new BufferedReader(new FileReader("studentTemp.arff")));
    } catch (IOException ex) {
        Logger.getLogger(DMChartUI.class.getName()).log(Level.SEVERE, null, ex);
    }
    // get column to predict values for (offset by 1 from the combo-box index)
    //int target=students.numAttributes()-1;
    int target = dataSelector.getSelectedIndex() + 1;
    System.out.printf("this is the target: %d\n", target);
    // set target
    students.setClassIndex(target);
    students2.setClassIndex(target);
    // case on which radio button is selected
    // Linear Regressions
    if (LRB.isSelected()) {
        // reuse cached model if present, otherwise build it
        LinearRegression model = null;
        if (Lmodel != null) {
            model = Lmodel;
        } else {
            buildLinearModel();
            model = Lmodel;
        }
        System.out.println("im doing linear regression");
        equationDisplayArea.setText(model.toString());
        System.out.println("im going to get the instance");
        Instance prediction2 = getInstance(true);
        // Drop the attributes the regression model was not trained on.
        // NOTE(review): the index list is hard-coded — verify it matches the
        // attribute set used in buildLinearModel.
        Remove remove = new Remove();
        int[] toremove = { 0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 16, 17 };
        remove.setAttributeIndicesArray(toremove);
        try {
            remove.setInputFormat(students);
        } catch (Exception ex) {
            Logger.getLogger(DMChartUI.class.getName()).log(Level.SEVERE, null, ex);
        }
        Instances instNew = null;
        try {
            instNew = Filter.useFilter(students, remove);
        } catch (Exception ex) {
            Logger.getLogger(DMChartUI.class.getName()).log(Level.SEVERE, null, ex);
        }
        prediction2.setDataset(instNew);
        System.err.print("i got the instance");
        double result = 0;
        try {
            result = model.classifyInstance(prediction2);
        } catch (Exception ex) {
            Logger.getLogger(DMChartUI.class.getName()).log(Level.SEVERE, null, ex);
        }
        System.out.printf("the result : %f \n ", result);
        predictValue.setText(Double.toString(result));
        System.out.println("I'm done with Linear Regression");
    }
    // Naive Bayes
    else if (NBB.isSelected()) {
        // reuse cached classifier if present, otherwise build it
        Classifier cModel = null;
        if (NBmodel != null) {
            cModel = NBmodel;
        } else {
            buildNBClassifier();
            cModel = NBmodel;
        }
        System.out.println("im doing NB");
        // build test (evaluated on the training data itself)
        Evaluation eTest = null;
        try {
            eTest = new Evaluation(students);
        } catch (Exception ex) {
            Logger.getLogger(DMChartUI.class.getName()).log(Level.SEVERE, null, ex);
        }
        System.out.println("Using NB");
        try {
            eTest.evaluateModel(cModel, students);
        } catch (Exception ex) {
            Logger.getLogger(DMChartUI.class.getName()).log(Level.SEVERE, null, ex);
        }
        // display the test results to console
        String strSummary = eTest.toSummaryString();
        System.out.println(strSummary);
        // build instance to predict
        System.out.println("im going to get the instance");
        Instance prediction2 = getInstance(false);
        prediction2.setDataset(students);
        System.err.print("i got the instance");
        // replace with loop stating the class names
        // fit text based on name of categories
        double pred = 0;
        try {
            pred = cModel.classifyInstance(prediction2);
            prediction2.setClassValue(pred);
        } catch (Exception ex) {
            Logger.getLogger(DMChartUI.class.getName()).log(Level.SEVERE, null, ex);
        }
        // get the predicted value and set predictValue to it
        predictValue.setText(prediction2.classAttribute().value((int) pred));
        System.out.println("I'm done with Naive Bayes");
        // print the full class distribution and re-derive the argmax label
        double[] fDistribution2 = null;
        try {
            fDistribution2 = cModel.distributionForInstance(prediction2);
        } catch (Exception ex) {
            Logger.getLogger(DMChartUI.class.getName()).log(Level.SEVERE, null, ex);
        }
        double max = 0;
        int maxindex = 0;
        max = fDistribution2[0];
        for (int i = 0; i < fDistribution2.length; i++) {
            if (fDistribution2[i] > max) {
                maxindex = i;
                max = fDistribution2[i];
            }
            System.out.println("the value at " + i + " : " + fDistribution2[i]);
            System.out.println("the label at " + i + prediction2.classAttribute().value(i));
        }
        prediction2.setClassValue(maxindex);
        predictValue.setText(prediction2.classAttribute().value(maxindex));
    }
    // J48 Tree
    else if (JB.isSelected()) {
        System.out.println("im doing j48 ");
        // reuse cached classifier if present, otherwise build it
        Classifier jModel = null;
        if (Jmodel != null) {
            jModel = Jmodel;
        } else {
            buildJClassifier();
            jModel = Jmodel;
        }
        // test model (evaluated on the training data itself)
        Evaluation eTest2 = null;
        try {
            eTest2 = new Evaluation(students);
        } catch (Exception ex) {
            Logger.getLogger(DMChartUI.class.getName()).log(Level.SEVERE, null, ex);
        }
        System.out.println("Using J48 test");
        try {
            eTest2.evaluateModel(jModel, students);
        } catch (Exception ex) {
            Logger.getLogger(DMChartUI.class.getName()).log(Level.SEVERE, null, ex);
        }
        String strSummary2 = eTest2.toSummaryString();
        System.out.println(strSummary2);
        System.out.println("im going to get the instance");
        Instance prediction2 = getInstance(false);
        prediction2.setDataset(students);
        System.err.print("i got the instance\n");
        double pred = 0;
        try {
            pred = jModel.classifyInstance(prediction2);
            prediction2.setClassValue(pred);
            System.out.println("i did a prediction");
        } catch (Exception ex) {
            Logger.getLogger(DMChartUI.class.getName()).log(Level.SEVERE, null, ex);
        }
        // get the predicted value and set predictValue to it
        System.out.println("this was pred:" + pred);
        predictValue.setText(prediction2.classAttribute().value((int) pred));
        System.out.println("I'm done with J48");
        // replace with loop stating the class names
        // fit text based on name of categories
        // print the full class distribution and re-derive the argmax label
        double[] fDistribution2 = null;
        try {
            fDistribution2 = jModel.distributionForInstance(prediction2);
        } catch (Exception ex) {
            Logger.getLogger(DMChartUI.class.getName()).log(Level.SEVERE, null, ex);
        }
        double max = 0;
        int maxindex = 0;
        max = fDistribution2[0];
        for (int i = 0; i < fDistribution2.length; i++) {
            if (fDistribution2[i] > max) {
                maxindex = i;
                max = fDistribution2[i];
            }
            System.out.println("the value at " + i + " : " + fDistribution2[i]);
            System.out.println("the label at " + i + " " + prediction2.classAttribute().value(i));
        }
        prediction2.setClassValue(maxindex);
        predictValue.setText(prediction2.classAttribute().value(maxindex));
    }
}