List of usage examples for weka.core.Instances.toSummaryString()
public String toSummaryString()
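Every example below reduces to the same call: load or build a weka.core.Instances object and print its summary. As a minimal, self-contained sketch of the call itself (the file name "data.arff" is a placeholder, not taken from any of the examples):

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class SummaryDemo {
    public static void main(String[] args) throws Exception {
        // DataSource.read accepts ARFF, CSV, and other formats supported
        // by Weka's converters ("data.arff" is a placeholder path).
        Instances data = DataSource.read("data.arff");
        // Prints the relation name, the instance and attribute counts, and a
        // per-attribute breakdown (type, missing/unique/distinct values).
        System.out.println(data.toSummaryString());
    }
}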
From source file:lu.lippmann.cdb.ext.hydviga.cbr.GapFillingKnowledgeDBAnalyzer.java
License:Open Source License
/**
 * Main method.
 * @param args command line arguments
 */
public static void main(final String[] args) {
    try {
        HydroRunner.init(false);

        Instances newkdb = new Instances(GapFillingKnowledgeDB.getKnowledgeDB());
        System.out.println("Considered fictive gaps -> " + getCountOfFictiveGaps(newkdb));
        System.out.println(newkdb.toSummaryString());

        newkdb = WekaDataProcessingUtil.filterDataSetOnNominalValue(newkdb,
                newkdb.attribute("useDownstream").index(), "false");
        newkdb = WekaDataProcessingUtil.filterDataSetOnNominalValue(newkdb,
                newkdb.attribute("useUpstream").index(), "false");
        //newkdb=WekaDataProcessingUtil.filterDataSetOnNominalValue(newkdb,newkdb.attribute("useNearest").index(),"false");
        //newkdb=WekaDataProcessingUtil.filterDataSetOnNominalValue(newkdb,newkdb.attribute("useMostSimilar").index(),"false");
        //System.out.println(newkdb.toSummaryString());

        // keep only the records with a Nash-Sutcliffe coefficient above 0.5
        Instances withGoodNashSutcliffe = new Instances(newkdb, 0);
        for (int i = 0; i < newkdb.numInstances(); i++) {
            if (newkdb.instance(i).value(newkdb.attribute("NashSutcliffe").index()) > 0.5d) {
                withGoodNashSutcliffe.add(new DenseInstance(1d, newkdb.instance(i).toDoubleArray()));
            }
        }
        System.out.println(withGoodNashSutcliffe.numInstances() + " / " + newkdb.numInstances());

        // percentage of fictive gaps that survive the Nash-Sutcliffe filter
        final double perc = 100d * getCountOfFictiveGaps(withGoodNashSutcliffe)
                / getCountOfFictiveGaps(newkdb);
        System.out.println("Fictive gaps that are infilled with a good Nash-Sutcliffe -> "
                + getCountOfFictiveGaps(withGoodNashSutcliffe) + " (" + perc + "%)");

        WekaDataAccessUtil.saveInstancesIntoARFFFile(withGoodNashSutcliffe,
                new File("./withGoodNashSutcliffe.arff"));
    } catch (final Exception e) {
        e.printStackTrace();
    }
}
From source file:lu.lippmann.cdb.ext.hydviga.util.TransformTimeSeries.java
License:Open Source License
/**
 * Main method.
 * @param args command line arguments
 */
public static final void main(final String[] args) {
    try {
        final Instances dataSet = WekaDataAccessUtil.loadInstancesFromARFFOrCSVFile(new File("."
                + File.separatorChar + "data_fake" + File.separatorChar + "all_valid_q_series_complete2.arff"));
        System.out.println(dataSet.toSummaryString());

        final int numAttributes = dataSet.numAttributes();
        final int numInstances = dataSet.numInstances();

        // For each attribute, pick two other attributes at random and replace each
        // value with the sum of the three, skipping instances with missing values.
        for (int i = 0; i < numAttributes; i++) {
            final int i_bis = (int) (Math.random() * (double) (numAttributes - 3));
            final int i_tri = (int) (Math.random() * (double) (numAttributes - 3));
            for (int j = 0; j < numInstances; j++) {
                final Instance instance_j = dataSet.instance(j);
                if (instance_j.isMissing(i))
                    continue;
                if (instance_j.isMissing(i_bis))
                    continue;
                if (instance_j.isMissing(i_tri))
                    continue;
                final double iValue = instance_j.value(i);
                final double iBisValue = instance_j.value(i_bis);
                final double iTriValue = instance_j.value(i_tri);
                instance_j.setValue(i, (iValue + iBisValue + iTriValue));
            }
        }

        WekaDataAccessUtil.saveInstancesIntoARFFFile(dataSet, new File("." + File.separatorChar
                + "data_fake" + File.separatorChar + "all_valid_q_series_complete2_fake.arff"));
    } catch (final Exception e) {
        e.printStackTrace();
    }
}
From source file:mao.datamining.DataSetPair.java
private void doItOnce4All() {
    if (didIt)
        return;
    didIt = true;
    try {
        // step 0: load the data set and set the last attribute as the class
        Instances orangeDataSet = ConverterUtils.DataSource.read(trainSourceFileName);
        orangeDataSet.setClassIndex(orangeDataSet.numAttributes() - 1);
        Attribute classAttr = orangeDataSet.attribute(orangeDataSet.numAttributes() - 1);
        MainLogger.log(Level.INFO, "Class Attribute: {0}", classAttr.toString());

        // step 0-1: remove all columns which have more than 50% missing values
        Instances newData = orangeDataSet;
        RemoveUselessColumnsByMissingValues removeMissingValuesColumns = new RemoveUselessColumnsByMissingValues();
        removeMissingValuesColumns.setM_maxMissingPercentage(50);
        removeMissingValuesColumns.setManualDeleteColumns(columns2Delete);
        removeMissingValuesColumns.setInputFormat(newData);
        newData = Filter.useFilter(newData, removeMissingValuesColumns);
        Main.logging("== New Data After Removing all Columns having >50% missing values: ===\n"
                + newData.toSummaryString());
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(Main.OrangeProcessedDSHome + "/afterRemoveMissingColumns1.arff")))) {
            writer.write(newData.toString());
        }

        // step 0-2: delete those instances with more than half missing values
        BufferedReader reader70 = new BufferedReader(new InputStreamReader(
                new FileInputStream(Main.OrangeProcessedDSHome + "/afterRemoveMissingColumns1.arff")));
        BufferedWriter writerAfterDeleteRows = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(Main.OrangeProcessedDSHome + "/afterRemoveRows2.arff")));
        int columnNum = newData.numAttributes();
        int totalInstanceNum = newData.numInstances(), deleteM1Num = 0, delete1Num = 0;
        String line = null;
        int missingColumnNum = 0;
        while ((line = reader70.readLine()) != null) {
            // count the '?' (missing-value) markers on each line
            missingColumnNum = 0;
            for (int i = 0; i < line.length(); i++) {
                if (line.charAt(i) == '?')
                    missingColumnNum++;
            }
            if (missingColumnNum * 100 / columnNum < 50) {
                writerAfterDeleteRows.write(line);
                writerAfterDeleteRows.newLine();
            } else {
                System.out.println("Delete Row: [" + line + "]");
                if (line.endsWith("-1")) {
                    deleteM1Num++;
                } else {
                    delete1Num++;
                }
            }
        }
        System.out.println("Total: " + totalInstanceNum + ", delete class -1: " + deleteM1Num
                + ", delete class 1: " + delete1Num);
        reader70.close();
        writerAfterDeleteRows.close();

        // create sample files
        createSampleDataSets();
    } catch (Exception e) {
        Main.logging(null, e);
    }
}
From source file:mao.datamining.DataSetPair.java
/**
 * Pre-process the training data set with:
 *   RemoveUselessColumnsByMissingValues filter,
 *   SpreadSubsample filter to shrink the majority class instances,
 *   AttributeSelection filter with CfsSubsetEval and LinearForwardSelection.
 */
private void processTrainRawData() {
    System.out.println("====================" + this.trainFileName + "====================");
    finalTrainAttrList.clear();
    try {
        doItOnce4All();
        String sampleFilePath = null;

        // step 2: either over-sample or under-sample
        // (weka.filters.supervised.instance.SpreadSubsample)
        if (this.resampleMethod.equalsIgnoreCase(resampleUnder)) {
            System.out.println("Under sampling");
            sampleFilePath = Main.OrangeProcessedDSHome + "/afterUnderSampling.arff";
        } else if (resampleMethod.equalsIgnoreCase(resampleOver)) {
            System.out.println("Over sampling");
            sampleFilePath = Main.OrangeProcessedDSHome + "/afterOverSampling.arff";
        } else if (resampleMethod.equalsIgnoreCase(resampleNone)) {
            // do nothing
            System.out.println("No sampling");
            sampleFilePath = Main.OrangeProcessedDSHome + "/afterNoneSampling.arff";
        } else if (resampleMethod.equalsIgnoreCase(resampleMatrix)) {
            // do nothing
            System.out.println("Matrix sampling");
            sampleFilePath = Main.OrangeProcessedDSHome + "/afterNoneSampling.arff";
        } else {
            doNotSupport();
        }
        Instances newData = ConverterUtils.DataSource.read(sampleFilePath);
        newData.setClassIndex(newData.numAttributes() - 1);
        // Main.logging("== New Data After Resampling class instances: ===\n" + newData.toSummaryString());

        // step 3: select features
        AttributeSelection attrSelectionFilter = new AttributeSelection();
        ASEvaluation eval = null;
        ASSearch search = null;
        if (this.featureSelectionMode.equalsIgnoreCase(featureSelectionA)) {
            System.out.println("Ranker");
            // weka.attributeSelection.Ranker -T 0.01 -N -1
            eval = new weka.attributeSelection.InfoGainAttributeEval();
            search = new Ranker();
            String rankerOptions[] = { "-T", "0.01", "-N", "-1" };
            if (resampleMethod.equalsIgnoreCase(resampleOver)) {
                rankerOptions[1] = "0.1";
            }
            ((Ranker) search).setOptions(rankerOptions);
            Main.logging("== Start to Select Features with InfoGainAttributeEval and Ranker");
        } else if (this.featureSelectionMode.equalsIgnoreCase(featureSelectionB)) {
            System.out.println("CfsSubset");
            // weka.attributeSelection.LinearForwardSelection -D 0 -N 5 -I -K 50 -T 0
            eval = new CfsSubsetEval();
            search = new LinearForwardSelection();
            String linearOptions[] = { "-D", "0", "-N", "5", "-I", "-K", "50", "-T", "0" };
            ((LinearForwardSelection) search).setOptions(linearOptions);
            Main.logging("== Start to Select Features with CfsSubsetEval and LinearForwardSelection");
        } else if (this.featureSelectionMode.equalsIgnoreCase(featureSelectionNo)) {
            System.out.println("No feature selection");
            Main.logging("No Feature Selection Method");
        } else {
            doNotSupport();
        }
        if (eval != null) {
            attrSelectionFilter.setEvaluator(eval);
            attrSelectionFilter.setSearch(search);
            attrSelectionFilter.setInputFormat(newData);
            newData = Filter.useFilter(newData, attrSelectionFilter);
        }
        Main.logging("== New Data After Selecting Features: ===\n" + newData.toSummaryString());

        // finally, write the final data set to the file system
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(this.trainFileName)))) {
            writer.write(newData.toString());
        }
        int numAttributes = newData.numAttributes();
        for (int i = 0; i < numAttributes; i++) {
            String attrName = newData.attribute(i).name();
            finalTrainAttrList.add(attrName);
        }
        Main.logging(finalTrainAttrList.toString());

        // set the final train data set
        finalTrainDataSet = newData;
        finalTrainDataSet.setClassIndex(finalTrainDataSet.numAttributes() - 1);
        Main.logging("train dataset class attr: " + finalTrainDataSet.classAttribute().toString());
    } catch (Exception ex) {
        Main.logging(null, ex);
    }
}
From source file:mao.datamining.ModelProcess.java
/**
 * To create and verify the classifier models.
 *
 * @param classifierClazz
 * @param options
 */
private void runTestCase(DataSetPair ds, String classifierClazz, String reportFilePath, String[] options,
        String caseNum, boolean runDummy, String testCaseDetailFile) {
    String optionsCopy[] = new String[options.length];
    System.arraycopy(options, 0, optionsCopy, 0, options.length);

    // write the summary
    FileOutputStream testCaseSummaryOut = null;
    try {
        testCaseSummaryOut = new FileOutputStream(testCaseDetailFile);
        testCaseSummaryOut.write(("Train File: " + ds.getTrainFileName()).getBytes());
        testCaseSummaryOut.write("\n\n".getBytes());
    } catch (Exception ex) {
        Logger.getLogger(ModelProcess.class.getName()).log(Level.SEVERE, null, ex);
    }

    // prepare the TestResult
    TestResult result = new TestResult();
    result.setCaseNum(caseNum);
    result.setMissingValueMode(ds.getMissingProcessMode());
    result.setResampleMode(ds.getResampleMethod());
    result.setFeatureSelectMode(ds.getFeatureSelectionMode());
    result.setClassifier(classifierClazz);
    StringBuilder optionsStr = new StringBuilder();
    for (String s : optionsCopy) {
        if (s == null) {
            break;
        }
        optionsStr.append(s).append(" ");
    }
    result.setClassifierOptions(optionsStr.toString());

    // get the training and test data sets
    Instances finalTrainDataSet = ds.getFinalTrainDataSet();
    Instances finalTestDataSet = ds.getFinalTestDataSet();

    // start building and testing
    ModelProcess.logging(
            "\n\n\n=========================================START=================================================\n"
                    + "======= " + classifierClazz + "," + optionsStr.toString() + "========\n"
                    + "=========================================START=================================================");
    if (runDummy) {
        ModelProcess.logging("Dummy Run the process");
    } else {
        Classifier classifier = null;
        String tmpClazz = null;
        String tmpOption[] = null;
        // if a cost matrix is used, wrap the classifier, e.g.:
        // weka.classifiers.meta.CostSensitiveClassifier -cost-matrix "[0.0 2.0; 5.0 0.0]" -S 1
        //     -W weka.classifiers.trees.J48 -- -C 0.25 -M 2
        if (this.useCostMatrix) {
            List<String> costOptionsList = new ArrayList<>();
            costOptionsList.add("-cost-matrix");
            costOptionsList.add("\"[0.0 3.0; 30.0 0.0]\"");
            costOptionsList.add("-S");
            costOptionsList.add("1");
            costOptionsList.add("-W");
            costOptionsList.add(classifierClazz);
            costOptionsList.add("--");
            for (String s : optionsCopy) {
                costOptionsList.add(s);
            }
            String newOptions[] = new String[costOptionsList.size()];
            for (int i = 0; i < newOptions.length; i++) {
                newOptions[i] = costOptionsList.get(i);
            }
            tmpClazz = "weka.classifiers.meta.CostSensitiveClassifier";
            tmpOption = newOptions;
        } else {
            tmpClazz = classifierClazz;
            tmpOption = optionsCopy;
        }

        // create the classifier
        try {
            try {
                testCaseSummaryOut.write(
                        ("Data Set Summary: " + finalTrainDataSet.toSummaryString() + "\n\n").getBytes());
                testCaseSummaryOut.write(("classifier: " + tmpClazz + "\n").getBytes());
                testCaseSummaryOut.write(("options: " + Arrays.toString(tmpOption) + "\n\n").getBytes());
            } catch (FileNotFoundException ex) {
                Logger.getLogger(ModelProcess.class.getName()).log(Level.SEVERE, null, ex);
            }
            classifier = (Classifier) Utils.forName(Classifier.class, tmpClazz, tmpOption);
        } catch (Exception ex) {
            Logger.getLogger(ModelProcess.class.getName()).log(Level.SEVERE, null, ex);
        }

        // this.testCV(classifier, finalTrainDataSet, testCaseSummaryOut, result);
        this.testWithExtraDS(classifier, finalTrainDataSet, finalTestDataSet, testCaseSummaryOut, result);

        try {
            testCaseSummaryOut.close();
        } catch (IOException ex) {
            Logger.getLogger(ModelProcess.class.getName()).log(Level.SEVERE, null, ex);
        }

        ModelProcess.logging(
                "========================================END==================================================\n"
                        + result.toString()
                        + "========================================END==================================================");

        this.lockFile(reportFilePath);
        try (FileOutputStream fout = new FileOutputStream(reportFilePath, true)) {
            fout.write(result.toString().getBytes());
            fout.write("\n".getBytes());
            fout.flush();
        } catch (Exception ex) {
            Main.logging(null, ex);
        }
        this.unLockFile(reportFilePath);
    }
}
From source file:mlpoc.MLPOC.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) {
    try {
        BufferedReader br;
        br = new BufferedReader(
                new FileReader("D:/Extra/B.E Project/agrodeploy/webapp/Data/ClusterAutotrain12.arff"));
        Instances training_data = new Instances(br);
        br.close();
        training_data.setClassIndex(training_data.numAttributes() - 1);

        br = new BufferedReader(new FileReader("D:/Extra/B.E Project/agrodeploy/webapp/Data/TestFinal.arff"));
        Instances testing_data = new Instances(br);
        br.close();
        testing_data.setClassIndex(testing_data.numAttributes() - 1);

        String summary = training_data.toSummaryString();
        int number_samples = training_data.numInstances();
        int number_attributes_per_sample = training_data.numAttributes();
        System.out.println("Number of attributes in model = " + number_attributes_per_sample);
        System.out.println("Number of samples = " + number_samples);
        System.out.println("Summary: " + summary);
        System.out.println();

        // build a J48 decision tree wrapped in a FilteredClassifier
        J48 j48 = new J48();
        FilteredClassifier fc = new FilteredClassifier();
        fc.setClassifier(j48);
        fc.buildClassifier(training_data);

        System.out.println("Testing instances: " + testing_data.numInstances());
        for (int i = 0; i < testing_data.numInstances(); i++) {
            double pred = fc.classifyInstance(testing_data.instance(i));
            String s1 = testing_data.classAttribute().value((int) pred);
            System.out.println(testing_data.instance(i) + " Predicted value: " + s1);
        }

        Evaluation crossValidate = crossValidate(
                "D:/Extra/B.E Project/agrodeploy/webapp/Data/ClusterAutotrain12.arff");

        DataSource source = new DataSource(
                "D:/Extra/B.E Project/agrodeploy/webapp/Data/ClusterAutotrain12.arff");
        Instances data = source.getDataSet();
        System.out.println(data.numInstances());
        data.setClassIndex(data.numAttributes() - 1);
        // 1. meta-classifier
        useClassifier(data);
        // 2. filter
        useFilter(data);
    } catch (Exception ex) {
        Logger.getLogger(MLPOC.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:sentinets.Prediction.java
License:Open Source License
public void writeStats(Instances tweetInstances) {
    //TweetCorpusStatistics stats = new TweetCorpusStatistics();
    System.out.println("Stats Instances: \n" + tweetInstances.toSummaryString());
    for (int i = 0; i < tweetInstances.size(); i++) {
        // attribute positions are counted 1-based here, hence the "- 1"
        String user = tweetInstances.get(i).stringValue(11 - 1);
        String mentions = tweetInstances.get(i).stringValue(3 - 1);
        String hashtags = tweetInstances.get(i).stringValue(14 - 1);
        String epClass = tweetInstances.get(i).stringValue(15 - 1);
        String snsClass = tweetInstances.get(i).stringValue(16 - 1);
        System.out.println("Tweet Details:\t" + user + "\t" + mentions + "\t" + hashtags + "\t"
                + printDist(classDist.get(i)));
        //stats.updateStatistics(user, mentions, hashtags, epClass+","+snsClass, classDist.get(i));
    }
}
From source file:soccer.core.MyMatchLoader.java
public void test() throws IOException, Exception {
    Instances instances = loader.getDataSet();
    // convert the last attribute (the class) from numeric to nominal
    NumericToNominal nm = new NumericToNominal();
    nm.setOptions(new String[] { "-R", "last" });
    nm.setInputFormat(instances);
    instances = Filter.useFilter(instances, nm);
    instances.setClassIndex(instances.numAttributes() - 1);
    System.out.println(instances.toSummaryString());
}
From source file:soccer.core.MyPlayerLoader.java
public void test() throws IOException, Exception {
    Instances instances = getInstances();
    System.out.println(instances.toSummaryString());
}