List of usage examples for weka.core Instances numInstances
publicint numInstances()
From source file:com.reactivetechnologies.analytics.core.eval.StackingWithBuiltClassifiers.java
License:Open Source License
/** * Generates the meta data/*from www . j a v a2 s . c o m*/ * * @param newData the data to work on * @param random the random number generator to use for cross-validation * @throws Exception if generation fails */ @Override protected void generateMetaLevel(Instances newData, Random random) throws Exception { Instances metaData = metaFormat(newData); m_MetaFormat = new Instances(metaData, 0); for (int j = 0; j < m_NumFolds; j++) { /** Changed here */ //Instances train = newData.trainCV(m_NumFolds, j, random); // DO NOT Build base classifiers /*for (int i = 0; i < m_Classifiers.length; i++) { getClassifier(i).buildClassifier(train); }*/ /** End change */ // Classify test instances and add to meta data Instances test = newData.testCV(m_NumFolds, j); for (int i = 0; i < test.numInstances(); i++) { metaData.add(metaInstance(test.instance(i))); } } m_MetaClassifier.buildClassifier(metaData); }
From source file:com.relationalcloud.main.Explanation.java
License:Open Source License
/** * @param args/*from w w w . j a v a 2s .co m*/ */ public static void main(String[] args) { // LOADING PROPERTY FILE AND DRIVER Properties ini = new Properties(); try { ini.load(new FileInputStream(System.getProperty("prop"))); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } // Register jdbcDriver try { Class.forName(ini.getProperty("driver")); } catch (ClassNotFoundException e) { e.printStackTrace(); } // LOAD PROPERTIES FROM CONFIGURATION FILE String connection = ini.getProperty("conn"); String schemaname = ini.getProperty("schema"); String user = ini.getProperty("user"); String password = ini.getProperty("password"); String txnLogTable = ini.getProperty("txnLogTable"); String numb_trans_to_process = ini.getProperty("Explanation.numTxnsToExtractTemplates"); int numPart = Integer.parseInt(ini.getProperty("numPartitions")); // Initialize the Justification Handler ExplanationHandler jh = new ExplanationHandler(ini); System.out.println("Loading and processing " + jh.schemaname + " traces... considering prop file :" + jh.dbPropertyFile); try { // CREATE A DB CONNEctioN Connection conn = DriverManager.getConnection(connection + schemaname, user, password); Connection infschema_conn = DriverManager.getConnection(connection + "information_schema", user, password); Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname); // ANALYZE WORKLOADS EXTRACTING TABLES, ATTRIBUTES AND FREQUENCIES ExplanationWorkloadPrepocessor wa = ExplanationHandler.analyzeWorkload(txnLogTable, numb_trans_to_process, schemaname, conn, schema); // FOR EACH TABLE CLASSIFY AND POPULATE JUSTIFICATION COLUMN for (String tableProcessed : wa.getAllTableNames()) { System.out.println("-------------------------------------------"); System.out.println("ANALYZING TABLE " + tableProcessed); // FETCH THE INSTANCE FROM THE DB AND SAMPLE IT Instances data = jh.generateInstancesForTable(tableProcessed, wa.getFeatures(tableProcessed), conn); // IF THERE IS ONLY THE PARTITION LABEL, SKIP THE TABLE if (data.numAttributes() < 2) { System.out.println("No transactions touches this table, nothing to be done."); continue; } // INSTANTIATE THE CLASSIFIER String[] options; options = new String[3]; options[0] = "-P"; options[1] = "-C"; options[2] = ini.getProperty("Explanation.j48PruningConfidence"); J48 classifier = new J48(); // new instance of tree classifier.setOptions(options); // set the options Boolean attributeFilter = true; // ATTRIBUTE FILTERING Instances newData; if (data.numClasses() > 1 && attributeFilter) { AttributeSelection filter = new AttributeSelection(); //FIXME TRYING ALTERNATIVE ATTRIBUTE SELECTION STRATEGIES //InfoGainAttributeEval eval = new InfoGainAttributeEval(); //Ranker search = new Ranker(); //search.setNumToSelect(Integer.parseInt(ini.getProperty("Explanation.maxNumberOfAttribute","2"))); CfsSubsetEval eval = new CfsSubsetEval(); GreedyStepwise search = new GreedyStepwise(); search.setSearchBackwards(true); filter.setEvaluator(eval); filter.setSearch(search); filter.setInputFormat(data); newData = Filter.useFilter(data, filter); } else { newData = data; } String atts = ""; Enumeration e = newData.enumerateAttributes(); ArrayList<String> attributesForPopulation = new ArrayList<String>(); while (e.hasMoreElements()) { String s = ((Attribute) e.nextElement()).name(); attributesForPopulation.add(s); atts += s + ", "; } atts = atts.substring(0, atts.length() - 2); System.out.println("Attribute filtering reduced " + (data.numAttributes() - 1) + " to " + (newData.numAttributes() - 1) + " (" + atts + ")"); data = null; System.gc(); if (newData.numInstances() < 1) { System.err.println("The are no data in the table, skipping classification"); continue; } if (newData.numInstances() > 0) { if (newData.classAttribute().numValues() > 1) { // TRAIN THE CLASSIFIER AND PRINT OUT CLASSIFIER RULES ExplanationHandler.trainClassifier(newData, classifier); if (classifier.measureNumLeaves() == 1) { int partitionvalue = (int) classifier.classifyInstance(newData.firstInstance()); System.out.println( "The classifier decided to put all the tuplesi in the table in one partition: " + partitionvalue); if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) { jh.populateExplainedColumn(tableProcessed, partitionvalue, attributesForPopulation, conn); } } // POPULATING THE justifiedpartition column with the result of this // classifier if required else if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) { jh.populateJustifiedColumn(tableProcessed, classifier, attributesForPopulation, conn, numPart, newData.classAttribute().enumerateValues()); } } else { // easy case... the class attribute is unary!! int partitionvalue = ((int) newData.firstInstance() .value(newData.firstInstance().classIndex())); System.out.println("The table is all stored in one partition, no need to use classifier"); if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) { jh.populateExplainedColumn(tableProcessed, partitionvalue, attributesForPopulation, conn); } } } else throw new Exception("The Instances is empty"); } // SET HASH PARTITION / REPLICATED PARTITION if (Boolean.parseBoolean(ini.getProperty("Explanation.populateHashColumn"))) { jh.populateHashPartition(conn); } if (Boolean.parseBoolean(ini.getProperty("Explanation.populateReplicatedColumn"))) { jh.populateReplicatedPartition(conn, Boolean.parseBoolean(ini.getProperty("Explanation.defaultReplicate"))); } conn.close(); } catch (SQLException e) { e.printStackTrace(); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } }
From source file:com.relationalcloud.partitioning.explanation.ExplanationHandler.java
License:Open Source License
/** * Train the given classifier//from ww w. j a v a 2 s. c om * * @param newData * @param classifier * @throws Exception */ public static void trainClassifier(Instances newData, Classifier classifier) throws Exception { // if the class attributed is not unary we proceed regularly // verify the Classifier can handle this dataset classifier.getCapabilities().testWithFail(newData); System.out.println("BUILDING CLASSIFIER ON INSTANCE:" + newData.toSummaryString()); long treeTstart = System.currentTimeMillis(); classifier.buildClassifier(newData); // build classifier long treeTend = System.currentTimeMillis(); System.out.println("CLASSIFIER BUILDING TIME: " + (treeTend - treeTstart) + "ms FROM: " + newData.numInstances() + " instances \n" + classifier.toString()); }
From source file:com.relationalcloud.partitioning.explanation.ExplanationHandler.java
License:Open Source License
/** * Applies sample training to reduce the input Instances to a specified size. * //from www . j a va 2s. com * @param classifier_sampling_threshold * @param newData1 * @return * @throws Exception */ public static Instances sampleTraining(Double classifier_sampling_threshold, Instances newData1) throws Exception { String[] options; Instances newData; if (newData1.numInstances() > classifier_sampling_threshold) { double percentage = (double) 100 * ((double) classifier_sampling_threshold) / ((double) newData1.numInstances()); Resample r = new Resample(); options = new String[4]; options[0] = "-C"; options[1] = "last"; options[2] = "-Z"; options[3] = "" + percentage; r.setOptions(options); r.setInputFormat(newData1); newData = Filter.useFilter(newData1, r); } else { newData = newData1; } return newData; }
From source file:com.sliit.normalize.NormalizeDataset.java
public String normalizeDataset() { System.out.println("start normalizing data"); String filePathOut = ""; try {// w w w.ja va 2 s .c om CSVLoader loader = new CSVLoader(); if (reducedDiemensionFile != null) { loader.setSource(reducedDiemensionFile); } else { if (tempFIle != null && tempFIle.exists()) { loader.setSource(tempFIle); } else { loader.setSource(csvFile); } } Instances dataInstance = loader.getDataSet(); Normalize normalize = new Normalize(); dataInstance.setClassIndex(dataInstance.numAttributes() - 1); normalize.setInputFormat(dataInstance); String directory = csvFile.getParent(); outputFile = new File(directory + "/" + "normalized" + csvFile.getName()); if (!outputFile.exists()) { outputFile.createNewFile(); } CSVSaver saver = new CSVSaver(); saver.setFile(outputFile); for (int i = 1; i < dataInstance.numInstances(); i++) { normalize.input(dataInstance.instance(i)); } normalize.batchFinished(); Instances outPut = new Instances(dataInstance, 0); for (int i = 1; i < dataInstance.numInstances(); i++) { outPut.add(normalize.output()); } Attribute attribute = dataInstance.attribute(outPut.numAttributes() - 1); for (int j = 0; j < attribute.numValues(); j++) { if (attribute.value(j).equals("normal.")) { outPut.renameAttributeValue(attribute, attribute.value(j), "0"); } else { outPut.renameAttributeValue(attribute, attribute.value(j), "1"); } } saver.setInstances(outPut); saver.writeBatch(); writeToNewFile(directory); filePathOut = directory + "norm" + csvFile.getName(); if (tempFIle != null) { tempFIle.delete(); } if (reducedDiemensionFile != null) { reducedDiemensionFile.delete(); } outputFile.delete(); } catch (IOException e) { log.error("Error occurred:" + e.getMessage()); } catch (Exception e) { log.error("Error occurred:" + e.getMessage()); } return filePathOut; }
From source file:com.sliit.normalize.NormalizeDataset.java
public boolean updateStringValues(Map<Integer, String> values) { System.out.println("updating String Values"); boolean status = false; try {// w w w .j a v a 2 s . c o m csv.setSource(csvFile); Instances dataInstance = csv.getDataSet(); for (int i = 0; i < dataInstance.numInstances(); i++) { if (values.containsKey(i)) { Attribute attribute = dataInstance.attribute(i); for (int j = 0; j < attribute.numValues(); j++) { dataInstance.renameAttributeValue(attribute, attribute.value(j), j + ""); } } } tempFIle = new File(csvFile.getParent() + "/temp.csv"); CSVSaver saver = new CSVSaver(); saver.setInstances(dataInstance); saver.setFile(tempFIle); saver.writeBatch(); } catch (IOException e) { log.error("Error occurred:" + e.getMessage()); } return status; }
From source file:com.sliit.normalize.NormalizeDataset.java
public int whiteningData() { System.out.println("whiteningData"); int nums = 0; try {//from www . j ava 2s . co m if (tempFIle != null && tempFIle.exists()) { csv.setSource(tempFIle); } else { csv.setSource(csvFile); } Instances instances = csv.getDataSet(); if (instances.numAttributes() > 10) { instances.setClassIndex(instances.numAttributes() - 1); RandomProjection random = new RandomProjection(); random.setDistribution( new SelectedTag(RandomProjection.GAUSSIAN, RandomProjection.TAGS_DSTRS_TYPE)); reducedDiemensionFile = new File(csvFile.getParent() + "/tempwhite.csv"); if (!reducedDiemensionFile.exists()) { reducedDiemensionFile.createNewFile(); } // CSVSaver saver = new CSVSaver(); /// saver.setFile(reducedDiemensionFile); random.setInputFormat(instances); //saver.setRetrieval(AbstractSaver.INCREMENTAL); BufferedWriter writer = new BufferedWriter(new FileWriter(reducedDiemensionFile)); for (int i = 0; i < instances.numInstances(); i++) { random.input(instances.instance(i)); random.setNumberOfAttributes(10); random.setReplaceMissingValues(true); writer.write(random.output().toString()); writer.newLine(); //saver.writeIncremental(random.output()); } writer.flush(); writer.close(); nums = random.getNumberOfAttributes(); } else { nums = instances.numAttributes(); } } catch (IOException e) { log.error("Error occurred:" + e.getMessage()); } catch (Exception e) { log.error("Error occurred:" + e.getMessage()); } return nums; }
From source file:com.sliit.views.DataVisualizerPanel.java
void getRocCurve() { try {/* www. j av a2 s .c o m*/ Instances data; data = new Instances(new BufferedReader(new FileReader(datasetPathText.getText()))); data.setClassIndex(data.numAttributes() - 1); // train classifier Classifier cl = new NaiveBayes(); Evaluation eval = new Evaluation(data); eval.crossValidateModel(cl, data, 10, new Random(1)); // generate curve ThresholdCurve tc = new ThresholdCurve(); int classIndex = 0; Instances result = tc.getCurve(eval.predictions(), classIndex); // plot curve ThresholdVisualizePanel vmc = new ThresholdVisualizePanel(); vmc.setROCString("(Area under ROC = " + Utils.doubleToString(tc.getROCArea(result), 4) + ")"); vmc.setName(result.relationName()); PlotData2D tempd = new PlotData2D(result); tempd.setPlotName(result.relationName()); tempd.addInstanceNumberAttribute(); // specify which points are connected boolean[] cp = new boolean[result.numInstances()]; for (int n = 1; n < cp.length; n++) { cp[n] = true; } tempd.setConnectPoints(cp); // add plot vmc.addPlot(tempd); // display curve String plotName = vmc.getName(); final javax.swing.JFrame jf = new javax.swing.JFrame("Weka Classifier Visualize: " + plotName); jf.setSize(500, 400); jf.getContentPane().setLayout(new BorderLayout()); jf.getContentPane().add(vmc, BorderLayout.CENTER); jf.addWindowListener(new java.awt.event.WindowAdapter() { public void windowClosing(java.awt.event.WindowEvent e) { jf.dispose(); } }); jf.setVisible(true); } catch (IOException ex) { Logger.getLogger(DataVisualizerPanel.class.getName()).log(Level.SEVERE, null, ex); } catch (Exception ex) { Logger.getLogger(DataVisualizerPanel.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:com.sliit.views.KNNView.java
void getRocCurve() { try {/*from w w w . j a va 2s .co m*/ Instances data; data = new Instances(new BufferedReader(new java.io.FileReader(PredictorPanel.modalText.getText()))); data.setClassIndex(data.numAttributes() - 1); // train classifier Classifier cl = new NaiveBayes(); Evaluation eval = new Evaluation(data); eval.crossValidateModel(cl, data, 10, new Random(1)); // generate curve ThresholdCurve tc = new ThresholdCurve(); int classIndex = 0; Instances result = tc.getCurve(eval.predictions(), classIndex); // plot curve ThresholdVisualizePanel vmc = new ThresholdVisualizePanel(); vmc.setROCString("(Area under ROC = " + Utils.doubleToString(tc.getROCArea(result), 4) + ")"); vmc.setName(result.relationName()); PlotData2D tempd = new PlotData2D(result); tempd.setPlotName(result.relationName()); tempd.addInstanceNumberAttribute(); // specify which points are connected boolean[] cp = new boolean[result.numInstances()]; for (int n = 1; n < cp.length; n++) { cp[n] = true; } tempd.setConnectPoints(cp); // add plot vmc.addPlot(tempd); rocPanel.removeAll(); rocPanel.add(vmc, "vmc", 0); rocPanel.revalidate(); } catch (IOException ex) { Logger.getLogger(DataVisualizerPanel.class.getName()).log(Level.SEVERE, null, ex); } catch (Exception ex) { Logger.getLogger(DataVisualizerPanel.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:com.sliit.views.SVMView.java
/** * draw ROC curve/*from w w w .j ava 2 s. c o m*/ */ void getRocCurve() { try { Instances data; data = new Instances(new BufferedReader(new FileReader(PredictorPanel.modalText.getText()))); data.setClassIndex(data.numAttributes() - 1); //train classifier Classifier cl = new NaiveBayes(); Evaluation eval = new Evaluation(data); eval.crossValidateModel(cl, data, 10, new Random(1)); // generate curve ThresholdCurve tc = new ThresholdCurve(); int classIndex = 0; Instances result = tc.getCurve(eval.predictions(), classIndex); // plot curve ThresholdVisualizePanel vmc = new ThresholdVisualizePanel(); vmc.setROCString("(Area under ROC = " + Utils.doubleToString(tc.getROCArea(result), 4) + ")"); vmc.setName(result.relationName()); PlotData2D tempd = new PlotData2D(result); tempd.setPlotName(result.relationName()); tempd.addInstanceNumberAttribute(); // specify which points are connected boolean[] cp = new boolean[result.numInstances()]; for (int n = 1; n < cp.length; n++) { cp[n] = true; } tempd.setConnectPoints(cp); // add plot vmc.addPlot(tempd); // rocPanel.removeAll(); // rocPanel.add(vmc, "vmc", 0); // rocPanel.revalidate(); } catch (IOException ex) { Logger.getLogger(DataVisualizerPanel.class.getName()).log(Level.SEVERE, null, ex); } catch (Exception ex) { Logger.getLogger(DataVisualizerPanel.class.getName()).log(Level.SEVERE, null, ex); } }