List of usage examples for weka.classifiers.trees.J48
From source file:c4.pkg5crossv.Classifier.java
public static void C45() throws FileNotFoundException, IOException, Exception { Instances data = DataLoad.loadData("./src/data/irysy.arff"); //Ustawienie atrybutu decyzyjnego (ostatni atrybut) data.setClassIndex(data.numAttributes() - 1); //OPCJE:/* w w w. j a va 2s . com*/ //-U -> budowa drzewa bez przycinania (ostre liscie) //-C -> <wspolczynnik dokladnosci> - ustawienie wspolczynnika dokladnosci dla lisci (default 0.25) //-M -> ustawienie minimalnej liczby obiektow w lisciu dla ktorej lisc nie jest dzielony (default 2) //Ustalenie opcji String[] options = Utils.splitOptions("-U -M 10"); J48 tree = new J48(); tree.setOptions(options); //Ustawienie opcji tree.buildClassifier(data); // Tworzenie klasyfikatora (drzewa) System.out.println(tree.toString()); //Wypisanie drzewa w formie tekstowej System.out.println("TRAIN&TEST"); trainAndTest(); }
From source file:c4.pkg5crossv.Classifier.java
public static void trainAndTest() throws FileNotFoundException, IOException, Exception { Instances data = DataLoad.loadData("./src/data/irysy.arff"); data.setClassIndex(data.numAttributes() - 1); //Losowy podzial tablicy data.randomize(new Random()); double percent = 60.0; int trainSize = (int) Math.round(data.numInstances() * percent / 100); int testSize = data.numInstances() - trainSize; Instances trainData = new Instances(data, 0, trainSize); Instances testData = new Instances(data, trainSize, testSize); String[] options = Utils.splitOptions("-U -M 10"); J48 tree = new J48(); tree.setOptions(options);// www. j av a 2 s . c o m tree.buildClassifier(trainData); Evaluation eval2 = new Evaluation(trainData); eval2.crossValidateModel(tree, testData, 10, new Random(1)); // 5 - fold System.out.println(eval2.toSummaryString("Wyniki:", false)); //Wypisanie testovania cross validation }
From source file:ca.uottawa.balie.SentenceBoundariesRecognition.java
License:Open Source License
/**
 * Trains the sentence-boundary-detection model.
 *
 * Reads the training corpus, converts each sentence into token lists,
 * declares the Weka attribute layout (token-category features around each
 * candidate boundary plus spacing features), collects one training instance
 * per token transition, and finally trains a J48 decision tree.
 *
 * @return the trained {@code WekaLearner} wrapping a J48 decision tree
 * @throws Error if the training corpus cannot be read (unrecoverable here)
 */
private WekaLearner TrainModel() {
    // Load the raw training sentences; a missing corpus is fatal.
    ArrayList<String> alSentence = null;
    try {
        alSentence = ReadCorpus(Balie.SBR_TRAINING_CORPUS_PC);
    } catch (Exception e) {
        throw new Error("SBD Training corpus was not found");
    }
    ArrayList<TokenList> alTokenLists = GetTokenLists(alSentence);

    // Prepare the classifier: nominal value set shared by the token-category
    // attributes below.
    WekaAttribute[] wekaAttributes = new WekaAttribute[NUM_FEATURES];
    FastVector attrVal = new FastVector(NUM_ATTRIBUTES);
    attrVal.addElement(VAL_PERIOD);
    attrVal.addElement(VAL_PERIOD_LIKE);
    attrVal.addElement(VAL_OPEN_BRACKET);
    attrVal.addElement(VAL_CLOSE_BRACKET);
    attrVal.addElement(VAL_QUOTE);
    attrVal.addElement(VAL_PUNCT);
    attrVal.addElement(VAL_NEW_LINE);
    attrVal.addElement(VAL_LINE_FEED);
    attrVal.addElement(VAL_LF_IN_CAP);
    attrVal.addElement(VAL_NL_IN_CAP);
    attrVal.addElement(VAL_CAPITAL);
    attrVal.addElement(VAL_DIGIT);
    attrVal.addElement(VAL_ABBREVIATION);
    attrVal.addElement(VAL_OTHER);
    attrVal.addElement(VAL_NULL);

    // Feature layout: attributes 2 and 4 are built without a value set —
    // presumably numeric spacing features; confirm in WekaAttribute.
    wekaAttributes[0] = new WekaAttribute("SentenceBeginning", attrVal);
    wekaAttributes[1] = new WekaAttribute("LastToken", attrVal);
    wekaAttributes[2] = new WekaAttribute("Last2CurrentSpace");
    wekaAttributes[3] = new WekaAttribute("CurrentToken", attrVal);
    wekaAttributes[4] = new WekaAttribute("Current2NextSpace");
    wekaAttributes[5] = new WekaAttribute("NextToken", attrVal);

    // Binary class: boundary vs. not-a-boundary.
    String[] strClass = new String[] { IS_SENTENCE_BOUNDARY, IS_NOT_SENTENCE_BOUNDARY };
    WekaLearner wl = new WekaLearner(wekaAttributes, strClass);

    // Create a training instance for each token transition.
    for (int i = 0; i != alTokenLists.size(); ++i) {
        TokenList alCurrentTokenList = (TokenList) alTokenLists.get(i);
        // The following token list (if any) provides look-ahead context.
        TokenList alNextTokenList = null;
        if (i != alTokenLists.size() - 1) {
            alNextTokenList = (TokenList) alTokenLists.get(i + 1);
        }
        // Describe each token of the current list as one candidate instance.
        for (int j = 0; j != alCurrentTokenList.Size(); ++j) {
            Object[] strInstance = new Object[NUM_FEATURES];
            boolean bTrivialInstance = DescribeTrainTestInstance(alCurrentTokenList, alNextTokenList, j,
                    strInstance);
            // Each sentence's last token is, by construction of the corpus,
            // a sentence boundary.
            String curClass = IS_NOT_SENTENCE_BOUNDARY;
            if (j == alCurrentTokenList.Size() - 1) {
                curClass = IS_SENTENCE_BOUNDARY;
                // NOTE(review): presumably updates internal sentence-start
                // state for the next instance — confirm.
                YieldSentenceBeginning();
            }
            // Do not add trivial examples.
            if (!bTrivialInstance) {
                wl.AddTrainInstance(strInstance, curClass);
            }
        }
    }

    // Train a J48 decision tree over the collected instances.
    J48 j48 = new J48();
    wl.CreateModel(j48);
    return wl;
}
From source file:clasificador.Perceptron.java
public void J48() { try {/*from w w w.j a v a2s . c o m*/ //INSTANCIAS PARA ENTRENAMIENTO DEL CLASIFICADOR ConverterUtils.DataSource converU = new ConverterUtils.DataSource( "C:\\Users\\Kathy\\Documents\\tutorial perl\\libro.arff"); Instances instancias = converU.getDataSet(); instancias.setClassIndex(instancias.numAttributes() - 1); //INSTANCIAS PARA TEST DEL MODELO ConverterUtils.DataSource convertest = new ConverterUtils.DataSource( "C:\\Users\\Kathy\\Documents\\tutorial perl\\libro5.arff"); Instances testInstance = convertest.getDataSet(); testInstance.setClassIndex(testInstance.numAttributes() - 1); //INSTANCIAS PARA PREDICCIN ConverterUtils.DataSource converPredict = new ConverterUtils.DataSource( "C:\\Users\\Kathy\\Documents\\tutorial perl\\libro1.arff"); Instances predictInstance = converPredict.getDataSet(); predictInstance.setClassIndex(predictInstance.numAttributes() - 1); //CONTRUCCIN DEL CLASIFICADOR J48 perceptron = new J48(); perceptron.buildClassifier(instancias); //Evaluar las instancias Evaluation ev = new Evaluation(instancias); //EVALUAR MODELO DE ENTRENAMIENTO ev.evaluateModel(perceptron, instancias); //System.out.println(instancias); System.out.println("\n\nENTRENAMIENTO DEL MODELO ?RBOL DE DECISIN J48\n\n"); System.out.println(ev.toSummaryString("_____RESULTADO_____", true)); System.out.println(ev.toMatrixString("_____Matriz confusion___")); //PREDECIR CON EL MODELO Evaluation evPredict = new Evaluation(instancias); evPredict.evaluateModel(perceptron, predictInstance); //System.out.println(instancias); System.out.println("\n\nPREDICCIN DEL MODELO ?RBOL DE DECISIN J48\n\n"); System.out.println(evPredict.toSummaryString("_____RESULTADO_____", false)); System.out.println(evPredict.toMatrixString("_____Matriz confusion___")); //MOSTRAR VALORES for (int i = 0; i < evPredict.evaluateModel(perceptron, predictInstance).length; i++) { resultado = evPredict.evaluateModel(perceptron, predictInstance)[i]; polaridad += polaridad(resultado) + "\n"; 
//System.out.println("Se clasifica como: "+resultado + "que es: " + polaridad(resultado)); } archivoResultados(polaridad); //TEST DEL MODELO CON LOS DATOS DEL CLASIFICADOR Evaluation evtesting = new Evaluation(instancias); evtesting.evaluateModel(perceptron, testInstance); //System.out.println(instancias); System.out.println("\n\nTEST DEL MODELO ?RBOL DE DECISIN J48\n\n"); System.out.println(evtesting.toSummaryString("_____RESULTADO_____", false)); System.out.println(evtesting.toMatrixString("_____Matriz confusion___")); } catch (Exception ex) { Logger.getLogger(Perceptron.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:classif.ExperimentsLauncher.java
License:Open Source License
public void launchJ48() { try {//from w w w .j a v a 2s.co m String algo = "J48"; System.out.println(algo); double testError = 0.0; J48 dt = new J48(); dt.buildClassifier(train); Evaluation eval = new Evaluation(train); eval.evaluateModel(dt, test); testError = eval.errorRate(); System.out.println("TestError:" + testError + "\n"); System.out.println(dt.toSummaryString()); System.out.println(dt.graph()); System.out.println(eval.toSummaryString()); } catch (Exception e) { e.printStackTrace(); } }
From source file:com.edwardraff.WekaMNIST.java
License:Open Source License
public static void main(String[] args) throws IOException, Exception { String folder = args[0];//from w w w .j av a2 s.c o m String trainPath = folder + "MNISTtrain.arff"; String testPath = folder + "MNISTtest.arff"; System.out.println("Weka Timings"); Instances mnistTrainWeka = new Instances(new BufferedReader(new FileReader(new File(trainPath)))); mnistTrainWeka.setClassIndex(mnistTrainWeka.numAttributes() - 1); Instances mnistTestWeka = new Instances(new BufferedReader(new FileReader(new File(testPath)))); mnistTestWeka.setClassIndex(mnistTestWeka.numAttributes() - 1); //normalize range like into [0, 1] Normalize normalizeFilter = new Normalize(); normalizeFilter.setInputFormat(mnistTrainWeka); mnistTestWeka = Normalize.useFilter(mnistTestWeka, normalizeFilter); mnistTrainWeka = Normalize.useFilter(mnistTrainWeka, normalizeFilter); long start, end; System.out.println("RBF SVM (Full Cache)"); SMO smo = new SMO(); smo.setKernel(new RBFKernel(mnistTrainWeka, 0/*0 causes Weka to cache the whole matrix...*/, 0.015625)); smo.setC(8.0); smo.setBuildLogisticModels(false); evalModel(smo, mnistTrainWeka, mnistTestWeka); System.out.println("RBF SVM (No Cache)"); smo = new SMO(); smo.setKernel(new RBFKernel(mnistTrainWeka, 1, 0.015625)); smo.setC(8.0); smo.setBuildLogisticModels(false); evalModel(smo, mnistTrainWeka, mnistTestWeka); System.out.println("Decision Tree C45"); J48 wekaC45 = new J48(); wekaC45.setUseLaplace(false); wekaC45.setCollapseTree(false); wekaC45.setUnpruned(true); wekaC45.setMinNumObj(2); wekaC45.setUseMDLcorrection(true); evalModel(wekaC45, mnistTrainWeka, mnistTestWeka); System.out.println("Random Forest 50 trees"); int featuresToUse = (int) Math.sqrt(28 * 28);//Weka uses different defaults, so lets make sure they both use the published way RandomForest wekaRF = new RandomForest(); wekaRF.setNumExecutionSlots(1); wekaRF.setMaxDepth(0/*0 for unlimited*/); wekaRF.setNumFeatures(featuresToUse); wekaRF.setNumTrees(50); evalModel(wekaRF, mnistTrainWeka, 
mnistTestWeka); System.out.println("1-NN (brute)"); IBk wekaNN = new IBk(1); wekaNN.setNearestNeighbourSearchAlgorithm(new LinearNNSearch()); wekaNN.setCrossValidate(false); evalModel(wekaNN, mnistTrainWeka, mnistTestWeka); System.out.println("1-NN (Ball Tree)"); wekaNN = new IBk(1); wekaNN.setNearestNeighbourSearchAlgorithm(new BallTree()); wekaNN.setCrossValidate(false); evalModel(wekaNN, mnistTrainWeka, mnistTestWeka); System.out.println("1-NN (Cover Tree)"); wekaNN = new IBk(1); wekaNN.setNearestNeighbourSearchAlgorithm(new CoverTree()); wekaNN.setCrossValidate(false); evalModel(wekaNN, mnistTrainWeka, mnistTestWeka); System.out.println("Logistic Regression LBFGS lambda = 1e-4"); Logistic logisticLBFGS = new Logistic(); logisticLBFGS.setRidge(1e-4); logisticLBFGS.setMaxIts(500); evalModel(logisticLBFGS, mnistTrainWeka, mnistTestWeka); System.out.println("k-means (Loyd)"); int origClassIndex = mnistTrainWeka.classIndex(); mnistTrainWeka.setClassIndex(-1); mnistTrainWeka.deleteAttributeAt(origClassIndex); { long totalTime = 0; for (int i = 0; i < 10; i++) { SimpleKMeans wekaKMeans = new SimpleKMeans(); wekaKMeans.setNumClusters(10); wekaKMeans.setNumExecutionSlots(1); wekaKMeans.setFastDistanceCalc(true); start = System.currentTimeMillis(); wekaKMeans.buildClusterer(mnistTrainWeka); end = System.currentTimeMillis(); totalTime += (end - start); } System.out.println("\tClustering took: " + (totalTime / 10.0) / 1000.0 + " on average"); } }
From source file:com.github.fracpete.multisearch.optimize.J48ConfidenceFactor.java
License:Open Source License
/** * The first parameter must be dataset,//w ww .j a v a2 s . c o m * the (optional) second the class index (1-based, 'first' and 'last' * also supported). * * @param args the commandline options * @throws Exception if optimization fails for some reason */ public static void main(String[] args) throws Exception { if (args.length == 0) { System.err.println("\nUsage: J48ConfidenceFactor <dataset> [classindex]\n"); System.exit(1); } // load data Instances data = ExampleHelper.loadData(args[0], (args.length > 1) ? args[1] : null); // configure classifier we want to optimize J48 j48 = new J48(); // configure multisearch MathParameter conf = new MathParameter(); conf.setProperty("confidenceFactor"); conf.setBase(10); conf.setMin(0.05); conf.setMax(0.75); conf.setStep(0.05); conf.setExpression("I"); MultiSearch multi = new MultiSearch(); multi.setClassifier(j48); multi.setSearchParameters(new AbstractParameter[] { conf }); SelectedTag tag = new SelectedTag(DefaultEvaluationMetrics.EVALUATION_AUC, new DefaultEvaluationMetrics().getTags()); multi.setEvaluation(tag); // output configuration System.out.println("\nMultiSearch commandline:\n" + Utils.toCommandLine(multi)); // optimize System.out.println("\nOptimizing...\n"); multi.buildClassifier(data); System.out.println("Best setup:\n" + Utils.toCommandLine(multi.getBestClassifier())); System.out.println("Best parameter: " + multi.getGenerator().evaluate(multi.getBestValues())); }
From source file:com.github.fracpete.multisearch.setupgenerator.J48ConfidenceFactor.java
License:Open Source License
/** * Outputs the commandlines.//w ww . j a va 2s .c o m * * @param args the commandline options * @throws Exception if setup generator fails for some reason */ public static void main(String[] args) throws Exception { // configure classifier we want to generate setups for J48 j48 = new J48(); // configure generator MathParameter conf = new MathParameter(); conf.setProperty("confidenceFactor"); conf.setBase(10); conf.setMin(0.05); conf.setMax(0.75); conf.setStep(0.05); conf.setExpression("I"); MultiSearch multi = new MultiSearch(); multi.setClassifier(j48); SetupGenerator generator = new SetupGenerator(); generator.setBaseObject(j48); generator.setParameters(new AbstractParameter[] { conf }); // output configuration System.out.println("\nSetupgenerator commandline:\n" + Utils.toCommandLine(generator)); // output commandlines System.out.println("\nCommandlines:\n"); Enumeration<Serializable> enm = generator.setups(); while (enm.hasMoreElements()) System.out.println(Utils.toCommandLine(enm.nextElement())); }
From source file:com.relationalcloud.main.Explanation.java
License:Open Source License
/**
 * Entry point: loads configuration, connects to the database, extracts the
 * transaction workload, and for each referenced table trains a J48
 * classifier that "explains" the partitioning, optionally writing the
 * explanation/justification columns back to the database.
 *
 * @param args unused; configuration comes from the property file named by
 *             the {@code prop} system property
 */
public static void main(String[] args) {
    // LOADING PROPERTY FILE AND DRIVER
    Properties ini = new Properties();
    try {
        ini.load(new FileInputStream(System.getProperty("prop")));
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }

    // Register the JDBC driver.
    try {
        Class.forName(ini.getProperty("driver"));
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }

    // LOAD PROPERTIES FROM CONFIGURATION FILE
    String connection = ini.getProperty("conn");
    String schemaname = ini.getProperty("schema");
    String user = ini.getProperty("user");
    String password = ini.getProperty("password");
    String txnLogTable = ini.getProperty("txnLogTable");
    String numb_trans_to_process = ini.getProperty("Explanation.numTxnsToExtractTemplates");
    int numPart = Integer.parseInt(ini.getProperty("numPartitions"));

    // Initialize the Justification Handler.
    ExplanationHandler jh = new ExplanationHandler(ini);
    System.out.println("Loading and processing " + jh.schemaname + " traces... considering prop file :"
            + jh.dbPropertyFile);

    try {
        // Open DB connections: one to the application schema, one to
        // information_schema (used to load the schema metadata).
        Connection conn = DriverManager.getConnection(connection + schemaname, user, password);
        Connection infschema_conn = DriverManager.getConnection(connection + "information_schema", user,
                password);
        Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname);

        // ANALYZE WORKLOADS EXTRACTING TABLES, ATTRIBUTES AND FREQUENCIES
        ExplanationWorkloadPrepocessor wa = ExplanationHandler.analyzeWorkload(txnLogTable,
                numb_trans_to_process, schemaname, conn, schema);

        // FOR EACH TABLE CLASSIFY AND POPULATE JUSTIFICATION COLUMN
        for (String tableProcessed : wa.getAllTableNames()) {
            System.out.println("-------------------------------------------");
            System.out.println("ANALYZING TABLE " + tableProcessed);

            // FETCH THE INSTANCE FROM THE DB AND SAMPLE IT
            Instances data = jh.generateInstancesForTable(tableProcessed, wa.getFeatures(tableProcessed),
                    conn);

            // IF THERE IS ONLY THE PARTITION LABEL, SKIP THE TABLE
            if (data.numAttributes() < 2) {
                System.out.println("No transactions touches this table, nothing to be done.");
                continue;
            }

            // INSTANTIATE THE CLASSIFIER.
            // NOTE(review): "-P" is not an obviously documented J48 flag and
            // "-C" reads the pruning confidence from the property file —
            // confirm the option set against the Weka version in use.
            String[] options;
            options = new String[3];
            options[0] = "-P";
            options[1] = "-C";
            options[2] = ini.getProperty("Explanation.j48PruningConfidence");
            J48 classifier = new J48(); // new instance of tree
            classifier.setOptions(options); // set the options

            Boolean attributeFilter = true;

            // ATTRIBUTE FILTERING: reduce attributes via CFS subset evaluation
            // with a backwards greedy search (alternatives left commented out).
            Instances newData;
            if (data.numClasses() > 1 && attributeFilter) {
                AttributeSelection filter = new AttributeSelection();
                //FIXME TRYING ALTERNATIVE ATTRIBUTE SELECTION STRATEGIES
                //InfoGainAttributeEval eval = new InfoGainAttributeEval();
                //Ranker search = new Ranker();
                //search.setNumToSelect(Integer.parseInt(ini.getProperty("Explanation.maxNumberOfAttribute","2")));
                CfsSubsetEval eval = new CfsSubsetEval();
                GreedyStepwise search = new GreedyStepwise();
                search.setSearchBackwards(true);
                filter.setEvaluator(eval);
                filter.setSearch(search);
                filter.setInputFormat(data);
                newData = Filter.useFilter(data, filter);
            } else {
                newData = data;
            }

            // Collect surviving attribute names, both for the report line and
            // for populating the explanation columns later.
            String atts = "";
            Enumeration e = newData.enumerateAttributes();
            ArrayList<String> attributesForPopulation = new ArrayList<String>();
            while (e.hasMoreElements()) {
                String s = ((Attribute) e.nextElement()).name();
                attributesForPopulation.add(s);
                atts += s + ", ";
            }
            atts = atts.substring(0, atts.length() - 2); // drop trailing ", "

            System.out.println("Attribute filtering reduced " + (data.numAttributes() - 1) + " to "
                    + (newData.numAttributes() - 1) + " (" + atts + ")");

            // Release the unfiltered copy before training.
            data = null;
            System.gc();

            if (newData.numInstances() < 1) {
                System.err.println("The are no data in the table, skipping classification");
                continue;
            }

            if (newData.numInstances() > 0) {
                if (newData.classAttribute().numValues() > 1) {
                    // TRAIN THE CLASSIFIER AND PRINT OUT CLASSIFIER RULES
                    ExplanationHandler.trainClassifier(newData, classifier);

                    // A single leaf means the tree sends every tuple to the
                    // same partition.
                    if (classifier.measureNumLeaves() == 1) {
                        int partitionvalue = (int) classifier.classifyInstance(newData.firstInstance());
                        System.out.println(
                                "The classifier decided to put all the tuplesi in the table in one partition: "
                                        + partitionvalue);
                        if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) {
                            jh.populateExplainedColumn(tableProcessed, partitionvalue, attributesForPopulation,
                                    conn);
                        }
                    }
                    // POPULATING THE justifiedpartition column with the result
                    // of this classifier if required
                    else if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) {
                        jh.populateJustifiedColumn(tableProcessed, classifier, attributesForPopulation, conn,
                                numPart, newData.classAttribute().enumerateValues());
                    }
                } else {
                    // easy case... the class attribute is unary!!
                    int partitionvalue = ((int) newData.firstInstance()
                            .value(newData.firstInstance().classIndex()));
                    System.out.println("The table is all stored in one partition, no need to use classifier");
                    if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) {
                        jh.populateExplainedColumn(tableProcessed, partitionvalue, attributesForPopulation,
                                conn);
                    }
                }
            } else
                throw new Exception("The Instances is empty");
        }

        // SET HASH PARTITION / REPLICATED PARTITION
        if (Boolean.parseBoolean(ini.getProperty("Explanation.populateHashColumn"))) {
            jh.populateHashPartition(conn);
        }
        if (Boolean.parseBoolean(ini.getProperty("Explanation.populateReplicatedColumn"))) {
            jh.populateReplicatedPartition(conn,
                    Boolean.parseBoolean(ini.getProperty("Explanation.defaultReplicate")));
        }

        conn.close();
    } catch (SQLException e) {
        e.printStackTrace();
    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}
From source file:com.relationalcloud.main.ExplanationSingleAttribute.java
License:Open Source License
/**
 * Single-attribute explanation analysis (deprecated in favor of the
 * {@code Explanation} entry point).
 *
 * For every table/column pair seen in the transaction trace, measures the
 * Pearson correlation between hashed column values and hashed partition
 * labels; when the absolute correlation exceeds the configured threshold,
 * trains a J48 tree on the (value, partition) pairs to explain the
 * partitioning.
 *
 * @param args unused; configuration comes from the property file named by
 *             the {@code prop} system property
 */
@Deprecated
public static void main(String[] args) {
    Properties ini = new Properties();
    try {
        ini.load(new FileInputStream(System.getProperty("prop")));
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }

    // Load configuration from the property file; the partition-label column
    // depends on the partitioning method in use.
    String schemaname = ini.getProperty("schemaname");
    String partitioningMethod = ini.getProperty("partitioningMethod");
    String pcol;
    if (partitioningMethod.equals("repGraph")) {
        System.out.println("Replication Graph: using replicated column");
        pcol = ini.getProperty("replicatedPartitionCol");
    } else {
        pcol = ini.getProperty("graphPartitionCol");
    }
    String accessLogTable = ini.getProperty("accessLogTable");
    String numb_trans_to_process = ini.getProperty("numb_trans_to_process");
    String txnLogTable = ini.getProperty("txnLogTable");
    String driver = ini.getProperty("driver");
    String connection = ini.getProperty("conn");
    String user = ini.getProperty("user");
    String password = ini.getProperty("password");

    System.out.println("Loading and processing " + schemaname + " traces...");

    // Register the JDBC driver.
    try {
        Class.forName(driver);
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }

    Connection conn;
    try {
        conn = DriverManager.getConnection(connection + schemaname, user, password);
        conn.setAutoCommit(true);
        Connection infschema_conn = DriverManager.getConnection(connection + "information_schema", user,
                password);
        Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname);
        Statement stmt = conn.createStatement();

        // NOTE: the paramenter numb_trans_to_process is used to limit
        // the number of transactions parsed to determine the which attributes
        // are common in the workload WHERE clauses. This can be a subset of the
        // overall set
        String sqlstring = "SELECT sqlstring FROM `" + txnLogTable + "` LIMIT " + numb_trans_to_process;
        ResultSet res = stmt.executeQuery(sqlstring);

        ExplanationWorkloadPrepocessor wa = new ExplanationWorkloadPrepocessor(schemaname, schema);

        // Parse every statement in the trace, timing the throughput.
        double tstart = System.currentTimeMillis();
        double i = 0;
        while (res.next()) {
            String sql = res.getString(1);
            // PARSE THE STATEMENT
            wa.processSql(sql);
            i++;
        }
        double tend = System.currentTimeMillis();

        System.out.println("Processed " + i + " statements in " + (tend - tstart) + "ms average:"
                + (tend - tstart) / i + "ms per statement");
        System.out.println("ANALISYS RESULTS:\n ");
        wa.printStatsByTableColumn();

        for (String str : wa.getAllTableNames()) {
            if (str == null)
                continue;
            System.out.println("-------------------------------------------");
            System.out.println("ANALYZING TABLE IN USED IN THE TRANSACTION TRACE " + str);

            for (SimpleCount sc : wa.getFeatures(str)) {
                // Collect paired samples: hashed column value vs. hashed
                // partition label, joined via the access log.
                // NOTE(review): the SQL is built by string concatenation from
                // config/trace-derived names — acceptable only if those are
                // trusted; confirm.
                ArrayList<Double> a0 = new ArrayList<Double>();
                ArrayList<Double> a1 = new ArrayList<Double>();
                sqlstring = "SELECT s." + sc.colname + ", g." + pcol + " FROM `" + accessLogTable
                        + "` g, relcloud_" + str + " s WHERE tableid = \"" + str
                        + "\" AND s.relcloud_id = g.tupleid";
                // System.out.println(sqlstring);
                res = stmt.executeQuery(sqlstring);
                while (res.next()) {
                    Object o1 = res.getObject(1);
                    Object o2 = res.getObject(2);
                    if (o1 != null && o2 != null) {
                        a0.add(new Double(o1.hashCode()));
                        a1.add(new Double(o2.hashCode()));
                    }
                }

                if (a0.size() >= 1) {
                    double[] d0 = new double[a0.size()];
                    double[] d1 = new double[a1.size()];
                    // "unary" means every sampled row carries the same
                    // partition label.
                    boolean unary = true;
                    for (int j = 0; j < a0.size(); j++) {
                        d0[j] = a0.get(j).doubleValue();
                        d1[j] = a1.get(j).doubleValue();
                        if (j > 0 && d1[j - 1] != d1[j])
                            unary = false;
                    }
                    if (unary) {
                        System.out.println("EASY CASE: " + str
                                + " is not partitioned and is stored in partition: " + d1[0]);
                    } else {
                        double correlation = PearsonCorrelation.getPearsonCorrelation(d0, d1);
                        correlationThreshold = Double.parseDouble(ini.getProperty("correlationThreshold"));
                        // if the correlation is high enough proceed to use decision
                        // trees.
                        if (Math.abs(correlation) > correlationThreshold) {
                            System.out.println("Testing " + str + "." + sc.colname + ", " + pcol
                                    + " correlation: " + correlation + " (HIGH)");
                            try {
                                // InstanceQuery query;
                                // query = new InstanceQuery();
                                // query.setUsername("bbb");
                                // query.setPassword("qwer");
                                // query.connectToDatabase();
                                // Instances data = query.retrieveInstances(sqlstring);
                                res.beforeFirst();
                                Instances data = WekaHelper.retrieveInstanceFromResultSet(res);
                                // set the last column to be the classIndex... is this
                                // correct?
                                data.setClassIndex(data.numAttributes() - 1);

                                // J48 needs a nominal class: convert the label
                                // column from numeric or string as appropriate.
                                Instances newData;
                                if (data.attribute(data.numAttributes() - 1).type() == Attribute.NUMERIC) {
                                    NumericToNominal ntn = new NumericToNominal();
                                    String[] options = new String[2];
                                    options[0] = "-R"; // "range"
                                    options[1] = "2"; // first attribute
                                    ntn.setOptions(options); // set options
                                    ntn.setInputFormat(data); // inform filter about dataset
                                    // **AFTER** setting options
                                    newData = Filter.useFilter(data, ntn); // apply fil
                                } else {
                                    StringToNominal ntn = new StringToNominal();
                                    String[] options = new String[2];
                                    options[0] = "-R"; // "range"
                                    options[1] = "2"; // first attribute
                                    ntn.setOptions(options); // set options
                                    ntn.setInputFormat(data); // inform filter about dataset
                                    // **AFTER** setting options
                                    newData = Filter.useFilter(data, ntn); // apply fil
                                }

                                // NOTE(review): "-P" is not an obviously
                                // documented J48 flag — confirm against the
                                // Weka version in use.
                                String[] options = new String[1];
                                options[0] = "-P";
                                J48 tree = new J48(); // new instance of tree
                                tree.setOptions(options); // set the options
                                if (!tree.getCapabilities().test(newData)) {
                                    System.err.println("ERROR the FOLLOWING DATA CANNOT BE PROCESED:"
                                            + newData.toSummaryString());
                                    System.err.println("QUERY WAS:" + sqlstring);
                                } else {
                                    long treeTstart = System.currentTimeMillis();
                                    tree.buildClassifier(newData); // build classifier
                                    long treeTend = System.currentTimeMillis();
                                    System.out.println("CLASSIFICATION CONFIDENCE: "
                                            + tree.getConfidenceFactor() + "\n TREE BUILDING TIME: "
                                            + (treeTend - treeTstart) + "ms \n" + tree.toString());
                                    System.out.println("TREE:" + tree.prefix());
                                }
                            } catch (Exception e) {
                                // TODO Auto-generated catch block
                                e.printStackTrace();
                            }
                        } else {
                            System.out.println("Testing " + str + "." + sc.colname + ", " + pcol
                                    + " correlation: " + correlation + " (LOW)");
                        }
                    }
                }
            }
        }
    } catch (SQLException e) {
        e.printStackTrace();
    }
}