List of usage examples for weka.classifiers.trees J48 toString
@Override
public String toString()
From source file:c4.pkg5crossv.Classifier.java
public static void C45() throws FileNotFoundException, IOException, Exception { Instances data = DataLoad.loadData("./src/data/irysy.arff"); //Ustawienie atrybutu decyzyjnego (ostatni atrybut) data.setClassIndex(data.numAttributes() - 1); //OPCJE://from w w w .j a v a 2s . co m //-U -> budowa drzewa bez przycinania (ostre liscie) //-C -> <wspolczynnik dokladnosci> - ustawienie wspolczynnika dokladnosci dla lisci (default 0.25) //-M -> ustawienie minimalnej liczby obiektow w lisciu dla ktorej lisc nie jest dzielony (default 2) //Ustalenie opcji String[] options = Utils.splitOptions("-U -M 10"); J48 tree = new J48(); tree.setOptions(options); //Ustawienie opcji tree.buildClassifier(data); // Tworzenie klasyfikatora (drzewa) System.out.println(tree.toString()); //Wypisanie drzewa w formie tekstowej System.out.println("TRAIN&TEST"); trainAndTest(); }
From source file:com.relationalcloud.main.ExplanationSingleAttribute.java
License:Open Source License
/** * @param args// w w w .j ava2s . c o m */ @Deprecated public static void main(String[] args) { Properties ini = new Properties(); try { ini.load(new FileInputStream(System.getProperty("prop"))); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } // loading properties from file String schemaname = ini.getProperty("schemaname"); String partitioningMethod = ini.getProperty("partitioningMethod"); String pcol; if (partitioningMethod.equals("repGraph")) { System.out.println("Replication Graph: using replicated column"); pcol = ini.getProperty("replicatedPartitionCol"); } else { pcol = ini.getProperty("graphPartitionCol"); } String accessLogTable = ini.getProperty("accessLogTable"); String numb_trans_to_process = ini.getProperty("numb_trans_to_process"); String txnLogTable = ini.getProperty("txnLogTable"); String driver = ini.getProperty("driver"); String connection = ini.getProperty("conn"); String user = ini.getProperty("user"); String password = ini.getProperty("password"); System.out.println("Loading and processing " + schemaname + " traces..."); // Register jdbcDriver try { Class.forName(driver); } catch (ClassNotFoundException e) { e.printStackTrace(); } Connection conn; try { conn = DriverManager.getConnection(connection + schemaname, user, password); conn.setAutoCommit(true); Connection infschema_conn = DriverManager.getConnection(connection + "information_schema", user, password); Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname); Statement stmt = conn.createStatement(); // NOTE: the paramenter numb_trans_to_process is used to limit // the number of transactions parsed to determine the which attributes // are common in the workload WHERE clauses. This can be a subset of the // overall set String sqlstring = "SELECT sqlstring FROM `" + txnLogTable + "` LIMIT " + numb_trans_to_process; ResultSet res = stmt.executeQuery(sqlstring); ExplanationWorkloadPrepocessor wa = new ExplanationWorkloadPrepocessor(schemaname, schema); double tstart = System.currentTimeMillis(); double i = 0; while (res.next()) { String sql = res.getString(1); // PARSE THE STATEMENT wa.processSql(sql); i++; } double tend = System.currentTimeMillis(); System.out.println("Processed " + i + " statements in " + (tend - tstart) + "ms average:" + (tend - tstart) / i + "ms per statement"); System.out.println("ANALISYS RESULTS:\n "); wa.printStatsByTableColumn(); for (String str : wa.getAllTableNames()) { if (str == null) continue; System.out.println("-------------------------------------------"); System.out.println("ANALYZING TABLE IN USED IN THE TRANSACTION TRACE " + str); for (SimpleCount sc : wa.getFeatures(str)) { ArrayList<Double> a0 = new ArrayList<Double>(); ArrayList<Double> a1 = new ArrayList<Double>(); sqlstring = "SELECT s." + sc.colname + ", g." + pcol + " FROM `" + accessLogTable + "` g, relcloud_" + str + " s WHERE tableid = \"" + str + "\" AND s.relcloud_id = g.tupleid"; // System.out.println(sqlstring); res = stmt.executeQuery(sqlstring); while (res.next()) { Object o1 = res.getObject(1); Object o2 = res.getObject(2); if (o1 != null && o2 != null) { a0.add(new Double(o1.hashCode())); a1.add(new Double(o2.hashCode())); } } if (a0.size() >= 1) { double[] d0 = new double[a0.size()]; double[] d1 = new double[a1.size()]; boolean unary = true; for (int j = 0; j < a0.size(); j++) { d0[j] = a0.get(j).doubleValue(); d1[j] = a1.get(j).doubleValue(); if (j > 0 && d1[j - 1] != d1[j]) unary = false; } if (unary) { System.out.println("EASY CASE: " + str + " is not partitioned and is stored in partition: " + d1[0]); } else { double correlation = PearsonCorrelation.getPearsonCorrelation(d0, d1); correlationThreshold = Double.parseDouble(ini.getProperty("correlationThreshold")); // if the correlation is high enough proceed to use decision // trees. if (Math.abs(correlation) > correlationThreshold) { System.out.println("Testing " + str + "." + sc.colname + ", " + pcol + " correlation: " + correlation + " (HIGH)"); try { // InstanceQuery query; // query = new InstanceQuery(); // query.setUsername("bbb"); // query.setPassword("qwer"); // query.connectToDatabase(); // Instances data = query.retrieveInstances(sqlstring); res.beforeFirst(); Instances data = WekaHelper.retrieveInstanceFromResultSet(res); // set the last column to be the classIndex... is this // correct? data.setClassIndex(data.numAttributes() - 1); Instances newData; if (data.attribute(data.numAttributes() - 1).type() == Attribute.NUMERIC) { NumericToNominal ntn = new NumericToNominal(); String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "2"; // first attribute ntn.setOptions(options); // set options ntn.setInputFormat(data); // inform filter about dataset // **AFTER** setting options newData = Filter.useFilter(data, ntn); // apply fil } else { StringToNominal ntn = new StringToNominal(); String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "2"; // first attribute ntn.setOptions(options); // set options ntn.setInputFormat(data); // inform filter about dataset // **AFTER** setting options newData = Filter.useFilter(data, ntn); // apply fil } String[] options = new String[1]; options[0] = "-P"; J48 tree = new J48(); // new instance of tree tree.setOptions(options); // set the options if (!tree.getCapabilities().test(newData)) { System.err.println("ERROR the FOLLOWING DATA CANNOT BE PROCESED:" + newData.toSummaryString()); System.err.println("QUERY WAS:" + sqlstring); } else { long treeTstart = System.currentTimeMillis(); tree.buildClassifier(newData); // build classifier long treeTend = System.currentTimeMillis(); System.out.println("CLASSIFICATION CONFIDENCE: " + tree.getConfidenceFactor() + "\n TREE BUILDING TIME: " + (treeTend - treeTstart) + "ms \n" + tree.toString()); System.out.println("TREE:" + tree.prefix()); } } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } else { System.out.println("Testing " + str + "." + sc.colname + ", " + pcol + " correlation: " + correlation + " (LOW)"); } } } } } } catch (SQLException e) { e.printStackTrace(); } }
From source file:com.relationalcloud.misc.JustifyAgnosticPartitioning.java
License:Open Source License
/** * @param args/*from ww w . j a va 2 s. com*/ */ public static void main(String[] args) { Properties ini = new Properties(); try { ini.load(new FileInputStream(System.getProperty("prop"))); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } // Register jdbcDriver try { Class.forName(ini.getProperty("driver")); } catch (ClassNotFoundException e) { e.printStackTrace(); } // READ FROM MYSQL THE TPCC TRANSACTION LOG, PARSE EACH STATEMENT AND TEST // VARIOUS PARSER FUNCTIONALITIES System.out.println("Loading and processing TPCC traces..."); Connection conn; try { String schemaname = ini.getProperty("schema"); String connection = ini.getProperty("conn"); String user = ini.getProperty("user"); String password = ini.getProperty("password"); conn = DriverManager.getConnection(connection + schemaname, user, password); Connection infschema_conn = DriverManager.getConnection(connection + "information_schema", user, password); Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname); ExplanationWorkloadPrepocessor wa = new ExplanationWorkloadPrepocessor(schemaname, schema); conn.setAutoCommit(true); Statement stmt = conn.createStatement(); String txnLogTable = ini.getProperty("txnLogTable"); String sqlstring = "SELECT sqlstring FROM `" + txnLogTable + "`"; ResultSet res = stmt.executeQuery(sqlstring); double tstart = System.currentTimeMillis(); double i = 0; while (res.next()) { String sql = res.getString(1); // PARSE THE STATEMENT wa.processSql(sql); // System.out.println("SQL: " +sql); i++; } double tend = System.currentTimeMillis(); String accessLogTable = ini.getProperty("accessLogTable"); System.out.println("Processed " + i + " statements in " + (tend - tstart) + "ms average:" + (tend - tstart) / i + "ms per statement"); for (String str : wa.getAllTableNames()) { System.out.println("-------------------------------------------"); System.out.println("ANALYZING TABLE " + str); for (SimpleCount sc : wa.getFeatures(str)) { ArrayList<Double> a0 = new ArrayList<Double>(); ArrayList<Double> a1 = new ArrayList<Double>(); sqlstring = "SELECT s." + sc.colname + ", g.partition FROM `" + accessLogTable + "` g, " + str + " s WHERE tableid = \"" + str + "\" AND s.id = g.id"; System.out.println(sqlstring); res = stmt.executeQuery(sqlstring); while (res.next()) { a0.add(new Double(res.getObject(1).hashCode())); a1.add(new Double(res.getObject(2).hashCode())); } if (a0.size() >= 1) { double[] d0 = new double[a0.size()]; double[] d1 = new double[a1.size()]; boolean unary = true; for (int j = 0; j < a0.size(); j++) { d0[j] = a0.get(j).doubleValue(); d1[j] = a1.get(j).doubleValue(); if (j > 0 && d1[j - 1] != d1[j]) unary = false; } if (unary) { System.out.println("EASY CASE: " + str + " is not partitioned and is stored in partition: " + d1[0]); } else { double correlation = PearsonCorrelation.getPearsonCorrelation(d0, d1); correlationThreshold = Double.parseDouble(ini.getProperty("correlationThreshold")); // if the correlation is high enough proceed to use decision // trees. if (Math.abs(correlation) > correlationThreshold) { System.out.println("Testing " + str + "." + sc.colname + ", g.partition correlation: " + correlation + " (HIGH)"); try { // InstanceQuery query; // query = new InstanceQuery(); // query.setUsername("bbb"); // query.setPassword("qwer"); // query.connectToDatabase(); // Instances data = query.retrieveInstances(sqlstring); res.beforeFirst(); Instances data = retrieveInstanceFromResultSet(res); // set the last column to be the classIndex... is this // correct? data.setClassIndex(data.numAttributes() - 1); Instances newData; if (data.attribute(data.numAttributes() - 1).type() == Attribute.NUMERIC) { NumericToNominal ntn = new NumericToNominal(); String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "2"; // first attribute ntn.setOptions(options); // set options ntn.setInputFormat(data); // inform filter about dataset // **AFTER** setting options newData = Filter.useFilter(data, ntn); // apply fil } else { StringToNominal ntn = new StringToNominal(); String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "2"; // first attribute ntn.setOptions(options); // set options ntn.setInputFormat(data); // inform filter about dataset // **AFTER** setting options newData = Filter.useFilter(data, ntn); // apply fil } String[] options = new String[1]; options[0] = "-P"; J48 tree = new J48(); // new instance of tree tree.setOptions(options); // set the options if (!tree.getCapabilities().test(newData)) { System.err.println("ERROR the FOLLOWING DATA CANNOT BE PROCESED:" + newData.toSummaryString()); System.err.println("QUERY WAS:" + sqlstring); } else { tree.buildClassifier(newData); // build classifier } System.out.println("CLASSIFICATION CONFIDENCE: " + tree.getConfidenceFactor() + "\n " + tree.toString()); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } else { System.out.println("Testing " + str + "." + sc.colname + ", g.partition correlation: " + correlation + " (LOW)"); } } } } } } catch (SQLException e) { e.printStackTrace(); } }
From source file:controller.BothClassificationsServlet.java
@Override protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { request.setCharacterEncoding("UTF-8"); String dir = "/data/"; String path = getServletContext().getRealPath(dir); String action = request.getParameter("action"); switch (action) { case "create": { String fileName = request.getParameter("file"); String aux = fileName.substring(0, fileName.indexOf(".")); String pathInput = path + "/" + request.getParameter("file"); String pathTrainingOutput = path + "/" + aux + "-training-arff.txt"; String pathTestOutput = path + "/" + aux + "-test-arff.txt"; String pathBothClassifications = path + "/" + aux + "-bothClassifications.txt"; String name = request.getParameter("name"); int range = Integer.parseInt(request.getParameter("range")); int size = Integer.parseInt(request.getParameter("counter")); String[] columns = new String[size]; String[] types = new String[size]; int[] positions = new int[size]; int counter = 0; for (int i = 0; i < size; i++) { if (request.getParameter("column-" + (i + 1)) != null) { columns[counter] = request.getParameter("column-" + (i + 1)); types[counter] = request.getParameter("type-" + (i + 1)); positions[counter] = Integer.parseInt(request.getParameter("position-" + (i + 1))); counter++;/* ww w .j a v a2s. co m*/ } } FormatFiles.convertTxtToArff(pathInput, pathTrainingOutput, pathTestOutput, name, columns, types, positions, counter, range); try { J48 j48 = new J48(); BufferedReader readerTraining = new BufferedReader(new FileReader(pathTrainingOutput)); Instances instancesTraining = new Instances(readerTraining); instancesTraining.setClassIndex(instancesTraining.numAttributes() - 1); j48.buildClassifier(instancesTraining); BufferedReader readerTest = new BufferedReader(new FileReader(pathTestOutput)); //BufferedReader readerTest = new BufferedReader(new FileReader(pathTrainingOutput)); Instances instancesTest = new Instances(readerTest); instancesTest.setClassIndex(instancesTest.numAttributes() - 1); int correctsDecisionTree = 0; for (int i = 0; i < instancesTest.size(); i++) { Instance instance = instancesTest.get(i); double correctValue = instance.value(instance.attribute(instancesTest.numAttributes() - 1)); double classification = j48.classifyInstance(instance); if (correctValue == classification) { correctsDecisionTree++; } } Evaluation eval = new Evaluation(instancesTraining); eval.evaluateModel(j48, instancesTest); PrintWriter writer = new PrintWriter( new BufferedWriter(new FileWriter(pathBothClassifications, false))); writer.println("?rvore de Deciso\n\n"); writer.println(j48.toString()); writer.println(""); writer.println(""); writer.println("Results"); writer.println(eval.toSummaryString()); NaiveBayes naiveBayes = new NaiveBayes(); naiveBayes.buildClassifier(instancesTraining); eval = new Evaluation(instancesTraining); eval.evaluateModel(naiveBayes, instancesTest); int correctsNaiveBayes = 0; for (int i = 0; i < instancesTest.size(); i++) { Instance instance = instancesTest.get(i); double correctValue = instance.value(instance.attribute(instancesTest.numAttributes() - 1)); double classification = naiveBayes.classifyInstance(instance); if (correctValue == classification) { correctsNaiveBayes++; } } writer.println("Naive Bayes\n\n"); writer.println(naiveBayes.toString()); writer.println(""); writer.println(""); writer.println("Results"); writer.println(eval.toSummaryString()); writer.close(); response.sendRedirect("BothClassifications?action=view&correctsDecisionTree=" + correctsDecisionTree + "&correctsNaiveBayes=" + correctsNaiveBayes + "&totalTest=" + instancesTest.size() + "&totalTrainig=" + instancesTraining.size() + "&range=" + range + "&fileName=" + aux + "-bothClassifications.txt"); } catch (Exception e) { System.out.println(e.getMessage()); response.sendRedirect("Navigation?action=decisionTree"); } break; } default: response.sendError(404); } }
From source file:controller.DecisionTreeBean.java
public void generateTree(int positiontomine) { System.out.println("Start Generating Tree"); model = new DefaultDiagramModel(); model.setMaxConnections(-1);// w ww . j a v a2s . c o m try { inst.setClassIndex(positiontomine); J48 tree; String[] options = new String[1]; options[0] = "-U"; tree = new J48(); tree.setOptions(options); // set the options tree.buildClassifier(inst); // build classifier dis = inst.toSummaryString(); System.out.println(tree.graph()); System.out.println(tree.toString()); List<String> liNodeStr = new LinkedList<>(); List<String> liConnStr = new LinkedList<>(); liNodeElement = new LinkedList<>(); liConnElement = new LinkedList<>(); BufferedReader br = new BufferedReader(new StringReader(tree.graph())); br.readLine(); String tmp; while ((tmp = br.readLine()) != null) { if (tmp.contains("}")) { break; } else if (tmp.contains("->")) { liConnStr.add(tmp); } else { liNodeStr.add(tmp); } } System.out.println(liConnStr); System.out.println(liNodeStr); for (String s : liNodeStr) { String[] arr = s.split(" "); String entitie1 = arr[0]; arr = s.split("\""); String entitie2 = arr[1]; System.out.println("ID:" + entitie1 + " Name:" + entitie2); TreeElement te = new TreeElement(entitie1, entitie2); liNodeElement.add(te); } for (String s : liConnStr) { String[] arr = s.split(" "); arr = arr[0].split("->"); String from = arr[0]; String to = arr[1]; arr = s.split("\""); String label = arr[1]; System.out.println("From:" + from + " To:" + to + "Label:" + label); TreeConector ce = new TreeConector(from, to, label); liConnElement.add(ce); } //----------------------------------------------------------------------------------------------- for (TreeElement te : liNodeElement) { if (te.getID().equals("N0")) { System.out.println("inside"); genlevel(te, 0); te.setPosition(25); genposition(te, 50); } } for (TreeElement te : liNodeElement) { Element el = new Element(te, te.getPosition() + "em", te.getLevel() * 15 + "em"); el.addEndPoint(new BlankEndPoint(EndPointAnchor.TOP)); el.addEndPoint(new BlankEndPoint(EndPointAnchor.BOTTOM)); model.addElement(el); } List<Element> ellist = model.getElements(); for (TreeConector tc : liConnElement) { Element beginn = null; for (Element e : ellist) { TreeElement t; t = (TreeElement) e.getData(); if (t.getID().equals(tc.getFrom())) { beginn = e; break; } } Element endeee = null; for (Element e : ellist) { TreeElement t; t = (TreeElement) e.getData(); if (t.getID().equals(tc.getTo())) { endeee = e; break; } } StraightConnector connector = new StraightConnector(); connector.setPaintStyle("{strokeStyle:'#F28D2A', lineWidth:3}"); connector.setHoverPaintStyle("{strokeStyle:'#F28D2A'}"); Connection con = new Connection(beginn.getEndPoints().get(1), endeee.getEndPoints().get(0), connector); con.getOverlays().add(new LabelOverlay(tc.getLabel(), "flow-label", 0.5)); model.connect(con); } } catch (Exception ex) { Logger.getLogger(DecisionTreeBean.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:controller.DecisionTreeServlet.java
@Override protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { request.setCharacterEncoding("UTF-8"); String dir = "/data/"; String path = getServletContext().getRealPath(dir); String action = request.getParameter("action"); switch (action) { case "create": { String fileName = request.getParameter("file"); String aux = fileName.substring(0, fileName.indexOf(".")); String pathInput = path + "/" + request.getParameter("file"); String pathTrainingOutput = path + "/" + aux + "-training-arff.txt"; String pathTestOutput = path + "/" + aux + "-test-arff.txt"; String pathDecisionTree = path + "/" + aux + "-decisionTree.txt"; String name = request.getParameter("name"); int range = Integer.parseInt(request.getParameter("range")); int size = Integer.parseInt(request.getParameter("counter")); String[] columns = new String[size]; String[] types = new String[size]; int[] positions = new int[size]; int counter = 0; for (int i = 0; i < size; i++) { if (request.getParameter("column-" + (i + 1)) != null) { columns[counter] = request.getParameter("column-" + (i + 1)); types[counter] = request.getParameter("type-" + (i + 1)); positions[counter] = Integer.parseInt(request.getParameter("position-" + (i + 1))); counter++;//ww w . j a va 2 s .com } } FormatFiles.convertTxtToArff(pathInput, pathTrainingOutput, pathTestOutput, name, columns, types, positions, counter, range); try { J48 j48 = new J48(); BufferedReader readerTraining = new BufferedReader(new FileReader(pathTrainingOutput)); Instances instancesTraining = new Instances(readerTraining); instancesTraining.setClassIndex(instancesTraining.numAttributes() - 1); j48.buildClassifier(instancesTraining); BufferedReader readerTest = new BufferedReader(new FileReader(pathTestOutput)); //BufferedReader readerTest = new BufferedReader(new FileReader(pathTrainingOutput)); Instances instancesTest = new Instances(readerTest); instancesTest.setClassIndex(instancesTest.numAttributes() - 1); int corrects = 0; int truePositive = 0; int trueNegative = 0; int falsePositive = 0; int falseNegative = 0; for (int i = 0; i < instancesTest.size(); i++) { Instance instance = instancesTest.get(i); double correctValue = instance.value(instance.attribute(instancesTest.numAttributes() - 1)); double classification = j48.classifyInstance(instance); if (correctValue == classification) { corrects++; } if (correctValue == 1 && classification == 1) { truePositive++; } if (correctValue == 1 && classification == 0) { falseNegative++; } if (correctValue == 0 && classification == 1) { falsePositive++; } if (correctValue == 0 && classification == 0) { trueNegative++; } } Evaluation eval = new Evaluation(instancesTraining); eval.evaluateModel(j48, instancesTest); PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter(pathDecisionTree, false))); writer.println(j48.toString()); writer.println(""); writer.println(""); writer.println("Results"); writer.println(eval.toSummaryString()); writer.close(); response.sendRedirect("DecisionTree?action=view&corrects=" + corrects + "&totalTest=" + instancesTest.size() + "&totalTrainig=" + instancesTraining.size() + "&truePositive=" + truePositive + "&trueNegative=" + trueNegative + "&falsePositive=" + falsePositive + "&falseNegative=" + falseNegative + "&fileName=" + aux + "-decisionTree.txt"); } catch (Exception e) { System.out.println(e.getMessage()); response.sendRedirect("Navigation?action=decisionTree"); } break; } default: response.sendError(404); } }
From source file:DataMiningLogHistoriKIRI.DecisionTree.java
public String[] id3(Instances arff) { J48 tree = new J48(); try {/*from w w w . j a v a2 s. c om*/ tree.buildClassifier(arff); } catch (Exception ex) { Logger.getLogger(Controller.class.getName()).log(Level.SEVERE, null, ex); } System.out.println(tree.toString()); int nilaiBenar = 0, resultInt; float result = 0; for (int i = 0; i < arff.numInstances(); i++) { try { result = (float) tree.classifyInstance(arff.instance(i)); resultInt = Math.round(result); //System.out.println(dataAfterPreprocessing.get(i)[6] + " " + arff.instance(i).stringValue(6)); if (resultInt == Integer.parseInt(arff.instance(i).stringValue(6))) { nilaiBenar++; } } catch (Exception ex) { Logger.getLogger(Controller.class.getName()).log(Level.SEVERE, null, ex); } } System.out.println("nilai: " + nilaiBenar + " " + arff.numInstances()); double confident = nilaiBenar * 1.0 / arff.numInstances() * 100; System.out.println("Confident = " + confident + "%"); String[] result2 = new String[5]; return result2; }
From source file:de.ugoe.cs.cpdp.dataprocessing.TopMetricFilter.java
License:Apache License
private void determineTopKAttributes(Instances testdata, SetUniqueList<Instances> traindataSet) throws Exception { Integer[] counts = new Integer[traindataSet.get(0).numAttributes() - 1]; IntStream.range(0, counts.length).forEach(val -> counts[val] = 0); for (Instances traindata : traindataSet) { J48 decisionTree = new J48(); decisionTree.buildClassifier(traindata); int k = 0; for (int j = 0; j < traindata.numAttributes(); j++) { if (j != traindata.classIndex()) { if (decisionTree.toString().contains(traindata.attribute(j).name())) { counts[k] = counts[k] + 1; }/*from w w w . j ava 2 s .com*/ k++; } } } int[] topkIndex = new int[counts.length]; IntStream.range(0, counts.length).forEach(val -> topkIndex[val] = val); SortUtils.quicksort(counts, topkIndex, true); // get CFSs for each training set List<Set<Integer>> cfsSets = new LinkedList<>(); for (Instances traindata : traindataSet) { boolean selectionSuccessful = false; boolean secondAttempt = false; Instances traindataCopy = null; do { try { if (secondAttempt) { AttributeSelection attsel = new AttributeSelection(); CfsSubsetEval eval = new CfsSubsetEval(); GreedyStepwise search = new GreedyStepwise(); search.setSearchBackwards(true); attsel.setEvaluator(eval); attsel.setSearch(search); attsel.SelectAttributes(traindataCopy); Set<Integer> cfsSet = new HashSet<>(); for (int attr : attsel.selectedAttributes()) { cfsSet.add(attr); } cfsSets.add(cfsSet); selectionSuccessful = true; } else { AttributeSelection attsel = new AttributeSelection(); CfsSubsetEval eval = new CfsSubsetEval(); GreedyStepwise search = new GreedyStepwise(); search.setSearchBackwards(true); attsel.setEvaluator(eval); attsel.setSearch(search); attsel.SelectAttributes(traindata); Set<Integer> cfsSet = new HashSet<>(); for (int attr : attsel.selectedAttributes()) { cfsSet.add(attr); } cfsSets.add(cfsSet); selectionSuccessful = true; } } catch (IllegalArgumentException e) { String regex = "A nominal attribute \\((.*)\\) cannot have duplicate labels.*"; Pattern p = Pattern.compile(regex); Matcher m = p.matcher(e.getMessage()); if (!m.find()) { // cannot treat problem, rethrow exception throw e; } String attributeName = m.group(1); int attrIndex = traindata.attribute(attributeName).index(); if (secondAttempt) { traindataCopy = WekaUtils.upscaleAttribute(traindataCopy, attrIndex); } else { traindataCopy = WekaUtils.upscaleAttribute(traindata, attrIndex); } Console.traceln(Level.FINE, "upscaled attribute " + attributeName + "; restarting training"); secondAttempt = true; continue; } } while (!selectionSuccessful); // dummy loop for internal continue } double[] coverages = new double[topkIndex.length]; for (Set<Integer> cfsSet : cfsSets) { Set<Integer> topkSet = new HashSet<>(); for (int k = 0; k < topkIndex.length; k++) { topkSet.add(topkIndex[k]); coverages[k] += (coverage(topkSet, cfsSet) / traindataSet.size()); } } double bestCoverageValue = Double.MIN_VALUE; int bestCoverageIndex = 0; for (int i = 0; i < coverages.length; i++) { if (coverages[i] > bestCoverageValue) { bestCoverageValue = coverages[i]; bestCoverageIndex = i; } } // build correlation matrix SpearmansCorrelation corr = new SpearmansCorrelation(); double[][] correlationMatrix = new double[bestCoverageIndex][bestCoverageIndex]; for (Instances traindata : traindataSet) { double[][] vectors = new double[bestCoverageIndex][traindata.size()]; for (int i = 0; i < traindata.size(); i++) { for (int j = 0; j < bestCoverageIndex; j++) { vectors[j][i] = traindata.get(i).value(topkIndex[j]); } } for (int j = 0; j < bestCoverageIndex; j++) { for (int k = j + 1; k < bestCoverageIndex; k++) { correlationMatrix[j][k] = Math.abs(corr.correlation(vectors[j], vectors[k])); } } } Set<Integer> topkSetIndexSet = new TreeSet<>(); // j<30 ensures that the computational time does not explode since the powerset is 2^n in // complexity for (int j = 0; j < bestCoverageIndex && j < 30; j++) { topkSetIndexSet.add(j); } Set<Set<Integer>> allCombinations = Sets.powerSet(topkSetIndexSet); double bestOptCoverage = Double.MIN_VALUE; Set<Integer> opttopkSetIndexSet = null; for (Set<Integer> combination : allCombinations) { if (isUncorrelated(correlationMatrix, combination)) { double currentCoverage = 0.0; Set<Integer> topkCombination = new TreeSet<>(); for (Integer index : combination) { topkCombination.add(topkIndex[index]); } for (Set<Integer> cfsSet : cfsSets) { currentCoverage += (coverage(topkCombination, cfsSet) / traindataSet.size()); } if (currentCoverage > bestOptCoverage) { bestOptCoverage = currentCoverage; opttopkSetIndexSet = combination; } } } Set<Integer> opttopkIndex = new TreeSet<>(); for (Integer index : opttopkSetIndexSet) { opttopkIndex.add(topkIndex[index]); } Console.traceln(Level.FINE, "selected the following metrics:"); for (Integer index : opttopkIndex) { Console.traceln(Level.FINE, traindataSet.get(0).attribute(index).name()); } // finally remove attributes for (int j = testdata.numAttributes() - 1; j >= 0; j--) { if (j != testdata.classIndex() && !opttopkIndex.contains(j)) { testdata.deleteAttributeAt(j); for (Instances traindata : traindataSet) { traindata.deleteAttributeAt(j); } } } }
From source file:KFST.featureSelection.embedded.TreeBasedMethods.DecisionTreeBasedMethod.java
License:Open Source License
/** * {@inheritDoc }//from ww w . j a v a 2 s. c o m */ @Override protected String buildClassifier(Instances dataTrain) { try { if (TREE_TYPE == TreeType.C45) { J48 decisionTreeC45 = new J48(); decisionTreeC45.setConfidenceFactor((float) confidenceValue); decisionTreeC45.setMinNumObj(minNumSampleInLeaf); decisionTreeC45.buildClassifier(dataTrain); return decisionTreeC45.toString(); } else if (TREE_TYPE == TreeType.RANDOM_TREE) { RandomTree decisionTreeRandomTree = new RandomTree(); decisionTreeRandomTree.setKValue(randomTreeKValue); decisionTreeRandomTree.setMaxDepth(randomTreeMaxDepth); decisionTreeRandomTree.setMinNum(randomTreeMinNum); decisionTreeRandomTree.setMinVarianceProp(randomTreeMinVarianceProp); decisionTreeRandomTree.buildClassifier(dataTrain); return decisionTreeRandomTree.toString(); } } catch (Exception ex) { Logger.getLogger(DecisionTreeBasedMethod.class.getName()).log(Level.SEVERE, null, ex); } return ""; }
From source file:meddle.TrainModelByDomainOS.java
License:Open Source License
/** * Given a classifier model, print out the tree graph and its html report. * * @param j48//from www. ja v a 2 s . c o m * - the classifier model * @param domainOS * - domain,os name * @param mem * - the measurement results * */ public static void doGraphicOutput(J48 j48, String domainOS, MetaEvaluationMeasures mem) { try { String on = RConfig.dtGraphFolder + domainOS + ".dot"; String png = RConfig.dtGraphFolder + domainOS + ".png"; BufferedWriter bw = new BufferedWriter(new FileWriter(on)); bw.write(j48.graph()); bw.close(); String sum = "<h3>" + mem.info.domain + "(" + mem.info.OS + ")</h3>\n"; sum += "Accuracy: " + mem.accuracy + "<br/> \n"; sum += "AUC: " + mem.AUC + "<br/> \n"; sum += "False Positive Rate: " + mem.falsePositiveRate + "<br/> \n"; sum += "False Negative Rate: " + mem.falseNegativeRate + "<br/> <br/> \n"; sum += "Initial Number of Samples: " + mem.info.initNumTotal + "<br/>\n"; sum += "Initial Number of Positive: " + mem.info.initNumPos + "<br/>\n"; sum += "Initial Number of Negative: " + mem.info.initNumNeg + "<br/><br/>\n"; sum += ">>> After balancing ...<br/> \n"; sum += "Trained Number of Samples: " + mem.numTotal + "<br/>\n"; sum += "Trained Number of Positive: " + mem.numPositive + "<br/>\n"; sum += "Trained Number of Negative: " + mem.numNegative + "<br/></br/>\n"; sum += "<img src='" + domainOS + ".png'/><br/> <br/> \n"; sum += j48.toString().replace("\n", "<br/>"); sum += "Number of Rules: " + j48.measureNumRules() + "<br/>\n"; sum += "<a href='../domain_os/" + mem.info.fileNameRelative + "'>training data</a>"; sum = "<html><body>" + sum; sum += "</body></html>"; String cmd = RConfig.DOT_PATH + " -o " + png + " " + on + " -Tpng"; // http://www.graphviz.org/content/output-formats#ddot // need to install graphviz tool // Mac OX: brew install graphviz // Ubuntu: sudo apt-get install graphviz String html = RConfig.dtGraphFolder + domainOS + ".html"; bw = new BufferedWriter(new FileWriter(html)); bw.write(sum); bw.close(); Runtime.getRuntime().exec(cmd); } catch (IOException e) { e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } }