List of usage examples for weka.core Instances enumerateInstances
public Enumeration<Instance> enumerateInstances()
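Before the per-project examples, a minimal, self-contained sketch of the call itself; the ARFF path and the surrounding class name are placeholders, not taken from any project below:

import java.util.Enumeration;

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class EnumerateInstancesExample {
    public static void main(String[] args) throws Exception {
        // Load any dataset; "data/iris.arff" is a placeholder path.
        Instances data = DataSource.read("data/iris.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // enumerateInstances() returns an Enumeration over the dataset
        // in row order, without copying the instances.
        Enumeration<Instance> en = data.enumerateInstances();
        while (en.hasMoreElements()) {
            Instance ins = en.nextElement();
            System.out.println(ins);
        }
    }
}

Note that very old Weka releases declare the return type as a raw Enumeration, so the casts seen in several examples below ((Instance) instEnum.nextElement()) are needed there.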
From source file: cerebro.Id3.java
License: Open Source License

/**
 * Method for building an Id3 tree.
 *
 * @param data the training data
 * @exception Exception if decision tree can't be built successfully
 */
private void makeTree(Instances data) throws Exception {

    // Check if no instances have reached this node.
    if (data.numInstances() == 0) {
        m_Attribute = null;
        m_ClassValue = Instance.missingValue();
        m_Distribution = new double[data.numClasses()];
        return;
    }

    // Compute attribute with maximum information gain.
    double[] infoGains = new double[data.numAttributes()];
    Enumeration attEnum = data.enumerateAttributes();
    while (attEnum.hasMoreElements()) {
        Attribute att = (Attribute) attEnum.nextElement();
        infoGains[att.index()] = computeInfoGain(data, att);
    }
    m_Attribute = data.attribute(Utils.maxIndex(infoGains));

    // Make leaf if information gain is zero.
    // Otherwise create successors.
    if (Utils.eq(infoGains[m_Attribute.index()], 0)) {
        m_Attribute = null;
        m_Distribution = new double[data.numClasses()];
        Enumeration instEnum = data.enumerateInstances();
        while (instEnum.hasMoreElements()) {
            Instance inst = (Instance) instEnum.nextElement();
            m_Distribution[(int) inst.classValue()]++;
        }
        Utils.normalize(m_Distribution);
        m_ClassValue = Utils.maxIndex(m_Distribution);
        m_ClassAttribute = data.classAttribute();
    } else {
        Instances[] splitData = splitData(data, m_Attribute);
        m_Successors = new Id3[m_Attribute.numValues()];
        for (int j = 0; j < m_Attribute.numValues(); j++) {
            m_Successors[j] = new Id3();
            m_Successors[j].makeTree(splitData[j]);
        }
    }
}
From source file: cerebro.Id3.java
License: Open Source License

/**
 * Computes the entropy of a dataset.
 *
 * @param data the data for which entropy is to be computed
 * @return the entropy of the data's class distribution
 * @throws Exception if computation fails
 */
private double computeEntropy(Instances data) throws Exception {
    double[] classCounts = new double[data.numClasses()];
    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        classCounts[(int) inst.classValue()]++;
    }
    double entropy = 0;
    for (int j = 0; j < data.numClasses(); j++) {
        if (classCounts[j] > 0) {
            entropy -= classCounts[j] * Utils.log2(classCounts[j]);
        }
    }
    entropy /= (double) data.numInstances();
    return entropy + Utils.log2(data.numInstances());
}
From source file: cerebro.Id3.java
License: Open Source License

/**
 * Splits a dataset according to the values of a nominal attribute.
 *
 * @param data the data which is to be split
 * @param att the attribute to be used for splitting
 * @return the sets of instances produced by the split
 */
private Instances[] splitData(Instances data, Attribute att) {
    Instances[] splitData = new Instances[att.numValues()];
    for (int j = 0; j < att.numValues(); j++) {
        splitData[j] = new Instances(data, data.numInstances());
    }
    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        splitData[(int) inst.value(att)].add(inst);
    }
    for (int i = 0; i < splitData.length; i++) {
        splitData[i].compactify();
    }
    return splitData;
}
From source file: cn.ict.zyq.bestConf.bestConf.BestConf.java
License: Open Source License

public static ArrayList<Attribute> scaleDownDetour(Instances previousSet, Instance center) {
    ArrayList<Attribute> localAtts = new ArrayList<Attribute>();
    int attNum = center.numAttributes();
    int pos = previousSet.attribute(PerformanceAttName).index();

    // traverse each dimension
    Enumeration<Instance> enu;
    double minDis;
    for (int i = 0; i < attNum; i++) {
        if (i == pos)
            continue;

        enu = previousSet.enumerateInstances();
        minDis = Double.MAX_VALUE;
        while (enu.hasMoreElements()) {
            Instance ins = enu.nextElement();
            if (!ins.equals(center))
                minDis = Math.min((double) ((int) (Math.abs(ins.value(i) - center.value(i)) * 100)) / 100.0,
                        minDis);
        }

        // now we set the range
        Properties p1 = new Properties();
        double upper = center.value(i) + minDis, lower = center.value(i) - minDis;
        TreeSet<Double> detourSet = new TreeSet<Double>();
        detourSet.add(upper);
        detourSet.add(lower);
        detourSet.add(previousSet.attribute(i).getUpperNumericBound());
        detourSet.add(previousSet.attribute(i).getLowerNumericBound());
        switch (detourSet.size()) {
        case 1:
            upper = lower = detourSet.first();
            break;
        case 2:
            upper = detourSet.last();
            lower = detourSet.first();
            break;
        case 3:
            upper = lower = detourSet.higher(detourSet.first());
            break;
        default: // case 4
            upper = detourSet.lower(detourSet.last());
            lower = detourSet.higher(detourSet.first());
            break;
        }
        p1.setProperty("range", "[" + String.valueOf(lower) + "," + String.valueOf(upper) + "]");
        ProtectedProperties prop1 = new ProtectedProperties(p1);
        localAtts.add(new Attribute(previousSet.attribute(i).name(), prop1));
    }
    return localAtts;
}
From source file: cn.ict.zyq.bestConf.bestConf.BestConf.java
License: Open Source License

public static void testCOMT2() throws Exception {
    BestConf bestconf = new BestConf();
    Instances trainingSet = DataIOFile.loadDataFromArffFile("data/trainingBestConf0.arff");
    trainingSet.setClassIndex(trainingSet.numAttributes() - 1);

    Instances samplePoints = LHSInitializer.getMultiDimContinuous(bestconf.getAttributes(),
            InitialSampleSetSize, false);
    samplePoints.insertAttributeAt(trainingSet.classAttribute(), samplePoints.numAttributes());
    samplePoints.setClassIndex(samplePoints.numAttributes() - 1);

    COMT2 comt = new COMT2(samplePoints, COMT2Iteration);
    comt.buildClassifier(trainingSet);

    Evaluation eval = new Evaluation(trainingSet);
    eval.evaluateModel(comt, trainingSet);
    System.err.println(eval.toSummaryString());

    Instance best = comt.getInstanceWithPossibleMaxY(samplePoints.firstInstance());
    Instances bestInstances = new Instances(trainingSet, 2);
    bestInstances.add(best);
    DataIOFile.saveDataToXrffFile("data/trainingBestConf_COMT2.arff", bestInstances);

    // now we output the training set with the class value updated as the predicted value
    Instances output = new Instances(trainingSet, trainingSet.numInstances());
    Enumeration<Instance> enu = trainingSet.enumerateInstances();
    while (enu.hasMoreElements()) {
        Instance ins = enu.nextElement();
        double[] values = ins.toDoubleArray();
        values[values.length - 1] = comt.classifyInstance(ins);
        output.add(ins.copy(values));
    }
    DataIOFile.saveDataToXrffFile("data/trainingBestConf0_predict.xrff", output);
}
From source file: cn.ict.zyq.bestConf.bestConf.sampler.ConfigSampler.java
License: Open Source License

private static ArrayList<Attribute> scaleDownNeighbordists(Instances previousSet, Instance center) {
    ArrayList<Attribute> localAtts = new ArrayList<Attribute>();
    int attNum = center.numAttributes();
    int pos = -1;
    if (previousSet.attribute(PerformanceAttName) != null)
        pos = previousSet.attribute(PerformanceAttName).index();

    // traverse each dimension
    Enumeration<Instance> enu;
    double[] minDists = new double[2];
    double val;
    for (int i = 0; i < attNum; i++) {
        if (i == pos)
            continue;

        enu = previousSet.enumerateInstances();
        minDists[0] = 1 - Double.MAX_VALUE;
        minDists[1] = Double.MAX_VALUE;
        while (enu.hasMoreElements()) {
            Instance ins = enu.nextElement();
            if (!ins.equals(center)) {
                val = ins.value(i) - center.value(i);
                if (val < 0)
                    minDists[0] = Math.max(
                            (double) ((int) ((ins.value(i) - center.value(i)) * 1000)) / 1000.0, minDists[0]);
                else
                    minDists[1] = Math.min(
                            (double) ((int) ((ins.value(i) - center.value(i)) * 1000)) / 1000.0, minDists[1]);
            }
        }

        // now we set the range
        Properties p1 = new Properties();
        double upper = center.value(i) + minDists[1], lower = center.value(i) + minDists[0];
        TreeSet<Double> detourSet = new TreeSet<Double>();
        detourSet.add(upper);
        detourSet.add(lower);
        detourSet.add(previousSet.attribute(i).getUpperNumericBound());
        detourSet.add(previousSet.attribute(i).getLowerNumericBound());
        switch (detourSet.size()) {
        case 1:
            upper = lower = detourSet.first();
            break;
        case 2:
            upper = detourSet.last();
            lower = detourSet.first();
            break;
        case 3:
            upper = lower = detourSet.higher(detourSet.first());
            break;
        default: // case 4
            upper = detourSet.lower(detourSet.last());
            lower = detourSet.higher(detourSet.first());
            break;
        }
        p1.setProperty("range", "[" + String.valueOf(lower) + "," + String.valueOf(upper) + "]");
        ProtectedProperties prop1 = new ProtectedProperties(p1);
        localAtts.add(new Attribute(previousSet.attribute(i).name(), prop1));
    }
    return localAtts;
}
From source file: cn.ict.zyq.bestConf.bestConf.sampler.ConfigSampler.java
License: Open Source License

private static ArrayList<Attribute> scaleDownMindists(Instances previousSet, Instance center) {
    ArrayList<Attribute> localAtts = new ArrayList<Attribute>();
    int attNum = center.numAttributes();
    int pos = previousSet.attribute(PerformanceAttName).index();

    // traverse each dimension
    Enumeration<Instance> enu;
    double minDis;
    for (int i = 0; i < attNum; i++) {
        if (i == pos)
            continue;

        enu = previousSet.enumerateInstances();
        minDis = Double.MAX_VALUE;
        while (enu.hasMoreElements()) {
            Instance ins = enu.nextElement();
            if (!ins.equals(center))
                minDis = Math.min((double) ((int) (Math.abs(ins.value(i) - center.value(i)) * 1000)) / 1000.0,
                        minDis);
        }

        // now we set the range
        Properties p1 = new Properties();
        double upper = center.value(i) + minDis, lower = center.value(i) - minDis;
        TreeSet<Double> detourSet = new TreeSet<Double>();
        detourSet.add(upper);
        detourSet.add(lower);
        detourSet.add(previousSet.attribute(i).getUpperNumericBound());
        detourSet.add(previousSet.attribute(i).getLowerNumericBound());
        switch (detourSet.size()) {
        case 1:
            upper = lower = detourSet.first();
            break;
        case 2:
            upper = detourSet.last();
            lower = detourSet.first();
            break;
        case 3:
            upper = lower = detourSet.higher(detourSet.first());
            break;
        default: // case 4
            upper = detourSet.lower(detourSet.last());
            lower = detourSet.higher(detourSet.first());
            break;
        }
        p1.setProperty("range", "[" + String.valueOf(lower) + "," + String.valueOf(upper) + "]");
        ProtectedProperties prop1 = new ProtectedProperties(p1);
        localAtts.add(new Attribute(previousSet.attribute(i).name(), prop1));
    }
    return localAtts;
}
From source file: cn.ict.zyq.bestConf.COMT2.COMT2.java
License: Open Source License

private static double computeOmegaDelta(M5P model, M5P modelPi, Instances omega) throws Exception {
    double retval = 0., y;
    Enumeration<Instance> enu = omega.enumerateInstances();
    int idxClass = omega.classIndex();
    Instance ins;
    while (enu.hasMoreElements()) {
        ins = enu.nextElement();
        y = ins.value(idxClass);
        retval += Math.pow(y - model.classifyInstance(ins), 2)
                - Math.pow(y - modelPi.classifyInstance(ins), 2);
    }
    return retval;
}
From source file: com.reactivetechnologies.analytics.core.IncrementalClassifierBean.java
License: Open Source License

@Override
public void buildClassifier(Instances data) throws Exception {
    try {
        if (isUpdateable()) {
            UpdateableClassifier u = (UpdateableClassifier) clazzifier;
            for (@SuppressWarnings("unchecked")
            Enumeration<Instance> e = data.enumerateInstances(); e.hasMoreElements();) {
                u.updateClassifier(e.nextElement());
            }
        } else
            clazzifier.buildClassifier(data);
        lastBuildAt = System.currentTimeMillis();
    } finally {
    }
}
From source file: com.relationalcloud.partitioning.explanation.ExplanationHandler.java
License: Open Source License

/**
 * Repeat the selection from the database removing duplicates, since they will
 * only increase the execution time, and run the tuples through the classifier
 * to populate the justifiedpartition column.
 *
 * @param tableProcessed
 * @param classifier
 * @param wa
 * @throws SQLException
 * @throws Exception
 */
public void populateJustifiedColumn(String tableProcessed, Classifier classifier,
        ArrayList<String> attributes, Connection conn, int numbPart, Enumeration enumclassvalues)
        throws SQLException, Exception {

    if (true) {
        labelTest(tableProcessed, classifier, conn);
        return;
    }

    tableProcessed = removeQuotes(tableProcessed);

    // get from the DB the tuples content and their partitioning column
    String sqlstring = "SELECT distinct g.tupleid, ";
    for (String sc : attributes) {
        sqlstring += "s." + sc + ", ";
    }
    sqlstring += "g." + pcol + " FROM " + "(SELECT distinct tupleid," + pcol + " FROM `" + testingtable
            + "` WHERE tableid = '" + tableProcessed + "') AS g, relcloud_" + tableProcessed + " AS s "
            + "WHERE s.relcloud_id = g.tupleid;";
    System.out.println(sqlstring);

    Statement stmt = conn.createStatement();

    // initializing the testing table to avoid complaints from the classifier,
    // with a hash-partition-like distribution
    if (!testingtable.equals(sampledtrainingtable)) {
        int i = 0;
        Object o = enumclassvalues.nextElement();
        // set everything to an existing value to ensure that every field is covered
        stmt.executeUpdate("UPDATE " + testingtable + " SET " + pcol + "=" + o + " WHERE tableid = '"
                + tableProcessed + "'");
        // and then sprinkle in a bunch of other values (unsure whether this is required)
        while (enumclassvalues.hasMoreElements()) {
            o = enumclassvalues.nextElement();
            // FIXME there might still be an issue in which tupleid%i does not exist,
            // and thus one of the "o" never appears in the instance...
            stmt.executeUpdate("UPDATE " + testingtable + " SET " + pcol + "=" + o + " WHERE tupleid%"
                    + numbPart + "=" + i + " AND tableid = '" + tableProcessed + "'");
            i++;
        }
    }

    ResultSet res = stmt.executeQuery(sqlstring);

    // create an instance from the resultset
    Instances data_tupleid = WekaHelper.retrieveInstanceFromResultSetComplete(res, dbPropertyFile);
    res.close();
    data_tupleid.setClassIndex(data_tupleid.numAttributes() - 1);

    Instances data_no_tupleid = makeLastNominal(data_tupleid);
    data_no_tupleid.setClassIndex(data_no_tupleid.numAttributes() - 1);

    // remove tupleid from data_no_tupleid, still available in data_tupleid
    data_no_tupleid.deleteAttributeAt(0);

    // if(data_no_tupleid.classAttribute().numValues()>1){
    System.out.println("Running the tuples through the classifier to populate " + explainedPartitionCol);

    // use data that still has the tupleid and newData for the classification
    Enumeration enum_data_tupleid = data_tupleid.enumerateInstances();
    Enumeration enum_data_no_tupleid = data_no_tupleid.enumerateInstances();

    PreparedStatement updateJustCol = conn.prepareStatement("UPDATE `" + testingtable + "` SET `"
            + explainedPartitionCol + "` = ? " + "WHERE tableid = '" + tableProcessed
            + "' AND tupleid = ?;");

    while (enum_data_tupleid.hasMoreElements() && enum_data_no_tupleid.hasMoreElements()) {
        Instance tupIDinstance = (Instance) enum_data_tupleid.nextElement();
        Instance instance = (Instance) enum_data_no_tupleid.nextElement();

        double part = classifier.classifyInstance(instance);
        if (part == Instance.missingValue())
            System.err.println("No classification for:" + instance.toString());

        updateJustCol.setInt(1, (int) part);
        updateJustCol.setInt(2, (int) tupIDinstance.value(0));
        // System.out.println(tableProcessed+" "+ instance.value(0) + " "
        // + tupIDinstance.classValue() +" "+ part);
        updateJustCol.execute();
        updateJustCol.clearParameters();
    }
    updateJustCol.close();
}