List of usage examples for weka.core Instances enumerateInstances
public Enumeration<Instance> enumerateInstances()
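Before the per-project examples, a minimal, self-contained sketch of the call itself; the ARFF path and the surrounding class name are placeholders, not taken from any project below:

import java.util.Enumeration;

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class EnumerateInstancesExample {
    public static void main(String[] args) throws Exception {
        // Load any dataset; "data/iris.arff" is a placeholder path.
        Instances data = DataSource.read("data/iris.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // enumerateInstances() returns an Enumeration over the dataset
        // in row order, without copying the instances.
        Enumeration<Instance> en = data.enumerateInstances();
        while (en.hasMoreElements()) {
            Instance ins = en.nextElement();
            System.out.println(ins);
        }
    }
}

Note that very old Weka releases declare the return type as a raw Enumeration, so the casts seen in several examples below ((Instance) instEnum.nextElement()) are needed there.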
From source file: cerebro.Id3.java
License: Open Source License

/**
 * Method for building an Id3 tree.
 *
 * @param data the training data
 * @exception Exception if decision tree can't be built successfully
 */
private void makeTree(Instances data) throws Exception {

    // Check if no instances have reached this node.
    if (data.numInstances() == 0) {
        m_Attribute = null;
        m_ClassValue = Instance.missingValue();
        m_Distribution = new double[data.numClasses()];
        return;
    }

    // Compute attribute with maximum information gain.
    double[] infoGains = new double[data.numAttributes()];
    Enumeration attEnum = data.enumerateAttributes();
    while (attEnum.hasMoreElements()) {
        Attribute att = (Attribute) attEnum.nextElement();
        infoGains[att.index()] = computeInfoGain(data, att);
    }
    m_Attribute = data.attribute(Utils.maxIndex(infoGains));

    // Make leaf if information gain is zero.
    // Otherwise create successors.
    if (Utils.eq(infoGains[m_Attribute.index()], 0)) {
        m_Attribute = null;
        m_Distribution = new double[data.numClasses()];
        Enumeration instEnum = data.enumerateInstances();
        while (instEnum.hasMoreElements()) {
            Instance inst = (Instance) instEnum.nextElement();
            m_Distribution[(int) inst.classValue()]++;
        }
        Utils.normalize(m_Distribution);
        m_ClassValue = Utils.maxIndex(m_Distribution);
        m_ClassAttribute = data.classAttribute();
    } else {
        Instances[] splitData = splitData(data, m_Attribute);
        m_Successors = new Id3[m_Attribute.numValues()];
        for (int j = 0; j < m_Attribute.numValues(); j++) {
            m_Successors[j] = new Id3();
            m_Successors[j].makeTree(splitData[j]);
        }
    }
}
From source file: cerebro.Id3.java
License: Open Source License

/**
 * Computes the entropy of a dataset.
 *
 * @param data the data for which entropy is to be computed
 * @return the entropy of the data's class distribution
 * @throws Exception if computation fails
 */
private double computeEntropy(Instances data) throws Exception {
    double[] classCounts = new double[data.numClasses()];
    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        classCounts[(int) inst.classValue()]++;
    }
    double entropy = 0;
    for (int j = 0; j < data.numClasses(); j++) {
        if (classCounts[j] > 0) {
            entropy -= classCounts[j] * Utils.log2(classCounts[j]);
        }
    }
    entropy /= (double) data.numInstances();
    return entropy + Utils.log2(data.numInstances());
}
From source file: cerebro.Id3.java
License: Open Source License

/**
 * Splits a dataset according to the values of a nominal attribute.
 *
 * @param data the data which is to be split
 * @param att the attribute to be used for splitting
 * @return the sets of instances produced by the split
 */
private Instances[] splitData(Instances data, Attribute att) {
    Instances[] splitData = new Instances[att.numValues()];
    for (int j = 0; j < att.numValues(); j++) {
        splitData[j] = new Instances(data, data.numInstances());
    }
    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        splitData[(int) inst.value(att)].add(inst);
    }
    for (int i = 0; i < splitData.length; i++) {
        splitData[i].compactify();
    }
    return splitData;
}
From source file: cn.ict.zyq.bestConf.bestConf.BestConf.java
License: Open Source License

public static ArrayList<Attribute> scaleDownDetour(Instances previousSet, Instance center) {
    ArrayList<Attribute> localAtts = new ArrayList<Attribute>();
    int attNum = center.numAttributes();
    int pos = previousSet.attribute(PerformanceAttName).index();

    // traverse each dimension
    Enumeration<Instance> enu;
    double minDis;
    for (int i = 0; i < attNum; i++) {
        if (i == pos)
            continue;

        enu = previousSet.enumerateInstances();
        minDis = Double.MAX_VALUE;
        while (enu.hasMoreElements()) {
            Instance ins = enu.nextElement();
            if (!ins.equals(center))
                minDis = Math.min((double) ((int) (Math.abs(ins.value(i) - center.value(i)) * 100)) / 100.0,
                        minDis);
        }

        // now we set the range
        Properties p1 = new Properties();
        double upper = center.value(i) + minDis, lower = center.value(i) - minDis;
        TreeSet<Double> detourSet = new TreeSet<Double>();
        detourSet.add(upper);
        detourSet.add(lower);
        detourSet.add(previousSet.attribute(i).getUpperNumericBound());
        detourSet.add(previousSet.attribute(i).getLowerNumericBound());
        switch (detourSet.size()) {
        case 1:
            upper = lower = detourSet.first();
            break;
        case 2:
            upper = detourSet.last();
            lower = detourSet.first();
            break;
        case 3:
            upper = lower = detourSet.higher(detourSet.first());
            break;
        default: // case 4
            upper = detourSet.lower(detourSet.last());
            lower = detourSet.higher(detourSet.first());
            break;
        }
        p1.setProperty("range", "[" + String.valueOf(lower) + "," + String.valueOf(upper) + "]");
        ProtectedProperties prop1 = new ProtectedProperties(p1);
        localAtts.add(new Attribute(previousSet.attribute(i).name(), prop1));
    }
    return localAtts;
}
From source file: cn.ict.zyq.bestConf.bestConf.BestConf.java
License: Open Source License

public static void testCOMT2() throws Exception {
    BestConf bestconf = new BestConf();
    Instances trainingSet = DataIOFile.loadDataFromArffFile("data/trainingBestConf0.arff");
    trainingSet.setClassIndex(trainingSet.numAttributes() - 1);

    Instances samplePoints = LHSInitializer.getMultiDimContinuous(bestconf.getAttributes(),
            InitialSampleSetSize, false);
    samplePoints.insertAttributeAt(trainingSet.classAttribute(), samplePoints.numAttributes());
    samplePoints.setClassIndex(samplePoints.numAttributes() - 1);

    COMT2 comt = new COMT2(samplePoints, COMT2Iteration);
    comt.buildClassifier(trainingSet);

    Evaluation eval = new Evaluation(trainingSet);
    eval.evaluateModel(comt, trainingSet);
    System.err.println(eval.toSummaryString());

    Instance best = comt.getInstanceWithPossibleMaxY(samplePoints.firstInstance());
    Instances bestInstances = new Instances(trainingSet, 2);
    bestInstances.add(best);
    DataIOFile.saveDataToXrffFile("data/trainingBestConf_COMT2.arff", bestInstances);

    // now we output the training set with the class value updated as the predicted value
    Instances output = new Instances(trainingSet, trainingSet.numInstances());
    Enumeration<Instance> enu = trainingSet.enumerateInstances();
    while (enu.hasMoreElements()) {
        Instance ins = enu.nextElement();
        double[] values = ins.toDoubleArray();
        values[values.length - 1] = comt.classifyInstance(ins);
        output.add(ins.copy(values));
    }
    DataIOFile.saveDataToXrffFile("data/trainingBestConf0_predict.xrff", output);
}
From source file: cn.ict.zyq.bestConf.bestConf.sampler.ConfigSampler.java
License: Open Source License

private static ArrayList<Attribute> scaleDownNeighbordists(Instances previousSet, Instance center) {
    ArrayList<Attribute> localAtts = new ArrayList<Attribute>();
    int attNum = center.numAttributes();
    int pos = -1;
    if (previousSet.attribute(PerformanceAttName) != null)
        pos = previousSet.attribute(PerformanceAttName).index();

    // traverse each dimension
    Enumeration<Instance> enu;
    double[] minDists = new double[2];
    double val;
    for (int i = 0; i < attNum; i++) {
        if (i == pos)
            continue;

        enu = previousSet.enumerateInstances();
        minDists[0] = 1 - Double.MAX_VALUE;
        minDists[1] = Double.MAX_VALUE;
        while (enu.hasMoreElements()) {
            Instance ins = enu.nextElement();
            if (!ins.equals(center)) {
                val = ins.value(i) - center.value(i);
                if (val < 0)
                    minDists[0] = Math.max(
                            (double) ((int) ((ins.value(i) - center.value(i)) * 1000)) / 1000.0, minDists[0]);
                else
                    minDists[1] = Math.min(
                            (double) ((int) ((ins.value(i) - center.value(i)) * 1000)) / 1000.0, minDists[1]);
            }
        }

        // now we set the range
        Properties p1 = new Properties();
        double upper = center.value(i) + minDists[1], lower = center.value(i) + minDists[0];
        TreeSet<Double> detourSet = new TreeSet<Double>();
        detourSet.add(upper);
        detourSet.add(lower);
        detourSet.add(previousSet.attribute(i).getUpperNumericBound());
        detourSet.add(previousSet.attribute(i).getLowerNumericBound());
        switch (detourSet.size()) {
        case 1:
            upper = lower = detourSet.first();
            break;
        case 2:
            upper = detourSet.last();
            lower = detourSet.first();
            break;
        case 3:
            upper = lower = detourSet.higher(detourSet.first());
            break;
        default: // case 4
            upper = detourSet.lower(detourSet.last());
            lower = detourSet.higher(detourSet.first());
            break;
        }
        p1.setProperty("range", "[" + String.valueOf(lower) + "," + String.valueOf(upper) + "]");
        ProtectedProperties prop1 = new ProtectedProperties(p1);
        localAtts.add(new Attribute(previousSet.attribute(i).name(), prop1));
    }
    return localAtts;
}
From source file: cn.ict.zyq.bestConf.bestConf.sampler.ConfigSampler.java
License: Open Source License

private static ArrayList<Attribute> scaleDownMindists(Instances previousSet, Instance center) {
    ArrayList<Attribute> localAtts = new ArrayList<Attribute>();
    int attNum = center.numAttributes();
    int pos = previousSet.attribute(PerformanceAttName).index();

    // traverse each dimension
    Enumeration<Instance> enu;
    double minDis;
    for (int i = 0; i < attNum; i++) {
        if (i == pos)
            continue;

        enu = previousSet.enumerateInstances();
        minDis = Double.MAX_VALUE;
        while (enu.hasMoreElements()) {
            Instance ins = enu.nextElement();
            if (!ins.equals(center))
                minDis = Math.min((double) ((int) (Math.abs(ins.value(i) - center.value(i)) * 1000)) / 1000.0,
                        minDis);
        }

        // now we set the range
        Properties p1 = new Properties();
        double upper = center.value(i) + minDis, lower = center.value(i) - minDis;
        TreeSet<Double> detourSet = new TreeSet<Double>();
        detourSet.add(upper);
        detourSet.add(lower);
        detourSet.add(previousSet.attribute(i).getUpperNumericBound());
        detourSet.add(previousSet.attribute(i).getLowerNumericBound());
        switch (detourSet.size()) {
        case 1:
            upper = lower = detourSet.first();
            break;
        case 2:
            upper = detourSet.last();
            lower = detourSet.first();
            break;
        case 3:
            upper = lower = detourSet.higher(detourSet.first());
            break;
        default: // case 4
            upper = detourSet.lower(detourSet.last());
            lower = detourSet.higher(detourSet.first());
            break;
        }
        p1.setProperty("range", "[" + String.valueOf(lower) + "," + String.valueOf(upper) + "]");
        ProtectedProperties prop1 = new ProtectedProperties(p1);
        localAtts.add(new Attribute(previousSet.attribute(i).name(), prop1));
    }
    return localAtts;
}
From source file: cn.ict.zyq.bestConf.COMT2.COMT2.java
License: Open Source License

private static double computeOmegaDelta(M5P model, M5P modelPi, Instances omega) throws Exception {
    double retval = 0., y;
    Enumeration<Instance> enu = omega.enumerateInstances();
    int idxClass = omega.classIndex();
    Instance ins;
    while (enu.hasMoreElements()) {
        ins = enu.nextElement();
        y = ins.value(idxClass);
        retval += Math.pow(y - model.classifyInstance(ins), 2)
                - Math.pow(y - modelPi.classifyInstance(ins), 2);
    }
    return retval;
}
From source file: com.reactivetechnologies.analytics.core.IncrementalClassifierBean.java
License: Open Source License

@Override
public void buildClassifier(Instances data) throws Exception {
    try {
        if (isUpdateable()) {
            UpdateableClassifier u = (UpdateableClassifier) clazzifier;
            for (@SuppressWarnings("unchecked")
            Enumeration<Instance> e = data.enumerateInstances(); e.hasMoreElements();) {
                u.updateClassifier(e.nextElement());
            }
        } else
            clazzifier.buildClassifier(data);
        lastBuildAt = System.currentTimeMillis();
    } finally {
    }
}
From source file: com.relationalcloud.partitioning.explanation.ExplanationHandler.java
License: Open Source License

/**
 * Repeat the selection from the database removing duplicates, since they will
 * only increase the execution time, and run the tuples through the classifier
 * to populate the justifiedpartition column.
 *
 * @param tableProcessed
 * @param classifier
 * @param wa
 * @throws SQLException
 * @throws Exception
 */
public void populateJustifiedColumn(String tableProcessed, Classifier classifier,
        ArrayList<String> attributes, Connection conn, int numbPart, Enumeration enumclassvalues)
        throws SQLException, Exception {

    if (true) {
        labelTest(tableProcessed, classifier, conn);
        return;
    }

    tableProcessed = removeQuotes(tableProcessed);

    // get from the DB the tuples content and their partitioning column
    String sqlstring = "SELECT distinct g.tupleid, ";
    for (String sc : attributes) {
        sqlstring += "s." + sc + ", ";
    }
    sqlstring += "g." + pcol + " FROM " + "(SELECT distinct tupleid," + pcol + " FROM `" + testingtable
            + "` WHERE tableid = '" + tableProcessed + "') AS g, relcloud_" + tableProcessed + " AS s "
            + "WHERE s.relcloud_id = g.tupleid;";
    System.out.println(sqlstring);

    Statement stmt = conn.createStatement();

    // initializing the testing table to avoid complaints from the classifier,
    // with a hash-partition-like distribution
    if (!testingtable.equals(sampledtrainingtable)) {
        int i = 0;
        Object o = enumclassvalues.nextElement();
        // set everything to an existing value to ensure that every field is covered
        stmt.executeUpdate("UPDATE " + testingtable + " SET " + pcol + "=" + o + " WHERE tableid = '"
                + tableProcessed + "'");
        // and then sprinkle in a bunch of other values (unsure whether this is required)
        while (enumclassvalues.hasMoreElements()) {
            o = enumclassvalues.nextElement();
            // FIXME there might still be an issue in which tupleid%i does not exist,
            // and thus one of the "o" never appears in the instance...
            stmt.executeUpdate("UPDATE " + testingtable + " SET " + pcol + "=" + o + " WHERE tupleid%"
                    + numbPart + "=" + i + " AND tableid = '" + tableProcessed + "'");
            i++;
        }
    }

    ResultSet res = stmt.executeQuery(sqlstring);

    // create an instance from the resultset
    Instances data_tupleid = WekaHelper.retrieveInstanceFromResultSetComplete(res, dbPropertyFile);
    res.close();
    data_tupleid.setClassIndex(data_tupleid.numAttributes() - 1);

    Instances data_no_tupleid = makeLastNominal(data_tupleid);
    data_no_tupleid.setClassIndex(data_no_tupleid.numAttributes() - 1);

    // remove tupleid from data_no_tupleid, still available in data_tupleid
    data_no_tupleid.deleteAttributeAt(0);

    // if(data_no_tupleid.classAttribute().numValues()>1){
    System.out.println("Running the tuples through the classifier to populate " + explainedPartitionCol);

    // use data that still has the tupleid and newData for the classification
    Enumeration enum_data_tupleid = data_tupleid.enumerateInstances();
    Enumeration enum_data_no_tupleid = data_no_tupleid.enumerateInstances();

    PreparedStatement updateJustCol = conn.prepareStatement("UPDATE `" + testingtable + "` SET `"
            + explainedPartitionCol + "` = ? " + "WHERE tableid = '" + tableProcessed
            + "' AND tupleid = ?;");

    while (enum_data_tupleid.hasMoreElements() && enum_data_no_tupleid.hasMoreElements()) {
        Instance tupIDinstance = (Instance) enum_data_tupleid.nextElement();
        Instance instance = (Instance) enum_data_no_tupleid.nextElement();

        double part = classifier.classifyInstance(instance);
        if (part == Instance.missingValue())
            System.err.println("No classification for:" + instance.toString());

        updateJustCol.setInt(1, (int) part);
        updateJustCol.setInt(2, (int) tupIDinstance.value(0));
        // System.out.println(tableProcessed+" "+ instance.value(0) + " "
        // + tupIDinstance.classValue() +" "+ part);
        updateJustCol.execute();
        updateJustCol.clearParameters();
    }
    updateJustCol.close();
}