List of usage examples for weka.core.Instance.attribute(int)
public Attribute attribute(int index);
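A note on the call itself: attribute(int) returns the Attribute object (name, type, nominal labels) behind a column index, while value(int) returns the raw double stored in the cell; the instance must have a dataset header attached or the call throws UnassignedDatasetException. The sketch below is illustrative only, not taken from any source file on this page, and uses the Weka 3.7+ API (DenseInstance, ArrayList<Attribute>); several of the older examples below use FastVector and the concrete Instance class instead.

    import java.util.ArrayList;
    import java.util.Arrays;
    import weka.core.Attribute;
    import weka.core.DenseInstance;
    import weka.core.Instance;
    import weka.core.Instances;

    public class AttributeDemo {
        public static void main(String[] args) {
            // Build a two-column dataset: one numeric attribute, one nominal class.
            ArrayList<String> labels = new ArrayList<String>(Arrays.asList("no", "yes"));
            ArrayList<Attribute> atts = new ArrayList<Attribute>();
            atts.add(new Attribute("age"));              // numeric
            atts.add(new Attribute("approved", labels)); // nominal
            Instances data = new Instances("demo", atts, 1);
            data.setClassIndex(1);

            Instance inst = new DenseInstance(2);
            inst.setDataset(data);      // attribute(int) needs a dataset header
            inst.setValue(0, 42.0);
            inst.setValue(1, "yes");

            for (int i = 0; i < inst.numAttributes(); i++) {
                Attribute att = inst.attribute(i);       // the call this page documents
                System.out.println(att.name() + " nominal=" + att.isNominal()
                        + " raw=" + inst.value(i));
            }
        }
    }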
From source file:org.scripps.branch.classifier.ManualTree.java
License:Open Source License
    /**
     * Recursively generates a tree.
     *
     * @param data
     *            the data to work with
     * @param classProbs
     *            the class distribution
     * @param header
     *            the header of the data
     * @param minNum
     *            the minimum number of instances per leaf
     * @param debug
     *            whether debugging is on
     * @param attIndicesWindow
     *            the attribute window to choose attributes from
     * @param random
     *            random number generator for choosing random attributes
     * @param depth
     *            the current depth
     * @param determineStructure
     *            whether to determine structure
     * @param m_distributionData
     *            HashMap to put distribution data if getSplitData is true in
     *            any node
     * @throws Exception
     *             if generation fails
     */
    protected void buildTree(Instances data, double[] classProbs, Instances header, boolean debug, int depth,
            JsonNode node, int parent_index, HashMap m_distributionData, Instances requiredInstances,
            LinkedHashMap<String, Classifier> custom_classifiers, List<CustomSet> cSList,
            CustomClassifierService ccService, Dataset ds) throws Exception {
        if (mapper == null) {
            mapper = new ObjectMapper();
        }
        // Store structure of dataset, set minimum number of instances
        m_Info = header;
        m_Debug = debug;
        // if in dead json return
        if (node == null) {
            m_Attribute = -1;
            m_ClassDistribution = null;
            m_Prop = null;
            return;
        }
        // Make leaf if there are no training instances
        if (data.numInstances() == 0) {
            m_Attribute = -1;
            m_ClassDistribution = null;
            m_Prop = null;
            return;
        }
        // Check if node doesn't contain enough instances, is pure,
        // or maximum depth reached
        m_ClassDistribution = classProbs.clone();
        cSetList = cSList;
        ccSer = ccService;
        d = ds;
        // if (Utils.sum(m_ClassDistribution) < 2 * m_MinNum
        //         || Utils.eq(m_ClassDistribution[Utils.maxIndex(m_ClassDistribution)],
        //                 Utils.sum(m_ClassDistribution))
        //         || ((getMaxDepth() > 0) && (depth >= getMaxDepth()))) {
        //     // Make leaf
        //     m_Attribute = -1;
        //     m_Prop = null;
        //     return;
        // }

        // Investigate the selected attribute
        int attIndex = parent_index;
        // options child added by web client developer
        // TODO work with him to make a more meaningful structure...
        JsonNode options = node.get("options");
        if (options == null) {
            return;
        }
        String kind = options.get("kind").asText();
        JsonNode att_name = options.get("attribute_name");
        Boolean getSplitData = false;
        Boolean getInstanceData = false;
        // this allows me to modify the json tree structure to add data about
        // the evaluation
        ObjectNode evalresults = (ObjectNode) options;
        ObjectNode _node = (ObjectNode) node;
        // For Roc - Node Match
        _node.set("roc_uid_0", null);
        _node.set("roc_uid_1", null);
        Map<String, JsonNode> sons = new HashMap<String, JsonNode>();
        // String name = node_name.asText();
        if (kind != null && kind.equals("split_node") && att_name != null) {
            // attIndex = data.attribute(node_id.asText()).index();
            if (!att_name.asText().equals("") && !att_name.asText().contains("custom_classifier")
                    && !att_name.asText().contains("custom_tree")
                    && !att_name.asText().contains("custom_set")) {
                attIndex = data.attribute(att_name.asText()).index();
            } else {
                if (att_name.asText().contains("custom_set")) {
                    int ctr = 0;
                    for (CustomSet c : cSList) {
                        if (c.getId() == Long.valueOf(att_name.asText().replace("custom_set_", ""))) {
                            break;
                        }
                        ctr++;
                    }
                    attIndex = (data.numAttributes() - 1) + custom_classifiers.size() + ctr;
                } else {
                    if (att_name.asText().contains("custom_classifier_new")) {
                        HashMap mp = ccSer.buildCustomClasifier(data,
                                Long.valueOf(att_name.asText().replace("custom_classifier_new_", "")));
                        Classifier fc = (Classifier) mp.get("classifier");
                        custom_classifiers.put("custom_classifier_" + mp.get("id"), fc);
                        evalresults.put("unique_id", "custom_classifier_" + mp.get("id"));
                        evalresults.put("attribute_name", "custom_classifier_" + mp.get("id"));
                        att_name = evalresults.get("attribute_name");
                    }
                    int ctr = 0;
                    for (String key : custom_classifiers.keySet()) {
                        if (key.equals(att_name.asText())) {
                            break;
                        }
                        ctr++;
                    }
                    attIndex = (data.numAttributes() - 1) + ctr;
                }
            }
            if (node.get("getSplitData") != null) {
                getSplitData = node.get("getSplitData").asBoolean();
            }
            JsonNode split_values = node.get("children");
            int c = 0;
            if (split_values != null && split_values.size() > 0) {
                for (JsonNode svalue : split_values) {
                    String key = svalue.get("name").asText();
                    JsonNode son = svalue.get("children").get(0);
                    if (key.contains("<")) {
                        key = "low";
                    } else if (key.contains(">")) {
                        key = "high";
                    }
                    sons.put(key, son);
                    c++;
                }
            }
            // LOGGER.debug("Id name "+att_name+" index "+attIndex+" type "+kind+" sons "+c);
        } else {
            // LOGGER.debug("non split node, name "+att_name+" type "+kind);
        }
        double[] vals = new double[data.numAttributes() + custom_classifiers.size() + cSetList.size()];
        double[][][] dists = new double[data.numAttributes() + custom_classifiers.size() + cSetList.size()][0][0];
        double[][] props = new double[data.numAttributes() + custom_classifiers.size() + cSetList.size()][0];
        double[] splits = new double[data.numAttributes() + custom_classifiers.size() + cSetList.size()];
        listOfFc = custom_classifiers;
        // Compute class distributions and value of splitting
        // criterion for each attribute
        HashMap<String, Double> mp = new HashMap<String, Double>();
        if (attIndex >= data.numAttributes() && attIndex < data.numAttributes() + custom_classifiers.size()) {
            mp = distribution(props, dists, attIndex, data, Double.NaN, custom_classifiers);
        } else if (attIndex >= data.numAttributes() + custom_classifiers.size() - 1) {
            mp = distribution(props, dists, attIndex, data, Double.NaN, custom_classifiers);
        } else {
            if (options.get("split_point") != null) {
                mp = distribution(props, dists, attIndex, data, options.get("split_point").asDouble(),
                        custom_classifiers);
            } else {
                mp = distribution(props, dists, attIndex, data, Double.NaN, custom_classifiers);
            }
        }
        splits[attIndex] = mp.get("split_point");
        vals[attIndex] = gain(dists[attIndex], priorVal(dists[attIndex]));
        m_Attribute = attIndex;
        double[][] distribution = dists[m_Attribute];
        // stop if input json tree does not contain any more children
        // replacing Utils.gr(vals[m_Attribute], 0) &&
        if (kind != null && kind.equals("split_node") && att_name != null) {
            // Assign classes for custom sets (visual splits).
            m_ClassAssignment.put("Inside", Utils.maxIndex(dists[m_Attribute][1]));
            m_ClassAssignment.put("Outside", (Utils.maxIndex(dists[m_Attribute][1]) == 1) ? 0 : 1);
            // Build subtrees
            m_SplitPoint = splits[m_Attribute];
            m_Prop = props[m_Attribute];
            Instances[] subsets = splitData(data);
            m_Successors = new ManualTree[distribution.length];
            // record quantity and quality measures for node
            int quantity = 0;
            for (int i = 0; i < distribution.length; i++) {
                quantity += subsets[i].numInstances();
            }
            evalresults.put("bin_size", quantity);
            evalresults.put("infogain", vals[m_Attribute]);
            evalresults.put("majClass", m_Info.classAttribute().value(Utils.maxIndex(m_ClassDistribution)));
            evalresults.put("split_point", m_SplitPoint);
            evalresults.put("orig_split_point", mp.get("orig_split_point"));
            if (Boolean.TRUE.equals(getSplitData)) {
                addDistributionData(data, m_Attribute, m_distributionData);
            }
            int maxIndex = 0;
            double maxCount = 0;
            double errors = 0;
            double[] classDist = new double[2];
            double pct_correct = 0;
            double bin_size = 0;
            for (int i = 0; i < distribution.length; i++) {
                m_Successors[i] = new ManualTree();
                m_Successors[i].setKValue(m_KValue);
                m_Successors[i].setMaxDepth(getMaxDepth());
                // To compute class distribution for split node.
                for (int j = 0; j < distribution[i].length; j++) {
                    classDist[j] += distribution[i][j];
                }
                // test an instance to see which child node to send its subset
                // down. after split, should hold for all in set
                String child_name = "";
                Instances subset = subsets[i];
                if (subset == null || subset.numInstances() == 0) {
                    continue;
                }
                Instance inst = subset.instance(0);
                if (m_Attribute >= data.numAttributes()
                        && m_Attribute < data.numAttributes() + custom_classifiers.size()) {
                    double predictedClass = custom_classifiers.get(att_name.asText()).classifyInstance(inst);
                    child_name = m_Info.classAttribute().value((int) predictedClass);
                } else if (m_Attribute >= data.numAttributes() + custom_classifiers.size() - 1) {
                    CustomSet cSet = getReqCustomSet(
                            m_Attribute - (data.numAttributes() - 1 + custom_classifiers.size()), cSetList);
                    JsonNode vertices = mapper.readTree(cSet.getConstraints());
                    ArrayList<double[]> attrVertices = generateVerticesList(vertices);
                    List<Attribute> aList = generateAttributeList(cSet, data, ds);
                    double[] testPoint = new double[2];
                    testPoint[0] = inst.value(aList.get(0));
                    testPoint[1] = inst.value(aList.get(1));
                    int check = checkPointInPolygon(attrVertices, testPoint);
                    if (check == 0) {
                        child_name = "Outside";
                    } else {
                        child_name = "Inside";
                    }
                } else {
                    // which nominal attribute is this split linked to?
                    if (subset.attribute(m_Attribute).isNominal()) {
                        child_name = inst.attribute(m_Attribute).value((int) inst.value(m_Attribute));
                    }
                    // otherwise, if we have a numeric attribute, are we going
                    // high or low?
                    else if (data.attribute(m_Attribute).isNumeric()) {
                        if (inst.value(m_Attribute) < m_SplitPoint) {
                            child_name = "low";
                        } else {
                            child_name = "high";
                        }
                    }
                }
                m_Successors[i].setM_ClassAssignment((HashMap<String, Integer>) m_ClassAssignment.clone());
                JsonNode son = sons.get(child_name);
                if (son != null) {
                    m_Successors[i].buildTree(subsets[i], distribution[i], header, m_Debug, depth + 1, son,
                            attIndex, m_distributionData, requiredInstances, custom_classifiers, cSList,
                            ccService, ds);
                } else {
                    // if we are a split node with no input children, we need to
                    // add them into the tree
                    // JsonNode split_values = node.get("children");
                    if (kind != null && kind.equals("split_node")) {
                        ArrayNode children = (ArrayNode) node.get("children");
                        if (children == null) {
                            children = mapper.createArrayNode();
                        }
                        ObjectNode child = mapper.createObjectNode();
                        child.put("name", child_name);
                        ObjectNode c_options = mapper.createObjectNode();
                        c_options.put("attribute_name", child_name);
                        c_options.put("kind", "split_value");
                        child.put("options", c_options);
                        children.add(child);
                        _node.put("children", children);
                        m_Successors[i].buildTree(subsets[i], distribution[i], header, m_Debug, depth + 1,
                                child, attIndex, m_distributionData, requiredInstances, custom_classifiers,
                                cSList, ccService, ds);
                    } else {
                        // for leaf nodes, calling again ends the cycle and
                        // fills up the bins appropriately
                        m_Successors[i].buildTree(subsets[i], distribution[i], header, m_Debug, depth + 1,
                                node, attIndex, m_distributionData, requiredInstances, custom_classifiers,
                                cSList, ccService, ds);
                    }
                }
            }
            // Compute pct_correct from distributions and send to split_node
            bin_size = Utils.sum(classDist);
            maxIndex = Utils.maxIndex(classDist);
            maxCount = classDist[maxIndex];
            String class_name = m_Info.classAttribute().value(maxIndex);
            _node.put("majClass", class_name);
            errors += bin_size - maxCount;
            pct_correct = (quantity - errors) / quantity;
            evalresults.put("pct_correct", pct_correct);
            // If all successors are non-empty, we don't need to store the class
            // distribution
            boolean emptySuccessor = false;
            for (int i = 0; i < subsets.length; i++) {
                if (m_Successors[i].m_ClassDistribution == null) {
                    emptySuccessor = true;
                    break;
                }
            }
            if (!emptySuccessor) {
                m_ClassDistribution = null;
            }
        } else {
            m_Attribute = -1;
            if (kind != null && kind.equals("leaf_node")) {
                double bin_size = 0, maxCount = 0;
                int maxIndex = 0;
                double errors = 0;
                double pct_correct = 0;
                if (m_ClassDistribution != null) {
                    bin_size = Utils.sum(m_ClassDistribution);
                    // the leaf's class is the majority class of its distribution
                    maxIndex = Utils.maxIndex(m_ClassDistribution);
                    maxCount = m_ClassDistribution[maxIndex];
                    errors = bin_size - maxCount;
                    pct_correct = (bin_size - errors) / bin_size;
                }
                if (node.get("pickInst") != null) {
                    getInstanceData = node.get("pickInst").asBoolean();
                }
                if (Boolean.TRUE.equals(getInstanceData)) {
                    requiredInstances.delete();
                    for (int k = 0; k < data.numInstances(); k++) {
                        requiredInstances.add(data.instance(k));
                    }
                }
                String class_name = m_Info.classAttribute().value(maxIndex);
                _node.put("majClass", class_name);
                if (node.get("setClass") != null) {
                    String setClass = node.get("setClass").asText();
                    class_name = m_Info.classAttribute().value(m_ClassAssignment.get(setClass));
                }
                _node.put("name", class_name);
                evalresults.put("attribute_name", class_name);
                evalresults.put("kind", "leaf_node");
                evalresults.put("bin_size", Utils.doubleToString(bin_size, 2));
                evalresults.put("errors", Utils.doubleToString(errors, 2));
                evalresults.put("pct_correct", Utils.doubleToString(pct_correct, 2));
                this.setJsonnode(_node);
            } else {
                // Make leaf: add the data to the json object
                double bin_size = 0, maxCount = 0;
                int maxIndex = 0;
                double errors = 0;
                double pct_correct = 0;
                if (m_ClassDistribution != null) {
                    bin_size = Utils.sum(m_ClassDistribution);
                    // the leaf's class is the majority class of its distribution
                    maxIndex = Utils.maxIndex(m_ClassDistribution);
                    maxCount = m_ClassDistribution[maxIndex];
                    errors = bin_size - maxCount;
                    pct_correct = (bin_size - errors) / bin_size;
                }
                ArrayNode children = (ArrayNode) node.get("children");
                if (children == null) {
                    children = mapper.createArrayNode();
                }
                ObjectNode child = mapper.createObjectNode();
                String class_name = m_Info.classAttribute().value(maxIndex);
                child.put("majClass", class_name);
                String nodeName = node.get("name").asText();
                if (nodeName.equals("Inside") || nodeName.equals("Outside")) {
                    child.put("setClass", nodeName);
                    class_name = m_Info.classAttribute().value(m_ClassAssignment.get(nodeName));
                }
                child.put("name", class_name);
                ObjectNode c_options = mapper.createObjectNode();
                c_options.put("attribute_name", class_name);
                c_options.put("kind", "leaf_node");
                c_options.put("bin_size", Utils.doubleToString(bin_size, 2));
                c_options.put("errors", Utils.doubleToString(errors, 2));
                c_options.put("pct_correct", Utils.doubleToString(pct_correct, 2));
                child.put("options", c_options);
                children.add(child);
                _node.put("children", children);
                this.setJsonnode(child);
            }
        }
    }
From source file:org.ssase.debt.classification.OnlineMultilayerPerceptron.java
License:Open Source License
    public Instances getInstances(Instance inst) {
        Instances insts;
        FastVector atts = new FastVector();
        for (int i = 0; i < inst.numAttributes(); i++) {
            atts.addElement(inst.attribute(i));
        }
        insts = new Instances("CurrentTrain", atts, 0);
        insts.add(inst);
        insts.setClassIndex(inst.numAttributes() - 1);
        return insts;
    }
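The method above copies each Attribute off the incoming row via attribute(i) to rebuild a one-row dataset, a common workaround for APIs that want an Instances when you only have an Instance. For reference, a minimal sketch of the same pattern on the Weka 3.7+ API, where FastVector is deprecated in favour of ArrayList<Attribute>; the class name here is illustrative, not from the source:

    import java.util.ArrayList;
    import weka.core.Attribute;
    import weka.core.Instance;
    import weka.core.Instances;

    public class SingleRowDataset {
        public static Instances wrap(Instance inst) {
            ArrayList<Attribute> atts = new ArrayList<Attribute>();
            for (int i = 0; i < inst.numAttributes(); i++) {
                atts.add(inst.attribute(i));    // copy the header, column by column
            }
            Instances insts = new Instances("CurrentTrain", atts, 1);
            insts.add(inst);                    // add(Instance) shallow-copies the row
            insts.setClassIndex(inst.numAttributes() - 1);
            return insts;
        }
    }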
From source file:qa.qcri.nadeef.core.utils.classification.ClassifierBase.java
License:Open Source License
    /**
     * Gets the prediction for a given instance based on the current model.
     *
     * @param instance the training instance to classify
     */
    public ClassificationResult getPrediction(TrainingInstance instance) throws NadeefClassifierException {
        // transform training instance into real instance
        Instance wekaInstance = new Instance(numberOfAttributes);
        wekaInstance.setDataset(instances);
        // add values from old tuple
        for (Cell cell : instance.getDirtyTuple().getCells()) {
            if (isPermitted(cell.getColumn())) {
                if (cell.getValue() instanceof String) {
                    wekaInstance.setValue(attributeIndex.get(cell.getColumn()), cell.getValue().toString());
                } else {
                    double doubleValue = Double.parseDouble(cell.getValue().toString());
                    wekaInstance.setValue(attributeIndex.get(cell.getColumn()), doubleValue);
                }
            }
        }
        // add new value, check its type from the dirty value
        if (instance.getDirtyTuple().getCell(instance.getAttribute()).getValue() instanceof String) {
            wekaInstance.setValue(numberOfAttributes - 3, instance.getUpdatedValue());
        } else {
            double doubleValue = Double.parseDouble(instance.getUpdatedValue());
            // NB: as published, the parsed numeric value is never written back; a
            // wekaInstance.setValue(numberOfAttributes - 3, doubleValue) call
            // appears to be missing here.
        }
        // add similarity
        wekaInstance.setValue(numberOfAttributes - 2, instance.getSimilarityScore());
        double[] result = getPrediction(wekaInstance);
        // now convert this result into readable form
        ClassificationResult classificationResult = new ClassificationResult(result,
                wekaInstance.attribute(this.numberOfAttributes - 1));
        return classificationResult;
    }
From source file:Reader.KnnClassifier.java
    /**
     * Looks at the k closest known instances to try and guess which letter is in
     * an image.
     *
     * @param instance - The instance to classify
     * @param k - The number of neighbors to look at when determining the class
     * @return A double value representing the letter in the image
     */
    public double classifyInstance(Instance instance, int k) {
        int size = trainingData.numInstances();
        int attributes = trainingData.numAttributes() - 1; // skip the class attribute
        float dist;
        // Keyed by distance, so the map stays sorted; note that two neighbors at
        // exactly the same distance share a key and the later one replaces the earlier.
        Map<Float, Instance> neighbors = new TreeMap<>();
        Instance test;
        for (int i = 0; i < size; i++) {
            dist = 0;
            test = trainingData.instance(i);
            // L1 (Manhattan) distance over all non-class attributes; since both
            // instances share a header, this is equivalent to test.value(j) - instance.value(j)
            for (int j = 0; j < attributes; j++) {
                dist += Math.abs(test.value(test.attribute(j)) - instance.value(test.attribute(j)));
            }
            neighbors.put(dist, test);
        }
        return findMostCommon(neighbors, k);
    }
From source file:regression.logisticRegression.LogisticRegressionCorrect.java
    public void weka(JTextArea output) throws FileNotFoundException, IOException, Exception {
        this.finalPoints = new ArrayList<>();
        BufferedReader reader = new BufferedReader(new FileReader("weka.arff"));
        Instances instances = new Instances(reader);
        instances.setClassIndex(instances.numAttributes() - 1);
        String[] options = new String[4];
        options[0] = "-R";
        options[1] = "1.0E-8";
        options[2] = "-M";
        options[3] = "-1";
        logistic.setOptions(options);
        logistic.buildClassifier(instances);
        for (int i = 0; i < instances.numInstances(); i++) {
            weka.core.Instance inst = instances.instance(i);
            Double classifiedClass = 1.0;
            if (logistic.classifyInstance(inst) == 1.0) {
                classifiedClass = 0.0;
            }
            System.out.println("classify: " + inst.attribute(0) + "|" + inst.value(0) + "->" + classifiedClass);
            double[] distributions = logistic.distributionForInstance(inst);
            output.append("For x= " + inst.value(0) + " the probability of the event occurring is: "
                    + distributions[0] + ", so it belongs to class: " + classifiedClass + "\n");
            this.finalPoints.add(new Point(inst.value(0), classifiedClass));
            this.finalProbPoints.add(new Point(inst.value(0), distributions[0]));
            for (int j = 0; j < distributions.length; j++) {
                System.out.println("distribution: " + inst.value(0) + "->" + distributions[j]);
            }
        }
        // evaluate classifier and print some statistics
        Evaluation eval = new Evaluation(instances);
        eval.evaluateModel(logistic, instances);
        FastVector pred = eval.predictions();
        for (int i = 0; i < eval.predictions().size(); i++) {
            // loop body empty in the source
        }
        System.out.println(eval.toSummaryString("\nResults\n======\n", false));
    }
From source file:regression.logisticRegression.LogisticRegressionCorrect.java
    public void singleTest(Instances instances, JTextArea output) throws Exception {
        for (int i = 0; i < instances.numInstances(); i++) {
            weka.core.Instance inst = instances.instance(i);
            Double classifiedClass = 1.0;
            if (logistic.classifyInstance(inst) == 1.0) {
                classifiedClass = 0.0;
            }
            System.out.println("classify: " + inst.attribute(0) + "|" + inst.value(0) + "->" + classifiedClass);
            double[] distributions = logistic.distributionForInstance(inst);
            output.append("For x= " + inst.value(0) + " the probability of the event occurring is: "
                    + distributions[0] + ", so it belongs to class: " + classifiedClass + "\n");
        }
    }
From source file:script.OperationsHandler.java
    private String getCreditStatus(Instance record) {
        // decode the nominal status cell: value(i) holds the label's index
        return record.attribute(STATUS_ATTRIBUTE_NUMBER).value((int) record.value(STATUS_ATTRIBUTE_NUMBER));
    }
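This one-liner is the standard idiom for decoding a nominal cell: value(i) stores the label's index as a double, and attribute(i).value((int) ...) maps the index back to its string label. A self-contained illustration follows; the position of STATUS_ATTRIBUTE_NUMBER and the "good"/"bad" labels are invented for the sketch, not taken from the source:

    import java.util.ArrayList;
    import java.util.Arrays;
    import weka.core.Attribute;
    import weka.core.DenseInstance;
    import weka.core.Instance;
    import weka.core.Instances;

    public class NominalDecode {
        static final int STATUS_ATTRIBUTE_NUMBER = 0; // assumed position, for illustration

        static String getCreditStatus(Instance record) {
            // label index stored in the cell -> human-readable label
            return record.attribute(STATUS_ATTRIBUTE_NUMBER)
                    .value((int) record.value(STATUS_ATTRIBUTE_NUMBER));
        }

        public static void main(String[] args) {
            ArrayList<Attribute> atts = new ArrayList<Attribute>();
            atts.add(new Attribute("status", new ArrayList<String>(Arrays.asList("good", "bad"))));
            Instances data = new Instances("credit", atts, 1);
            Instance rec = new DenseInstance(1);
            rec.setDataset(data);
            rec.setValue(0, "bad");
            System.out.println(getCreditStatus(rec)); // prints "bad"
        }
    }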
From source file:sirius.nnsearcher.main.Constraints.java
License:Open Source License
    public boolean isViolated(FastaFormat fastaFormat, weka.core.Instance instance,
            ApplicationData applicationData) {
        double attributeValue;
        if (instance == null) {
            // attribute not found in Instance - generate it
            attributeValue = GenerateFeatures.getValue(fastaFormat, this.featureData, applicationData);
        } else {
            if (this.index == -1) {
                // find the index of the attribute in the instance
                findIndex(instance);
            } else if (instance.attribute(this.index).name().compareTo(this.attributeName.name()) != 0) {
                findIndex(instance);
            }
            if (this.index == -1) {
                // attribute not found in Instance - generate it
                attributeValue = GenerateFeatures.getValue(fastaFormat, this.featureData, applicationData);
            } else {
                attributeValue = instance.value(this.index);
            }
        }
        // Then check whether it violates the constraint. Operator codes:
        // 0) >=  1) >  2) ==  3) !=  4) <=  5) <
        boolean violated = true;
        switch (operator) {
        case 0:
            if (attributeValue >= this.value)
                violated = false;
            break;
        case 1:
            if (attributeValue > this.value)
                violated = false;
            break;
        case 2:
            if (attributeValue == this.value)
                violated = false;
            break;
        case 3:
            if (attributeValue != this.value)
                violated = false;
            break;
        case 4:
            if (attributeValue <= this.value)
                violated = false;
            break;
        case 5:
            if (attributeValue < this.value)
                violated = false;
            break;
        default:
            throw new Error("Unknown Operator");
        }
        return violated;
    }
From source file:swm.project.mappings.OurDistance.java
    @Override
    public double distance(Instance instnc, Instance instnc1) {
        int num = instnc.numAttributes();
        List<Double> movieClusterRating1 = new ArrayList<Double>(),
                movieClusterRating2 = new ArrayList<Double>();
        Attribute id = instnc.attribute(0);
        // NB: as published this is a stub - the loop body is empty, the rating
        // lists and id are never used, and the method always returns 1.
        for (int index = 1; index < num; index++) {
        }
        return 1;
    }
From source file:tr.gov.ulakbim.jDenetX.experiments.wrappers.EvalActiveBoostingID.java
License:Open Source License
    protected void selfTrain(Instance testInst) {
        int maxInstances = this.maxInstancesOption.getValue();
        int poolSizeRatio = poolSizeOption.getValue();
        int poolLimit = maxInstances / poolSizeRatio;
        int poolCount = 0;
        VotedInstancePool vInstPool = SelfOzaBoostID.getVotedInstancePool();
        noOfClassesInPool = vInstPool.getNoOfClasses();
        System.out.println("No of instances in the pool: " + vInstPool.getSize());
        System.out.println("No of classes in the pool: " + noOfClassesInPool);
        if (vInstPool.getSize() > 10) {
            ArrayList<Attribute> attrs = new ArrayList<Attribute>();
            for (int i = 0; i < testInst.numAttributes(); i++) {
                attrs.add(testInst.attribute(i));
            }
            Instances instances = new Instances("instances", attrs, vInstPool.getSize());
            Iterator instanceIt = vInstPool.iterator();
            System.out.println("Size of pool: " + vInstPool.getSize());
            while (instanceIt.hasNext() && poolCount < poolLimit) {
                VotedInstance vInstance = (VotedInstance) instanceIt.next();
                instances.add(vInstance.getInstance());
                poolCount++;
            }
            System.out.println("Size of instances: " + instances.size());
            instances = clusterInstances(instances);
            InstanceStream activeStream = new CachedInstancesStream(instances);
            System.out.println("Self-training has started");
            System.out.println("Number of self-training instances: " + instances.numInstances());
            long treeSize = vInstPool.getSize();
            long limit = treeSize / SAMPLING_LIMIT;
            Instance inst = null;
            for (long j = 0; j < limit && activeStream.hasMoreInstances(); j++) {
                inst = activeStream.nextInstance();
                if (inst.numAttributes() == attrs.size()) {
                    model.trainOnInstance(inst);
                }
            }
        }
    }