Example usage for weka.core Instance attribute

List of usage examples for weka.core Instance attribute

Introduction

On this page you can find example usages of the weka.core Instance attribute method.

Prototype

public Attribute attribute(int index);

Source Link

Document

Returns the attribute with the given index.

Usage

From source file:org.scripps.branch.classifier.ManualTree.java

License:Open Source License

/**
 * Recursively builds this tree node (and its subtrees) from a JSON tree
 * description, computing split statistics for the chosen attribute and
 * writing evaluation results (bin size, info gain, pct correct, majority
 * class) back into the JSON nodes as it goes.
 *
 * @param data
 *            the training data reaching this node
 * @param classProbs
 *            the class distribution at this node
 * @param header
 *            the header (structure) of the data
 * @param debug
 *            whether debugging is on
 * @param depth
 *            the current depth in the tree
 * @param node
 *            the JSON node describing this position in the tree (null marks
 *            a dead branch)
 * @param parent_index
 *            attribute index inherited from the parent split
 * @param m_distributionData
 *            HashMap to put distribution data if getSplitData is true in
 *            any node
 * @param requiredInstances
 *            receives this node's instances when the JSON node sets
 *            "pickInst"
 * @param custom_classifiers
 *            classifiers addressable as virtual attribute indices beyond
 *            the dataset's own attributes
 * @param cSList
 *            custom (visual) sets, also addressable as virtual attributes
 * @param ccService
 *            service used to build new custom classifiers on demand
 * @param ds
 *            the dataset descriptor
 * @throws Exception
 *             if generation fails
 */
protected void buildTree(Instances data, double[] classProbs, Instances header, boolean debug, int depth,
        JsonNode node, int parent_index, HashMap m_distributionData, Instances requiredInstances,
        LinkedHashMap<String, Classifier> custom_classifiers, List<CustomSet> cSList,
        CustomClassifierService ccService, Dataset ds) throws Exception {

    // Lazily create the shared JSON mapper.
    if (mapper == null) {
        mapper = new ObjectMapper();
    }
    // Store structure of dataset, set minimum number of instances
    m_Info = header;
    m_Debug = debug;

    // if in dead json return
    if (node == null) {
        m_Attribute = -1;
        m_ClassDistribution = null;
        m_Prop = null;
        return;
    }

    // Make leaf if there are no training instances
    if (data.numInstances() == 0) {
        m_Attribute = -1;
        m_ClassDistribution = null;
        m_Prop = null;
        return;
    }

    // Check if node doesn't contain enough instances or is pure
    // or maximum depth reached
    m_ClassDistribution = classProbs.clone();
    cSetList = cSList;
    ccSer = ccService;
    d = ds;

    // if (Utils.sum(m_ClassDistribution) < 2 * m_MinNum
    // || Utils.eq(m_ClassDistribution[Utils.maxIndex(m_ClassDistribution)],
    // Utils
    // .sum(m_ClassDistribution))
    // || ((getMaxDepth() > 0) && (depth >= getMaxDepth()))) {
    // // Make leaf
    // m_Attribute = -1;
    // m_Prop = null;
    // return;
    // }

    // Investigate the selected attribute
    int attIndex = parent_index;

    // options child added by web client developer
    // TODO work with him to make a more meaningful structure...
    JsonNode options = node.get("options");
    if (options == null) {
        return;
    }
    String kind = options.get("kind").asText();
    JsonNode att_name = options.get("attribute_name");
    Boolean getSplitData = false;
    Boolean getInstanceData = false;
    // this allows me to modify the json tree structure to add data about
    // the evaluation
    ObjectNode evalresults = (ObjectNode) options;
    ObjectNode _node = (ObjectNode) node;
    //For Roc - Node Match
    _node.set("roc_uid_0", null);
    _node.set("roc_uid_1", null);
    Map<String, JsonNode> sons = new HashMap<String, JsonNode>();
    // String name = node_name.asText();
    if (kind != null && kind.equals("split_node") && att_name != null) { //
        // attIndex = data.attribute(node_id.asText()).index();
        // Resolve the attribute index: a plain name maps to a dataset
        // attribute; "custom_set_*" / "custom_classifier_*" names map to
        // virtual indices past the end of the dataset's attributes.
        if (!att_name.asText().equals("") && !att_name.asText().contains("custom_classifier")
                && !att_name.asText().contains("custom_tree") && !att_name.asText().contains("custom_set")) {
            attIndex = data.attribute(att_name.asText()).index();
        } else {
            if (att_name.asText().contains("custom_set")) {
                // Find the position of the named custom set in cSList.
                int ctr = 0;
                for (CustomSet c : cSList) {
                    if (c.getId() == Long.valueOf(att_name.asText().replace("custom_set_", ""))) {
                        break;
                    }
                    ctr++;
                }
                attIndex = (data.numAttributes() - 1) + custom_classifiers.size() + ctr;
            } else {
                if (att_name.asText().contains("custom_classifier_new")) {
                    // Build the new custom classifier now and rewrite the
                    // JSON node to reference it by its assigned id.
                    HashMap mp = ccSer.buildCustomClasifier(data,
                            Long.valueOf(att_name.asText().replace("custom_classifier_new_", "")));
                    Classifier fc = (Classifier) mp.get("classifier");
                    custom_classifiers.put("custom_classifier_" + mp.get("id"), fc);
                    evalresults.put("unique_id", "custom_classifier_" + mp.get("id"));
                    evalresults.put("attribute_name", "custom_classifier_" + mp.get("id"));
                    att_name = evalresults.get("attribute_name");
                }
                int ctr = 0;
                for (String key : custom_classifiers.keySet()) {
                    if (key.equals(att_name.asText())) {
                        break;
                    }
                    ctr++;
                }
                attIndex = (data.numAttributes() - 1) + ctr;
            }
        }
        if (node.get("getSplitData") != null) {
            getSplitData = node.get("getSplitData").asBoolean();
        }
        // Index this node's children by branch name; numeric branches are
        // normalized to "low"/"high".
        JsonNode split_values = node.get("children");
        int c = 0;
        if (split_values != null && split_values.size() > 0) {
            for (JsonNode svalue : split_values) {
                String key = svalue.get("name").asText();
                JsonNode son = svalue.get("children").get(0);
                if (key.contains("<")) {
                    key = "low";
                } else if (key.contains(">")) {
                    key = "high";
                }
                sons.put(key, son);
                c++;
            }
        }
        // LOGGER.debug("Id name "+att_name+" index "+attIndex+" type "+kind+" sons "+c);
    } else {
        // LOGGER.debug("non split node, name "+att_name+" type "+kind);
    }

    // Per-attribute slots sized for dataset attributes + virtual attributes
    // (custom classifiers and custom sets).
    double[] vals = new double[data.numAttributes() + custom_classifiers.size() + cSetList.size()];
    double[][][] dists = new double[data.numAttributes() + custom_classifiers.size() + cSetList.size()][0][0];
    double[][] props = new double[data.numAttributes() + custom_classifiers.size() + cSetList.size()][0];
    double[] splits = new double[data.numAttributes() + custom_classifiers.size() + cSetList.size()];
    listOfFc = custom_classifiers;
    // Compute class distributions and value of splitting
    // criterion for each attribute
    HashMap<String, Double> mp = new HashMap<String, Double>();
    // NOTE(review): the second branch's "- 1" makes its range overlap the
    // first branch's range; confirm the intended boundary between
    // custom-classifier and custom-set indices.
    if (attIndex >= data.numAttributes() && attIndex < data.numAttributes() + custom_classifiers.size()) {
        mp = distribution(props, dists, attIndex, data, Double.NaN, custom_classifiers);
    } else if (attIndex >= data.numAttributes() + custom_classifiers.size() - 1) {
        mp = distribution(props, dists, attIndex, data, Double.NaN, custom_classifiers);
    } else {
        if (options.get("split_point") != null) {
            mp = distribution(props, dists, attIndex, data, options.get("split_point").asDouble(),
                    custom_classifiers);
        } else {
            mp = distribution(props, dists, attIndex, data, Double.NaN, custom_classifiers);
        }
    }

    // NOTE(review): unboxes mp.get("split_point") — assumes distribution()
    // always populates that key; would NPE otherwise.
    splits[attIndex] = mp.get("split_point");
    vals[attIndex] = gain(dists[attIndex], priorVal(dists[attIndex]));

    m_Attribute = attIndex;
    double[][] distribution = dists[m_Attribute];

    // stop if input json tree does not contain any more children
    // replacing Utils.gr(vals[m_Attribute], 0)&&
    if (kind != null && kind.equals("split_node") && att_name != null) {
        //Assign Classes for custom sets(visual splits).
        m_ClassAssignment.put("Inside", Utils.maxIndex(dists[m_Attribute][1]));
        m_ClassAssignment.put("Outside", (Utils.maxIndex(dists[m_Attribute][1]) == 1) ? 0 : 1);
        // Build subtrees
        m_SplitPoint = splits[m_Attribute];
        m_Prop = props[m_Attribute];
        Instances[] subsets = splitData(data);
        m_Successors = new ManualTree[distribution.length];

        // record quantity and quality measures for node
        int quantity = 0;
        for (int i = 0; i < distribution.length; i++) {
            quantity += subsets[i].numInstances();
        }
        evalresults.put("bin_size", quantity);
        evalresults.put("infogain", vals[m_Attribute]);
        evalresults.put("majClass", m_Info.classAttribute().value(Utils.maxIndex(m_ClassDistribution)));
        evalresults.put("split_point", m_SplitPoint);
        evalresults.put("orig_split_point", mp.get("orig_split_point"));

        if (Boolean.TRUE.equals(getSplitData)) {
            addDistributionData(data, m_Attribute, m_distributionData);
        }

        int maxIndex = 0;
        double maxCount = 0;
        double errors = 0;
        double[] classDist = new double[2];
        double pct_correct = 0;
        double bin_size = 0;

        for (int i = 0; i < distribution.length; i++) {
            m_Successors[i] = new ManualTree();
            m_Successors[i].setKValue(m_KValue);
            m_Successors[i].setMaxDepth(getMaxDepth());

            //To compute class distribution for split node.
            for (int j = 0; j < distribution[i].length; j++) {
                classDist[j] += distribution[i][j];
            }
            // test an instance to see which child node to send its subset
            // down.
            // after split, should hold for all in set
            String child_name = "";
            Instances subset = subsets[i];
            if (subset == null || subset.numInstances() == 0) {
                continue;
            }
            Instance inst = subset.instance(0);
            // Virtual custom-classifier attribute: branch on the predicted class.
            if (m_Attribute >= data.numAttributes()
                    && m_Attribute < data.numAttributes() + custom_classifiers.size()) {
                double predictedClass = custom_classifiers.get(att_name.asText()).classifyInstance(inst);
                child_name = m_Info.classAttribute().value((int) predictedClass);

            } else if (m_Attribute >= data.numAttributes() + custom_classifiers.size() - 1) {
                // Virtual custom-set attribute: branch on point-in-polygon test.
                CustomSet cSet = getReqCustomSet(
                        m_Attribute - (data.numAttributes() - 1 + custom_classifiers.size()), cSetList);
                JsonNode vertices = mapper.readTree(cSet.getConstraints());
                ArrayList<double[]> attrVertices = generateVerticesList(vertices);
                List<Attribute> aList = generateAttributeList(cSet, data, ds);
                double[] testPoint = new double[2];
                testPoint[0] = inst.value(aList.get(0));
                testPoint[1] = inst.value(aList.get(1));
                int check = checkPointInPolygon(attrVertices, testPoint);
                if (check == 0) {
                    child_name = "Outside";
                } else {
                    child_name = "Inside";
                }
            } else {
                // which nominal attribute is this split linked to?
                if (subset.attribute(m_Attribute).isNominal()) {
                    child_name = inst.attribute(m_Attribute).value((int) inst.value(m_Attribute));
                }
                // otherwise, if we have a numeric attribute, are we going
                // high or low?
                else if (data.attribute(m_Attribute).isNumeric()) {
                    if (inst.value(m_Attribute) < m_SplitPoint) {
                        child_name = "low";
                    } else {
                        child_name = "high";
                    }
                }
            }
            m_Successors[i].setM_ClassAssignment((HashMap<String, Integer>) m_ClassAssignment.clone());
            JsonNode son = sons.get(child_name);
            if (son != null) {
                m_Successors[i].buildTree(subsets[i], distribution[i], header, m_Debug, depth + 1, son,
                        attIndex, m_distributionData, requiredInstances, custom_classifiers, cSList, ccService,
                        ds);
            } else {
                // if we are a split node with no input children, we need to
                // add them into the tree
                // JsonNode split_values = node.get("children");
                if (kind != null && kind.equals("split_node")) {
                    ArrayNode children = (ArrayNode) node.get("children");
                    if (children == null) {
                        children = mapper.createArrayNode();
                    }
                    ObjectNode child = mapper.createObjectNode();
                    child.put("name", child_name);
                    ObjectNode c_options = mapper.createObjectNode();
                    c_options.put("attribute_name", child_name);
                    c_options.put("kind", "split_value");
                    child.put("options", c_options);
                    children.add(child);
                    _node.put("children", children);
                    m_Successors[i].buildTree(subsets[i], distribution[i], header, m_Debug, depth + 1, child,
                            attIndex, m_distributionData, requiredInstances, custom_classifiers, cSList,
                            ccService, ds);

                } else {
                    // for leaf nodes, calling again ends the cycle and
                    // fills up the bins appropriately
                    m_Successors[i].buildTree(subsets[i], distribution[i], header, m_Debug, depth + 1, node,
                            attIndex, m_distributionData, requiredInstances, custom_classifiers, cSList,
                            ccService, ds);
                }
            }
        }

        // Compute pct_correct from distributions and send to split_node
        bin_size = Utils.sum(classDist);
        maxIndex = Utils.maxIndex(classDist);
        maxCount = classDist[maxIndex];
        String class_name = m_Info.classAttribute().value(maxIndex);
        _node.put("majClass", class_name);
        errors += bin_size - maxCount;

        pct_correct = (quantity - errors) / quantity;
        evalresults.put("pct_correct", pct_correct);
        // If all successors are non-empty, we don't need to store the class
        // distribution
        boolean emptySuccessor = false;
        for (int i = 0; i < subsets.length; i++) {
            if (m_Successors[i].m_ClassDistribution == null) {
                emptySuccessor = true;
                break;
            }
        }
        if (!emptySuccessor) {
            m_ClassDistribution = null;
        }
    } else {
        m_Attribute = -1;
        if (kind != null && kind.equals("leaf_node")) {
            double bin_size = 0, maxCount = 0;
            int maxIndex = 0;
            double errors = 0;
            double pct_correct = 0;
            if (m_ClassDistribution != null) {
                bin_size = Utils.sum(m_ClassDistribution);
                // The leaf's class is the majority class of its distribution.
                maxIndex = Utils.maxIndex(m_ClassDistribution);
                maxCount = m_ClassDistribution[maxIndex];
                errors = bin_size - maxCount;
                pct_correct = (bin_size - errors) / bin_size;
            }
            if (node.get("pickInst") != null) {
                getInstanceData = node.get("pickInst").asBoolean();
            }
            if (Boolean.TRUE.equals(getInstanceData)) {
                // Replace requiredInstances' contents with this leaf's data.
                requiredInstances.delete();
                for (int k = 0; k < data.numInstances(); k++) {
                    requiredInstances.add(data.instance(k));
                }
            }
            String class_name = m_Info.classAttribute().value(maxIndex);
            _node.put("majClass", class_name);
            if (node.get("setClass") != null) {
                String setClass = node.get("setClass").asText();
                class_name = m_Info.classAttribute().value(m_ClassAssignment.get(setClass));
            }
            _node.put("name", class_name);
            evalresults.put("attribute_name", class_name);
            evalresults.put("kind", "leaf_node");
            evalresults.put("bin_size", Utils.doubleToString(bin_size, 2));
            evalresults.put("errors", Utils.doubleToString(errors, 2));
            evalresults.put("pct_correct", Utils.doubleToString(pct_correct, 2));
            this.setJsonnode(_node);
        } else {
            // Make leaf

            // add the data to the json object
            double bin_size = 0, maxCount = 0;
            int maxIndex = 0;
            double errors = 0;
            double pct_correct = 0;
            if (m_ClassDistribution != null) {
                bin_size = Utils.sum(m_ClassDistribution);
                // The leaf's class is the majority class of its distribution.
                maxIndex = Utils.maxIndex(m_ClassDistribution);
                maxCount = m_ClassDistribution[maxIndex];
                errors = bin_size - maxCount;
                pct_correct = (bin_size - errors) / bin_size;
            }
            ArrayNode children = (ArrayNode) node.get("children");
            if (children == null) {
                children = mapper.createArrayNode();
            }
            ObjectNode child = mapper.createObjectNode();
            String class_name = m_Info.classAttribute().value(maxIndex);
            child.put("majClass", class_name);
            String nodeName = node.get("name").asText();
            if (nodeName.equals("Inside") || nodeName.equals("Outside")) {
                child.put("setClass", nodeName);
                class_name = m_Info.classAttribute().value(m_ClassAssignment.get(nodeName));
            }
            child.put("name", class_name);
            ObjectNode c_options = mapper.createObjectNode();
            c_options.put("attribute_name", class_name);
            c_options.put("kind", "leaf_node");
            c_options.put("bin_size", Utils.doubleToString(bin_size, 2));
            c_options.put("errors", Utils.doubleToString(errors, 2));
            c_options.put("pct_correct", Utils.doubleToString(pct_correct, 2));
            child.put("options", c_options);
            children.add(child);
            _node.put("children", children);
            this.setJsonnode(child);
        }
    }
}

From source file:org.ssase.debt.classification.OnlineMultilayerPerceptron.java

License:Open Source License

/**
 * Wraps a single instance in a one-row dataset named "CurrentTrain",
 * copying the instance's attribute definitions and using the last
 * attribute as the class attribute.
 *
 * @param inst the instance to wrap
 * @return a dataset containing only {@code inst}
 */
public Instances getInstances(Instance inst) {
    int attributeCount = inst.numAttributes();

    // Collect the attribute definitions of the given instance.
    FastVector attributeVector = new FastVector();
    for (int index = 0; index < attributeCount; index++) {
        attributeVector.addElement(inst.attribute(index));
    }

    // Build the single-instance dataset; the class is the last attribute.
    Instances dataset = new Instances("CurrentTrain", attributeVector, 0);
    dataset.add(inst);
    dataset.setClassIndex(attributeCount - 1);
    return dataset;
}

From source file:qa.qcri.nadeef.core.utils.classification.ClassifierBase.java

License:Open Source License

/**
 * Get Prediction for a given instance based on current model
 *
 * @param instance/*from   www .ja v  a 2  s  . com*/
 */
public ClassificationResult getPrediction(TrainingInstance instance) throws NadeefClassifierException {
    // transform training instance into real instance
    Instance wekaInstance = new Instance(numberOfAttributes);
    wekaInstance.setDataset(instances);
    // add values from old tuple
    for (Cell cell : instance.getDirtyTuple().getCells()) {
        if (isPermitted(cell.getColumn())) {
            if (cell.getValue() instanceof String) {
                wekaInstance.setValue(attributeIndex.get(cell.getColumn()), cell.getValue().toString());
            } else {
                double doubleValue = Double.parseDouble(cell.getValue().toString());
                wekaInstance.setValue(attributeIndex.get(cell.getColumn()), doubleValue);
            }
        }
    }

    // add new value, check its type from the dirty value
    if (instance.getDirtyTuple().getCell(instance.getAttribute()).getValue() instanceof String) {
        wekaInstance.setValue(numberOfAttributes - 3, instance.getUpdatedValue());
    } else {
        double doubleValue = Double.parseDouble(instance.getUpdatedValue());
    }
    // add similarity
    wekaInstance.setValue(numberOfAttributes - 2, instance.getSimilarityScore());

    double[] result = getPrediction(wekaInstance);
    // now convert this result into readable form
    ClassificationResult classificationResult = new ClassificationResult(result,
            wekaInstance.attribute(this.numberOfAttributes - 1));
    return classificationResult;
}

From source file:Reader.KnnClassifier.java

/**
 * Looks at the k closest known instances to try and guess which letter is in
 * an image.
 *
 * Distance metric: sum of absolute differences (Manhattan distance) over
 * every attribute except the last one (assumed to be the class attribute).
 *
 * @param instance - The instance to classify
 * @param k - The number of neighbors to look at when determining the class
 * @return A double value representing the letter in the image
 */
public double classifyInstance(Instance instance, int k) {
    int size = trainingData.numInstances();
    int attributes = trainingData.numAttributes() - 1;
    float dist;
    // NOTE(review): keying the TreeMap by distance means training instances
    // with EQUAL distances overwrite each other, so fewer than k distinct
    // neighbors may survive. Fixing this requires changing findMostCommon's
    // signature — confirm whether ties matter for this dataset.
    Map<Float, Instance> neighbors = new TreeMap<>();

    Instance test;

    for (int i = 0; i < size; i++) {
        dist = 0;
        test = trainingData.instance(i);

        // Accumulate the per-attribute absolute differences.
        for (int j = 0; j < attributes; j++) {
            dist += Math.abs(test.value(test.attribute(j)) - instance.value(test.attribute(j)));
        }

        neighbors.put(dist, test);
    }

    // TreeMap iterates in ascending distance order; findMostCommon takes
    // the majority class among the first k entries.
    return findMostCommon(neighbors, k);
}

From source file:regression.logisticRegression.LogisticRegressionCorrect.java

/**
 * Trains the logistic model on "weka.arff", classifies every instance,
 * records the classified and probability points, and prints an evaluation
 * summary to stdout.
 *
 * @param output text area that receives a per-instance report
 * @throws FileNotFoundException if "weka.arff" is missing
 * @throws IOException if the ARFF file cannot be read
 * @throws Exception if training or evaluation fails
 */
public void weka(JTextArea output) throws FileNotFoundException, IOException, Exception {
    this.finalPoints = new ArrayList<>();

    Instances instances;
    // FIX: the reader was previously never closed (resource leak).
    try (BufferedReader reader = new BufferedReader(new FileReader("weka.arff"))) {
        instances = new Instances(reader);
    }
    instances.setClassIndex(instances.numAttributes() - 1);

    // -R <ridge>, -M <max iterations> (-1 = iterate until convergence).
    String[] options = new String[4];
    options[0] = "-R";
    options[1] = "1.0E-8";
    options[2] = "-M";
    options[3] = "-1";

    logistic.setOptions(options);

    logistic.buildClassifier(instances);

    for (int i = 0; i < instances.numInstances(); i++) {
        weka.core.Instance inst = instances.instance(i);
        // NOTE(review): the predicted label is deliberately inverted here
        // (raw 1.0 -> class 0.0, anything else -> 1.0); preserved as-is.
        Double classifiedClass = 1.0;
        if (logistic.classifyInstance(inst) == 1.0) {
            classifiedClass = 0.0;
        }

        System.out.println("classify: " + inst.attribute(0) + "|" + inst.value(0) + "->" + classifiedClass);
        double[] distributions = logistic.distributionForInstance(inst);
        output.append("Dla x= " + inst.value(0) + " prawdopodobiestwo wystpnienia zdarzenia wynosi: "
                + distributions[0] + " zatem naley on do klasy: " + classifiedClass + "\n");
        this.finalPoints.add(new Point(inst.value(0), classifiedClass));
        this.finalProbPoints.add(new Point(inst.value(0), distributions[0]));
        for (int j = 0; j < distributions.length; j++) {
            System.out.println("distribution: " + inst.value(0) + "->" + distributions[j]);
        }
    }

    // evaluate classifier and print some statistics
    // (removed: unused FastVector of predictions and an empty loop over it)
    Evaluation eval = new Evaluation(instances);
    eval.evaluateModel(logistic, instances);
    System.out.println(eval.toSummaryString("\nResults\n======\n", false));
}

From source file:regression.logisticRegression.LogisticRegressionCorrect.java

/**
 * Classifies each instance with the trained logistic model and reports the
 * value/class pair to stdout and to the supplied text area.
 *
 * @param instances the instances to classify
 * @param output text area receiving the per-instance report
 * @throws Exception if classification fails
 */
public void singleTest(Instances instances, JTextArea output) throws Exception {

    int total = instances.numInstances();
    for (int idx = 0; idx < total; idx++) {
        weka.core.Instance current = instances.instance(idx);

        // Inverted mapping: a raw prediction of 1.0 is reported as class 0.0.
        Double predictedLabel = (logistic.classifyInstance(current) == 1.0) ? 0.0 : 1.0;
        System.out.println("classify: " + current.attribute(0) + "|" + current.value(0) + "->" + predictedLabel);
        double[] distributions = logistic.distributionForInstance(current);
        output.append("Dla x= " + current.value(0) + " prawdopodobiestwo wystpnienia zdarzenia wynosi: "
                + distributions[0] + " zatem naley on do klasy: " + predictedLabel + "\n");
    }
}

From source file:script.OperationsHandler.java

/** Returns the nominal label of the credit-status attribute for the given record. */
private String getCreditStatus(Instance record) {
    int statusValueIndex = (int) record.value(STATUS_ATTRIBUTE_NUMBER);
    return record.attribute(STATUS_ATTRIBUTE_NUMBER).value(statusValueIndex);
}

From source file:sirius.nnsearcher.main.Constraints.java

License:Open Source License

/**
 * Checks whether the attribute value for this constraint violates it.
 *
 * The value is taken from the given instance when the cached attribute
 * index can be resolved; otherwise it is regenerated from the fasta data.
 * Operators: 0) >=  1) >  2) ==  3) !=  4) <=  5) <  — the constraint is
 * violated when the operator's comparison does NOT hold.
 *
 * @return true if the constraint is violated
 */
public boolean isViolated(FastaFormat fastaFormat, weka.core.Instance instance,
        ApplicationData applicationData) {
    double attributeValue;
    if (instance == null) {
        // No instance available: compute the feature value directly.
        attributeValue = GenerateFeatures.getValue(fastaFormat, this.featureData, applicationData);
    } else {
        // Re-resolve the cached attribute index when missing or stale
        // (short-circuit keeps instance.attribute(-1) from being called).
        if (this.index == -1
                || instance.attribute(this.index).name().compareTo(this.attributeName.name()) != 0) {
            findIndex(instance);
        }
        if (this.index == -1) {
            // Attribute still not present in the instance — generate it.
            attributeValue = GenerateFeatures.getValue(fastaFormat, this.featureData, applicationData);
        } else {
            attributeValue = instance.value(this.index);
        }
    }

    // Violated unless the operator's comparison holds; negated comparisons
    // (rather than inverted operators) preserve NaN semantics exactly.
    switch (operator) {
    case 0:
        return !(attributeValue >= this.value);
    case 1:
        return !(attributeValue > this.value);
    case 2:
        return !(attributeValue == this.value);
    case 3:
        return !(attributeValue != this.value);
    case 4:
        return !(attributeValue <= this.value);
    case 5:
        return !(attributeValue < this.value);
    default:
        throw new Error("Unknown Operator");
    }
}

From source file:swm.project.mappings.OurDistance.java

@Override
public double distance(Instance instnc, Instance instnc1) {

    // NOTE(review): this method appears unfinished — the loop body is empty,
    // the two rating lists and the id attribute are never used, and the
    // method always returns the constant 1 regardless of its inputs.
    // Confirm what distance measure was actually intended before relying on
    // any clustering built on top of it.
    int num = instnc.numAttributes();
    List<Double> movieClusterRating1 = new ArrayList<Double>(), movieClusterRating2 = new ArrayList<Double>();

    Attribute id = instnc.attribute(0);
    for (int index = 1; index < num; index++) {

    }
    return 1;
}

From source file:tr.gov.ulakbim.jDenetX.experiments.wrappers.EvalActiveBoostingID.java

License:Open Source License

/**
 * Self-training pass: copies up to {@code maxInstances / poolSizeRatio}
 * voted instances out of the shared pool, clusters them, and feeds a
 * sampled fraction (|pool| / SAMPLING_LIMIT) of the clustered stream back
 * into the model.
 *
 * @param testInst instance whose attribute schema the pooled training set
 *                 must mirror
 */
protected void selfTrain(Instance testInst) {
    int maxInstances = this.maxInstancesOption.getValue();
    int poolSizeRatio = poolSizeOption.getValue();
    // Cap on how many pooled instances may be copied into the training set.
    int poolLimit = maxInstances / poolSizeRatio;
    int poolCount = 0;
    VotedInstancePool vInstPool = SelfOzaBoostID.getVotedInstancePool();
    noOfClassesInPool = vInstPool.getNoOfClasses();

    System.out.println("No of instances in the pool: " + vInstPool.getSize());
    System.out.println("No of classes in the pool: " + noOfClassesInPool);

    // Only self-train once the pool has accumulated enough instances.
    if (vInstPool.getSize() > 10) {
        // Mirror the schema of the test instance.
        ArrayList<Attribute> attrs = new ArrayList<Attribute>();
        for (int i = 0; i < testInst.numAttributes(); i++) {
            attrs.add(testInst.attribute(i));
        }
        Instances instances = new Instances("instances", attrs, vInstPool.getSize());
        Iterator instanceIt = vInstPool.iterator();
        System.out.println("Size of pool: " + vInstPool.getSize());

        // Drain up to poolLimit voted instances into the training set.
        while (instanceIt.hasNext() && poolCount < poolLimit) {
            VotedInstance vInstance = (VotedInstance) instanceIt.next();
            instances.add(vInstance.getInstance()); // redundant (Instances) cast removed
            poolCount++;
        }

        System.out.println("Size of instances: " + instances.size());
        instances = clusterInstances(instances);
        InstanceStream activeStream = new CachedInstancesStream(instances); // redundant cast removed

        System.out.println("Selftraining have been started");
        System.out.println("Number of self training instances: " + instances.numInstances());

        long treeSize = vInstPool.getSize();
        long limit = treeSize / SAMPLING_LIMIT;
        Instance inst = null;

        // Train only on instances whose schema matches the expected one.
        for (long j = 0; j < limit && activeStream.hasMoreInstances(); j++) {
            inst = activeStream.nextInstance();
            if (inst.numAttributes() == attrs.size()) {
                model.trainOnInstance(inst);
            }
        }
    }

}