Example usage for weka.core Instances delete

List of usage examples for weka.core Instances delete

Introduction

On this page you can find example usage for weka.core Instances delete.

Prototype

public void delete() 

Document

Removes all instances from the set.
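Before the full examples, here is a minimal, self-contained sketch of the call (assuming the pre-3.6 Weka API that the examples on this page use; the class name DeleteDemo and the attribute name "x" are illustrative only):

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;

public class DeleteDemo {
    public static void main(String[] args) {
        // Build a tiny dataset with a single numeric attribute.
        FastVector attrs = new FastVector();
        attrs.addElement(new Attribute("x"));
        Instances data = new Instances("demo", attrs, 0);
        data.add(new Instance(1.0, new double[] { 42.0 }));

        System.out.println(data.numInstances());  // 1
        data.delete();                            // removes every instance
        System.out.println(data.numInstances());  // 0
        System.out.println(data.numAttributes()); // 1 -- the header survives
    }
}

Note that delete() empties the instance list but keeps the attribute structure, which is why the examples below use it to obtain an empty dataset with a known header.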

Usage

From source file:org.hypknowsys.wumprep.WUMprepWrapper.java

License:Open Source License

/**
 * Creates a dummy dataset from the input format, sends it to the script and
 * reads the script output's ARFF information that in turn is used to set
 * <code>this</code>' output format.
 *
 * This mechanism allows a WUMprep script to alter the recordset layout as
 * long as this change is documented by the output ARFF header. For example,
 * the <tt>dnsLookup.pl</tt> script changes the <code>host_ip</code> field
 * to <code>host_dns</code> when performing IP lookups.
 * 
 * @param instanceInfo
 *          The input format.
 * @return Object containing the output instance structure.
 */
public Instances getScriptOutputFormat(Instances instanceInfo) {
    Instances outputFormat = instanceInfo;
    Instances testData = new Instances(instanceInfo);
    Instance testInstance = new Instance(testData.numAttributes());

    testData.delete();
    testInstance.setDataset(testData);

    // Initialize the testInstance's attribute values
    for (int i = 0; i < testInstance.numAttributes(); i++) {
        String aName = testInstance.attribute(i).name();
        if (aName.equals("host_ip"))
            testInstance.setValue(i, "127.0.0.1");
        else if (aName.equals("ts_day"))
            testInstance.setValue(i, "01");
        else if (aName.equals("ts_month"))
            testInstance.setValue(i, "Jan");
        else if (aName.equals("ts_year"))
            testInstance.setValue(i, "2005");
        else if (aName.equals("ts_hour"))
            testInstance.setValue(i, "11");
        else if (aName.equals("ts_minutes"))
            testInstance.setValue(i, "55");
        else if (aName.equals("ts_seconds"))
            testInstance.setValue(i, "00");
        else if (aName.equals("tz"))
            testInstance.setValue(i, "+0200");
        else
            testInstance.setValue(i, aName + "-dummy");
    }

    testData.add(testInstance);

    WUMprepWrapper testWrapper = new WUMprepWrapper(m_scriptName, m_args);
    testWrapper.start();
    testWrapper.push(testData.toString());
    testWrapper.push((Instance) null);

    class ErrorReader extends Thread implements Serializable {
        /**  */
        private static final long serialVersionUID = -488779846603045891L;
        PipedReader m_input = null;

        /**
         * Helper class for reading stderr output from the WUMprep script
         * 
         * @param input The script's wrapper's stderr pipe reader
         */
        ErrorReader(PipedReader input) {
            m_input = input;
            this.start();
        }

        public void run() {
            try {
                while (m_input.read() >= 0)
                    ;
            } catch (IOException e) {
                // The pipe was closed or reading failed; report and let the thread exit.
                e.printStackTrace();
            }
        }
    }

    // read the stderr output
    new ErrorReader(testWrapper.getErrorPipe());

    try {
        // read the script's output and use its ARFF header as the new format
        outputFormat = new org.hypknowsys.wumprep4weka.core.Instances(testWrapper.getOutputPipe());

    } catch (IOException e) {
        // Reading the script's output failed; fall back to returning the input format.
        e.printStackTrace();
    }

    return outputFormat;
}

From source file:org.opentox.jaqpot3.qsar.trainer.FastRbfNnTrainer.java

License:Open Source License

@Override
public Model train(Instances training) throws JaqpotException {
    /*
     * For this algorithm we need to remove all string and nominal attributes,
     * and additionally we will remove the target attribute too.
     */

    Instances cleanedTraining = training;

    Attribute targetAttribute = cleanedTraining.attribute(targetUri.toString());
    if (targetAttribute == null) {
        throw new JaqpotException("The prediction feature you provided was not found in the dataset. "
                + "Prediction Feature provided by the client: " + targetUri.toString());
    } else {
        if (!targetAttribute.isNumeric()) {
            throw new JaqpotException("The prediction feature you provided is not numeric.");
        }
    }
    double[] targetValues = new double[cleanedTraining.numInstances()];
    for (int i = 0; i < cleanedTraining.numInstances(); i++) {
        targetValues[i] = cleanedTraining.instance(i).value(targetAttribute);
    }
    cleanedTraining.deleteAttributeAt(targetAttribute.index());

    Instances rbfNnNodes = new Instances(cleanedTraining);
    rbfNnNodes.delete();
    double[] potential = calculatePotential(cleanedTraining);

    int L = 1;
    int i_star = locationOfMax(potential);
    double potential_star = potential[i_star];
    double potential_star_1 = potential_star;
    do {
        rbfNnNodes.add(cleanedTraining.instance(i_star));
        potential = updatePotential(potential, i_star, cleanedTraining);
        i_star = locationOfMax(potential);
        double diff = potential[i_star] - e * potential_star_1;
        if (Double.isNaN(diff)) {
            throw new JaqpotException("Not converging");
        }
        if (potential[i_star] <= e * potential_star_1) {
            break;
        } else {
            L = L + 1;
            potential_star = potential[i_star];
        }
    } while (true);

    /* P-nearest neighbors */
    double[] pNn = null;
    double[] sigma = new double[rbfNnNodes.numInstances()];
    double s = 0;
    for (int i = 0; i < rbfNnNodes.numInstances(); i++) {
        pNn = new double[cleanedTraining.numInstances()];
        s = 0;
        for (int j = 0; j < cleanedTraining.numInstances(); j++) {
            if (j != i) {
                pNn[j] = squaredNormDifference(rbfNnNodes.instance(i), cleanedTraining.instance(j));
            } else {
                pNn[j] = 0;
            }
        }
        int[] minPoints = locationOfpMinimum(p, pNn); // indices refer to 'cleanedTraining'
        for (int q : minPoints) {
            s += squaredNormDifference(rbfNnNodes.instance(i), cleanedTraining.instance(q));
        }
        sigma[i] = Math.sqrt(s / p);
    }

    /* Calculate the matrix X = (l_{i,j})_{i,j} */
    double[][] X = new double[cleanedTraining.numInstances()][rbfNnNodes.numInstances()];
    for (int i = 0; i < cleanedTraining.numInstances(); i++) {

        //for DoA
        for (int j = 0; j < rbfNnNodes.numInstances(); j++) {
            X[i][j] = rbf(sigma[j], cleanedTraining.instance(i), rbfNnNodes.instance(j));
        }
    }

    Jama.Matrix X_matr = new Matrix(X);
    Jama.Matrix Y_matr = new Matrix(targetValues, targetValues.length);
    Jama.Matrix coeffs = (X_matr.transpose().times(X_matr)).inverse().times(X_matr.transpose()).times(Y_matr);

    FastRbfNnModel actualModel = new FastRbfNnModel();
    actualModel.setAlpha(a);
    actualModel.setBeta(b);
    actualModel.setEpsilon(e);
    actualModel.setNodes(rbfNnNodes);
    actualModel.setSigma(sigma);
    actualModel.setLrCoefficients(coeffs.getColumnPackedCopy());

    Model m = new Model(Configuration.getBaseUri().augment("model", getUuid().toString()));
    m.setAlgorithm(getAlgorithm());
    m.setCreatedBy(getTask().getCreatedBy());
    m.setDataset(datasetUri);
    m.addDependentFeatures(dependentFeature);

    Feature predictedFeature = publishFeature(m, dependentFeature.getUnits(),
            "Created as prediction feature for the RBF NN model " + m.getUri(), datasetUri, featureService);
    m.addPredictedFeatures(predictedFeature);

    m.setIndependentFeatures(independentFeatures);
    try {
        m.setActualModel(new ActualModel(actualModel));
    } catch (NotSerializableException ex) {
        logger.error("The provided instance of model cannot be serialized! Critical Error!", ex);
    }
    m.setParameters(new HashSet<Parameter>());
    Parameter<Double> aParam = new Parameter("a", new LiteralValue<Double>(a))
            .setScope(Parameter.ParameterScope.OPTIONAL);
    aParam.setUri(Services.anonymous().augment("parameter", RANDOM.nextLong()));
    Parameter<Double> bParam = new Parameter("b", new LiteralValue<Double>(b))
            .setScope(Parameter.ParameterScope.OPTIONAL);
    bParam.setUri(Services.anonymous().augment("parameter", RANDOM.nextLong()));
    Parameter<Double> eParam = new Parameter("e", new LiteralValue<Double>(e))
            .setScope(Parameter.ParameterScope.OPTIONAL);
    eParam.setUri(Services.anonymous().augment("parameter", RANDOM.nextLong()));

    m.getParameters().add(aParam);
    m.getParameters().add(bParam);
    m.getParameters().add(eParam);

    //save the instances being predicted to abstract trainer and set the features to be excluded for calculating DoA
    predictedInstances = training;
    excludeAttributesDoA.add(dependentFeature.getUri().toString());

    return m;
}

From source file:org.scripps.branch.classifier.ManualTree.java

License:Open Source License

/**
 * Recursively generates a tree.
 * 
 * @param data
 *            the data to work with
 * @param classProbs
 *            the class distribution
 * @param header
 *            the header of the data
 * @param debug
 *            whether debugging is on
 * @param depth
 *            the current depth
 * @param node
 *            the JSON node describing this position in the tree
 * @param parent_index
 *            the attribute index passed down from the parent split
 * @param m_distributionData
 *            HashMap to put distribution data in if getSplitData is true in
 *            any node
 * @param requiredInstances
 *            receives the instances of a leaf when pickInst is set
 * @param custom_classifiers
 *            custom classifiers usable as additional split attributes
 * @param cSList
 *            custom sets (visual splits) usable as additional split
 *            attributes
 * @param ccService
 *            service for building new custom classifiers
 * @param ds
 *            the dataset the tree is built on
 * @throws Exception
 *             if generation fails
 */
protected void buildTree(Instances data, double[] classProbs, Instances header, boolean debug, int depth,
        JsonNode node, int parent_index, HashMap m_distributionData, Instances requiredInstances,
        LinkedHashMap<String, Classifier> custom_classifiers, List<CustomSet> cSList,
        CustomClassifierService ccService, Dataset ds) throws Exception {

    if (mapper == null) {
        mapper = new ObjectMapper();
    }
    // Store structure of dataset, set minimum number of instances
    m_Info = header;
    m_Debug = debug;

    // if in dead json return
    if (node == null) {
        m_Attribute = -1;
        m_ClassDistribution = null;
        m_Prop = null;
        return;
    }

    // Make leaf if there are no training instances
    if (data.numInstances() == 0) {
        m_Attribute = -1;
        m_ClassDistribution = null;
        m_Prop = null;
        return;
    }

    // Check if node doesn't contain enough instances or is pure
    // or maximum depth reached
    m_ClassDistribution = classProbs.clone();
    cSetList = cSList;
    ccSer = ccService;
    d = ds;

    // if (Utils.sum(m_ClassDistribution) < 2 * m_MinNum
    // || Utils.eq(m_ClassDistribution[Utils.maxIndex(m_ClassDistribution)],
    // Utils
    // .sum(m_ClassDistribution))
    // || ((getMaxDepth() > 0) && (depth >= getMaxDepth()))) {
    // // Make leaf
    // m_Attribute = -1;
    // m_Prop = null;
    // return;
    // }

    // Investigate the selected attribute
    int attIndex = parent_index;

    // options child added by web client developer
    // TODO work with him to make a more meaningful structure...
    JsonNode options = node.get("options");
    if (options == null) {
        return;
    }
    String kind = options.get("kind").asText();
    JsonNode att_name = options.get("attribute_name");
    Boolean getSplitData = false;
    Boolean getInstanceData = false;
    // this allows me to modify the json tree structure to add data about
    // the evaluation
    ObjectNode evalresults = (ObjectNode) options;
    ObjectNode _node = (ObjectNode) node;
    //For Roc - Node Match
    _node.set("roc_uid_0", null);
    _node.set("roc_uid_1", null);
    Map<String, JsonNode> sons = new HashMap<String, JsonNode>();
    // String name = node_name.asText();
    if (kind != null && kind.equals("split_node") && att_name != null) {
        // attIndex = data.attribute(node_id.asText()).index();
        if (!att_name.asText().equals("") && !att_name.asText().contains("custom_classifier")
                && !att_name.asText().contains("custom_tree") && !att_name.asText().contains("custom_set")) {
            attIndex = data.attribute(att_name.asText()).index();
        } else {
            if (att_name.asText().contains("custom_set")) {
                int ctr = 0;
                for (CustomSet c : cSList) {
                    if (c.getId() == Long.valueOf(att_name.asText().replace("custom_set_", ""))) {
                        break;
                    }
                    ctr++;
                }
                attIndex = (data.numAttributes() - 1) + custom_classifiers.size() + ctr;
            } else {
                if (att_name.asText().contains("custom_classifier_new")) {
                    HashMap mp = ccSer.buildCustomClasifier(data,
                            Long.valueOf(att_name.asText().replace("custom_classifier_new_", "")));
                    Classifier fc = (Classifier) mp.get("classifier");
                    custom_classifiers.put("custom_classifier_" + mp.get("id"), fc);
                    evalresults.put("unique_id", "custom_classifier_" + mp.get("id"));
                    evalresults.put("attribute_name", "custom_classifier_" + mp.get("id"));
                    att_name = evalresults.get("attribute_name");
                }
                int ctr = 0;
                for (String key : custom_classifiers.keySet()) {
                    if (key.equals(att_name.asText())) {
                        break;
                    }
                    ctr++;
                }
                attIndex = (data.numAttributes() - 1) + ctr;
            }
        }
        if (node.get("getSplitData") != null) {
            getSplitData = node.get("getSplitData").asBoolean();
        }
        JsonNode split_values = node.get("children");
        int c = 0;
        if (split_values != null && split_values.size() > 0) {
            for (JsonNode svalue : split_values) {
                String key = svalue.get("name").asText();
                JsonNode son = svalue.get("children").get(0);
                if (key.contains("<")) {
                    key = "low";
                } else if (key.contains(">")) {
                    key = "high";
                }
                sons.put(key, son);
                c++;
            }
        }
        // LOGGER.debug("Id name "+att_name+" index "+attIndex+" type "+kind+" sons "+c);
    } else {
        // LOGGER.debug("non split node, name "+att_name+" type "+kind);
    }

    double[] vals = new double[data.numAttributes() + custom_classifiers.size() + cSetList.size()];
    double[][][] dists = new double[data.numAttributes() + custom_classifiers.size() + cSetList.size()][0][0];
    double[][] props = new double[data.numAttributes() + custom_classifiers.size() + cSetList.size()][0];
    double[] splits = new double[data.numAttributes() + custom_classifiers.size() + cSetList.size()];
    listOfFc = custom_classifiers;
    // Compute class distributions and value of splitting
    // criterion for each attribute
    HashMap<String, Double> mp = new HashMap<String, Double>();
    if (attIndex >= data.numAttributes() && attIndex < data.numAttributes() + custom_classifiers.size()) {
        mp = distribution(props, dists, attIndex, data, Double.NaN, custom_classifiers);
    } else if (attIndex >= data.numAttributes() + custom_classifiers.size() - 1) {
        mp = distribution(props, dists, attIndex, data, Double.NaN, custom_classifiers);
    } else {
        if (options.get("split_point") != null) {
            mp = distribution(props, dists, attIndex, data, options.get("split_point").asDouble(),
                    custom_classifiers);
        } else {
            mp = distribution(props, dists, attIndex, data, Double.NaN, custom_classifiers);
        }
    }

    splits[attIndex] = mp.get("split_point");
    vals[attIndex] = gain(dists[attIndex], priorVal(dists[attIndex]));

    m_Attribute = attIndex;
    double[][] distribution = dists[m_Attribute];

    // stop if input json tree does not contain any more children
    // replacing Utils.gr(vals[m_Attribute], 0)&&
    if (kind != null && kind.equals("split_node") && att_name != null) {
        //Assign Classes for custom sets(visual splits).
        m_ClassAssignment.put("Inside", Utils.maxIndex(dists[m_Attribute][1]));
        m_ClassAssignment.put("Outside", (Utils.maxIndex(dists[m_Attribute][1]) == 1) ? 0 : 1);
        // Build subtrees
        m_SplitPoint = splits[m_Attribute];
        m_Prop = props[m_Attribute];
        Instances[] subsets = splitData(data);
        m_Successors = new ManualTree[distribution.length];

        // record quantity and quality measures for node
        int quantity = 0;
        for (int i = 0; i < distribution.length; i++) {
            quantity += subsets[i].numInstances();
        }
        evalresults.put("bin_size", quantity);
        evalresults.put("infogain", vals[m_Attribute]);
        evalresults.put("majClass", m_Info.classAttribute().value(Utils.maxIndex(m_ClassDistribution)));
        evalresults.put("split_point", m_SplitPoint);
        evalresults.put("orig_split_point", mp.get("orig_split_point"));

        if (Boolean.TRUE.equals(getSplitData)) {
            addDistributionData(data, m_Attribute, m_distributionData);
        }

        int maxIndex = 0;
        double maxCount = 0;
        double errors = 0;
        double[] classDist = new double[2];
        double pct_correct = 0;
        double bin_size = 0;

        for (int i = 0; i < distribution.length; i++) {
            m_Successors[i] = new ManualTree();
            m_Successors[i].setKValue(m_KValue);
            m_Successors[i].setMaxDepth(getMaxDepth());

            //To compute class distribution for split node.
            for (int j = 0; j < distribution[i].length; j++) {
                classDist[j] += distribution[i][j];
            }
            // test an instance to see which child node to send its subset
            // down.
            // after split, should hold for all in set
            String child_name = "";
            Instances subset = subsets[i];
            if (subset == null || subset.numInstances() == 0) {
                continue;
            }
            Instance inst = subset.instance(0);
            if (m_Attribute >= data.numAttributes()
                    && m_Attribute < data.numAttributes() + custom_classifiers.size()) {
                double predictedClass = custom_classifiers.get(att_name.asText()).classifyInstance(inst);
                child_name = m_Info.classAttribute().value((int) predictedClass);

            } else if (m_Attribute >= data.numAttributes() + custom_classifiers.size() - 1) {
                CustomSet cSet = getReqCustomSet(
                        m_Attribute - (data.numAttributes() - 1 + custom_classifiers.size()), cSetList);
                JsonNode vertices = mapper.readTree(cSet.getConstraints());
                ArrayList<double[]> attrVertices = generateVerticesList(vertices);
                List<Attribute> aList = generateAttributeList(cSet, data, ds);
                double[] testPoint = new double[2];
                testPoint[0] = inst.value(aList.get(0));
                testPoint[1] = inst.value(aList.get(1));
                int check = checkPointInPolygon(attrVertices, testPoint);
                if (check == 0) {
                    child_name = "Outside";
                } else {
                    child_name = "Inside";
                }
            } else {
                // which nominal attribute is this split linked to?
                if (subset.attribute(m_Attribute).isNominal()) {
                    child_name = inst.attribute(m_Attribute).value((int) inst.value(m_Attribute));
                }
                // otherwise, if we have a numeric attribute, are we going
                // high or low?
                else if (data.attribute(m_Attribute).isNumeric()) {
                    if (inst.value(m_Attribute) < m_SplitPoint) {
                        child_name = "low";
                    } else {
                        child_name = "high";
                    }
                }
            }
            m_Successors[i].setM_ClassAssignment((HashMap<String, Integer>) m_ClassAssignment.clone());
            JsonNode son = sons.get(child_name);
            if (son != null) {
                m_Successors[i].buildTree(subsets[i], distribution[i], header, m_Debug, depth + 1, son,
                        attIndex, m_distributionData, requiredInstances, custom_classifiers, cSList, ccService,
                        ds);
            } else {
                // if we are a split node with no input children, we need to
                // add them into the tree
                // JsonNode split_values = node.get("children");
                if (kind != null && kind.equals("split_node")) {
                    ArrayNode children = (ArrayNode) node.get("children");
                    if (children == null) {
                        children = mapper.createArrayNode();
                    }
                    ObjectNode child = mapper.createObjectNode();
                    child.put("name", child_name);
                    ObjectNode c_options = mapper.createObjectNode();
                    c_options.put("attribute_name", child_name);
                    c_options.put("kind", "split_value");
                    child.put("options", c_options);
                    children.add(child);
                    _node.put("children", children);
                    m_Successors[i].buildTree(subsets[i], distribution[i], header, m_Debug, depth + 1, child,
                            attIndex, m_distributionData, requiredInstances, custom_classifiers, cSList,
                            ccService, ds);

                } else {
                    // for leaf nodes, calling again ends the cycle and
                    // fills up the bins appropriately
                    m_Successors[i].buildTree(subsets[i], distribution[i], header, m_Debug, depth + 1, node,
                            attIndex, m_distributionData, requiredInstances, custom_classifiers, cSList,
                            ccService, ds);
                }
            }
        }

        // Compute pct_correct from distributions and send to split_node
        bin_size = Utils.sum(classDist);
        maxIndex = Utils.maxIndex(classDist);
        maxCount = classDist[maxIndex];
        String class_name = m_Info.classAttribute().value(maxIndex);
        _node.put("majClass", class_name);
        errors += bin_size - maxCount;

        pct_correct = (quantity - errors) / quantity;
        evalresults.put("pct_correct", pct_correct);
        // If all successors are non-empty, we don't need to store the class
        // distribution
        boolean emptySuccessor = false;
        for (int i = 0; i < subsets.length; i++) {
            if (m_Successors[i].m_ClassDistribution == null) {
                emptySuccessor = true;
                break;
            }
        }
        if (!emptySuccessor) {
            m_ClassDistribution = null;
        }
    } else {
        m_Attribute = -1;
        if (kind != null && kind.equals("leaf_node")) {
            double bin_size = 0, maxCount = 0;
            int maxIndex = 0;
            double errors = 0;
            double pct_correct = 0;
            if (m_ClassDistribution != null) {
                bin_size = Utils.sum(m_ClassDistribution);
                // This is where the class of the leaf is decided:
                // it takes the majority class.
                maxIndex = Utils.maxIndex(m_ClassDistribution);
                maxCount = m_ClassDistribution[maxIndex];
                errors = bin_size - maxCount;
                pct_correct = (bin_size - errors) / bin_size;
            }
            if (node.get("pickInst") != null) {
                getInstanceData = node.get("pickInst").asBoolean();
            }
            if (Boolean.TRUE.equals(getInstanceData)) {
                requiredInstances.delete();
                for (int k = 0; k < data.numInstances(); k++) {
                    requiredInstances.add(data.instance(k));
                }
            }
            String class_name = m_Info.classAttribute().value(maxIndex);
            _node.put("majClass", class_name);
            if (node.get("setClass") != null) {
                String setClass = node.get("setClass").asText();
                class_name = m_Info.classAttribute().value(m_ClassAssignment.get(setClass));
            }
            _node.put("name", class_name);
            evalresults.put("attribute_name", class_name);
            evalresults.put("kind", "leaf_node");
            evalresults.put("bin_size", Utils.doubleToString(bin_size, 2));
            evalresults.put("errors", Utils.doubleToString(errors, 2));
            evalresults.put("pct_correct", Utils.doubleToString(pct_correct, 2));
            this.setJsonnode(_node);
        } else {
            // Make leaf

            // add the data to the json object
            double bin_size = 0, maxCount = 0;
            int maxIndex = 0;
            double errors = 0;
            double pct_correct = 0;
            if (m_ClassDistribution != null) {
                bin_size = Utils.sum(m_ClassDistribution);
                // This is where the class of the leaf is decided:
                // it takes the majority class.
                maxIndex = Utils.maxIndex(m_ClassDistribution);
                maxCount = m_ClassDistribution[maxIndex];
                errors = bin_size - maxCount;
                pct_correct = (bin_size - errors) / bin_size;
            }
            ArrayNode children = (ArrayNode) node.get("children");
            if (children == null) {
                children = mapper.createArrayNode();
            }
            ObjectNode child = mapper.createObjectNode();
            String class_name = m_Info.classAttribute().value(maxIndex);
            child.put("majClass", class_name);
            String nodeName = node.get("name").asText();
            if (nodeName.equals("Inside") || nodeName.equals("Outside")) {
                child.put("setClass", nodeName);
                class_name = m_Info.classAttribute().value(m_ClassAssignment.get(nodeName));
            }
            child.put("name", class_name);
            ObjectNode c_options = mapper.createObjectNode();
            c_options.put("attribute_name", class_name);
            c_options.put("kind", "leaf_node");
            c_options.put("bin_size", Utils.doubleToString(bin_size, 2));
            c_options.put("errors", Utils.doubleToString(errors, 2));
            c_options.put("pct_correct", Utils.doubleToString(pct_correct, 2));
            child.put("options", c_options);
            children.add(child);
            _node.put("children", children);
            this.setJsonnode(child);
        }
    }
}

From source file:tubes1.myClassifiers.myC45.java

private Instances filterInstanceWithAttributeValue(Instances instances, Attribute attribute, String value) {
    Instances newInstances = new Instances(instances);
    newInstances.delete();
    int numInstances = instances.numInstances();
    for (int i = 0; i < numInstances; i++) {
        Instance instance = instances.instance(i);
        if (instance.stringValue(attribute).equals(value)) {
            newInstances.add(instance);
        }
    }
    return newInstances;
}
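
The copy-then-delete idiom in the example above can also be expressed with Weka's header-only copy constructor. A sketch, assuming the standard Instances(Instances, int) constructor; the class and helper names are illustrative only:

import weka.core.Instances;

public class EmptyCopy {
    public static Instances emptyCopy(Instances data) {
        // Instances(Instances dataset, int capacity) copies only the header
        // (attribute definitions), so the result starts with zero instances
        // and no subsequent delete() call is needed.
        return new Instances(data, 0);
    }
}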

From source file:view.centerPanels.ClusteringPredictPnlCenter.java

private void btnStartActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_btnStartActionPerformed

    Instances test = new Instances(Data.getInstance().getInstances());
    test.delete();

    // Checks whether the entered values are valid;
    // if something was entered incorrectly, a JOptionPane pops up
    // saying what is wrong, e.g. the attribute name.
    for (int i = 0; i < fields.size(); i++) {
        String text = fields.get(i).getText().trim();

        // Skips empty fields, since for clustering they are simply ignored;
        // for classification that is not the case.
        if (!text.equals("")) {

            if (test.attribute(i).isNominal()) {
                boolean correct = false;
                for (int j = 0; j < test.attribute(i).numValues(); j++) {
                    if (text.equals(test.attribute(i).value(j))) {
                        correct = true;
                    }
                }
                if (!correct) {
                    JOptionPane.showMessageDialog(this,
                            "Incorrect format for attribute " + test.attribute(i).name());
                    break;
                }
            }

            if (test.attribute(i).isNumeric()) {
                try {
                    double value = Double.parseDouble(text);
                } catch (Exception e) {
                    JOptionPane.showMessageDialog(this,
                            "Incorrect format for attribute " + test.attribute(i).name());
                    break;
                }
            }

        }
    }

    int numAttributes = test.numAttributes();

    Instance instance = new Instance(numAttributes);

    // This Remove filter is only needed for clustering.
    String remove = "";

    boolean hasRemove = false;
    for (int i = 0; i < fields.size(); i++) {
        String text = fields.get(i).getText().trim();

        // An empty string must not occur here.
        if (text.equals("")) {
            remove = remove + (i + 1) + ",";
            hasRemove = true;
        } else {
            try {
                double value = Double.parseDouble(text);
                instance.setValue(i, value);

            } catch (Exception e) {

                instance.setValue(i, text);
            }
        }

    }
    if (hasRemove) {
        remove = remove.substring(0, remove.length() - 1);
    }

    // The Instances object here is called 'test'; this is how the single instance is added.
    test.add(instance);
    // Now run the evaluation; what follows is the clustering part.

    Remove removeFilter = new Remove();
    removeFilter.setAttributeIndices(remove);

    FilteredClusterer filteredClusterer = new FilteredClusterer();
    try {

        filteredClusterer.setClusterer(kMeans);
        filteredClusterer.setFilter(removeFilter);
        filteredClusterer.buildClusterer(Data.getInstance().getInstances());

    } catch (Exception e) {
        // Building the clusterer failed; report it instead of silently swallowing it.
        e.printStackTrace();
    }

    ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(filteredClusterer);
    try {
        eval.evaluateClusterer(test);
    } catch (Exception ex) {
        Logger.getLogger(ClusteringPredictPnlCenter.class.getName()).log(Level.SEVERE, null, ex);
    }

    String[] results = eval.clusterResultsToString().split("\n");

    String cluster = results[results.length - 1].split(" ")[0];

    textAreaResult.setText("This instance belongs to \ncluster number:  " + cluster + ".\n\n"
            + "Take a look at the visualization \nfor a better feeling about \nthis instance");

    test.delete();

}