Example usage for weka.classifiers Classifier classifyInstance

List of usage examples for weka.classifiers Classifier classifyInstance

Introduction

On this page you can find an example usage of weka.classifiers Classifier classifyInstance.

Prototype

public double classifyInstance(Instance instance) throws Exception;

Source Link

Document

Classifies the given test instance.

Usage

From source file:org.opentox.qsar.processors.predictors.SimplePredictor.java

License:Open Source License

/**
 * Perform the prediction which is based on the serialized model file on the server.
 * @param data/*  w  ww .  j a  va2  s.  com*/
 *      Input data for with respect to which the predicitons are calculated
 * @return
 *      A dataset containing the compounds submitted along with their predicted values.
 * @throws QSARException
 *      In case the prediction (as a whole) is not feasible. If the prediction is not
 *      feasible for a single instance, the prediction is set to <code>?</code> (unknown/undefined/missing).
 *      If the prediction is not feasible for all instances, an exception (QSARException) is thrown.
 */
@Override
public Instances predict(final Instances data) throws QSARException {

    Instances dataClone = new Instances(data);
    /**
     * IMPORTANT!
     * String attributes have to be removed from the dataset before
     * applying the prediciton
     */
    dataClone = new AttributeCleanup(ATTRIBUTE_TYPE.string).filter(dataClone);

    /**
     * Set the class attribute of the incoming data to any arbitrary attribute
     * (Choose the last for instance).
     */
    dataClone.setClass(dataClone.attribute(model.getDependentFeature().getURI()));

    /**
     *
     * Create the Instances that will host the predictions. This object contains
     * only two attributes: the compound_uri and the target feature of the model.
     */
    Instances predictions = null;
    FastVector attributes = new FastVector();
    final Attribute compoundAttribute = new Attribute("compound_uri", (FastVector) null);
    final Attribute targetAttribute = dataClone.classAttribute();
    attributes.addElement(compoundAttribute);
    attributes.addElement(targetAttribute);

    predictions = new Instances("predictions", attributes, 0);
    predictions.setClassIndex(1);

    Instance predictionInstance = new Instance(2);
    try {
        final Classifier cls = (Classifier) SerializationHelper.read(filePath);

        for (int i = 0; i < data.numInstances(); i++) {
            try {
                String currentCompound = data.instance(i).stringValue(0);
                predictionInstance.setValue(compoundAttribute, currentCompound);

                if (targetAttribute.type() == Attribute.NUMERIC) {
                    double clsLabel = cls.classifyInstance(dataClone.instance(i));
                    predictionInstance.setValue(targetAttribute, clsLabel);
                } else if (targetAttribute.type() == Attribute.NOMINAL) {
                    double[] clsLable = cls.distributionForInstance(dataClone.instance(i));
                    int indexForNominalElement = maxInArray(clsLable).getPosition();
                    Enumeration nominalValues = targetAttribute.enumerateValues();
                    int counter = 0;
                    String nomValue = "";
                    while (nominalValues.hasMoreElements()) {
                        if (counter == indexForNominalElement) {
                            nomValue = nominalValues.nextElement().toString();
                            break;
                        }
                        counter++;
                    }
                    predictionInstance.setValue(targetAttribute, nomValue);

                    predictionInstance.setValue(targetAttribute, cls.classifyInstance(dataClone.instance(i)));
                }

                predictions.add(predictionInstance);
            } catch (Exception ex) {
                System.out.println(ex);
            }
        }

    } catch (Exception ex) {
    }

    return predictions;
}

From source file:org.scripps.branch.classifier.ManualTree.java

License:Open Source License

/**
 * Computes the class distribution that would result from splitting on the
 * given attribute, and records the subset weight proportions. The attribute
 * index may refer to a regular attribute, a custom classifier "column"
 * (indices just past data.numAttributes()), or a custom point set (indices
 * past those).
 *
 * @param props
 *            output: props[att] is filled with the normalized subset weights
 * @param dists
 *            output: dists[att] is filled with the per-subset class distribution
 * @param att
 *            the attribute index
 * @param data
 *            the data to work with
 * @param givenSplitPoint
 *            a fixed numeric split point to use, or NaN to search for the best one
 * @param custom_classifiers
 *            map of custom classifier ids to their classifiers
 * @return a map with keys "split_point" and "orig_split_point"
 * @throws Exception
 *             if something goes wrong
 */
protected HashMap<String, Double> distribution(double[][] props, double[][][] dists, int att, Instances data,
        double givenSplitPoint, HashMap<String, Classifier> custom_classifiers) throws Exception {

    HashMap<String, Double> mp = new HashMap<String, Double>();
    double splitPoint = givenSplitPoint;
    double origSplitPoint = 0;
    Attribute attribute = null;
    double[][] dist = null;
    int indexOfFirstMissingValue = -1;
    String CustomClassifierId = null;
    CustomSet cSet = null;
    // Decide what the index refers to: custom classifier, custom set, or a
    // plain attribute of the dataset.
    if (att >= data.numAttributes() && att < data.numAttributes() + custom_classifiers.size()) {
        CustomClassifierId = getKeyinMap(custom_classifiers, att, data);
    } else if (att >= data.numAttributes() + custom_classifiers.size()) {
        cSet = getReqCustomSet(att - (data.numAttributes() - 1 + custom_classifiers.size()), cSetList);
    } else {
        attribute = data.attribute(att);
    }
    if (CustomClassifierId == null && cSet == null) {
        if (attribute.isNominal()) {
            // For nominal attributes: one subset per attribute value.
            dist = new double[attribute.numValues()][data.numClasses()];
            for (int i = 0; i < data.numInstances(); i++) {
                Instance inst = data.instance(i);
                if (inst.isMissing(att)) {

                    // Skip missing values at this stage
                    if (indexOfFirstMissingValue < 0) {
                        indexOfFirstMissingValue = i;
                    }
                    continue;
                }
                dist[(int) inst.value(att)][(int) inst.classValue()] += inst.weight();
            }
        } else {

            // For numeric attributes: two subsets around a split point.
            double[][] currDist = new double[2][data.numClasses()];
            dist = new double[2][data.numClasses()];

            // Sort data so instances can be shifted across the split in order.
            data.sort(att);

            // Move all instances into second subset
            for (int j = 0; j < data.numInstances(); j++) {
                Instance inst = data.instance(j);
                if (inst.isMissing(att)) {

                    // Can stop as soon as we hit a missing value
                    // (sorting placed all missing values at the end).
                    indexOfFirstMissingValue = j;
                    break;
                }
                currDist[1][(int) inst.classValue()] += inst.weight();
            }

            // Value before splitting
            double priorVal = priorVal(currDist);

            // Save initial distribution
            for (int j = 0; j < currDist.length; j++) {
                System.arraycopy(currDist[j], 0, dist[j], 0, dist[j].length);
            }

            if (Double.isNaN(splitPoint)) {
                // No split point supplied: try all possible split points.
                double currSplit = data.instance(0).value(att);
                double currVal, bestVal = -Double.MAX_VALUE;
                for (int i = 0; i < data.numInstances(); i++) {
                    Instance inst = data.instance(i);
                    if (inst.isMissing(att)) {

                        // Can stop as soon as we hit a missing value
                        break;
                    }

                    // Can we place a sensible split point here?
                    if (inst.value(att) > currSplit) {

                        // Compute gain for split point
                        currVal = gain(currDist, priorVal);

                        // Is the current split point the best point so far?
                        if (currVal > bestVal) {

                            // Store value of current point
                            bestVal = currVal;

                            // Save split point (midpoint between adjacent values)
                            splitPoint = (inst.value(att) + currSplit) / 2.0;
                            origSplitPoint = splitPoint;

                            // Save distribution
                            for (int j = 0; j < currDist.length; j++) {
                                System.arraycopy(currDist[j], 0, dist[j], 0, dist[j].length);
                            }
                        }
                    }
                    currSplit = inst.value(att);

                    // Shift over the weight
                    currDist[0][(int) inst.classValue()] += inst.weight();
                    currDist[1][(int) inst.classValue()] -= inst.weight();
                }
            } else {
                double currSplit = data.instance(0).value(att);
                double currVal, bestVal = -Double.MAX_VALUE;
                // Split data set using the given split point; still track the
                // best-gain point separately as origSplitPoint.
                for (int i = 0; i < data.numInstances(); i++) {
                    Instance inst = data.instance(i);
                    if (inst.isMissing(att)) {
                        // Can stop as soon as we hit a missing value
                        break;
                    }
                    if (inst.value(att) > currSplit) {
                        // Compute gain for split point
                        currVal = gain(currDist, priorVal);
                        // Is the current split point the best point so far?
                        if (currVal > bestVal) {
                            // Store value of current point
                            bestVal = currVal;
                            // Save computed split point
                            origSplitPoint = (inst.value(att) + currSplit) / 2.0;
                        }
                    }
                    currSplit = inst.value(att);
                    // Shift over the weight
                    currDist[0][(int) inst.classValue()] += inst.weight();
                    currDist[1][(int) inst.classValue()] -= inst.weight();
                    if (inst.value(att) <= splitPoint) {
                        // Save distribution since split point is specified
                        for (int j = 0; j < currDist.length; j++) {
                            System.arraycopy(currDist[j], 0, dist[j], 0, dist[j].length);
                        }
                    }
                }
            }
        }
    } else if (CustomClassifierId != null) {
        // Custom classifier split: the predicted class selects the subset.
        Classifier fc = custom_classifiers.get(CustomClassifierId);
        dist = new double[data.numClasses()][data.numClasses()];
        Instance inst;
        for (int i = 0; i < data.numInstances(); i++) {
            inst = data.instance(i);
            double predictedClass = fc.classifyInstance(inst);
            if (predictedClass != Instance.missingValue()) {
                dist[(int) predictedClass][(int) inst.classValue()] += inst.weight();
            }
        }
    } else if (cSet != null) {
        // Custom set split: membership of the (x, y) point in the polygon
        // defined by the set's constraints selects the subset.
        dist = new double[data.numClasses()][data.numClasses()];
        JsonNode vertices = mapper.readTree(cSet.getConstraints());
        ArrayList<double[]> attrVertices = generateVerticesList(vertices);
        List<Attribute> aList = generateAttributeList(cSet, data, d);
        double[] testPoint = new double[2];
        int ctr = 0;
        for (int k = 0; k < data.numInstances(); k++) {
            testPoint = new double[2];
            ctr = 0;
            for (Attribute a : aList) {
                if (!data.instance(k).isMissing(a)) {
                    testPoint[ctr] = data.instance(k).value(a);
                    ctr++;
                }
            }
            int check = checkPointInPolygon(attrVertices, testPoint);
            dist[check][(int) data.instance(k).classValue()] += data.instance(k).weight();
        }
    }

    // Compute weights for subsets
    props[att] = new double[dist.length];
    for (int k = 0; k < props[att].length; k++) {
        props[att][k] = Utils.sum(dist[k]);
    }
    if (Utils.eq(Utils.sum(props[att]), 0)) {
        // All subsets empty: fall back to uniform proportions.
        for (int k = 0; k < props[att].length; k++) {
            props[att][k] = 1.0 / props[att].length;
        }
    } else {
        Utils.normalize(props[att]);
    }

    // Any instances with missing values ?
    if (indexOfFirstMissingValue > -1) {

        // Distribute weights for instances with missing values
        // proportionally to the subset weights computed above.
        for (int i = indexOfFirstMissingValue; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);
            if (attribute.isNominal()) {

                // Need to check if attribute value is missing
                if (inst.isMissing(att)) {
                    for (int j = 0; j < dist.length; j++) {
                        dist[j][(int) inst.classValue()] += props[att][j] * inst.weight();
                    }
                }
            } else {

                // Can be sure that value is missing, so no test required
                // (numeric case: data was sorted and missing values trail).
                for (int j = 0; j < dist.length; j++) {
                    dist[j][(int) inst.classValue()] += props[att][j] * inst.weight();
                }
            }
        }
    }

    // Return distribution and split point
    dists[att] = dist;
    mp.put("split_point", splitPoint);
    mp.put("orig_split_point", origSplitPoint);
    return mp;
}

From source file:org.scripps.branch.classifier.ManualTree.java

License:Open Source License

/**
 * Computes the class distribution of an instance using the decision tree,
 * updating parent-node bookkeeping (m_pred, jsonnode) along the way.
 *
 * @param instance
 *            the instance to compute the distribution for
 * @return the computed class distribution, or null when the node is empty
 *         and unclassified instances are not allowed
 * @throws Exception
 *             if computation fails
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {

    // default model?
    if (m_ZeroR != null) {
        return m_ZeroR.distributionForInstance(instance);
    }

    double[] returnedDist = null;

    // Set Parent Node to set m_pred in case custom set occurs.
    if (m_Successors != null) {
        for (int i = 0; i < m_Successors.length; i++) {
            m_Successors[i].setParentNode(this.parentNode);
        }
    }

    if (m_Info != null) {
        if (m_Attribute > -1 && m_Attribute < m_Info.numAttributes()) {

            // Node is not a leaf: split on a regular attribute.
            if (instance.isMissing(m_Attribute)) {
                LOGGER.debug("Missing attribute");
                // Value is missing: combine successor distributions weighted
                // by the subset proportions m_Prop.
                returnedDist = new double[m_Info.numClasses()];

                // Split instance up
                for (int i = 0; i < m_Successors.length; i++) {
                    double[] help = m_Successors[i].distributionForInstance(instance);
                    if (help != null) {
                        for (int j = 0; j < help.length; j++) {
                            returnedDist[j] += m_Prop[i] * help[j];
                        }
                    }
                }
                LOGGER.debug("Missing Instance");
            } else if (m_Info.attribute(m_Attribute).isNominal()) {

                // For nominal attributes the value indexes the successor directly.
                returnedDist = m_Successors[(int) instance.value(m_Attribute)]
                        .distributionForInstance(instance);
            } else {

                // For numeric attributes: left of the split point goes to
                // successor 0, otherwise successor 1.
                if (instance.value(m_Attribute) < m_SplitPoint) {
                    returnedDist = m_Successors[0].distributionForInstance(instance);
                } else {
                    returnedDist = m_Successors[1].distributionForInstance(instance);
                }
            }
        } else if (m_Attribute >= m_Info.numAttributes() - 1) {
            // Index beyond the regular attributes: a custom (polygon) set or a
            // custom classifier decides the branch.
            if (m_Attribute >= (listOfFc.size() + m_Info.numAttributes()) - 1) {
                CustomSet cSet = getReqCustomSet(m_Attribute - (listOfFc.size() - 1 + m_Info.numAttributes()),
                        cSetList);
                JsonNode vertices = mapper.readTree(cSet.getConstraints());
                ArrayList<double[]> attrVertices = generateVerticesList(vertices);
                List<Attribute> aList = generateAttributeList(cSet, m_Info, d);
                double[] testPoint = new double[2];
                testPoint[0] = instance.value(aList.get(0));
                testPoint[1] = instance.value(aList.get(1));
                int check = checkPointInPolygon(attrVertices, testPoint);
                if (m_Successors[check].getM_Attribute() == -1) {
                    // Chosen successor is a leaf: record which side of the
                    // polygon the instance fell on.
                    parentNode.setM_pred(m_ClassAssignment.get((check == 0) ? "Outside" : "Inside"));
                }
                returnedDist = m_Successors[check].distributionForInstance(instance);

            } else {
                String classifierId = "";
                classifierId = getKeyinMap(listOfFc, m_Attribute, m_Info);
                Classifier fc = listOfFc.get(classifierId);
                double predictedClass = fc.classifyInstance(instance);
                if (predictedClass != Instance.missingValue()) {
                    returnedDist = m_Successors[(int) predictedClass].distributionForInstance(instance);
                }
            }
        }
    }

    // Node is a leaf or successor is empty?
    if ((m_Attribute == -1) || (returnedDist == null)) {

        // Is node empty?
        if (m_ClassDistribution == null) {
            if (getAllowUnclassifiedInstances()) {
                return new double[m_Info.numClasses()];
            } else {
                return null;
            }
        }

        // Else return normalized distribution
        double[] normalizedDistribution = m_ClassDistribution.clone();
        if (this.parentNode != null) {
            this.parentNode.setJsonnode(this.getJsonnode());
        }
        try {
            Utils.normalize(normalizedDistribution);
        } catch (Exception e) {
            // Sum of class counts is 0; the unnormalized clone is returned as-is.
            LOGGER.error("Sum is 0. Coudln't Normalize");
        }
        return normalizedDistribution;
    } else {
        return returnedDist;
    }
}

From source file:org.scripps.branch.classifier.ManualTree.java

License:Open Source License

/**
 * Computes the class distribution of an instance using the decision tree.
 * Follows the same traversal as {@code distributionForInstance} but without
 * the parent-node bookkeeping, and a zero-sum distribution here makes
 * normalization throw instead of being caught.
 *
 * @param instance
 *            the instance to compute the distribution for
 * @return the computed class distribution, or null when the node is empty
 *         and unclassified instances are not allowed
 * @throws Exception
 *             if computation fails
 */
public double[] predForInstance(Instance instance) throws Exception {

    // default model?
    if (m_ZeroR != null) {
        return m_ZeroR.distributionForInstance(instance);
    }

    double[] returnedDist = null;

    if (m_Attribute > -1 && m_Attribute < m_Info.numAttributes()) {

        // Node is not a leaf: split on a regular attribute.
        if (instance.isMissing(m_Attribute)) {

            // Value is missing: combine successor distributions weighted by
            // the subset proportions m_Prop.
            returnedDist = new double[m_Info.numClasses()];

            // Split instance up
            for (int i = 0; i < m_Successors.length; i++) {
                double[] help = m_Successors[i].distributionForInstance(instance);
                if (help != null) {
                    for (int j = 0; j < help.length; j++) {
                        returnedDist[j] += m_Prop[i] * help[j];
                    }
                }
            }
        } else if (m_Info.attribute(m_Attribute).isNominal()) {

            // For nominal attributes the value indexes the successor directly.
            returnedDist = m_Successors[(int) instance.value(m_Attribute)].distributionForInstance(instance);
        } else {

            // For numeric attributes: left of the split point goes to
            // successor 0, otherwise successor 1.
            if (instance.value(m_Attribute) < m_SplitPoint) {
                returnedDist = m_Successors[0].distributionForInstance(instance);
            } else {
                returnedDist = m_Successors[1].distributionForInstance(instance);
            }
        }
    } else if (m_Attribute >= m_Info.numAttributes() - 1) {
        // Index beyond the regular attributes: a custom (polygon) set or a
        // custom classifier decides the branch.
        if (m_Attribute >= (listOfFc.size() + m_Info.numAttributes()) - 1) {
            CustomSet cSet = getReqCustomSet(m_Attribute - (listOfFc.size() - 1 + m_Info.numAttributes()),
                    cSetList);
            JsonNode vertices = mapper.readTree(cSet.getConstraints());
            ArrayList<double[]> attrVertices = generateVerticesList(vertices);
            List<Attribute> aList = generateAttributeList(cSet, m_Info, d);
            double[] testPoint = new double[2];
            testPoint[0] = instance.value(aList.get(0));
            testPoint[1] = instance.value(aList.get(1));
            int check = checkPointInPolygon(attrVertices, testPoint);
            returnedDist = m_Successors[check].distributionForInstance(instance);

        } else {
            String classifierId = "";
            classifierId = getKeyinMap(listOfFc, m_Attribute, m_Info);
            Classifier fc = listOfFc.get(classifierId);
            double predictedClass = fc.classifyInstance(instance);
            if (predictedClass != Instance.missingValue()) {
                returnedDist = m_Successors[(int) predictedClass].distributionForInstance(instance);
            }
        }
    }

    // Node is a leaf or successor is empty?
    if ((m_Attribute == -1) || (returnedDist == null)) {

        // Is node empty?
        if (m_ClassDistribution == null) {
            if (getAllowUnclassifiedInstances()) {
                return new double[m_Info.numClasses()];
            } else {
                return null;
            }
        }

        // Else return normalized distribution
        double[] normalizedDistribution = m_ClassDistribution.clone();
        Utils.normalize(normalizedDistribution);
        return normalizedDistribution;
    } else {
        return returnedDist;
    }
}

From source file:org.scripps.branch.classifier.ManualTree.java

License:Open Source License

/**
 * Splits instances into subsets based on the given split.
 * /*from  w  w  w  .  ja v  a 2  s .  c om*/
 * @param data
 *            the data to work with
 * @return the subsets of instances
 * @throws Exception
 *             if something goes wrong
 */
protected Instances[] splitData(Instances data) throws Exception {

    // Allocate array of Instances objects
    Instances[] subsets = new Instances[m_Prop.length];
    for (int i = 0; i < m_Prop.length; i++) {
        subsets[i] = new Instances(data, data.numInstances());
    }

    if (m_Attribute >= data.numAttributes()) {
        if (m_Attribute >= listOfFc.size() + data.numAttributes() - 1) {
            CustomSet cSet = getReqCustomSet(m_Attribute - (data.numAttributes() - 1 + listOfFc.size()),
                    cSetList);
            JsonNode vertices = mapper.readTree(cSet.getConstraints());
            ArrayList<double[]> attrVertices = generateVerticesList(vertices);
            List<Attribute> aList = generateAttributeList(cSet, data, d);
            double[] testPoint = new double[2];
            int ctr = 0;
            for (int k = 0; k < data.numInstances(); k++) {
                ctr = 0;
                for (Attribute a : aList) {
                    testPoint[ctr] = data.instance(k).value(a);
                    ctr++;
                }
                int check = checkPointInPolygon(attrVertices, testPoint);
                subsets[check].add(data.instance(k));
                continue;
            }
        } else {
            Classifier fc;
            double predictedClass;
            // Go through the data
            for (int i = 0; i < data.numInstances(); i++) {

                // Get instance
                Instance inst = data.instance(i);
                String classifierId = getKeyinMap(listOfFc, m_Attribute, data);
                fc = listOfFc.get(classifierId);
                predictedClass = fc.classifyInstance(inst);
                if (predictedClass != Instance.missingValue()) {
                    subsets[(int) predictedClass].add(inst);
                    continue;
                }

                // Else throw an exception
                throw new IllegalArgumentException("Unknown attribute type");
            }
        }
    } else {
        // Go through the data
        for (int i = 0; i < data.numInstances(); i++) {

            // Get instance
            Instance inst = data.instance(i);

            // Does the instance have a missing value?
            if (inst.isMissing(m_Attribute)) {

                // Split instance up
                for (int k = 0; k < m_Prop.length; k++) {
                    if (m_Prop[k] > 0) {
                        Instance copy = (Instance) inst.copy();
                        copy.setWeight(m_Prop[k] * inst.weight());
                        subsets[k].add(copy);
                    }
                }

                // Proceed to next instance
                continue;
            }

            // Do we have a nominal attribute?
            if (data.attribute(m_Attribute).isNominal()) {
                subsets[(int) inst.value(m_Attribute)].add(inst);

                // Proceed to next instance
                continue;
            }

            // Do we have a numeric attribute?
            if (data.attribute(m_Attribute).isNumeric()) {
                subsets[(inst.value(m_Attribute) < m_SplitPoint) ? 0 : 1].add(inst);

                // Proceed to next instance
                continue;
            }

            // Else throw an exception
            throw new IllegalArgumentException("Unknown attribute type");
        }
    }

    // Save memory
    for (int i = 0; i < m_Prop.length; i++) {
        subsets[i].compactify();
    }

    // Return the subsets
    return subsets;
}

From source file:sg.edu.nus.comp.nlp.ims.classifiers.CWekaEvaluator.java

License:Open Source License

/**
 * Evaluates a lexelt: when it has no statistics or exactly one known sense,
 * the first (or only) sense is assigned to every instance with probability 1;
 * otherwise the serialized classifier for the lexelt is applied.
 *
 * @param p_Lexelt the lexelt (an ILexelt) to evaluate
 * @return a CResultInfo holding per-instance sense probabilities
 * @throws Exception if the lexelt has no tags or classification fails
 */
@Override
public Object evaluate(Object p_Lexelt) throws Exception {
    ILexelt lexelt = (ILexelt) p_Lexelt;
    String lexeltID = lexelt.getID();
    IStatistic stat = (IStatistic) this.getStatistic(lexeltID);
    // type == 1 means "assign a single sense to all instances"; otherwise it
    // is the number of candidate senses (0 triggers the error case below).
    int type = 2;
    String firstSense = this.m_UnknownSense;
    if (stat == null) {
        // No statistics: fall back to the sense index's first sense, or the
        // configured unknown sense if none is available.
        type = 1;
        if (this.m_SenseIndex != null) {
            String first = this.m_SenseIndex.getFirstSense(lexeltID);
            if (first != null) {
                firstSense = first;
            }
        }
    } else {
        if (stat.getTags().size() == 1) {
            type = 1;
            firstSense = stat.getTags().iterator().next();
        } else {
            type = stat.getTags().size();
        }
    }
    int classIdx = this.m_ClassIndex;
    CResultInfo retVal = new CResultInfo();
    switch (type) {
    case 0:
        throw new Exception("no tag for lexelt " + lexeltID + ".");
    case 1:
        // Degenerate case: every instance gets firstSense with probability 1.
        retVal.lexelt = lexelt.getID();
        retVal.docs = new String[lexelt.size()];
        retVal.ids = new String[lexelt.size()];
        retVal.classes = new String[] { firstSense };
        retVal.probabilities = new double[lexelt.size()][1];
        for (int i = 0; i < retVal.probabilities.length; i++) {
            retVal.probabilities[i][0] = 1;
            retVal.docs[i] = lexelt.getInstanceDocID(i);
            retVal.ids[i] = lexelt.getInstanceID(i);
        }
        break;
    default:
        // General case: convert the lexelt to Weka instances and apply the
        // trained model for this lexelt.
        lexelt.setStatistic(stat);
        Classifier classifier = (Classifier) this.getModel(lexeltID);
        ILexeltWriter lexeltWriter = new CWekaSparseLexeltWriter();
        Instances instances = (Instances) lexeltWriter.getInstances(lexelt);
        if (classIdx < 0) {
            // Negative index means "use the last attribute as the class".
            classIdx = instances.numAttributes() - 1;
        }
        instances.setClassIndex(classIdx);
        retVal.lexelt = lexelt.getID();
        retVal.docs = new String[lexelt.size()];
        retVal.ids = new String[lexelt.size()];
        retVal.probabilities = new double[instances.numInstances()][];
        retVal.classes = new String[instances.classAttribute().numValues()];
        for (int i = 0; i < instances.classAttribute().numValues(); i++) {
            retVal.classes[i] = instances.classAttribute().value(i);
        }
        if (instances.classAttribute().isNumeric()) {
            // Numeric class: one-hot the predicted label index.
            for (int i = 0; i < instances.numInstances(); i++) {
                Instance instance = instances.instance(i);
                retVal.docs[i] = lexelt.getInstanceDocID(i);
                retVal.ids[i] = lexelt.getInstanceID(i);
                retVal.probabilities[i] = new double[retVal.classes.length];
                retVal.probabilities[i][(int) classifier.classifyInstance(instance)] = 1;
            }
        } else {
            // Nominal class: use the classifier's full probability distribution.
            for (int i = 0; i < instances.numInstances(); i++) {
                Instance instance = instances.instance(i);
                retVal.docs[i] = lexelt.getInstanceDocID(i);
                retVal.ids[i] = lexelt.getInstanceID(i);
                retVal.probabilities[i] = classifier.distributionForInstance(instance);
            }
        }
    }
    return retVal;
}

From source file:SupervisedMetablocking.SupervisedWEP.java

License:Open Source License

/**
 * Applies the trained classifier to every comparison outside the training
 * set; entity pairs predicted as duplicates are retained.
 *
 * @param classifier the trained classifier to apply
 * @throws Exception if feature extraction or classification fails
 */
@Override
protected void applyClassifier(Classifier classifier) throws Exception {
    for (AbstractBlock block : blocks) {
        ComparisonIterator comparisons = block.getComparisonIterator();
        while (comparisons.hasNext()) {
            Comparison comparison = comparisons.next();
            List<Integer> sharedBlocks = entityIndex.getCommonBlockIndices(block.getBlockIndex(),
                    comparison);
            // Skip comparisons without common blocks and those used for training.
            if (sharedBlocks == null || trainingSet.contains(comparison)) {
                continue;
            }

            Instance features = getFeatures(NON_DUPLICATE, sharedBlocks, comparison);
            int predictedLabel = (int) classifier.classifyInstance(features);
            if (predictedLabel == DUPLICATE) {
                retainedEntities1.add(comparison.getEntityId1());
                retainedEntities2.add(comparison.getEntityId2());
            }
        }
    }
}

From source file:tubes1.Main.java

/**
 * Entry point: trains Id3/J48/myID3/myC45 on the weather dataset with
 * ten-fold cross validation, saves each model, then classifies one instance
 * entered on the console with every saved model.
 *
 * @param args the command line arguments (unused)
 */
public static void main(String[] args) throws IOException, Exception {
    String filename = "weather";

    // The readable .csv format is still unclear, so CSV conversion stays disabled.
    //CsvToArff convert = new CsvToArff(filename+".csv");

    // LOAD FILE
    BufferedReader datafile = readDataFile("src/" + filename + ".arff");
    Instances data = new Instances(datafile);
    data.setClassIndex(data.numAttributes() - 1);
    // END OF LOAD FILE

    CustomFilter fil = new CustomFilter();

    // REMOVE USELESS ATTRIBUTE
    data = fil.removeAttribute(data);
    System.out.println(data);

    // Each classifier gets its own preprocessed copy of the data.
    Instances[] allData = new Instances[4];
    // data for Id3
    allData[0] = fil.resampling(fil.convertNumericToNominal(data));
    // data for J48
    allData[1] = fil.convertNumericToNominal(fil.resampling(data));
    // data for myId3 (same preprocessing as Id3)
    allData[2] = allData[0];
    // data for myC4.5
    allData[3] = fil.resampling(fil.convertNumericToNominal(fil.convertNumericRange(data)));

    data = fil.convertNumericToNominal(data);

    // BUILD CLASSIFIERS
    Classifier[] models = { new Id3(), //C4.5
            new J48(), new myID3(), new myC45() };

    for (int j = 0; j < models.length; j++) {
        FastVector predictions = new FastVector();
        // TEN-FOLD CROSS VALIDATION
        Instances[][] split = crossValidationSplit(allData[j], 10);
        // Separate split into training and testing arrays
        Instances[] trainingSplits = split[0];
        Instances[] testingSplits = split[1];
        System.out.println("\n---------------------------------");
        for (int i = 0; i < trainingSplits.length; i++) {
            try {
                Evaluation validation = classify(models[j], trainingSplits[i], testingSplits[i]);
                predictions.appendElements(validation.predictions());
                // Uncomment to see the summary for each training-testing pair.
                //                    System.out.println(models[j].toString());
            } catch (Exception ex) {
                Logger.getLogger(Main.class.getName()).log(Level.SEVERE, null, ex);
            }
            // Accuracy of the current classifier over the splits seen so far.
            double accuracy = calculateAccuracy(predictions);
            System.out.println(String.format("%.2f%%", accuracy));
        }
        models[j].buildClassifier(allData[j]);
        Model.save(models[j], models[j].getClass().getSimpleName());
    }

    // Build the test instance from console input.
    Instances trainingSet = new Instances("Rel", getFvWekaAttributes(data), 10);
    trainingSet.setClassIndex(data.numAttributes() - 1);

    Instance testInstance = new Instance(data.numAttributes());
    // FIX: reuse one Scanner for all prompts instead of allocating a new one
    // per attribute inside the loop.
    Scanner in = new Scanner(System.in);
    for (int i = 0; i < data.numAttributes() - 1; i++) {
        System.out.print("Masukkan " + data.attribute(i).name() + " : ");
        String att = in.nextLine();
        if (isNumeric(att)) {
            att = fil.convertToFit(att, data, i);
        }
        testInstance.setValue(data.attribute(i), att);
    }

    trainingSet.add(testInstance);

    Classifier Id3 = Model.load("Id3");
    Classifier J48 = Model.load("J48");
    Classifier myID3 = Model.load("myID3");
    Classifier MyC45 = Model.load("myC45");

    // BUG FIX: the original aliased the very same Instance object four times,
    // so each setClassValue overwrote the previous model's prediction before
    // it could be compared. Use independent copies instead.
    Instance A = (Instance) trainingSet.instance(0).copy();
    Instance B = (Instance) trainingSet.instance(0).copy();
    Instance C = (Instance) trainingSet.instance(0).copy();
    Instance D = (Instance) trainingSet.instance(0).copy();

    //test with ID3 WEKA
    A.setClassValue(Id3.classifyInstance(trainingSet.instance(0)));
    System.out.println("Id3 Weka : " + A);

    //test with C4.5 WEKA
    B.setClassValue(J48.classifyInstance(trainingSet.instance(0)));
    System.out.println("C4.5 Weka : " + B);

    //test with my C4.5
    C.setClassValue(MyC45.classifyInstance(trainingSet.instance(0)));
    System.out.println("My C4.5 : " + C);

    //test with my ID3
    D.setClassValue(myID3.classifyInstance(trainingSet.instance(0)));
    System.out.println("My ID3 : " + D);
}

From source file:tucil2ai.Tucil2AI.java

/**
 *
 * @param cls/*w  w  w  .java 2 s. co m*/
 * @param filename
 * @param f
 * @throws Exception
 */
public static void ClassifyJ48(Classifier cls, String filename, Discretize f) throws Exception {
    Instances unlabeled = loadfile(filename, f);
    Instances labeled = new Instances(unlabeled);
    for (int i = 0; i < unlabeled.numInstances(); ++i) {
        double clsLabel = cls.classifyInstance(unlabeled.instance(i));
        labeled.instance(i).setClassValue(clsLabel);
    }

    System.out.println(labeled.toString());
}