Example usage for weka.core Instance copy

Introduction

In this page you can find the example usage for weka.core Instance copy.

Prototype

Object copy();

Source Link

Document

This method produces a shallow copy of an object.

Usage

From source file:moa.tasks.EvaluatePrequentialIntervalAccuracy.java

License:Open Source License

@Override
protected Object doMainTask(TaskMonitor monitor, ObjectRepository repository) {
    Classifier learner = (Classifier) getPreparedClassOption(this.learnerOption);
    InstanceStream stream = (InstanceStream) getPreparedClassOption(this.streamOption);
    ClassificationPerformanceEvaluator evaluator = (ClassificationPerformanceEvaluator) getPreparedClassOption(
            this.evaluatorOption);
    LearningCurve learningCurve = new LearningCurve("learning evaluation instances");

    //New for prequential methods
    if (evaluator instanceof WindowClassificationPerformanceEvaluator) {
        //((WindowClassificationPerformanceEvaluator) evaluator).setWindowWidth(widthOption.getValue());
        if (widthOption.getValue() != 1000) {
            System.out.println(/* w w w. j  a  v a 2s. c  om*/
                    "DEPRECATED! Use EvaluatePrequential -e (WindowClassificationPerformanceEvaluator -w "
                            + widthOption.getValue() + ")");
            return learningCurve;
        }
    }
    if (evaluator instanceof EWMAClassificationPerformanceEvaluator) {
        //((EWMAClassificationPerformanceEvaluator) evaluator).setalpha(alphaOption.getValue());
        if (alphaOption.getValue() != .01) {
            System.out.println(
                    "DEPRECATED! Use EvaluatePrequential -e (EWMAClassificationPerformanceEvaluator -a "
                            + alphaOption.getValue() + ")");
            return learningCurve;
        }
    }
    if (evaluator instanceof FadingFactorClassificationPerformanceEvaluator) {
        //((FadingFactorClassificationPerformanceEvaluator) evaluator).setalpha(alphaOption.getValue());
        if (alphaOption.getValue() != .01) {
            System.out.println(
                    "DEPRECATED! Use EvaluatePrequential -e (FadingFactorClassificationPerformanceEvaluator -a "
                            + alphaOption.getValue() + ")");
            return learningCurve;
        }
    }
    //End New for prequential methods

    learner.setModelContext(stream.getHeader());
    int maxInstances = this.instanceLimitOption.getValue();
    long instancesProcessed = 0;
    int maxSeconds = this.timeLimitOption.getValue();
    int secondsElapsed = 0;
    monitor.setCurrentActivity("Evaluating learner...", -1.0);

    File dumpFile = this.dumpFileOption.getFile();
    PrintStream immediateResultStream = null;
    if (dumpFile != null) {
        try {
            if (dumpFile.exists()) {
                immediateResultStream = new PrintStream(new FileOutputStream(dumpFile, true), true);
            } else {
                immediateResultStream = new PrintStream(new FileOutputStream(dumpFile), true);
            }
        } catch (Exception ex) {
            throw new RuntimeException("Unable to open immediate result file: " + dumpFile, ex);
        }
    }
    //File for output predictions
    File outputPredictionFile = this.outputPredictionFileOption.getFile();
    PrintStream outputPredictionResultStream = null;
    if (outputPredictionFile != null) {
        try {
            if (outputPredictionFile.exists()) {
                outputPredictionResultStream = new PrintStream(new FileOutputStream(outputPredictionFile, true),
                        true);
            } else {
                outputPredictionResultStream = new PrintStream(new FileOutputStream(outputPredictionFile),
                        true);
            }
        } catch (Exception ex) {
            throw new RuntimeException("Unable to open prediction result file: " + outputPredictionFile, ex);
        }
    }
    boolean firstDump = true;
    boolean preciseCPUTiming = TimingUtils.enablePreciseTiming();
    long evaluateStartTime = TimingUtils.getNanoCPUTimeOfCurrentThread();
    long lastEvaluateStartTime = evaluateStartTime;
    double RAMHours = 0.0;
    while (stream.hasMoreInstances() && ((maxInstances < 0) || (instancesProcessed < maxInstances))
            && ((maxSeconds < 0) || (secondsElapsed < maxSeconds))) {
        Instance trainInst = stream.nextInstance();
        Instance testInst = (Instance) trainInst.copy();
        int trueClass = (int) trainInst.classValue();
        //testInst.setClassMissing();
        double[] prediction = learner.getVotesForInstance(testInst);
        // Output prediction
        if (outputPredictionFile != null) {
            outputPredictionResultStream.println(Utils.maxIndex(prediction) + "," + trueClass);
        }

        //evaluator.addClassificationAttempt(trueClass, prediction, testInst.weight());
        evaluator.addResult(testInst, prediction);
        learner.trainOnInstance(trainInst);
        instancesProcessed++;
        if (instancesProcessed % this.sampleFrequencyOption.getValue() == 0
                || stream.hasMoreInstances() == false) {
            long evaluateTime = TimingUtils.getNanoCPUTimeOfCurrentThread();
            double time = TimingUtils.nanoTimeToSeconds(evaluateTime - evaluateStartTime);
            double timeIncrement = TimingUtils.nanoTimeToSeconds(evaluateTime - lastEvaluateStartTime);
            double RAMHoursIncrement = learner.measureByteSize() / (1024.0 * 1024.0 * 1024.0); //GBs
            RAMHoursIncrement *= (timeIncrement / 3600.0); //Hours
            RAMHours += RAMHoursIncrement;
            lastEvaluateStartTime = evaluateTime;
            learningCurve.insertEntry(new LearningEvaluation(
                    new Measurement[] { new Measurement("learning evaluation instances", instancesProcessed),
                            new Measurement("evaluation time (" + (preciseCPUTiming ? "cpu " : "") + "seconds)",
                                    time),
                            new Measurement("model cost (RAM-Hours)", RAMHours) },
                    evaluator, learner));

            if (immediateResultStream != null) {
                if (firstDump) {
                    immediateResultStream.println(learningCurve.headerToString());
                    firstDump = false;
                }
                immediateResultStream.println(learningCurve.entryToString(learningCurve.numEntries() - 1));
                immediateResultStream.flush();
            }

            /************************************* 2013/2/25, paul, modified
             *       for interval accuracy       * 
             *************************************/
            evaluator.reset();
        }
        if (instancesProcessed % INSTANCES_BETWEEN_MONITOR_UPDATES == 0) {
            if (monitor.taskShouldAbort()) {
                return null;
            }
            long estimatedRemainingInstances = stream.estimatedRemainingInstances();
            if (maxInstances > 0) {
                long maxRemaining = maxInstances - instancesProcessed;
                if ((estimatedRemainingInstances < 0) || (maxRemaining < estimatedRemainingInstances)) {
                    estimatedRemainingInstances = maxRemaining;
                }
            }
            monitor.setCurrentActivityFractionComplete(estimatedRemainingInstances < 0 ? -1.0
                    : (double) instancesProcessed
                            / (double) (instancesProcessed + estimatedRemainingInstances));
            if (monitor.resultPreviewRequested()) {
                monitor.setLatestResultPreview(learningCurve.copy());
            }
            secondsElapsed = (int) TimingUtils
                    .nanoTimeToSeconds(TimingUtils.getNanoCPUTimeOfCurrentThread() - evaluateStartTime);
        }
    }
    if (immediateResultStream != null) {
        immediateResultStream.close();
    }
    if (outputPredictionResultStream != null) {
        outputPredictionResultStream.close();
    }
    return learningCurve;
}

From source file:moa.tasks.EvaluatePrequentialRegression.java

License:Open Source License

@Override
protected Object doMainTask(TaskMonitor monitor, ObjectRepository repository) {
    Classifier learner = (Classifier) getPreparedClassOption(this.learnerOption);
    InstanceStream stream = (InstanceStream) getPreparedClassOption(this.streamOption);
    RegressionPerformanceEvaluator evaluator = (RegressionPerformanceEvaluator) getPreparedClassOption(
            this.evaluatorOption);
    LearningCurve learningCurve = new LearningCurve("learning evaluation instances");

    //New for prequential methods
    if (evaluator instanceof WindowClassificationPerformanceEvaluator) {
        //((WindowClassificationPerformanceEvaluator) evaluator).setWindowWidth(widthOption.getValue());
        if (widthOption.getValue() != 1000) {
            System.out.println(//w  w w. j  av  a 2s .  c  o  m
                    "DEPRECATED! Use EvaluatePrequential -e (WindowClassificationPerformanceEvaluator -w "
                            + widthOption.getValue() + ")");
            return learningCurve;
        }
    }
    if (evaluator instanceof EWMAClassificationPerformanceEvaluator) {
        //((EWMAClassificationPerformanceEvaluator) evaluator).setalpha(alphaOption.getValue());
        if (alphaOption.getValue() != .01) {
            System.out.println(
                    "DEPRECATED! Use EvaluatePrequential -e (EWMAClassificationPerformanceEvaluator -a "
                            + alphaOption.getValue() + ")");
            return learningCurve;
        }
    }
    if (evaluator instanceof FadingFactorClassificationPerformanceEvaluator) {
        //((FadingFactorClassificationPerformanceEvaluator) evaluator).setalpha(alphaOption.getValue());
        if (alphaOption.getValue() != .01) {
            System.out.println(
                    "DEPRECATED! Use EvaluatePrequential -e (FadingFactorClassificationPerformanceEvaluator -a "
                            + alphaOption.getValue() + ")");
            return learningCurve;
        }
    }
    //End New for prequential methods

    learner.setModelContext(stream.getHeader());
    int maxInstances = this.instanceLimitOption.getValue();
    long instancesProcessed = 0;
    int maxSeconds = this.timeLimitOption.getValue();
    int secondsElapsed = 0;
    monitor.setCurrentActivity("Evaluating learner...", -1.0);

    File dumpFile = this.dumpFileOption.getFile();
    PrintStream immediateResultStream = null;
    if (dumpFile != null) {
        try {
            if (dumpFile.exists()) {
                immediateResultStream = new PrintStream(new FileOutputStream(dumpFile, true), true);
            } else {
                immediateResultStream = new PrintStream(new FileOutputStream(dumpFile), true);
            }
        } catch (Exception ex) {
            throw new RuntimeException("Unable to open immediate result file: " + dumpFile, ex);
        }
    }
    //File for output predictions
    File outputPredictionFile = this.outputPredictionFileOption.getFile();
    PrintStream outputPredictionResultStream = null;
    if (outputPredictionFile != null) {
        try {
            if (outputPredictionFile.exists()) {
                outputPredictionResultStream = new PrintStream(new FileOutputStream(outputPredictionFile, true),
                        true);
            } else {
                outputPredictionResultStream = new PrintStream(new FileOutputStream(outputPredictionFile),
                        true);
            }
        } catch (Exception ex) {
            throw new RuntimeException("Unable to open prediction result file: " + outputPredictionFile, ex);
        }
    }
    boolean firstDump = true;
    boolean preciseCPUTiming = TimingUtils.enablePreciseTiming();
    long evaluateStartTime = TimingUtils.getNanoCPUTimeOfCurrentThread();
    long lastEvaluateStartTime = evaluateStartTime;
    double RAMHours = 0.0;
    while (stream.hasMoreInstances() && ((maxInstances < 0) || (instancesProcessed < maxInstances))
            && ((maxSeconds < 0) || (secondsElapsed < maxSeconds))) {
        Instance trainInst = stream.nextInstance();
        Instance testInst = (Instance) trainInst.copy();
        double trueClass = trainInst.classValue();
        //testInst.setClassMissing();
        double[] prediction = learner.getVotesForInstance(testInst);
        // Output prediction
        if (outputPredictionFile != null) {
            outputPredictionResultStream.println(prediction[0] + "," + trueClass);
        }

        //evaluator.addClassificationAttempt(trueClass, prediction, testInst.weight());
        evaluator.addResult(testInst, prediction);
        learner.trainOnInstance(trainInst);
        instancesProcessed++;
        if (instancesProcessed % this.sampleFrequencyOption.getValue() == 0
                || stream.hasMoreInstances() == false) {
            long evaluateTime = TimingUtils.getNanoCPUTimeOfCurrentThread();
            double time = TimingUtils.nanoTimeToSeconds(evaluateTime - evaluateStartTime);
            double timeIncrement = TimingUtils.nanoTimeToSeconds(evaluateTime - lastEvaluateStartTime);
            double RAMHoursIncrement = learner.measureByteSize() / (1024.0 * 1024.0 * 1024.0); //GBs
            RAMHoursIncrement *= (timeIncrement / 3600.0); //Hours
            RAMHours += RAMHoursIncrement;
            lastEvaluateStartTime = evaluateTime;
            learningCurve.insertEntry(new LearningEvaluation(
                    new Measurement[] { new Measurement("learning evaluation instances", instancesProcessed),
                            new Measurement("evaluation time (" + (preciseCPUTiming ? "cpu " : "") + "seconds)",
                                    time),
                            new Measurement("model cost (RAM-Hours)", RAMHours) },
                    evaluator, learner));

            if (immediateResultStream != null) {
                if (firstDump) {
                    immediateResultStream.println(learningCurve.headerToString());
                    firstDump = false;
                }
                immediateResultStream.println(learningCurve.entryToString(learningCurve.numEntries() - 1));
                immediateResultStream.flush();
            }
        }
        if (instancesProcessed % INSTANCES_BETWEEN_MONITOR_UPDATES == 0) {
            if (monitor.taskShouldAbort()) {
                return null;
            }
            long estimatedRemainingInstances = stream.estimatedRemainingInstances();
            if (maxInstances > 0) {
                long maxRemaining = maxInstances - instancesProcessed;
                if ((estimatedRemainingInstances < 0) || (maxRemaining < estimatedRemainingInstances)) {
                    estimatedRemainingInstances = maxRemaining;
                }
            }
            monitor.setCurrentActivityFractionComplete(estimatedRemainingInstances < 0 ? -1.0
                    : (double) instancesProcessed
                            / (double) (instancesProcessed + estimatedRemainingInstances));
            if (monitor.resultPreviewRequested()) {
                monitor.setLatestResultPreview(learningCurve.copy());
            }
            secondsElapsed = (int) TimingUtils
                    .nanoTimeToSeconds(TimingUtils.getNanoCPUTimeOfCurrentThread() - evaluateStartTime);
        }
    }
    if (immediateResultStream != null) {
        immediateResultStream.close();
    }
    if (outputPredictionResultStream != null) {
        outputPredictionResultStream.close();
    }
    return learningCurve;
}

From source file:org.packDataMining.SMOTE.java

License:Open Source License

/**
 * The procedure implementing the SMOTE algorithm. The output
 * instances are pushed onto the output queue for collection.
 * /*w  w  w  .  j a v  a 2  s  .co m*/
 * @throws Exception    if provided options cannot be executed 
 *          on input instances
 */
protected void doSMOTE() throws Exception {
    int minIndex = 0;
    int min = Integer.MAX_VALUE;
    if (m_DetectMinorityClass) {
        // find minority class
        int[] classCounts = getInputFormat().attributeStats(getInputFormat().classIndex()).nominalCounts;
        for (int i = 0; i < classCounts.length; i++) {
            if (classCounts[i] != 0 && classCounts[i] < min) {
                min = classCounts[i];
                minIndex = i;
            }
        }
    } else {
        String classVal = getClassValue();
        if (classVal.equalsIgnoreCase("first")) {
            minIndex = 1;
        } else if (classVal.equalsIgnoreCase("last")) {
            minIndex = getInputFormat().numClasses();
        } else {
            minIndex = Integer.parseInt(classVal);
        }
        if (minIndex > getInputFormat().numClasses()) {
            throw new Exception("value index must be <= the number of classes");
        }
        minIndex--; // make it an index
    }

    int nearestNeighbors;
    if (min <= getNearestNeighbors()) {
        nearestNeighbors = min - 1;
    } else {
        nearestNeighbors = getNearestNeighbors();
    }
    if (nearestNeighbors < 1)
        throw new Exception("Cannot use 0 neighbors!");

    // compose minority class dataset
    // also push all dataset instances
    Instances sample = getInputFormat().stringFreeStructure();
    Enumeration instanceEnum = getInputFormat().enumerateInstances();
    while (instanceEnum.hasMoreElements()) {
        Instance instance = (Instance) instanceEnum.nextElement();
        push((Instance) instance.copy());
        if ((int) instance.classValue() == minIndex) {
            sample.add(instance);
        }
    }

    // compute Value Distance Metric matrices for nominal features
    Map vdmMap = new HashMap();
    Enumeration attrEnum = getInputFormat().enumerateAttributes();
    while (attrEnum.hasMoreElements()) {
        Attribute attr = (Attribute) attrEnum.nextElement();
        if (!attr.equals(getInputFormat().classAttribute())) {
            if (attr.isNominal() || attr.isString()) {
                double[][] vdm = new double[attr.numValues()][attr.numValues()];
                vdmMap.put(attr, vdm);
                int[] featureValueCounts = new int[attr.numValues()];
                int[][] featureValueCountsByClass = new int[getInputFormat().classAttribute().numValues()][attr
                        .numValues()];
                instanceEnum = getInputFormat().enumerateInstances();
                while (instanceEnum.hasMoreElements()) {
                    Instance instance = (Instance) instanceEnum.nextElement();
                    int value = (int) instance.value(attr);
                    int classValue = (int) instance.classValue();
                    featureValueCounts[value]++;
                    featureValueCountsByClass[classValue][value]++;
                }
                for (int valueIndex1 = 0; valueIndex1 < attr.numValues(); valueIndex1++) {
                    for (int valueIndex2 = 0; valueIndex2 < attr.numValues(); valueIndex2++) {
                        double sum = 0;
                        for (int classValueIndex = 0; classValueIndex < getInputFormat()
                                .numClasses(); classValueIndex++) {
                            double c1i = (double) featureValueCountsByClass[classValueIndex][valueIndex1];
                            double c2i = (double) featureValueCountsByClass[classValueIndex][valueIndex2];
                            double c1 = (double) featureValueCounts[valueIndex1];
                            double c2 = (double) featureValueCounts[valueIndex2];
                            double term1 = c1i / c1;
                            double term2 = c2i / c2;
                            sum += Math.abs(term1 - term2);
                        }
                        vdm[valueIndex1][valueIndex2] = sum;
                    }
                }
            }
        }
    }

    // use this random source for all required randomness
    Random rand = new Random(getRandomSeed());

    // find the set of extra indices to use if the percentage is not evenly divisible by 100
    List extraIndices = new LinkedList();
    double percentageRemainder = (getPercentage() / 100) - Math.floor(getPercentage() / 100.0);
    int extraIndicesCount = (int) (percentageRemainder * sample.numInstances());
    if (extraIndicesCount >= 1) {
        for (int i = 0; i < sample.numInstances(); i++) {
            extraIndices.add(i);
        }
    }
    Collections.shuffle(extraIndices, rand);
    extraIndices = extraIndices.subList(0, extraIndicesCount);
    Set extraIndexSet = new HashSet(extraIndices);

    // the main loop to handle computing nearest neighbors and generating SMOTE
    // examples from each instance in the original minority class data
    Instance[] nnArray = new Instance[nearestNeighbors];
    for (int i = 0; i < sample.numInstances(); i++) {
        Instance instanceI = sample.instance(i);
        // find k nearest neighbors for each instance
        List distanceToInstance = new LinkedList();
        for (int j = 0; j < sample.numInstances(); j++) {
            Instance instanceJ = sample.instance(j);
            if (i != j) {
                double distance = 0;
                attrEnum = getInputFormat().enumerateAttributes();
                while (attrEnum.hasMoreElements()) {
                    Attribute attr = (Attribute) attrEnum.nextElement();
                    if (!attr.equals(getInputFormat().classAttribute())) {
                        double iVal = instanceI.value(attr);
                        double jVal = instanceJ.value(attr);
                        if (attr.isNumeric()) {
                            distance += Math.pow(iVal - jVal, 2);
                        } else {
                            distance += ((double[][]) vdmMap.get(attr))[(int) iVal][(int) jVal];
                        }
                    }
                }
                distance = Math.pow(distance, .5);
                distanceToInstance.add(new Object[] { distance, instanceJ });
            }
        }

        // sort the neighbors according to distance
        Collections.sort(distanceToInstance, new Comparator() {
            public int compare(Object o1, Object o2) {
                double distance1 = (Double) ((Object[]) o1)[0];
                double distance2 = (Double) ((Object[]) o2)[0];
                return (int) Math.ceil(distance1 - distance2);
            }
        });

        // populate the actual nearest neighbor instance array
        Iterator entryIterator = distanceToInstance.iterator();
        int j = 0;
        while (entryIterator.hasNext() && j < nearestNeighbors) {
            nnArray[j] = (Instance) ((Object[]) entryIterator.next())[1];
            j++;
        }

        // create synthetic examples
        int n = (int) Math.floor(getPercentage() / 100);
        while (n > 0 || extraIndexSet.remove(i)) {
            double[] values = new double[sample.numAttributes()];
            int nn = rand.nextInt(nearestNeighbors);
            attrEnum = getInputFormat().enumerateAttributes();
            while (attrEnum.hasMoreElements()) {
                Attribute attr = (Attribute) attrEnum.nextElement();
                if (!attr.equals(getInputFormat().classAttribute())) {
                    if (attr.isNumeric()) {
                        double dif = nnArray[nn].value(attr) - instanceI.value(attr);
                        double gap = rand.nextDouble();
                        values[attr.index()] = (double) (instanceI.value(attr) + gap * dif);
                    } else if (attr.isDate()) {
                        double dif = nnArray[nn].value(attr) - instanceI.value(attr);
                        double gap = rand.nextDouble();
                        values[attr.index()] = (long) (instanceI.value(attr) + gap * dif);
                    } else {
                        int[] valueCounts = new int[attr.numValues()];
                        int iVal = (int) instanceI.value(attr);
                        valueCounts[iVal]++;
                        for (int nnEx = 0; nnEx < nearestNeighbors; nnEx++) {
                            int val = (int) nnArray[nnEx].value(attr);
                            valueCounts[val]++;
                        }
                        int maxIndex = 0;
                        int max = Integer.MIN_VALUE;
                        for (int index = 0; index < attr.numValues(); index++) {
                            if (valueCounts[index] > max) {
                                max = valueCounts[index];
                                maxIndex = index;
                            }
                        }
                        values[attr.index()] = maxIndex;
                    }
                }
            }
            values[sample.classIndex()] = minIndex;
            Instance synthetic = new Instance(1.0, values);
            push(synthetic);
            n--;
        }
    }
}

From source file:org.scripps.branch.classifier.ManualTree.java

License:Open Source License

/**
 * Splits instances into subsets based on the given split.
 * //from  ww w . j  a v a  2 s  .c om
 * @param data
 *            the data to work with
 * @return the subsets of instances
 * @throws Exception
 *             if something goes wrong
 */
protected Instances[] splitData(Instances data) throws Exception {

    // Allocate array of Instances objects
    Instances[] subsets = new Instances[m_Prop.length];
    for (int i = 0; i < m_Prop.length; i++) {
        subsets[i] = new Instances(data, data.numInstances());
    }

    if (m_Attribute >= data.numAttributes()) {
        if (m_Attribute >= listOfFc.size() + data.numAttributes() - 1) {
            CustomSet cSet = getReqCustomSet(m_Attribute - (data.numAttributes() - 1 + listOfFc.size()),
                    cSetList);
            JsonNode vertices = mapper.readTree(cSet.getConstraints());
            ArrayList<double[]> attrVertices = generateVerticesList(vertices);
            List<Attribute> aList = generateAttributeList(cSet, data, d);
            double[] testPoint = new double[2];
            int ctr = 0;
            for (int k = 0; k < data.numInstances(); k++) {
                ctr = 0;
                for (Attribute a : aList) {
                    testPoint[ctr] = data.instance(k).value(a);
                    ctr++;
                }
                int check = checkPointInPolygon(attrVertices, testPoint);
                subsets[check].add(data.instance(k));
                continue;
            }
        } else {
            Classifier fc;
            double predictedClass;
            // Go through the data
            for (int i = 0; i < data.numInstances(); i++) {

                // Get instance
                Instance inst = data.instance(i);
                String classifierId = getKeyinMap(listOfFc, m_Attribute, data);
                fc = listOfFc.get(classifierId);
                predictedClass = fc.classifyInstance(inst);
                if (predictedClass != Instance.missingValue()) {
                    subsets[(int) predictedClass].add(inst);
                    continue;
                }

                // Else throw an exception
                throw new IllegalArgumentException("Unknown attribute type");
            }
        }
    } else {
        // Go through the data
        for (int i = 0; i < data.numInstances(); i++) {

            // Get instance
            Instance inst = data.instance(i);

            // Does the instance have a missing value?
            if (inst.isMissing(m_Attribute)) {

                // Split instance up
                for (int k = 0; k < m_Prop.length; k++) {
                    if (m_Prop[k] > 0) {
                        Instance copy = (Instance) inst.copy();
                        copy.setWeight(m_Prop[k] * inst.weight());
                        subsets[k].add(copy);
                    }
                }

                // Proceed to next instance
                continue;
            }

            // Do we have a nominal attribute?
            if (data.attribute(m_Attribute).isNominal()) {
                subsets[(int) inst.value(m_Attribute)].add(inst);

                // Proceed to next instance
                continue;
            }

            // Do we have a numeric attribute?
            if (data.attribute(m_Attribute).isNumeric()) {
                subsets[(inst.value(m_Attribute) < m_SplitPoint) ? 0 : 1].add(inst);

                // Proceed to next instance
                continue;
            }

            // Else throw an exception
            throw new IllegalArgumentException("Unknown attribute type");
        }
    }

    // Save memory
    for (int i = 0; i < m_Prop.length; i++) {
        subsets[i].compactify();
    }

    // Return the subsets
    return subsets;
}

From source file:tr.gov.ulakbim.jDenetX.classifiers.CoOzaBagASHT.java

License:Open Source License

@Override
public void trainOnInstanceImpl(Instance inst) {
    int trueClass = (int) inst.classValue();
    for (int i = 0; i < this.ensemble.length; i++) {
        int k = MiscUtils.poisson(1.0, this.classifierRandom);
        if (k > 0) {
            Instance weightedInst = (Instance) inst.copy();
            weightedInst.setWeight(inst.weight() * k);
            if (Utils.maxIndex(this.ensemble[i].getVotesForInstance(inst)) == trueClass) { // Here we used the getVotesForInstanceFunction of HoeffdingTree
                this.error[i] += alpha * (0.0 - this.error[i]); // EWMA
            } else {
                this.error[i] += alpha * (1.0 - this.error[i]); // EWMA
            }// www. j  a  va 2s  . c  om
            this.ensemble[i].trainOnInstance(weightedInst);
        }
    }
}

From source file:tr.gov.ulakbim.jDenetX.classifiers.LeveragingBag.java

License:Open Source License

@Override
public void trainOnInstanceImpl(Instance inst) {
    int numClasses = inst.numClasses();
    //Output Codes
    if (this.initMatrixCodes == true) {
        this.matrixCodes = new int[this.ensemble.length][inst.numClasses()];
        for (int i = 0; i < this.ensemble.length; i++) {
            int numberOnes;
            int numberZeros;

            do { // until we have the same number of zeros and ones
                numberOnes = 0;//  w ww . j  av  a2 s. co m
                numberZeros = 0;
                for (int j = 0; j < numClasses; j++) {
                    int result = 0;
                    if (j == 1 && numClasses == 2) {
                        result = 1 - this.matrixCodes[i][0];
                    } else {
                        result = (this.classifierRandom.nextBoolean() ? 1 : 0);
                    }
                    this.matrixCodes[i][j] = result;
                    if (result == 1)
                        numberOnes++;
                    else
                        numberZeros++;
                }
            } while ((numberOnes - numberZeros) * (numberOnes - numberZeros) > (this.ensemble.length % 2));

        }
        this.initMatrixCodes = false;
    }

    boolean Change = false;
    double w = 1.0;
    double mt = 0.0;
    Instance weightedInst = (Instance) inst.copy();
    /*for (int i = 0; i < this.ensemble.length; i++) {
      if (this.outputCodesOption.isSet()) {
          weightedInst.setClassValue((double) this.matrixCodes[i][(int) inst.classValue()] );
      }
      if(!this.ensemble[i].correctlyClassifies(weightedInst)) {
          mt++;
      }
      }*/
    //update w
    w = this.weightShrinkOption.getValue(); //1.0 +mt/2.0;
    //Train ensemble of classifiers
    for (int i = 0; i < this.ensemble.length; i++) {
        int k = MiscUtils.poisson(w, this.classifierRandom);
        if (k > 0) {
            if (this.outputCodesOption.isSet()) {
                weightedInst.setClassValue((double) this.matrixCodes[i][(int) inst.classValue()]);
            }
            weightedInst.setWeight(inst.weight() * k);
            this.ensemble[i].trainOnInstance(weightedInst);
        }
        boolean correctlyClassifies = this.ensemble[i].correctlyClassifies(weightedInst);
        double ErrEstim = this.ADError[i].getEstimation();
        if (this.ADError[i].setInput(correctlyClassifies ? 0 : 1))
            if (this.ADError[i].getEstimation() > ErrEstim)
                Change = true;
    }
    if (Change) {
        numberOfChangesDetected++;
        double max = 0.0;
        int imax = -1;
        for (int i = 0; i < this.ensemble.length; i++) {
            if (max < this.ADError[i].getEstimation()) {
                max = this.ADError[i].getEstimation();
                imax = i;
            }
        }
        if (imax != -1) {
            this.ensemble[imax].resetLearning();
            //this.ensemble[imax].trainOnInstance(inst);
            this.ADError[imax] = new ADWIN((double) this.deltaAdwinOption.getValue());
        }
    }
}

From source file:tr.gov.ulakbim.jDenetX.classifiers.LeveragingBagHalf.java

License:Open Source License

@Override
public void trainOnInstanceImpl(Instance inst) {
    int numClasses = inst.numClasses();
    //Output Codes
    if (this.initMatrixCodes == true) {
        this.matrixCodes = new int[this.ensemble.length][inst.numClasses()];
        for (int i = 0; i < this.ensemble.length; i++) {
            int numberOnes;
            int numberZeros;

            do { // until we have the same number of zeros and ones
                numberOnes = 0;/*from  w  w w. jav a  2 s  .c o m*/
                numberZeros = 0;
                for (int j = 0; j < numClasses; j++) {
                    int result = 0;
                    if (j == 1 && numClasses == 2) {
                        result = 1 - this.matrixCodes[i][0];
                    } else {
                        result = (this.classifierRandom.nextBoolean() ? 1 : 0);
                    }
                    this.matrixCodes[i][j] = result;
                    if (result == 1)
                        numberOnes++;
                    else
                        numberZeros++;
                }
            } while ((numberOnes - numberZeros) * (numberOnes - numberZeros) > (this.ensemble.length % 2));

        }
        this.initMatrixCodes = false;
    }

    boolean Change = false;
    double w = 1.0;
    double mt = 0.0;
    Instance weightedInst = (Instance) inst.copy();
    //Train ensemble of classifiers
    for (int i = 0; i < this.ensemble.length; i++) {
        int k = this.classifierRandom.nextBoolean() ? 0 : (int) this.weightShrinkOption.getValue(); //half bagging
        if (k > 0) {
            if (this.outputCodesOption.isSet()) {
                weightedInst.setClassValue((double) this.matrixCodes[i][(int) inst.classValue()]);
            }
            weightedInst.setWeight(k);
            this.ensemble[i].trainOnInstance(weightedInst);
        }
        boolean correctlyClassifies = this.ensemble[i].correctlyClassifies(weightedInst);
        double ErrEstim = this.ADError[i].getEstimation();
        if (this.ADError[i].setInput(correctlyClassifies ? 0 : 1))
            if (this.ADError[i].getEstimation() > ErrEstim)
                Change = true;
    }
    if (Change) {
        numberOfChangesDetected++;
        double max = 0.0;
        int imax = -1;
        for (int i = 0; i < this.ensemble.length; i++) {
            if (max < this.ADError[i].getEstimation()) {
                max = this.ADError[i].getEstimation();
                imax = i;
            }
        }
        if (imax != -1) {
            this.ensemble[imax].resetLearning();
            //this.ensemble[imax].trainOnInstance(inst);
            this.ADError[imax] = new ADWIN((double) this.deltaAdwinOption.getValue());
        }
    }
}

From source file:tr.gov.ulakbim.jDenetX.classifiers.LeveragingBagWT.java

License:Open Source License

@Override
public void trainOnInstanceImpl(Instance inst) {
    int numClasses = inst.numClasses();
    //Output Codes
    if (this.initMatrixCodes == true) {
        this.matrixCodes = new int[this.ensemble.length][inst.numClasses()];
        for (int i = 0; i < this.ensemble.length; i++) {
            int numberOnes;
            int numberZeros;

            do { // until we have the same number of zeros and ones
                numberOnes = 0;/* www.j a  v a 2 s.c o m*/
                numberZeros = 0;
                for (int j = 0; j < numClasses; j++) {
                    int result = 0;
                    if (j == 1 && numClasses == 2) {
                        result = 1 - this.matrixCodes[i][0];
                    } else {
                        result = (this.classifierRandom.nextBoolean() ? 1 : 0);
                    }
                    this.matrixCodes[i][j] = result;
                    if (result == 1)
                        numberOnes++;
                    else
                        numberZeros++;
                }
            } while ((numberOnes - numberZeros) * (numberOnes - numberZeros) > (this.ensemble.length % 2));

        }
        this.initMatrixCodes = false;
    }

    boolean Change = false;
    double w = 1.0;
    double mt = 0.0;
    Instance weightedInst = (Instance) inst.copy();
    //update w
    w = this.weightShrinkOption.getValue();
    //Train ensemble of classifiers
    for (int i = 0; i < this.ensemble.length; i++) {
        int k = 1 + MiscUtils.poisson(w, this.classifierRandom);
        if (k > 0) {
            if (this.outputCodesOption.isSet()) {
                weightedInst.setClassValue((double) this.matrixCodes[i][(int) inst.classValue()]);
            }
            weightedInst.setWeight(inst.weight() * k);
            this.ensemble[i].trainOnInstance(weightedInst);
        }
        boolean correctlyClassifies = this.ensemble[i].correctlyClassifies(weightedInst);
        double ErrEstim = this.ADError[i].getEstimation();
        if (this.ADError[i].setInput(correctlyClassifies ? 0 : 1))
            if (this.ADError[i].getEstimation() > ErrEstim)
                Change = true;
    }
    if (Change) {
        numberOfChangesDetected++;
        double max = 0.0;
        int imax = -1;
        for (int i = 0; i < this.ensemble.length; i++) {
            if (max < this.ADError[i].getEstimation()) {
                max = this.ADError[i].getEstimation();
                imax = i;
            }
        }
        if (imax != -1) {
            this.ensemble[imax].resetLearning();
            //this.ensemble[imax].trainOnInstance(inst);
            this.ADError[imax] = new ADWIN((double) this.deltaAdwinOption.getValue());
        }
    }
}

From source file:tr.gov.ulakbim.jDenetX.classifiers.LeveragingSubag.java

License:Open Source License

@Override
public void trainOnInstanceImpl(Instance inst) {
    int numClasses = inst.numClasses();
    //Output Codes
    if (this.initMatrixCodes == true) {
        this.matrixCodes = new int[this.ensemble.length][inst.numClasses()];
        for (int i = 0; i < this.ensemble.length; i++) {
            int numberOnes;
            int numberZeros;

            do { // until we have the same number of zeros and ones
                numberOnes = 0;/*from  w w w .  ja va  2s  .  c o  m*/
                numberZeros = 0;
                for (int j = 0; j < numClasses; j++) {
                    int result = 0;
                    if (j == 1 && numClasses == 2) {
                        result = 1 - this.matrixCodes[i][0];
                    } else {
                        result = (this.classifierRandom.nextBoolean() ? 1 : 0);
                    }
                    this.matrixCodes[i][j] = result;
                    if (result == 1)
                        numberOnes++;
                    else
                        numberZeros++;
                }
            } while ((numberOnes - numberZeros) * (numberOnes - numberZeros) > (this.ensemble.length % 2));

        }
        this.initMatrixCodes = false;
    }

    boolean Change = false;
    double w = 1.0;
    double mt = 0.0;
    Instance weightedInst = (Instance) inst.copy();

    //Train ensemble of classifiers
    for (int i = 0; i < this.ensemble.length; i++) {
        int k = MiscUtils.poisson(1, this.classifierRandom);
        k = (k > 0) ? (int) this.weightShrinkOption.getValue() : 0;
        if (k > 0) {
            if (this.outputCodesOption.isSet()) {
                weightedInst.setClassValue((double) this.matrixCodes[i][(int) inst.classValue()]);
            }
            weightedInst.setWeight(k);
            this.ensemble[i].trainOnInstance(weightedInst);
        }
        boolean correctlyClassifies = this.ensemble[i].correctlyClassifies(weightedInst);
        double ErrEstim = this.ADError[i].getEstimation();
        if (this.ADError[i].setInput(correctlyClassifies ? 0 : 1))
            if (this.ADError[i].getEstimation() > ErrEstim)
                Change = true;
    }
    if (Change) {
        numberOfChangesDetected++;
        double max = 0.0;
        int imax = -1;
        for (int i = 0; i < this.ensemble.length; i++) {
            if (max < this.ADError[i].getEstimation()) {
                max = this.ADError[i].getEstimation();
                imax = i;
            }
        }
        if (imax != -1) {
            this.ensemble[imax].resetLearning();
            this.ADError[imax] = new ADWIN((double) this.deltaAdwinOption.getValue());
        }
    }
}

From source file:tr.gov.ulakbim.jDenetX.classifiers.OCBoost.java

License:Open Source License

@Override
public void trainOnInstanceImpl(Instance inst) {
    double d = 1.0;
    int[] m = new int[this.ensemble.length];
    for (int j = 0; j < this.ensemble.length; j++) {
        int j0 = 0; //max(0,j-K)
        pipos[j] = 1.0;//from w  w  w  .  j a  v a2s .c o  m
        pineg[j] = 1.0;
        m[j] = -1;
        if (this.ensemble[j].correctlyClassifies(inst)) {
            m[j] = 1;
        }
        for (int k = j0; k <= j - 1; k++) {
            pipos[j] *= wpos[j][k] / wpos[j][j] * Math.exp(-alphainc[k])
                    + (1.0 - wpos[j][k] / wpos[j][j]) * Math.exp(alphainc[k]);
            pineg[j] *= wneg[j][k] / wneg[j][j] * Math.exp(-alphainc[k])
                    + (1.0 - wneg[j][k] / wneg[j][j]) * Math.exp(alphainc[k]);
        }
        for (int k = 0; k <= j; k++) {
            wpos[j][k] = wpos[j][k] * pipos[j] + d * (m[k] == 1 ? 1 : 0) * (m[j] == 1 ? 1 : 0);
            wneg[j][k] = wneg[j][k] * pineg[j] + d * (m[k] == -1 ? 1 : 0) * (m[j] == -1 ? 1 : 0);
        }
        alphainc[j] = -alpha[j];
        alpha[j] = 0.5 * Math.log(wpos[j][j] / wneg[j][j]);
        alphainc[j] += alpha[j];

        d = d * Math.exp(-alpha[j] * m[j]);

        if (d > 0.0) {
            Instance weightedInst = (Instance) inst.copy();
            weightedInst.setWeight(inst.weight() * d);
            this.ensemble[j].trainOnInstance(weightedInst);
        }
    }
}