List of usage examples for weka.core.Instance.weight()
public double weight();
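All of the examples below use the same basic pattern: weight() returns the instance's (possibly fractional) weight, and callers accumulate it in place of a raw count of 1. A minimal standalone sketch of that pattern, assuming any ARFF file with a nominal class attribute (the data.arff path and class-index choice are placeholders):

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class WeightedClassCounts {
    public static void main(String[] args) throws Exception {
        // Placeholder path; any ARFF file with a nominal class works.
        Instances data = DataSource.read("data.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // Accumulate instance weights per class instead of counting 1 each.
        double[] classProbs = new double[data.numClasses()];
        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);
            classProbs[(int) inst.classValue()] += inst.weight();
        }
        for (int c = 0; c < classProbs.length; c++) {
            System.out.println(data.classAttribute().value(c) + ": " + classProbs[c]);
        }
    }
}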
From source file:org.scripps.branch.classifier.ManualTree.java
License:Open Source License
/**
 * Backfits the given data into the tree.
 */
public void backfitData(Instances data) throws Exception {
    // Compute initial class counts
    double[] classProbs = new double[data.numClasses()];
    for (int i = 0; i < data.numInstances(); i++) {
        Instance inst = data.instance(i);
        classProbs[(int) inst.classValue()] += inst.weight();
    }
    // Fit data into tree
    backfitData(data, classProbs);
}
From source file:org.scripps.branch.classifier.ManualTree.java
License:Open Source License
/**
 * Recursively backfits data into the tree.
 *
 * @param data the data to work with
 * @param classProbs the class distribution
 * @throws Exception if generation fails
 */
protected void backfitData(Instances data, double[] classProbs) throws Exception {
    // Make leaf if there are no training instances
    if (data.numInstances() == 0) {
        m_Attribute = -1;
        m_ClassDistribution = null;
        m_Prop = null;
        return;
    }

    // Check if node doesn't contain enough instances or is pure
    // or maximum depth reached
    m_ClassDistribution = classProbs.clone();

    /*
     * if (Utils.sum(m_ClassDistribution) < 2 * m_MinNum ||
     *     Utils.eq(m_ClassDistribution[Utils.maxIndex(m_ClassDistribution)],
     *              Utils.sum(m_ClassDistribution))) {
     *     // Make leaf
     *     m_Attribute = -1;
     *     m_Prop = null;
     *     return;
     * }
     */

    // Are we at an inner node?
    if (m_Attribute > -1) {
        // Compute new weights for subsets based on backfit data
        m_Prop = new double[m_Successors.length];
        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);
            if (!inst.isMissing(m_Attribute)) {
                if (data.attribute(m_Attribute).isNominal()) {
                    m_Prop[(int) inst.value(m_Attribute)] += inst.weight();
                } else {
                    m_Prop[(inst.value(m_Attribute) < m_SplitPoint) ? 0 : 1] += inst.weight();
                }
            }
        }

        // If we only have missing values we can make this node into a leaf
        if (Utils.sum(m_Prop) <= 0) {
            m_Attribute = -1;
            m_Prop = null;
            return;
        }

        // Otherwise normalize the proportions
        Utils.normalize(m_Prop);

        // Split data
        Instances[] subsets = splitData(data);

        // Go through subsets
        for (int i = 0; i < subsets.length; i++) {
            // Compute distribution for current subset
            double[] dist = new double[data.numClasses()];
            for (int j = 0; j < subsets[i].numInstances(); j++) {
                dist[(int) subsets[i].instance(j).classValue()] += subsets[i].instance(j).weight();
            }
            // Backfit subset
            m_Successors[i].backfitData(subsets[i], dist);
        }

        // If unclassified instances are allowed, we don't need to store the
        // class distribution
        if (getAllowUnclassifiedInstances()) {
            m_ClassDistribution = null;
            return;
        }

        // Otherwise, if all successors are non-empty, we don't need to
        // store the class distribution
        for (int i = 0; i < subsets.length; i++) {
            if (m_Successors[i].m_ClassDistribution == null) {
                // An empty successor: keep this node's distribution
                return;
            }
        }
        m_ClassDistribution = null;

        // If we have at least two non-empty successors, we should keep this tree
        /*
         * int nonEmptySuccessors = 0;
         * for (int i = 0; i < subsets.length; i++) {
         *     if (m_Successors[i].m_ClassDistribution != null) {
         *         nonEmptySuccessors++;
         *         if (nonEmptySuccessors > 1) {
         *             return;
         *         }
         *     }
         * }
         *
         * // Otherwise, this node is a leaf or should become a leaf
         * m_Successors = null;
         * m_Attribute = -1;
         * m_Prop = null;
         * return;
         */
    }
}
From source file:org.scripps.branch.classifier.ManualTree.java
License:Open Source License
/**
 * Builds classifier.
 *
 * @param data the data to train with
 * @throws Exception if something goes wrong or the data doesn't fit
 */
@Override
public void buildClassifier(Instances data) throws Exception {
    // Make sure K value is in range
    if (m_KValue > data.numAttributes() - 1)
        m_KValue = data.numAttributes() - 1;
    if (m_KValue < 1)
        m_KValue = (int) Utils.log2(data.numAttributes()) + 1;

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    // only class? -> build ZeroR model
    if (data.numAttributes() == 1) {
        System.err.println("Cannot build model (only class attribute present in data!), "
                + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(data);
        return;
    } else {
        m_ZeroR = null;
    }

    // Figure out appropriate datasets
    Instances train = null;
    Instances backfit = null;
    Random rand = data.getRandomNumberGenerator(m_randomSeed);
    if (m_NumFolds <= 0) {
        train = data;
    } else {
        data.randomize(rand);
        data.stratify(m_NumFolds);
        train = data.trainCV(m_NumFolds, 1, rand);
        backfit = data.testCV(m_NumFolds, 1);
    }

    // Set default instances for selection
    setRequiredInst(data);

    // Create the attribute indices window
    int[] attIndicesWindow = new int[data.numAttributes() - 1];
    int j = 0;
    for (int i = 0; i < attIndicesWindow.length; i++) {
        if (j == data.classIndex())
            j++; // do not include the class
        attIndicesWindow[i] = j++;
    }

    // Compute initial class counts
    double[] classProbs = new double[train.numClasses()];
    for (int i = 0; i < train.numInstances(); i++) {
        Instance inst = train.instance(i);
        classProbs[(int) inst.classValue()] += inst.weight();
    }

    Instances requiredInstances = getRequiredInst();

    // Build tree
    if (jsontree != null) {
        buildTree(train, classProbs, new Instances(data, 0), m_Debug, 0, jsontree, 0,
                m_distributionData, requiredInstances, listOfFc, cSetList, ccSer, d);
    } else {
        System.out.println("No json tree specified, failing to process tree");
    }
    setRequiredInst(requiredInstances);

    // Backfit if required
    if (backfit != null) {
        backfitData(backfit);
    }
}
From source file:org.scripps.branch.classifier.ManualTree.java
License:Open Source License
/**
 * Computes class distribution for an attribute.
 *
 * @param props
 * @param dists
 * @param att the attribute index
 * @param data the data to work with
 * @param givenSplitPoint the split point to use, or NaN to search for one
 * @param custom_classifiers map of custom classifiers keyed by id
 * @return a map with the chosen ("split_point") and original ("orig_split_point") split points
 * @throws Exception if something goes wrong
 */
protected HashMap<String, Double> distribution(double[][] props, double[][][] dists, int att,
        Instances data, double givenSplitPoint, HashMap<String, Classifier> custom_classifiers)
        throws Exception {

    HashMap<String, Double> mp = new HashMap<String, Double>();
    double splitPoint = givenSplitPoint;
    double origSplitPoint = 0;
    Attribute attribute = null;
    double[][] dist = null;
    int indexOfFirstMissingValue = -1;
    String CustomClassifierId = null;
    CustomSet cSet = null;

    if (att >= data.numAttributes() && att < data.numAttributes() + custom_classifiers.size()) {
        CustomClassifierId = getKeyinMap(custom_classifiers, att, data);
    } else if (att >= data.numAttributes() + custom_classifiers.size()) {
        cSet = getReqCustomSet(att - (data.numAttributes() - 1 + custom_classifiers.size()), cSetList);
    } else {
        attribute = data.attribute(att);
    }

    if (CustomClassifierId == null && cSet == null) {
        if (attribute.isNominal()) {
            // For nominal attributes
            dist = new double[attribute.numValues()][data.numClasses()];
            for (int i = 0; i < data.numInstances(); i++) {
                Instance inst = data.instance(i);
                if (inst.isMissing(att)) {
                    // Skip missing values at this stage
                    if (indexOfFirstMissingValue < 0) {
                        indexOfFirstMissingValue = i;
                    }
                    continue;
                }
                dist[(int) inst.value(att)][(int) inst.classValue()] += inst.weight();
            }
        } else {
            // For numeric attributes
            double[][] currDist = new double[2][data.numClasses()];
            dist = new double[2][data.numClasses()];

            // Sort data
            data.sort(att);

            // Move all instances into second subset
            for (int j = 0; j < data.numInstances(); j++) {
                Instance inst = data.instance(j);
                if (inst.isMissing(att)) {
                    // Can stop as soon as we hit a missing value
                    indexOfFirstMissingValue = j;
                    break;
                }
                currDist[1][(int) inst.classValue()] += inst.weight();
            }

            // Value before splitting
            double priorVal = priorVal(currDist);

            // Save initial distribution
            for (int j = 0; j < currDist.length; j++) {
                System.arraycopy(currDist[j], 0, dist[j], 0, dist[j].length);
            }

            if (Double.isNaN(splitPoint)) {
                // Try all possible split points
                double currSplit = data.instance(0).value(att);
                double currVal, bestVal = -Double.MAX_VALUE;
                for (int i = 0; i < data.numInstances(); i++) {
                    Instance inst = data.instance(i);
                    if (inst.isMissing(att)) {
                        // Can stop as soon as we hit a missing value
                        break;
                    }
                    // Can we place a sensible split point here?
                    if (inst.value(att) > currSplit) {
                        // Compute gain for split point
                        currVal = gain(currDist, priorVal);
                        // Is the current split point the best point so far?
                        if (currVal > bestVal) {
                            // Store value of current point
                            bestVal = currVal;
                            // Save split point
                            splitPoint = (inst.value(att) + currSplit) / 2.0;
                            origSplitPoint = splitPoint;
                            // Save distribution
                            for (int j = 0; j < currDist.length; j++) {
                                System.arraycopy(currDist[j], 0, dist[j], 0, dist[j].length);
                            }
                        }
                    }
                    currSplit = inst.value(att);
                    // Shift over the weight
                    currDist[0][(int) inst.classValue()] += inst.weight();
                    currDist[1][(int) inst.classValue()] -= inst.weight();
                }
            } else {
                // Split data set using given split point
                double currSplit = data.instance(0).value(att);
                double currVal, bestVal = -Double.MAX_VALUE;
                for (int i = 0; i < data.numInstances(); i++) {
                    Instance inst = data.instance(i);
                    if (inst.isMissing(att)) {
                        // Can stop as soon as we hit a missing value
                        break;
                    }
                    if (inst.value(att) > currSplit) {
                        // Compute gain for split point
                        currVal = gain(currDist, priorVal);
                        // Is the current split point the best point so far?
                        if (currVal > bestVal) {
                            // Store value of current point
                            bestVal = currVal;
                            // Save computed split point
                            origSplitPoint = (inst.value(att) + currSplit) / 2.0;
                        }
                    }
                    currSplit = inst.value(att);
                    // Shift over the weight
                    currDist[0][(int) inst.classValue()] += inst.weight();
                    currDist[1][(int) inst.classValue()] -= inst.weight();
                    if (inst.value(att) <= splitPoint) {
                        // Save distribution since split point is specified
                        for (int j = 0; j < currDist.length; j++) {
                            System.arraycopy(currDist[j], 0, dist[j], 0, dist[j].length);
                        }
                    }
                }
            }
        }
    } else if (CustomClassifierId != null) {
        Classifier fc = custom_classifiers.get(CustomClassifierId);
        dist = new double[data.numClasses()][data.numClasses()];
        Instance inst;
        for (int i = 0; i < data.numInstances(); i++) {
            inst = data.instance(i);
            double predictedClass = fc.classifyInstance(inst);
            if (predictedClass != Instance.missingValue()) {
                dist[(int) predictedClass][(int) inst.classValue()] += inst.weight();
            }
        }
    } else if (cSet != null) {
        dist = new double[data.numClasses()][data.numClasses()];
        JsonNode vertices = mapper.readTree(cSet.getConstraints());
        ArrayList<double[]> attrVertices = generateVerticesList(vertices);
        List<Attribute> aList = generateAttributeList(cSet, data, d);
        double[] testPoint = new double[2];
        int ctr = 0;
        for (int k = 0; k < data.numInstances(); k++) {
            testPoint = new double[2];
            ctr = 0;
            for (Attribute a : aList) {
                if (!data.instance(k).isMissing(a)) {
                    testPoint[ctr] = data.instance(k).value(a);
                    ctr++;
                }
            }
            int check = checkPointInPolygon(attrVertices, testPoint);
            dist[check][(int) data.instance(k).classValue()] += data.instance(k).weight();
        }
    }

    // Compute weights for subsets
    props[att] = new double[dist.length];
    for (int k = 0; k < props[att].length; k++) {
        props[att][k] = Utils.sum(dist[k]);
    }
    if (Utils.eq(Utils.sum(props[att]), 0)) {
        for (int k = 0; k < props[att].length; k++) {
            props[att][k] = 1.0 / props[att].length;
        }
    } else {
        Utils.normalize(props[att]);
    }

    // Any instances with missing values?
    if (indexOfFirstMissingValue > -1) {
        // Distribute weights for instances with missing values
        for (int i = indexOfFirstMissingValue; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);
            if (attribute.isNominal()) {
                // Need to check if attribute value is missing
                if (inst.isMissing(att)) {
                    for (int j = 0; j < dist.length; j++) {
                        dist[j][(int) inst.classValue()] += props[att][j] * inst.weight();
                    }
                }
            } else {
                // Can be sure that value is missing, so no test required
                for (int j = 0; j < dist.length; j++) {
                    dist[j][(int) inst.classValue()] += props[att][j] * inst.weight();
                }
            }
        }
    }

    // Return distribution and split point
    dists[att] = dist;
    mp.put("split_point", splitPoint);
    mp.put("orig_split_point", origSplitPoint);
    return mp;
}
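The weight-shifting idiom above (adding an instance's weight to the left distribution and subtracting it from the right as the candidate split point advances) is what makes the split search linear after sorting. A minimal sketch of the same idiom in isolation; it assumes the data is sorted on the attribute with no missing values, and replaces the class's gain() helper with a toy purity score:

import weka.core.Instance;
import weka.core.Instances;

public class WeightShiftSketch {
    // Returns the best split point for attribute att, or NaN if none found.
    static double bestSplit(Instances data, int att) {
        // Start with all weight in the right subset.
        double[][] currDist = new double[2][data.numClasses()];
        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);
            currDist[1][(int) inst.classValue()] += inst.weight();
        }
        double bestVal = -Double.MAX_VALUE, splitPoint = Double.NaN;
        double currSplit = data.instance(0).value(att);
        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);
            if (inst.value(att) > currSplit) {
                double currVal = purity(currDist); // stand-in for the gain() helper
                if (currVal > bestVal) {
                    bestVal = currVal;
                    splitPoint = (inst.value(att) + currSplit) / 2.0;
                }
            }
            currSplit = inst.value(att);
            // Shift this instance's weight from the right subset to the left.
            currDist[0][(int) inst.classValue()] += inst.weight();
            currDist[1][(int) inst.classValue()] -= inst.weight();
        }
        return splitPoint;
    }

    // Toy purity score: summed weight of each subset's majority class.
    static double purity(double[][] dist) {
        double val = 0;
        for (double[] d : dist) {
            double max = 0;
            for (double w : d) max = Math.max(max, w);
            val += max;
        }
        return val;
    }
}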
From source file:org.scripps.branch.classifier.ManualTree.java
License:Open Source License
/**
 * Splits instances into subsets based on the given split.
 *
 * @param data the data to work with
 * @return the subsets of instances
 * @throws Exception if something goes wrong
 */
protected Instances[] splitData(Instances data) throws Exception {
    // Allocate array of Instances objects
    Instances[] subsets = new Instances[m_Prop.length];
    for (int i = 0; i < m_Prop.length; i++) {
        subsets[i] = new Instances(data, data.numInstances());
    }

    if (m_Attribute >= data.numAttributes()) {
        if (m_Attribute >= listOfFc.size() + data.numAttributes() - 1) {
            CustomSet cSet = getReqCustomSet(
                    m_Attribute - (data.numAttributes() - 1 + listOfFc.size()), cSetList);
            JsonNode vertices = mapper.readTree(cSet.getConstraints());
            ArrayList<double[]> attrVertices = generateVerticesList(vertices);
            List<Attribute> aList = generateAttributeList(cSet, data, d);
            double[] testPoint = new double[2];
            int ctr = 0;
            for (int k = 0; k < data.numInstances(); k++) {
                ctr = 0;
                for (Attribute a : aList) {
                    testPoint[ctr] = data.instance(k).value(a);
                    ctr++;
                }
                int check = checkPointInPolygon(attrVertices, testPoint);
                subsets[check].add(data.instance(k));
            }
        } else {
            Classifier fc;
            double predictedClass;
            // Go through the data
            for (int i = 0; i < data.numInstances(); i++) {
                // Get instance
                Instance inst = data.instance(i);
                String classifierId = getKeyinMap(listOfFc, m_Attribute, data);
                fc = listOfFc.get(classifierId);
                predictedClass = fc.classifyInstance(inst);
                if (predictedClass != Instance.missingValue()) {
                    subsets[(int) predictedClass].add(inst);
                    continue;
                }
                // Else throw an exception
                throw new IllegalArgumentException("Unknown attribute type");
            }
        }
    } else {
        // Go through the data
        for (int i = 0; i < data.numInstances(); i++) {
            // Get instance
            Instance inst = data.instance(i);

            // Does the instance have a missing value?
            if (inst.isMissing(m_Attribute)) {
                // Split instance up
                for (int k = 0; k < m_Prop.length; k++) {
                    if (m_Prop[k] > 0) {
                        Instance copy = (Instance) inst.copy();
                        copy.setWeight(m_Prop[k] * inst.weight());
                        subsets[k].add(copy);
                    }
                }
                // Proceed to next instance
                continue;
            }

            // Do we have a nominal attribute?
            if (data.attribute(m_Attribute).isNominal()) {
                subsets[(int) inst.value(m_Attribute)].add(inst);
                // Proceed to next instance
                continue;
            }

            // Do we have a numeric attribute?
            if (data.attribute(m_Attribute).isNumeric()) {
                subsets[(inst.value(m_Attribute) < m_SplitPoint) ? 0 : 1].add(inst);
                // Proceed to next instance
                continue;
            }

            // Else throw an exception
            throw new IllegalArgumentException("Unknown attribute type");
        }
    }

    // Save memory
    for (int i = 0; i < m_Prop.length; i++) {
        subsets[i].compactify();
    }

    // Return the subsets
    return subsets;
}
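The missing-value branch above is the C4.5-style trick: rather than dropping an instance whose split attribute is missing, a weighted copy is sent down every branch. A minimal sketch of just that step, assuming prop holds the normalized branch proportions (as m_Prop does in the example):

import weka.core.Instance;
import weka.core.Instances;

class MissingValueSplit {
    // Send a weighted copy of inst down every branch; prop must sum to 1.
    static void distributeMissing(Instance inst, double[] prop, Instances[] subsets) {
        for (int k = 0; k < prop.length; k++) {
            if (prop[k] > 0) {
                Instance copy = (Instance) inst.copy();
                copy.setWeight(prop[k] * inst.weight()); // fractional weight
                subsets[k].add(copy);
            }
        }
    }
}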
From source file:preprocess.StringToWordVector.java
License:Open Source License
/**
 * Converts the instance w/o normalization.
 *
 * @param instance the instance to convert
 * @param v
 * @return the number of attributes copied directly (the converted
 *         instance itself is added to v)
 */
private int convertInstancewoDocNorm(Instance instance, FastVector v) {
    // Convert the instance into a sorted set of indexes
    TreeMap contained = new TreeMap();

    // Copy all non-converted attributes from input to output
    int firstCopy = 0;
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (!m_SelectedRange.isInRange(i)) {
            if (getInputFormat().attribute(i).type() != Attribute.STRING) {
                // Add simple nominal and numeric attributes directly
                if (instance.value(i) != 0.0) {
                    contained.put(new Integer(firstCopy), new Double(instance.value(i)));
                }
            } else {
                if (instance.isMissing(i)) {
                    contained.put(new Integer(firstCopy), new Double(Instance.missingValue()));
                } else {
                    // If this is a string attribute, we have to first add
                    // this value to the range of possible values, then add
                    // its new internal index.
                    if (outputFormatPeek().attribute(firstCopy).numValues() == 0) {
                        // Note that the first string value in a
                        // SparseInstance doesn't get printed.
                        outputFormatPeek().attribute(firstCopy)
                                .addStringValue("Hack to defeat SparseInstance bug");
                    }
                    int newIndex = outputFormatPeek().attribute(firstCopy)
                            .addStringValue(instance.stringValue(i));
                    contained.put(new Integer(firstCopy), new Double(newIndex));
                }
            }
            firstCopy++;
        }
    }

    for (int j = 0; j < instance.numAttributes(); j++) {
        // if ((getInputFormat().attribute(j).type() == Attribute.STRING)
        if (m_SelectedRange.isInRange(j) && (instance.isMissing(j) == false)) {
            m_Tokenizer.tokenize(instance.stringValue(j));
            while (m_Tokenizer.hasMoreElements()) {
                String word = (String) m_Tokenizer.nextElement();
                if (this.m_lowerCaseTokens == true)
                    word = word.toLowerCase();
                word = m_Stemmer.stem(word);
                Integer index = (Integer) m_Dictionary.get(word);
                if (index != null) {
                    if (m_OutputCounts) {
                        // Separate if here rather than two lines down
                        // to avoid a hashtable lookup
                        Double count = (Double) contained.get(index);
                        if (count != null) {
                            contained.put(index, new Double(count.doubleValue() + 1.0));
                        } else {
                            contained.put(index, new Double(1));
                        }
                    } else {
                        contained.put(index, new Double(1));
                    }
                }
            }
        }
    }

    // Doing TFTransform
    if (m_TFTransform == true) {
        Iterator it = contained.keySet().iterator();
        for (int i = 0; it.hasNext(); i++) {
            Integer index = (Integer) it.next();
            if (index.intValue() >= firstCopy) {
                double val = ((Double) contained.get(index)).doubleValue();
                val = Math.log(val + 1);
                contained.put(index, new Double(val));
            }
        }
    }

    // Doing IDFTransform
    if (m_IDFTransform == true) {
        Iterator it = contained.keySet().iterator();
        for (int i = 0; it.hasNext(); i++) {
            Integer index = (Integer) it.next();
            if (index.intValue() >= firstCopy) {
                double val = ((Double) contained.get(index)).doubleValue();
                val = val * Math.log(m_NumInstances / (double) m_DocsCounts[index.intValue()]);
                contained.put(index, new Double(val));
            }
        }
    }

    // Convert the set to structures needed to create a sparse instance.
    double[] values = new double[contained.size()];
    int[] indices = new int[contained.size()];
    Iterator it = contained.keySet().iterator();
    for (int i = 0; it.hasNext(); i++) {
        Integer index = (Integer) it.next();
        Double value = (Double) contained.get(index);
        values[i] = value.doubleValue();
        indices[i] = index.intValue();
    }

    // The original instance's weight is carried into the sparse copy
    Instance inst = new SparseInstance(instance.weight(), values, indices,
            outputFormatPeek().numAttributes());
    inst.setDataset(outputFormatPeek());
    v.addElement(inst);
    return firstCopy;
}
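The key point for weight() in this filter is the SparseInstance constructor: its first argument carries the source instance's weight into the converted instance, so weighted corpora keep their weights through vectorization. A minimal sketch of that hand-off, assuming the Weka 3.6-era API used in this example and an already-computed sparse representation:

import weka.core.Instance;
import weka.core.Instances;
import weka.core.SparseInstance;

class WeightPreservingConversion {
    // Build a sparse copy in outFormat that keeps the source instance's weight.
    static Instance toSparse(Instance source, double[] values, int[] indices,
            Instances outFormat) {
        Instance converted = new SparseInstance(source.weight(), values, indices,
                outFormat.numAttributes());
        converted.setDataset(outFormat);
        return converted;
    }
}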
From source file:resample.OverSubsample.java
License:Open Source License
/**
 * Creates a subsample of the current set of input instances. The output
 * instances are pushed onto the output queue for collection.
 */
private void createSubsample() {
    int classI = getInputFormat().classIndex();

    // Sort according to class attribute.
    getInputFormat().sort(classI);

    // Determine where each class starts in the sorted dataset
    int[] classIndices = getClassIndices();

    // Get the existing class distribution
    int[] counts = new int[getInputFormat().numClasses()];
    double[] weights = new double[getInputFormat().numClasses()];
    int max = -1;
    for (int i = 0; i < getInputFormat().numInstances(); i++) {
        Instance current = getInputFormat().instance(i);
        if (current.classIsMissing() == false) {
            counts[(int) current.classValue()]++;
            weights[(int) current.classValue()] += current.weight();
        }
    }

    // Convert from total weight to average weight
    for (int i = 0; i < counts.length; i++) {
        if (counts[i] > 0) {
            weights[i] = weights[i] / counts[i];
        }
    }

    // Find the class with the maximum number of instances
    int maxIndex = -1;
    for (int i = 0; i < counts.length; i++) {
        if ((max < 0) && (counts[i] > 0)) {
            max = counts[i];
            maxIndex = i;
        } else if ((counts[i] > max) && (counts[i] > 0)) {
            max = counts[i];
            maxIndex = i;
        }
    }

    if (max < 0) {
        System.err.println("SpreadSubsample: *warning* none of the classes have any values in them.");
        return;
    }

    // Determine the new distribution
    int[] new_counts = new int[getInputFormat().numClasses()];
    for (int i = 0; i < counts.length; i++) {
        new_counts[i] = (int) Math.abs(Math.max(counts[i], max * m_DistributionSpread));
        if (i == maxIndex) {
            if (m_DistributionSpread > 0 && m_DistributionSpread < 1.0) {
                // don't undersample the majority class!
                new_counts[i] = counts[i];
            }
        }
        if (m_DistributionSpread == 0) {
            new_counts[i] = counts[i];
        }
        if (m_MaxCount > 0) {
            new_counts[i] = Math.min(new_counts[i], m_MaxCount);
        }
    }

    // Sample with replacement
    Random random = new Random(m_RandomSeed);
    for (int j = 0; j < new_counts.length; j++) {
        double newWeight = 1.0;
        if (m_AdjustWeights && (new_counts[j] > 0)) {
            // Rescale so the class's total weight is unchanged after resampling
            newWeight = weights[j] * counts[j] / new_counts[j];
        }
        int index = -1;
        for (int k = 0; k < new_counts[j]; k++) {
            index = classIndices[j]
                    + (Math.abs(random.nextInt()) % (classIndices[j + 1] - classIndices[j]));
            if (index >= 0) {
                Instance newInst = (Instance) getInputFormat().instance(index).copy();
                if (m_AdjustWeights) {
                    newInst.setWeight(newWeight);
                }
                push(newInst);
            }
        }
    }
}
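When m_AdjustWeights is set, each sampled copy is rescaled so that a class's total weight is invariant under resampling: newWeight = avgWeight * oldCount / newCount. A quick worked check of that invariant with made-up numbers:

class WeightAdjustCheck {
    public static void main(String[] args) {
        int oldCount = 20, newCount = 60;       // class oversampled 3x
        double avgWeight = 1.5;                  // average weight before resampling
        double oldTotal = avgWeight * oldCount;  // 30.0
        double newWeight = avgWeight * oldCount / newCount; // 0.5 per sampled copy
        // Total class weight is preserved: 30.0 == 60 * 0.5
        System.out.println(oldTotal + " == " + newWeight * newCount);
    }
}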
From source file:svmal.SVMStrategy.java
public static Instances InstancesToInstances2(Instances insts) {
    Instances result = new Instances(insts, 0, 0);
    for (int i = 0; i < insts.numInstances(); i++) {
        Instance orig = insts.get(i);
        // Carry the original weight over into the new Instance2
        Instance2 inst2 = new Instance2(orig.weight(), orig.toDoubleArray());
        inst2.setDataset(result);
        result.add(inst2);
    }
    return result;
}
From source file:test.org.moa.opencl.IBk.java
License:Open Source License
/**
 * Turn the list of nearest neighbors into a probability distribution.
 *
 * @param neighbours the list of nearest neighboring instances
 * @param distances the distances of the neighbors
 * @return the probability distribution
 * @throws Exception if computation goes wrong or has no class attribute
 */
protected double[] makeDistribution(Instances neighbours, double[] distances) throws Exception {
    double total = 0, weight;
    double[] distribution = new double[m_NumClasses];

    // Set up a correction to the estimator
    if (m_ClassType == Attribute.NOMINAL) {
        for (int i = 0; i < m_NumClasses; i++) {
            distribution[i] = 1.0 / Math.max(1, m_Train.numInstances());
        }
        total = (double) m_NumClasses / Math.max(1, m_Train.numInstances());
    }

    for (int i = 0; i < neighbours.numInstances(); i++) {
        // Collect class counts
        Instance current = neighbours.instance(i);
        distances[i] = distances[i] * distances[i];
        distances[i] = Math.sqrt(distances[i] / m_NumAttributesUsed);
        switch (m_DistanceWeighting) {
        case WEIGHT_INVERSE:
            weight = 1.0 / (distances[i] + 0.001); // to avoid div by zero
            break;
        case WEIGHT_SIMILARITY:
            weight = 1.0 - distances[i];
            break;
        default: // WEIGHT_NONE
            weight = 1.0;
            break;
        }
        weight *= current.weight();
        try {
            switch (m_ClassType) {
            case Attribute.NOMINAL:
                distribution[(int) current.classValue()] += weight;
                break;
            case Attribute.NUMERIC:
                distribution[0] += current.classValue() * weight;
                break;
            }
        } catch (Exception ex) {
            throw new Error("Data has no class attribute!");
        }
        total += weight;
    }

    // Normalise distribution
    if (total > 0) {
        Utils.normalize(distribution, total);
    }
    return distribution;
}
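Note the order of operations above: the distance-based weight is computed first and then multiplied by current.weight(), so a pre-weighted training instance counts proportionally more in the vote. A minimal sketch of that combination for a nominal class, assuming inverse distance weighting:

import weka.core.Instance;
import weka.core.Instances;

class WeightedVote {
    // Combine distance weighting with per-instance weights.
    static double[] vote(Instances neighbours, double[] distances, int numClasses) {
        double[] distribution = new double[numClasses];
        double total = 0;
        for (int i = 0; i < neighbours.numInstances(); i++) {
            Instance current = neighbours.instance(i);
            double w = 1.0 / (distances[i] + 0.001); // inverse distance, avoids div by zero
            w *= current.weight();                   // scale by the instance's own weight
            distribution[(int) current.classValue()] += w;
            total += w;
        }
        if (total > 0) {
            for (int c = 0; c < numClasses; c++) distribution[c] /= total;
        }
        return distribution;
    }
}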
From source file:themeextractor.filters.MauiFilter.java
License:Open Source License
/**
 * Builds the classifier.
 */
private void buildClassifier() throws Exception {
    // Generate input format for classifier
    FastVector atts = new FastVector();
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (i == documentAtt) {
            atts.addElement(new Attribute("Term_frequency")); // 2
            atts.addElement(new Attribute("IDF")); //
            atts.addElement(new Attribute("TFxIDF")); //
            atts.addElement(new Attribute("First_occurrence")); //
            atts.addElement(new Attribute("Last_occurrence")); //
            atts.addElement(new Attribute("Spread")); //
            atts.addElement(new Attribute("Domain_keyphraseness")); //
            atts.addElement(new Attribute("Length")); //
            atts.addElement(new Attribute("Generality")); //
            atts.addElement(new Attribute("Node_degree")); //
            atts.addElement(new Attribute("Semantic_relatedness")); //
            atts.addElement(new Attribute("Wikipedia_keyphraseness")); //
            atts.addElement(new Attribute("Inverse_Wikip_frequency")); //
            atts.addElement(new Attribute("Total_Wikip_keyphraseness")); // 13
        } else if (i == keyphrasesAtt) {
            if (nominalClassValue) {
                FastVector vals = new FastVector(2);
                vals.addElement("False");
                vals.addElement("True");
                atts.addElement(new Attribute("Keyphrase?", vals));
            } else {
                atts.addElement(new Attribute("Keyphrase?"));
            }
        }
    }

    classifierData = new Instances("ClassifierData", atts, 0);
    classifierData.setClassIndex(numFeatures);

    if (debugMode) {
        System.err.println("--- Converting instances for classifier");
    }
    int totalDocuments = getInputFormat().numInstances();

    // Convert pending input instances into data for classifier
    for (int i = 0; i < totalDocuments; i++) {
        Instance current = getInputFormat().instance(i);

        // Get the key phrases for the document
        String keyphrases = current.stringValue(keyphrasesAtt);
        HashMap<String, Counter> hashKeyphrases = getGivenKeyphrases(keyphrases);

        // Get the phrases for the document
        HashMap<String, Candidate> candidateList = allCandidates.get(current);

        // Compute the feature values for each phrase and
        // add the instance to the data for the classifier
        int countPos = 0;
        int countNeg = 0;
        if (debugMode) {
            System.err.println("--- Computing features for document " + i + " out of "
                    + totalDocuments + "...");
        }
        for (Candidate candidate : candidateList.values()) {
            // ignore all candidates that appear less than a threshold
            if (candidate.getFrequency() < minOccurFrequency) {
                continue;
            }
            // compute feature values
            double[] vals = computeFeatureValues(candidate, true, hashKeyphrases, candidateList);
            if (vals[vals.length - 1] == 0) {
                countNeg++;
            } else {
                countPos++;
            }
            // The new training instance inherits the document's weight
            Instance inst = new Instance(current.weight(), vals);
            // System.out.println(candidate + "\t" + inst);
            classifierData.add(inst);
        }
        if (debugMode) {
            System.err.println(countPos + " positive; " + countNeg + " negative instances");
        }
    }

    if (debugMode) {
        System.err.println("--- Building classifier");
    }
    if (classifier == null) {
        // Build classifier
        if (nominalClassValue) {
            // FilteredClassifier fclass = new FilteredClassifier();
            // fclass.setClassifier(new NaiveBayesSimple());
            // fclass.setFilter(new Discretize());
            // classifier = fclass;
            classifier = new Bagging();
            // try also:
            // classifier.setOptions(Utils.splitOptions(
            //     "-P 10 -S 1 -I 10 -W weka.classifiers.trees.J48 -- -U -M 2"));
        } else {
            classifier = new Bagging();
            String optionsString = "-P 100 -S 1 -I 10 -W weka.classifiers.trees.M5P -- -U -M 7.0";
            String[] options = Utils.splitOptions(optionsString);
            classifier.setOptions(options);
        }
    }
    classifier.buildClassifier(classifierData);
    if (debugMode) {
        System.err.println(classifier);
    }

    // Save space
    classifierData = new Instances(classifierData, 0);
}