List of usage examples for weka.core.Instance value(Attribute)
public double value(Attribute att);
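Before the source-file examples, here is a minimal, self-contained sketch of how value(Attribute) is typically called. The file name data.arff, the class name ValueExample, and the attribute positions are placeholders, not taken from the listings below.

import java.io.BufferedReader;
import java.io.FileReader;
import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;

public class ValueExample {
    public static void main(String[] args) throws Exception {
        // Load a dataset (placeholder file name) and mark the last attribute as the class.
        Instances data = new Instances(new BufferedReader(new FileReader("data.arff")));
        data.setClassIndex(data.numAttributes() - 1);

        Instance inst = data.instance(0);
        Attribute att = data.attribute(0);

        // For a numeric attribute this is the numeric value; for a nominal attribute
        // it is the zero-based index of the instance's label in att's list of values.
        double v = inst.value(att);
        System.out.println(att.name() + " = " + v);
    }
}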
From source file:org.wkwk.classifier.MyC45.java
@Override
public double classifyInstance(Instance data) {
    if (splitAttribute == null) {
        return classValue;
    } else {
        if (splitAttribute.isNominal()) {
            return successors[(int) data.value(splitAttribute)].classifyInstance(data);
        } else if (splitAttribute.isNumeric()) {
            if (data.value(splitAttribute) < attrThreshold) {
                return successors[0].classifyInstance(data);
            } else {
                return successors[1].classifyInstance(data);
            }
        } else {
            return -1;
        }
    }
}
From source file:org.wkwk.classifier.MyC45.java
public Instances[] splitData(Instances data, Attribute attr) {
    Instances[] splitData = new Instances[attr.numValues()];
    for (int i = 0; i < attr.numValues(); i++) {
        splitData[i] = new Instances(data, data.numInstances());
    }
    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        splitData[(int) inst.value(attr)].add(inst);
    }
    return splitData;
}
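In both MyC45 snippets above, value() is called on a nominal split attribute; for a nominal attribute the returned double is the zero-based index of the instance's label in the attribute's declared values, which is why it can be cast to int and used as an array index. A minimal sketch of that behaviour, assuming a loaded dataset data with a hypothetical nominal attribute named "outlook":

Attribute outlook = data.attribute("outlook");  // hypothetical nominal attribute
Instance first = data.instance(0);
int idx = (int) first.value(outlook);           // zero-based index of the label
String label = outlook.value(idx);              // same label as first.stringValue(outlook)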
From source file:org.wkwk.classifier.MyC45.java
public double bestThreshold(Instances data, Attribute attr) {
    data.sort(attr);
    double m_ig = 0;
    double bestThr = 0;
    double classTemp = data.get(0).classValue();
    double valueTemp = data.get(0).value(attr);
    Enumeration instEnum = data.enumerateInstances();
    double dt;
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        if (classTemp != inst.classValue()) {
            classTemp = inst.classValue();
            dt = valueTemp;
            valueTemp = inst.value(attr);
            double threshold = dt + ((valueTemp - dt) / 2);
            double igTemp = computeInfoGainCont(data, attr, threshold);
            if (m_ig < igTemp) {
                m_ig = igTemp;
                bestThr = threshold;
            }
        }
    }
    return bestThr;
}
From source file:Part2.HierarchicalClusterer.java
License:Open Source License
/**
 * Calculate the distance between two clusters.
 * @param cluster1 list of indices of instances in the first cluster
 * @param cluster2 ditto for the second cluster
 * @return distance between clusters based on link type
 */
double getDistance(double[][] fDistance, Vector<Integer> cluster1, Vector<Integer> cluster2) {
    double fBestDist = Double.MAX_VALUE;
    //double SemiDist = m_DistanceFunction.Semi_distance(fDistance, cluster1, cluster2);
    switch (m_nLinkType) {
    case SINGLE:
        // find single link distance aka minimum link, which is the closest distance between
        // any item in cluster1 and any item in cluster2
        fBestDist = Double.MAX_VALUE;
        for (int i = 0; i < cluster1.size(); i++) {
            int i1 = cluster1.elementAt(i);
            for (int j = 0; j < cluster2.size(); j++) {
                int i2 = cluster2.elementAt(j);
                double fDist = fDistance[i1][i2];
                if (fBestDist > fDist) { fBestDist = fDist; }
            }
        }
        break;
    case COMPLETE:
    case ADJCOMLPETE:
        // find complete link distance aka maximum link, which is the largest distance between
        // any item in cluster1 and any item in cluster2
        fBestDist = 0;
        for (int i = 0; i < cluster1.size(); i++) {
            int i1 = cluster1.elementAt(i);
            for (int j = 0; j < cluster2.size(); j++) {
                int i2 = cluster2.elementAt(j);
                double fDist = fDistance[i1][i2];
                if (fBestDist < fDist) { fBestDist = fDist; }
            }
        }
        if (m_nLinkType == COMPLETE) { break; }
        // calculate adjustment, which is the largest within-cluster distance
        double fMaxDist = 0;
        for (int i = 0; i < cluster1.size(); i++) {
            int i1 = cluster1.elementAt(i);
            for (int j = i + 1; j < cluster1.size(); j++) {
                int i2 = cluster1.elementAt(j);
                double fDist = fDistance[i1][i2];
                if (fMaxDist < fDist) { fMaxDist = fDist; }
            }
        }
        for (int i = 0; i < cluster2.size(); i++) {
            int i1 = cluster2.elementAt(i);
            for (int j = i + 1; j < cluster2.size(); j++) {
                int i2 = cluster2.elementAt(j);
                double fDist = fDistance[i1][i2];
                if (fMaxDist < fDist) { fMaxDist = fDist; }
            }
        }
        fBestDist -= fMaxDist;
        break;
    case AVERAGE:
        // finds the average distance between the elements of the two clusters
        fBestDist = 0;
        for (int i = 0; i < cluster1.size(); i++) {
            int i1 = cluster1.elementAt(i);
            for (int j = 0; j < cluster2.size(); j++) {
                int i2 = cluster2.elementAt(j);
                fBestDist += fDistance[i1][i2];
            }
        }
        fBestDist /= (cluster1.size() * cluster2.size());
        break;
    case MEAN: {
        // calculates the mean distance of the merged cluster (aka group-average agglomerative clustering)
        Vector<Integer> merged = new Vector<Integer>();
        merged.addAll(cluster1);
        merged.addAll(cluster2);
        fBestDist = 0;
        for (int i = 0; i < merged.size(); i++) {
            int i1 = merged.elementAt(i);
            for (int j = i + 1; j < merged.size(); j++) {
                int i2 = merged.elementAt(j);
                fBestDist += fDistance[i1][i2];
            }
        }
        int n = merged.size();
        fBestDist /= (n * (n - 1.0) / 2.0);
    }
        break;
    case CENTROID:
        // finds the distance between the centroids of the clusters
        double[] fValues1 = new double[m_instances.numAttributes()];
        for (int i = 0; i < cluster1.size(); i++) {
            Instance instance = m_instances.instance(cluster1.elementAt(i));
            for (int j = 0; j < m_instances.numAttributes(); j++) {
                fValues1[j] += instance.value(j);
            }
        }
        double[] fValues2 = new double[m_instances.numAttributes()];
        for (int i = 0; i < cluster2.size(); i++) {
            Instance instance = m_instances.instance(cluster2.elementAt(i));
            for (int j = 0; j < m_instances.numAttributes(); j++) {
                fValues2[j] += instance.value(j);
            }
        }
        for (int j = 0; j < m_instances.numAttributes(); j++) {
            fValues1[j] /= cluster1.size();
            fValues2[j] /= cluster2.size();
        }
        // set up two instances for the distance function
        Instance instance1 = (Instance) m_instances.instance(0).copy();
        Instance instance2 = (Instance) m_instances.instance(0).copy();
        for (int j = 0; j < m_instances.numAttributes(); j++) {
            instance1.setValue(j, fValues1[j]);
            instance2.setValue(j, fValues2[j]);
        }
        fBestDist = m_DistanceFunction.distance(instance1, instance2);
        break;
    case WARD: {
        // finds the change in information caused by merging the clusters.
        // The information of a cluster is calculated as the error sum of squares of the
        // centroid of the cluster and its members.
        double ESS1 = calcESS(cluster1);
        double ESS2 = calcESS(cluster2);
        Vector<Integer> merged = new Vector<Integer>();
        merged.addAll(cluster1);
        merged.addAll(cluster2);
        double ESS = calcESS(merged);
        fBestDist = ESS * merged.size() - ESS1 * cluster1.size() - ESS2 * cluster2.size();
    }
        break;
    }
    //double alpha = 1;
    //return alpha*SemiDist + (1-alpha)*fBestDist;
    return fBestDist;
}
From source file:Part2.HierarchicalClustererEx.java
License:Open Source License
/**
 * Calculate the distance between two clusters.
 * @param cluster1 list of indices of instances in the first cluster
 * @param cluster2 ditto for the second cluster
 * @return distance between clusters based on link type
 */
double getDistance(double[][] fDistance, Vector<Integer> cluster1, Vector<Integer> cluster2) {
    double fBestDist = Double.MAX_VALUE;
    //double SemiDist = m_DistanceFunction.Semi_distance(fDistance, cluster1, cluster2);
    switch (m_nLinkType) {
    case SINGLE:
        // find single link distance aka minimum link, which is the closest distance between
        // any item in cluster1 and any item in cluster2
        fBestDist = Double.MAX_VALUE;
        for (int i = 0; i < cluster1.size(); i++) {
            int i1 = cluster1.elementAt(i);
            for (int j = 0; j < cluster2.size(); j++) {
                int i2 = cluster2.elementAt(j);
                double fDist = fDistance[i1][i2];
                if (fBestDist > fDist) { fBestDist = fDist; }
            }
        }
        break;
    case COMPLETE:
    case ADJCOMLPETE:
        // find complete link distance aka maximum link, which is the largest distance between
        // any item in cluster1 and any item in cluster2
        fBestDist = 0;
        for (int i = 0; i < cluster1.size(); i++) {
            int i1 = cluster1.elementAt(i);
            for (int j = 0; j < cluster2.size(); j++) {
                int i2 = cluster2.elementAt(j);
                double fDist = fDistance[i1][i2];
                if (fBestDist < fDist) { fBestDist = fDist; }
            }
        }
        if (m_nLinkType == COMPLETE) { break; }
        // calculate adjustment, which is the largest within-cluster distance
        double fMaxDist = 0;
        for (int i = 0; i < cluster1.size(); i++) {
            int i1 = cluster1.elementAt(i);
            for (int j = i + 1; j < cluster1.size(); j++) {
                int i2 = cluster1.elementAt(j);
                double fDist = fDistance[i1][i2];
                if (fMaxDist < fDist) { fMaxDist = fDist; }
            }
        }
        for (int i = 0; i < cluster2.size(); i++) {
            int i1 = cluster2.elementAt(i);
            for (int j = i + 1; j < cluster2.size(); j++) {
                int i2 = cluster2.elementAt(j);
                double fDist = fDistance[i1][i2];
                if (fMaxDist < fDist) { fMaxDist = fDist; }
            }
        }
        fBestDist -= fMaxDist;
        break;
    case AVERAGE:
        // finds the average distance between the elements of the two clusters
        fBestDist = 0;
        for (int i = 0; i < cluster1.size(); i++) {
            int i1 = cluster1.elementAt(i);
            for (int j = 0; j < cluster2.size(); j++) {
                int i2 = cluster2.elementAt(j);
                fBestDist += fDistance[i1][i2];
            }
        }
        fBestDist /= (cluster1.size() * cluster2.size());
        break;
    case MEAN: {
        // calculates the mean distance of the merged cluster (aka group-average agglomerative clustering)
        Vector<Integer> merged = new Vector<Integer>();
        merged.addAll(cluster1);
        merged.addAll(cluster2);
        fBestDist = 0;
        for (int i = 0; i < merged.size(); i++) {
            int i1 = merged.elementAt(i);
            for (int j = i + 1; j < merged.size(); j++) {
                int i2 = merged.elementAt(j);
                fBestDist += fDistance[i1][i2];
            }
        }
        int n = merged.size();
        fBestDist /= (n * (n - 1.0) / 2.0);
    }
        break;
    case CENTROID:
        // finds the distance between the centroids of the clusters
        double[] fValues1 = new double[m_instances.numAttributes()];
        for (int i = 0; i < cluster1.size(); i++) {
            Instance instance = m_instances.instance(cluster1.elementAt(i));
            for (int j = 0; j < m_instances.numAttributes(); j++) {
                fValues1[j] += instance.value(j);
            }
        }
        double[] fValues2 = new double[m_instances.numAttributes()];
        for (int i = 0; i < cluster2.size(); i++) {
            Instance instance = m_instances.instance(cluster2.elementAt(i));
            for (int j = 0; j < m_instances.numAttributes(); j++) {
                fValues2[j] += instance.value(j);
            }
        }
        for (int j = 0; j < m_instances.numAttributes(); j++) {
            fValues1[j] /= cluster1.size();
            fValues2[j] /= cluster2.size();
        }
        // set up two instances for the distance function
        Instance instance1 = (Instance) m_instances.instance(0).copy();
        Instance instance2 = (Instance) m_instances.instance(0).copy();
        for (int j = 0; j < m_instances.numAttributes(); j++) {
            instance1.setValue(j, fValues1[j]);
            instance2.setValue(j, fValues2[j]);
        }
        fBestDist = m_DistanceFunction.distance(instance1, instance2);
        break;
    case WARD: {
        // finds the change in information caused by merging the clusters.
        // The information of a cluster is calculated as the error sum of squares of the
        // centroid of the cluster and its members.
        double ESS1 = calcESS(cluster1);
        double ESS2 = calcESS(cluster2);
        Vector<Integer> merged = new Vector<Integer>();
        merged.addAll(cluster1);
        merged.addAll(cluster2);
        double ESS = calcESS(merged);
        fBestDist = ESS * merged.size() - ESS1 * cluster1.size() - ESS2 * cluster2.size();
    }
        break;
    }
    double alpha = 0.5;
    //return alpha*SemiDist + (1-alpha)*fBestDist;
    return fBestDist;
}
From source file:preprocess.StringToWordVector.java
License:Open Source License
/**
 * Converts the instance without normalization.
 *
 * @param instance the instance to convert
 * @param v
 * @return the number of attributes copied directly (firstCopy); the converted instance is appended to v
 */
private int convertInstancewoDocNorm(Instance instance, FastVector v) {
    // Convert the instance into a sorted set of indexes
    TreeMap contained = new TreeMap();

    // Copy all non-converted attributes from input to output
    int firstCopy = 0;
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (!m_SelectedRange.isInRange(i)) {
            if (getInputFormat().attribute(i).type() != Attribute.STRING) {
                // Add simple nominal and numeric attributes directly
                if (instance.value(i) != 0.0) {
                    contained.put(new Integer(firstCopy), new Double(instance.value(i)));
                }
            } else {
                if (instance.isMissing(i)) {
                    contained.put(new Integer(firstCopy), new Double(Instance.missingValue()));
                } else {
                    // If this is a string attribute, we have to first add
                    // this value to the range of possible values, then add
                    // its new internal index.
                    if (outputFormatPeek().attribute(firstCopy).numValues() == 0) {
                        // Note that the first string value in a
                        // SparseInstance doesn't get printed.
                        outputFormatPeek().attribute(firstCopy)
                                .addStringValue("Hack to defeat SparseInstance bug");
                    }
                    int newIndex = outputFormatPeek().attribute(firstCopy)
                            .addStringValue(instance.stringValue(i));
                    contained.put(new Integer(firstCopy), new Double(newIndex));
                }
            }
            firstCopy++;
        }
    }

    for (int j = 0; j < instance.numAttributes(); j++) {
        //if ((getInputFormat().attribute(j).type() == Attribute.STRING)
        if (m_SelectedRange.isInRange(j) && (instance.isMissing(j) == false)) {
            m_Tokenizer.tokenize(instance.stringValue(j));
            while (m_Tokenizer.hasMoreElements()) {
                String word = (String) m_Tokenizer.nextElement();
                if (this.m_lowerCaseTokens == true)
                    word = word.toLowerCase();
                word = m_Stemmer.stem(word);
                Integer index = (Integer) m_Dictionary.get(word);
                if (index != null) {
                    if (m_OutputCounts) {
                        // Separate if here rather than two lines down to avoid hashtable lookup
                        Double count = (Double) contained.get(index);
                        if (count != null) {
                            contained.put(index, new Double(count.doubleValue() + 1.0));
                        } else {
                            contained.put(index, new Double(1));
                        }
                    } else {
                        contained.put(index, new Double(1));
                    }
                }
            }
        }
    }

    // Doing TFTransform
    if (m_TFTransform == true) {
        Iterator it = contained.keySet().iterator();
        for (int i = 0; it.hasNext(); i++) {
            Integer index = (Integer) it.next();
            if (index.intValue() >= firstCopy) {
                double val = ((Double) contained.get(index)).doubleValue();
                val = Math.log(val + 1);
                contained.put(index, new Double(val));
            }
        }
    }

    // Doing IDFTransform
    if (m_IDFTransform == true) {
        Iterator it = contained.keySet().iterator();
        for (int i = 0; it.hasNext(); i++) {
            Integer index = (Integer) it.next();
            if (index.intValue() >= firstCopy) {
                double val = ((Double) contained.get(index)).doubleValue();
                val = val * Math.log(m_NumInstances / (double) m_DocsCounts[index.intValue()]);
                contained.put(index, new Double(val));
            }
        }
    }

    // Convert the set to structures needed to create a sparse instance.
    double[] values = new double[contained.size()];
    int[] indices = new int[contained.size()];
    Iterator it = contained.keySet().iterator();
    for (int i = 0; it.hasNext(); i++) {
        Integer index = (Integer) it.next();
        Double value = (Double) contained.get(index);
        values[i] = value.doubleValue();
        indices[i] = index.intValue();
    }
    Instance inst = new SparseInstance(instance.weight(), values, indices, outputFormatPeek().numAttributes());
    inst.setDataset(outputFormatPeek());
    v.addElement(inst);
    return firstCopy;
}
From source file:probcog.bayesnets.core.WEKADiscretizationFilter.java
License:Open Source License
public String getValueForContinuous(double continuous) {
    Instance inst = new Instance(1);
    inst.setValue(0, continuous);
    try {
        filter.input(inst);
        filter.batchFinished();
        Instance newInst = filter.output();
        int value = (int) newInst.value(0);
        return outputValues[value];
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}
From source file:probcog.bayesnets.learning.CPTLearner.java
License:Open Source License
/**
 * Learns from all the examples in the given instances; each instance represents one example.
 * All the random variables (nodes) in the network need to be found in each instance as
 * attributes that are named accordingly, i.e. for each random variable, there must be an
 * attribute with a matching name in the instance.
 * @param instances the instances
 * @throws Exception if the set of instances is empty
 * @throws SQLException particularly if there is no matching column for one of the node names
 */
public void learn(Instances instances) throws Exception {
    if (!initialized)
        init();
    // if it's an empty result set, throw exception
    if (instances.numInstances() == 0)
        throw new Exception("empty result set!");
    BeliefNode[] nodes = bn.bn.getNodes();
    int numAttributes = instances.numAttributes();
    // Now we can get many more nodes than attributes
    // if (numAttributes != nodes.length)
    //     throw new Exception("Result does not contain suitable data (attribute count = " + numAttributes + "; node count = " + nodes.length + ")");

    // map node indices to attribute indices
    int[] nodeIdx2colIdx = new int[nodes.length];
    Arrays.fill(nodeIdx2colIdx, -1);
    for (int i = 0; i < numAttributes; i++) {
        Set<String> nodeNames = bn.getNodeNamesForAttribute(instances.attribute(i).name());
        //logger.debug("Nodes for attribute " + instances.attribute(i).name() + ": " + nodeNames);
        if (nodeNames == null)
            continue;
        for (String nodeName : nodeNames) {
            int node_idx = bn.getNodeIndex(nodeName);
            if (node_idx == -1)
                throw new Exception("Unknown node referenced in result set: " + instances.attribute(i).name());
            nodeIdx2colIdx[node_idx] = i;
        }
    }

    // gather data, iterating over the instances
    int[] domainIndices = new int[nodes.length];
    @SuppressWarnings("unchecked")
    Enumeration<Instance> instanceEnum = instances.enumerateInstances();
    while (instanceEnum.hasMoreElements()) {
        Instance instance = instanceEnum.nextElement();
        // for each row...
        // - get the indices into the domains of each node that correspond to the current
        //   row of data (sorted in the same order as the nodes are ordered in the BeliefNetwork)
        for (int node_idx = 0; node_idx < nodes.length; node_idx++) {
            int domain_idx;
            if (clusterers[node_idx] == null) {
                Discrete domain = (Discrete) nodes[node_idx].getDomain();
                String strValue;
                if (domain instanceof Discretized) {
                    // If we have a discretized domain we discretize first...
                    int colIdx = nodeIdx2colIdx[node_idx];
                    if (colIdx < 0) {
                        //bn.dump();
                        /* for (int i = 0; i < numAttributes; i++) { logger.debug("Attribute " + i + ": " + instances.attribute(i).name()); }
                           StringBuffer sb = new StringBuffer();
                           for (int i = 0; i < nodeIdx2colIdx.length; i++) { sb.append(i + "\t"); }
                           sb.append("\n");
                           for (int i = 0; i < nodeIdx2colIdx.length; i++) { sb.append(nodeIdx2colIdx[i] + "\t"); }
                           logger.debug(sb); */
                        throw new Exception("No attribute specified for " + bn.bn.getNodes()[node_idx].getName());
                    }
                    double value = instance.value(colIdx);
                    strValue = ((Discretized) domain).getNameFromContinuous(value);
                    /* if (domain.findName(strValue) == -1) { logger.debug(domain); logger.debug(strValue); } */
                } else {
                    int colIdx = nodeIdx2colIdx[node_idx];
                    if (colIdx < 0) {
                        throw new Exception("No attribute specified for " + bn.bn.getNodes()[node_idx].getName());
                    }
                    strValue = instance.stringValue(nodeIdx2colIdx[node_idx]);
                }
                domain_idx = domain.findName(strValue);
                if (domain_idx == -1) {
                    /* String[] myDomain = bn.getDiscreteDomainAsArray(bn.bn.getNodes()[node_idx].getName());
                       for (int i = 0; i < myDomain.length; i++) { logger.debug(myDomain[i]); } */
                    throw new Exception(strValue + " not found in domain of " + nodes[node_idx].getName());
                }
            } else {
                Instance inst = new Instance(1);
                inst.setValue(0, instance.value(nodeIdx2colIdx[node_idx]));
                domain_idx = clusterers[node_idx].clusterInstance(inst);
            }
            domainIndices[node_idx] = domain_idx;
        }
        // - update each node's CPT
        for (int i = 0; i < nodes.length; i++) {
            counters[i].count(domainIndices);
        }
    }
}
From source file:probcog.bayesnets.learning.DomainLearner.java
License:Open Source License
/**
 * Learns from all the examples in the given instances; each instance represents one example.
 * All the random variables (nodes) that have been scheduled for learning in the constructor
 * need to be found in each instance as attributes that are named accordingly, i.e. for each
 * random variable for which the domain is to be learnt, there must be an attribute with a
 * matching name.
 *
 * @param instances the instances to learn from
 * @throws Exception if the set of instances is empty
 * @throws SQLException particularly if there is no matching attribute for one of the node names
 */
public void learn(Instances instances) throws Exception, SQLException {
    // if it's an empty result set, throw exception
    if (instances.numInstances() == 0)
        throw new Exception("empty result set!");

    // gather domain data
    int numDirectDomains = directDomains != null ? directDomains.length : 0;
    int numClusteredDomains = clusteredDomains != null ? clusteredDomains.length : 0;
    @SuppressWarnings("unchecked")
    Enumeration<Instance> instanceEnum = instances.enumerateInstances();
    while (instanceEnum.hasMoreElements()) {
        Instance instance = instanceEnum.nextElement();
        // for direct learning, add outcomes to the set of outcomes
        for (int i = 0; i < numDirectDomains; i++) {
            directDomainData.get(i).add(instance.stringValue(instances.attribute(directDomains[i].getName())));
        }
        // for clustering, gather all instances
        for (int i = 0; i < numClusteredDomains; i++) {
            Instance inst = new Instance(1);
            inst.setValue(attrValue, instance.value(instances.attribute(clusteredDomains[i].nodeName)));
            clusterData[i].add(inst);
        }
    }
}