Example usage for weka.core Instance value

List of usage examples for weka.core Instance value

Introduction

On this page you can find example usages of the weka.core Instance value method.

Prototype

public double value(Attribute att);

Source Link

Document

Returns an instance's attribute value in internal format.

Usage

From source file:org.wkwk.classifier.MyC45.java

/**
 * Classifies an instance by descending from this node.
 *
 * @param data the instance to classify
 * @return this node's class value when it has no split attribute; otherwise the
 *         result of the selected successor, or -1 when the split attribute is
 *         neither nominal nor numeric
 */
@Override
public double classifyInstance(Instance data) {
    // No split attribute: answer with the class value stored on this node.
    if (splitAttribute == null) {
        return classValue;
    }

    if (splitAttribute.isNominal()) {
        // The internal value of the nominal attribute is used as the successor index.
        int nominalBranch = (int) data.value(splitAttribute);
        return successors[nominalBranch].classifyInstance(data);
    }

    if (!splitAttribute.isNumeric()) {
        return -1;
    }

    // Numeric split: values below the threshold go to successor 0, all others to 1.
    int numericBranch = data.value(splitAttribute) < attrThreshold ? 0 : 1;
    return successors[numericBranch].classifyInstance(data);
}

From source file:org.wkwk.classifier.MyC45.java

/**
 * Partitions a dataset by the values of an attribute.
 *
 * @param data the instances to distribute
 * @param attr the attribute whose internal value selects the target partition
 * @return an array of {@code attr.numValues()} datasets; every instance of
 *         {@code data} is added to the dataset at the index given by its value
 *         for {@code attr}
 */
public Instances[] splitData(Instances data, Attribute attr) {
    Instances[] partitions = new Instances[attr.numValues()];
    int slot = 0;
    while (slot < attr.numValues()) {
        // Each partition is created from data, sized for data.numInstances() entries.
        partitions[slot] = new Instances(data, data.numInstances());
        slot++;
    }

    for (Enumeration cursor = data.enumerateInstances(); cursor.hasMoreElements();) {
        Instance current = (Instance) cursor.nextElement();
        int target = (int) current.value(attr);
        partitions[target].add(current);
    }
    return partitions;
}

From source file:org.wkwk.classifier.MyC45.java

/**
 * Finds the split threshold with the highest information gain for an attribute.
 *
 * <p>The data is sorted in place by {@code attr}. Every position where the class
 * value differs from that of the preceding instance yields a candidate threshold:
 * the midpoint between the two adjacent attribute values. Each candidate is scored
 * with {@code computeInfoGainCont}.
 *
 * @param data the instances to examine; reordered by this call
 * @param attr the attribute to find a threshold for
 * @return the candidate with the largest gain, or 0 when {@code data} is empty or
 *         no candidate has a gain above 0
 */
public double bestThreshold(Instances data, Attribute attr) {
    // Nothing to split on; also avoids reading data.get(0) from an empty set.
    if (data.numInstances() == 0) {
        return 0;
    }
    data.sort(attr);

    double bestGain = 0;
    double bestThr = 0;
    double prevClass = data.get(0).classValue();
    double prevValue = data.get(0).value(attr);

    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        double curClass = inst.classValue();
        double curValue = inst.value(attr);
        if (prevClass != curClass) {
            // Midpoint between the two neighbouring values around the class change.
            double threshold = prevValue + ((curValue - prevValue) / 2);
            double gain = computeInfoGainCont(data, attr, threshold);
            if (bestGain < gain) {
                bestGain = gain;
                bestThr = threshold;
            }
        }
        // Track the immediately preceding instance on every step, not only at
        // class changes, so the midpoint is always taken between adjacent values.
        prevClass = curClass;
        prevValue = curValue;
    }
    return bestThr;
}

From source file:Part2.HierarchicalClusterer.java

License:Open Source License

/**
 * Calculates the distance between two clusters according to the configured
 * link type ({@code m_nLinkType}).
 *
 * @param fDistance matrix of pairwise distances, indexed by the instance indices
 *                  stored in the cluster vectors
 * @param cluster1 list of indices of instances in the first cluster
 * @param cluster2 ditto for second cluster
 * @return distance between clusters based on link type; {@code Double.MAX_VALUE}
 *         if the link type matches none of the cases below
 */
double getDistance(double[][] fDistance, Vector<Integer> cluster1, Vector<Integer> cluster2) {
    double fBestDist = Double.MAX_VALUE;
    // NOTE(review): disabled code that appears to blend in a semi-supervised distance.
    //double SemiDist =  m_DistanceFunction.Semi_distance(fDistance, cluster1, cluster2);
    switch (m_nLinkType) {
    case SINGLE:
        // find single link distance aka minimum link, which is the closest distance between
        // any item in cluster1 and any item in cluster2
        fBestDist = Double.MAX_VALUE;
        for (int i = 0; i < cluster1.size(); i++) {
            int i1 = cluster1.elementAt(i);
            for (int j = 0; j < cluster2.size(); j++) {
                int i2 = cluster2.elementAt(j);
                double fDist = fDistance[i1][i2];
                if (fBestDist > fDist) {
                    fBestDist = fDist;
                }
            }
        }
        break;
    case COMPLETE:
    case ADJCOMLPETE:
        // find complete link distance aka maximum link, which is the largest distance between
        // any item in cluster1 and any item in cluster2
        fBestDist = 0;
        for (int i = 0; i < cluster1.size(); i++) {
            int i1 = cluster1.elementAt(i);
            for (int j = 0; j < cluster2.size(); j++) {
                int i2 = cluster2.elementAt(j);
                double fDist = fDistance[i1][i2];
                if (fBestDist < fDist) {
                    fBestDist = fDist;
                }
            }
        }
        // plain complete link stops here; only the adjusted variant continues below
        if (m_nLinkType == COMPLETE) {
            break;
        }
        // calculate adjustment, which is the largest within cluster distance
        double fMaxDist = 0;
        for (int i = 0; i < cluster1.size(); i++) {
            int i1 = cluster1.elementAt(i);
            for (int j = i + 1; j < cluster1.size(); j++) {
                int i2 = cluster1.elementAt(j);
                double fDist = fDistance[i1][i2];
                if (fMaxDist < fDist) {
                    fMaxDist = fDist;
                }
            }
        }
        for (int i = 0; i < cluster2.size(); i++) {
            int i1 = cluster2.elementAt(i);
            for (int j = i + 1; j < cluster2.size(); j++) {
                int i2 = cluster2.elementAt(j);
                double fDist = fDistance[i1][i2];
                if (fMaxDist < fDist) {
                    fMaxDist = fDist;
                }
            }
        }
        // adjusted complete link = maximum link minus the largest within-cluster distance
        fBestDist -= fMaxDist;
        break;
    case AVERAGE:
        // finds average distance between the elements of the two clusters
        fBestDist = 0;
        for (int i = 0; i < cluster1.size(); i++) {
            int i1 = cluster1.elementAt(i);
            for (int j = 0; j < cluster2.size(); j++) {
                int i2 = cluster2.elementAt(j);
                fBestDist += fDistance[i1][i2];
            }
        }
        fBestDist /= (cluster1.size() * cluster2.size());
        break;
    case MEAN: {
        // calculates the mean distance of a merged cluster (aka group-average agglomerative clustering)
        Vector<Integer> merged = new Vector<Integer>();
        merged.addAll(cluster1);
        merged.addAll(cluster2);
        fBestDist = 0;
        for (int i = 0; i < merged.size(); i++) {
            int i1 = merged.elementAt(i);
            for (int j = i + 1; j < merged.size(); j++) {
                int i2 = merged.elementAt(j);
                fBestDist += fDistance[i1][i2];
            }
        }
        // divide by the number of unordered pairs in the merged cluster
        int n = merged.size();
        fBestDist /= (n * (n - 1.0) / 2.0);
    }
        break;
    case CENTROID:
        // finds the distance of the centroids of the clusters
        // sum the attribute values of each cluster's members ...
        double[] fValues1 = new double[m_instances.numAttributes()];
        for (int i = 0; i < cluster1.size(); i++) {
            Instance instance = m_instances.instance(cluster1.elementAt(i));
            for (int j = 0; j < m_instances.numAttributes(); j++) {
                fValues1[j] += instance.value(j);
            }
        }
        double[] fValues2 = new double[m_instances.numAttributes()];
        for (int i = 0; i < cluster2.size(); i++) {
            Instance instance = m_instances.instance(cluster2.elementAt(i));
            for (int j = 0; j < m_instances.numAttributes(); j++) {
                fValues2[j] += instance.value(j);
            }
        }
        // ... and turn the sums into per-attribute means
        for (int j = 0; j < m_instances.numAttributes(); j++) {
            fValues1[j] /= cluster1.size();
            fValues2[j] /= cluster2.size();
        }
        // set up two instances for distance function: copies of instance 0
        // with every attribute overwritten by the centroid value
        Instance instance1 = (Instance) m_instances.instance(0).copy();
        Instance instance2 = (Instance) m_instances.instance(0).copy();
        for (int j = 0; j < m_instances.numAttributes(); j++) {
            instance1.setValue(j, fValues1[j]);
            instance2.setValue(j, fValues2[j]);
        }
        fBestDist = m_DistanceFunction.distance(instance1, instance2);
        break;
    case WARD: {
        // finds the distance as the change caused by merging the clusters.
        // The information of a cluster is calculated as the error sum of squares of the
        // centroids of the cluster and its members.
        double ESS1 = calcESS(cluster1);
        double ESS2 = calcESS(cluster2);
        Vector<Integer> merged = new Vector<Integer>();
        merged.addAll(cluster1);
        merged.addAll(cluster2);
        double ESS = calcESS(merged);
        // each calcESS result is weighted by the size of its cluster
        fBestDist = ESS * merged.size() - ESS1 * cluster1.size() - ESS2 * cluster2.size();
    }
        break;
    }
    //double alpha = 1;
    //return alpha*SemiDist + (1-alpha)*fBestDist;
    return fBestDist;
}

From source file:Part2.HierarchicalClustererEx.java

License:Open Source License

/**
 * Calculates the distance between two clusters according to the configured
 * link type ({@code m_nLinkType}).
 *
 * @param fDistance matrix of pairwise distances, indexed by the instance indices
 *                  stored in the cluster vectors
 * @param cluster1 list of indices of instances in the first cluster
 * @param cluster2 ditto for second cluster
 * @return distance between clusters based on link type; {@code Double.MAX_VALUE}
 *         if the link type matches none of the cases below
 */
double getDistance(double[][] fDistance, Vector<Integer> cluster1, Vector<Integer> cluster2) {
    double fBestDist = Double.MAX_VALUE;
    switch (m_nLinkType) {
    case SINGLE:
        // find single link distance aka minimum link, which is the closest distance between
        // any item in cluster1 and any item in cluster2
        fBestDist = Double.MAX_VALUE;
        for (int i = 0; i < cluster1.size(); i++) {
            int i1 = cluster1.elementAt(i);
            for (int j = 0; j < cluster2.size(); j++) {
                int i2 = cluster2.elementAt(j);
                double fDist = fDistance[i1][i2];
                if (fBestDist > fDist) {
                    fBestDist = fDist;
                }
            }
        }
        break;
    case COMPLETE:
    case ADJCOMLPETE:
        // find complete link distance aka maximum link, which is the largest distance between
        // any item in cluster1 and any item in cluster2
        fBestDist = 0;
        for (int i = 0; i < cluster1.size(); i++) {
            int i1 = cluster1.elementAt(i);
            for (int j = 0; j < cluster2.size(); j++) {
                int i2 = cluster2.elementAt(j);
                double fDist = fDistance[i1][i2];
                if (fBestDist < fDist) {
                    fBestDist = fDist;
                }
            }
        }
        // plain complete link stops here; only the adjusted variant continues below
        if (m_nLinkType == COMPLETE) {
            break;
        }
        // calculate adjustment, which is the largest within cluster distance
        double fMaxDist = 0;
        for (int i = 0; i < cluster1.size(); i++) {
            int i1 = cluster1.elementAt(i);
            for (int j = i + 1; j < cluster1.size(); j++) {
                int i2 = cluster1.elementAt(j);
                double fDist = fDistance[i1][i2];
                if (fMaxDist < fDist) {
                    fMaxDist = fDist;
                }
            }
        }
        for (int i = 0; i < cluster2.size(); i++) {
            int i1 = cluster2.elementAt(i);
            for (int j = i + 1; j < cluster2.size(); j++) {
                int i2 = cluster2.elementAt(j);
                double fDist = fDistance[i1][i2];
                if (fMaxDist < fDist) {
                    fMaxDist = fDist;
                }
            }
        }
        // adjusted complete link = maximum link minus the largest within-cluster distance
        fBestDist -= fMaxDist;
        break;
    case AVERAGE:
        // finds average distance between the elements of the two clusters
        fBestDist = 0;
        for (int i = 0; i < cluster1.size(); i++) {
            int i1 = cluster1.elementAt(i);
            for (int j = 0; j < cluster2.size(); j++) {
                int i2 = cluster2.elementAt(j);
                fBestDist += fDistance[i1][i2];
            }
        }
        fBestDist /= (cluster1.size() * cluster2.size());
        break;
    case MEAN: {
        // calculates the mean distance of a merged cluster (aka group-average agglomerative clustering)
        Vector<Integer> merged = new Vector<Integer>();
        merged.addAll(cluster1);
        merged.addAll(cluster2);
        fBestDist = 0;
        for (int i = 0; i < merged.size(); i++) {
            int i1 = merged.elementAt(i);
            for (int j = i + 1; j < merged.size(); j++) {
                int i2 = merged.elementAt(j);
                fBestDist += fDistance[i1][i2];
            }
        }
        // divide by the number of unordered pairs in the merged cluster
        int n = merged.size();
        fBestDist /= (n * (n - 1.0) / 2.0);
    }
        break;
    case CENTROID:
        // finds the distance of the centroids of the clusters
        // sum the attribute values of each cluster's members ...
        double[] fValues1 = new double[m_instances.numAttributes()];
        for (int i = 0; i < cluster1.size(); i++) {
            Instance instance = m_instances.instance(cluster1.elementAt(i));
            for (int j = 0; j < m_instances.numAttributes(); j++) {
                fValues1[j] += instance.value(j);
            }
        }
        double[] fValues2 = new double[m_instances.numAttributes()];
        for (int i = 0; i < cluster2.size(); i++) {
            Instance instance = m_instances.instance(cluster2.elementAt(i));
            for (int j = 0; j < m_instances.numAttributes(); j++) {
                fValues2[j] += instance.value(j);
            }
        }
        // ... and turn the sums into per-attribute means
        for (int j = 0; j < m_instances.numAttributes(); j++) {
            fValues1[j] /= cluster1.size();
            fValues2[j] /= cluster2.size();
        }
        // set up two instances for distance function: copies of instance 0
        // with every attribute overwritten by the centroid value
        Instance instance1 = (Instance) m_instances.instance(0).copy();
        Instance instance2 = (Instance) m_instances.instance(0).copy();
        for (int j = 0; j < m_instances.numAttributes(); j++) {
            instance1.setValue(j, fValues1[j]);
            instance2.setValue(j, fValues2[j]);
        }
        fBestDist = m_DistanceFunction.distance(instance1, instance2);
        break;
    case WARD: {
        // finds the distance as the change caused by merging the clusters.
        // The information of a cluster is calculated as the error sum of squares of the
        // centroids of the cluster and its members.
        double ESS1 = calcESS(cluster1);
        double ESS2 = calcESS(cluster2);
        Vector<Integer> merged = new Vector<Integer>();
        merged.addAll(cluster1);
        merged.addAll(cluster2);
        double ESS = calcESS(merged);
        // each calcESS result is weighted by the size of its cluster
        fBestDist = ESS * merged.size() - ESS1 * cluster1.size() - ESS2 * cluster2.size();
    }
        break;
    }
    // NOTE(review): an unused weighting local and a disabled blend with a
    // semi-supervised distance used to live here; the plain link distance is returned.
    return fBestDist;
}

From source file:preprocess.StringToWordVector.java

License:Open Source License

/**
 * Converts the instance w/o normalization.
 * /*from  ww  w.ja v a2 s .c om*/
 * @oaram instance the instance to convert
 * @param v
 * @return the conerted instance
 */
private int convertInstancewoDocNorm(Instance instance, FastVector v) {

    // Convert the instance into a sorted set of indexes
    TreeMap contained = new TreeMap();

    // Copy all non-converted attributes from input to output
    int firstCopy = 0;
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (!m_SelectedRange.isInRange(i)) {
            if (getInputFormat().attribute(i).type() != Attribute.STRING) {
                // Add simple nominal and numeric attributes directly
                if (instance.value(i) != 0.0) {
                    contained.put(new Integer(firstCopy), new Double(instance.value(i)));
                }
            } else {
                if (instance.isMissing(i)) {
                    contained.put(new Integer(firstCopy), new Double(Instance.missingValue()));
                } else {

                    // If this is a string attribute, we have to first add
                    // this value to the range of possible values, then add
                    // its new internal index.
                    if (outputFormatPeek().attribute(firstCopy).numValues() == 0) {
                        // Note that the first string value in a
                        // SparseInstance doesn't get printed.
                        outputFormatPeek().attribute(firstCopy)
                                .addStringValue("Hack to defeat SparseInstance bug");
                    }
                    int newIndex = outputFormatPeek().attribute(firstCopy)
                            .addStringValue(instance.stringValue(i));
                    contained.put(new Integer(firstCopy), new Double(newIndex));
                }
            }
            firstCopy++;
        }
    }

    for (int j = 0; j < instance.numAttributes(); j++) {
        //if ((getInputFormat().attribute(j).type() == Attribute.STRING) 
        if (m_SelectedRange.isInRange(j) && (instance.isMissing(j) == false)) {

            m_Tokenizer.tokenize(instance.stringValue(j));

            while (m_Tokenizer.hasMoreElements()) {
                String word = (String) m_Tokenizer.nextElement();
                if (this.m_lowerCaseTokens == true)
                    word = word.toLowerCase();
                word = m_Stemmer.stem(word);
                Integer index = (Integer) m_Dictionary.get(word);
                if (index != null) {
                    if (m_OutputCounts) { // Separate if here rather than two lines down to avoid hashtable lookup
                        Double count = (Double) contained.get(index);
                        if (count != null) {
                            contained.put(index, new Double(count.doubleValue() + 1.0));
                        } else {
                            contained.put(index, new Double(1));
                        }
                    } else {
                        contained.put(index, new Double(1));
                    }
                }
            }
        }
    }

    //Doing TFTransform
    if (m_TFTransform == true) {
        Iterator it = contained.keySet().iterator();
        for (int i = 0; it.hasNext(); i++) {
            Integer index = (Integer) it.next();
            if (index.intValue() >= firstCopy) {
                double val = ((Double) contained.get(index)).doubleValue();
                val = Math.log(val + 1);
                contained.put(index, new Double(val));
            }
        }
    }

    //Doing IDFTransform
    if (m_IDFTransform == true) {
        Iterator it = contained.keySet().iterator();
        for (int i = 0; it.hasNext(); i++) {
            Integer index = (Integer) it.next();
            if (index.intValue() >= firstCopy) {
                double val = ((Double) contained.get(index)).doubleValue();
                val = val * Math.log(m_NumInstances / (double) m_DocsCounts[index.intValue()]);
                contained.put(index, new Double(val));
            }
        }
    }

    // Convert the set to structures needed to create a sparse instance.
    double[] values = new double[contained.size()];
    int[] indices = new int[contained.size()];
    Iterator it = contained.keySet().iterator();
    for (int i = 0; it.hasNext(); i++) {
        Integer index = (Integer) it.next();
        Double value = (Double) contained.get(index);
        values[i] = value.doubleValue();
        indices[i] = index.intValue();
    }

    Instance inst = new SparseInstance(instance.weight(), values, indices, outputFormatPeek().numAttributes());
    inst.setDataset(outputFormatPeek());

    v.addElement(inst);

    return firstCopy;
}

From source file:probcog.bayesnets.core.WEKADiscretizationFilter.java

License:Open Source License

/**
 * Maps a continuous value to one of the filter's output value names.
 *
 * <p>The value is wrapped in a single-attribute instance, pushed through
 * {@code filter}, and the filtered value (cast to int) is used as an index into
 * {@code outputValues}.
 *
 * @param continuous the value to map
 * @return the matching entry of {@code outputValues}, or {@code null} if any step
 *         throws (the stack trace is printed in that case)
 */
public String getValueForContinuous(double continuous) {
    Instance probe = new Instance(1);
    probe.setValue(0, continuous);

    String label = null;
    try {
        filter.input(probe);
        filter.batchFinished();
        int bin = (int) filter.output().value(0);
        label = outputValues[bin];
    } catch (Exception e) {
        e.printStackTrace();
    }
    return label;
}

From source file:probcog.bayesnets.learning.CPTLearner.java

License:Open Source License

/**
 * Learns all the examples in the instances. Each instance represents one example.
 * Every node of the network must be mapped to an attribute of the instances
 * (via {@code bn.getNodeNamesForAttribute}); several nodes may share an attribute.
 *
 * <p>For each instance, a domain index is determined for every node: through the
 * node's clusterer if it has one, otherwise by looking the value's name up in the
 * node's discrete domain (discretizing first for {@code Discretized} domains).
 * The resulting index vector is then passed to every counter.
 *
 * @param instances the instances
 * @throws Exception if the instances are empty, if an attribute maps to an unknown
 *                   node, if a node has no attribute mapped to it, or if a value
 *                   is not found in its node's domain
 */
public void learn(Instances instances) throws Exception {
    if (!initialized)
        init();

    // if it's an empty result set, throw exception
    if (instances.numInstances() == 0)
        throw new Exception("empty result set!");

    BeliefNode[] nodes = bn.bn.getNodes();
    int numAttributes = instances.numAttributes();
    // There may be more nodes than attributes, so the counts are not compared.

    // map node indices to attribute index (-1 = no attribute mapped to the node)
    int[] nodeIdx2colIdx = new int[nodes.length];
    Arrays.fill(nodeIdx2colIdx, -1);
    for (int i = 0; i < numAttributes; i++) {
        Set<String> nodeNames = bn.getNodeNamesForAttribute(instances.attribute(i).name());
        if (nodeNames == null)
            continue;
        for (String nodeName : nodeNames) {
            int node_idx = bn.getNodeIndex(nodeName);
            if (node_idx == -1)
                throw new Exception("Unknown node referenced in result set: " + instances.attribute(i).name());
            nodeIdx2colIdx[node_idx] = i;
        }
    }

    // gather data, iterating over the result set
    int[] domainIndices = new int[nodes.length];
    @SuppressWarnings("unchecked")
    Enumeration<Instance> instanceEnum = instances.enumerateInstances();
    while (instanceEnum.hasMoreElements()) {
        Instance instance = instanceEnum.nextElement();
        // for each row...
        // - get the indices into the domains of each node
        //   that correspond to the current row of data
        //   (sorted in the same order as the nodes are ordered
        //   in the BeliefNetwork)
        for (int node_idx = 0; node_idx < nodes.length; node_idx++) {
            // Checked once for every kind of node, so a clustered node without a
            // mapped attribute fails with the same message instead of reading
            // the instance at index -1.
            int colIdx = nodeIdx2colIdx[node_idx];
            if (colIdx < 0) {
                throw new Exception("No attribute specified for " + nodes[node_idx].getName());
            }

            int domain_idx;
            if (clusterers[node_idx] == null) {
                Discrete domain = (Discrete) nodes[node_idx].getDomain();
                String strValue;
                if (domain instanceof Discretized) {
                    // If we have a discretized domain we discretize first...
                    strValue = ((Discretized) domain).getNameFromContinuous(instance.value(colIdx));
                } else {
                    strValue = instance.stringValue(colIdx);
                }
                domain_idx = domain.findName(strValue);
                if (domain_idx == -1) {
                    throw new Exception(strValue + " not found in domain of " + nodes[node_idx].getName());
                }
            } else {
                // Clustered node: wrap the value in a one-attribute instance
                // and let the clusterer pick the domain index.
                Instance inst = new Instance(1);
                inst.setValue(0, instance.value(colIdx));
                domain_idx = clusterers[node_idx].clusterInstance(inst);
            }
            domainIndices[node_idx] = domain_idx;
        }
        // - update each node's CPT
        for (int i = 0; i < nodes.length; i++) {
            counters[i].count(domainIndices);
        }
    }
}

From source file:probcog.bayesnets.learning.DomainLearner.java

License:Open Source License

/**
 * Gathers domain data from all the given instances; each instance is one example.
 * For every directly learnt domain, the instance's string value of the attribute
 * named after the domain is added to that domain's outcome set. For every
 * clustered domain, the instance's value of the attribute named after the node is
 * wrapped in a one-attribute instance and added to that domain's cluster data.
 *
 * @param instances
 *            the instances to gather domain data from
 * @throws Exception
 *             if the instances are empty
 * @throws SQLException
 *             declared by the signature; nothing in this method throws it directly
 */
public void learn(Instances instances) throws Exception, SQLException {
    // if it's an empty result set, throw exception
    if (instances.numInstances() == 0)
        throw new Exception("empty result set!");

    // either domain array may be absent, in which case its count is zero
    int directCount = 0;
    if (directDomains != null) {
        directCount = directDomains.length;
    }
    int clusteredCount = 0;
    if (clusteredDomains != null) {
        clusteredCount = clusteredDomains.length;
    }

    @SuppressWarnings("unchecked")
    Enumeration<Instance> rows = instances.enumerateInstances();
    while (rows.hasMoreElements()) {
        Instance row = rows.nextElement();

        // direct learning: record the observed outcome for each domain
        for (int d = 0; d < directCount; d++) {
            String outcome = row.stringValue(instances.attribute(directDomains[d].getName()));
            directDomainData.get(d).add(outcome);
        }

        // clustering: collect one single-attribute instance per domain
        for (int c = 0; c < clusteredCount; c++) {
            double observed = row.value(instances.attribute(clusteredDomains[c].nodeName));
            Instance sample = new Instance(1);
            sample.setValue(attrValue, observed);
            clusterData[c].add(sample);
        }
    }
}