Example usage for weka.core Instances numAttributes

List of usage examples for weka.core Instances numAttributes

Introduction

On this page you can find example usages of weka.core Instances numAttributes.

Prototype


public int numAttributes() 

Source Link

Document

Returns the number of attributes.

Usage

From source file:de.unimannheim.dws.algorithms.CustomSimpleKMedian.java

License:Open Source License

/**
 * Generates a clusterer. Has to initialize all fields of the clusterer that
 * are not being set via options.
 *
 * <p>The method works on a copy of the training data, optionally replaces
 * missing values, records statistics for the full data set, picks the
 * initial centroids at random (skipping duplicate instances) and then
 * alternates between assigning instances to clusters and recomputing the
 * centroids until no assignment changes or the iteration limit is reached.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the clusterer has not been generated successfully
 */
@Override
public void buildClusterer(Instances data) throws Exception {

    // can clusterer handle the data?
    getCapabilities().testWithFail(data);

    m_Iterations = 0;

    // work on a copy of the training data with no class attribute set
    m_ReplaceMissingFilter = new ReplaceMissingValues();
    Instances instances = new Instances(data);

    instances.setClassIndex(-1);
    if (!m_dontReplaceMissing) {
        m_ReplaceMissingFilter.setInputFormat(instances);
        instances = Filter.useFilter(instances, m_ReplaceMissingFilter);
    }

    // per-attribute statistics over the full data set
    m_FullMissingCounts = new int[instances.numAttributes()];
    if (m_displayStdDevs) {
        m_FullStdDevs = new double[instances.numAttributes()];
    }
    m_FullNominalCounts = new int[instances.numAttributes()][0];

    // centroid of the whole data set; 'false' means the per-cluster arrays
    // are not updated by this call
    m_FullMeansOrMediansOrModes = moveCentroid(0, instances, false);
    for (int i = 0; i < instances.numAttributes(); i++) {
        m_FullMissingCounts[i] = instances.attributeStats(i).missingCount;
        if (instances.attribute(i).isNumeric()) {
            if (m_displayStdDevs) {
                m_FullStdDevs[i] = Math.sqrt(instances.variance(i));
            }
            if (m_FullMissingCounts[i] == instances.numInstances()) {
                // every value is missing: mark the mean as missing
                m_FullMeansOrMediansOrModes[i] = Double.NaN; // mark missing as mean
            }
        } else {
            m_FullNominalCounts[i] = instances.attributeStats(i).nominalCounts;
            if (m_FullMissingCounts[i] > m_FullNominalCounts[i][Utils.maxIndex(m_FullNominalCounts[i])]) {
                // missing values outnumber the most frequent label
                m_FullMeansOrMediansOrModes[i] = -1; // mark missing as most common
                                                     // value
            }
        }
    }

    m_ClusterCentroids = new Instances(instances, m_NumClusters);
    int[] clusterAssignments = new int[instances.numInstances()];

    if (m_PreserveOrder) {
        m_Assignments = clusterAssignments;
    }

    m_DistanceFunction.setInstances(instances);

    Random RandomO = new Random(getSeed());
    int instIndex;
    // keys of the instances already chosen as initial centroids
    HashMap initC = new HashMap();
    DecisionTableHashKey hk = null;

    // the selection loop below swaps instances around, so it runs on a
    // separate copy when the original order has to be preserved
    Instances initInstances = null;
    if (m_PreserveOrder) {
        initInstances = new Instances(instances);
    } else {
        initInstances = instances;
    }

    // draw random instances as initial centroids; an instance whose hash key
    // was already seen is skipped, and each drawn instance is swapped to
    // position j so that it is outside the range of later draws
    for (int j = initInstances.numInstances() - 1; j >= 0; j--) {
        instIndex = RandomO.nextInt(j + 1);
        hk = new DecisionTableHashKey(initInstances.instance(instIndex), initInstances.numAttributes(), true);
        if (!initC.containsKey(hk)) {
            m_ClusterCentroids.add(initInstances.instance(instIndex));
            initC.put(hk, null);
        }
        initInstances.swap(j, instIndex);

        if (m_ClusterCentroids.numInstances() == m_NumClusters) {
            break;
        }
    }

    // fewer centroids than requested may have been found
    m_NumClusters = m_ClusterCentroids.numInstances();

    // removing reference
    initInstances = null;

    int i;
    boolean converged = false;
    int emptyClusterCount;
    // members of each cluster for the current iteration
    Instances[] tempI = new Instances[m_NumClusters];
    m_squaredErrors = new double[m_NumClusters];
    m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
    m_ClusterMissingCounts = new int[m_NumClusters][instances.numAttributes()];
    while (!converged) {
        emptyClusterCount = 0;
        m_Iterations++;
        converged = true;
        // assignment step: any changed assignment means another iteration
        for (i = 0; i < instances.numInstances(); i++) {
            Instance toCluster = instances.instance(i);
            int newC = clusterProcessedInstance(toCluster, true);
            if (newC != clusterAssignments[i]) {
                converged = false;
            }
            clusterAssignments[i] = newC;
        }

        // update centroids
        m_ClusterCentroids = new Instances(instances, m_NumClusters);
        for (i = 0; i < m_NumClusters; i++) {
            tempI[i] = new Instances(instances, 0);
        }
        for (i = 0; i < instances.numInstances(); i++) {
            tempI[clusterAssignments[i]].add(instances.instance(i));
        }
        for (i = 0; i < m_NumClusters; i++) {
            if (tempI[i].numInstances() == 0) {
                // empty cluster
                emptyClusterCount++;
            } else {
                // appends the new centroid to m_ClusterCentroids
                moveCentroid(i, tempI[i], true);
            }
        }

        // stop once the iteration limit is reached
        if (m_Iterations == m_MaxIterations) {
            converged = true;
        }

        if (emptyClusterCount > 0) {
            // empty clusters are dropped
            m_NumClusters -= emptyClusterCount;
            if (converged) {
                // final iteration: compact the member sets and nominal counts
                // so that only the non-empty clusters remain
                Instances[] t = new Instances[m_NumClusters];
                int index = 0;
                for (int k = 0; k < tempI.length; k++) {
                    if (tempI[k].numInstances() > 0) {
                        t[index] = tempI[k];

                        for (i = 0; i < tempI[k].numAttributes(); i++) {
                            m_ClusterNominalCounts[index][i] = m_ClusterNominalCounts[k][i];
                        }
                        index++;
                    }
                }
                tempI = t;
            } else {
                // another iteration follows: only resize the member array
                tempI = new Instances[m_NumClusters];
            }
        }

        if (!converged) {
            // reset the per-iteration accumulators
            m_squaredErrors = new double[m_NumClusters];
            m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
        }
    }

    // per-cluster standard deviations (numeric attributes only) and sizes
    if (m_displayStdDevs) {
        m_ClusterStdDevs = new Instances(instances, m_NumClusters);
    }
    m_ClusterSizes = new int[m_NumClusters];
    for (i = 0; i < m_NumClusters; i++) {
        if (m_displayStdDevs) {
            double[] vals2 = new double[instances.numAttributes()];
            for (int j = 0; j < instances.numAttributes(); j++) {
                if (instances.attribute(j).isNumeric()) {
                    vals2[j] = Math.sqrt(tempI[i].variance(j));
                } else {
                    vals2[j] = Instance.missingValue();
                }
            }
            m_ClusterStdDevs.add(new Instance(1.0, vals2));
        }
        m_ClusterSizes[i] = tempI[i].numInstances();
    }

    // Save memory!!
    m_DistanceFunction.clean();
}

From source file:de.unimannheim.dws.algorithms.CustomSimpleKMedian.java

License:Open Source License

/**
 * Move the centroid to it's new coordinates. Generate the centroid
 * coordinates based on it's members (objects assigned to the cluster of the
 * centroid) and the distance function being used.
 * /*from   ww w  . ja v a  2s  .  c  om*/
 * @param centroidIndex index of the centroid which the coordinates will be
 *          computed
 * @param members the objects that are assigned to the cluster of this
 *          centroid
 * @param updateClusterInfo if the method is supposed to update the m_Cluster
 *          arrays
 * @return the centroid coordinates
 */
protected double[] moveCentroid(int centroidIndex, Instances members, boolean updateClusterInfo) {
    double[] vals = new double[members.numAttributes()];

    // used only for Manhattan Distance
    Instances sortedMembers = null;
    int middle = 0;
    boolean dataIsEven = false;

    if (m_DistanceFunction instanceof ManhattanDistance
            || m_DistanceFunction instanceof CustomPairWiseDistance) {
        middle = (members.numInstances() - 1) / 2;
        dataIsEven = ((members.numInstances() % 2) == 0);
        if (m_PreserveOrder) {
            sortedMembers = members;
        } else {
            sortedMembers = new Instances(members);
        }
    }

    for (int j = 0; j < members.numAttributes(); j++) {

        // in case of Euclidian distance the centroid is the mean point
        // in case of Manhattan distance the centroid is the median point
        // in both cases, if the attribute is nominal, the centroid is the mode
        if (m_DistanceFunction instanceof EuclideanDistance || members.attribute(j).isNominal()) {
            vals[j] = members.meanOrMode(j);
        } else if (m_DistanceFunction instanceof ManhattanDistance
                || m_DistanceFunction instanceof CustomPairWiseDistance) {
            // singleton special case
            if (members.numInstances() == 1) {
                vals[j] = members.instance(0).value(j);
            } else {
                vals[j] = sortedMembers.kthSmallestValue(j, middle + 1);
                if (dataIsEven) {
                    vals[j] = (vals[j] + sortedMembers.kthSmallestValue(j, middle + 2)) / 2;
                }
            }
        }

        if (updateClusterInfo) {
            m_ClusterMissingCounts[centroidIndex][j] = members.attributeStats(j).missingCount;
            m_ClusterNominalCounts[centroidIndex][j] = members.attributeStats(j).nominalCounts;
            if (members.attribute(j).isNominal()) {
                if (m_ClusterMissingCounts[centroidIndex][j] > m_ClusterNominalCounts[centroidIndex][j][Utils
                        .maxIndex(m_ClusterNominalCounts[centroidIndex][j])]) {
                    vals[j] = Instance.missingValue(); // mark mode as missing
                }
            } else {
                if (m_ClusterMissingCounts[centroidIndex][j] == members.numInstances()) {
                    vals[j] = Instance.missingValue(); // mark mean as missing
                }
            }
        }
    }
    if (updateClusterInfo) {
        m_ClusterCentroids.add(new Instance(1.0, vals));
    }
    return vals;
}

From source file:de.unimannheim.dws.algorithms.CustomSimpleKMedoids.java

License:Open Source License

/**
 * Move the centroid to it's new coordinates. Generate the centroid
 * coordinates based on it's members (objects assigned to the cluster of the
 * centroid) and the distance function being used.
 * /*from   ww  w. jav a  2 s  .c o  m*/
 * @param centroidIndex index of the centroid which the coordinates will be
 *          computed
 * @param members the objects that are assigned to the cluster of this
 *          centroid
 * @param updateClusterInfo if the method is supposed to update the m_Cluster
 *          arrays
 * @return the centroid coordinates
 */
protected double[] moveCentroid(int centroidIndex, Instances members, boolean updateClusterInfo) {
    double[] vals = new double[members.numAttributes()];

    if (!updateClusterInfo) {
        vals[0] = 100D;
        return vals;
    }

    double smallestError = Double.MAX_VALUE;
    Instance currentCentroid = null;

    for (int j = 0; j < members.numInstances(); j++) {

        Instance currentInstance = members.instance(j);
        double distanceError = 0D;
        for (int i = 0; i < members.numInstances(); i++) {
            distanceError += m_DistanceFunction.distance(currentInstance, members.instance(i));
        }
        if (distanceError < smallestError) {
            smallestError = distanceError;
            currentCentroid = currentInstance;
        }
    }

    vals[0] = currentCentroid.valueSparse(0);

    for (int j = 0; j < members.numAttributes(); j++) {
        if (updateClusterInfo) {
            m_ClusterMissingCounts[centroidIndex][j] = members.attributeStats(j).missingCount;
            m_ClusterNominalCounts[centroidIndex][j] = members.attributeStats(j).nominalCounts;
            if (members.attribute(j).isNominal()) {
                if (m_ClusterMissingCounts[centroidIndex][j] > m_ClusterNominalCounts[centroidIndex][j][Utils
                        .maxIndex(m_ClusterNominalCounts[centroidIndex][j])]) {
                    vals[j] = Instance.missingValue(); // mark mode as missing
                }
            } else {
                if (m_ClusterMissingCounts[centroidIndex][j] == members.numInstances()) {
                    vals[j] = Instance.missingValue(); // mark mean as missing
                }
            }
        }
    }

    if (updateClusterInfo) {
        m_ClusterCentroids.add(new Instance(1.0, vals));
    }
    return vals;
}

From source file:de.upb.timok.oneclassclassifier.WekaSvmClassifier.java

License:Open Source License

/**
 * Converts the training samples to a Weka data set, applies the optional
 * filter and builds the SVM on the result. Any exception raised while
 * filtering or training is logged and not rethrown.
 *
 * @param trainingSamples the samples to train on
 */
@Override
public void train(List<double[]> trainingSamples) {
    Instances trainingData = DatasetTransformationUtils.trainingSetToInstances(trainingSamples);
    try {
        final boolean hasFilter = filter != null;
        if (hasFilter) {
            filter.setInputFormat(trainingData);
            trainingData = Filter.useFilter(trainingData, filter);
        }

        // Set the class attribute if the data format does not provide this
        // information (the XRFF format, for example, stores it): fall back
        // to the last attribute.
        final boolean classUnset = trainingData.classIndex() == -1;
        if (classUnset) {
            final int lastAttribute = trainingData.numAttributes() - 1;
            trainingData.setClassIndex(lastAttribute);
        }

        wekaSvm.buildClassifier(trainingData);
    } catch (final Exception e) {
        logger.error("Unexpected exception", e);
    }

}

From source file:decisiontree.ID3tree.java

/**
 * Chooses the attribute to split on: the attribute, among all but the last
 * one, for which calculateEntropy returns the smallest value.
 *
 * NOTE(review): both the result and the running minimum start at 5, so 5 is
 * returned when no attribute scores below 5.0 -- confirm that callers expect
 * this default.
 *
 * @param inst the instances to evaluate
 * @return index of the attribute with the lowest entropy, or 5 if none
 *         scored below 5.0
 */
private int calculateSplit(Instances inst) {
    final int attributeCount = inst.numAttributes();
    final int instanceCount = inst.numInstances();

    // copy the instances into a list for calculateEntropy
    ArrayList<Instance> rows = new ArrayList<Instance>();
    for (int row = 0; row < instanceCount; row++) {
        rows.add(inst.instance(row));
    }

    int bestAttribute = 5;
    double lowestEntropy = 5.0;
    // the last attribute is never considered as a split candidate
    for (int attr = 0; attr < attributeCount - 1; attr++) {
        final double candidate = calculateEntropy(rows, attributeCount, attr);
        if (lowestEntropy > candidate) {
            lowestEntropy = candidate;
            bestAttribute = attr;
        }
    }

    return bestAttribute;
}

From source file:decisiontree.MyC45.java

/**
* Method for building an C45 tree./*from w  w  w  .  j av a 2s. co  m*/
*
* @param instances the training data
* @exception Exception if decision tree can't be built successfully
*/
private void makeTree(Instances instances) throws Exception {

    // Check if no instances have reached this node.
    if (instances.numInstances() == 0) {
        m_Attribute = null;
        m_ClassValue = Instance.missingValue();
        m_Distribution = new double[instances.numClasses()];
        return;
    }

    // Compute attribute with maximum gain ratio.
    double[] gainRatios = new double[instances.numAttributes()];
    Enumeration attrEnum = instances.enumerateAttributes();
    while (attrEnum.hasMoreElements()) {
        Attribute attr = (Attribute) attrEnum.nextElement();
        if (attr.isNominal()) {
            gainRatios[attr.index()] = computeGainRatio(instances, attr);
        } else if (attr.isNumeric()) {
            gainRatios[attr.index()] = computeGainRatio(instances, attr, computeThreshold(instances, attr));
        }
    }
    m_Attribute = instances.attribute(Utils.maxIndex(gainRatios));

    // Make leaf if gain ratio is zero. 
    // Otherwise create successors.
    if (Utils.eq(gainRatios[m_Attribute.index()], 0)) {
        m_Attribute = null;
        m_Distribution = new double[instances.numClasses()];
        Enumeration instEnum = instances.enumerateInstances();
        while (instEnum.hasMoreElements()) {
            Instance inst = (Instance) instEnum.nextElement();
            m_Distribution[(int) inst.classValue()]++;
        }
        Utils.normalize(m_Distribution);
        m_ClassValue = Utils.maxIndex(m_Distribution);
        m_ClassAttribute = instances.classAttribute();
    } else {
        Instances[] splitData = null;
        int child = 0;
        if (m_Attribute.isNominal()) {
            child = m_Attribute.numValues();
            splitData = splitData(instances, m_Attribute);
        } else if (m_Attribute.isNumeric()) {
            child = 2;
            splitData = splitData(instances, m_Attribute, computeThreshold(instances, m_Attribute));
        }
        m_Successors = new MyC45[child];
        for (int j = 0; j < child; j++) {
            m_Successors[j] = new MyC45();
            m_Successors[j].makeTree(splitData[j]);
        }
    }
}

From source file:decisiontree.MyID3.java

/**
 * Builds the ID3 tree for the given data. The node becomes a leaf when it
 * receives no instances, when only one distinct class value is present, or
 * when the best information gain is zero; otherwise the data is split on
 * the attribute with the maximum information gain.
 *
 * @param data the training data reaching this node
 */
private void makeTree(Instances data) {
    // No instances reached this node: empty leaf.
    if (data.numInstances() == 0) {
        splitAttr = null;
        leafValue = Double.NaN;
        leafDist = new double[data.numClasses()];
        return;
    }

    // Only one distinct class value: predict the class of the first instance.
    if (data.numDistinctValues(data.classIndex()) == 1) {
        leafValue = data.firstInstance().classValue();
        return;
    }

    // Information gain of every attribute visited by the enumeration.
    double[] infoGains = new double[data.numAttributes()];
    for (Enumeration attEnum = data.enumerateAttributes(); attEnum.hasMoreElements();) {
        Attribute candidate = (Attribute) attEnum.nextElement();
        infoGains[candidate.index()] = computeInfoGain(data, candidate);
    }
    splitAttr = data.attribute(maxIndex(infoGains));

    // Positive gain: create one successor per value of the split attribute.
    if (!Utils.eq(infoGains[splitAttr.index()], 0)) {
        Instances[] partitions = splitData(data, splitAttr);
        child = new MyID3[splitAttr.numValues()];
        for (int branch = 0; branch < splitAttr.numValues(); branch++) {
            child[branch] = new MyID3();
            child[branch].makeTree(partitions[branch]);
        }
        return;
    }

    // Zero gain: make a leaf holding the normalized class distribution.
    splitAttr = null;
    leafDist = new double[data.numClasses()];
    for (Enumeration instEnum = data.enumerateInstances(); instEnum.hasMoreElements();) {
        Instance current = (Instance) instEnum.nextElement();
        leafDist[(int) current.classValue()]++;
    }
    normalize(leafDist);
    leafValue = Utils.maxIndex(leafDist);
    classAttr = data.classAttribute();
}

From source file:decisiontreeclassifier.ITree2.java

/********************************************************************
 * Replaces every missing value in the data set with 0.0, in place.
 * For the voting data set this should be sufficient, seeing as 0.0
 * is more or less random.
 *
 * Returns the same data set that was passed in.
 ********************************************************************/
public Instances fixMissingData(Instances iToFix) {
    final int rowCount = iToFix.numInstances();
    final int columnCount = iToFix.numAttributes();
    for (int row = 0; row < rowCount; row++) {
        for (int col = 0; col < columnCount; col++) {
            if (!iToFix.instance(row).isMissing(col)) {
                continue;
            }
            iToFix.instance(row).setValue(col, 0.0);
        }
    }
    return iToFix;
}

From source file:demo.Demo.java

License:Open Source License

/**
 * Downloads the mushroom data set to a temporary CSV file, loads it with
 * all attributes declared nominal, learns a decomposable model and prints
 * and displays the selected model.
 *
 * @param args command-line arguments (not used)
 * @throws IOException if the data set cannot be downloaded, written to the
 *           temporary file or loaded
 */
public static void main(String[] args) throws IOException {

    CSVLoader loader = new CSVLoader();
    System.out.println("Downloading dataset...");
    URL oracle = new URL("http://repository.seasr.org/Datasets/UCI/csv/mushroom.csv");
    File csvFile = File.createTempFile("data-", ".csv");

    // copy the remote file line by line; both streams are closed even if
    // the copy fails part-way
    BufferedReader in = new BufferedReader(new InputStreamReader(oracle.openStream()));
    try {
        PrintWriter out = new PrintWriter(new BufferedOutputStream(new FileOutputStream(csvFile)));
        try {
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                out.println(inputLine);
            }
            // PrintWriter never throws on write errors, so check explicitly
            if (out.checkError()) {
                throw new IOException("Failed to write dataset to " + csvFile.getAbsolutePath());
            }
        } finally {
            out.close();
        }
    } finally {
        in.close();
    }
    System.out.println("Dataset written to: " + csvFile.getAbsolutePath());

    loader.setFile(csvFile);
    loader.setNominalAttributes("first-last");
    Instances instances = loader.getDataSet();
    String[] variablesNames = new String[instances.numAttributes()];
    for (int i = 0; i < variablesNames.length; i++) {
        variablesNames[i] = instances.attribute(i).name();
    }

    // NOTE(review): 0.05 is presumably a significance threshold -- confirm
    // against ChordalysisModelling
    ChordalysisModelling modeller = new ChordalysisModelling(0.05);

    System.out.println("Learning...");
    modeller.buildModel(instances);
    DecomposableModel bestModel = modeller.getModel();
    bestModel.display(variablesNames);
    System.out.println("The model selected is:");
    System.out.println(bestModel.toString(variablesNames));
    // NOTE(review): the model is displayed a second time here -- confirm
    // that this is intended
    bestModel.display(variablesNames);

}

From source file:demo.DemoInference.java

License:Open Source License

/**
 * @param args/*from   w  w  w . ja  va2 s .  c o  m*/
 * @throws IOException 
 */
public static void main(String[] args) throws IOException {

    CSVLoader loader = new CSVLoader();
    System.out.println("Downloading dataset...");
    URL oracle = new URL("http://repository.seasr.org/Datasets/UCI/csv/mushroom.csv");
    File csvFile = File.createTempFile("data-", ".csv");
    BufferedReader in = new BufferedReader(new InputStreamReader(oracle.openStream()));
    PrintWriter out = new PrintWriter(new BufferedOutputStream(new FileOutputStream(csvFile)));
    String inputLine;
    while ((inputLine = in.readLine()) != null) {
        out.println(inputLine);
    }
    in.close();
    out.close();
    System.out.println("Dataset written to: " + csvFile.getAbsolutePath());

    loader.setFile(csvFile);
    loader.setNominalAttributes("first-last");
    Instances instances = loader.getDataSet();
    String[] variablesNames = new String[instances.numAttributes()];
    String[][] outcomes = new String[instances.numAttributes()][];
    for (int i = 0; i < variablesNames.length; i++) {
        variablesNames[i] = instances.attribute(i).name();
        outcomes[i] = new String[instances.attribute(i).numValues() + 1];//+1 for missing
        for (int j = 0; j < outcomes[i].length - 1; j++) {
            outcomes[i][j] = instances.attribute(i).value(j);
        }
        outcomes[i][outcomes[i].length - 1] = "missing";
        System.out.println("Dom(" + variablesNames[i] + ") = " + Arrays.toString(outcomes[i]));

    }

    ChordalysisModelling modeller = new ChordalysisModelling(0.05);

    System.out.println("Learning...");
    modeller.buildModel(instances);
    DecomposableModel bestModel = modeller.getModel();
    //      bestModel.display(variablesNames);
    System.out.println("The model selected is:");
    System.out.println(bestModel.toString(variablesNames));

    Inference inference = new Inference(bestModel, variablesNames, outcomes);
    inference.setProbabilities(modeller.getLattice());
    String targetVariable = "population";
    System.out.println("initial beliefs on " + targetVariable + " "
            + Arrays.toString(inference.getBelief(targetVariable)));

    System.out.println("adding evidence poisonous and convex shape");
    inference.addEvidence("class", "e");
    inference.addEvidence("cap-shape", "x");
    inference.recordEvidence();

    System.out.println(
            "beliefs on " + targetVariable + " " + Arrays.toString(inference.getBelief(targetVariable)));

    inference.clearEvidences();
    System.out.println("reset beliefs");
    System.out.println(
            "reset beliefs on " + targetVariable + " " + Arrays.toString(inference.getBelief(targetVariable)));

}