Example usage for weka.core Instances numAttributes

List of usage examples for weka.core Instances numAttributes

Introduction

On this page you can find usage examples for weka.core.Instances.numAttributes().

Prototype


public int numAttributes()

Document

Returns the number of attributes.
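
As a quick illustration, here is a minimal sketch that builds a small dataset in memory and queries its attribute count. It assumes the list-based Weka API (weka.core.DenseInstance and the ArrayList-based Instances constructor, available from Weka 3.7 on); the attribute names are made up for the example.

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

public class NumAttributesExample {

    public static void main(String[] args) {
        // two numeric attributes plus a nominal class attribute
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        atts.add(new Attribute("length"));
        atts.add(new Attribute("width"));
        ArrayList<String> labels = new ArrayList<String>();
        labels.add("yes");
        labels.add("no");
        atts.add(new Attribute("class", labels));

        Instances data = new Instances("demo", atts, 0);

        // numAttributes() counts every attribute, including the class attribute
        System.out.println(data.numAttributes()); // prints 3

        // the common "last attribute is the class" idiom used in several examples below
        data.setClassIndex(data.numAttributes() - 1);

        // adding instances does not change the attribute count
        data.add(new DenseInstance(data.numAttributes()));
        System.out.println(data.numAttributes()); // still prints 3
    }
}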

Usage

From source file: adams.ml.data.WekaConverter.java

License: Open Source License

/**
 * Turns an ADAMS dataset row into a Weka Instance.
 *
 * @param data   the dataset to use as template
 * @param row      the row to convert
 * @return      the generated instance
 * @throws Exception   if conversion fails
 */
public static Instance toInstance(Instances data, Row row) throws Exception {
    Instance result;
    double[] values;
    int i;
    Cell cell;
    Attribute att;

    values = new double[data.numAttributes()];
    for (i = 0; i < data.numAttributes(); i++) {
        values[i] = Utils.missingValue();

        if (!row.hasCell(i))
            continue;
        cell = row.getCell(i);
        if (cell.isMissing())
            continue;

        att = data.attribute(i);
        switch (att.type()) {
        case Attribute.NUMERIC:
            values[i] = cell.toDouble();
            break;
        case Attribute.DATE:
            values[i] = cell.toAnyDateType().getTime();
            break;
        case Attribute.NOMINAL:
            values[i] = att.indexOfValue(cell.getContent());
            break;
        case Attribute.STRING:
            values[i] = att.addStringValue(cell.getContent());
            break;
        default:
            throw new Exception("Unhandled Weka attribute type: " + Attribute.typeToString(att));
        }
    }

    result = new DenseInstance(1.0, values);
    result.setDataset(data);

    return result;
}

From source file: adams.opt.cso.HermioneSimple.java

License: Open Source License

/**
 * For testing only.
 *
 * @param args   the command-line arguments; the first one is the dataset to use
 * @throws Exception   if something fails
 */
public static void main(String[] args) throws Exception {
    Environment.setEnvironmentClass(Environment.class);
    Instances data = DataSource.read(args[0]);
    if (data.classIndex() == -1)
        data.setClassIndex(data.numAttributes() - 1);
    MaxIterationsWithoutImprovement stopping = new MaxIterationsWithoutImprovement();
    stopping.setNumIterations(2);
    stopping.setMinimumImprovement(0.001);
    stopping.setLoggingLevel(LoggingLevel.INFO);
    HermioneSimple simple = new HermioneSimple();
    simple.setEvalParallel(true);
    simple.setMeasure(Measure.CC);
    simple.setStopping(stopping);
    simple.setLoggingLevel(LoggingLevel.INFO);
    simple.setInstances(data);
    /*
    simple.setClassifier(new GPD());
    simple.setHandlers(new AbstractCatSwarmOptimizationDiscoveryHandler[]{
      new GPDGamma(),
      new GPDNoise(),
    });
    */
    LinearRegressionJ cls = new LinearRegressionJ();
    cls.setEliminateColinearAttributes(false);
    cls.setAttributeSelectionMethod(
            new SelectedTag(LinearRegressionJ.SELECTION_NONE, LinearRegressionJ.TAGS_SELECTION));
    simple.setClassifier(cls); // use the classifier configured above
    GenericDouble ridge = new GenericDouble();
    ridge.setClassname(new BaseClassname(cls.getClass()));
    ridge.setProperty("ridge");
    ridge.setMinimum(1e-8);
    ridge.setMaximum(1);
    simple.setHandlers(new AbstractCatSwarmOptimizationDiscoveryHandler[] { ridge, });
    DoubleMatrix best = simple.run();
    System.out.println(best);
}

From source file: adaptedClusteringAlgorithms.MyFarthestFirst.java

License: Open Source License

protected void initMinMax(Instances data) {
    m_Min = new double[data.numAttributes()];
    m_Max = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        m_Min[i] = m_Max[i] = Double.NaN;
    }

    for (int i = 0; i < data.numInstances(); i++) {
        updateMinMax(data.instance(i));
    }
}

From source file: adaptedClusteringAlgorithms.MySimpleKMeans.java

License: Open Source License

/**
 * Generates a clusterer. Has to initialize all fields of the clusterer that
 * are not being set via options.
 * 
 * @param data set of instances serving as training data
 * @throws Exception if the clusterer has not been generated successfully
 */
@Override
public void buildClusterer(Instances data) throws Exception {

    if (!SESAME.SESAME_GUI)
        MyFirstClusterer.weka_gui = true;

    // can clusterer handle the data?
    getCapabilities().testWithFail(data);

    m_Iterations = 0;

    m_ReplaceMissingFilter = new ReplaceMissingValues();
    Instances instances = new Instances(data);

    instances.setClassIndex(-1);
    if (!m_dontReplaceMissing) {
        m_ReplaceMissingFilter.setInputFormat(instances);
        instances = Filter.useFilter(instances, m_ReplaceMissingFilter);
    }

    m_FullMissingCounts = new int[instances.numAttributes()];
    if (m_displayStdDevs) {
        m_FullStdDevs = new double[instances.numAttributes()];
    }
    m_FullNominalCounts = new int[instances.numAttributes()][0];

    m_FullMeansOrMediansOrModes = moveCentroid(0, instances, false);
    for (int i = 0; i < instances.numAttributes(); i++) {
        m_FullMissingCounts[i] = instances.attributeStats(i).missingCount;
        if (instances.attribute(i).isNumeric()) {
            if (m_displayStdDevs) {
                m_FullStdDevs[i] = Math.sqrt(instances.variance(i));
            }
            if (m_FullMissingCounts[i] == instances.numInstances()) {
                m_FullMeansOrMediansOrModes[i] = Double.NaN; // mark missing as mean
            }
        } else {
            m_FullNominalCounts[i] = instances.attributeStats(i).nominalCounts;
            if (m_FullMissingCounts[i] > m_FullNominalCounts[i][Utils.maxIndex(m_FullNominalCounts[i])]) {
                m_FullMeansOrMediansOrModes[i] = -1; // mark missing as most common
                                                     // value
            }
        }
    }

    m_ClusterCentroids = new Instances(instances, m_NumClusters);
    int[] clusterAssignments = new int[instances.numInstances()];

    if (m_PreserveOrder) {
        m_Assignments = clusterAssignments;
    }

    m_DistanceFunction.setInstances(instances);

    Random RandomO = new Random(getSeed());
    int instIndex;
    HashMap initC = new HashMap();
    DecisionTableHashKey hk = null;

    Instances initInstances = null;
    if (m_PreserveOrder) {
        initInstances = new Instances(instances);
    } else {
        initInstances = instances;
    }

    for (int j = initInstances.numInstances() - 1; j >= 0; j--) {
        instIndex = RandomO.nextInt(j + 1);
        hk = new DecisionTableHashKey(initInstances.instance(instIndex), initInstances.numAttributes(), true);
        if (!initC.containsKey(hk)) {
            m_ClusterCentroids.add(initInstances.instance(instIndex));
            initC.put(hk, null);
        }
        initInstances.swap(j, instIndex);

        if (m_ClusterCentroids.numInstances() == m_NumClusters) {
            break;
        }
    }

    m_NumClusters = m_ClusterCentroids.numInstances();

    // removing reference
    initInstances = null;

    int i;
    boolean converged = false;
    int emptyClusterCount;
    Instances[] tempI = new Instances[m_NumClusters];
    m_squaredErrors = new double[m_NumClusters];
    m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
    m_ClusterMissingCounts = new int[m_NumClusters][instances.numAttributes()];
    while (!converged) {
        emptyClusterCount = 0;
        m_Iterations++;
        converged = true;
        for (i = 0; i < instances.numInstances(); i++) {
            Instance toCluster = instances.instance(i);
            int newC = clusterProcessedInstance(toCluster, true);
            if (newC != clusterAssignments[i]) {
                converged = false;
            }
            clusterAssignments[i] = newC;
        }

        // update centroids
        m_ClusterCentroids = new Instances(instances, m_NumClusters);
        for (i = 0; i < m_NumClusters; i++) {
            tempI[i] = new Instances(instances, 0);
        }
        for (i = 0; i < instances.numInstances(); i++) {
            tempI[clusterAssignments[i]].add(instances.instance(i));
        }
        for (i = 0; i < m_NumClusters; i++) {
            if (tempI[i].numInstances() == 0) {
                // empty cluster
                emptyClusterCount++;
            } else {
                moveCentroid(i, tempI[i], true);
            }
        }

        if (m_Iterations == m_MaxIterations) {
            converged = true;
        }

        if (emptyClusterCount > 0) {
            m_NumClusters -= emptyClusterCount;
            if (converged) {
                Instances[] t = new Instances[m_NumClusters];
                int index = 0;
                for (int k = 0; k < tempI.length; k++) {
                    if (tempI[k].numInstances() > 0) {
                        t[index] = tempI[k];

                        for (i = 0; i < tempI[k].numAttributes(); i++) {
                            m_ClusterNominalCounts[index][i] = m_ClusterNominalCounts[k][i];
                        }
                        index++;
                    }
                }
                tempI = t;
            } else {
                tempI = new Instances[m_NumClusters];
            }
        }

        if (!converged) {
            m_squaredErrors = new double[m_NumClusters];
            m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
        }
    }

    if (m_displayStdDevs) {
        m_ClusterStdDevs = new Instances(instances, m_NumClusters);
    }
    m_ClusterSizes = new int[m_NumClusters];
    for (i = 0; i < m_NumClusters; i++) {
        if (m_displayStdDevs) {
            double[] vals2 = new double[instances.numAttributes()];
            for (int j = 0; j < instances.numAttributes(); j++) {
                if (instances.attribute(j).isNumeric()) {
                    vals2[j] = Math.sqrt(tempI[i].variance(j));
                } else {
                    vals2[j] = Instance.missingValue();
                }
            }
            m_ClusterStdDevs.add(new Instance(1.0, vals2));
        }
        m_ClusterSizes[i] = tempI[i].numInstances();
    }

    // Save memory!!
    m_DistanceFunction.clean();

    if (!SESAME.SESAME_GUI)
        MyFirstClusterer.weka_gui = true;
}

From source file: adaptedClusteringAlgorithms.MySimpleKMeans.java

License: Open Source License

/**
 * Move the centroid to its new coordinates. Generate the centroid
 * coordinates based on its members (objects assigned to the cluster of the
 * centroid) and the distance function being used.
 *
 * @param centroidIndex index of the centroid which the coordinates will be
 *          computed
 * @param members the objects that are assigned to the cluster of this
 *          centroid
 * @param updateClusterInfo if the method is supposed to update the m_Cluster
 *          arrays
 * @return the centroid coordinates
 */
protected double[] moveCentroid(int centroidIndex, Instances members, boolean updateClusterInfo) {
    double[] vals = new double[members.numAttributes()];

    for (int j = 0; j < members.numAttributes(); j++) {

        // The centroid is the mean point. If the attribute is nominal, the centroid is the mode
        if (m_DistanceFunction instanceof ChEBIInd || m_DistanceFunction instanceof ChEBIDir
                || m_DistanceFunction instanceof GOInd || m_DistanceFunction instanceof GODir
                || m_DistanceFunction instanceof GOChEBIInd || m_DistanceFunction instanceof GOChEBIDir
                || m_DistanceFunction instanceof CalculusInd || m_DistanceFunction instanceof CalculusDir
                || members.attribute(j).isNominal()) {
            vals[j] = members.meanOrMode(j);
        }

        if (updateClusterInfo) {
            m_ClusterMissingCounts[centroidIndex][j] = members.attributeStats(j).missingCount;
            m_ClusterNominalCounts[centroidIndex][j] = members.attributeStats(j).nominalCounts;
            if (members.attribute(j).isNominal()) {
                if (m_ClusterMissingCounts[centroidIndex][j] > m_ClusterNominalCounts[centroidIndex][j][Utils
                        .maxIndex(m_ClusterNominalCounts[centroidIndex][j])]) {
                    vals[j] = Instance.missingValue(); // mark mode as missing
                }
            } else {
                if (m_ClusterMissingCounts[centroidIndex][j] == members.numInstances()) {
                    vals[j] = Instance.missingValue(); // mark mean as missing
                }
            }
        }
    }
    if (updateClusterInfo) {
        m_ClusterCentroids.add(new Instance(1.0, vals));
    }
    return vals;
}

From source file: affective.core.ArffLexiconEvaluator.java

License: Open Source License

/**
 * Processes all the dictionary files.
 * @throws IOException  an IOException will be raised if an invalid file is supplied
 */
public void processDict() throws IOException {
    BufferedReader reader = new BufferedReader(new FileReader(this.m_lexiconFile));
    Instances lexInstances = new Instances(reader);

    // set upper value for word index
    lexiconWordIndex.setUpper(lexInstances.numAttributes() - 1);

    List<Attribute> numericAttributes = new ArrayList<Attribute>();
    List<Attribute> nominalAttributes = new ArrayList<Attribute>();

    // checks all numeric and nominal attributes and discards the word attribute
    for (int i = 0; i < lexInstances.numAttributes(); i++) {

        if (i != this.lexiconWordIndex.getIndex()) {
            if (lexInstances.attribute(i).isNumeric()) {
                numericAttributes.add(lexInstances.attribute(i));
                // adds the attribute name to the message-level features to be calculated
                this.featureNames.add(this.lexiconName + "-" + lexInstances.attribute(i).name());
            }

            else if (lexInstances.attribute(i).isNominal()) {
                nominalAttributes.add(lexInstances.attribute(i));
                // adds the attribute name together with the nominal value to the message-level features to be calculated
                int numValues = lexInstances.attribute(i).numValues();
                for (int j = 0; j < numValues; j++)
                    this.featureNames.add(this.lexiconName + "-" + lexInstances.attribute(i).name() + "-"
                            + lexInstances.attribute(i).value(j));

            }

        }

    }

    // Maps all words with their affective scores discarding missing values
    for (Instance inst : lexInstances) {
        if (inst.attribute(this.lexiconWordIndex.getIndex()).isString()) {
            String word = inst.stringValue(this.lexiconWordIndex.getIndex());
            // stems the word
            word = this.m_stemmer.stem(word);

            // map numeric scores
            if (!numericAttributes.isEmpty()) {
                Map<String, Double> wordVals = new HashMap<String, Double>();
                for (Attribute na : numericAttributes) {
                    if (!weka.core.Utils.isMissingValue(inst.value(na)))
                        wordVals.put(na.name(), inst.value(na));
                }
                this.numDict.put(word, wordVals);
            }

            // map nominal associations
            if (!nominalAttributes.isEmpty()) {
                Map<String, String> wordCounts = new HashMap<String, String>();
                for (Attribute no : nominalAttributes) {
                    if (!weka.core.Utils.isMissingValue(inst.value(no))) {
                        wordCounts.put(no.name(), no.value((int) inst.value(no)));
                    }
                }
                this.nomDict.put(word, wordCounts);
            }

        }

    }

}

From source file: affective.core.ArffLexiconWordLabeller.java

License: Open Source License

/**
 * Processes all the dictionary files.
 * @throws IOException  an IOException will be raised if an invalid file is supplied
 */
public void processDict() throws IOException {
    BufferedReader reader = new BufferedReader(new FileReader(this.m_lexiconFile));
    Instances lexInstances = new Instances(reader);

    // set upper value for word index
    lexiconWordIndex.setUpper(lexInstances.numAttributes() - 1);

    // checks all numeric and nominal attributes and discards the word attribute
    for (int i = 0; i < lexInstances.numAttributes(); i++) {

        if (i != this.lexiconWordIndex.getIndex()) {
            if (lexInstances.attribute(i).isNumeric() || lexInstances.attribute(i).isNominal()) {
                this.attributes.add(lexInstances.attribute(i));
            }

        }

    }

    // Maps all words to their attribute values (missing values are kept as NaN)
    for (Instance inst : lexInstances) {
        if (inst.attribute(this.lexiconWordIndex.getIndex()).isString()) {
            String word = inst.stringValue(this.lexiconWordIndex.getIndex());
            // stems the word
            word = this.m_stemmer.stem(word);

            // map attribute values (numeric values and nominal value indices)
            if (!attributes.isEmpty()) {
                Map<Attribute, Double> wordVals = new HashMap<Attribute, Double>();
                for (Attribute na : attributes) {
                    wordVals.put(na, inst.value(na));
                }
                this.attValMap.put(word, wordVals);
            }

        }

    }

}

From source file: agnes.AgnesMain.java

public static Instances loadData(String filePath) {
    BufferedReader reader;
    Instances data = null;
    try {
        reader = new BufferedReader(new FileReader(filePath));
        data = new Instances(reader);
        reader.close();
        data.setClassIndex(data.numAttributes() - 1);
    } catch (Exception e) {
        e.printStackTrace();
    }
    return data;
}

From source file: ai.BalancedRandomForest.java

License: GNU General Public License

/**
 * Build Balanced Random Forest
 */
public void buildClassifier(final Instances data) throws Exception {
    // If the number of features is not set (< 1), use log2 of M (number of attributes) + 1
    if (numFeatures < 1)
        numFeatures = (int) Utils.log2(data.numAttributes()) + 1;
    // Check maximum number of random features
    if (numFeatures >= data.numAttributes())
        numFeatures = data.numAttributes() - 1;

    // Initialize array of trees
    tree = new BalancedRandomTree[numTrees];

    // total number of instances
    final int numInstances = data.numInstances();
    // total number of classes
    final int numClasses = data.numClasses();

    final ArrayList<Integer>[] indexSample = new ArrayList[numClasses];
    for (int i = 0; i < numClasses; i++)
        indexSample[i] = new ArrayList<Integer>();

    //System.out.println("numClasses = " + numClasses);

    // fill indexSample with the indices of each class
    for (int i = 0; i < numInstances; i++) {
        //System.out.println("data.get("+i+").classValue() = " + data.get(i).classValue());
        indexSample[(int) data.get(i).classValue()].add(i);
    }

    final Random random = new Random(seed);

    // Executor service to run concurrent trees
    final ExecutorService exe = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());

    List<Future<BalancedRandomTree>> futures = new ArrayList<Future<BalancedRandomTree>>(numTrees);

    final boolean[][] inBag = new boolean[numTrees][numInstances];

    try {
        for (int i = 0; i < numTrees; i++) {
            final ArrayList<Integer> bagIndices = new ArrayList<Integer>();

            // Randomly select the indices in a balanced way
            for (int j = 0; j < numInstances; j++) {
                // Select first the class
                final int randomClass = random.nextInt(numClasses);
                // Select then a random sample of that class
                final int randomSample = random.nextInt(indexSample[randomClass].size());
                bagIndices.add(indexSample[randomClass].get(randomSample));
                inBag[i][indexSample[randomClass].get(randomSample)] = true;
            }

            // Create random tree
            final Splitter splitter = new Splitter(
                    new GiniFunction(numFeatures, data.getRandomNumberGenerator(random.nextInt())));

            futures.add(exe.submit(new Callable<BalancedRandomTree>() {
                public BalancedRandomTree call() {
                    return new BalancedRandomTree(data, bagIndices, splitter);
                }
            }));
        }

        // Grab all trained trees before proceeding
        for (int treeIdx = 0; treeIdx < numTrees; treeIdx++)
            tree[treeIdx] = futures.get(treeIdx).get();

        // Calculate out of bag error
        final boolean numeric = data.classAttribute().isNumeric();

        List<Future<Double>> votes = new ArrayList<Future<Double>>(data.numInstances());

        for (int i = 0; i < data.numInstances(); i++) {
            VotesCollector aCollector = new VotesCollector(tree, i, data, inBag);
            votes.add(exe.submit(aCollector));
        }

        double outOfBagCount = 0.0;
        double errorSum = 0.0;

        for (int i = 0; i < data.numInstances(); i++) {

            double vote = votes.get(i).get();

            // error for instance
            outOfBagCount += data.instance(i).weight();
            if (numeric) {
                errorSum += StrictMath.abs(vote - data.instance(i).classValue()) * data.instance(i).weight();
            } else {
                if (vote != data.instance(i).classValue())
                    errorSum += data.instance(i).weight();
            }

        }

        outOfBagError = errorSum / outOfBagCount;

    } catch (Exception ex) {
        ex.printStackTrace();
    } finally {
        exe.shutdownNow();
    }

}

From source file: ai.GiniFunction.java

License: GNU General Public License

/**
 * Create split function based on Gini coefficient
 *
 * @param data original data
 * @param indices indices of the samples to use
 */
public void init(Instances data, ArrayList<Integer> indices) {
    if (indices.size() == 0) {
        this.index = 0;
        this.threshold = 0;
        this.allSame = true;
        return;
    }

    final int len = data.numAttributes();
    final int numElements = indices.size();
    final int numClasses = data.numClasses();
    final int classIndex = data.classIndex();

    /** Attribute-class pair comparator (by attribute value) */
    final Comparator<AttributeClassPair> comp = new Comparator<AttributeClassPair>() {
        public int compare(AttributeClassPair o1, AttributeClassPair o2) {
            final double diff = o2.attributeValue - o1.attributeValue;
            if (diff < 0)
                return 1;
            else if (diff == 0)
                return 0;
            else
                return -1;
        }

        public boolean equals(Object o) {
            return false;
        }
    };

    // Create and shuffle indices of features to use
    ArrayList<Integer> allIndices = new ArrayList<Integer>();
    for (int i = 0; i < len; i++)
        if (i != classIndex)
            allIndices.add(i);

    double minimumGini = Double.MAX_VALUE;

    for (int i = 0; i < numOfFeatures; i++) {
        // Select the random feature
        final int index = random.nextInt(allIndices.size());
        final int featureToUse = allIndices.get(index);
        allIndices.remove(index); // remove that element to prevent from repetitions

        // Get the smallest Gini coefficient

        // Create list with pairs attribute-class
        final ArrayList<AttributeClassPair> list = new ArrayList<AttributeClassPair>();
        for (int j = 0; j < numElements; j++) {
            final Instance ins = data.get(indices.get(j));
            list.add(new AttributeClassPair(ins.value(featureToUse), (int) ins.value(classIndex)));
        }

        // Sort pairs in increasing order
        Collections.sort(list, comp);

        final double[] probLeft = new double[numClasses];
        final double[] probRight = new double[numClasses];
        // initial probabilities (all samples on the right)
        for (int n = 0; n < list.size(); n++)
            probRight[list.get(n).classValue]++;

        // Try all splitting points, from position 0 to the end
        for (int splitPoint = 0; splitPoint < numElements; splitPoint++) {
            // Calculate Gini coefficient
            double giniLeft = 0;
            double giniRight = 0;
            final int rightNumElements = numElements - splitPoint;

            for (int nClass = 0; nClass < numClasses; nClass++) {
                // left set
                double prob = probLeft[nClass];
                // Divide by the number of elements to get probabilities
                if (splitPoint != 0)
                    prob /= (double) splitPoint;
                giniLeft += prob * prob;

                // right set
                prob = probRight[nClass];
                // Divide by the number of elements to get probabilities
                if (rightNumElements != 0)
                    prob /= (double) rightNumElements;
                giniRight += prob * prob;
            }

            // Total Gini value
            final double gini = ((1.0 - giniLeft) * splitPoint + (1.0 - giniRight) * rightNumElements)
                    / (double) numElements;

            // Save values of minimum Gini coefficient
            if (gini < minimumGini) {
                minimumGini = gini;
                this.index = featureToUse;
                this.threshold = list.get(splitPoint).attributeValue;
            }

            // update probabilities for next iteration
            probLeft[list.get(splitPoint).classValue]++;
            probRight[list.get(splitPoint).classValue]--;
        }
    }

    // free list of possible indices to help garbage collector
    //allIndices.clear();
    //allIndices = null;
}