Example usage for weka.core Instance numAttributes

Introduction

On this page you can find example usages of the weka.core.Instance method numAttributes().

Prototype

public int numAttributes();

Source Link

Document

Returns the number of attributes.

Usage

From source file:cba.ItemSet.java

License:Open Source License

/**
 * Checks if an instance contains an item set.
 * <p>
 * An entry of {@code m_items} greater than -1 is a value this item set requires at that
 * attribute index; every other entry is skipped. The instance contains the item set when
 * each required entry equals the instance's value (truncated to {@code int}) at the same
 * index and that value is not missing.
 * <p>
 * Two code paths exist: a sparse walk, used only when the instance is a
 * {@code weka.core.SparseInstance} and {@code m_treatZeroAsMissing} is set, and a plain
 * scan over all attributes otherwise.
 *
 * @param instance the instance to be tested
 * @return true if the given instance contains this item set
 */

public boolean containedBy(Instance instance) {

    if (instance instanceof weka.core.SparseInstance && m_treatZeroAsMissing) {
        // Sparse path: p1 walks the values stored in the instance, p2 walks the
        // positions of m_items. Both only ever move forward.
        int numInstVals = instance.numValues();
        int numItemSetVals = m_items.length;

        for (int p1 = 0, p2 = 0; p1 < numInstVals || p2 < numItemSetVals;) {
            // Attribute index of the current stored value; MAX_VALUE once the stored
            // values are exhausted, so no remaining item position can match it.
            int instIndex = Integer.MAX_VALUE;
            if (p1 < numInstVals) {
                instIndex = instance.index(p1);
            }
            int itemIndex = p2;
            // NOTE(review): m_items[itemIndex] is read without a bounds check; this
            // presumably relies on m_items covering every attribute index of the
            // instance - confirm against the code that fills m_items.

            if (m_items[itemIndex] > -1) {
                // Required item: the instance must store a value at exactly this index.
                if (itemIndex != instIndex) {
                    return false;
                } else {
                    if (instance.isMissingSparse(p1)) {
                        return false;
                    }
                    if (m_items[itemIndex] != (int) instance.valueSparse(p1)) {
                        return false;
                    }
                }

                p1++;
                p2++;
            } else {
                // Position not required by the item set: skip it, and skip the stored
                // instance value as well when it sits at the same index.
                if (itemIndex < instIndex) {
                    p2++;
                } else if (itemIndex == instIndex) {
                    p2++;
                    p1++;
                }
            }
        }
    } else {
        // Plain path: compare every required item with the instance value at the same index.
        for (int i = 0; i < instance.numAttributes(); i++)
            if (m_items[i] > -1) {
                // A missing value (or a zero, when zeros are treated as missing) never matches.
                if (instance.isMissing(i) || (m_treatZeroAsMissing && (int) instance.value(i) == 0))
                    return false;
                if (m_items[i] != (int) instance.value(i))
                    return false;
            }
    }

    return true;
}

From source file:CGLSMethod.LinearRegression.java

License:Open Source License

/**
 * Calculate the dependent value for a given instance for a
 * given regression model./*  w  w w  . ja v  a  2s. co  m*/
 *
 * @param transformedInstance the input instance
 * @param selectedAttributes an array of flags indicating which 
 * attributes are included in the regression model
 * @param coefficients an array of coefficients for the regression
 * model
 * @return the regression value for the instance.
 * @throws Exception if the class attribute of the input instance
 * is not assigned
 */
private double regressionPrediction(Instance transformedInstance, boolean[] selectedAttributes,
        double[] coefficients) throws Exception {

    double result = 0;
    int column = 0;
    for (int j = 0; j < transformedInstance.numAttributes(); j++) {
        if ((m_ClassIndex != j) && (selectedAttributes[j])) {
            result += coefficients[column] * transformedInstance.value(j);
            column++;
        }
    }
    result += coefficients[column];

    return result;
}

From source file:classif.Prototyper.java

License:Open Source License

/**
 * Builds the classifier from the given training data.
 * <p>
 * Every instance is converted into a {@code Sequence} made of its non-class attribute
 * values, recorded under its class label in {@code classedData} and
 * {@code indexClassedDataInFullData}, and then {@code buildSpecificClassifier} is called.
 * If {@code fillPrototypes} is set, {@code addMissingPrototypesRandom} runs afterwards.
 *
 * @param data the training instances; a class attribute must be set
 * @throws Exception if building the classifier fails
 */
@Override
public void buildClassifier(Instances data) throws Exception {
    trainingData = data;
    Attribute classAttribute = data.classAttribute();
    prototypes = new ArrayList<>();

    // One (initially empty) bucket per class label.
    classedData = new HashMap<String, ArrayList<Sequence>>();
    indexClassedDataInFullData = new HashMap<String, ArrayList<Integer>>();
    for (int c = 0; c < data.numClasses(); c++) {
        String label = classAttribute.value(c);
        classedData.put(label, new ArrayList<Sequence>());
        indexClassedDataInFullData.put(label, new ArrayList<Integer>());
    }

    sequences = new Sequence[data.numInstances()];
    classMap = new String[sequences.length];
    for (int i = 0; i < sequences.length; i++) {
        Instance sample = data.instance(i);

        // Copy every attribute except the class attribute, wherever it is positioned.
        // (The previous first-or-last "shift" leaked the class value into the sequence
        // and dropped the last feature when the class sat in the middle.)
        int classIndex = sample.classIndex();
        int numAttributes = sample.numAttributes();
        MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[numAttributes - 1];
        int t = 0;
        for (int a = 0; a < numAttributes && t < sequence.length; a++) {
            if (a != classIndex) {
                sequence[t++] = new MonoDoubleItemSet(sample.value(a));
            }
        }

        sequences[i] = new Sequence(sequence);
        String clas = sample.stringValue(classAttribute);
        classMap[i] = clas;
        classedData.get(clas).add(sequences[i]);
        indexClassedDataInFullData.get(clas).add(i);
    }

    buildSpecificClassifier(data);

    if (fillPrototypes)
        addMissingPrototypesRandom();
}

From source file:classif.Prototyper.java

License:Open Source License

/**
 * Classifies an instance by the class of its nearest prototype.
 * <p>
 * The instance is converted into a {@code Sequence} of its non-class attribute values and
 * compared with every entry of {@code prototypes} using {@code Sequence.distance}; the
 * class value of the closest prototype wins (the first one on ties).
 *
 * @param sample the instance to classify
 * @return the index of the winning class value within the sample's class attribute
 * @throws Exception if classification fails
 */
public double classifyInstance(Instance sample) throws Exception {
    // Transform the instance into a sequence, skipping the class attribute wherever it
    // is positioned. (The previous first-or-last "shift" copied the class value into
    // the sequence when the class sat in the middle.)
    int classIndex = sample.classIndex();
    int numAttributes = sample.numAttributes();
    MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[numAttributes - 1];
    int t = 0;
    for (int a = 0; a < numAttributes && t < sequence.length; a++) {
        if (a != classIndex) {
            sequence[t++] = new MonoDoubleItemSet(sample.value(a));
        }
    }
    Sequence seq = new Sequence(sequence);

    // Nearest-prototype search.
    double minD = Double.MAX_VALUE;
    String classValue = null;
    for (ClassedSequence s : prototypes) {
        double tmpD = seq.distance(s.sequence);
        if (tmpD < minD) {
            minD = tmpD;
            classValue = s.classValue;
        }
    }
    // NOTE(review): classValue is still null here when no prototype produced a distance
    // below Double.MAX_VALUE (e.g. prototypes is empty); it is passed on unchanged.
    return sample.classAttribute().indexOfValue(classValue);
}

From source file:classif.Prototyper.java

License:Open Source License

/**
 * Converts every instance of a Weka data set into a {@code ClassedSequence}.
 * <p>
 * Each sequence holds the instance's non-class attribute values in attribute order and is
 * paired with the string value of the instance's class attribute.
 *
 * @param test the instances to convert; a class attribute must be set
 * @return one classed sequence per instance, in the order of the data set
 */
public static ClassedSequence[] convertWekaSetToClassedSequence(Instances test) {

    Attribute classAttribute = test.classAttribute();
    ClassedSequence[] testSequences = new ClassedSequence[test.numInstances()];
    for (int i = 0; i < testSequences.length; i++) {
        Instance sample = test.instance(i);

        // Copy every attribute except the class attribute, wherever it is positioned.
        // (The previous first-or-last "shift" copied the class value into the sequence
        // when the class sat in the middle.)
        int classIndex = sample.classIndex();
        int numAttributes = sample.numAttributes();
        MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[numAttributes - 1];
        int t = 0;
        for (int a = 0; a < numAttributes && t < sequence.length; a++) {
            if (a != classIndex) {
                sequence[t++] = new MonoDoubleItemSet(sample.value(a));
            }
        }

        String clas = sample.stringValue(classAttribute);
        testSequences[i] = new ClassedSequence(new Sequence(sequence), clas);
    }

    return testSequences;

}

From source file:classif.PrototyperEUC.java

License:Open Source License

/**
 * Classifies an instance by the class of its nearest prototype.
 * <p>
 * The instance is converted into a {@code Sequence} of its non-class attribute values and
 * compared with every entry of {@code prototypes} using {@code Sequence.distanceEuc}; the
 * class value of the closest prototype wins (the first one on ties).
 *
 * @param sample the instance to classify
 * @return the index of the winning class value within the sample's class attribute
 * @throws Exception if classification fails
 */
@Override
public double classifyInstance(Instance sample) throws Exception {
    // Transform the instance into a sequence, skipping the class attribute wherever it
    // is positioned. (The previous first-or-last "shift" copied the class value into
    // the sequence when the class sat in the middle.)
    int classIndex = sample.classIndex();
    int numAttributes = sample.numAttributes();
    MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[numAttributes - 1];
    int t = 0;
    for (int a = 0; a < numAttributes && t < sequence.length; a++) {
        if (a != classIndex) {
            sequence[t++] = new MonoDoubleItemSet(sample.value(a));
        }
    }
    Sequence seq = new Sequence(sequence);

    // Nearest-prototype search.
    double minD = Double.MAX_VALUE;
    String classValue = null;
    for (ClassedSequence s : prototypes) {
        double tmpD = seq.distanceEuc(s.sequence);
        if (tmpD < minD) {
            minD = tmpD;
            classValue = s.classValue;
        }
    }
    // NOTE(review): classValue is still null here when no prototype produced a distance
    // below Double.MAX_VALUE (e.g. prototypes is empty); it is passed on unchanged.
    return sample.classAttribute().indexOfValue(classValue);
}

From source file:classification.classifiers.LDA.java

License:Open Source License

/**
 * Modification to make the LDA classifier be like a classifier from WEKA(R).
 * /*ww w.  j a  v a  2s  . com*/
 * @param newInstance
 * @return
 * @throws Exception
 */
public double classifyInstance(Instance newInstance) throws Exception {
    double[] instance = new double[newInstance.numAttributes()];

    instance = newInstance.toDoubleArray();

    int numClass = predict(instance);
    // System.out.println(numClass);

    double predictedClass = valueClass[numClass];
    // Double predictedClass = ;
    return predictedClass;
}

From source file:classifier.CustomStringToWordVector.java

License:Open Source License

/**
 * Determines the dictionary and sets the filter's output format.
 * <p>
 * When {@code forcedAttributes} is {@code null} the dictionary is learned from the
 * instances held by the input format: the selected attributes are tokenized, optionally
 * lower-cased, stemmed and filtered through the stop list; word counts are collected
 * (per class value unless {@code m_doNotOperateOnPerClassBasis} is set or no class is
 * set), rare words are pruned, and one output attribute is created per surviving word
 * after the attributes that are not converted.
 * <p>
 * Otherwise the dictionary is taken from {@code forcedAttributes}: entries from index 2
 * onwards are mapped by name to their index, {@code forcedAttributes} becomes the output
 * format and the class index is set to 1.
 * <p>
 * Sets {@code m_Dictionary} and {@code m_NumInstances} in both cases, and
 * {@code m_DocsCounts} in the learned case.
 */
private void determineDictionary() {
    if (forcedAttributes == null) {
        // initialize stopwords
        Stopwords stopwords = new Stopwords();
        if (getUseStoplist()) {
            try {
                if (getStopwords().exists() && !getStopwords().isDirectory())
                    stopwords.read(getStopwords());
            } catch (Exception e) {
                // Best effort: a stop list that cannot be read leaves the default list in use.
                e.printStackTrace();
            }
        }

        // Operate on a per-class basis if class attribute is set
        int classInd = getInputFormat().classIndex();
        int values = 1;
        if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
            values = getInputFormat().attribute(classInd).numValues();
        }

        // One word -> Count map per class value (a single map when not per-class).
        TreeMap[] dictionaryArr = new TreeMap[values];
        for (int i = 0; i < values; i++) {
            dictionaryArr[i] = new TreeMap();
        }

        // Make sure we know which fields to convert
        determineSelectedRange();

        // Tokenize all training text into an orderedMap of "words".
        long pruneRate = Math.round((m_PeriodicPruningRate / 100.0) * getInputFormat().numInstances());
        for (int i = 0; i < getInputFormat().numInstances(); i++) {
            Instance instance = getInputFormat().instance(i);
            int vInd = 0;
            if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
                vInd = (int) instance.classValue();
            }

            // Iterate through all relevant string attributes of the current
            // instance. h collects the distinct words of this instance (document).
            Hashtable h = new Hashtable();
            for (int j = 0; j < instance.numAttributes(); j++) {
                if (m_SelectedRange.isInRange(j) && (instance.isMissing(j) == false)) {

                    // Get tokenizer
                    m_Tokenizer.tokenize(instance.stringValue(j));

                    // Iterate through tokens, perform stemming, and remove
                    // stopwords
                    // (if required)
                    while (m_Tokenizer.hasMoreElements()) {
                        String word = ((String) m_Tokenizer.nextElement()).intern();

                        if (this.m_lowerCaseTokens == true)
                            word = word.toLowerCase();

                        word = m_Stemmer.stem(word);

                        if (this.m_useStoplist == true)
                            if (stopwords.is(word))
                                continue;

                        // containsKey, not contains: Hashtable.contains() searches the
                        // values, so it never matched a word and cost a full scan per token.
                        if (!(h.containsKey(word)))
                            h.put(word, new Integer(0));

                        Count count = (Count) dictionaryArr[vInd].get(word);
                        if (count == null) {
                            dictionaryArr[vInd].put(word, new Count(1));
                        } else {
                            count.count++;
                        }
                    }
                }
            }

            // updating the docCount for the words that have occurred in
            // this
            // instance(document).
            Enumeration e = h.keys();
            while (e.hasMoreElements()) {
                String word = (String) e.nextElement();
                Count c = (Count) dictionaryArr[vInd].get(word);
                if (c != null) {
                    c.docCount++;
                } else
                    System.err.println("Warning: A word should definitely be in the "
                            + "dictionary.Please check the code");
            }

            // Periodic pruning: every pruneRate instances, drop words seen at most once.
            if (pruneRate > 0) {
                if (i % pruneRate == 0 && i > 0) {
                    for (int z = 0; z < values; z++) {
                        // Collect first, remove afterwards, so the map is not modified
                        // while its key set is being iterated.
                        Vector d = new Vector(1000);
                        Iterator it = dictionaryArr[z].keySet().iterator();
                        while (it.hasNext()) {
                            String word = (String) it.next();
                            Count count = (Count) dictionaryArr[z].get(word);
                            if (count.count <= 1) {
                                d.add(word);
                            }
                        }
                        Iterator iter = d.iterator();
                        while (iter.hasNext()) {
                            String word = (String) iter.next();
                            dictionaryArr[z].remove(word);
                        }
                    }
                }
            }
        }

        // Figure out the minimum required word frequency
        int totalsize = 0;
        int prune[] = new int[values];
        for (int z = 0; z < values; z++) {
            totalsize += dictionaryArr[z].size();

            int array[] = new int[dictionaryArr[z].size()];
            int pos = 0;
            Iterator it = dictionaryArr[z].keySet().iterator();
            while (it.hasNext()) {
                String word = (String) it.next();
                Count count = (Count) dictionaryArr[z].get(word);
                array[pos] = count.count;
                pos++;
            }

            // sort the array
            sortArray(array);
            if (array.length < m_WordsToKeep) {
                // if there aren't enough words, set the threshold to
                // minFreq
                prune[z] = m_minTermFreq;
            } else {
                // otherwise set it to be at least minFreq
                prune[z] = Math.max(m_minTermFreq, array[array.length - m_WordsToKeep]);
            }
        }

        // Convert the dictionary into an attribute index
        // and create one attribute per word
        FastVector attributes = new FastVector(totalsize + getInputFormat().numAttributes());

        // Add the non-converted attributes
        int classIndex = -1;
        for (int i = 0; i < getInputFormat().numAttributes(); i++) {
            if (!m_SelectedRange.isInRange(i)) {
                if (getInputFormat().classIndex() == i) {
                    classIndex = attributes.size();
                }
                attributes.addElement(getInputFormat().attribute(i).copy());
            }
        }

        // Add the word vector attributes (eliminating duplicates
        // that occur in multiple classes)
        TreeMap newDictionary = new TreeMap();
        int index = attributes.size();
        for (int z = 0; z < values; z++) {
            Iterator it = dictionaryArr[z].keySet().iterator();
            while (it.hasNext()) {
                String word = (String) it.next();
                Count count = (Count) dictionaryArr[z].get(word);
                if (count.count >= prune[z]) {
                    if (newDictionary.get(word) == null) {
                        newDictionary.put(word, new Integer(index++));
                        attributes.addElement(new Attribute(m_Prefix + word));
                    }
                }
            }
        }

        // Compute document frequencies (summed over all class values)
        m_DocsCounts = new int[attributes.size()];
        Iterator it = newDictionary.keySet().iterator();
        while (it.hasNext()) {
            String word = (String) it.next();
            int idx = ((Integer) newDictionary.get(word)).intValue();
            int docsCount = 0;
            for (int j = 0; j < values; j++) {
                Count c = (Count) dictionaryArr[j].get(word);
                if (c != null)
                    docsCount += c.docCount;
            }
            m_DocsCounts[idx] = docsCount;
        }

        // Trim vector and set instance variables
        attributes.trimToSize();
        m_Dictionary = newDictionary;
        m_NumInstances = getInputFormat().numInstances();

        // Set the filter's output format
        Instances outputFormat = new Instances(getInputFormat().relationName(), attributes, 0);
        outputFormat.setClassIndex(classIndex);
        setOutputFormat(outputFormat);
    } else {
        // Forced dictionary: no counting, the attribute list is given.
        determineSelectedRange();
        m_NumInstances = getInputFormat().numInstances();

        // NOTE(review): indices 0 and 1 are skipped and the class index is fixed to 1
        // below; presumably the first two forced attributes are not words - confirm
        // against the code that builds forcedAttributes.
        TreeMap newDictionary = new TreeMap();
        for (int i = 2; i < forcedAttributes.size(); i++) {
            newDictionary.put(((Attribute) forcedAttributes.get(i)).name(), new Integer(i));
        }
        m_Dictionary = newDictionary;

        // Set the filter's output format
        Instances outputFormat = new Instances(getInputFormat().relationName(), forcedAttributes, 0);
        outputFormat.setClassIndex(1);
        setOutputFormat(outputFormat);
    }
}

From source file:classifier.CustomStringToWordVector.java

License:Open Source License

/**
 * Converts the instance w/o normalization.
 * <p>
 * Builds a sparse output instance from the given input instance - the attributes that are
 * not converted first, then the word attributes looked up in {@code m_Dictionary} - and
 * appends it to {@code v}.
 *
 * @param instance the instance to convert
 * @param v the vector the converted instance is appended to
 * @return the number of non-converted attributes, i.e. the output index at which the
 *         word attributes start
 */
private int convertInstancewoDocNorm(Instance instance, FastVector v) {

    // Output attribute index -> value, kept sorted by index.
    TreeMap<Integer, Double> sparseValues = new TreeMap<Integer, Double>();

    // Step 1: carry over every attribute that is not selected for conversion.
    int copied = 0;
    for (int attr = 0; attr < getInputFormat().numAttributes(); attr++) {
        if (m_SelectedRange.isInRange(attr)) {
            continue;
        }

        Integer target = Integer.valueOf(copied);
        if (getInputFormat().attribute(attr).type() != Attribute.STRING) {
            // Simple nominal and numeric attributes are stored directly (zeros stay implicit).
            double raw = instance.value(attr);
            if (raw != 0.0) {
                sparseValues.put(target, Double.valueOf(raw));
            }
        } else if (instance.isMissing(attr)) {
            sparseValues.put(target, Double.valueOf(Utils.missingValue()));
        } else {
            // A string value is first registered with the output attribute; what gets
            // stored is the internal index it was given there.
            Attribute outAttr = outputFormatPeek().attribute(copied);
            if (outAttr.numValues() == 0) {
                // Note that the first string value in a SparseInstance doesn't get
                // printed, so a dummy value occupies that slot.
                outAttr.addStringValue("Hack to defeat SparseInstance bug");
            }
            int stringIndex = outAttr.addStringValue(instance.stringValue(attr));
            sparseValues.put(target, Double.valueOf(stringIndex));
        }
        copied++;
    }

    // Step 2: tokenize the selected attributes and record the dictionary words found.
    for (int attr = 0; attr < instance.numAttributes(); attr++) {
        if (!m_SelectedRange.isInRange(attr) || instance.isMissing(attr)) {
            continue;
        }

        m_Tokenizer.tokenize(instance.stringValue(attr));
        while (m_Tokenizer.hasMoreElements()) {
            String token = (String) m_Tokenizer.nextElement();
            if (this.m_lowerCaseTokens) {
                token = token.toLowerCase();
            }
            token = m_Stemmer.stem(token);

            Integer wordIndex = (Integer) m_Dictionary.get(token);
            if (wordIndex == null) {
                continue;
            }

            // Counting mode accumulates occurrences; otherwise presence is marked with 1.
            Double previous = m_OutputCounts ? sparseValues.get(wordIndex) : null;
            double updated = (previous == null) ? 1.0 : previous.doubleValue() + 1.0;
            sparseValues.put(wordIndex, Double.valueOf(updated));
        }
    }

    // Step 3: TF transform, applied to the word attributes only.
    if (m_TFTransform) {
        Iterator<Integer> keys = sparseValues.keySet().iterator();
        while (keys.hasNext()) {
            Integer key = keys.next();
            if (key.intValue() < copied) {
                continue;
            }
            double tf = Math.log(sparseValues.get(key).doubleValue() + 1);
            sparseValues.put(key, Double.valueOf(tf));
        }
    }

    // Step 4: IDF transform, applied to the word attributes only.
    if (m_IDFTransform) {
        Iterator<Integer> keys = sparseValues.keySet().iterator();
        while (keys.hasNext()) {
            Integer key = keys.next();
            if (key.intValue() < copied) {
                continue;
            }
            double idf = Math.log(m_NumInstances / (double) m_DocsCounts[key.intValue()]);
            sparseValues.put(key, Double.valueOf(sparseValues.get(key).doubleValue() * idf));
        }
    }

    // Step 5: flatten the sorted map into the parallel arrays a sparse instance needs.
    int stored = sparseValues.size();
    double[] values = new double[stored];
    int[] indices = new int[stored];
    int slot = 0;
    Iterator<Integer> ordered = sparseValues.keySet().iterator();
    while (ordered.hasNext()) {
        Integer key = ordered.next();
        indices[slot] = key.intValue();
        values[slot] = sparseValues.get(key).doubleValue();
        slot++;
    }

    Instance inst = new SparseInstance(instance.weight(), values, indices, outputFormatPeek().numAttributes());
    inst.setDataset(outputFormatPeek());

    v.addElement(inst);

    return copied;
}

From source file:Classifier.supervised.LinearRegression.java

License:Open Source License

/**
 * Computes the value a regression model predicts for an instance.
 * <p>
 * Each attribute that is flagged in {@code selectedAttributes} and is not the class
 * attribute ({@code m_ClassIndex}) contributes its value times the next unused
 * coefficient; the coefficient following the last one used is added as a constant term.
 *
 * @param transformedInstance the input instance
 * @param selectedAttributes an array of flags indicating which
 * attributes are included in the regression model
 * @param coefficients an array of coefficients for the regression
 * model
 * @return the regression value for the instance.
 * @throws Exception if the class attribute of the input instance
 * is not assigned
 */
protected double regressionPrediction(Instance transformedInstance, boolean[] selectedAttributes,
        double[] coefficients) throws Exception {

    double prediction = 0;
    int nextCoefficient = 0;

    for (int attr = 0; attr < transformedInstance.numAttributes(); attr++) {
        // The class attribute and attributes left out of the model contribute nothing.
        if (attr == m_ClassIndex || !selectedAttributes[attr]) {
            continue;
        }
        prediction += coefficients[nextCoefficient++] * transformedInstance.value(attr);
    }

    // Whatever coefficient remains after the attribute weights is the constant term.
    prediction += coefficients[nextCoefficient];
    return prediction;
}