Example usage for weka.core Instance numAttributes

List of usage examples for weka.core Instance numAttributes

Introduction

On this page you can find example usage of weka.core.Instance.numAttributes().

Prototype

public int numAttributes();

Document

Returns the number of attributes.
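
Before the collected examples, here is a minimal, self-contained sketch (assuming the Weka 3.7+ API with DenseInstance and the ArrayList-based Instances constructor). It shows that numAttributes() counts every attribute an instance knows about, including the class attribute.

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class NumAttributesDemo {
    public static void main(String[] args) {
        // Two numeric attributes plus a nominal class attribute.
        ArrayList<Attribute> attrs = new ArrayList<>();
        attrs.add(new Attribute("x1"));
        attrs.add(new Attribute("x2"));
        ArrayList<String> classValues = new ArrayList<>();
        classValues.add("yes");
        classValues.add("no");
        attrs.add(new Attribute("class", classValues));

        Instances data = new Instances("demo", attrs, 0);
        data.setClassIndex(data.numAttributes() - 1);

        Instance inst = new DenseInstance(data.numAttributes());
        inst.setDataset(data);
        inst.setValue(0, 1.5);
        inst.setValue(1, -0.25);
        inst.setValue(2, "yes");

        // numAttributes() includes the class attribute, so this prints 3 lines;
        // value(i) returns the internal double (the value index for nominals).
        for (int i = 0; i < inst.numAttributes(); i++) {
            System.out.println(data.attribute(i).name() + " = " + inst.value(i));
        }
    }
}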

Usage

From source file:cba.ItemSet.java

License:Open Source License

/**
 * Checks if an instance contains an item set.
 *
 * @param instance the instance to be tested
 * @return true if the given instance contains this item set
 */

public boolean containedBy(Instance instance) {

    if (instance instanceof weka.core.SparseInstance && m_treatZeroAsMissing) {
        int numInstVals = instance.numValues();
        int numItemSetVals = m_items.length;

        for (int p1 = 0, p2 = 0; p1 < numInstVals || p2 < numItemSetVals;) {
            int instIndex = Integer.MAX_VALUE;
            if (p1 < numInstVals) {
                instIndex = instance.index(p1);
            }
            int itemIndex = p2;

            if (m_items[itemIndex] > -1) {
                if (itemIndex != instIndex) {
                    return false;
                } else {
                    if (instance.isMissingSparse(p1)) {
                        return false;
                    }
                    if (m_items[itemIndex] != (int) instance.valueSparse(p1)) {
                        return false;
                    }
                }

                p1++;
                p2++;
            } else {
                if (itemIndex < instIndex) {
                    p2++;
                } else if (itemIndex == instIndex) {
                    p2++;
                    p1++;
                }
            }
        }
    } else {
        for (int i = 0; i < instance.numAttributes(); i++)
            if (m_items[i] > -1) {
                if (instance.isMissing(i) || (m_treatZeroAsMissing && (int) instance.value(i) == 0))
                    return false;
                if (m_items[i] != (int) instance.value(i))
                    return false;
            }
    }

    return true;
}
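
To illustrate the dense branch above, assume (as the code does) that m_items holds one entry per attribute, with -1 meaning the attribute is unconstrained and any other value the required nominal value index. A hypothetical standalone version of that branch then looks like this:

import weka.core.Instance;

public class ItemSetCheck {
    // Hypothetical helper mirroring the dense branch above: items[i] is the
    // required value index for attribute i, or -1 if the item set does not
    // constrain that attribute.
    public static boolean contains(Instance instance, int[] items, boolean treatZeroAsMissing) {
        for (int i = 0; i < instance.numAttributes(); i++) {
            if (items[i] > -1) {
                if (instance.isMissing(i)
                        || (treatZeroAsMissing && (int) instance.value(i) == 0)
                        || items[i] != (int) instance.value(i)) {
                    return false;
                }
            }
        }
        return true;
    }
}

With items = {-1, 2, -1, 0}, for example, only attributes 1 and 3 are checked.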

From source file:CGLSMethod.LinearRegression.java

License:Open Source License

/**
 * Calculate the dependent value for a given instance for a
 * given regression model.
 *
 * @param transformedInstance the input instance
 * @param selectedAttributes an array of flags indicating which 
 * attributes are included in the regression model
 * @param coefficients an array of coefficients for the regression
 * model
 * @return the regression value for the instance.
 * @throws Exception if the class attribute of the input instance
 * is not assigned
 */
private double regressionPrediction(Instance transformedInstance, boolean[] selectedAttributes,
        double[] coefficients) throws Exception {

    double result = 0;
    int column = 0;
    for (int j = 0; j < transformedInstance.numAttributes(); j++) {
        if ((m_ClassIndex != j) && (selectedAttributes[j])) {
            result += coefficients[column] * transformedInstance.value(j);
            column++;
        }
    }
    result += coefficients[column];

    return result;
}
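
The loop above assumes a specific coefficient layout: one weight per selected non-class attribute, in attribute order, with the intercept in the last slot of the array. A hypothetical standalone sketch of the same computation:

import weka.core.Instance;

public class RegressionSketch {
    // One coefficient per selected non-class attribute (in attribute order),
    // followed by the intercept in the last array slot.
    public static double predict(Instance inst, int classIndex,
            boolean[] selected, double[] coefficients) {
        double result = 0;
        int column = 0;
        for (int j = 0; j < inst.numAttributes(); j++) {
            if (j != classIndex && selected[j]) {
                result += coefficients[column++] * inst.value(j);
            }
        }
        return result + coefficients[column]; // intercept
    }
}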

From source file:classif.Prototyper.java

License:Open Source License

@Override
public void buildClassifier(Instances data) throws Exception {
    trainingData = data;
    Attribute classAttribute = data.classAttribute();
    prototypes = new ArrayList<>();

    classedData = new HashMap<String, ArrayList<Sequence>>();
    indexClassedDataInFullData = new HashMap<String, ArrayList<Integer>>();
    for (int c = 0; c < data.numClasses(); c++) {
        classedData.put(data.classAttribute().value(c), new ArrayList<Sequence>());
        indexClassedDataInFullData.put(data.classAttribute().value(c), new ArrayList<Integer>());
    }

    sequences = new Sequence[data.numInstances()];
    classMap = new String[sequences.length];
    for (int i = 0; i < sequences.length; i++) {
        Instance sample = data.instance(i);
        MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1];
        int shift = (sample.classIndex() == 0) ? 1 : 0;
        for (int t = 0; t < sequence.length; t++) {
            sequence[t] = new MonoDoubleItemSet(sample.value(t + shift));
        }
        sequences[i] = new Sequence(sequence);
        String clas = sample.stringValue(classAttribute);
        classMap[i] = clas;
        classedData.get(clas).add(sequences[i]);
        indexClassedDataInFullData.get(clas).add(i);
        //         System.out.println("Element "+i+" of train is classed "+clas+" and went to element "+(indexClassedDataInFullData.get(clas).size()-1));
    }

    buildSpecificClassifier(data);

    if (fillPrototypes)
        addMissingPrototypesRandom();
}
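
The conversion above reads each training instance as a time series of length numAttributes() - 1, skipping the class attribute, which is assumed to sit either first or last. Since Sequence and MonoDoubleItemSet are project-specific, a hypothetical version of just the extraction step might return a plain double[]:

import weka.core.Instance;

public class SeriesExtraction {
    // Copy every non-class value into a series, assuming the class attribute
    // is either the first or the last attribute (as in the code above).
    public static double[] toSeries(Instance sample) {
        double[] series = new double[sample.numAttributes() - 1];
        int shift = (sample.classIndex() == 0) ? 1 : 0;
        for (int t = 0; t < series.length; t++) {
            series[t] = sample.value(t + shift);
        }
        return series;
    }
}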

From source file:classif.Prototyper.java

License:Open Source License

public double classifyInstance(Instance sample) throws Exception {
    // transform instance to sequence
    MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1];
    int shift = (sample.classIndex() == 0) ? 1 : 0;
    for (int t = 0; t < sequence.length; t++) {
        sequence[t] = new MonoDoubleItemSet(sample.value(t + shift));
    }
    Sequence seq = new Sequence(sequence);

    double minD = Double.MAX_VALUE;
    String classValue = null;
    for (ClassedSequence s : prototypes) {
        double tmpD = seq.distance(s.sequence);
        if (tmpD < minD) {
            minD = tmpD;
            classValue = s.classValue;
        }
    }
    // System.out.println(prototypes.size());
    return sample.classAttribute().indexOfValue(classValue);
}

From source file:classif.Prototyper.java

License:Open Source License

public static ClassedSequence[] convertWekaSetToClassedSequence(Instances test) {

    Attribute classAttribute = test.classAttribute();
    ClassedSequence[] testSequences = new ClassedSequence[test.numInstances()];
    for (int i = 0; i < testSequences.length; i++) {
        Instance sample = test.instance(i);
        MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1];
        int shift = (sample.classIndex() == 0) ? 1 : 0;
        for (int t = 0; t < sequence.length; t++) {
            sequence[t] = new MonoDoubleItemSet(sample.value(t + shift));
        }
        String clas = sample.stringValue(classAttribute);
        testSequences[i] = new ClassedSequence(new Sequence(sequence), clas);
    }

    return testSequences;

}

From source file:classif.PrototyperEUC.java

License:Open Source License

@Override
public double classifyInstance(Instance sample) throws Exception {
    // transform instance to sequence
    MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1];
    int shift = (sample.classIndex() == 0) ? 1 : 0;
    for (int t = 0; t < sequence.length; t++) {
        sequence[t] = new MonoDoubleItemSet(sample.value(t + shift));
    }
    Sequence seq = new Sequence(sequence);

    double minD = Double.MAX_VALUE;
    String classValue = null;
    for (ClassedSequence s : prototypes) {
        double tmpD = seq.distanceEuc(s.sequence);
        if (tmpD < minD) {
            minD = tmpD;
            classValue = s.classValue;
        }
    }
    // System.out.println(prototypes.size());
    //      System.out.println(classValue);
    return sample.classAttribute().indexOfValue(classValue);
}

From source file:classification.classifiers.LDA.java

License:Open Source License

/**
 * Modification that lets the LDA classifier be used like a classifier from WEKA(R).
 *
 * @param newInstance the instance to classify
 * @return the predicted class value
 * @throws Exception if classification fails
 */
public double classifyInstance(Instance newInstance) throws Exception {
    double[] instance = new double[newInstance.numAttributes()];

    instance = newInstance.toDoubleArray();

    int numClass = predict(instance);
    // System.out.println(numClass);

    double predictedClass = valueClass[numClass];
    // Double predictedClass = ;
    return predictedClass;
}
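
A side note on the first two lines above: toDoubleArray() already returns a fresh array whose length equals numAttributes(), so the pre-allocated array is immediately replaced. A minimal equivalent of the conversion step:

import weka.core.Instance;

public class ToArraySketch {
    // toDoubleArray() allocates and returns an array of length numAttributes(),
    // so no separate allocation is needed before calling it.
    public static double[] valuesOf(Instance newInstance) {
        return newInstance.toDoubleArray();
    }
}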

From source file:classifier.CustomStringToWordVector.java

License:Open Source License

/**
 * determines the dictionary.
 */
private void determineDictionary() {
    if (forcedAttributes == null) {
        // initialize stopwords
        Stopwords stopwords = new Stopwords();
        if (getUseStoplist()) {
            try {
                if (getStopwords().exists() && !getStopwords().isDirectory())
                    stopwords.read(getStopwords());
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

        // Operate on a per-class basis if class attribute is set
        int classInd = getInputFormat().classIndex();
        int values = 1;
        if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
            values = getInputFormat().attribute(classInd).numValues();
        }

        // TreeMap dictionaryArr [] = new TreeMap[values];
        TreeMap[] dictionaryArr = new TreeMap[values];
        for (int i = 0; i < values; i++) {
            dictionaryArr[i] = new TreeMap();
        }

        // Make sure we know which fields to convert
        determineSelectedRange();

        // Tokenize all training text into an orderedMap of "words".
        long pruneRate = Math.round((m_PeriodicPruningRate / 100.0) * getInputFormat().numInstances());
        for (int i = 0; i < getInputFormat().numInstances(); i++) {
            Instance instance = getInputFormat().instance(i);
            int vInd = 0;
            if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
                vInd = (int) instance.classValue();
            }

            // Iterate through all relevant string attributes of the current
            // instance
            Hashtable h = new Hashtable();
            for (int j = 0; j < instance.numAttributes(); j++) {
                if (m_SelectedRange.isInRange(j) && (instance.isMissing(j) == false)) {

                    // Get tokenizer
                    m_Tokenizer.tokenize(instance.stringValue(j));

                    // Iterate through tokens, perform stemming, and remove
                    // stopwords
                    // (if required)
                    while (m_Tokenizer.hasMoreElements()) {
                        String word = ((String) m_Tokenizer.nextElement()).intern();

                        if (this.m_lowerCaseTokens == true)
                            word = word.toLowerCase();

                        word = m_Stemmer.stem(word);

                        if (this.m_useStoplist == true)
                            if (stopwords.is(word))
                                continue;

                        if (!(h.contains(word)))
                            h.put(word, new Integer(0));

                        Count count = (Count) dictionaryArr[vInd].get(word);
                        if (count == null) {
                            dictionaryArr[vInd].put(word, new Count(1));
                        } else {
                            count.count++;
                        }
                    }
                }
            }

            // updating the docCount for the words that have occurred in
            // this
            // instance(document).
            Enumeration e = h.keys();
            while (e.hasMoreElements()) {
                String word = (String) e.nextElement();
                Count c = (Count) dictionaryArr[vInd].get(word);
                if (c != null) {
                    c.docCount++;
                } else
                    System.err.println("Warning: A word should definitely be in the "
                            + "dictionary.Please check the code");
            }

            if (pruneRate > 0) {
                if (i % pruneRate == 0 && i > 0) {
                    for (int z = 0; z < values; z++) {
                        Vector d = new Vector(1000);
                        Iterator it = dictionaryArr[z].keySet().iterator();
                        while (it.hasNext()) {
                            String word = (String) it.next();
                            Count count = (Count) dictionaryArr[z].get(word);
                            if (count.count <= 1) {
                                d.add(word);
                            }
                        }
                        Iterator iter = d.iterator();
                        while (iter.hasNext()) {
                            String word = (String) iter.next();
                            dictionaryArr[z].remove(word);
                        }
                    }
                }
            }
        }

        // Figure out the minimum required word frequency
        int totalsize = 0;
        int prune[] = new int[values];
        for (int z = 0; z < values; z++) {
            totalsize += dictionaryArr[z].size();

            int array[] = new int[dictionaryArr[z].size()];
            int pos = 0;
            Iterator it = dictionaryArr[z].keySet().iterator();
            while (it.hasNext()) {
                String word = (String) it.next();
                Count count = (Count) dictionaryArr[z].get(word);
                array[pos] = count.count;
                pos++;
            }

            // sort the array
            sortArray(array);
            if (array.length < m_WordsToKeep) {
                // if there aren't enough words, set the threshold to
                // minFreq
                prune[z] = m_minTermFreq;
            } else {
                // otherwise set it to be at least minFreq
                prune[z] = Math.max(m_minTermFreq, array[array.length - m_WordsToKeep]);
            }
        }

        // Convert the dictionary into an attribute index
        // and create one attribute per word
        FastVector attributes = new FastVector(totalsize + getInputFormat().numAttributes());

        // Add the non-converted attributes
        int classIndex = -1;
        for (int i = 0; i < getInputFormat().numAttributes(); i++) {
            if (!m_SelectedRange.isInRange(i)) {
                if (getInputFormat().classIndex() == i) {
                    classIndex = attributes.size();
                }
                attributes.addElement(getInputFormat().attribute(i).copy());
            }
        }

        // Add the word vector attributes (eliminating duplicates
        // that occur in multiple classes)
        TreeMap newDictionary = new TreeMap();
        int index = attributes.size();
        for (int z = 0; z < values; z++) {
            Iterator it = dictionaryArr[z].keySet().iterator();
            while (it.hasNext()) {
                String word = (String) it.next();
                Count count = (Count) dictionaryArr[z].get(word);
                if (count.count >= prune[z]) {
                    if (newDictionary.get(word) == null) {
                        newDictionary.put(word, new Integer(index++));
                        attributes.addElement(new Attribute(m_Prefix + word));
                    }
                }
            }
        }

        // Compute document frequencies
        m_DocsCounts = new int[attributes.size()];
        Iterator it = newDictionary.keySet().iterator();
        while (it.hasNext()) {
            String word = (String) it.next();
            int idx = ((Integer) newDictionary.get(word)).intValue();
            int docsCount = 0;
            for (int j = 0; j < values; j++) {
                Count c = (Count) dictionaryArr[j].get(word);
                if (c != null)
                    docsCount += c.docCount;
            }
            m_DocsCounts[idx] = docsCount;
        }

        // Trim vector and set instance variables
        attributes.trimToSize();
        m_Dictionary = newDictionary;
        m_NumInstances = getInputFormat().numInstances();

        // Set the filter's output format
        Instances outputFormat = new Instances(getInputFormat().relationName(), attributes, 0);
        outputFormat.setClassIndex(classIndex);
        setOutputFormat(outputFormat);
    } else {
        //m_Dictionary = newDictionary;
        determineSelectedRange();
        m_NumInstances = getInputFormat().numInstances();

        TreeMap newDictionary = new TreeMap();
        for (int i = 2; i < forcedAttributes.size(); i++) {
            newDictionary.put(((Attribute) forcedAttributes.get(i)).name(), new Integer(i));
        }
        m_Dictionary = newDictionary;

        // Set the filter's output format
        Instances outputFormat = new Instances(getInputFormat().relationName(), forcedAttributes, 0);
        outputFormat.setClassIndex(1);
        setOutputFormat(outputFormat);
    }
}
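
The heart of the method is the per-instance loop over numAttributes() that tokenizes every selected, non-missing string attribute. A heavily simplified, hypothetical sketch of that step (whitespace splitting stands in for m_Tokenizer; stemming, stopwords, and per-class dictionaries are omitted):

import java.util.HashMap;
import java.util.Map;

import weka.core.Instance;

public class TokenCounting {
    // Count tokens in every non-missing string attribute of one instance.
    public static Map<String, Integer> tokenCounts(Instance instance) {
        Map<String, Integer> counts = new HashMap<>();
        for (int j = 0; j < instance.numAttributes(); j++) {
            if (instance.attribute(j).isString() && !instance.isMissing(j)) {
                for (String word : instance.stringValue(j).toLowerCase().split("\\s+")) {
                    counts.merge(word, 1, Integer::sum);
                }
            }
        }
        return counts;
    }
}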

From source file:classifier.CustomStringToWordVector.java

License:Open Source License

/**
 * Converts the instance without normalization.
 *
 * @param instance the instance to convert
 * @param v the vector to which the converted instance is appended
 * @return the number of non-converted attributes copied to the output
 */
private int convertInstancewoDocNorm(Instance instance, FastVector v) {

    // Convert the instance into a sorted set of indexes
    TreeMap contained = new TreeMap();

    // Copy all non-converted attributes from input to output
    int firstCopy = 0;
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (!m_SelectedRange.isInRange(i)) {
            if (getInputFormat().attribute(i).type() != Attribute.STRING) {
                // Add simple nominal and numeric attributes directly
                if (instance.value(i) != 0.0) {
                    contained.put(new Integer(firstCopy), new Double(instance.value(i)));
                }
            } else {
                if (instance.isMissing(i)) {
                    contained.put(new Integer(firstCopy), new Double(Utils.missingValue()));
                } else {

                    // If this is a string attribute, we have to first add
                    // this value to the range of possible values, then add
                    // its new internal index.
                    if (outputFormatPeek().attribute(firstCopy).numValues() == 0) {
                        // Note that the first string value in a
                        // SparseInstance doesn't get printed.
                        outputFormatPeek().attribute(firstCopy)
                                .addStringValue("Hack to defeat SparseInstance bug");
                    }
                    int newIndex = outputFormatPeek().attribute(firstCopy)
                            .addStringValue(instance.stringValue(i));
                    contained.put(new Integer(firstCopy), new Double(newIndex));
                }
            }
            firstCopy++;
        }
    }

    for (int j = 0; j < instance.numAttributes(); j++) {
        // if ((getInputFormat().attribute(j).type() == Attribute.STRING)
        if (m_SelectedRange.isInRange(j) && (instance.isMissing(j) == false)) {

            m_Tokenizer.tokenize(instance.stringValue(j));

            while (m_Tokenizer.hasMoreElements()) {
                String word = (String) m_Tokenizer.nextElement();
                if (this.m_lowerCaseTokens == true)
                    word = word.toLowerCase();
                word = m_Stemmer.stem(word);
                Integer index = (Integer) m_Dictionary.get(word);
                if (index != null) {
                    if (m_OutputCounts) { // Separate if here rather than
                        // two lines down to avoid
                        // hashtable lookup
                        Double count = (Double) contained.get(index);
                        if (count != null) {
                            contained.put(index, new Double(count.doubleValue() + 1.0));
                        } else {
                            contained.put(index, new Double(1));
                        }
                    } else {
                        contained.put(index, new Double(1));
                    }
                }
            }
        }
    }

    // Doing TFTransform
    if (m_TFTransform == true) {
        Iterator it = contained.keySet().iterator();
        for (int i = 0; it.hasNext(); i++) {
            Integer index = (Integer) it.next();
            if (index.intValue() >= firstCopy) {
                double val = ((Double) contained.get(index)).doubleValue();
                val = Math.log(val + 1);
                contained.put(index, new Double(val));
            }
        }
    }

    // Doing IDFTransform
    if (m_IDFTransform == true) {
        Iterator it = contained.keySet().iterator();
        for (int i = 0; it.hasNext(); i++) {
            Integer index = (Integer) it.next();
            if (index.intValue() >= firstCopy) {
                double val = ((Double) contained.get(index)).doubleValue();
                val = val * Math.log(m_NumInstances / (double) m_DocsCounts[index.intValue()]);
                contained.put(index, new Double(val));
            }
        }
    }

    // Convert the set to structures needed to create a sparse instance.
    double[] values = new double[contained.size()];
    int[] indices = new int[contained.size()];
    Iterator it = contained.keySet().iterator();
    for (int i = 0; it.hasNext(); i++) {
        Integer index = (Integer) it.next();
        Double value = (Double) contained.get(index);
        values[i] = value.doubleValue();
        indices[i] = index.intValue();
    }

    Instance inst = new SparseInstance(instance.weight(), values, indices, outputFormatPeek().numAttributes());
    inst.setDataset(outputFormatPeek());

    v.addElement(inst);

    return firstCopy;
}
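
The last few lines turn the sorted index-to-value map into the parallel arrays that SparseInstance expects. A minimal standalone sketch of that construction:

import java.util.Map;
import java.util.TreeMap;

import weka.core.Instance;
import weka.core.SparseInstance;

public class SparseConstruction {
    // Turn a sorted index -> value map into a SparseInstance with the given
    // weight and total number of attributes.
    public static Instance toSparse(TreeMap<Integer, Double> contained,
            double weight, int numAttributes) {
        double[] values = new double[contained.size()];
        int[] indices = new int[contained.size()];
        int i = 0;
        for (Map.Entry<Integer, Double> e : contained.entrySet()) {
            indices[i] = e.getKey();
            values[i] = e.getValue();
            i++;
        }
        return new SparseInstance(weight, values, indices, numAttributes);
    }
}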

From source file:Classifier.supervised.LinearRegression.java

License:Open Source License

/**
 * Calculate the dependent value for a given instance for a
 * given regression model.
 *
 * @param transformedInstance the input instance
 * @param selectedAttributes an array of flags indicating which 
 * attributes are included in the regression model
 * @param coefficients an array of coefficients for the regression
 * model
 * @return the regression value for the instance.
 * @throws Exception if the class attribute of the input instance
 * is not assigned
 */
protected double regressionPrediction(Instance transformedInstance, boolean[] selectedAttributes,
        double[] coefficients) throws Exception {

    double result = 0;
    int column = 0;
    for (int j = 0; j < transformedInstance.numAttributes(); j++) {
        if ((m_ClassIndex != j) && (selectedAttributes[j])) {
            result += coefficients[column] * transformedInstance.value(j);
            column++;
        }
    }
    result += coefficients[column];

    return result;
}