Example usage for weka.core Instance numAttributes

List of usage examples for weka.core Instance numAttributes

Introduction

In this page you can find the example usage for weka.core Instance numAttributes.

Prototype

public int numAttributes();

Source Link

Document

Returns the number of attributes.

Usage

From source file:cluster.ABC.ClusterUtils.java

License:Open Source License

/** Normalizes the values of a normal Instance in L2 norm
 *
 * @author Sugato Basu/*from  w  w  w  .  j a v a2 s.  com*/
 * @param inst Instance to be normalized
 */

public static void normalizeInstance(Instance inst) throws Exception {
    double norm = 0;
    double values[] = inst.toDoubleArray();

    if (inst instanceof SparseInstance) {
        System.err.println("Is SparseInstance, using normalizeSparseInstance function instead");
        normalizeSparseInstance(inst);
    }

    for (int i = 0; i < values.length; i++) {
        if (i != inst.classIndex()) { // don't normalize the class index 
            norm += values[i] * values[i];
        }
    }
    norm = Math.sqrt(norm);
    for (int i = 0; i < values.length; i++) {
        if (i != inst.classIndex()) { // don't normalize the class index 
            values[i] /= norm;
        }
    }
    for (int i = 0; i < inst.numAttributes(); i++) {
        inst.setValue(i, values[i]);
    }
    //inst.setValueArray(values);
}

From source file:cluster.ABC.ClusterUtils.java

License:Open Source License

/** This function divides every attribute value in an instance by
 *  the instance weight -- useful to find the mean of a cluster in
 *  Euclidean space //  ww  w . j  a v  a2 s  .  c  o  m
 *  @param inst Instance passed in for normalization (destructive update)
 */
public static void normalizeByWeight(Instance inst) {
    double weight = inst.weight();
    if (inst instanceof SparseInstance) {
        for (int i = 0; i < inst.numValues(); i++) {
            inst.setValueSparse(i, inst.valueSparse(i) / weight);
        }
    } else if (!(inst instanceof SparseInstance)) {
        for (int i = 0; i < inst.numAttributes(); i++) {
            inst.setValue(i, inst.value(i) / weight);
        }
    }
}

From source file:cluster.ABC.ClusterUtils.java

License:Open Source License

/** Finds sum of 2 instances (handles sparse and non-sparse)
 *///from www. j a va 2s .  co  m

public static Instance sumInstances(Instance inst1, Instance inst2, Instances m_Instances) throws Exception {
    int numAttributes = inst1.numAttributes();
    if (inst2.numAttributes() != numAttributes) {
        throw new Exception("Error!! inst1 and inst2 should have same number of attributes.");
    }
    double weight1 = inst1.weight(), weight2 = inst2.weight();
    double[] values = new double[numAttributes];

    for (int i = 0; i < numAttributes; i++) {
        values[i] = 0;
    }

    if (inst1 instanceof SparseInstance && inst2 instanceof SparseInstance) {
        for (int i = 0; i < inst1.numValues(); i++) {
            int indexOfIndex = inst1.index(i);
            values[indexOfIndex] = inst1.valueSparse(i);
        }
        for (int i = 0; i < inst2.numValues(); i++) {
            int indexOfIndex = inst2.index(i);
            values[indexOfIndex] += inst2.valueSparse(i);
        }
        SparseInstance newInst = new SparseInstance(weight1 + weight2, values);
        newInst.setDataset(m_Instances);
        return newInst;
    } else if (!(inst1 instanceof SparseInstance) && !(inst2 instanceof SparseInstance)) {
        for (int i = 0; i < numAttributes; i++) {
            values[i] = inst1.value(i) + inst2.value(i);
        }
    } else {
        throw new Exception("Error!! inst1 and inst2 should be both of same type -- sparse or non-sparse");
    }
    Instance newInst = new Instance(weight1 + weight2, values);
    newInst.setDataset(m_Instances);
    return newInst;
}

From source file:cn.edu.xjtu.dbmine.StringToWordVector.java

License:Open Source License

/**
 * determines the dictionary./*from  w  ww  .j  a va2 s.  c  o m*/
 */
private void determineDictionary() {
    // initialize stopwords
    Stopwords stopwords = new Stopwords();
    if (getUseStoplist()) {
        try {
            if (getStopwords().exists() && !getStopwords().isDirectory())
                stopwords.read(getStopwords());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    // Operate on a per-class basis if class attribute is set
    int classInd = getInputFormat().classIndex();
    int values = 1;
    if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
        values = getInputFormat().attribute(classInd).numValues();
        // System.out.println("number of class:"+getInputFormat().numClasses()+" "+getInputFormat().attribute(classInd).value(0));

    }

    // TreeMap dictionaryArr [] = new TreeMap[values];
    TreeMap[] dictionaryArr = new TreeMap[values];
    for (int i = 0; i < values; i++) {
        dictionaryArr[i] = new TreeMap();
    }

    // Make sure we know which fields to convert
    determineSelectedRange();

    // Tokenize all training text into an orderedMap of "words".
    long pruneRate = Math.round((m_PeriodicPruningRate / 100.0) * getInputFormat().numInstances());
    for (int i = 0; i < getInputFormat().numInstances(); i++) {
        Instance instance = getInputFormat().instance(i);
        int vInd = 0;
        if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
            vInd = (int) instance.classValue();
        }

        // Iterate through all relevant string attributes of the current
        // instance
        Hashtable h = new Hashtable();
        for (int j = 0; j < instance.numAttributes(); j++) {
            if (m_SelectedRange.isInRange(j) && (instance.isMissing(j) == false)) {

                // Get tokenizer
                m_Tokenizer.tokenize(instance.stringValue(j));

                // Iterate through tokens, perform stemming, and remove
                // stopwords
                // (if required)
                while (m_Tokenizer.hasMoreElements()) {
                    String word = ((String) m_Tokenizer.nextElement()).intern();

                    if (this.m_lowerCaseTokens == true)
                        word = word.toLowerCase();

                    word = m_Stemmer.stem(word);

                    if (this.m_useStoplist == true)
                        if (stopwords.is(word))
                            continue;

                    if (!(h.contains(word)))
                        h.put(word, new Integer(0));

                    Count count = (Count) dictionaryArr[vInd].get(word);
                    if (count == null) {
                        dictionaryArr[vInd].put(word, new Count(1));
                    } else {
                        count.count++;
                    }
                }
            }
        }

        // updating the docCount for the words that have occurred in this
        // instance(document).
        Enumeration e = h.keys();
        while (e.hasMoreElements()) {
            String word = (String) e.nextElement();
            Count c = (Count) dictionaryArr[vInd].get(word);
            if (c != null) {
                c.docCount++;
                // c.doclist.add(vInd);
            } else
                System.err.println(
                        "Warning: A word should definitely be in the " + "dictionary.Please check the code");
        }

        if (pruneRate > 0) {
            if (i % pruneRate == 0 && i > 0) {
                for (int z = 0; z < values; z++) {
                    Vector d = new Vector(1000);
                    Iterator it = dictionaryArr[z].keySet().iterator();
                    while (it.hasNext()) {
                        String word = (String) it.next();
                        Count count = (Count) dictionaryArr[z].get(word);
                        if (count.count <= 1) {
                            d.add(word);
                        }
                    }
                    Iterator iter = d.iterator();
                    while (iter.hasNext()) {
                        String word = (String) iter.next();
                        dictionaryArr[z].remove(word);
                    }
                }
            }
        }
    }

    // Figure out the minimum required word frequency
    int totalsize = 0;
    int prune[] = new int[values];
    for (int z = 0; z < values; z++) {
        totalsize += dictionaryArr[z].size();

        int array[] = new int[dictionaryArr[z].size()];
        int pos = 0;
        Iterator it = dictionaryArr[z].keySet().iterator();
        while (it.hasNext()) {
            String word = (String) it.next();
            Count count = (Count) dictionaryArr[z].get(word);
            array[pos] = count.count;
            pos++;
        }

        // sort the array
        sortArray(array);
        if (array.length < m_WordsToKeep) {
            // if there aren't enough words, set the threshold to
            // minFreq
            prune[z] = m_minTermFreq;
        } else {
            // otherwise set it to be at least minFreq
            prune[z] = Math.max(m_minTermFreq, array[array.length - m_WordsToKeep]);
        }
    }

    // Convert the dictionary into an attribute index
    // and create one attribute per word
    FastVector attributes = new FastVector(totalsize + getInputFormat().numAttributes());

    // Add the non-converted attributes
    int classIndex = -1;
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (!m_SelectedRange.isInRange(i)) {
            if (getInputFormat().classIndex() == i) {
                classIndex = attributes.size();
            }
            attributes.addElement(getInputFormat().attribute(i).copy());
        }
    }

    // Add the word vector attributes (eliminating duplicates
    // that occur in multiple classes)
    TreeMap newDictionary = new TreeMap();
    int index = attributes.size();
    for (int z = 0; z < values; z++) {
        Iterator it = dictionaryArr[z].keySet().iterator();
        while (it.hasNext()) {
            String word = (String) it.next();
            Count count = (Count) dictionaryArr[z].get(word);
            if (count.count >= prune[z]) {
                if (newDictionary.get(word) == null) {
                    newDictionary.put(word, new Integer(index++));
                    attributes.addElement(new Attribute(m_Prefix + word));
                }
            }
        }
    }

    // Compute document frequencies
    m_DocsCounts = new int[attributes.size()];
    Iterator it = newDictionary.keySet().iterator();
    while (it.hasNext()) {
        String word = (String) it.next();
        int idx = ((Integer) newDictionary.get(word)).intValue();
        int docsCount = 0;
        for (int j = 0; j < values; j++) {
            Count c = (Count) dictionaryArr[j].get(word);
            if (c != null)
                docsCount += c.docCount;
            /*
             * if(!ctd.containsKey(j)){ Map<Integer,Integer> ma = new
             * HashMap<Integer,Integer>(); ctd.put(j, ma); }
             */
            // if(ctd.get(j)==null)
            // ctd.get(j).put(idx, c);
            // int tt = ctd.get(j).get(idx);
            /*
             * for(int kk = 0;kk<c.doclist.size();kk++) {
             * //if(getInputFormat
             * ().instance(c.doclist.get(kk)).value(idx)>0)
             * ctd.get(j).put(idx, tt++); }
             */}
        m_DocsCounts[idx] = docsCount;
    }

    // Trim vector and set instance variables
    attributes.trimToSize();
    m_Dictionary = newDictionary;
    m_NumInstances = getInputFormat().numInstances();

    // Set the filter's output format
    Instances outputFormat = new Instances(getInputFormat().relationName(), attributes, 0);
    outputFormat.setClassIndex(classIndex);
    setOutputFormat(outputFormat);
}

From source file:cn.edu.xjtu.dbmine.StringToWordVector.java

License:Open Source License

/**
 * Converts the instance w/o normalization.
 * //from w  w w .ja  va 2s  . c  o m
 * @oaram instance the instance to convert
 * @param v
 * @return the conerted instance
 */
private int convertInstancewoDocNorm(Instance instance, FastVector v) {

    // Convert the instance into a sorted set of indexes
    TreeMap contained = new TreeMap();

    // Copy all non-converted attributes from input to output
    int firstCopy = 0;
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (!m_SelectedRange.isInRange(i)) {
            if (getInputFormat().attribute(i).type() != Attribute.STRING) {
                // Add simple nominal and numeric attributes directly
                if (instance.value(i) != 0.0) {
                    contained.put(new Integer(firstCopy), new Double(instance.value(i)));
                }
            } else {
                if (instance.isMissing(i)) {
                    contained.put(new Integer(firstCopy), new Double(Instance.missingValue()));
                } else {

                    // If this is a string attribute, we have to first add
                    // this value to the range of possible values, then add
                    // its new internal index.
                    if (outputFormatPeek().attribute(firstCopy).numValues() == 0) {
                        // Note that the first string value in a
                        // SparseInstance doesn't get printed.
                        outputFormatPeek().attribute(firstCopy)
                                .addStringValue("Hack to defeat SparseInstance bug");
                    }
                    int newIndex = outputFormatPeek().attribute(firstCopy)
                            .addStringValue(instance.stringValue(i));
                    contained.put(new Integer(firstCopy), new Double(newIndex));
                }
            }
            firstCopy++;
        }
    }

    for (int j = 0; j < instance.numAttributes(); j++) {
        // if ((getInputFormat().attribute(j).type() == Attribute.STRING)
        if (m_SelectedRange.isInRange(j) && (instance.isMissing(j) == false)) {

            m_Tokenizer.tokenize(instance.stringValue(j));

            while (m_Tokenizer.hasMoreElements()) {
                String word = (String) m_Tokenizer.nextElement();
                if (this.m_lowerCaseTokens == true)
                    word = word.toLowerCase();
                word = m_Stemmer.stem(word);
                Integer index = (Integer) m_Dictionary.get(word);
                if (index != null) {
                    if (m_OutputCounts) { // Separate if here rather than
                        // two lines down to avoid
                        // hashtable lookup
                        Double count = (Double) contained.get(index);
                        if (count != null) {
                            contained.put(index, new Double(count.doubleValue() + 1.0));
                        } else {
                            contained.put(index, new Double(1));
                        }
                    } else {
                        contained.put(index, new Double(1));
                    }
                }
            }
        }
    }

    // Doing TFTransform
    if (m_TFTransform == true) {
        Iterator it = contained.keySet().iterator();
        for (int i = 0; it.hasNext(); i++) {
            Integer index = (Integer) it.next();
            if (index.intValue() >= firstCopy) {
                double val = ((Double) contained.get(index)).doubleValue();
                val = Math.log(val + 1);
                contained.put(index, new Double(val));
                Tfcontained.put(index, new Double(val));
                ;
            }
        }
    }

    // Doing IDFTransform
    if (m_IDFTransform == true) {
        Iterator it = contained.keySet().iterator();
        for (int i = 0; it.hasNext(); i++) {
            Integer index = (Integer) it.next();
            if (index.intValue() >= firstCopy) {
                double val = ((Double) contained.get(index)).doubleValue();
                val = val * Math.log(m_NumInstances / ((double) m_DocsCounts[index.intValue()] + 0.01));
                contained.put(index, new Double(val));
            }
        }
    }

    // Convert the set to structures needed to create a sparse instance.
    double[] values = new double[contained.size()];
    int[] indices = new int[contained.size()];
    Iterator it = contained.keySet().iterator();
    for (int i = 0; it.hasNext(); i++) {
        Integer index = (Integer) it.next();
        Double value = (Double) contained.get(index);
        values[i] = value.doubleValue();
        indices[i] = index.intValue();
    }

    Instance inst = new SparseInstance(instance.weight(), values, indices, outputFormatPeek().numAttributes());
    inst.setDataset(outputFormatPeek());

    v.addElement(inst);

    return firstCopy;
}

From source file:cn.ict.zyq.bestConf.bestConf.BestConf.java

License:Open Source License

public static ArrayList<Attribute> scaleDownDetour(Instances previousSet, Instance center) {
    ArrayList<Attribute> localAtts = new ArrayList<Attribute>();
    int attNum = center.numAttributes();

    int pos = previousSet.attribute(PerformanceAttName).index();

    //traverse each dimension
    Enumeration<Instance> enu;
    double minDis;
    for (int i = 0; i < attNum; i++) {
        if (i == pos)
            continue;

        enu = previousSet.enumerateInstances();
        minDis = Double.MAX_VALUE;

        while (enu.hasMoreElements()) {
            Instance ins = enu.nextElement();
            if (!ins.equals(center))
                minDis = Math.min((double) ((int) (Math.abs(ins.value(i) - center.value(i)) * 100)) / 100.0,
                        minDis);/*from  ww  w  .ja v  a2  s . c  o m*/
        }

        //now we set the range
        Properties p1 = new Properties();
        double upper = center.value(i) + minDis, lower = center.value(i) - minDis;

        TreeSet<Double> detourSet = new TreeSet<Double>();
        detourSet.add(upper);
        detourSet.add(lower);
        detourSet.add(previousSet.attribute(i).getUpperNumericBound());
        detourSet.add(previousSet.attribute(i).getLowerNumericBound());
        switch (detourSet.size()) {
        case 1:
            upper = lower = detourSet.first();
            break;
        case 2:
            upper = detourSet.last();
            lower = detourSet.first();
            break;
        case 3:
            upper = lower = detourSet.higher(detourSet.first());
            break;
        default://case 4:
            upper = detourSet.lower(detourSet.last());
            lower = detourSet.higher(detourSet.first());
            break;
        }

        p1.setProperty("range", "[" + String.valueOf(lower) + "," + String.valueOf(upper) + "]");
        ProtectedProperties prop1 = new ProtectedProperties(p1);

        localAtts.add(new Attribute(previousSet.attribute(i).name(), prop1));
    }

    return localAtts;
}

From source file:cn.ict.zyq.bestConf.bestConf.BestConf.java

License:Open Source License

public static void getBestPerfFrom(String path) {
    try {/*from w  w  w  . j  a v a2  s. c  om*/
        BestConf bestconf = new BestConf();
        Instances trainingSet = DataIOFile.loadDataFromArffFile(path);
        Instance best = trainingSet.firstInstance();
        //set the best configuration to the cluster
        Map<Attribute, Double> attsmap = new HashMap<Attribute, Double>();
        for (int i = 0; i < best.numAttributes() - 1; i++) {
            attsmap.put(best.attribute(i), best.value(i));
        }

        double bestPerf = bestconf.setOptimal(attsmap, "getBestPerfFrom");
        System.out.println("=========================================");
        System.err.println("The actual performance for the best point is : " + bestPerf);
        System.out.println("=========================================");
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:cn.ict.zyq.bestConf.bestConf.sampler.ConfigSampler.java

License:Open Source License

private static ArrayList<Attribute> scaleDownNeighbordists(Instances previousSet, Instance center) {
    ArrayList<Attribute> localAtts = new ArrayList<Attribute>();
    int attNum = center.numAttributes();

    int pos = -1;
    if (previousSet.attribute(PerformanceAttName) != null)
        pos = previousSet.attribute(PerformanceAttName).index();

    //traverse each dimension
    Enumeration<Instance> enu;
    double[] minDists = new double[2];
    double val;
    for (int i = 0; i < attNum; i++) {
        if (i == pos)
            continue;

        enu = previousSet.enumerateInstances();
        minDists[0] = 1 - Double.MAX_VALUE;
        minDists[1] = Double.MAX_VALUE;

        while (enu.hasMoreElements()) {
            Instance ins = enu.nextElement();
            if (!ins.equals(center)) {
                val = ins.value(i) - center.value(i);
                if (val < 0)
                    minDists[0] = Math.max((double) ((int) ((ins.value(i) - center.value(i)) * 1000)) / 1000.0,
                            minDists[0]);
                else
                    minDists[1] = Math.min((double) ((int) ((ins.value(i) - center.value(i)) * 1000)) / 1000.0,
                            minDists[1]);
            }//from ww  w .j a  va  2 s.  co  m
        }

        //now we set the range
        Properties p1 = new Properties();
        double upper = center.value(i) + minDists[1], lower = center.value(i) + minDists[0];

        TreeSet<Double> detourSet = new TreeSet<Double>();
        detourSet.add(upper);
        detourSet.add(lower);
        detourSet.add(previousSet.attribute(i).getUpperNumericBound());
        detourSet.add(previousSet.attribute(i).getLowerNumericBound());
        switch (detourSet.size()) {
        case 1:
            upper = lower = detourSet.first();
            break;
        case 2:
            upper = detourSet.last();
            lower = detourSet.first();
            break;
        case 3:
            upper = lower = detourSet.higher(detourSet.first());
            break;
        default://case 4:
            upper = detourSet.lower(detourSet.last());
            lower = detourSet.higher(detourSet.first());
            break;
        }

        p1.setProperty("range", "[" + String.valueOf(lower) + "," + String.valueOf(upper) + "]");
        ProtectedProperties prop1 = new ProtectedProperties(p1);

        localAtts.add(new Attribute(previousSet.attribute(i).name(), prop1));
    }

    return localAtts;
}

From source file:cn.ict.zyq.bestConf.bestConf.sampler.ConfigSampler.java

License:Open Source License

private static ArrayList<Attribute> scaleDownMindists(Instances previousSet, Instance center) {
    ArrayList<Attribute> localAtts = new ArrayList<Attribute>();
    int attNum = center.numAttributes();

    int pos = previousSet.attribute(PerformanceAttName).index();

    //traverse each dimension
    Enumeration<Instance> enu;
    double minDis;
    for (int i = 0; i < attNum; i++) {
        if (i == pos)
            continue;

        enu = previousSet.enumerateInstances();
        minDis = Double.MAX_VALUE;

        while (enu.hasMoreElements()) {
            Instance ins = enu.nextElement();
            if (!ins.equals(center))
                minDis = Math.min((double) ((int) (Math.abs(ins.value(i) - center.value(i)) * 1000)) / 1000.0,
                        minDis);//from  w  w  w  . jav a2s  .  c om
        }

        //now we set the range
        Properties p1 = new Properties();
        double upper = center.value(i) + minDis, lower = center.value(i) - minDis;

        TreeSet<Double> detourSet = new TreeSet<Double>();
        detourSet.add(upper);
        detourSet.add(lower);
        detourSet.add(previousSet.attribute(i).getUpperNumericBound());
        detourSet.add(previousSet.attribute(i).getLowerNumericBound());
        switch (detourSet.size()) {
        case 1:
            upper = lower = detourSet.first();
            break;
        case 2:
            upper = detourSet.last();
            lower = detourSet.first();
            break;
        case 3:
            upper = lower = detourSet.higher(detourSet.first());
            break;
        default://case 4:
            upper = detourSet.lower(detourSet.last());
            lower = detourSet.higher(detourSet.first());
            break;
        }

        p1.setProperty("range", "[" + String.valueOf(lower) + "," + String.valueOf(upper) + "]");
        ProtectedProperties prop1 = new ProtectedProperties(p1);

        localAtts.add(new Attribute(previousSet.attribute(i).name(), prop1));
    }

    return localAtts;
}

From source file:cn.ict.zyq.bestConf.cluster.Main.AutoTestAdjust.java

License:Open Source License

public static String getMD5(Instance ins) {
    StringBuffer name = new StringBuffer("");
    for (int i = 0; i < ins.numAttributes() - 2; i++) {
        name.append(Math.round(ins.value(ins.attribute(i))) + ",");
    }//w  ww.  ja va  2  s  .c om
    return getMD5(name.toString());
}