List of usage examples for weka.core.Instance.numAttributes()
public int numAttributes();
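Before the scraped examples below, a minimal, self-contained sketch of the call; the ARFF path and the Weka 3.6-era API are assumptions for illustration:

import java.io.BufferedReader;
import java.io.FileReader;
import weka.core.Instance;
import weka.core.Instances;

public class NumAttributesDemo {
    public static void main(String[] args) throws Exception {
        // Load any ARFF file; the path is a placeholder
        Instances data = new Instances(new BufferedReader(new FileReader("iris.arff")));
        Instance first = data.instance(0);
        // numAttributes() counts every attribute of the instance, class attribute included
        for (int i = 0; i < first.numAttributes(); i++) {
            System.out.println(first.attribute(i).name() + " = " + first.value(i));
        }
    }
}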
From source file:cluster.ABC.ClusterUtils.java
License:Open Source License
/**
 * Normalizes the values of a (non-sparse) Instance to unit L2 norm.
 *
 * @author Sugato Basu
 * @param inst Instance to be normalized
 */
public static void normalizeInstance(Instance inst) throws Exception {
    if (inst instanceof SparseInstance) {
        System.err.println("Is SparseInstance, using normalizeSparseInstance function instead");
        normalizeSparseInstance(inst);
        return; // without this, the sparse instance would be normalized a second time below
    }
    double norm = 0;
    double[] values = inst.toDoubleArray();
    for (int i = 0; i < values.length; i++) {
        if (i != inst.classIndex()) { // don't normalize the class index
            norm += values[i] * values[i];
        }
    }
    norm = Math.sqrt(norm);
    for (int i = 0; i < values.length; i++) {
        if (i != inst.classIndex()) { // don't normalize the class index
            values[i] /= norm;
        }
    }
    for (int i = 0; i < inst.numAttributes(); i++) {
        inst.setValue(i, values[i]);
    }
}
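A possible call site for normalizeInstance above; a sketch assuming a loaded dataset and that ClusterUtils is on the classpath:

import weka.core.Instances;

public class NormalizeAll {
    // Sketch: L2-normalize every instance of a dataset in place
    public static void normalizeAll(Instances data) throws Exception {
        for (int i = 0; i < data.numInstances(); i++) {
            ClusterUtils.normalizeInstance(data.instance(i));
        }
    }
}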
From source file:cluster.ABC.ClusterUtils.java
License:Open Source License
/**
 * Divides every attribute value in an instance by the instance weight --
 * useful for finding the mean of a cluster in Euclidean space.
 *
 * @param inst Instance passed in for normalization (destructive update)
 */
public static void normalizeByWeight(Instance inst) {
    double weight = inst.weight();
    if (inst instanceof SparseInstance) {
        for (int i = 0; i < inst.numValues(); i++) {
            inst.setValueSparse(i, inst.valueSparse(i) / weight);
        }
    } else {
        for (int i = 0; i < inst.numAttributes(); i++) {
            inst.setValue(i, inst.value(i) / weight);
        }
    }
}
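The division only makes sense when the instance's weight holds an accumulated total; a standalone sketch with assumed numbers, using the Weka 3.6-era Instance class these examples target:

import weka.core.Instance;

public class WeightDemo {
    public static void main(String[] args) {
        // An instance whose values are a running sum over 4 cluster members
        Instance sum = new Instance(4.0, new double[] { 8.0, 12.0 });
        ClusterUtils.normalizeByWeight(sum); // values become {2.0, 3.0}
        System.out.println(sum);
    }
}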
From source file:cluster.ABC.ClusterUtils.java
License:Open Source License
/** Finds the sum of two instances (handles sparse and non-sparse). */
public static Instance sumInstances(Instance inst1, Instance inst2, Instances m_Instances) throws Exception {
    int numAttributes = inst1.numAttributes();
    if (inst2.numAttributes() != numAttributes) {
        throw new Exception("Error!! inst1 and inst2 should have same number of attributes.");
    }
    double weight1 = inst1.weight(), weight2 = inst2.weight();
    double[] values = new double[numAttributes]; // zero-initialized by default
    if (inst1 instanceof SparseInstance && inst2 instanceof SparseInstance) {
        for (int i = 0; i < inst1.numValues(); i++) {
            values[inst1.index(i)] = inst1.valueSparse(i);
        }
        for (int i = 0; i < inst2.numValues(); i++) {
            values[inst2.index(i)] += inst2.valueSparse(i);
        }
        SparseInstance newInst = new SparseInstance(weight1 + weight2, values);
        newInst.setDataset(m_Instances);
        return newInst;
    } else if (!(inst1 instanceof SparseInstance) && !(inst2 instanceof SparseInstance)) {
        for (int i = 0; i < numAttributes; i++) {
            values[i] = inst1.value(i) + inst2.value(i);
        }
    } else {
        throw new Exception("Error!! inst1 and inst2 should be both of same type -- sparse or non-sparse");
    }
    Instance newInst = new Instance(weight1 + weight2, values);
    newInst.setDataset(m_Instances);
    return newInst;
}
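Taken together, sumInstances and normalizeByWeight give one way to compute a cluster centroid; a sketch under that reading (the member list and header are hypothetical):

import java.util.List;
import weka.core.Instance;
import weka.core.Instances;

public class CentroidSketch {
    // Sketch: fold the members into a running sum, then divide by the accumulated weight
    public static Instance centroid(List<Instance> members, Instances header) throws Exception {
        Instance sum = members.get(0);
        for (int i = 1; i < members.size(); i++) {
            sum = ClusterUtils.sumInstances(sum, members.get(i), header);
        }
        ClusterUtils.normalizeByWeight(sum); // sum.weight() is the total member weight
        return sum;
    }
}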
From source file:cn.edu.xjtu.dbmine.StringToWordVector.java
License:Open Source License
/**
 * Determines the dictionary.
 */
private void determineDictionary() {
    // Initialize stopwords
    Stopwords stopwords = new Stopwords();
    if (getUseStoplist()) {
        try {
            if (getStopwords().exists() && !getStopwords().isDirectory())
                stopwords.read(getStopwords());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    // Operate on a per-class basis if the class attribute is set
    int classInd = getInputFormat().classIndex();
    int values = 1;
    if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
        values = getInputFormat().attribute(classInd).numValues();
    }

    TreeMap[] dictionaryArr = new TreeMap[values];
    for (int i = 0; i < values; i++) {
        dictionaryArr[i] = new TreeMap();
    }

    // Make sure we know which fields to convert
    determineSelectedRange();

    // Tokenize all training text into an ordered map of "words".
    long pruneRate = Math.round((m_PeriodicPruningRate / 100.0) * getInputFormat().numInstances());
    for (int i = 0; i < getInputFormat().numInstances(); i++) {
        Instance instance = getInputFormat().instance(i);
        int vInd = 0;
        if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
            vInd = (int) instance.classValue();
        }

        // Iterate through all relevant string attributes of the current instance
        Hashtable h = new Hashtable();
        for (int j = 0; j < instance.numAttributes(); j++) {
            if (m_SelectedRange.isInRange(j) && (instance.isMissing(j) == false)) {
                // Get tokenizer
                m_Tokenizer.tokenize(instance.stringValue(j));

                // Iterate through tokens, perform stemming, and remove stopwords (if required)
                while (m_Tokenizer.hasMoreElements()) {
                    String word = ((String) m_Tokenizer.nextElement()).intern();
                    if (this.m_lowerCaseTokens == true)
                        word = word.toLowerCase();
                    word = m_Stemmer.stem(word);
                    if (this.m_useStoplist == true)
                        if (stopwords.is(word))
                            continue;
                    if (!h.containsKey(word))
                        h.put(word, new Integer(0));
                    Count count = (Count) dictionaryArr[vInd].get(word);
                    if (count == null) {
                        dictionaryArr[vInd].put(word, new Count(1));
                    } else {
                        count.count++;
                    }
                }
            }
        }

        // Update the docCount for the words that have occurred in this instance (document)
        Enumeration e = h.keys();
        while (e.hasMoreElements()) {
            String word = (String) e.nextElement();
            Count c = (Count) dictionaryArr[vInd].get(word);
            if (c != null) {
                c.docCount++;
            } else
                System.err.println(
                        "Warning: A word should definitely be in the dictionary. Please check the code.");
        }

        // Periodically prune words that have occurred at most once so far
        if (pruneRate > 0) {
            if (i % pruneRate == 0 && i > 0) {
                for (int z = 0; z < values; z++) {
                    Vector d = new Vector(1000);
                    Iterator it = dictionaryArr[z].keySet().iterator();
                    while (it.hasNext()) {
                        String word = (String) it.next();
                        Count count = (Count) dictionaryArr[z].get(word);
                        if (count.count <= 1) {
                            d.add(word);
                        }
                    }
                    Iterator iter = d.iterator();
                    while (iter.hasNext()) {
                        String word = (String) iter.next();
                        dictionaryArr[z].remove(word);
                    }
                }
            }
        }
    }

    // Figure out the minimum required word frequency
    int totalsize = 0;
    int[] prune = new int[values];
    for (int z = 0; z < values; z++) {
        totalsize += dictionaryArr[z].size();

        int[] array = new int[dictionaryArr[z].size()];
        int pos = 0;
        Iterator it = dictionaryArr[z].keySet().iterator();
        while (it.hasNext()) {
            String word = (String) it.next();
            Count count = (Count) dictionaryArr[z].get(word);
            array[pos] = count.count;
            pos++;
        }

        // Sort the array
        sortArray(array);
        if (array.length < m_WordsToKeep) {
            // If there aren't enough words, set the threshold to minFreq
            prune[z] = m_minTermFreq;
        } else {
            // Otherwise set it to be at least minFreq
            prune[z] = Math.max(m_minTermFreq, array[array.length - m_WordsToKeep]);
        }
    }

    // Convert the dictionary into an attribute index and create one attribute per word
    FastVector attributes = new FastVector(totalsize + getInputFormat().numAttributes());

    // Add the non-converted attributes
    int classIndex = -1;
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (!m_SelectedRange.isInRange(i)) {
            if (getInputFormat().classIndex() == i) {
                classIndex = attributes.size();
            }
            attributes.addElement(getInputFormat().attribute(i).copy());
        }
    }

    // Add the word vector attributes (eliminating duplicates that occur in multiple classes)
    TreeMap newDictionary = new TreeMap();
    int index = attributes.size();
    for (int z = 0; z < values; z++) {
        Iterator it = dictionaryArr[z].keySet().iterator();
        while (it.hasNext()) {
            String word = (String) it.next();
            Count count = (Count) dictionaryArr[z].get(word);
            if (count.count >= prune[z]) {
                if (newDictionary.get(word) == null) {
                    newDictionary.put(word, new Integer(index++));
                    attributes.addElement(new Attribute(m_Prefix + word));
                }
            }
        }
    }

    // Compute document frequencies
    m_DocsCounts = new int[attributes.size()];
    Iterator it = newDictionary.keySet().iterator();
    while (it.hasNext()) {
        String word = (String) it.next();
        int idx = ((Integer) newDictionary.get(word)).intValue();
        int docsCount = 0;
        for (int j = 0; j < values; j++) {
            Count c = (Count) dictionaryArr[j].get(word);
            if (c != null)
                docsCount += c.docCount;
        }
        m_DocsCounts[idx] = docsCount;
    }

    // Trim vector and set instance variables
    attributes.trimToSize();
    m_Dictionary = newDictionary;
    m_NumInstances = getInputFormat().numInstances();

    // Set the filter's output format
    Instances outputFormat = new Instances(getInputFormat().relationName(), attributes, 0);
    outputFormat.setClassIndex(classIndex);
    setOutputFormat(outputFormat);
}
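This class is a modified copy of Weka's StringToWordVector; for comparison, a sketch of driving the stock filter with roughly matching options (the data is assumed to contain string attributes):

import weka.core.Instances;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.StringToWordVector;

public class VectorizeDemo {
    public static Instances vectorize(Instances data) throws Exception {
        StringToWordVector filter = new StringToWordVector();
        filter.setWordsToKeep(1000);     // per-class word cap, as in the code above
        filter.setMinTermFreq(1);        // minimum term frequency threshold
        filter.setLowerCaseTokens(true); // mirrors m_lowerCaseTokens
        filter.setInputFormat(data);     // triggers dictionary determination
        return Filter.useFilter(data, filter);
    }
}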
From source file:cn.edu.xjtu.dbmine.StringToWordVector.java
License:Open Source License
/**
 * Converts the instance without normalization.
 *
 * @param instance the instance to convert
 * @param v the vector to which the converted instance is added
 * @return the index of the first word-vector attribute (the number of copied attributes)
 */
private int convertInstancewoDocNorm(Instance instance, FastVector v) {
    // Convert the instance into a sorted set of indexes
    TreeMap contained = new TreeMap();

    // Copy all non-converted attributes from input to output
    int firstCopy = 0;
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (!m_SelectedRange.isInRange(i)) {
            if (getInputFormat().attribute(i).type() != Attribute.STRING) {
                // Add simple nominal and numeric attributes directly
                if (instance.value(i) != 0.0) {
                    contained.put(new Integer(firstCopy), new Double(instance.value(i)));
                }
            } else {
                if (instance.isMissing(i)) {
                    contained.put(new Integer(firstCopy), new Double(Instance.missingValue()));
                } else {
                    // If this is a string attribute, we have to first add this value
                    // to the range of possible values, then add its new internal index.
                    if (outputFormatPeek().attribute(firstCopy).numValues() == 0) {
                        // Note that the first string value in a SparseInstance doesn't get printed.
                        outputFormatPeek().attribute(firstCopy)
                                .addStringValue("Hack to defeat SparseInstance bug");
                    }
                    int newIndex = outputFormatPeek().attribute(firstCopy)
                            .addStringValue(instance.stringValue(i));
                    contained.put(new Integer(firstCopy), new Double(newIndex));
                }
            }
            firstCopy++;
        }
    }

    // Tokenize the in-range string attributes and count word occurrences
    for (int j = 0; j < instance.numAttributes(); j++) {
        if (m_SelectedRange.isInRange(j) && (instance.isMissing(j) == false)) {
            m_Tokenizer.tokenize(instance.stringValue(j));
            while (m_Tokenizer.hasMoreElements()) {
                String word = (String) m_Tokenizer.nextElement();
                if (this.m_lowerCaseTokens == true)
                    word = word.toLowerCase();
                word = m_Stemmer.stem(word);
                Integer index = (Integer) m_Dictionary.get(word);
                if (index != null) {
                    if (m_OutputCounts) {
                        // Separate "if" here rather than two lines down to avoid a hashtable lookup
                        Double count = (Double) contained.get(index);
                        if (count != null) {
                            contained.put(index, new Double(count.doubleValue() + 1.0));
                        } else {
                            contained.put(index, new Double(1));
                        }
                    } else {
                        contained.put(index, new Double(1));
                    }
                }
            }
        }
    }

    // Doing the TF transform
    if (m_TFTransform == true) {
        Iterator it = contained.keySet().iterator();
        for (int i = 0; it.hasNext(); i++) {
            Integer index = (Integer) it.next();
            if (index.intValue() >= firstCopy) {
                double val = ((Double) contained.get(index)).doubleValue();
                val = Math.log(val + 1);
                contained.put(index, new Double(val));
                Tfcontained.put(index, new Double(val));
            }
        }
    }

    // Doing the IDF transform
    if (m_IDFTransform == true) {
        Iterator it = contained.keySet().iterator();
        for (int i = 0; it.hasNext(); i++) {
            Integer index = (Integer) it.next();
            if (index.intValue() >= firstCopy) {
                double val = ((Double) contained.get(index)).doubleValue();
                val = val * Math.log(m_NumInstances / ((double) m_DocsCounts[index.intValue()] + 0.01));
                contained.put(index, new Double(val));
            }
        }
    }

    // Convert the set to the structures needed to create a sparse instance.
    double[] values = new double[contained.size()];
    int[] indices = new int[contained.size()];
    Iterator it = contained.keySet().iterator();
    for (int i = 0; it.hasNext(); i++) {
        Integer index = (Integer) it.next();
        Double value = (Double) contained.get(index);
        values[i] = value.doubleValue();
        indices[i] = index.intValue();
    }
    Instance inst = new SparseInstance(instance.weight(), values, indices, outputFormatPeek().numAttributes());
    inst.setDataset(outputFormatPeek());
    v.addElement(inst);
    return firstCopy;
}
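The two transforms above reduce to the arithmetic below; a standalone sketch with illustrative numbers (note the 0.01 smoothing term is specific to this code, not stock Weka):

public class TfIdfSketch {
    public static void main(String[] args) {
        double rawCount = 3;  // occurrences of a word in one document
        int numDocs = 100;    // m_NumInstances
        int docCount = 10;    // m_DocsCounts[index] for that word
        double tf = Math.log(rawCount + 1);                        // TFTransform
        double tfidf = tf * Math.log(numDocs / (docCount + 0.01)); // IDFTransform as coded above
        System.out.println(tfidf);
    }
}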
From source file:cn.ict.zyq.bestConf.bestConf.BestConf.java
License:Open Source License
public static ArrayList<Attribute> scaleDownDetour(Instances previousSet, Instance center) {
    ArrayList<Attribute> localAtts = new ArrayList<Attribute>();
    int attNum = center.numAttributes();
    int pos = previousSet.attribute(PerformanceAttName).index();

    // Traverse each dimension
    Enumeration<Instance> enu;
    double minDis;
    for (int i = 0; i < attNum; i++) {
        if (i == pos)
            continue;

        // Find the smallest distance (truncated to two decimals) from the center along dimension i
        enu = previousSet.enumerateInstances();
        minDis = Double.MAX_VALUE;
        while (enu.hasMoreElements()) {
            Instance ins = enu.nextElement();
            if (!ins.equals(center))
                minDis = Math.min((double) ((int) (Math.abs(ins.value(i) - center.value(i)) * 100)) / 100.0,
                        minDis);
        }

        // Now we set the range, clipped to the attribute's numeric bounds
        Properties p1 = new Properties();
        double upper = center.value(i) + minDis, lower = center.value(i) - minDis;
        TreeSet<Double> detourSet = new TreeSet<Double>();
        detourSet.add(upper);
        detourSet.add(lower);
        detourSet.add(previousSet.attribute(i).getUpperNumericBound());
        detourSet.add(previousSet.attribute(i).getLowerNumericBound());
        switch (detourSet.size()) {
        case 1:
            upper = lower = detourSet.first();
            break;
        case 2:
            upper = detourSet.last();
            lower = detourSet.first();
            break;
        case 3:
            upper = lower = detourSet.higher(detourSet.first());
            break;
        default: // case 4
            upper = detourSet.lower(detourSet.last());
            lower = detourSet.higher(detourSet.first());
            break;
        }
        p1.setProperty("range", "[" + String.valueOf(lower) + "," + String.valueOf(upper) + "]");
        ProtectedProperties prop1 = new ProtectedProperties(p1);
        localAtts.add(new Attribute(previousSet.attribute(i).name(), prop1));
    }
    return localAtts;
}
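The range-bounded attributes built here (and in the two scaleDown variants below) rely on Weka's attribute-metadata mechanism; a minimal sketch of that mechanism alone, with an illustrative name and bounds:

import java.util.Properties;
import weka.core.Attribute;
import weka.core.ProtectedProperties;

public class RangedAttributeDemo {
    public static void main(String[] args) {
        Properties p = new Properties();
        p.setProperty("range", "[0.5,9.5]"); // lower and upper numeric bounds
        Attribute att = new Attribute("someParam", new ProtectedProperties(p));
        System.out.println(att.getLowerNumericBound()); // 0.5
        System.out.println(att.getUpperNumericBound()); // 9.5
    }
}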
From source file:cn.ict.zyq.bestConf.bestConf.BestConf.java
License:Open Source License
public static void getBestPerfFrom(String path) {
    try {
        BestConf bestconf = new BestConf();
        Instances trainingSet = DataIOFile.loadDataFromArffFile(path);
        Instance best = trainingSet.firstInstance();

        // Set the best configuration to the cluster (all attributes except the last)
        Map<Attribute, Double> attsmap = new HashMap<Attribute, Double>();
        for (int i = 0; i < best.numAttributes() - 1; i++) {
            attsmap.put(best.attribute(i), best.value(i));
        }
        double bestPerf = bestconf.setOptimal(attsmap, "getBestPerfFrom");
        System.out.println("=========================================");
        System.err.println("The actual performance for the best point is : " + bestPerf);
        System.out.println("=========================================");
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:cn.ict.zyq.bestConf.bestConf.sampler.ConfigSampler.java
License:Open Source License
private static ArrayList<Attribute> scaleDownNeighbordists(Instances previousSet, Instance center) {
    ArrayList<Attribute> localAtts = new ArrayList<Attribute>();
    int attNum = center.numAttributes();
    int pos = -1;
    if (previousSet.attribute(PerformanceAttName) != null)
        pos = previousSet.attribute(PerformanceAttName).index();

    // Traverse each dimension
    Enumeration<Instance> enu;
    double[] minDists = new double[2];
    double val;
    for (int i = 0; i < attNum; i++) {
        if (i == pos)
            continue;

        // Find the nearest signed distances below (minDists[0]) and above (minDists[1]) the center
        enu = previousSet.enumerateInstances();
        minDists[0] = 1 - Double.MAX_VALUE;
        minDists[1] = Double.MAX_VALUE;
        while (enu.hasMoreElements()) {
            Instance ins = enu.nextElement();
            if (!ins.equals(center)) {
                val = ins.value(i) - center.value(i);
                if (val < 0)
                    minDists[0] = Math.max((double) ((int) ((ins.value(i) - center.value(i)) * 1000)) / 1000.0,
                            minDists[0]);
                else
                    minDists[1] = Math.min((double) ((int) ((ins.value(i) - center.value(i)) * 1000)) / 1000.0,
                            minDists[1]);
            }
        }

        // Now we set the range, clipped to the attribute's numeric bounds
        Properties p1 = new Properties();
        double upper = center.value(i) + minDists[1], lower = center.value(i) + minDists[0];
        TreeSet<Double> detourSet = new TreeSet<Double>();
        detourSet.add(upper);
        detourSet.add(lower);
        detourSet.add(previousSet.attribute(i).getUpperNumericBound());
        detourSet.add(previousSet.attribute(i).getLowerNumericBound());
        switch (detourSet.size()) {
        case 1:
            upper = lower = detourSet.first();
            break;
        case 2:
            upper = detourSet.last();
            lower = detourSet.first();
            break;
        case 3:
            upper = lower = detourSet.higher(detourSet.first());
            break;
        default: // case 4
            upper = detourSet.lower(detourSet.last());
            lower = detourSet.higher(detourSet.first());
            break;
        }
        p1.setProperty("range", "[" + String.valueOf(lower) + "," + String.valueOf(upper) + "]");
        ProtectedProperties prop1 = new ProtectedProperties(p1);
        localAtts.add(new Attribute(previousSet.attribute(i).name(), prop1));
    }
    return localAtts;
}
From source file:cn.ict.zyq.bestConf.bestConf.sampler.ConfigSampler.java
License:Open Source License
private static ArrayList<Attribute> scaleDownMindists(Instances previousSet, Instance center) {
    ArrayList<Attribute> localAtts = new ArrayList<Attribute>();
    int attNum = center.numAttributes();
    int pos = previousSet.attribute(PerformanceAttName).index();

    // Traverse each dimension
    Enumeration<Instance> enu;
    double minDis;
    for (int i = 0; i < attNum; i++) {
        if (i == pos)
            continue;

        // Find the smallest distance (truncated to three decimals) from the center along dimension i
        enu = previousSet.enumerateInstances();
        minDis = Double.MAX_VALUE;
        while (enu.hasMoreElements()) {
            Instance ins = enu.nextElement();
            if (!ins.equals(center))
                minDis = Math.min((double) ((int) (Math.abs(ins.value(i) - center.value(i)) * 1000)) / 1000.0,
                        minDis);
        }

        // Now we set the range, clipped to the attribute's numeric bounds
        Properties p1 = new Properties();
        double upper = center.value(i) + minDis, lower = center.value(i) - minDis;
        TreeSet<Double> detourSet = new TreeSet<Double>();
        detourSet.add(upper);
        detourSet.add(lower);
        detourSet.add(previousSet.attribute(i).getUpperNumericBound());
        detourSet.add(previousSet.attribute(i).getLowerNumericBound());
        switch (detourSet.size()) {
        case 1:
            upper = lower = detourSet.first();
            break;
        case 2:
            upper = detourSet.last();
            lower = detourSet.first();
            break;
        case 3:
            upper = lower = detourSet.higher(detourSet.first());
            break;
        default: // case 4
            upper = detourSet.lower(detourSet.last());
            lower = detourSet.higher(detourSet.first());
            break;
        }
        p1.setProperty("range", "[" + String.valueOf(lower) + "," + String.valueOf(upper) + "]");
        ProtectedProperties prop1 = new ProtectedProperties(p1);
        localAtts.add(new Attribute(previousSet.attribute(i).name(), prop1));
    }
    return localAtts;
}
From source file:cn.ict.zyq.bestConf.cluster.Main.AutoTestAdjust.java
License:Open Source License
public static String getMD5(Instance ins) {
    // Build a key from all but the last two attribute values, then hash it
    StringBuffer name = new StringBuffer("");
    for (int i = 0; i < ins.numAttributes() - 2; i++) {
        name.append(Math.round(ins.value(ins.attribute(i)))).append(",");
    }
    return getMD5(name.toString());
}
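The string overload getMD5(String) called above is not shown in this listing; a plausible reconstruction (an assumption, not the project's actual code) using java.security.MessageDigest:

import java.math.BigInteger;
import java.security.MessageDigest;

public class Md5Sketch {
    // Hypothetical helper: hex-encoded MD5 digest of a key string
    public static String getMD5(String key) throws Exception {
        MessageDigest md = MessageDigest.getInstance("MD5");
        byte[] digest = md.digest(key.getBytes("UTF-8"));
        return String.format("%032x", new BigInteger(1, digest));
    }
}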