List of usage examples for weka.core.Instance.numAttributes()
public int numAttributes();
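Before the scraped examples below, a minimal, self-contained sketch of the call; the ARFF path and the Weka 3.6-era API are assumptions for illustration:

import java.io.BufferedReader;
import java.io.FileReader;
import weka.core.Instance;
import weka.core.Instances;

public class NumAttributesDemo {
    public static void main(String[] args) throws Exception {
        // Load any ARFF file; the path is a placeholder
        Instances data = new Instances(new BufferedReader(new FileReader("iris.arff")));
        Instance first = data.instance(0);
        // numAttributes() counts every attribute of the instance, class attribute included
        for (int i = 0; i < first.numAttributes(); i++) {
            System.out.println(first.attribute(i).name() + " = " + first.value(i));
        }
    }
}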
From source file:cluster.ABC.ClusterUtils.java
License:Open Source License
/**
 * Normalizes the values of a (non-sparse) Instance to unit L2 norm.
 *
 * @author Sugato Basu
 * @param inst Instance to be normalized
 */
public static void normalizeInstance(Instance inst) throws Exception {
    if (inst instanceof SparseInstance) {
        System.err.println("Is SparseInstance, using normalizeSparseInstance function instead");
        normalizeSparseInstance(inst);
        return; // without this, the sparse instance would be normalized a second time below
    }
    double norm = 0;
    double[] values = inst.toDoubleArray();
    for (int i = 0; i < values.length; i++) {
        if (i != inst.classIndex()) { // don't normalize the class index
            norm += values[i] * values[i];
        }
    }
    norm = Math.sqrt(norm);
    for (int i = 0; i < values.length; i++) {
        if (i != inst.classIndex()) { // don't normalize the class index
            values[i] /= norm;
        }
    }
    for (int i = 0; i < inst.numAttributes(); i++) {
        inst.setValue(i, values[i]);
    }
}
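A possible call site for normalizeInstance above; a sketch assuming a loaded dataset and that ClusterUtils is on the classpath:

import weka.core.Instances;

public class NormalizeAll {
    // Sketch: L2-normalize every instance of a dataset in place
    public static void normalizeAll(Instances data) throws Exception {
        for (int i = 0; i < data.numInstances(); i++) {
            ClusterUtils.normalizeInstance(data.instance(i));
        }
    }
}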
From source file:cluster.ABC.ClusterUtils.java
License:Open Source License
/**
 * Divides every attribute value in an instance by the instance weight --
 * useful for finding the mean of a cluster in Euclidean space.
 *
 * @param inst Instance passed in for normalization (destructive update)
 */
public static void normalizeByWeight(Instance inst) {
    double weight = inst.weight();
    if (inst instanceof SparseInstance) {
        for (int i = 0; i < inst.numValues(); i++) {
            inst.setValueSparse(i, inst.valueSparse(i) / weight);
        }
    } else {
        for (int i = 0; i < inst.numAttributes(); i++) {
            inst.setValue(i, inst.value(i) / weight);
        }
    }
}
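The division only makes sense when the instance's weight holds an accumulated total; a standalone sketch with assumed numbers, using the Weka 3.6-era Instance class these examples target:

import weka.core.Instance;

public class WeightDemo {
    public static void main(String[] args) {
        // An instance whose values are a running sum over 4 cluster members
        Instance sum = new Instance(4.0, new double[] { 8.0, 12.0 });
        ClusterUtils.normalizeByWeight(sum); // values become {2.0, 3.0}
        System.out.println(sum);
    }
}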
From source file:cluster.ABC.ClusterUtils.java
License:Open Source License
/** Finds the sum of two instances (handles sparse and non-sparse). */
public static Instance sumInstances(Instance inst1, Instance inst2, Instances m_Instances) throws Exception {
    int numAttributes = inst1.numAttributes();
    if (inst2.numAttributes() != numAttributes) {
        throw new Exception("Error!! inst1 and inst2 should have same number of attributes.");
    }
    double weight1 = inst1.weight(), weight2 = inst2.weight();
    double[] values = new double[numAttributes]; // zero-initialized by default
    if (inst1 instanceof SparseInstance && inst2 instanceof SparseInstance) {
        for (int i = 0; i < inst1.numValues(); i++) {
            values[inst1.index(i)] = inst1.valueSparse(i);
        }
        for (int i = 0; i < inst2.numValues(); i++) {
            values[inst2.index(i)] += inst2.valueSparse(i);
        }
        SparseInstance newInst = new SparseInstance(weight1 + weight2, values);
        newInst.setDataset(m_Instances);
        return newInst;
    } else if (!(inst1 instanceof SparseInstance) && !(inst2 instanceof SparseInstance)) {
        for (int i = 0; i < numAttributes; i++) {
            values[i] = inst1.value(i) + inst2.value(i);
        }
    } else {
        throw new Exception("Error!! inst1 and inst2 should be both of same type -- sparse or non-sparse");
    }
    Instance newInst = new Instance(weight1 + weight2, values);
    newInst.setDataset(m_Instances);
    return newInst;
}
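Taken together, sumInstances and normalizeByWeight give one way to compute a cluster centroid; a sketch under that reading (the member list and header are hypothetical):

import java.util.List;
import weka.core.Instance;
import weka.core.Instances;

public class CentroidSketch {
    // Sketch: fold the members into a running sum, then divide by the accumulated weight
    public static Instance centroid(List<Instance> members, Instances header) throws Exception {
        Instance sum = members.get(0);
        for (int i = 1; i < members.size(); i++) {
            sum = ClusterUtils.sumInstances(sum, members.get(i), header);
        }
        ClusterUtils.normalizeByWeight(sum); // sum.weight() is the total member weight
        return sum;
    }
}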
From source file:cn.edu.xjtu.dbmine.StringToWordVector.java
License:Open Source License
/**
 * Determines the dictionary.
 */
private void determineDictionary() {
    // Initialize stopwords
    Stopwords stopwords = new Stopwords();
    if (getUseStoplist()) {
        try {
            if (getStopwords().exists() && !getStopwords().isDirectory())
                stopwords.read(getStopwords());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    // Operate on a per-class basis if the class attribute is set
    int classInd = getInputFormat().classIndex();
    int values = 1;
    if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
        values = getInputFormat().attribute(classInd).numValues();
    }

    TreeMap[] dictionaryArr = new TreeMap[values];
    for (int i = 0; i < values; i++) {
        dictionaryArr[i] = new TreeMap();
    }

    // Make sure we know which fields to convert
    determineSelectedRange();

    // Tokenize all training text into an ordered map of "words".
    long pruneRate = Math.round((m_PeriodicPruningRate / 100.0) * getInputFormat().numInstances());
    for (int i = 0; i < getInputFormat().numInstances(); i++) {
        Instance instance = getInputFormat().instance(i);
        int vInd = 0;
        if (!m_doNotOperateOnPerClassBasis && (classInd != -1)) {
            vInd = (int) instance.classValue();
        }

        // Iterate through all relevant string attributes of the current instance
        Hashtable h = new Hashtable();
        for (int j = 0; j < instance.numAttributes(); j++) {
            if (m_SelectedRange.isInRange(j) && (instance.isMissing(j) == false)) {
                // Get tokenizer
                m_Tokenizer.tokenize(instance.stringValue(j));

                // Iterate through tokens, perform stemming, and remove stopwords (if required)
                while (m_Tokenizer.hasMoreElements()) {
                    String word = ((String) m_Tokenizer.nextElement()).intern();
                    if (this.m_lowerCaseTokens == true)
                        word = word.toLowerCase();
                    word = m_Stemmer.stem(word);
                    if (this.m_useStoplist == true)
                        if (stopwords.is(word))
                            continue;
                    if (!h.containsKey(word))
                        h.put(word, new Integer(0));
                    Count count = (Count) dictionaryArr[vInd].get(word);
                    if (count == null) {
                        dictionaryArr[vInd].put(word, new Count(1));
                    } else {
                        count.count++;
                    }
                }
            }
        }

        // Update the docCount for the words that have occurred in this instance (document)
        Enumeration e = h.keys();
        while (e.hasMoreElements()) {
            String word = (String) e.nextElement();
            Count c = (Count) dictionaryArr[vInd].get(word);
            if (c != null) {
                c.docCount++;
            } else
                System.err.println(
                        "Warning: A word should definitely be in the dictionary. Please check the code.");
        }

        // Periodically prune words that have occurred at most once so far
        if (pruneRate > 0) {
            if (i % pruneRate == 0 && i > 0) {
                for (int z = 0; z < values; z++) {
                    Vector d = new Vector(1000);
                    Iterator it = dictionaryArr[z].keySet().iterator();
                    while (it.hasNext()) {
                        String word = (String) it.next();
                        Count count = (Count) dictionaryArr[z].get(word);
                        if (count.count <= 1) {
                            d.add(word);
                        }
                    }
                    Iterator iter = d.iterator();
                    while (iter.hasNext()) {
                        String word = (String) iter.next();
                        dictionaryArr[z].remove(word);
                    }
                }
            }
        }
    }

    // Figure out the minimum required word frequency
    int totalsize = 0;
    int[] prune = new int[values];
    for (int z = 0; z < values; z++) {
        totalsize += dictionaryArr[z].size();

        int[] array = new int[dictionaryArr[z].size()];
        int pos = 0;
        Iterator it = dictionaryArr[z].keySet().iterator();
        while (it.hasNext()) {
            String word = (String) it.next();
            Count count = (Count) dictionaryArr[z].get(word);
            array[pos] = count.count;
            pos++;
        }

        // Sort the array
        sortArray(array);
        if (array.length < m_WordsToKeep) {
            // If there aren't enough words, set the threshold to minFreq
            prune[z] = m_minTermFreq;
        } else {
            // Otherwise set it to be at least minFreq
            prune[z] = Math.max(m_minTermFreq, array[array.length - m_WordsToKeep]);
        }
    }

    // Convert the dictionary into an attribute index and create one attribute per word
    FastVector attributes = new FastVector(totalsize + getInputFormat().numAttributes());

    // Add the non-converted attributes
    int classIndex = -1;
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (!m_SelectedRange.isInRange(i)) {
            if (getInputFormat().classIndex() == i) {
                classIndex = attributes.size();
            }
            attributes.addElement(getInputFormat().attribute(i).copy());
        }
    }

    // Add the word vector attributes (eliminating duplicates that occur in multiple classes)
    TreeMap newDictionary = new TreeMap();
    int index = attributes.size();
    for (int z = 0; z < values; z++) {
        Iterator it = dictionaryArr[z].keySet().iterator();
        while (it.hasNext()) {
            String word = (String) it.next();
            Count count = (Count) dictionaryArr[z].get(word);
            if (count.count >= prune[z]) {
                if (newDictionary.get(word) == null) {
                    newDictionary.put(word, new Integer(index++));
                    attributes.addElement(new Attribute(m_Prefix + word));
                }
            }
        }
    }

    // Compute document frequencies
    m_DocsCounts = new int[attributes.size()];
    Iterator it = newDictionary.keySet().iterator();
    while (it.hasNext()) {
        String word = (String) it.next();
        int idx = ((Integer) newDictionary.get(word)).intValue();
        int docsCount = 0;
        for (int j = 0; j < values; j++) {
            Count c = (Count) dictionaryArr[j].get(word);
            if (c != null)
                docsCount += c.docCount;
        }
        m_DocsCounts[idx] = docsCount;
    }

    // Trim vector and set instance variables
    attributes.trimToSize();
    m_Dictionary = newDictionary;
    m_NumInstances = getInputFormat().numInstances();

    // Set the filter's output format
    Instances outputFormat = new Instances(getInputFormat().relationName(), attributes, 0);
    outputFormat.setClassIndex(classIndex);
    setOutputFormat(outputFormat);
}
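This class is a modified copy of Weka's StringToWordVector; for comparison, a sketch of driving the stock filter with roughly matching options (the data is assumed to contain string attributes):

import weka.core.Instances;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.StringToWordVector;

public class VectorizeDemo {
    public static Instances vectorize(Instances data) throws Exception {
        StringToWordVector filter = new StringToWordVector();
        filter.setWordsToKeep(1000);     // per-class word cap, as in the code above
        filter.setMinTermFreq(1);        // minimum term frequency threshold
        filter.setLowerCaseTokens(true); // mirrors m_lowerCaseTokens
        filter.setInputFormat(data);     // triggers dictionary determination
        return Filter.useFilter(data, filter);
    }
}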
From source file:cn.edu.xjtu.dbmine.StringToWordVector.java
License:Open Source License
/**
 * Converts the instance without normalization.
 *
 * @param instance the instance to convert
 * @param v the vector to which the converted instance is added
 * @return the index of the first word-vector attribute (the number of copied attributes)
 */
private int convertInstancewoDocNorm(Instance instance, FastVector v) {
    // Convert the instance into a sorted set of indexes
    TreeMap contained = new TreeMap();

    // Copy all non-converted attributes from input to output
    int firstCopy = 0;
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (!m_SelectedRange.isInRange(i)) {
            if (getInputFormat().attribute(i).type() != Attribute.STRING) {
                // Add simple nominal and numeric attributes directly
                if (instance.value(i) != 0.0) {
                    contained.put(new Integer(firstCopy), new Double(instance.value(i)));
                }
            } else {
                if (instance.isMissing(i)) {
                    contained.put(new Integer(firstCopy), new Double(Instance.missingValue()));
                } else {
                    // If this is a string attribute, we have to first add this value
                    // to the range of possible values, then add its new internal index.
                    if (outputFormatPeek().attribute(firstCopy).numValues() == 0) {
                        // Note that the first string value in a SparseInstance doesn't get printed.
                        outputFormatPeek().attribute(firstCopy)
                                .addStringValue("Hack to defeat SparseInstance bug");
                    }
                    int newIndex = outputFormatPeek().attribute(firstCopy)
                            .addStringValue(instance.stringValue(i));
                    contained.put(new Integer(firstCopy), new Double(newIndex));
                }
            }
            firstCopy++;
        }
    }

    // Tokenize the in-range string attributes and count word occurrences
    for (int j = 0; j < instance.numAttributes(); j++) {
        if (m_SelectedRange.isInRange(j) && (instance.isMissing(j) == false)) {
            m_Tokenizer.tokenize(instance.stringValue(j));
            while (m_Tokenizer.hasMoreElements()) {
                String word = (String) m_Tokenizer.nextElement();
                if (this.m_lowerCaseTokens == true)
                    word = word.toLowerCase();
                word = m_Stemmer.stem(word);
                Integer index = (Integer) m_Dictionary.get(word);
                if (index != null) {
                    if (m_OutputCounts) {
                        // Separate "if" here rather than two lines down to avoid a hashtable lookup
                        Double count = (Double) contained.get(index);
                        if (count != null) {
                            contained.put(index, new Double(count.doubleValue() + 1.0));
                        } else {
                            contained.put(index, new Double(1));
                        }
                    } else {
                        contained.put(index, new Double(1));
                    }
                }
            }
        }
    }

    // Doing the TF transform
    if (m_TFTransform == true) {
        Iterator it = contained.keySet().iterator();
        for (int i = 0; it.hasNext(); i++) {
            Integer index = (Integer) it.next();
            if (index.intValue() >= firstCopy) {
                double val = ((Double) contained.get(index)).doubleValue();
                val = Math.log(val + 1);
                contained.put(index, new Double(val));
                Tfcontained.put(index, new Double(val));
            }
        }
    }

    // Doing the IDF transform
    if (m_IDFTransform == true) {
        Iterator it = contained.keySet().iterator();
        for (int i = 0; it.hasNext(); i++) {
            Integer index = (Integer) it.next();
            if (index.intValue() >= firstCopy) {
                double val = ((Double) contained.get(index)).doubleValue();
                val = val * Math.log(m_NumInstances / ((double) m_DocsCounts[index.intValue()] + 0.01));
                contained.put(index, new Double(val));
            }
        }
    }

    // Convert the set to the structures needed to create a sparse instance.
    double[] values = new double[contained.size()];
    int[] indices = new int[contained.size()];
    Iterator it = contained.keySet().iterator();
    for (int i = 0; it.hasNext(); i++) {
        Integer index = (Integer) it.next();
        Double value = (Double) contained.get(index);
        values[i] = value.doubleValue();
        indices[i] = index.intValue();
    }
    Instance inst = new SparseInstance(instance.weight(), values, indices, outputFormatPeek().numAttributes());
    inst.setDataset(outputFormatPeek());
    v.addElement(inst);
    return firstCopy;
}
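The two transforms above reduce to the arithmetic below; a standalone sketch with illustrative numbers (note the 0.01 smoothing term is specific to this code, not stock Weka):

public class TfIdfSketch {
    public static void main(String[] args) {
        double rawCount = 3;  // occurrences of a word in one document
        int numDocs = 100;    // m_NumInstances
        int docCount = 10;    // m_DocsCounts[index] for that word
        double tf = Math.log(rawCount + 1);                        // TFTransform
        double tfidf = tf * Math.log(numDocs / (docCount + 0.01)); // IDFTransform as coded above
        System.out.println(tfidf);
    }
}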
From source file:cn.ict.zyq.bestConf.bestConf.BestConf.java
License:Open Source License
public static ArrayList<Attribute> scaleDownDetour(Instances previousSet, Instance center) {
    ArrayList<Attribute> localAtts = new ArrayList<Attribute>();
    int attNum = center.numAttributes();
    int pos = previousSet.attribute(PerformanceAttName).index();

    // Traverse each dimension
    Enumeration<Instance> enu;
    double minDis;
    for (int i = 0; i < attNum; i++) {
        if (i == pos)
            continue;

        // Find the smallest distance (truncated to two decimals) from the center along dimension i
        enu = previousSet.enumerateInstances();
        minDis = Double.MAX_VALUE;
        while (enu.hasMoreElements()) {
            Instance ins = enu.nextElement();
            if (!ins.equals(center))
                minDis = Math.min((double) ((int) (Math.abs(ins.value(i) - center.value(i)) * 100)) / 100.0,
                        minDis);
        }

        // Now we set the range, clipped to the attribute's numeric bounds
        Properties p1 = new Properties();
        double upper = center.value(i) + minDis, lower = center.value(i) - minDis;
        TreeSet<Double> detourSet = new TreeSet<Double>();
        detourSet.add(upper);
        detourSet.add(lower);
        detourSet.add(previousSet.attribute(i).getUpperNumericBound());
        detourSet.add(previousSet.attribute(i).getLowerNumericBound());
        switch (detourSet.size()) {
        case 1:
            upper = lower = detourSet.first();
            break;
        case 2:
            upper = detourSet.last();
            lower = detourSet.first();
            break;
        case 3:
            upper = lower = detourSet.higher(detourSet.first());
            break;
        default: // case 4
            upper = detourSet.lower(detourSet.last());
            lower = detourSet.higher(detourSet.first());
            break;
        }
        p1.setProperty("range", "[" + String.valueOf(lower) + "," + String.valueOf(upper) + "]");
        ProtectedProperties prop1 = new ProtectedProperties(p1);
        localAtts.add(new Attribute(previousSet.attribute(i).name(), prop1));
    }
    return localAtts;
}
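The range-bounded attributes built here (and in the two scaleDown variants below) rely on Weka's attribute-metadata mechanism; a minimal sketch of that mechanism alone, with an illustrative name and bounds:

import java.util.Properties;
import weka.core.Attribute;
import weka.core.ProtectedProperties;

public class RangedAttributeDemo {
    public static void main(String[] args) {
        Properties p = new Properties();
        p.setProperty("range", "[0.5,9.5]"); // lower and upper numeric bounds
        Attribute att = new Attribute("someParam", new ProtectedProperties(p));
        System.out.println(att.getLowerNumericBound()); // 0.5
        System.out.println(att.getUpperNumericBound()); // 9.5
    }
}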
From source file:cn.ict.zyq.bestConf.bestConf.BestConf.java
License:Open Source License
public static void getBestPerfFrom(String path) {
    try {
        BestConf bestconf = new BestConf();
        Instances trainingSet = DataIOFile.loadDataFromArffFile(path);
        Instance best = trainingSet.firstInstance();

        // Set the best configuration to the cluster (all attributes except the last)
        Map<Attribute, Double> attsmap = new HashMap<Attribute, Double>();
        for (int i = 0; i < best.numAttributes() - 1; i++) {
            attsmap.put(best.attribute(i), best.value(i));
        }
        double bestPerf = bestconf.setOptimal(attsmap, "getBestPerfFrom");
        System.out.println("=========================================");
        System.err.println("The actual performance for the best point is : " + bestPerf);
        System.out.println("=========================================");
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:cn.ict.zyq.bestConf.bestConf.sampler.ConfigSampler.java
License:Open Source License
private static ArrayList<Attribute> scaleDownNeighbordists(Instances previousSet, Instance center) {
    ArrayList<Attribute> localAtts = new ArrayList<Attribute>();
    int attNum = center.numAttributes();
    int pos = -1;
    if (previousSet.attribute(PerformanceAttName) != null)
        pos = previousSet.attribute(PerformanceAttName).index();

    // Traverse each dimension
    Enumeration<Instance> enu;
    double[] minDists = new double[2];
    double val;
    for (int i = 0; i < attNum; i++) {
        if (i == pos)
            continue;

        // Find the nearest signed distances below (minDists[0]) and above (minDists[1]) the center
        enu = previousSet.enumerateInstances();
        minDists[0] = 1 - Double.MAX_VALUE;
        minDists[1] = Double.MAX_VALUE;
        while (enu.hasMoreElements()) {
            Instance ins = enu.nextElement();
            if (!ins.equals(center)) {
                val = ins.value(i) - center.value(i);
                if (val < 0)
                    minDists[0] = Math.max((double) ((int) ((ins.value(i) - center.value(i)) * 1000)) / 1000.0,
                            minDists[0]);
                else
                    minDists[1] = Math.min((double) ((int) ((ins.value(i) - center.value(i)) * 1000)) / 1000.0,
                            minDists[1]);
            }
        }

        // Now we set the range, clipped to the attribute's numeric bounds
        Properties p1 = new Properties();
        double upper = center.value(i) + minDists[1], lower = center.value(i) + minDists[0];
        TreeSet<Double> detourSet = new TreeSet<Double>();
        detourSet.add(upper);
        detourSet.add(lower);
        detourSet.add(previousSet.attribute(i).getUpperNumericBound());
        detourSet.add(previousSet.attribute(i).getLowerNumericBound());
        switch (detourSet.size()) {
        case 1:
            upper = lower = detourSet.first();
            break;
        case 2:
            upper = detourSet.last();
            lower = detourSet.first();
            break;
        case 3:
            upper = lower = detourSet.higher(detourSet.first());
            break;
        default: // case 4
            upper = detourSet.lower(detourSet.last());
            lower = detourSet.higher(detourSet.first());
            break;
        }
        p1.setProperty("range", "[" + String.valueOf(lower) + "," + String.valueOf(upper) + "]");
        ProtectedProperties prop1 = new ProtectedProperties(p1);
        localAtts.add(new Attribute(previousSet.attribute(i).name(), prop1));
    }
    return localAtts;
}
From source file:cn.ict.zyq.bestConf.bestConf.sampler.ConfigSampler.java
License:Open Source License
private static ArrayList<Attribute> scaleDownMindists(Instances previousSet, Instance center) {
    ArrayList<Attribute> localAtts = new ArrayList<Attribute>();
    int attNum = center.numAttributes();
    int pos = previousSet.attribute(PerformanceAttName).index();

    // Traverse each dimension
    Enumeration<Instance> enu;
    double minDis;
    for (int i = 0; i < attNum; i++) {
        if (i == pos)
            continue;

        // Find the smallest distance (truncated to three decimals) from the center along dimension i
        enu = previousSet.enumerateInstances();
        minDis = Double.MAX_VALUE;
        while (enu.hasMoreElements()) {
            Instance ins = enu.nextElement();
            if (!ins.equals(center))
                minDis = Math.min((double) ((int) (Math.abs(ins.value(i) - center.value(i)) * 1000)) / 1000.0,
                        minDis);
        }

        // Now we set the range, clipped to the attribute's numeric bounds
        Properties p1 = new Properties();
        double upper = center.value(i) + minDis, lower = center.value(i) - minDis;
        TreeSet<Double> detourSet = new TreeSet<Double>();
        detourSet.add(upper);
        detourSet.add(lower);
        detourSet.add(previousSet.attribute(i).getUpperNumericBound());
        detourSet.add(previousSet.attribute(i).getLowerNumericBound());
        switch (detourSet.size()) {
        case 1:
            upper = lower = detourSet.first();
            break;
        case 2:
            upper = detourSet.last();
            lower = detourSet.first();
            break;
        case 3:
            upper = lower = detourSet.higher(detourSet.first());
            break;
        default: // case 4
            upper = detourSet.lower(detourSet.last());
            lower = detourSet.higher(detourSet.first());
            break;
        }
        p1.setProperty("range", "[" + String.valueOf(lower) + "," + String.valueOf(upper) + "]");
        ProtectedProperties prop1 = new ProtectedProperties(p1);
        localAtts.add(new Attribute(previousSet.attribute(i).name(), prop1));
    }
    return localAtts;
}
From source file:cn.ict.zyq.bestConf.cluster.Main.AutoTestAdjust.java
License:Open Source License
public static String getMD5(Instance ins) {
    // Build a key from all but the last two attribute values, then hash it
    StringBuffer name = new StringBuffer("");
    for (int i = 0; i < ins.numAttributes() - 2; i++) {
        name.append(Math.round(ins.value(ins.attribute(i)))).append(",");
    }
    return getMD5(name.toString());
}
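The string overload getMD5(String) called above is not shown in this listing; a plausible reconstruction (an assumption, not the project's actual code) using java.security.MessageDigest:

import java.math.BigInteger;
import java.security.MessageDigest;

public class Md5Sketch {
    // Hypothetical helper: hex-encoded MD5 digest of a key string
    public static String getMD5(String key) throws Exception {
        MessageDigest md = MessageDigest.getInstance("MD5");
        byte[] digest = md.digest(key.getBytes("UTF-8"));
        return String.format("%032x", new BigInteger(1, digest));
    }
}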