List of usage examples for weka.core Instance valueSparse
public double valueSparse(int indexOfIndex);
From source file:br.com.ufu.lsi.utils.DocumentFrequencyAttributeEval.java
License:Open Source License
/** * Initializes an information gain attribute evaluator. Discretizes all attributes that are * numeric.//from w w w.j a v a 2 s .c om * * @param data set of instances serving as training data * @throws Exception if the evaluator has not been generated successfully */ public void buildEvaluator(Instances data) throws Exception { // can evaluator handle data? getCapabilities().testWithFail(data); int classIndex = data.classIndex(); int numAttributes = data.numAttributes(); m_DFs = new int[numAttributes]; Enumeration e = data.enumerateInstances(); while (e.hasMoreElements()) { Instance instance = (Instance) e.nextElement(); int numValues = instance.numValues(); for (int valueIndex = 0; valueIndex < numValues; valueIndex++) { int attIndex = instance.index(valueIndex); if (attIndex != classIndex) { double value = instance.valueSparse(valueIndex); //missingvalues werden also 0 betrachtet. if (m_missingAsZero) { if (!Instance.isMissingValue(value) && value != 0.0) { //man knnte auch isMissingSparce(valueIndex) verwenden, oder ineffizienterweise isMissing(attIndex) m_DFs[attIndex]++; //m_DFs[ attIndex ]+=value ; } } else { if (value != 0.0) { m_DFs[attIndex]++; //m_DFs[ attIndex ]+=value ; } } } } } }
From source file:cba.ItemSet.java
License:Open Source License
/**
 * Checks if an instance contains an item set.
 *
 * <p>An entry of {@code m_items} greater than {@code -1} is a required value
 * for the attribute at that position; other entries place no constraint.
 * For a sparse instance with {@code m_treatZeroAsMissing} set, the stored
 * entries of the instance and the positions of {@code m_items} are walked in
 * parallel; in every other case each attribute is looked up directly.
 *
 * @param instance the instance to be tested
 * @return true if the given instance contains this item set
 */
public boolean containedBy(Instance instance) {
    final boolean sparseWalk =
            instance instanceof weka.core.SparseInstance && m_treatZeroAsMissing;

    if (!sparseWalk) {
        for (int att = 0; att < instance.numAttributes(); att++) {
            if (m_items[att] <= -1) {
                continue; // no constraint on this attribute
            }
            if (instance.isMissing(att)) {
                return false;
            }
            int observed = (int) instance.value(att);
            if (m_treatZeroAsMissing && observed == 0) {
                return false;
            }
            if (m_items[att] != observed) {
                return false;
            }
        }
        return true;
    }

    final int storedCount = instance.numValues();
    final int itemCount = m_items.length;
    int instPos = 0; // position in the instance's stored entries
    int itemPos = 0; // position (= attribute index) in m_items

    while (instPos < storedCount || itemPos < itemCount) {
        // Attribute index of the current stored entry, or a sentinel once
        // the stored entries are exhausted.
        int instAttr = (instPos < storedCount) ? instance.index(instPos) : Integer.MAX_VALUE;

        if (m_items[itemPos] > -1) {
            // A required item must line up with a stored, non-missing,
            // equal value.
            if (itemPos != instAttr
                    || instance.isMissingSparse(instPos)
                    || m_items[itemPos] != (int) instance.valueSparse(instPos)) {
                return false;
            }
            instPos++;
            itemPos++;
        } else if (itemPos < instAttr) {
            itemPos++;
        } else if (itemPos == instAttr) {
            itemPos++;
            instPos++;
        }
    }
    return true;
}
From source file:ChiSquare.ChiSquaredAttributeEval.java
License:Open Source License
/** * Initializes a chi-squared attribute evaluator. * Discretizes all attributes that are numeric. * * @param data set of instances serving as training data * @throws Exception if the evaluator has not been * generated successfully/* w w w.j av a 2s . com*/ */ public void buildEvaluator(Instances data) throws Exception { // can evaluator handle data? getCapabilities().testWithFail(data); int classIndex = data.classIndex(); int numInstances = data.numInstances(); if (!m_Binarize) { Discretize disTransform = new Discretize(); disTransform.setUseBetterEncoding(true); disTransform.setInputFormat(data); data = Filter.useFilter(data, disTransform); } else { NumericToBinary binTransform = new NumericToBinary(); binTransform.setInputFormat(data); data = Filter.useFilter(data, binTransform); } int numClasses = data.attribute(classIndex).numValues(); // Reserve space and initialize counters double[][][] counts = new double[data.numAttributes()][][]; for (int k = 0; k < data.numAttributes(); k++) { if (k != classIndex) { int numValues = data.attribute(k).numValues(); counts[k] = new double[numValues + 1][numClasses + 1]; } } // Initialize counters double[] temp = new double[numClasses + 1]; for (int k = 0; k < numInstances; k++) { Instance inst = data.instance(k); if (inst.classIsMissing()) { temp[numClasses] += inst.weight(); } else { temp[(int) inst.classValue()] += inst.weight(); } } for (int k = 0; k < counts.length; k++) { if (k != classIndex) { for (int i = 0; i < temp.length; i++) { counts[k][0][i] = temp[i]; } } } // Get counts for (int k = 0; k < numInstances; k++) { Instance inst = data.instance(k); for (int i = 0; i < inst.numValues(); i++) { if (inst.index(i) != classIndex) { if (inst.isMissingSparse(i) || inst.classIsMissing()) { if (!inst.isMissingSparse(i)) { counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight(); counts[inst.index(i)][0][numClasses] -= inst.weight(); } else if (!inst.classIsMissing()) { 
counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst .classValue()] += inst.weight(); counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight(); } else { counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst .weight(); counts[inst.index(i)][0][numClasses] -= inst.weight(); } } else { counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight(); counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight(); } } } } // distribute missing counts if required if (m_missing_merge) { for (int k = 0; k < data.numAttributes(); k++) { if (k != classIndex) { int numValues = data.attribute(k).numValues(); // Compute marginals double[] rowSums = new double[numValues]; double[] columnSums = new double[numClasses]; double sum = 0; for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { rowSums[i] += counts[k][i][j]; columnSums[j] += counts[k][i][j]; } sum += rowSums[i]; } if (Utils.gr(sum, 0)) { double[][] additions = new double[numValues][numClasses]; // Compute what needs to be added to each row for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j]; } } // Compute what needs to be added to each column for (int i = 0; i < numClasses; i++) { for (int j = 0; j < numValues; j++) { additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses]; } } // Compute what needs to be added to each cell for (int i = 0; i < numClasses; i++) { for (int j = 0; j < numValues; j++) { additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses]; } } // Make new contingency table double[][] newTable = new double[numValues][numClasses]; for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { newTable[i][j] = counts[k][i][j] + additions[i][j]; } } counts[k] = newTable; } } } } // Compute chi-squared values m_ChiSquareds = new double[data.numAttributes()]; 
for (int i = 0; i < data.numAttributes(); i++) { if (i != classIndex) { m_ChiSquareds[i] = ContingencyTables.chiVal(ContingencyTables.reduceMatrix(counts[i]), false); } } }
From source file:classifier.CustomStringToWordVector.java
License:Open Source License
/** * Signify that this batch of input to the filter is finished. If the filter * requires all instances prior to filtering, output() may now be called to * retrieve the filtered instances.//from ww w . jav a 2 s . c o m * * @return true if there are instances pending output. * @throws IllegalStateException * if no input structure has been defined. */ public boolean batchFinished() throws Exception { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } // We only need to do something in this method // if the first batch hasn't been processed. Otherwise // input() has already done all the work. if (!isFirstBatchDone()) { // Determine the dictionary from the first batch (training data) determineDictionary(); // Convert all instances w/o normalization FastVector fv = new FastVector(); int firstCopy = 0; for (int i = 0; i < m_NumInstances; i++) { firstCopy = convertInstancewoDocNorm(getInputFormat().instance(i), fv); } // Need to compute average document length if necessary if (m_filterType != FILTER_NONE) { m_AvgDocLength = 0; for (int i = 0; i < fv.size(); i++) { Instance inst = (Instance) fv.elementAt(i); double docLength = 0; for (int j = 0; j < inst.numValues(); j++) { if (inst.index(j) >= firstCopy) { docLength += inst.valueSparse(j) * inst.valueSparse(j); } } m_AvgDocLength += Math.sqrt(docLength); } m_AvgDocLength /= m_NumInstances; } // Perform normalization if necessary. if (m_filterType == FILTER_NORMALIZE_ALL) { for (int i = 0; i < fv.size(); i++) { normalizeInstance((Instance) fv.elementAt(i), firstCopy); } } // Push all instances into the output queue for (int i = 0; i < fv.size(); i++) { push((Instance) fv.elementAt(i)); } } // Flush the input flushInput(); m_NewBatch = true; m_FirstBatchDone = true; return (numPendingOutput() != 0); }
From source file:classifier.CustomStringToWordVector.java
License:Open Source License
/** * Normalizes given instance to average doc length (only the newly * constructed attributes)./* www .j a v a 2 s . c o m*/ * * @param inst * the instance to normalize * @param firstCopy * @throws Exception * if avg. doc length not set */ private void normalizeInstance(Instance inst, int firstCopy) throws Exception { double docLength = 0; if (m_AvgDocLength < 0) { throw new Exception("Average document length not set."); } // Compute length of document vector for (int j = 0; j < inst.numValues(); j++) { if (inst.index(j) >= firstCopy) { docLength += inst.valueSparse(j) * inst.valueSparse(j); } } docLength = Math.sqrt(docLength); // Normalize document vector for (int j = 0; j < inst.numValues(); j++) { if (inst.index(j) >= firstCopy) { double val = inst.valueSparse(j) * m_AvgDocLength / docLength; inst.setValueSparse(j, val); if (val == 0) { System.err.println("setting value " + inst.index(j) + " to zero."); j--; } } } }
From source file:cluster.ABC.ClusterUtils.java
License:Open Source License
/** Normalizes the values of a SparseInstance in L2 norm * * @author Sugato Basu/* w ww.j av a 2 s .com*/ * @param inst SparseInstance to be normalized */ public static void normalizeSparseInstance(Instance inst) throws Exception { double norm = 0; int length = inst.numValues(); if (!(inst instanceof SparseInstance)) { System.err.println("Not SparseInstance, using normalizeInstance function instead"); normalizeInstance(inst); } for (int i = 0; i < length; i++) { if (inst.index(i) != inst.classIndex()) { // don't normalize the class index norm += inst.valueSparse(i) * inst.valueSparse(i); } } norm = Math.sqrt(norm); for (int i = 0; i < length; i++) { // don't normalize the class index if (inst.index(i) != inst.classIndex()) { inst.setValueSparse(i, inst.valueSparse(i) / norm); } } }
From source file:cluster.ABC.ClusterUtils.java
License:Open Source License
/** This function divides every attribute value in an instance by * the instance weight -- useful to find the mean of a cluster in * Euclidean space /* ww w . j a va 2s.c om*/ * @param inst Instance passed in for normalization (destructive update) */ public static void normalizeByWeight(Instance inst) { double weight = inst.weight(); if (inst instanceof SparseInstance) { for (int i = 0; i < inst.numValues(); i++) { inst.setValueSparse(i, inst.valueSparse(i) / weight); } } else if (!(inst instanceof SparseInstance)) { for (int i = 0; i < inst.numAttributes(); i++) { inst.setValue(i, inst.value(i) / weight); } } }
From source file:cluster.ABC.ClusterUtils.java
License:Open Source License
/**
 * Finds the sum of 2 instances (handles sparse and non-sparse).
 *
 * <p>The result carries the attribute-wise sum of the values and the sum of
 * the two weights, and is attached to {@code m_Instances}. Two sparse inputs
 * give a {@code SparseInstance}; two non-sparse inputs give an
 * {@code Instance}.
 *
 * @param inst1 first summand
 * @param inst2 second summand
 * @param m_Instances dataset the result is attached to
 * @return the new instance holding the sum
 * @throws Exception if the attribute counts differ, or if exactly one of the
 *         inputs is sparse
 */
public static Instance sumInstances(Instance inst1, Instance inst2, Instances m_Instances)
        throws Exception {
    final int numAttributes = inst1.numAttributes();
    if (numAttributes != inst2.numAttributes()) {
        throw new Exception("Error!! inst1 and inst2 should have same number of attributes.");
    }

    final boolean firstSparse = inst1 instanceof SparseInstance;
    final boolean secondSparse = inst2 instanceof SparseInstance;
    if (firstSparse != secondSparse) {
        throw new Exception("Error!! inst1 and inst2 should be both of same type -- sparse or non-sparse");
    }

    final double totalWeight = inst1.weight() + inst2.weight();
    final double[] sums = new double[numAttributes]; // starts out all zero

    Instance result;
    if (firstSparse) {
        for (int slot = 0; slot < inst1.numValues(); slot++) {
            sums[inst1.index(slot)] = inst1.valueSparse(slot);
        }
        for (int slot = 0; slot < inst2.numValues(); slot++) {
            sums[inst2.index(slot)] += inst2.valueSparse(slot);
        }
        result = new SparseInstance(totalWeight, sums);
    } else {
        for (int att = 0; att < numAttributes; att++) {
            sums[att] = inst1.value(att) + inst2.value(att);
        }
        result = new Instance(totalWeight, sums);
    }

    result.setDataset(m_Instances);
    return result;
}
From source file:cn.edu.xjtu.dbmine.StringToWordVector.java
License:Open Source License
/** * Normalizes given instance to average doc length (only the newly * constructed attributes)./*from www. j a v a2 s . c o m*/ * * @param inst * the instance to normalize * @param firstCopy * @throws Exception * if avg. doc length not set */ private void normalizeInstance(Instance inst, int firstCopy) throws Exception { double docLength = 0; if (m_AvgDocLength < 0) { throw new Exception("Average document length not set."); } // Compute length of document vector for (int j = 0; j < inst.numValues(); j++) { if (inst.index(j) >= firstCopy) { docLength += inst.valueSparse(j) * inst.valueSparse(j); } } docLength = Math.sqrt(docLength); // Normalize document vector for (int j = 0; j < inst.numValues(); j++) { if (inst.index(j) >= firstCopy) { double val = inst.valueSparse(j) * m_AvgDocLength / docLength; inst.setValueSparse(j, val); if (val == 0) { System.err.println("setting value " + inst.index(j) + " to zero."); j--; } } } }
From source file:com.yahoo.labs.samoa.instances.WekaToSamoaInstanceConverter.java
License:Apache License
/** * Samoa instance from weka instance./*from w ww . j a v a 2s . c om*/ * * @param inst the inst * @return the instance */ public Instance samoaInstance(weka.core.Instance inst) { Instance samoaInstance; if (inst instanceof weka.core.SparseInstance) { double[] attributeValues = new double[inst.numValues()]; int[] indexValues = new int[inst.numValues()]; for (int i = 0; i < inst.numValues(); i++) { if (inst.index(i) != inst.classIndex()) { attributeValues[i] = inst.valueSparse(i); indexValues[i] = inst.index(i); } } samoaInstance = new SparseInstance(inst.weight(), attributeValues, indexValues, inst.numAttributes()); } else { samoaInstance = new DenseInstance(inst.weight(), inst.toDoubleArray()); //samoaInstance.deleteAttributeAt(inst.classIndex()); } if (this.samoaInstanceInformation == null) { this.samoaInstanceInformation = this.samoaInstancesInformation(inst.dataset()); } samoaInstance.setDataset(samoaInstanceInformation); samoaInstance.setClassValue(inst.classValue()); return samoaInstance; }