List of usage examples for weka.core.Instance.index
/**
 * Returns the attribute index that belongs to the value stored at the given
 * position of this instance.
 *
 * <p>NOTE(review): inferred from the call sites, which pair {@code index(p)} with
 * {@code valueSparse(p)} for {@code 0 <= p < numValues()} — confirm against the
 * weka.core.Instance Javadoc.
 *
 * @param position the position within the instance's stored values
 * @return the index of the attribute stored at that position
 */
public int index(int position);
From source file:br.com.ufu.lsi.utils.DocumentFrequencyAttributeEval.java
License:Open Source License
/** * Initializes an information gain attribute evaluator. Discretizes all attributes that are * numeric./*from w w w . j a v a2 s . c om*/ * * @param data set of instances serving as training data * @throws Exception if the evaluator has not been generated successfully */ public void buildEvaluator(Instances data) throws Exception { // can evaluator handle data? getCapabilities().testWithFail(data); int classIndex = data.classIndex(); int numAttributes = data.numAttributes(); m_DFs = new int[numAttributes]; Enumeration e = data.enumerateInstances(); while (e.hasMoreElements()) { Instance instance = (Instance) e.nextElement(); int numValues = instance.numValues(); for (int valueIndex = 0; valueIndex < numValues; valueIndex++) { int attIndex = instance.index(valueIndex); if (attIndex != classIndex) { double value = instance.valueSparse(valueIndex); //missingvalues werden also 0 betrachtet. if (m_missingAsZero) { if (!Instance.isMissingValue(value) && value != 0.0) { //man knnte auch isMissingSparce(valueIndex) verwenden, oder ineffizienterweise isMissing(attIndex) m_DFs[attIndex]++; //m_DFs[ attIndex ]+=value ; } } else { if (value != 0.0) { m_DFs[attIndex]++; //m_DFs[ attIndex ]+=value ; } } } } } }
From source file:cba.ItemSet.java
License:Open Source License
/** * Checks if an instance contains an item set. * * @param instance the instance to be tested * @return true if the given instance contains this item set *///w ww. j av a2 s. c om public boolean containedBy(Instance instance) { if (instance instanceof weka.core.SparseInstance && m_treatZeroAsMissing) { int numInstVals = instance.numValues(); int numItemSetVals = m_items.length; for (int p1 = 0, p2 = 0; p1 < numInstVals || p2 < numItemSetVals;) { int instIndex = Integer.MAX_VALUE; if (p1 < numInstVals) { instIndex = instance.index(p1); } int itemIndex = p2; if (m_items[itemIndex] > -1) { if (itemIndex != instIndex) { return false; } else { if (instance.isMissingSparse(p1)) { return false; } if (m_items[itemIndex] != (int) instance.valueSparse(p1)) { return false; } } p1++; p2++; } else { if (itemIndex < instIndex) { p2++; } else if (itemIndex == instIndex) { p2++; p1++; } } } } else { for (int i = 0; i < instance.numAttributes(); i++) if (m_items[i] > -1) { if (instance.isMissing(i) || (m_treatZeroAsMissing && (int) instance.value(i) == 0)) return false; if (m_items[i] != (int) instance.value(i)) return false; } } return true; }
From source file:ChiSquare.ChiSquaredAttributeEval.java
License:Open Source License
/** * Initializes a chi-squared attribute evaluator. * Discretizes all attributes that are numeric. * * @param data set of instances serving as training data * @throws Exception if the evaluator has not been * generated successfully// w w w.j a v a2 s. com */ public void buildEvaluator(Instances data) throws Exception { // can evaluator handle data? getCapabilities().testWithFail(data); int classIndex = data.classIndex(); int numInstances = data.numInstances(); if (!m_Binarize) { Discretize disTransform = new Discretize(); disTransform.setUseBetterEncoding(true); disTransform.setInputFormat(data); data = Filter.useFilter(data, disTransform); } else { NumericToBinary binTransform = new NumericToBinary(); binTransform.setInputFormat(data); data = Filter.useFilter(data, binTransform); } int numClasses = data.attribute(classIndex).numValues(); // Reserve space and initialize counters double[][][] counts = new double[data.numAttributes()][][]; for (int k = 0; k < data.numAttributes(); k++) { if (k != classIndex) { int numValues = data.attribute(k).numValues(); counts[k] = new double[numValues + 1][numClasses + 1]; } } // Initialize counters double[] temp = new double[numClasses + 1]; for (int k = 0; k < numInstances; k++) { Instance inst = data.instance(k); if (inst.classIsMissing()) { temp[numClasses] += inst.weight(); } else { temp[(int) inst.classValue()] += inst.weight(); } } for (int k = 0; k < counts.length; k++) { if (k != classIndex) { for (int i = 0; i < temp.length; i++) { counts[k][0][i] = temp[i]; } } } // Get counts for (int k = 0; k < numInstances; k++) { Instance inst = data.instance(k); for (int i = 0; i < inst.numValues(); i++) { if (inst.index(i) != classIndex) { if (inst.isMissingSparse(i) || inst.classIsMissing()) { if (!inst.isMissingSparse(i)) { counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight(); counts[inst.index(i)][0][numClasses] -= inst.weight(); } else if (!inst.classIsMissing()) { 
counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst .classValue()] += inst.weight(); counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight(); } else { counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst .weight(); counts[inst.index(i)][0][numClasses] -= inst.weight(); } } else { counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight(); counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight(); } } } } // distribute missing counts if required if (m_missing_merge) { for (int k = 0; k < data.numAttributes(); k++) { if (k != classIndex) { int numValues = data.attribute(k).numValues(); // Compute marginals double[] rowSums = new double[numValues]; double[] columnSums = new double[numClasses]; double sum = 0; for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { rowSums[i] += counts[k][i][j]; columnSums[j] += counts[k][i][j]; } sum += rowSums[i]; } if (Utils.gr(sum, 0)) { double[][] additions = new double[numValues][numClasses]; // Compute what needs to be added to each row for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j]; } } // Compute what needs to be added to each column for (int i = 0; i < numClasses; i++) { for (int j = 0; j < numValues; j++) { additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses]; } } // Compute what needs to be added to each cell for (int i = 0; i < numClasses; i++) { for (int j = 0; j < numValues; j++) { additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses]; } } // Make new contingency table double[][] newTable = new double[numValues][numClasses]; for (int i = 0; i < numValues; i++) { for (int j = 0; j < numClasses; j++) { newTable[i][j] = counts[k][i][j] + additions[i][j]; } } counts[k] = newTable; } } } } // Compute chi-squared values m_ChiSquareds = new double[data.numAttributes()]; 
for (int i = 0; i < data.numAttributes(); i++) { if (i != classIndex) { m_ChiSquareds[i] = ContingencyTables.chiVal(ContingencyTables.reduceMatrix(counts[i]), false); } } }
From source file:classifier.CustomStringToWordVector.java
License:Open Source License
/** * Signify that this batch of input to the filter is finished. If the filter * requires all instances prior to filtering, output() may now be called to * retrieve the filtered instances.//w ww. j av a 2 s . com * * @return true if there are instances pending output. * @throws IllegalStateException * if no input structure has been defined. */ public boolean batchFinished() throws Exception { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } // We only need to do something in this method // if the first batch hasn't been processed. Otherwise // input() has already done all the work. if (!isFirstBatchDone()) { // Determine the dictionary from the first batch (training data) determineDictionary(); // Convert all instances w/o normalization FastVector fv = new FastVector(); int firstCopy = 0; for (int i = 0; i < m_NumInstances; i++) { firstCopy = convertInstancewoDocNorm(getInputFormat().instance(i), fv); } // Need to compute average document length if necessary if (m_filterType != FILTER_NONE) { m_AvgDocLength = 0; for (int i = 0; i < fv.size(); i++) { Instance inst = (Instance) fv.elementAt(i); double docLength = 0; for (int j = 0; j < inst.numValues(); j++) { if (inst.index(j) >= firstCopy) { docLength += inst.valueSparse(j) * inst.valueSparse(j); } } m_AvgDocLength += Math.sqrt(docLength); } m_AvgDocLength /= m_NumInstances; } // Perform normalization if necessary. if (m_filterType == FILTER_NORMALIZE_ALL) { for (int i = 0; i < fv.size(); i++) { normalizeInstance((Instance) fv.elementAt(i), firstCopy); } } // Push all instances into the output queue for (int i = 0; i < fv.size(); i++) { push((Instance) fv.elementAt(i)); } } // Flush the input flushInput(); m_NewBatch = true; m_FirstBatchDone = true; return (numPendingOutput() != 0); }
From source file:classifier.CustomStringToWordVector.java
License:Open Source License
/** * Normalizes given instance to average doc length (only the newly * constructed attributes).//ww w .ja v a2s . co m * * @param inst * the instance to normalize * @param firstCopy * @throws Exception * if avg. doc length not set */ private void normalizeInstance(Instance inst, int firstCopy) throws Exception { double docLength = 0; if (m_AvgDocLength < 0) { throw new Exception("Average document length not set."); } // Compute length of document vector for (int j = 0; j < inst.numValues(); j++) { if (inst.index(j) >= firstCopy) { docLength += inst.valueSparse(j) * inst.valueSparse(j); } } docLength = Math.sqrt(docLength); // Normalize document vector for (int j = 0; j < inst.numValues(); j++) { if (inst.index(j) >= firstCopy) { double val = inst.valueSparse(j) * m_AvgDocLength / docLength; inst.setValueSparse(j, val); if (val == 0) { System.err.println("setting value " + inst.index(j) + " to zero."); j--; } } } }
From source file:cluster.ABC.ClusterUtils.java
License:Open Source License
/** Normalizes the values of a SparseInstance in L2 norm * * @author Sugato Basu/*from w w w .ja v a2s .co m*/ * @param inst SparseInstance to be normalized */ public static void normalizeSparseInstance(Instance inst) throws Exception { double norm = 0; int length = inst.numValues(); if (!(inst instanceof SparseInstance)) { System.err.println("Not SparseInstance, using normalizeInstance function instead"); normalizeInstance(inst); } for (int i = 0; i < length; i++) { if (inst.index(i) != inst.classIndex()) { // don't normalize the class index norm += inst.valueSparse(i) * inst.valueSparse(i); } } norm = Math.sqrt(norm); for (int i = 0; i < length; i++) { // don't normalize the class index if (inst.index(i) != inst.classIndex()) { inst.setValueSparse(i, inst.valueSparse(i) / norm); } } }
From source file:cluster.ABC.ClusterUtils.java
License:Open Source License
/** Finds sum of 2 instances (handles sparse and non-sparse) *///from w w w . j a v a 2 s. c o m public static Instance sumInstances(Instance inst1, Instance inst2, Instances m_Instances) throws Exception { int numAttributes = inst1.numAttributes(); if (inst2.numAttributes() != numAttributes) { throw new Exception("Error!! inst1 and inst2 should have same number of attributes."); } double weight1 = inst1.weight(), weight2 = inst2.weight(); double[] values = new double[numAttributes]; for (int i = 0; i < numAttributes; i++) { values[i] = 0; } if (inst1 instanceof SparseInstance && inst2 instanceof SparseInstance) { for (int i = 0; i < inst1.numValues(); i++) { int indexOfIndex = inst1.index(i); values[indexOfIndex] = inst1.valueSparse(i); } for (int i = 0; i < inst2.numValues(); i++) { int indexOfIndex = inst2.index(i); values[indexOfIndex] += inst2.valueSparse(i); } SparseInstance newInst = new SparseInstance(weight1 + weight2, values); newInst.setDataset(m_Instances); return newInst; } else if (!(inst1 instanceof SparseInstance) && !(inst2 instanceof SparseInstance)) { for (int i = 0; i < numAttributes; i++) { values[i] = inst1.value(i) + inst2.value(i); } } else { throw new Exception("Error!! inst1 and inst2 should be both of same type -- sparse or non-sparse"); } Instance newInst = new Instance(weight1 + weight2, values); newInst.setDataset(m_Instances); return newInst; }
From source file:cn.edu.xjtu.dbmine.StringToWordVector.java
License:Open Source License
/** * Normalizes given instance to average doc length (only the newly * constructed attributes).// w ww .j a v a 2 s .co m * * @param inst * the instance to normalize * @param firstCopy * @throws Exception * if avg. doc length not set */ private void normalizeInstance(Instance inst, int firstCopy) throws Exception { double docLength = 0; if (m_AvgDocLength < 0) { throw new Exception("Average document length not set."); } // Compute length of document vector for (int j = 0; j < inst.numValues(); j++) { if (inst.index(j) >= firstCopy) { docLength += inst.valueSparse(j) * inst.valueSparse(j); } } docLength = Math.sqrt(docLength); // Normalize document vector for (int j = 0; j < inst.numValues(); j++) { if (inst.index(j) >= firstCopy) { double val = inst.valueSparse(j) * m_AvgDocLength / docLength; inst.setValueSparse(j, val); if (val == 0) { System.err.println("setting value " + inst.index(j) + " to zero."); j--; } } } }
From source file:com.yahoo.labs.samoa.instances.WekaToSamoaInstanceConverter.java
License:Apache License
/** * Samoa instance from weka instance.//from w w w . jav a 2 s.c o m * * @param inst the inst * @return the instance */ public Instance samoaInstance(weka.core.Instance inst) { Instance samoaInstance; if (inst instanceof weka.core.SparseInstance) { double[] attributeValues = new double[inst.numValues()]; int[] indexValues = new int[inst.numValues()]; for (int i = 0; i < inst.numValues(); i++) { if (inst.index(i) != inst.classIndex()) { attributeValues[i] = inst.valueSparse(i); indexValues[i] = inst.index(i); } } samoaInstance = new SparseInstance(inst.weight(), attributeValues, indexValues, inst.numAttributes()); } else { samoaInstance = new DenseInstance(inst.weight(), inst.toDoubleArray()); //samoaInstance.deleteAttributeAt(inst.classIndex()); } if (this.samoaInstanceInformation == null) { this.samoaInstanceInformation = this.samoaInstancesInformation(inst.dataset()); } samoaInstance.setDataset(samoaInstanceInformation); samoaInstance.setClassValue(inst.classValue()); return samoaInstance; }
From source file:de.uni_potsdam.hpi.bpt.promnicat.util.WeightedEuclideanDistance.java
License:Open Source License
/** * Calculates the distance between two instances. Offers speed up (if the * distance function class in use supports it) in nearest neighbour search by * taking into account the cutOff or maximum distance. Depending on the * distance function class, post processing of the distances by * postProcessDistances(double []) may be required if this function is used. * * @param first the first instance/*from w ww . ja va 2 s . co m*/ * @param second the second instance * @param cutOffValue If the distance being calculated becomes larger than * cutOffValue then the rest of the calculation is * discarded. * @param stats the performance stats object * @return the distance between the two given instances or * Double.POSITIVE_INFINITY if the distance being * calculated becomes larger than cutOffValue. */ public double distance(Instance first, Instance second, double cutOffValue, PerformanceStats stats) { double distance = 0; int firstI, secondI; int firstNumValues = first.numValues(); int secondNumValues = second.numValues(); int numAttributes = m_Data.numAttributes(); int classIndex = m_Data.classIndex(); double weights = 1; validate(); for (int p1 = 0, p2 = 0; p1 < firstNumValues || p2 < secondNumValues;) { weights += first.attribute(p1).weight(); if (p1 >= firstNumValues) firstI = numAttributes; else firstI = first.index(p1); if (p2 >= secondNumValues) secondI = numAttributes; else secondI = second.index(p2); if (firstI == classIndex) { p1++; continue; } if ((firstI < numAttributes) && !m_ActiveIndices[firstI]) { p1++; continue; } if (secondI == classIndex) { p2++; continue; } if ((secondI < numAttributes) && !m_ActiveIndices[secondI]) { p2++; continue; } double diff; if (firstI == secondI) { diff = difference(firstI, first.valueSparse(p1), second.valueSparse(p2)); p1++; p2++; } else if (firstI > secondI) { diff = difference(secondI, 0, second.valueSparse(p2)); p2++; } else { diff = difference(firstI, first.valueSparse(p1), 0); p1++; } if (stats != null) 
stats.incrCoordCount(); distance = updateDistance(distance, diff); if (distance > cutOffValue) return Double.POSITIVE_INFINITY; } if (weights > 1) { return distance / (weights - 1); } return distance / weights; }