List of usage examples for weka.core Instance value
public double value(Attribute att);
From source file:es.jarias.FMC.FMC.java
License:Open Source License
public static double[][] mutualInfo(Instances data, int[] indexes) { double[][] m_counts = new double[indexes.length][]; double[][][] m_2counts = new double[indexes.length][indexes.length][]; double[] nValues = new double[indexes.length]; double[][] I = new double[indexes.length][indexes.length]; for (int i = 0; i < indexes.length; i++) { nValues[i] = data.attribute(indexes[i]).numValues(); m_counts[i] = new double[(int) nValues[i]]; }/* ww w . j a v a 2s . co m*/ for (int i = 0; i < indexes.length; i++) { for (int j = 0; j < indexes.length; j++) { if (i != j) { double cardinality = nValues[i] * nValues[j]; m_2counts[i][j] = new double[(int) cardinality]; } } } // Compute counts: for (Instance d : data) { for (int i = 0; i < indexes.length; i++) { m_counts[i][(int) d.value(indexes[i])]++; for (int j = 0; j < indexes.length; j++) { if (i != j) { int index = (int) (d.value(indexes[j]) * nValues[i] + d.value(indexes[i])); m_2counts[i][j][index]++; } } } } // Calculate MI(X_i; X_j) for (int i = 0; i < indexes.length; i++) { for (int j = 0; j < indexes.length; j++) { if (i != j) { double mi = 0.0; for (int v_i = 0; v_i < nValues[i]; v_i++) { for (int v_j = 0; v_j < nValues[j]; v_j++) { if ((1.0 * data.numInstances() * m_2counts[i][j][(int) (v_j * nValues[i] + v_i)]) / (1.0 * m_counts[i][v_i] * m_counts[j][v_j]) > 0) mi += m_2counts[i][j][(int) (v_j * nValues[i] + v_i)] * Math.log((1.0 * data.numInstances() * m_2counts[i][j][(int) (v_j * nValues[i] + v_i)]) / (1.0 * m_counts[i][v_i] * m_counts[j][v_j])); } } I[i][j] = mi / data.numInstances(); } } } return I; }
From source file:es.jarias.FMC.HITON.java
License:Open Source License
public HITON(Instances data) { dataset = data;//from www . j av a 2s.c o m m_numAttributes = dataset.numAttributes(); m_numCases = dataset.numInstances(); m_numValues = new int[m_numAttributes]; for (int att = 0; att < m_numAttributes; att++) m_numValues[att] = dataset.attribute(att).numValues(); m_counts = new double[m_numAttributes][]; for (int att = 0; att < m_numAttributes; att++) m_counts[att] = new double[m_numValues[att]]; m_condiCounts = new double[m_numAttributes][m_numAttributes][]; for (int att1 = 0; att1 < m_numAttributes; att1++) { for (int att2 = att1 + 1; att2 < m_numAttributes; att2++) { m_condiCounts[att1][att2] = new double[m_numValues[att1] * m_numValues[att2]]; m_condiCounts[att2][att1] = new double[m_numValues[att1] * m_numValues[att2]]; } } I = new double[m_numAttributes][m_numAttributes]; // Compute counts: for (Instance inst : dataset) { for (int att1 = 0; att1 < m_numAttributes; att1++) { m_counts[att1][(int) inst.value(att1)]++; for (int att2 = att1 + 1; att2 < m_numAttributes; att2++) { m_condiCounts[att1][att2][(int) (inst.value(att1) * m_numValues[att2] + inst.value(att2))]++; m_condiCounts[att2][att1][(int) (inst.value(att2) * m_numValues[att1] + inst.value(att1))]++; } } } // Compute I(X_i; X_j) for (int i = 0; i < m_numAttributes; i++) { for (int j = 0; j < m_numAttributes; j++) { if (i == j) continue; double mi = 0.0; for (int v_i = 0; v_i < m_numValues[i]; v_i++) { for (int v_j = 0; v_j < m_numValues[j]; v_j++) { int condiIndex = (int) (v_i * m_numValues[j] + v_j); if ((1.0 * m_numCases * m_condiCounts[i][j][condiIndex]) / (1.0 * m_counts[i][v_i] * m_counts[j][v_j]) > 0) mi += m_condiCounts[i][j][condiIndex] * Math.log((1.0 * m_numCases * m_condiCounts[i][j][condiIndex]) / (1.0 * m_counts[i][v_i] * m_counts[j][v_j])); } } I[i][j] = mi / m_numCases; } } // Compute parent and children for every variable: PC1 = new ArrayList[m_numAttributes]; for (int i = 0; i < m_numAttributes; i++) PC1[i] = HITONPC1(i); PC = new HashSet[m_numAttributes]; for (int i = 0; i < m_numAttributes; i++) PC[i] = HITONPC2(i); }
From source file:es.jarias.FMC.HITON.java
License:Open Source License
public double computeConditionalMI(int X, int Y, Integer[] Z) { int Zcardinality = 1; int[] offsets = new int[Z.length]; for (int z = Z.length - 1; z >= 0; z--) { offsets[z] = Zcardinality;/*from www .j a v a 2 s . c o m*/ Zcardinality *= m_numValues[Z[z]]; } offsets[Z.length - 1] = 0; double conditional2Counts[][][] = new double[m_numValues[X]][m_numValues[Y]][Zcardinality]; double conditionalXCounts[][] = new double[m_numValues[X]][Zcardinality]; double conditionalYCounts[][] = new double[m_numValues[Y]][Zcardinality]; double Zcounts[] = new double[Zcardinality]; for (Instance inst : dataset) { int index = 0; for (int z = 0; z < Z.length; z++) index += inst.value(Z[z]) * offsets[z]; index += inst.value(Z[Z.length - 1]); Zcounts[index]++; conditionalXCounts[(int) inst.value(X)][index]++; conditionalYCounts[(int) inst.value(Y)][index]++; conditional2Counts[(int) inst.value(X)][(int) inst.value(Y)][index]++; } // COMPUTE I(X;Y|Z) double mi = 0.0; for (int v_z = 0; v_z < Zcardinality; v_z++) { double p_z = Zcounts[v_z] / m_numCases; double t_mi = 0.0; for (int v_x = 0; v_x < m_numValues[X]; v_x++) { for (int v_y = 0; v_y < m_numValues[Y]; v_y++) { if ((1.0 * conditional2Counts[v_x][v_y][v_z] / Zcounts[v_z]) / ((conditionalXCounts[v_x][v_z] / Zcounts[v_z]) * (conditionalYCounts[v_y][v_z] / Zcounts[v_z])) > 0) { t_mi += (conditional2Counts[v_x][v_y][v_z] / Zcounts[v_z]) * Math.log((1.0 * conditional2Counts[v_x][v_y][v_z] / Zcounts[v_z]) / ((conditionalXCounts[v_x][v_z] / Zcounts[v_z]) * (conditionalYCounts[v_y][v_z] / Zcounts[v_z]))); } } } mi += p_z * t_mi; } return mi; }
From source file:etc.aloe.filters.AbstractRegexFilter.java
License:Open Source License
@Override protected Instance process(Instance instance) throws Exception { if (stringAttributeIndex < 0) { throw new IllegalStateException("String attribute not set"); }/*www . ja v a 2 s . com*/ String stringValue = instance.stringValue(stringAttributeIndex); NamedRegex[] regexFeatures = getRegexFeatures(); int numOldValues = instance.numAttributes(); int numNewFeatures = regexFeatures.length; if (countRegexLengths) { numNewFeatures = regexFeatures.length * 2; } double[] newValues = new double[numOldValues + numNewFeatures]; // Copy all attributes from input to output for (int i = 0; i < getInputFormat().numAttributes(); i++) { if (getInputFormat().attribute(i).type() != Attribute.STRING) { // Add simple nominal and numeric attributes directly if (instance.value(i) != 0.0) { newValues[i] = instance.value(i); } } else { if (instance.isMissing(i)) { newValues[i] = Utils.missingValue(); } else { // If this is a string attribute, we have to first add // this value to the range of possible values, then add // its new internal index. if (outputFormatPeek().attribute(i).numValues() == 0) { // Note that the first string value in a // SparseInstance doesn't get printed. outputFormatPeek().attribute(i).addStringValue("Hack to defeat SparseInstance bug"); } int newIndex = outputFormatPeek().attribute(i).addStringValue(instance.stringValue(i)); newValues[i] = newIndex; } } } for (int i = 0; i < regexFeatures.length; i++) { Pattern pattern = regexFeatures[i].getPattern(); Matcher matches = pattern.matcher(stringValue); int count = 0; int maxLength = 0; while (matches.find()) { count++; int len = matches.group().length(); if (len > maxLength) { maxLength = len; } } int index = numOldValues + i; if (countRegexLengths) { index = numOldValues + 2 * i; } newValues[index] = count; if (countRegexLengths) { newValues[numOldValues + 2 * i + 1] = maxLength; } } Instance result = new SparseInstance(instance.weight(), newValues); return result; }
From source file:etc.aloe.filters.StringToDictionaryVector.java
License:Open Source License
/** * Converts the instance w/o normalization. * * @param instance the instance to convert * * @param ArrayList<Instance> the list of instances * @return the document length//from w w w . j a v a 2 s.com */ private double convertInstancewoDocNorm(Instance instance, ArrayList<Instance> converted) { if (stringAttributeIndex < 0) { throw new IllegalStateException("String attribute index not valid"); } int numOldValues = instance.numAttributes(); double[] newValues = new double[numOldValues + m_selectedTerms.size()]; // Copy all attributes from input to output for (int i = 0; i < getInputFormat().numAttributes(); i++) { if (getInputFormat().attribute(i).type() != Attribute.STRING) { // Add simple nominal and numeric attributes directly if (instance.value(i) != 0.0) { newValues[i] = instance.value(i); } } else { if (instance.isMissing(i)) { newValues[i] = Utils.missingValue(); } else { // If this is a string attribute, we have to first add // this value to the range of possible values, then add // its new internal index. if (outputFormatPeek().attribute(i).numValues() == 0) { // Note that the first string value in a // SparseInstance doesn't get printed. outputFormatPeek().attribute(i).addStringValue("Hack to defeat SparseInstance bug"); } int newIndex = outputFormatPeek().attribute(i).addStringValue(instance.stringValue(i)); newValues[i] = newIndex; } } } String stringValue = instance.stringValue(stringAttributeIndex); double docLength = 0; HashMap<String, Integer> termMatches = m_selectedTermsTrie.countNonoverlappingMatches(stringValue); for (Map.Entry<String, Integer> entry : termMatches.entrySet()) { String term = entry.getKey(); int termIdx = m_selectedTermIndices.get(term); double matches = entry.getValue(); if (!m_OutputCounts && matches > 0) { matches = 1; } if (matches > 0) { if (m_TFTransform == true) { matches = Math.log(matches + 1); } if (m_IDFTransform == true) { matches = matches * Math.log(m_NumInstances / (double) m_DocsCounts[termIdx]); } newValues[numOldValues + termIdx] = matches; docLength += matches * matches; } } Instance result = new SparseInstance(instance.weight(), newValues); converted.add(result); return Math.sqrt(docLength); }
From source file:etc.aloe.filters.StringToDictionaryVector.java
License:Open Source License
/** * Normalizes given instance to average doc length (only the newly * constructed attributes)./*from ww w. java 2 s .c om*/ * * @param inst the instance to normalize * @param double the document length * @throws Exception if avg. doc length not set */ private void normalizeInstance(Instance inst, double docLength) throws Exception { if (docLength == 0) { return; } int numOldValues = getInputFormat().numAttributes(); if (m_AvgDocLength < 0) { throw new Exception("Average document length not set."); } // Normalize document vector for (int j = numOldValues; j < inst.numAttributes(); j++) { double val = inst.value(j) * m_AvgDocLength / docLength; inst.setValue(j, val); } }
From source file:etc.aloe.filters.WordFeaturesExtractor.java
License:Open Source License
@Override protected Instance process(Instance instance) throws Exception { if (selectedAttributeIndex < 0) { throw new IllegalStateException("String attribute not set"); }//from w ww . jav a 2 s . co m int numOldValues = instance.numAttributes(); int numNewFeatures = unigrams.size() + bigrams.size(); double[] newValues = new double[numOldValues + numNewFeatures]; // Copy all attributes from input to output for (int i = 0; i < getInputFormat().numAttributes(); i++) { if (getInputFormat().attribute(i).type() != Attribute.STRING) { // Add simple nominal and numeric attributes directly if (instance.value(i) != 0.0) { newValues[i] = instance.value(i); } } else { if (instance.isMissing(i)) { newValues[i] = Utils.missingValue(); } else { // If this is a string attribute, we have to first add // this value to the range of possible values, then add // its new internal index. if (outputFormatPeek().attribute(i).numValues() == 0) { // Note that the first string value in a // SparseInstance doesn't get printed. outputFormatPeek().attribute(i).addStringValue("Hack to defeat SparseInstance bug"); } int newIndex = outputFormatPeek().attribute(i).addStringValue(instance.stringValue(i)); newValues[i] = newIndex; } } } String stringValue = instance.stringValue(selectedAttributeIndex); if (instance.isMissing(selectedAttributeIndex) == false) { List<String> words = tokenizeDocument(instance); Set<String> wordSet = new HashSet<String>(words); for (int i = 0; i < unigrams.size(); i++) { String unigram = unigrams.get(i); int count = 0; if (wordSet.contains(unigram)) { //Count the times the word is in the document for (int w = 0; w < words.size(); w++) { if (words.get(w).equals(unigram)) { count += 1; } } } int featureIndex = numOldValues + i; newValues[featureIndex] = count; } for (int i = 0; i < bigrams.size(); i++) { Bigram bigram = bigrams.get(i); int count = bigram.getTimesInDocument(words); int featureIndex = numOldValues + unigrams.size() + i; newValues[featureIndex] = count; } } Instance result = new SparseInstance(instance.weight(), newValues); return result; }
From source file:experimentshell.knn.java
@Override public double classifyInstance(Instance instance) throws Exception { // sorting happens automatically with a TreeMap Map<Integer, Double> distances = new TreeMap<>(); int distance = 0; // iterate through the data and determine the distance for (int i = 0; i < data.numInstances(); i++) { for (int j = 0; j < data.numAttributes(); j++) { // this is the Manhattan distance distance += abs((int) (data.instance(i).value(j) - instance.value(j))); // the Euclidean distance // distance += pow((int)(data.instance(i).value(j) - instance.value(j)), 2); }/* w ww. jav a 2 s. co m*/ distances.put(distance, data.instance(i).classValue()); distance = 0; } // find the majority class of the nearest neighbor int count = 0; double tempClass; int tally[] = new int[data.numClasses()]; // use k instances to find the majority class and assign that instance for (Map.Entry<Integer, Double> entry : distances.entrySet()) { if (count >= k) break; tempClass = entry.getValue(); tally[(int) tempClass]++; count++; } int maxIndex = 0; int majority; for (int i = 0; i < data.numClasses(); i++) { majority = tally[i]; if (majority > tally[maxIndex]) { maxIndex = i; } } return maxIndex; }
From source file:expshell.KNN.java
@Override public double classifyInstance(Instance instance) throws Exception { //Sort by using TreeMap Map<Integer, Double> distances = new TreeMap<>(); int distance = 0; //go through the data and find hte distance for (int i = 0; i < data.numInstances(); i++) { for (int j = 0; i < data.numAttributes(); j++) { //the Manhattan distance distance += abs((int) (data.instance(i).value(j) - instance.value(j))); }/*from w w w . j a va 2 s.c o m*/ distances.put(distance, data.instance(i).classValue()); distance = 0; } //find the nearest neighbor's major class int count = 0; double tempClass; int tally[] = new int[data.numClasses()]; // comment... for (Map.Entry<Integer, Double> entry : distances.entrySet()) { if (count >= k) break; tempClass = entry.getValue(); tally[(int) tempClass]++; count++; } int maxIndex = 0; int majority; for (int i = 0; i < data.numClasses(); i++) { majority = tally[i]; if (majority > tally[maxIndex]) { maxIndex = i; } } return maxIndex; }
From source file:expshell.Neural.java
public double cal(Instance ins) { double sum = 0; sum += -1 * weights.get(0);//from ww w .j a va2 s.c o m for (int i = 1; i < weights.size(); i++) { sum += ins.value(i - 1) * weights.get(i); } List<Double> temp = new ArrayList<Double>(); for (int i = 0; i < ins.numValues(); i++) { temp.add(ins.value(i)); } values = temp; //if (sum > 0) // return 1.0; return threshold(sum); }