List of usage examples for weka.core Instance value
public double value(Attribute att);
From source file:cerebro.Id3.java
License:Open Source License
/** * Computes class distribution for instance using decision tree. * * @param instance the instance for which distribution is to be computed * @return the class distribution for the given instance * @throws NoSupportForMissingValuesException if instance has missing values *//*from w ww . j av a 2 s.co m*/ public double[] distributionForInstance(Instance instance) throws NoSupportForMissingValuesException { if (instance.hasMissingValue()) { throw new NoSupportForMissingValuesException("Id3: no missing values, " + "please."); } if (m_Attribute == null) { return m_Distribution; } else { return m_Successors[(int) instance.value(m_Attribute)].distributionForInstance(instance); } }
From source file:cerebro.Id3.java
License:Open Source License
/** * Splits a dataset according to the values of a nominal attribute. * * @param data the data which is to be split * @param att the attribute to be used for splitting * @return the sets of instances produced by the split *///ww w . j ava 2 s . c o m private Instances[] splitData(Instances data, Attribute att) { Instances[] splitData = new Instances[att.numValues()]; for (int j = 0; j < att.numValues(); j++) { splitData[j] = new Instances(data, data.numInstances()); } Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); splitData[(int) inst.value(att)].add(inst); } for (int i = 0; i < splitData.length; i++) { splitData[i].compactify(); } return splitData; }
From source file:CGLSMethod.LinearRegression.java
License:Open Source License
/** * Calculate the dependent value for a given instance for a * given regression model./*from w ww . j a v a2 s. co m*/ * * @param transformedInstance the input instance * @param selectedAttributes an array of flags indicating which * attributes are included in the regression model * @param coefficients an array of coefficients for the regression * model * @return the regression value for the instance. * @throws Exception if the class attribute of the input instance * is not assigned */ private double regressionPrediction(Instance transformedInstance, boolean[] selectedAttributes, double[] coefficients) throws Exception { double result = 0; int column = 0; for (int j = 0; j < transformedInstance.numAttributes(); j++) { if ((m_ClassIndex != j) && (selectedAttributes[j])) { result += coefficients[column] * transformedInstance.value(j); column++; } } result += coefficients[column]; return result; }
From source file:CGLSMethod.LinearRegression.java
License:Open Source License
/** * Calculate a linear regression using the selected attributes * * @param selectedAttributes an array of booleans where each element * is true if the corresponding attribute should be included in the * regression./*from ww w. java 2 s . com*/ * @return an array of coefficients for the linear regression model. * @throws Exception if an error occurred during the regression. */ private double[] doRegression(boolean[] selectedAttributes) throws Exception { if (b_Debug) { System.out.print("doRegression("); for (int i = 0; i < selectedAttributes.length; i++) { System.out.print(" " + selectedAttributes[i]); } System.out.println(" )"); } int numAttributes = 0; for (int i = 0; i < selectedAttributes.length; i++) { if (selectedAttributes[i]) { numAttributes++; } } // Check whether there are still attributes left Matrix independent = null, dependent = null; double[] weights = null; if (numAttributes > 0) { independent = new Matrix(m_TransformedData.numInstances(), numAttributes); dependent = new Matrix(m_TransformedData.numInstances(), 1); for (int i = 0; i < m_TransformedData.numInstances(); i++) { Instance inst = m_TransformedData.instance(i); int column = 0; for (int j = 0; j < m_TransformedData.numAttributes(); j++) { if (j == m_ClassIndex) { dependent.setElement(i, 0, inst.classValue()); } else { if (selectedAttributes[j]) { double value = inst.value(j) - m_Means[j]; // We only need to do this if we want to // scale the input if (!m_checksTurnedOff) { value /= m_StdDevs[j]; } independent.setElement(i, column, value); column++; } } } } // Grab instance weights weights = new double[m_TransformedData.numInstances()]; for (int i = 0; i < weights.length; i++) { weights[i] = m_TransformedData.instance(i).weight(); } } // Compute coefficients (note that we have to treat the // intercept separately so that it doesn't get affected // by the ridge constant.) double[] coefficients = new double[numAttributes + 1]; if (numAttributes > 0) { double[] coeffsWithoutIntercept = independent.regression(dependent, weights, m_Ridge); System.arraycopy(coeffsWithoutIntercept, 0, coefficients, 0, numAttributes); } coefficients[numAttributes] = m_ClassMean; // Convert coefficients into original scale int column = 0; for (int i = 0; i < m_TransformedData.numAttributes(); i++) { if ((i != m_TransformedData.classIndex()) && (selectedAttributes[i])) { // We only need to do this if we have scaled the // input. if (!m_checksTurnedOff) { coefficients[column] /= m_StdDevs[i]; } // We have centred the input coefficients[coefficients.length - 1] -= coefficients[column] * m_Means[i]; column++; } } return coefficients; }
From source file:classes.AbdoAgglomerativeClusterer.java
/** * calculates the with-in-cluster variance as the sum squares of differences * between instances contained in the cluster and its mean divided by * cluster size//from w w w . j a va 2 s . c o m * * @param cluster * @return */ double calcWithinVariance(Vector<Integer> cluster) { double variance = 0.0; double[] fValues1 = new double[m_instances.numAttributes()]; for (int i = 0; i < cluster.size(); i++) { Instance instance = m_instances.instance(cluster.elementAt(i)); for (int j = 0; j < m_instances.numAttributes(); j++) { fValues1[j] += instance.value(j); } } for (int j = 0; j < m_instances.numAttributes(); j++) { fValues1[j] /= cluster.size(); } // set up two instances for distance function Instance centroid = (Instance) m_instances.instance(cluster.elementAt(0)).copy(); for (int j = 0; j < m_instances.numAttributes(); j++) { centroid.setValue(j, fValues1[j]); } for (int i = 0; i < cluster.size(); i++) { double temp = 0; Instance instance = m_instances.instance(cluster.elementAt(i)); for (int j = 0; j < m_instances.numAttributes(); j++) { temp += (instance.value(j) - centroid.value(j)); } variance += temp * temp; } return variance / cluster.size(); }
From source file:classif.Prototyper.java
License:Open Source License
@Override public void buildClassifier(Instances data) throws Exception { trainingData = data;/*from w w w . j a v a 2s. com*/ Attribute classAttribute = data.classAttribute(); prototypes = new ArrayList<>(); classedData = new HashMap<String, ArrayList<Sequence>>(); indexClassedDataInFullData = new HashMap<String, ArrayList<Integer>>(); for (int c = 0; c < data.numClasses(); c++) { classedData.put(data.classAttribute().value(c), new ArrayList<Sequence>()); indexClassedDataInFullData.put(data.classAttribute().value(c), new ArrayList<Integer>()); } sequences = new Sequence[data.numInstances()]; classMap = new String[sequences.length]; for (int i = 0; i < sequences.length; i++) { Instance sample = data.instance(i); MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1]; int shift = (sample.classIndex() == 0) ? 1 : 0; for (int t = 0; t < sequence.length; t++) { sequence[t] = new MonoDoubleItemSet(sample.value(t + shift)); } sequences[i] = new Sequence(sequence); String clas = sample.stringValue(classAttribute); classMap[i] = clas; classedData.get(clas).add(sequences[i]); indexClassedDataInFullData.get(clas).add(i); // System.out.println("Element "+i+" of train is classed "+clas+" and went to element "+(indexClassedDataInFullData.get(clas).size()-1)); } buildSpecificClassifier(data); if (fillPrototypes) addMissingPrototypesRandom(); }
From source file:classif.Prototyper.java
License:Open Source License
public double classifyInstance(Instance sample) throws Exception { // transform instance to sequence MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1]; int shift = (sample.classIndex() == 0) ? 1 : 0; for (int t = 0; t < sequence.length; t++) { sequence[t] = new MonoDoubleItemSet(sample.value(t + shift)); }// w w w. j a v a2 s .c om Sequence seq = new Sequence(sequence); double minD = Double.MAX_VALUE; String classValue = null; for (ClassedSequence s : prototypes) { double tmpD = seq.distance(s.sequence); if (tmpD < minD) { minD = tmpD; classValue = s.classValue; } } // System.out.println(prototypes.size()); return sample.classAttribute().indexOfValue(classValue); }
From source file:classif.Prototyper.java
License:Open Source License
public static ClassedSequence[] convertWekaSetToClassedSequence(Instances test) { Attribute classAttribute = test.classAttribute(); ClassedSequence[] testSequences = new ClassedSequence[test.numInstances()]; for (int i = 0; i < testSequences.length; i++) { Instance sample = test.instance(i); MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1]; int shift = (sample.classIndex() == 0) ? 1 : 0; for (int t = 0; t < sequence.length; t++) { sequence[t] = new MonoDoubleItemSet(sample.value(t + shift)); }//from w w w .j a v a2 s. c o m String clas = sample.stringValue(classAttribute); testSequences[i] = new ClassedSequence(new Sequence(sequence), clas); } return testSequences; }
From source file:classif.PrototyperEUC.java
License:Open Source License
@Override public double classifyInstance(Instance sample) throws Exception { // transform instance to sequence MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1]; int shift = (sample.classIndex() == 0) ? 1 : 0; for (int t = 0; t < sequence.length; t++) { sequence[t] = new MonoDoubleItemSet(sample.value(t + shift)); }//ww w .j a v a 2 s .co m Sequence seq = new Sequence(sequence); double minD = Double.MAX_VALUE; String classValue = null; for (ClassedSequence s : prototypes) { double tmpD = seq.distanceEuc(s.sequence); if (tmpD < minD) { minD = tmpD; classValue = s.classValue; } } // System.out.println(prototypes.size()); // System.out.println(classValue); return sample.classAttribute().indexOfValue(classValue); }
From source file:classifier.CustomStringToWordVector.java
License:Open Source License
/** * Converts the instance w/o normalization. * /*from w w w . ja va2 s. com*/ * @oaram instance the instance to convert * @param v * @return the conerted instance */ private int convertInstancewoDocNorm(Instance instance, FastVector v) { // Convert the instance into a sorted set of indexes TreeMap contained = new TreeMap(); // Copy all non-converted attributes from input to output int firstCopy = 0; for (int i = 0; i < getInputFormat().numAttributes(); i++) { if (!m_SelectedRange.isInRange(i)) { if (getInputFormat().attribute(i).type() != Attribute.STRING) { // Add simple nominal and numeric attributes directly if (instance.value(i) != 0.0) { contained.put(new Integer(firstCopy), new Double(instance.value(i))); } } else { if (instance.isMissing(i)) { contained.put(new Integer(firstCopy), new Double(Utils.missingValue())); } else { // If this is a string attribute, we have to first add // this value to the range of possible values, then add // its new internal index. if (outputFormatPeek().attribute(firstCopy).numValues() == 0) { // Note that the first string value in a // SparseInstance doesn't get printed. outputFormatPeek().attribute(firstCopy) .addStringValue("Hack to defeat SparseInstance bug"); } int newIndex = outputFormatPeek().attribute(firstCopy) .addStringValue(instance.stringValue(i)); contained.put(new Integer(firstCopy), new Double(newIndex)); } } firstCopy++; } } for (int j = 0; j < instance.numAttributes(); j++) { // if ((getInputFormat().attribute(j).type() == Attribute.STRING) if (m_SelectedRange.isInRange(j) && (instance.isMissing(j) == false)) { m_Tokenizer.tokenize(instance.stringValue(j)); while (m_Tokenizer.hasMoreElements()) { String word = (String) m_Tokenizer.nextElement(); if (this.m_lowerCaseTokens == true) word = word.toLowerCase(); word = m_Stemmer.stem(word); Integer index = (Integer) m_Dictionary.get(word); if (index != null) { if (m_OutputCounts) { // Separate if here rather than // two lines down to avoid // hashtable lookup Double count = (Double) contained.get(index); if (count != null) { contained.put(index, new Double(count.doubleValue() + 1.0)); } else { contained.put(index, new Double(1)); } } else { contained.put(index, new Double(1)); } } } } } // Doing TFTransform if (m_TFTransform == true) { Iterator it = contained.keySet().iterator(); for (int i = 0; it.hasNext(); i++) { Integer index = (Integer) it.next(); if (index.intValue() >= firstCopy) { double val = ((Double) contained.get(index)).doubleValue(); val = Math.log(val + 1); contained.put(index, new Double(val)); } } } // Doing IDFTransform if (m_IDFTransform == true) { Iterator it = contained.keySet().iterator(); for (int i = 0; it.hasNext(); i++) { Integer index = (Integer) it.next(); if (index.intValue() >= firstCopy) { double val = ((Double) contained.get(index)).doubleValue(); val = val * Math.log(m_NumInstances / (double) m_DocsCounts[index.intValue()]); contained.put(index, new Double(val)); } } } // Convert the set to structures needed to create a sparse instance. double[] values = new double[contained.size()]; int[] indices = new int[contained.size()]; Iterator it = contained.keySet().iterator(); for (int i = 0; it.hasNext(); i++) { Integer index = (Integer) it.next(); Double value = (Double) contained.get(index); values[i] = value.doubleValue(); indices[i] = index.intValue(); } Instance inst = new SparseInstance(instance.weight(), values, indices, outputFormatPeek().numAttributes()); inst.setDataset(outputFormatPeek()); v.addElement(inst); return firstCopy; }