List of usage examples for weka.core Instance value
public double value(Attribute att);
From source file:org.scripps.branch.classifier.ManualTree.java
License:Open Source License
/** * Splits instances into subsets based on the given split. * // w ww. ja v a 2 s . co m * @param data * the data to work with * @return the subsets of instances * @throws Exception * if something goes wrong */ protected Instances[] splitData(Instances data) throws Exception { // Allocate array of Instances objects Instances[] subsets = new Instances[m_Prop.length]; for (int i = 0; i < m_Prop.length; i++) { subsets[i] = new Instances(data, data.numInstances()); } if (m_Attribute >= data.numAttributes()) { if (m_Attribute >= listOfFc.size() + data.numAttributes() - 1) { CustomSet cSet = getReqCustomSet(m_Attribute - (data.numAttributes() - 1 + listOfFc.size()), cSetList); JsonNode vertices = mapper.readTree(cSet.getConstraints()); ArrayList<double[]> attrVertices = generateVerticesList(vertices); List<Attribute> aList = generateAttributeList(cSet, data, d); double[] testPoint = new double[2]; int ctr = 0; for (int k = 0; k < data.numInstances(); k++) { ctr = 0; for (Attribute a : aList) { testPoint[ctr] = data.instance(k).value(a); ctr++; } int check = checkPointInPolygon(attrVertices, testPoint); subsets[check].add(data.instance(k)); continue; } } else { Classifier fc; double predictedClass; // Go through the data for (int i = 0; i < data.numInstances(); i++) { // Get instance Instance inst = data.instance(i); String classifierId = getKeyinMap(listOfFc, m_Attribute, data); fc = listOfFc.get(classifierId); predictedClass = fc.classifyInstance(inst); if (predictedClass != Instance.missingValue()) { subsets[(int) predictedClass].add(inst); continue; } // Else throw an exception throw new IllegalArgumentException("Unknown attribute type"); } } } else { // Go through the data for (int i = 0; i < data.numInstances(); i++) { // Get instance Instance inst = data.instance(i); // Does the instance have a missing value? 
if (inst.isMissing(m_Attribute)) { // Split instance up for (int k = 0; k < m_Prop.length; k++) { if (m_Prop[k] > 0) { Instance copy = (Instance) inst.copy(); copy.setWeight(m_Prop[k] * inst.weight()); subsets[k].add(copy); } } // Proceed to next instance continue; } // Do we have a nominal attribute? if (data.attribute(m_Attribute).isNominal()) { subsets[(int) inst.value(m_Attribute)].add(inst); // Proceed to next instance continue; } // Do we have a numeric attribute? if (data.attribute(m_Attribute).isNumeric()) { subsets[(inst.value(m_Attribute) < m_SplitPoint) ? 0 : 1].add(inst); // Proceed to next instance continue; } // Else throw an exception throw new IllegalArgumentException("Unknown attribute type"); } } // Save memory for (int i = 0; i < m_Prop.length; i++) { subsets[i].compactify(); } // Return the subsets return subsets; }
From source file:org.sr.recognition.paleo.paleoNN.PaleoTrainer.java
License:BSD License
/** * Converts ARFF instances into a format readable by LibSVM and outputs it * to file/* w ww. j av a 2 s . c o m*/ * * @param filename * output file name * @param data * ARFF instances (the data) * @throws IOException * if output fails */ public static void libSVMToFile(String filename, Instances data) throws IOException { BufferedWriter writer = new BufferedWriter(new FileWriter(filename)); for (int i = 0; i < data.numInstances(); i++) { Instance inst = data.instance(i); double label = inst.value(inst.numValues() - 1); writer.write(label + "\t"); for (int a = 0; a < inst.numValues() - 1; a++) { double value = inst.value(a); if (Double.isInfinite(value) || Double.isNaN(value)) value = mean(data.attributeToDoubleArray(a)); writer.write((a + 1) + ":" + value); if (a < inst.numValues() - 2) writer.write(" "); } writer.newLine(); } }
From source file:org.stream_gpu.float_knn.float_search.EuclideanDistance.java
License:Open Source License
/** * Returns true if the value of the given dimension is smaller or equal the * value to be compared with.//from w w w . ja v a2 s. c o m * * @param instance the instance where the value should be taken of * @param dim the dimension of the value * @param value the value to compare with * @return true if value of instance is smaller or equal value */ public boolean valueIsSmallerEqual(Instance instance, int dim, float value) { //This stays return instance.value(dim) <= value; }
From source file:org.stream_gpu.float_knn.float_search.NormalizableDistance.java
License:Open Source License
/** * Used to initialize the ranges. For this the values of the first * instance is used to save time.//from www. j ava2 s . c o m * Sets low and high to the values of the first instance and * width to zero. * * @param instance the new instance * @param numAtt number of attributes in the model * @param ranges low, high and width values for all attributes */ public void updateRangesFirst(Instance instance, int numAtt, float[][] ranges) { for (int j = 0; j < numAtt; j++) { if (!instance.isMissing(j)) { ranges[j][R_MIN] = (float) instance.value(j); ranges[j][R_MAX] = (float) instance.value(j); ranges[j][R_WIDTH] = 0.0F; } else { // if value was missing ranges[j][R_MIN] = Float.POSITIVE_INFINITY; ranges[j][R_MAX] = -Float.POSITIVE_INFINITY; ranges[j][R_WIDTH] = Float.POSITIVE_INFINITY; } } }
From source file:org.stream_gpu.float_knn.float_search.NormalizableDistance.java
License:Open Source License
/** * Updates the minimum and maximum and width values for all the attributes * based on a new instance./*from w w w. j av a 2s . c om*/ * * @param instance the new instance * @param numAtt number of attributes in the model * @param ranges low, high and width values for all attributes */ public void updateRanges(Instance instance, int numAtt, float[][] ranges) { // updateRangesFirst must have been called on ranges for (int j = 0; j < numAtt; j++) { float value = (float) instance.value(j); if (!instance.isMissing(j)) { if (value < ranges[j][R_MIN]) { ranges[j][R_MIN] = value; ranges[j][R_WIDTH] = ranges[j][R_MAX] - ranges[j][R_MIN]; if (value > ranges[j][R_MAX]) { //if this is the first value that is ranges[j][R_MAX] = value; //not missing. The,0 ranges[j][R_WIDTH] = ranges[j][R_MAX] - ranges[j][R_MIN]; } } else { if (value > ranges[j][R_MAX]) { ranges[j][R_MAX] = value; ranges[j][R_WIDTH] = ranges[j][R_MAX] - ranges[j][R_MIN]; } } } } }
From source file:org.stream_gpu.float_knn.float_search.NormalizableDistance.java
License:Open Source License
/** * Updates the ranges given a new instance. * //from w w w.j a v a 2 s .c om * @param instance the new instance * @param ranges low, high and width values for all attributes * @return the updated ranges */ public float[][] updateRanges(Instance instance, float[][] ranges) { // updateRangesFirst must have been called on ranges for (int j = 0; j < ranges.length; j++) { float value = (float) instance.value(j); if (!instance.isMissing(j)) { if (value < ranges[j][R_MIN]) { ranges[j][R_MIN] = value; ranges[j][R_WIDTH] = ranges[j][R_MAX] - ranges[j][R_MIN]; } else { if (instance.value(j) > ranges[j][R_MAX]) { ranges[j][R_MAX] = value; ranges[j][R_WIDTH] = ranges[j][R_MAX] - ranges[j][R_MIN]; } } } } return ranges; }
From source file:org.stream_gpu.float_knn.float_search.NormalizableDistance.java
License:Open Source License
/** * Test if an instance is within the given ranges. * /*from w w w . j a va 2s . c om*/ * @param instance the instance * @param ranges the ranges the instance is tested to be in * @return true if instance is within the ranges */ public boolean inRanges(Instance instance, float[][] ranges) { boolean isIn = true; // updateRangesFirst must have been called on ranges for (int j = 0; isIn && (j < ranges.length); j++) { if (!instance.isMissing(j)) { float value = (float) instance.value(j); isIn = value <= ranges[j][R_MAX]; if (isIn) isIn = value >= ranges[j][R_MIN]; } } return isIn; }
From source file:org.uclab.mm.kcl.ddkat.datapreprocessor.OutlierHandler.java
License:Apache License
/** * Method to replace the detected outlier values. * * @throws Exception the exception//from ww w . j ava 2 s . co m */ public void replaceOutliers() throws Exception { Instances inputData, outputData; String inputFile = BASE_DIR + "OriginalDataSet.csv"; // load CSV file CSVLoader fileLoader = new CSVLoader(); fileLoader.setSource(new File(inputFile)); inputData = fileLoader.getDataSet(); this.setInputFormat(inputData); outputData = Filter.useFilter(inputData, this); int numInstances = outputData.numInstances(); int numAttributes = outputData.numAttributes(); final int NON_NUMERIC = -1; double[] outlier_AttributeValues = null; double[] extreme_AttributeValues = null; int[] m_AttributeIndices = null; Range m_Attributes = new Range("first-last"); // attributes must be numeric m_Attributes.setUpper(outputData.numAttributes() - 1); m_AttributeIndices = m_Attributes.getSelection(); for (int i = 0; i < m_AttributeIndices.length; i++) { // ignore class if (m_AttributeIndices[i] == outputData.classIndex()) { m_AttributeIndices[i] = NON_NUMERIC; continue; } // not numeric -> ignore it if (!outputData.attribute(m_AttributeIndices[i]).isNumeric()) m_AttributeIndices[i] = NON_NUMERIC; } for (int instanceIndex = 0; instanceIndex < numInstances; instanceIndex++) { // access instance Instance tempInstance = outputData.instance(instanceIndex); for (int attributeIndex = 0; attributeIndex < numAttributes; attributeIndex++) { // non-numeric attribute? 
if (m_AttributeIndices[attributeIndex] == NON_NUMERIC) { continue; } // detect the outlier values using Interquartile approach if (this.isOutlier(tempInstance, m_AttributeIndices[attributeIndex])) { double outlierValue = tempInstance.value(attributeIndex); int outlierColumnIndex = attributeIndex; double sum = 0.0; outlier_AttributeValues = outputData.attributeToDoubleArray(outlierColumnIndex); for (int i = 0; i < outlier_AttributeValues.length; i++) { sum = sum + outlier_AttributeValues[i]; } sum = sum - outlierValue; double replacedValue = sum / (outlier_AttributeValues.length - 1); replacedValue = Math.round(replacedValue * 100D) / 100D; // replace the outliers with attribute mean values outputData.instance(instanceIndex).setValue(outlierColumnIndex, replacedValue); } // extreme value? if (this.isExtremeValue(tempInstance, m_AttributeIndices[attributeIndex])) { double extremeValue = tempInstance.value(attributeIndex); int extremeColumnIndex = attributeIndex; double sum = 0.0; extreme_AttributeValues = outputData.attributeToDoubleArray(extremeColumnIndex); for (int i = 0; i < extreme_AttributeValues.length; i++) { sum = sum + extreme_AttributeValues[i]; } sum = sum - extremeValue; double replacedValue = sum / (extreme_AttributeValues.length - 1); replacedValue = Math.round(replacedValue * 100D) / 100D; outputData.instance(instanceIndex).setValue(extremeColumnIndex, replacedValue); } } } outputData.deleteAttributeAt(outputData.numAttributes() - 1); outputData.deleteAttributeAt(outputData.numAttributes() - 1); saveConsistentData(inputFile, outputData); }
From source file:org.wikipedia.miner.annotation.Disambiguator.java
License:Open Source License
/**
 * Re-weights the training instances so that both classes carry equal total
 * weight.
 *
 * <p>Instances whose value at attribute index 3 equals 0 are counted as
 * positive, all others as negative. With {@code p} the positive fraction,
 * positive instances get weight {@code 0.5 / p} and negative instances
 * {@code 0.5 / (1 - p)}.
 */
@SuppressWarnings("unchecked")
private void weightTrainingInstances() {
    double positiveInstances = 0;
    double negativeInstances = 0;

    // first pass: count both classes
    for (Enumeration<Instance> e = trainingData.enumerateInstances(); e.hasMoreElements();) {
        Instance instance = (Instance) e.nextElement();
        if (instance.value(3) == 0) {
            positiveInstances++;
        } else {
            negativeInstances++;
        }
    }

    double p = positiveInstances / (positiveInstances + negativeInstances);
    double positiveWeight = 0.5 * (1.0 / p);
    double negativeWeight = 0.5 * (1.0 / (1 - p));

    // second pass: assign the class-balancing weight
    for (Enumeration<Instance> e = trainingData.enumerateInstances(); e.hasMoreElements();) {
        Instance instance = (Instance) e.nextElement();
        if (instance.value(3) == 0) {
            instance.setWeight(positiveWeight);
        } else {
            instance.setWeight(negativeWeight);
        }
    }
}
From source file:org.wikipedia.miner.annotation.weighting.LinkDetector.java
License:Open Source License
/**
 * Re-weights the training instances so that both classes carry equal total
 * weight, and prints the class proportions to standard output.
 *
 * <p>Instances whose value at the last attribute
 * ({@code attributes.size() - 1}) equals 0 are counted as positive, all
 * others as negative. With {@code p} the positive fraction, positive
 * instances get weight {@code 0.5 / p} and negative instances
 * {@code 0.5 / (1 - p)}.
 */
@SuppressWarnings("unchecked")
private void weightTrainingInstances() {
    double positiveInstances = 0;
    double negativeInstances = 0;

    // first pass: count both classes
    for (Enumeration<Instance> e = trainingData.enumerateInstances(); e.hasMoreElements();) {
        Instance instance = e.nextElement();
        final int labelIndex = attributes.size() - 1;
        if (instance.value(labelIndex) == 0) {
            positiveInstances++;
        } else {
            negativeInstances++;
        }
    }

    double p = positiveInstances / (positiveInstances + negativeInstances);

    System.out.println("stats: positive=" + p + ", negative=" + (1 - p));

    double positiveWeight = 0.5 * (1.0 / p);
    double negativeWeight = 0.5 * (1.0 / (1 - p));

    // second pass: assign the class-balancing weight
    for (Enumeration<Instance> e = trainingData.enumerateInstances(); e.hasMoreElements();) {
        Instance instance = e.nextElement();
        final int labelIndex = attributes.size() - 1;
        if (instance.value(labelIndex) == 0) {
            instance.setWeight(positiveWeight);
        } else {
            instance.setWeight(negativeWeight);
        }
    }
}