List of usage examples for weka.core Instances variance
public double variance(Attribute att)
public double variance(int attIndex)
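Before the collected examples, a minimal standalone sketch of both overloads (not taken from any of the source files below; "data.arff" is a placeholder path for any dataset with numeric attributes):

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class VarianceDemo {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("data.arff"); // placeholder dataset
        for (int i = 0; i < data.numAttributes(); i++) {
            if (data.attribute(i).isNumeric()) {
                // variance(int) and variance(Attribute) return the same sample variance
                double byIndex = data.variance(i);
                double byAttribute = data.variance(data.attribute(i));
                System.out.println(data.attribute(i).name() + ": variance = " + byIndex
                        + ", std dev = " + Math.sqrt(byAttribute));
            }
        }
    }
}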
From source file:de.ugoe.cs.cpdp.dataprocessing.NormalizationUtil.java
License:Apache License
/**
 * <p>
 * Z-Score normalization using the mean and std of the test data (N4 in Transfer Defect
 * Learning by Nam et al.).
 * </p>
 *
 * @param testdata
 *            test data of the target product
 * @param traindataSet
 *            training data
 */
public static void zScoreTarget(Instances testdata, SetUniqueList<Instances> traindataSet) {
    final double[] mean = new double[testdata.numAttributes()];
    final double[] std = new double[testdata.numAttributes()];

    // get means and standard deviations of the test data
    for (int j = 0; j < testdata.numAttributes(); j++) {
        if (testdata.classIndex() != j) {
            mean[j] = testdata.meanOrMode(j);
            std[j] = Math.sqrt(testdata.variance(j));
        }
    }
    applyZScore(testdata, mean, std);
    for (Instances traindata : traindataSet) {
        applyZScore(traindata, mean, std);
    }
}
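The applyZScore helper is defined elsewhere in NormalizationUtil and is not shown in this snippet. A plausible minimal sketch of it, assuming it standardizes every non-class value in place as (x - mean) / std and skips attributes with zero standard deviation (the guard and loop structure are assumptions, not the project's confirmed code):

private static void applyZScore(Instances data, double[] mean, double[] std) {
    for (int i = 0; i < data.numInstances(); i++) {
        for (int j = 0; j < data.numAttributes(); j++) {
            if (data.classIndex() != j && std[j] > 0) {
                // z-score: subtract the mean, divide by the standard deviation
                data.instance(i).setValue(j, (data.instance(i).value(j) - mean[j]) / std[j]);
            }
        }
    }
}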
From source file:de.unimannheim.dws.algorithms.CustomSimpleKMedian.java
License:Open Source License
/**
 * Generates a clusterer. Has to initialize all fields of the clusterer that
 * are not being set via options.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the clusterer has not been generated successfully
 */
@Override
public void buildClusterer(Instances data) throws Exception {
    // can clusterer handle the data?
    getCapabilities().testWithFail(data);

    m_Iterations = 0;
    m_ReplaceMissingFilter = new ReplaceMissingValues();
    Instances instances = new Instances(data);
    instances.setClassIndex(-1);
    if (!m_dontReplaceMissing) {
        m_ReplaceMissingFilter.setInputFormat(instances);
        instances = Filter.useFilter(instances, m_ReplaceMissingFilter);
    }

    m_FullMissingCounts = new int[instances.numAttributes()];
    if (m_displayStdDevs) {
        m_FullStdDevs = new double[instances.numAttributes()];
    }
    m_FullNominalCounts = new int[instances.numAttributes()][0];

    m_FullMeansOrMediansOrModes = moveCentroid(0, instances, false);
    for (int i = 0; i < instances.numAttributes(); i++) {
        m_FullMissingCounts[i] = instances.attributeStats(i).missingCount;
        if (instances.attribute(i).isNumeric()) {
            if (m_displayStdDevs) {
                m_FullStdDevs[i] = Math.sqrt(instances.variance(i));
            }
            if (m_FullMissingCounts[i] == instances.numInstances()) {
                m_FullMeansOrMediansOrModes[i] = Double.NaN; // mark missing as mean
            }
        } else {
            m_FullNominalCounts[i] = instances.attributeStats(i).nominalCounts;
            if (m_FullMissingCounts[i] > m_FullNominalCounts[i][Utils.maxIndex(m_FullNominalCounts[i])]) {
                m_FullMeansOrMediansOrModes[i] = -1; // mark missing as most common value
            }
        }
    }

    m_ClusterCentroids = new Instances(instances, m_NumClusters);
    int[] clusterAssignments = new int[instances.numInstances()];
    if (m_PreserveOrder) {
        m_Assignments = clusterAssignments;
    }
    m_DistanceFunction.setInstances(instances);

    Random RandomO = new Random(getSeed());
    int instIndex;
    HashMap initC = new HashMap();
    DecisionTableHashKey hk = null;

    Instances initInstances = null;
    if (m_PreserveOrder) {
        initInstances = new Instances(instances);
    } else {
        initInstances = instances;
    }

    for (int j = initInstances.numInstances() - 1; j >= 0; j--) {
        instIndex = RandomO.nextInt(j + 1);
        hk = new DecisionTableHashKey(initInstances.instance(instIndex), initInstances.numAttributes(), true);
        if (!initC.containsKey(hk)) {
            m_ClusterCentroids.add(initInstances.instance(instIndex));
            initC.put(hk, null);
        }
        initInstances.swap(j, instIndex);
        if (m_ClusterCentroids.numInstances() == m_NumClusters) {
            break;
        }
    }
    m_NumClusters = m_ClusterCentroids.numInstances();

    // removing reference
    initInstances = null;

    int i;
    boolean converged = false;
    int emptyClusterCount;
    Instances[] tempI = new Instances[m_NumClusters];
    m_squaredErrors = new double[m_NumClusters];
    m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
    m_ClusterMissingCounts = new int[m_NumClusters][instances.numAttributes()];
    while (!converged) {
        emptyClusterCount = 0;
        m_Iterations++;
        converged = true;
        for (i = 0; i < instances.numInstances(); i++) {
            Instance toCluster = instances.instance(i);
            int newC = clusterProcessedInstance(toCluster, true);
            if (newC != clusterAssignments[i]) {
                converged = false;
            }
            clusterAssignments[i] = newC;
        }

        // update centroids
        m_ClusterCentroids = new Instances(instances, m_NumClusters);
        for (i = 0; i < m_NumClusters; i++) {
            tempI[i] = new Instances(instances, 0);
        }
        for (i = 0; i < instances.numInstances(); i++) {
            tempI[clusterAssignments[i]].add(instances.instance(i));
        }
        for (i = 0; i < m_NumClusters; i++) {
            if (tempI[i].numInstances() == 0) {
                // empty cluster
                emptyClusterCount++;
            } else {
                moveCentroid(i, tempI[i], true);
            }
        }

        if (m_Iterations == m_MaxIterations) {
            converged = true;
        }

        if (emptyClusterCount > 0) {
            m_NumClusters -= emptyClusterCount;
            if (converged) {
                Instances[] t = new Instances[m_NumClusters];
                int index = 0;
                for (int k = 0; k < tempI.length; k++) {
                    if (tempI[k].numInstances() > 0) {
                        t[index] = tempI[k];
                        for (i = 0; i < tempI[k].numAttributes(); i++) {
                            m_ClusterNominalCounts[index][i] = m_ClusterNominalCounts[k][i];
                        }
                        index++;
                    }
                }
                tempI = t;
            } else {
                tempI = new Instances[m_NumClusters];
            }
        }

        if (!converged) {
            m_squaredErrors = new double[m_NumClusters];
            m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
        }
    }

    if (m_displayStdDevs) {
        m_ClusterStdDevs = new Instances(instances, m_NumClusters);
    }
    m_ClusterSizes = new int[m_NumClusters];
    for (i = 0; i < m_NumClusters; i++) {
        if (m_displayStdDevs) {
            double[] vals2 = new double[instances.numAttributes()];
            for (int j = 0; j < instances.numAttributes(); j++) {
                if (instances.attribute(j).isNumeric()) {
                    vals2[j] = Math.sqrt(tempI[i].variance(j));
                } else {
                    vals2[j] = Instance.missingValue();
                }
            }
            m_ClusterStdDevs.add(new Instance(1.0, vals2));
        }
        m_ClusterSizes[i] = tempI[i].numInstances();
    }

    // Save memory!!
    m_DistanceFunction.clean();
}
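The custom clusterers on this page (here and in the two SimpleKMeans variants below) all use Instances.variance the same way: with display of standard deviations enabled, buildClusterer fills a per-cluster table of sqrt(variance(j)) values. For context, a minimal sketch of the same pattern using the stock weka.clusterers.SimpleKMeans (cluster count and dataset path are placeholders):

import weka.clusterers.SimpleKMeans;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class KMeansStdDevDemo {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("data.arff"); // placeholder dataset
        SimpleKMeans kMeans = new SimpleKMeans();
        kMeans.setNumClusters(3);
        kMeans.setDisplayStdDevs(true); // makes buildClusterer compute the std-dev table
        kMeans.buildClusterer(data);

        // one row per cluster; each numeric cell is sqrt(variance) over that cluster's members
        Instances stdDevs = kMeans.getClusterStandardDevs();
        System.out.println(stdDevs);
    }
}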
From source file:fantail.algorithms.BinaryART.java
License:Open Source License
private void makeTree(Instances data, java.util.Random r, int depth) throws Exception {
    if (m_K > data.numAttributes()) {
        m_K = data.numAttributes() - 1;
    }
    if (m_K < 1) {
        m_K = (int) weka.core.Utils.log2(data.numAttributes()) + 1;
    }

    int[] randAtts = new int[data.numAttributes() - 1]; // TODO: handle class target att
    for (int i = 0; i < randAtts.length; i++) {
        randAtts[i] = i;
    }
    for (int i = 0; i < randAtts.length; i++) {
        int randomPosition = r.nextInt(randAtts.length);
        int temp = randAtts[i];
        randAtts[i] = randAtts[randomPosition];
        randAtts[randomPosition] = temp;
    }

    int bestAttIndex = -1;
    AttScorePair[] attScorePair = new AttScorePair[m_K];
    //double currentR2 = estimateAvgDistanceSpearman(data);

    for (int i = 0; i < m_K; i++) {
        int attIndex = randAtts[i];
        double splitPoint = Double.NaN;
        if (!m_UseMedian) {
            splitPoint = data.meanOrMode(attIndex);
        } else {
            splitPoint = getMedian(data, attIndex);
        }
        double r2 = estimateR2(data, attIndex, splitPoint);
        attScorePair[i] = new AttScorePair(attIndex, r2);
    }

    Arrays.sort(attScorePair);
    bestAttIndex = attScorePair[0].index;
    double maxR2 = attScorePair[0].score;

    boolean stop1 = false;
    // for (int kk = 0; kk < attScorePair.length; kk++) {
    //     System.out.println(attScorePair[kk].score);
    // }
    // if (true) {
    //     throw new Exception("stop");
    // }
    if (attScorePair[0].score <= attScorePair[m_K - 1].score) {
        stop1 = true;
    }

    if (data.numInstances() <= m_MiniLeaf
            || (depth >= m_MaxDepth && m_MaxDepth != 0)
            //|| maxR2 <= 0.01 // removed 10/01/2013
            || maxR2 >= 0.95
            || stop1 // 11/01/13 the paper version doesn't have this
            || data.variance(bestAttIndex) <= 0) {
        m_Attribute = null;
        m_Prototype = AbstractRanker.getAvgRanking(data);
        //m_Prototype = AbstractRanker.getCenterRanking(data, m_ApproxCenterMethod);
        return;
    }

    m_Attribute = data.attribute(bestAttIndex);
    if (!m_UseMedian) {
        m_SplitPoint = data.meanOrMode(bestAttIndex);
    } else {
        m_SplitPoint = getMedian(data, bestAttIndex);
    }

    Instances[] splitData = splitData(data, bestAttIndex, m_SplitPoint);
    m_Successors = new BinaryART[2];
    for (int j = 0; j < 2; j++) {
        m_Successors[j] = new BinaryART();
        m_Successors[j].setMiniLeaf(m_MiniLeaf);
        m_Successors[j].setK(m_K);
        m_Successors[j].setUseMedian(m_UseMedian);
        m_Successors[j].setNumObjects(m_NumObjects);
        m_Successors[j].makeTree(splitData[j], r, depth + 1);
    }
}
From source file:gr.iti.mklab.visual.quantization.SimpleKMeansWithOutput.java
License:Open Source License
/**
 * Generates a clusterer. Has to initialize all fields of the clusterer that are not being set
 * via options.
 *
 * @param data
 *            set of instances serving as training data
 * @throws Exception
 *             if the clusterer has not been generated successfully
 */
@Override
public void buildClusterer(Instances data) throws Exception {
    // can clusterer handle the data?
    getCapabilities().testWithFail(data);

    m_Iterations = 0;
    m_ReplaceMissingFilter = new ReplaceMissingValues();
    Instances instances = new Instances(data);
    instances.setClassIndex(-1);
    if (!m_dontReplaceMissing) {
        m_ReplaceMissingFilter.setInputFormat(instances);
        instances = Filter.useFilter(instances, m_ReplaceMissingFilter);
    }

    m_FullMissingCounts = new int[instances.numAttributes()];
    if (m_displayStdDevs) {
        m_FullStdDevs = new double[instances.numAttributes()];
    }
    m_FullNominalCounts = new int[instances.numAttributes()][0];

    m_FullMeansOrMediansOrModes = moveCentroid(0, instances, false, false);
    for (int i = 0; i < instances.numAttributes(); i++) {
        m_FullMissingCounts[i] = instances.attributeStats(i).missingCount;
        if (instances.attribute(i).isNumeric()) {
            if (m_displayStdDevs) {
                m_FullStdDevs[i] = Math.sqrt(instances.variance(i));
            }
            if (m_FullMissingCounts[i] == instances.numInstances()) {
                m_FullMeansOrMediansOrModes[i] = Double.NaN; // mark missing as mean
            }
        } else {
            m_FullNominalCounts[i] = instances.attributeStats(i).nominalCounts;
            if (m_FullMissingCounts[i] > m_FullNominalCounts[i][Utils.maxIndex(m_FullNominalCounts[i])]) {
                m_FullMeansOrMediansOrModes[i] = -1; // mark missing as most common value
            }
        }
    }

    m_ClusterCentroids = new Instances(instances, m_NumClusters);
    int[] clusterAssignments = new int[instances.numInstances()];
    if (m_PreserveOrder)
        m_Assignments = clusterAssignments;
    m_DistanceFunction.setInstances(instances);

    Random RandomO = new Random(getSeed());
    int instIndex;
    HashMap initC = new HashMap();
    DecisionTableHashKey hk = null;

    Instances initInstances = null;
    if (m_PreserveOrder)
        initInstances = new Instances(instances);
    else
        initInstances = instances;

    if (m_initializeWithKMeansPlusPlus) {
        kMeansPlusPlusInit(initInstances);
    } else {
        for (int j = initInstances.numInstances() - 1; j >= 0; j--) {
            instIndex = RandomO.nextInt(j + 1);
            hk = new DecisionTableHashKey(initInstances.instance(instIndex), initInstances.numAttributes(), true);
            if (!initC.containsKey(hk)) {
                m_ClusterCentroids.add(initInstances.instance(instIndex));
                initC.put(hk, null);
            }
            initInstances.swap(j, instIndex);
            if (m_ClusterCentroids.numInstances() == m_NumClusters) {
                break;
            }
        }
    }
    m_NumClusters = m_ClusterCentroids.numInstances();

    // removing reference
    initInstances = null;

    int i;
    boolean converged = false;
    int emptyClusterCount;
    Instances[] tempI = new Instances[m_NumClusters];
    m_squaredErrors = new double[m_NumClusters];
    m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
    m_ClusterMissingCounts = new int[m_NumClusters][instances.numAttributes()];
    startExecutorPool();
    long start = System.currentTimeMillis();
    while (!converged) {
        emptyClusterCount = 0;
        m_Iterations++;
        converged = true;
        System.out.print(new Date() + ": " + "Iter " + m_Iterations + " ");
        if (m_executionSlots <= 1 || instances.numInstances() < 2 * m_executionSlots) {
            for (i = 0; i < instances.numInstances(); i++) {
                Instance toCluster = instances.instance(i);
                int newC = clusterProcessedInstance(toCluster, true, true);
                if (newC != clusterAssignments[i]) {
                    converged = false;
                }
                clusterAssignments[i] = newC;
            }
        } else {
            converged = launchAssignToClusters(instances, clusterAssignments);
        }

        // update centroids
        m_ClusterCentroids = new Instances(instances, m_NumClusters);
        for (i = 0; i < m_NumClusters; i++) {
            tempI[i] = new Instances(instances, 0);
        }
        for (i = 0; i < instances.numInstances(); i++) {
            tempI[clusterAssignments[i]].add(instances.instance(i));
        }
        if (m_executionSlots <= 1 || instances.numInstances() < 2 * m_executionSlots) {
            for (i = 0; i < m_NumClusters; i++) {
                if (tempI[i].numInstances() == 0) {
                    // empty cluster
                    emptyClusterCount++;
                } else {
                    moveCentroid(i, tempI[i], true, true);
                }
            }
        } else {
            emptyClusterCount = launchMoveCentroids(tempI);
        }

        if (m_Iterations == m_MaxIterations)
            converged = true;

        if (emptyClusterCount > 0) {
            m_NumClusters -= emptyClusterCount;
            if (converged) {
                Instances[] t = new Instances[m_NumClusters];
                int index = 0;
                for (int k = 0; k < tempI.length; k++) {
                    if (tempI[k].numInstances() > 0) {
                        t[index++] = tempI[k];
                    }
                }
                tempI = t;
            } else {
                tempI = new Instances[m_NumClusters];
            }
        }

        if (!converged) {
            m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
        }
        System.out.println("Sum of within cluster distances: " + Utils.sum(m_squaredErrors));
        // reset errors to zero
        m_squaredErrors = new double[m_NumClusters];
    }
    long end = System.currentTimeMillis();
    System.out.println("\nClustering completed in " + (end - start) + " ms and converged in " + m_Iterations
            + " iterations");

    // calculate errors
    if (!m_FastDistanceCalc) {
        for (i = 0; i < instances.numInstances(); i++) {
            clusterProcessedInstance(instances.instance(i), true, false);
        }
    }

    if (m_displayStdDevs) {
        m_ClusterStdDevs = new Instances(instances, m_NumClusters);
    }
    m_ClusterSizes = new int[m_NumClusters];
    for (i = 0; i < m_NumClusters; i++) {
        if (m_displayStdDevs) {
            double[] vals2 = new double[instances.numAttributes()];
            for (int j = 0; j < instances.numAttributes(); j++) {
                if (instances.attribute(j).isNumeric()) {
                    vals2[j] = Math.sqrt(tempI[i].variance(j));
                } else {
                    vals2[j] = Utils.missingValue();
                }
            }
            m_ClusterStdDevs.add(new DenseInstance(1.0, vals2));
        }
        m_ClusterSizes[i] = tempI[i].numInstances();
    }
    m_executorPool.shutdown();
}
From source file:mlda.attributes.MeanSkewnessNumericAttributes.java
License:Open Source License
/**
 * Calculate metric value
 *
 * @param mlData Multi-label dataset for which to calculate the metric
 * @return Value of the metric
 */
public double calculate(MultiLabelInstances mlData) {
    Instances instances = mlData.getDataSet();
    int nInstances = mlData.getNumInstances();

    Set<Attribute> attributesSet = mlData.getFeatureAttributes();

    int nNumeric = 0;
    double mean = 0;
    double avg;
    double var;
    double stdev;

    for (Attribute att : attributesSet) {
        if (att.isNumeric()) {
            nNumeric++;
            avg = instances.meanOrMode(att);
            var = 0;
            for (Instance inst : instances) {
                var += Math.pow(inst.value(att) - avg, 3);
            }
            stdev = Math.sqrt(instances.variance(att));
            mean += nInstances * var / ((nInstances - 1) * (nInstances - 2) * Math.pow(stdev, 3));
        }
    }

    if (nNumeric > 0) {
        this.value = mean / nNumeric;
    } else {
        this.value = Double.NaN;
    }
    //this.value = mean;

    return value;
}
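For reference, the accumulation above implements the adjusted Fisher-Pearson sample skewness per numeric attribute, G1 = n / ((n - 1)(n - 2)) * sum_i (x_i - mean)^3 / s^3, where s = sqrt(instances.variance(att)) is the sample standard deviation; the metric value is the average of G1 over all numeric feature attributes.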
From source file:mlda.attributes.MeanStdvNumericAttributes.java
License:Open Source License
/**
 * Calculate metric value
 *
 * @param mlData Multi-label dataset for which to calculate the metric
 * @return Value of the metric
 */
public double calculate(MultiLabelInstances mlData) {
    double mean = 0;
    int nNumeric = 0;

    Instances instances = mlData.getDataSet();
    Set<Attribute> attributeSet = mlData.getFeatureAttributes();

    for (Attribute att : attributeSet) {
        if (att.isNumeric()) {
            nNumeric++;
            // standard deviation is the square root of the variance
            mean += Math.sqrt(instances.variance(att));
        }
    }

    if (nNumeric > 0) {
        this.value = mean / nNumeric;
    } else {
        this.value = Double.NaN;
    }
    //this.value = mean;

    return value;
}
From source file:mlda.attributes.ProportionNumericAttributesWithOutliers.java
License:Open Source License
/**
 * Calculate metric value
 *
 * @param mlData Multi-label dataset for which to calculate the metric
 * @return Value of the metric
 */
public double calculate(MultiLabelInstances mlData) {
    Instances instances = mlData.getDataSet();
    int nInstances = mlData.getNumInstances();

    double alpha = 0.05;
    int numToTrimAtSide = (int) (nInstances * alpha / 2);

    int nNumeric = 0;
    int nOutliers = 0;

    Set<Attribute> attributeSet = mlData.getFeatureAttributes();

    double variance, varianceTrimmed;
    double[] values;
    double[] trimmed = new double[nInstances - (numToTrimAtSide * 2)];
    double ratio;

    for (Attribute att : attributeSet) {
        if (att.isNumeric()) {
            nNumeric++;
            variance = instances.variance(att);
            values = instances.attributeToDoubleArray(att.index());
            Arrays.sort(values);

            for (int i = 0; i < trimmed.length; i++) {
                trimmed[i] = values[i + numToTrimAtSide];
            }
            varianceTrimmed = Utils.variance(trimmed);

            ratio = varianceTrimmed / variance;
            if (ratio < 0.7) {
                nOutliers++;
            }
        }
    }

    if (nNumeric > 0) {
        this.value = ((double) nOutliers) / nNumeric;
    } else {
        this.value = Double.NaN;
    }

    return value;
}
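The core test in isolation, as a minimal sketch restating the logic above: trim alpha/2 of the sorted values from each tail and flag the attribute when the trimmed variance falls below 70% of the full variance (the method name and 0.05/0.7 constants mirror the code above; this helper itself is illustrative, not part of the library):

// returns true if the attribute's variance is dominated by its tails
static boolean hasOutliers(double[] sortedValues, double fullVariance) {
    int trim = (int) (sortedValues.length * 0.05 / 2); // alpha = 0.05, split across both tails
    double[] trimmed = java.util.Arrays.copyOfRange(sortedValues, trim, sortedValues.length - trim);
    return weka.core.Utils.variance(trimmed) / fullVariance < 0.7;
}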
From source file:org.isep.simizer.example.policy.utils.IterativeSimpleKMeans.java
License:Open Source License
/**
 * Generates a clusterer. Has to initialize all fields of the clusterer that
 * are not being set via options.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the clusterer has not been generated successfully
 */
public void buildClusterer(Instances data) throws Exception {
    // can clusterer handle the data?
    getCapabilities().testWithFail(data);

    m_Iterations = 0;
    m_ReplaceMissingFilter = new ReplaceMissingValues();
    Instances instances = new Instances(data);
    instances.setClassIndex(-1);
    if (!m_dontReplaceMissing) {
        m_ReplaceMissingFilter.setInputFormat(instances);
        instances = Filter.useFilter(instances, m_ReplaceMissingFilter);
    }

    m_FullMissingCounts = new int[instances.numAttributes()];
    if (m_displayStdDevs) {
        m_FullStdDevs = new double[instances.numAttributes()];
    }
    m_FullNominalCounts = new int[instances.numAttributes()][0];

    m_FullMeansOrMediansOrModes = moveCentroid(0, instances, false);
    for (int i = 0; i < instances.numAttributes(); i++) {
        m_FullMissingCounts[i] = instances.attributeStats(i).missingCount;
        if (instances.attribute(i).isNumeric()) {
            if (m_displayStdDevs) {
                m_FullStdDevs[i] = Math.sqrt(instances.variance(i));
            }
            if (m_FullMissingCounts[i] == instances.numInstances()) {
                m_FullMeansOrMediansOrModes[i] = Double.NaN; // mark missing as mean
            }
        } else {
            m_FullNominalCounts[i] = instances.attributeStats(i).nominalCounts;
            if (m_FullMissingCounts[i] > m_FullNominalCounts[i][Utils.maxIndex(m_FullNominalCounts[i])]) {
                m_FullMeansOrMediansOrModes[i] = -1; // mark missing as most common value
            }
        }
    }

    // Modified to account for already set centroids
    if (m_ClusterCentroids == null) {
        m_ClusterCentroids = new Instances(instances, m_NumClusters);
    }

    int[] clusterAssignments = new int[instances.numInstances()];
    if (m_PreserveOrder) {
        m_Assignments = clusterAssignments;
    }
    m_DistanceFunction.setInstances(instances);

    Random RandomO = new Random(getSeed());
    int instIndex;
    HashMap initC = new HashMap();
    DecisionTableHashKey hk = null;

    Instances initInstances = null;
    if (m_PreserveOrder) {
        initInstances = new Instances(instances);
    } else {
        initInstances = instances;
    }

    // Modified to account for already set centroids
    if (m_ClusterCentroids.numInstances() > 0) {
        initC = this.centersMap;
        for (int i = 0; i < m_NumClusters; i++)
            initInstances.add(m_ClusterCentroids.instance(i));
    } else {
        // starts from the end of the data set; swaps each picked center with the last unswept instance
        for (int j = initInstances.numInstances() - 1; j >= 0; j--) {
            instIndex = RandomO.nextInt(j + 1);
            hk = new DecisionTableHashKey(initInstances.instance(instIndex), initInstances.numAttributes(), true);
            if (!initC.containsKey(hk)) {
                m_ClusterCentroids.add(initInstances.instance(instIndex));
                initC.put(hk, null);
            }
            initInstances.swap(j, instIndex);
            if (m_ClusterCentroids.numInstances() == m_NumClusters) {
                break;
            }
        }
    }
    m_NumClusters = m_ClusterCentroids.numInstances();

    // removing reference
    initInstances = null;

    int i;
    boolean converged = false;
    int emptyClusterCount;
    Instances[] tempI = new Instances[m_NumClusters];
    m_squaredErrors = new double[m_NumClusters];
    m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
    m_ClusterMissingCounts = new int[m_NumClusters][instances.numAttributes()];
    while (!converged) {
        emptyClusterCount = 0;
        m_Iterations++;
        converged = true;
        for (i = 0; i < instances.numInstances(); i++) {
            Instance toCluster = instances.instance(i);
            int newC = clusterProcessedInstance(toCluster, true);
            if (newC != clusterAssignments[i]) {
                converged = false;
            }
            clusterAssignments[i] = newC;
        }

        // update centroids
        m_ClusterCentroids = new Instances(instances, m_NumClusters);
        for (i = 0; i < m_NumClusters; i++) {
            tempI[i] = new Instances(instances, 0);
        }
        for (i = 0; i < instances.numInstances(); i++) {
            tempI[clusterAssignments[i]].add(instances.instance(i));
        }
        for (i = 0; i < m_NumClusters; i++) {
            if (tempI[i].numInstances() == 0) {
                // empty cluster
                emptyClusterCount++;
            } else {
                moveCentroid(i, tempI[i], true);
            }
        }

        if (m_Iterations == m_MaxIterations) {
            converged = true;
        }

        if (emptyClusterCount > 0) {
            m_NumClusters -= emptyClusterCount;
            if (converged) {
                Instances[] t = new Instances[m_NumClusters];
                int index = 0;
                for (int k = 0; k < tempI.length; k++) {
                    if (tempI[k].numInstances() > 0) {
                        t[index++] = tempI[k];
                    }
                }
                tempI = t;
            } else {
                tempI = new Instances[m_NumClusters];
            }
        }

        if (!converged) {
            m_squaredErrors = new double[m_NumClusters];
            m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
        }
    }

    if (m_displayStdDevs) {
        m_ClusterStdDevs = new Instances(instances, m_NumClusters);
    }
    m_ClusterSizes = new int[m_NumClusters];
    for (i = 0; i < m_NumClusters; i++) {
        if (m_displayStdDevs) {
            double[] vals2 = new double[instances.numAttributes()];
            for (int j = 0; j < instances.numAttributes(); j++) {
                if (instances.attribute(j).isNumeric()) {
                    vals2[j] = Math.sqrt(tempI[i].variance(j));
                } else {
                    vals2[j] = Instance.missingValue();
                }
            }
            m_ClusterStdDevs.add(new Instance(1.0, vals2));
        }
        m_ClusterSizes[i] = tempI[i].numInstances();
    }
}
From source file:org.openml.webapplication.fantail.dc.statistical.Statistical.java
License:Open Source License
@Override
public Map<String, Double> characterize(Instances instances) {
    int attrib_count = instances.numAttributes() - 1, numeric_count = 0;

    for (int i = 0; i < attrib_count; i++) {
        if (instances.attribute(i).isNumeric()) {
            numeric_count++;
            final double mean = instances.meanOrMode(i);
            final double stddev = Math.sqrt(instances.variance(i));
            final double kurtosis = findKurtosis(instances, mean, stddev, i);
            final double skewness = findSkewness(instances, mean, stddev, i);

            meanList.add(mean);
            stdDevList.add(stddev);
            kurtosisList.add(kurtosis);
            skewnessList.add(skewness);
        }
    }

    if (0 == numeric_count) {
        // no numeric attributes: all 24 qualities default to 0.0
        Map<String, Double> qualities = new HashMap<String, Double>();
        for (int j = 0; j < 24; j++) {
            qualities.put(ids[j], 0.0);
        }
        return qualities;
    } else {
        double[] meansArray = ArrayUtils.toPrimitive(meanList.toArray(new Double[numeric_count]));
        double[] stdDevsArray = ArrayUtils.toPrimitive(stdDevList.toArray(new Double[numeric_count]));
        double[] kurtosisArray = ArrayUtils.toPrimitive(kurtosisList.toArray(new Double[numeric_count]));
        double[] skewnessArray = ArrayUtils.toPrimitive(skewnessList.toArray(new Double[numeric_count]));

        Map<String, Double> qualities = new HashMap<String, Double>();
        // mean, min, max and quartiles of the per-attribute means, std devs, kurtoses and skewnesses
        qualities.put(ids[0], StatUtils.mean(meansArray));
        qualities.put(ids[1], StatUtils.mean(stdDevsArray));
        qualities.put(ids[2], StatUtils.mean(kurtosisArray));
        qualities.put(ids[3], StatUtils.mean(skewnessArray));
        qualities.put(ids[4], StatUtils.min(meansArray));
        qualities.put(ids[5], StatUtils.min(stdDevsArray));
        qualities.put(ids[6], StatUtils.min(kurtosisArray));
        qualities.put(ids[7], StatUtils.min(skewnessArray));
        qualities.put(ids[8], StatUtils.max(meansArray));
        qualities.put(ids[9], StatUtils.max(stdDevsArray));
        qualities.put(ids[10], StatUtils.max(kurtosisArray));
        qualities.put(ids[11], StatUtils.max(skewnessArray));
        qualities.put(ids[12], StatUtils.percentile(meansArray, 25));
        qualities.put(ids[13], StatUtils.percentile(stdDevsArray, 25));
        qualities.put(ids[14], StatUtils.percentile(kurtosisArray, 25));
        qualities.put(ids[15], StatUtils.percentile(skewnessArray, 25));
        qualities.put(ids[16], StatUtils.percentile(meansArray, 50));
        qualities.put(ids[17], StatUtils.percentile(stdDevsArray, 50));
        qualities.put(ids[18], StatUtils.percentile(kurtosisArray, 50));
        qualities.put(ids[19], StatUtils.percentile(skewnessArray, 50));
        qualities.put(ids[20], StatUtils.percentile(meansArray, 75));
        qualities.put(ids[21], StatUtils.percentile(stdDevsArray, 75));
        qualities.put(ids[22], StatUtils.percentile(kurtosisArray, 75));
        qualities.put(ids[23], StatUtils.percentile(skewnessArray, 75));
        return qualities;
    }
}
From source file:org.vimarsha.utils.impl.ArffAttributeInfoExtractor.java
License:Open Source License
/**
 * Returns a table model with the attribute related info when the selected attribute index is
 * passed.
 *
 * @param index index of the attribute
 * @return DefaultTableModel
 */
public DefaultTableModel getArffAttributeInfo(int index) {
    DefaultTableModel defaultTableModel = new DefaultTableModel();
    Instances temp = this.arffData;
    // since kthSmallestValue cannot handle missing values, they need to be removed
    temp.deleteWithMissing(index);
    ArrayList<String> tmp = new ArrayList<String>();
    defaultTableModel.addColumn("Statistics", new String[] { "Name", "Variance", "Min", "Max", "Mean" });
    tmp.add(temp.attribute(index).name());
    tmp.add(String.valueOf(temp.variance(index)));
    tmp.add(String.valueOf(temp.kthSmallestValue(index, 1))); // min value is the 1st smallest value
    tmp.add(String.valueOf(temp.kthSmallestValue(index, temp.numInstances()))); // max value is the last smallest value
    tmp.add(String.valueOf(temp.meanOrMode(index)));
    defaultTableModel.addColumn("Value", tmp.toArray());
    return defaultTableModel;
}