List of usage examples for weka.core Instances meanOrMode
publicdouble meanOrMode(Attribute att)
From source file:adams.data.instancesanalysis.pls.AbstractMultiClassPLS.java
License:Open Source License
/** * Preprocesses the data.//w w w . j a v a 2 s .c o m * * @param instances the data to process * @return the preprocessed data */ protected Instances preTransform(Instances instances, Map<String, Object> params) throws Exception { Map<Integer, double[]> classValues; int i; int index; switch (m_PredictionType) { case ALL: classValues = null; break; default: classValues = new HashMap<>(); for (i = 0; i < m_ClassAttributeIndices.size(); i++) { index = m_ClassAttributeIndices.get(i); classValues.put(index, instances.attributeToDoubleArray(index)); } } if (classValues != null) params.put(PARAM_CLASSVALUES, classValues); if (!isInitialized()) { if (m_ReplaceMissing) { m_Missing = new ReplaceMissingValues(); m_Missing.setInputFormat(instances); } else { m_Missing = null; } m_ClassMean = new HashMap<>(); m_ClassStdDev = new HashMap<>(); for (i = 0; i < m_ClassAttributeIndices.size(); i++) { index = m_ClassAttributeIndices.get(i); switch (m_PreprocessingType) { case CENTER: m_ClassMean.put(index, instances.meanOrMode(index)); m_ClassStdDev.put(index, 1.0); m_Filter = new Center(); ((Center) m_Filter).setIgnoreClass(true); break; case STANDARDIZE: m_ClassMean.put(index, instances.meanOrMode(index)); m_ClassStdDev.put(index, StrictMath.sqrt(instances.variance(index))); m_Filter = new Standardize(); ((Standardize) m_Filter).setIgnoreClass(true); break; case NONE: m_ClassMean.put(index, 0.0); m_ClassStdDev.put(index, 1.0); m_Filter = null; break; default: throw new IllegalStateException("Unhandled preprocessing type; " + m_PreprocessingType); } } if (m_Filter != null) m_Filter.setInputFormat(instances); } // filter data if (m_Missing != null) instances = Filter.useFilter(instances, m_Missing); if (m_Filter != null) instances = Filter.useFilter(instances, m_Filter); return instances; }
From source file:adams.data.instancesanalysis.pls.AbstractSingleClassPLS.java
License:Open Source License
/** * Preprocesses the data.//from www . jav a 2 s . c o m * * @param instances the data to process * @return the preprocessed data */ protected Instances preTransform(Instances instances, Map<String, Object> params) throws Exception { double[] classValues; switch (m_PredictionType) { case ALL: classValues = null; break; default: classValues = instances.attributeToDoubleArray(instances.classIndex()); } if (classValues != null) params.put(PARAM_CLASSVALUES, classValues); if (!isInitialized()) { if (m_ReplaceMissing) { m_Missing = new ReplaceMissingValues(); m_Missing.setInputFormat(instances); } else { m_Missing = null; } switch (m_PreprocessingType) { case CENTER: m_ClassMean = instances.meanOrMode(instances.classIndex()); m_ClassStdDev = 1; m_Filter = new Center(); ((Center) m_Filter).setIgnoreClass(true); break; case STANDARDIZE: m_ClassMean = instances.meanOrMode(instances.classIndex()); m_ClassStdDev = StrictMath.sqrt(instances.variance(instances.classIndex())); m_Filter = new Standardize(); ((Standardize) m_Filter).setIgnoreClass(true); break; case NONE: m_ClassMean = 0; m_ClassStdDev = 1; m_Filter = null; break; default: throw new IllegalStateException("Unhandled preprocessing type; " + m_PreprocessingType); } if (m_Filter != null) m_Filter.setInputFormat(instances); } // filter data if (m_Missing != null) instances = Filter.useFilter(instances, m_Missing); if (m_Filter != null) instances = Filter.useFilter(instances, m_Filter); return instances; }
From source file:adaptedClusteringAlgorithms.MySimpleKMeans.java
License:Open Source License
/** * Move the centroid to it's new coordinates. Generate the centroid * coordinates based on it's members (objects assigned to the cluster of the * centroid) and the distance function being used. * // w w w . j a v a 2 s . c o m * @param centroidIndex index of the centroid which the coordinates will be * computed * @param members the objects that are assigned to the cluster of this * centroid * @param updateClusterInfo if the method is supposed to update the m_Cluster * arrays * @return the centroid coordinates */ protected double[] moveCentroid(int centroidIndex, Instances members, boolean updateClusterInfo) { double[] vals = new double[members.numAttributes()]; for (int j = 0; j < members.numAttributes(); j++) { // The centroid is the mean point. If the attribute is nominal, the centroid is the mode if (m_DistanceFunction instanceof ChEBIInd || m_DistanceFunction instanceof ChEBIDir || m_DistanceFunction instanceof GOInd || m_DistanceFunction instanceof GODir || m_DistanceFunction instanceof GOChEBIInd || m_DistanceFunction instanceof GOChEBIDir || m_DistanceFunction instanceof CalculusInd || m_DistanceFunction instanceof CalculusDir || members.attribute(j).isNominal()) { vals[j] = members.meanOrMode(j); } if (updateClusterInfo) { m_ClusterMissingCounts[centroidIndex][j] = members.attributeStats(j).missingCount; m_ClusterNominalCounts[centroidIndex][j] = members.attributeStats(j).nominalCounts; if (members.attribute(j).isNominal()) { if (m_ClusterMissingCounts[centroidIndex][j] > m_ClusterNominalCounts[centroidIndex][j][Utils .maxIndex(m_ClusterNominalCounts[centroidIndex][j])]) { vals[j] = Instance.missingValue(); // mark mode as missing } } else { if (m_ClusterMissingCounts[centroidIndex][j] == members.numInstances()) { vals[j] = Instance.missingValue(); // mark mean as missing } } } } if (updateClusterInfo) { m_ClusterCentroids.add(new Instance(1.0, vals)); } return vals; }
From source file:aw_cluster.myKMeans.java
protected double[] moveCentroid(Instances members) { double[] vals = new double[members.numAttributes()]; for (int j = 0; j < members.numAttributes(); j++) { vals[j] = members.meanOrMode(j); }/* ww w .j ava 2s.c o m*/ centroid.add(new Instance(1.0, vals)); return vals; }
From source file:br.ufrn.ia.core.clustering.EMIaProject.java
License:Open Source License
private void EM_Init(Instances inst) throws Exception { int i, j, k;//from w w w . j av a2s .c o m // run k means 10 times and choose best solution SimpleKMeans bestK = null; double bestSqE = Double.MAX_VALUE; for (i = 0; i < 10; i++) { SimpleKMeans sk = new SimpleKMeans(); sk.setSeed(m_rr.nextInt()); sk.setNumClusters(m_num_clusters); sk.setDisplayStdDevs(true); sk.buildClusterer(inst); if (sk.getSquaredError() < bestSqE) { bestSqE = sk.getSquaredError(); bestK = sk; } } // initialize with best k-means solution m_num_clusters = bestK.numberOfClusters(); m_weights = new double[inst.numInstances()][m_num_clusters]; m_model = new DiscreteEstimator[m_num_clusters][m_num_attribs]; m_modelNormal = new double[m_num_clusters][m_num_attribs][3]; m_priors = new double[m_num_clusters]; Instances centers = bestK.getClusterCentroids(); Instances stdD = bestK.getClusterStandardDevs(); double[][][] nominalCounts = bestK.getClusterNominalCounts(); double[] clusterSizes = bestK.getClusterSizes(); for (i = 0; i < m_num_clusters; i++) { Instance center = centers.instance(i); for (j = 0; j < m_num_attribs; j++) { if (inst.attribute(j).isNominal()) { m_model[i][j] = new DiscreteEstimator(m_theInstances.attribute(j).numValues(), true); for (k = 0; k < inst.attribute(j).numValues(); k++) { m_model[i][j].addValue(k, nominalCounts[i][j][k]); } } else { double minStdD = (m_minStdDevPerAtt != null) ? m_minStdDevPerAtt[j] : m_minStdDev; double mean = (center.isMissing(j)) ? inst.meanOrMode(j) : center.value(j); m_modelNormal[i][j][0] = mean; double stdv = (stdD.instance(i).isMissing(j)) ? ((m_maxValues[j] - m_minValues[j]) / (2 * m_num_clusters)) : stdD.instance(i).value(j); if (stdv < minStdD) { stdv = inst.attributeStats(j).numericStats.stdDev; if (Double.isInfinite(stdv)) { stdv = minStdD; } if (stdv < minStdD) { stdv = minStdD; } } if (stdv <= 0) { stdv = m_minStdDev; } m_modelNormal[i][j][1] = stdv; m_modelNormal[i][j][2] = 1.0; } } } for (j = 0; j < m_num_clusters; j++) { // m_priors[j] += 1.0; m_priors[j] = clusterSizes[j]; } Utils.normalize(m_priors); }
From source file:br.ufrn.ia.core.clustering.SimpleKMeansIaProject.java
License:Open Source License
protected double[] moveCentroid(int centroidIndex, Instances members, boolean updateClusterInfo) { double[] vals = new double[members.numAttributes()]; // used only for Manhattan Distance Instances sortedMembers = null;/*from www . j ava 2s . c o m*/ int middle = 0; boolean dataIsEven = false; if (m_DistanceFunction instanceof ManhattanDistance) { middle = (members.numInstances() - 1) / 2; dataIsEven = ((members.numInstances() % 2) == 0); if (m_PreserveOrder) { sortedMembers = members; } else { sortedMembers = new Instances(members); } } for (int j = 0; j < members.numAttributes(); j++) { // in case of Euclidian distance the centroid is the mean point // in case of Manhattan distance the centroid is the median point // in both cases, if the attribute is nominal, the centroid is the // mode if (m_DistanceFunction instanceof EuclideanDistance || members.attribute(j).isNominal()) { vals[j] = members.meanOrMode(j); } else if (m_DistanceFunction instanceof ManhattanDistance) { // singleton special case if (members.numInstances() == 1) { vals[j] = members.instance(0).value(j); } else { sortedMembers.kthSmallestValue(j, middle + 1); vals[j] = sortedMembers.instance(middle).value(j); if (dataIsEven) { sortedMembers.kthSmallestValue(j, middle + 2); vals[j] = (vals[j] + sortedMembers.instance(middle + 1).value(j)) / 2; } } } if (updateClusterInfo) { m_ClusterMissingCounts[centroidIndex][j] = members.attributeStats(j).missingCount; m_ClusterNominalCounts[centroidIndex][j] = members.attributeStats(j).nominalCounts; if (members.attribute(j).isNominal()) { if (m_ClusterMissingCounts[centroidIndex][j] > m_ClusterNominalCounts[centroidIndex][j][Utils .maxIndex(m_ClusterNominalCounts[centroidIndex][j])]) { vals[j] = Utils.missingValue(); // mark mode as missing } } else { if (m_ClusterMissingCounts[centroidIndex][j] == members.numInstances()) { vals[j] = Utils.missingValue(); // mark mean as missing } } } } if (updateClusterInfo) m_ClusterCentroids.add(new DenseInstance(1.0, vals)); return vals; }
From source file:CGLSMethod.LinearRegression.java
License:Open Source License
/** * Builds a regression model for the given data. * * @param data the training data to be used for generating the * linear regression function// ww w . ja v a2s . c om * @throws Exception if the classifier could not be built successfully */ public void buildClassifier(Instances data) throws Exception { // Preprocess instances if (!m_checksTurnedOff) { m_TransformFilter = new NominalToBinary(); m_TransformFilter.setInputFormat(data); data = Filter.useFilter(data, m_TransformFilter); m_MissingFilter = new ReplaceMissingValues(); m_MissingFilter.setInputFormat(data); data = Filter.useFilter(data, m_MissingFilter); data.deleteWithMissingClass(); } else { m_TransformFilter = null; m_MissingFilter = null; } m_ClassIndex = data.classIndex(); m_TransformedData = data; // Turn all attributes on for a start m_SelectedAttributes = new boolean[data.numAttributes()]; for (int i = 0; i < data.numAttributes(); i++) { if (i != m_ClassIndex) { m_SelectedAttributes[i] = true; } } m_Coefficients = null; // Compute means and standard deviations m_Means = new double[data.numAttributes()]; m_StdDevs = new double[data.numAttributes()]; for (int j = 0; j < data.numAttributes(); j++) { if (j != data.classIndex()) { m_Means[j] = data.meanOrMode(j); m_StdDevs[j] = Math.sqrt(data.variance(j)); if (m_StdDevs[j] == 0) { m_SelectedAttributes[j] = false; } } } m_ClassStdDev = Math.sqrt(data.variance(m_TransformedData.classIndex())); m_ClassMean = data.meanOrMode(m_TransformedData.classIndex()); // Perform the regression findBestModel(); // Save memory m_TransformedData = new Instances(data, 0); }
From source file:Classifier.supervised.LinearRegression.java
License:Open Source License
/** * Builds a regression model for the given data. * * @param data the training data to be used for generating the * linear regression function// w ww . ja v a2 s. c o m * @throws Exception if the classifier could not be built successfully */ public void buildClassifier(Instances data) throws Exception { m_ModelBuilt = false; if (!m_checksTurnedOff) { // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); } // Preprocess instances if (!m_checksTurnedOff) { m_TransformFilter = new NominalToBinary(); m_TransformFilter.setInputFormat(data); data = Filter.useFilter(data, m_TransformFilter); m_MissingFilter = new ReplaceMissingValues(); m_MissingFilter.setInputFormat(data); data = Filter.useFilter(data, m_MissingFilter); data.deleteWithMissingClass(); } else { m_TransformFilter = null; m_MissingFilter = null; } m_ClassIndex = data.classIndex(); m_TransformedData = data; // Turn all attributes on for a start m_SelectedAttributes = new boolean[data.numAttributes()]; for (int i = 0; i < data.numAttributes(); i++) { if (i != m_ClassIndex) { m_SelectedAttributes[i] = true; } } m_Coefficients = null; // Compute means and standard deviations m_Means = new double[data.numAttributes()]; m_StdDevs = new double[data.numAttributes()]; for (int j = 0; j < data.numAttributes(); j++) { if (j != data.classIndex()) { m_Means[j] = data.meanOrMode(j); m_StdDevs[j] = Math.sqrt(data.variance(j)); if (m_StdDevs[j] == 0) { m_SelectedAttributes[j] = false; } } } m_ClassStdDev = Math.sqrt(data.variance(m_TransformedData.classIndex())); m_ClassMean = data.meanOrMode(m_TransformedData.classIndex()); // Perform the regression findBestModel(); // Save memory if (m_Minimal) { m_TransformedData = null; m_Means = null; m_StdDevs = null; } else { m_TransformedData = new Instances(data, 0); } m_ModelBuilt = true; }
From source file:cn.edu.xmu.dm.d3c.clustering.SimpleKMeans.java
License:Open Source License
/** * Move the centroid to it's new coordinates. Generate the centroid coordinates based * on it's members (objects assigned to the cluster of the centroid) and the distance * function being used.//from w ww . ja v a 2 s . co m * @param centroidIndex index of the centroid which the coordinates will be computed * @param members the objects that are assigned to the cluster of this centroid * @param updateClusterInfo if the method is supposed to update the m_Cluster arrays * @return the centroid coordinates */ protected double[] moveCentroid(int centroidIndex, Instances members, boolean updateClusterInfo) { double[] vals = new double[members.numAttributes()]; //used only for Manhattan Distance Instances sortedMembers = null; int middle = 0; boolean dataIsEven = false; if (m_DistanceFunction instanceof ManhattanDistance) { middle = (members.numInstances() - 1) / 2; dataIsEven = ((members.numInstances() % 2) == 0); if (m_PreserveOrder) { sortedMembers = members; } else { sortedMembers = new Instances(members); } } for (int j = 0; j < members.numAttributes(); j++) { //in case of Euclidian distance the centroid is the mean point //in case of Manhattan distance the centroid is the median point //in both cases, if the attribute is nominal, the centroid is the mode if (m_DistanceFunction instanceof EuclideanDistance || members.attribute(j).isNominal()) { vals[j] = members.meanOrMode(j); } else if (m_DistanceFunction instanceof ManhattanDistance) { //singleton special case if (members.numInstances() == 1) { vals[j] = members.instance(0).value(j); } else { sortedMembers.kthSmallestValue(j, middle + 1); vals[j] = sortedMembers.instance(middle).value(j); if (dataIsEven) { sortedMembers.kthSmallestValue(j, middle + 2); vals[j] = (vals[j] + sortedMembers.instance(middle + 1).value(j)) / 2; } } } if (updateClusterInfo) { m_ClusterMissingCounts[centroidIndex][j] = members.attributeStats(j).missingCount; m_ClusterNominalCounts[centroidIndex][j] = members.attributeStats(j).nominalCounts; if (members.attribute(j).isNominal()) { if (m_ClusterMissingCounts[centroidIndex][j] > m_ClusterNominalCounts[centroidIndex][j][Utils .maxIndex(m_ClusterNominalCounts[centroidIndex][j])]) { vals[j] = Utils.missingValue(); // mark mode as missing } } else { if (m_ClusterMissingCounts[centroidIndex][j] == members.numInstances()) { vals[j] = Utils.missingValue(); // mark mean as missing } } } } if (updateClusterInfo) m_ClusterCentroids.add(new DenseInstance(1.0, vals)); return vals; }
From source file:de.ugoe.cs.cpdp.dataprocessing.NormalizationUtil.java
License:Apache License
/** * <p>// ww w .ja v a 2 s . co m * Z-Score normalization (N2 in Transfer Defect Learning by Nam et al.). * </p> * * @param data * data that is normalized */ public static void zScore(Instances data) { final double[] mean = new double[data.numAttributes()]; final double[] std = new double[data.numAttributes()]; // get means and stddevs of data for (int j = 0; j < data.numAttributes(); j++) { if (data.classIndex() != j) { mean[j] = data.meanOrMode(j); std[j] = Math.sqrt(data.variance(j)); } } applyZScore(data, mean, std); }