List of usage examples for weka.core Instances meanOrMode
public double meanOrMode(Attribute att)
From source file:de.ugoe.cs.cpdp.dataprocessing.NormalizationUtil.java
License:Apache License
/** * <p>/*from w ww . jav a 2 s. co m*/ * Z-Score normalization using the mean and std of the training data (N3 in Transfer Defect * Learning by Nam et al.). * </p> * * @param testdata * test data of the target product * @param traindata * training data */ public static void zScoreTraining(Instances testdata, Instances traindata) { final double[] mean = new double[testdata.numAttributes()]; final double[] std = new double[testdata.numAttributes()]; // get means of training for (int j = 0; j < traindata.numAttributes(); j++) { if (traindata.classIndex() != j) { mean[j] = traindata.meanOrMode(j); std[j] = Math.sqrt(traindata.variance(j)); } } applyZScore(testdata, mean, std); applyZScore(traindata, mean, std); }
From source file:de.ugoe.cs.cpdp.dataprocessing.NormalizationUtil.java
License:Apache License
/** * <p>/*from w w w.j ava 2 s .c om*/ * Z-Score normalization using the mean and std of the test data (N4 in Transfer Defect Learning * by Nam et al.). * </p> * * @param testdata * test data of the target product * @param traindata * training data */ public static void zScoreTarget(Instances testdata, Instances traindata) { final double[] mean = new double[testdata.numAttributes()]; final double[] std = new double[testdata.numAttributes()]; // get means of testdata for (int j = 0; j < testdata.numAttributes(); j++) { if (testdata.classIndex() != j) { mean[j] = testdata.meanOrMode(j); std[j] = Math.sqrt(testdata.variance(j)); } } applyZScore(testdata, mean, std); applyZScore(traindata, mean, std); }
From source file:de.ugoe.cs.cpdp.dataprocessing.NormalizationUtil.java
License:Apache License
/** * <p>/* ww w .j av a2s.c o m*/ * Z-Score normalization using the mean and std of the test data (N4 in Transfer Defect Learning * by Nam et al.). * </p> * * @param testdata * test data of the target product * @param traindataSet * training data */ public static void zScoreTarget(Instances testdata, SetUniqueList<Instances> traindataSet) { final double[] mean = new double[testdata.numAttributes()]; final double[] std = new double[testdata.numAttributes()]; // get means of testdata for (int j = 0; j < testdata.numAttributes(); j++) { if (testdata.classIndex() != j) { mean[j] = testdata.meanOrMode(j); std[j] = Math.sqrt(testdata.variance(j)); } } applyZScore(testdata, mean, std); for (Instances traindata : traindataSet) { applyZScore(traindata, mean, std); } }
From source file:de.unimannheim.dws.algorithms.CustomSimpleKMedian.java
License:Open Source License
/** * Move the centroid to it's new coordinates. Generate the centroid * coordinates based on it's members (objects assigned to the cluster of the * centroid) and the distance function being used. * /* w ww . j a v a 2s . co m*/ * @param centroidIndex index of the centroid which the coordinates will be * computed * @param members the objects that are assigned to the cluster of this * centroid * @param updateClusterInfo if the method is supposed to update the m_Cluster * arrays * @return the centroid coordinates */ protected double[] moveCentroid(int centroidIndex, Instances members, boolean updateClusterInfo) { double[] vals = new double[members.numAttributes()]; // used only for Manhattan Distance Instances sortedMembers = null; int middle = 0; boolean dataIsEven = false; if (m_DistanceFunction instanceof ManhattanDistance || m_DistanceFunction instanceof CustomPairWiseDistance) { middle = (members.numInstances() - 1) / 2; dataIsEven = ((members.numInstances() % 2) == 0); if (m_PreserveOrder) { sortedMembers = members; } else { sortedMembers = new Instances(members); } } for (int j = 0; j < members.numAttributes(); j++) { // in case of Euclidian distance the centroid is the mean point // in case of Manhattan distance the centroid is the median point // in both cases, if the attribute is nominal, the centroid is the mode if (m_DistanceFunction instanceof EuclideanDistance || members.attribute(j).isNominal()) { vals[j] = members.meanOrMode(j); } else if (m_DistanceFunction instanceof ManhattanDistance || m_DistanceFunction instanceof CustomPairWiseDistance) { // singleton special case if (members.numInstances() == 1) { vals[j] = members.instance(0).value(j); } else { vals[j] = sortedMembers.kthSmallestValue(j, middle + 1); if (dataIsEven) { vals[j] = (vals[j] + sortedMembers.kthSmallestValue(j, middle + 2)) / 2; } } } if (updateClusterInfo) { m_ClusterMissingCounts[centroidIndex][j] = members.attributeStats(j).missingCount; m_ClusterNominalCounts[centroidIndex][j] = 
members.attributeStats(j).nominalCounts; if (members.attribute(j).isNominal()) { if (m_ClusterMissingCounts[centroidIndex][j] > m_ClusterNominalCounts[centroidIndex][j][Utils .maxIndex(m_ClusterNominalCounts[centroidIndex][j])]) { vals[j] = Instance.missingValue(); // mark mode as missing } } else { if (m_ClusterMissingCounts[centroidIndex][j] == members.numInstances()) { vals[j] = Instance.missingValue(); // mark mean as missing } } } } if (updateClusterInfo) { m_ClusterCentroids.add(new Instance(1.0, vals)); } return vals; }
From source file:dewaweebtreeclassifier.Sujeong.java
/**
 * Builds this tree node (and, recursively, its children) from the given instances.
 * <p>
 * With a single attribute the node becomes a leaf holding the mean/mode of the class
 * attribute. Otherwise the attribute with the largest strictly positive gain (as reported
 * by {@code computeGain}) is chosen as split attribute; if none is found the node also
 * becomes a leaf. Empty partitions produce child leaves that carry the mean/mode of the
 * class attribute of this node's instances.
 *
 * @param instances the data to build the (sub)tree from
 * @throws java.lang.Exception if the data has no attributes, or if a helper fails
 */
public void buildTree(Instances instances) throws java.lang.Exception {
    final int attributeCount = instances.numAttributes();
    if (attributeCount < 1) {
        throw new Exception("Data instances need to have minimum of 1 attribute.");
    }
    if (attributeCount == 1) {
        this.value = instances.meanOrMode(instances.classIndex());
        return;
    }

    // pick the attribute with the highest gain; ties and non-positive gains never win
    double bestGain = 0.0;
    for (Enumeration candidates = instances.enumerateAttributes(); candidates.hasMoreElements();) {
        Attribute candidate = (Attribute) candidates.nextElement();
        double gain = computeGain(instances, candidate);
        if (gain > bestGain) {
            bestAttr = candidate;
            bestGain = gain;
        }
    }

    if (bestAttr == null) {
        // nothing worth splitting on: become a leaf
        this.value = instances.meanOrMode(instances.classIndex());
        return;
    }

    double fallbackValue = instances.meanOrMode(instances.classIndex());
    Instances[] partitions = splitInstancesOnAttribute(instances, bestAttr);
    children = new Sujeong[partitions.length];
    for (int idx = 0; idx < partitions.length; idx++) {
        Sujeong subtree = new Sujeong();
        children[idx] = subtree;
        if (partitions[idx].numInstances() > 0) {
            subtree.buildTree(partitions[idx]);
        } else {
            subtree.value = fallbackValue;
        }
    }
}
From source file:fantail.algorithms.BinaryART.java
License:Open Source License
private void makeTree(Instances data, java.util.Random r, int depth) throws Exception { if (m_K > data.numAttributes()) { m_K = data.numAttributes() - 1;//w ww.ja va 2 s. com } if (m_K < 1) { m_K = (int) weka.core.Utils.log2(data.numAttributes()) + 1; } int[] randAtts = new int[data.numAttributes() - 1]; //TODO: handle class target att for (int i = 0; i < randAtts.length; i++) { randAtts[i] = i; } for (int i = 0; i < randAtts.length; i++) { int randomPosition = r.nextInt(randAtts.length); int temp = randAtts[i]; randAtts[i] = randAtts[randomPosition]; randAtts[randomPosition] = temp; } int bestAttIndex = -1; AttScorePair[] attScorePair = new AttScorePair[m_K]; //double currentR2 = estimateAvgDistanceSpearman(data); for (int i = 0; i < m_K; i++) { int attIndex = randAtts[i]; double splitPoint = Double.NaN; if (!m_UseMedian) { splitPoint = data.meanOrMode(attIndex); } else { splitPoint = getMedian(data, attIndex); } double r2 = estimateR2(data, attIndex, splitPoint); attScorePair[i] = new AttScorePair(attIndex, r2); } Arrays.sort(attScorePair); bestAttIndex = attScorePair[0].index; double maxR2 = attScorePair[0].score; boolean stop1 = false; // for (int kk = 0; kk < attScorePair.length; kk++) { // System.out.println(attScorePair[kk].score); // } // if (true) { // throw new Exception("stop"); // } if (attScorePair[0].score <= attScorePair[m_K - 1].score) { stop1 = true; } if (data.numInstances() <= m_MiniLeaf || (depth >= m_MaxDepth && m_MaxDepth != 0) //|| maxR2 <= 0.01 // removed 10/01/2013 || maxR2 >= 0.95 || stop1 // 11/01/13 the paper version doesn't have this || data.variance(bestAttIndex) <= 0) { m_Attribute = null; m_Prototype = AbstractRanker.getAvgRanking(data); //m_Prototype = AbstractRanker.getCenterRanking(data, m_ApproxCenterMethod); return; } m_Attribute = data.attribute(bestAttIndex); if (!m_UseMedian) { m_SplitPoint = data.meanOrMode(bestAttIndex); } else { m_SplitPoint = getMedian(data, bestAttIndex); } Instances[] splitData = splitData(data, 
bestAttIndex, m_SplitPoint); m_Successors = new BinaryART[2]; for (int j = 0; j < 2; j++) { m_Successors[j] = new BinaryART(); m_Successors[j].setMiniLeaf(m_MiniLeaf); m_Successors[j].setK(m_K); m_Successors[j].setUseMedian(m_UseMedian); m_Successors[j].setNumObjects(m_NumObjects); m_Successors[j].makeTree(splitData[j], r, depth + 1); } }
From source file:fantail.algorithms.RankingWithBinaryPCT.java
License:Open Source License
private void makeTree(Instances data, Random r, int depth) throws Exception { if (data.numInstances() <= m_MiniLeaf || (depth >= m_MaxDepth && m_MaxDepth != 0) || computeVariance(data) <= m_MinVariancea) { //|| maxVarianceaReduction <= 0 //|| data.variance(bestAttIndex) <= 0) { // || data.variance(bestAttIndex) <= 0 ) { copied from ART, m_Attribute = null;//from w w w .j a va2s .c o m m_Prototype = AbstractRanker.getAvgRanking(data); return; } // if (m_K > data.numAttributes()) { m_K = data.numAttributes(); } if (m_K < 1) { m_K = (int) weka.core.Utils.log2(data.numAttributes()) + 1; } // TODO: int[] attIndice = new int[data.numAttributes() - 1]; for (int i = 0; i < attIndice.length; i++) { attIndice[i] = i; } for (int i = 0; i < attIndice.length; i++) { //int randomPosition = getRandomPosition(r, attIndice); int randomPosition = r.nextInt(attIndice.length); int temp = attIndice[i]; attIndice[i] = attIndice[randomPosition]; attIndice[randomPosition] = temp; } AttScorePair[] attScorePair = new AttScorePair[m_K]; for (int i = 0; i < m_K; i++) { int attIndex = attIndice[i]; double splitPoint = Double.NaN; if (!m_UseMedian) { splitPoint = data.meanOrMode(attIndex); } else { splitPoint = getMedian(data, attIndex); } double varianceReduction = computeVarianceReduction(data, attIndex, splitPoint); attScorePair[i] = new AttScorePair(attIndex, varianceReduction); } Arrays.sort(attScorePair); int randAttIndex = 0; int bestAttIndex = attScorePair[randAttIndex].index; double maxVarianceaReduction = attScorePair[randAttIndex].score; // if (data.numInstances() <= 1 * m_MiniLeaf // || (depth >= m_MaxDepth && m_MaxDepth != 0) // || computeVariance(data) <= m_MinVariancea) { // //|| maxVarianceaReduction <= 0 // //|| data.variance(bestAttIndex) <= 0) { // || data.variance(bestAttIndex) <= 0 ) { copied from ART, // // m_Attribute = null; // m_Prototype = AbstractRanker.getAvgRanking(data); // return; // } m_Attribute = data.attribute(bestAttIndex); if (!m_UseMedian) { m_SplitPoint = 
data.meanOrMode(bestAttIndex); } else { m_SplitPoint = getMedian(data, bestAttIndex); } //m_SplitPoint = data.meanOrMode(m_Attribute); Instances[] splitData = splitData(data, bestAttIndex, m_SplitPoint); //System.out.println(splitData[0].numInstances()); //System.out.println(splitData[1].numInstances()); //System.out.println(); m_Successors = new RankingWithBinaryPCT[2]; for (int j = 0; j < 2; j++) { m_Successors[j] = new RankingWithBinaryPCT(); m_Successors[j].setMiniLeaf(m_MiniLeaf); m_Successors[j].setK(m_K); m_Successors[j].setUseMedian(m_UseMedian); m_Successors[j].setNumTargetLabels(m_NumTargetLabels); m_Successors[j].makeTree(splitData[j], r, depth + 1); } }
From source file:gr.iti.mklab.visual.quantization.SimpleKMeansWithOutput.java
License:Open Source License
/** * Move the centroid to it's new coordinates. Generate the centroid coordinates based on it's members * (objects assigned to the cluster of the centroid) and the distance function being used. * //from w w w . j ava 2s. c o m * @param centroidIndex * index of the centroid which the coordinates will be computed * @param members * the objects that are assigned to the cluster of this centroid * @param updateClusterInfo * if the method is supposed to update the m_Cluster arrays * @param addToCentroidInstances * true if the method is to add the computed coordinates to the Instances holding the centroids * @return the centroid coordinates */ protected double[] moveCentroid(int centroidIndex, Instances members, boolean updateClusterInfo, boolean addToCentroidInstances) { double[] vals = new double[members.numAttributes()]; // used only for Manhattan Distance Instances sortedMembers = null; int middle = 0; boolean dataIsEven = false; if (m_DistanceFunction instanceof ManhattanDistance) { middle = (members.numInstances() - 1) / 2; dataIsEven = ((members.numInstances() % 2) == 0); if (m_PreserveOrder) { sortedMembers = members; } else { sortedMembers = new Instances(members); } } for (int j = 0; j < members.numAttributes(); j++) { // in case of Euclidian distance the centroid is the mean point // in case of Manhattan distance the centroid is the median point // in both cases, if the attribute is nominal, the centroid is the mode if (m_DistanceFunction instanceof EuclideanDistance || members.attribute(j).isNominal()) { vals[j] = members.meanOrMode(j); } else if (m_DistanceFunction instanceof ManhattanDistance) { // singleton special case if (members.numInstances() == 1) { vals[j] = members.instance(0).value(j); } else { vals[j] = sortedMembers.kthSmallestValue(j, middle + 1); if (dataIsEven) { vals[j] = (vals[j] + sortedMembers.kthSmallestValue(j, middle + 2)) / 2; } } } if (updateClusterInfo) { m_ClusterMissingCounts[centroidIndex][j] = 
members.attributeStats(j).missingCount; m_ClusterNominalCounts[centroidIndex][j] = members.attributeStats(j).nominalCounts; if (members.attribute(j).isNominal()) { if (m_ClusterMissingCounts[centroidIndex][j] > m_ClusterNominalCounts[centroidIndex][j][Utils .maxIndex(m_ClusterNominalCounts[centroidIndex][j])]) { vals[j] = Utils.missingValue(); // mark mode as missing } } else { if (m_ClusterMissingCounts[centroidIndex][j] == members.numInstances()) { vals[j] = Utils.missingValue(); // mark mean as missing } } } } if (addToCentroidInstances) { m_ClusterCentroids.add(new DenseInstance(1.0, vals)); } return vals; }
From source file:lu.lippmann.cdb.lab.beta.util.WekaUtil2.java
License:Open Source License
/** * Generate the centroid coordinates based * on it's members (objects assigned to the cluster of the centroid) and the distance * function being used./*from w ww . ja v a 2 s.c o m*/ * @return the centroid */ public static MixedCentroid computeMixedCentroid(final boolean preserveOrder, final NormalizableDistance distanceFunction, final Instances numericInstances, final Instances originalInstances, final int clusterIndex) { final int numInstances = numericInstances.numInstances(); final int numAttributes = numericInstances.numAttributes(); final Map<TupleSI, Integer> addedAttr = new HashMap<TupleSI, Integer>(); if (numInstances == 1) { Instance uniqueNumInstance = numericInstances.firstInstance(); Instance uniqueMixInstance = originalInstances.firstInstance(); double[] centroid = uniqueNumInstance.toDoubleArray(); for (int i = 0; i < uniqueMixInstance.numAttributes(); i++) { if (!uniqueMixInstance.attribute(i).isNumeric()) { final String catVal = uniqueMixInstance.attribute(i).value((int) uniqueMixInstance.value(i)); addedAttr.put(new TupleSI(catVal, i), 1); } } return new MixedCentroid(clusterIndex, centroid, addedAttr); } final double[] vals = new double[numAttributes]; //used only for Manhattan Distance Instances sortedMembers = null; int middle = 0; boolean dataIsEven = false; final boolean isManhattanDist = (distanceFunction instanceof ManhattanDistance); final boolean isEuclideanDist = (distanceFunction instanceof EuclideanDistance); if (isManhattanDist) { middle = (numInstances - 1) / 2; dataIsEven = ((numInstances % 2) == 0); if (preserveOrder) { sortedMembers = numericInstances; } else { sortedMembers = new Instances(numericInstances); } } for (int j = 0; j < numAttributes; j++) { //in case of Euclidian distance the centroid is the mean point //in case of Manhattan distance the centroid is the median point //in both cases, if the attribute is nominal, the centroid is the mode if (isEuclideanDist) { vals[j] = numericInstances.meanOrMode(j); for (int i = 
0; i < numInstances; i++) { if (!originalInstances.attribute(j).isNumeric()) { final Instance instance = originalInstances.instance(i); final String catVal = instance.attribute(j).value((int) instance.value(j)); //Initialize map final TupleSI key = new TupleSI(catVal, j); if (!addedAttr.containsKey(key)) addedAttr.put(key, 0); addedAttr.put(key, addedAttr.get(key) + 1); } } } else if (isManhattanDist) { sortedMembers.kthSmallestValue(j, middle + 1); vals[j] = sortedMembers.instance(middle).value(j); if (dataIsEven) { sortedMembers.kthSmallestValue(j, middle + 2); vals[j] = (vals[j] + sortedMembers.instance(middle + 1).value(j)) / 2; } } else { throw new IllegalStateException("Not handled distance ..."); } } return new MixedCentroid(clusterIndex, vals, addedAttr); }
From source file:meansagnes.MyKMeans.java
/**
 * Builds the centroid of the given instances: each coordinate is the value returned by
 * {@code meanOrMode} for the corresponding attribute.
 *
 * @param instances the members of the cluster
 * @return a new instance with weight 1.0 holding the centroid coordinates
 */
protected Instance moveCentroid(Instances instances) {
    final int width = instances.numAttributes();
    final double[] centroid = new double[width];

    int att = 0;
    while (att < width) {
        centroid[att] = instances.meanOrMode(att);
        att++;
    }

    return new Instance(1.0, centroid);
}