Example usage for weka.clusterers SimpleKMeans getClusterCentroids

List of usage examples for weka.clusterers SimpleKMeans getClusterCentroids

Introduction

In this page you can find the example usage for weka.clusterers SimpleKMeans getClusterCentroids.

Prototype

public Instances getClusterCentroids() 

Source Link

Document

Gets the the cluster centroids.

Usage

From source file:br.com.ufu.lsi.rebfnetwork.RBFModel.java

License:Open Source License

/**
 * Method used to pre-process the data, perform clustering, and
 * set the initial parameter vector./*from  w w w. ja  v  a2  s  .  com*/
 */
protected Instances initializeClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    data = new Instances(data);
    data.deleteWithMissingClass();

    // Make sure data is shuffled
    Random random = new Random(m_Seed);
    if (data.numInstances() > 2) {
        random = data.getRandomNumberGenerator(m_Seed);
    }
    data.randomize(random);

    double y0 = data.instance(0).classValue(); // This stuff is not relevant in classification case
    int index = 1;
    while (index < data.numInstances() && data.instance(index).classValue() == y0) {
        index++;
    }
    if (index == data.numInstances()) {
        // degenerate case, all class values are equal
        // we don't want to deal with this, too much hassle
        throw new Exception("All class values are the same. At least two class values should be different");
    }
    double y1 = data.instance(index).classValue();

    // Replace missing values   
    m_ReplaceMissingValues = new ReplaceMissingValues();
    m_ReplaceMissingValues.setInputFormat(data);
    data = Filter.useFilter(data, m_ReplaceMissingValues);

    // Remove useless attributes
    m_AttFilter = new RemoveUseless();
    m_AttFilter.setInputFormat(data);
    data = Filter.useFilter(data, m_AttFilter);

    // only class? -> build ZeroR model
    if (data.numAttributes() == 1) {
        System.err.println(
                "Cannot build model (only class attribute present in data after removing useless attributes!), "
                        + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(data);
        return data;
    } else {
        m_ZeroR = null;
    }

    // Transform attributes
    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(data);
    data = Filter.useFilter(data, m_NominalToBinary);

    m_Filter = new Normalize();
    ((Normalize) m_Filter).setIgnoreClass(true);
    m_Filter.setInputFormat(data);
    data = Filter.useFilter(data, m_Filter);
    double z0 = data.instance(0).classValue(); // This stuff is not relevant in classification case
    double z1 = data.instance(index).classValue();
    m_x1 = (y0 - y1) / (z0 - z1); // no division by zero, since y0 != y1 guaranteed => z0 != z1 ???
    m_x0 = (y0 - m_x1 * z0); // = y1 - m_x1 * z1

    m_classIndex = data.classIndex();
    m_numClasses = data.numClasses();
    m_numAttributes = data.numAttributes();

    // Run k-means
    SimpleKMeans skm = new SimpleKMeans();
    skm.setMaxIterations(10000);
    skm.setNumClusters(m_numUnits);
    Remove rm = new Remove();
    data.setClassIndex(-1);
    rm.setAttributeIndices((m_classIndex + 1) + "");
    rm.setInputFormat(data);
    Instances dataRemoved = Filter.useFilter(data, rm);
    data.setClassIndex(m_classIndex);
    skm.buildClusterer(dataRemoved);
    Instances centers = skm.getClusterCentroids();

    if (centers.numInstances() < m_numUnits) {
        m_numUnits = centers.numInstances();
    }

    // Set up arrays
    OFFSET_WEIGHTS = 0;
    if (m_useAttributeWeights) {
        OFFSET_ATTRIBUTE_WEIGHTS = (m_numUnits + 1) * m_numClasses;
        OFFSET_CENTERS = OFFSET_ATTRIBUTE_WEIGHTS + m_numAttributes;
    } else {
        OFFSET_ATTRIBUTE_WEIGHTS = -1;
        OFFSET_CENTERS = (m_numUnits + 1) * m_numClasses;
    }
    OFFSET_SCALES = OFFSET_CENTERS + m_numUnits * m_numAttributes;

    switch (m_scaleOptimizationOption) {
    case USE_GLOBAL_SCALE:
        m_RBFParameters = new double[OFFSET_SCALES + 1];
        break;
    case USE_SCALE_PER_UNIT_AND_ATTRIBUTE:
        m_RBFParameters = new double[OFFSET_SCALES + m_numUnits * m_numAttributes];
        break;
    default:
        m_RBFParameters = new double[OFFSET_SCALES + m_numUnits];
        break;
    }

    // Set initial radius based on distance to nearest other basis function
    double maxMinDist = -1;
    for (int i = 0; i < centers.numInstances(); i++) {
        double minDist = Double.MAX_VALUE;
        for (int j = i + 1; j < centers.numInstances(); j++) {
            double dist = 0;
            for (int k = 0; k < centers.numAttributes(); k++) {
                if (k != centers.classIndex()) {
                    double diff = centers.instance(i).value(k) - centers.instance(j).value(k);
                    dist += diff * diff;
                }
            }
            if (dist < minDist) {
                minDist = dist;
            }
        }
        if ((minDist != Double.MAX_VALUE) && (minDist > maxMinDist)) {
            maxMinDist = minDist;
        }
    }

    // Initialize parameters
    if (m_scaleOptimizationOption == USE_GLOBAL_SCALE) {
        m_RBFParameters[OFFSET_SCALES] = Math.sqrt(maxMinDist);
    }
    for (int i = 0; i < m_numUnits; i++) {
        if (m_scaleOptimizationOption == USE_SCALE_PER_UNIT) {
            m_RBFParameters[OFFSET_SCALES + i] = Math.sqrt(maxMinDist);
        }
        int k = 0;
        for (int j = 0; j < m_numAttributes; j++) {
            if (k == centers.classIndex()) {
                k++;
            }
            if (j != data.classIndex()) {
                if (m_scaleOptimizationOption == USE_SCALE_PER_UNIT_AND_ATTRIBUTE) {
                    m_RBFParameters[OFFSET_SCALES + (i * m_numAttributes + j)] = Math.sqrt(maxMinDist);
                }
                m_RBFParameters[OFFSET_CENTERS + (i * m_numAttributes) + j] = centers.instance(i).value(k);
                k++;
            }
        }
    }

    if (m_useAttributeWeights) {
        for (int j = 0; j < m_numAttributes; j++) {
            if (j != data.classIndex()) {
                m_RBFParameters[OFFSET_ATTRIBUTE_WEIGHTS + j] = 1.0;
            }
        }
    }

    initializeOutputLayer(random);

    return data;
}

From source file:br.ufrn.ia.core.clustering.EMIaProject.java

License:Open Source License

private void EM_Init(Instances inst) throws Exception {
    int i, j, k;/*from   w ww  . j  a  v  a  2 s . c o m*/

    // run k means 10 times and choose best solution
    SimpleKMeans bestK = null;
    double bestSqE = Double.MAX_VALUE;
    for (i = 0; i < 10; i++) {
        SimpleKMeans sk = new SimpleKMeans();
        sk.setSeed(m_rr.nextInt());
        sk.setNumClusters(m_num_clusters);
        sk.setDisplayStdDevs(true);
        sk.buildClusterer(inst);
        if (sk.getSquaredError() < bestSqE) {
            bestSqE = sk.getSquaredError();
            bestK = sk;
        }
    }

    // initialize with best k-means solution
    m_num_clusters = bestK.numberOfClusters();
    m_weights = new double[inst.numInstances()][m_num_clusters];
    m_model = new DiscreteEstimator[m_num_clusters][m_num_attribs];
    m_modelNormal = new double[m_num_clusters][m_num_attribs][3];
    m_priors = new double[m_num_clusters];
    Instances centers = bestK.getClusterCentroids();
    Instances stdD = bestK.getClusterStandardDevs();
    double[][][] nominalCounts = bestK.getClusterNominalCounts();
    double[] clusterSizes = bestK.getClusterSizes();

    for (i = 0; i < m_num_clusters; i++) {
        Instance center = centers.instance(i);
        for (j = 0; j < m_num_attribs; j++) {
            if (inst.attribute(j).isNominal()) {
                m_model[i][j] = new DiscreteEstimator(m_theInstances.attribute(j).numValues(), true);
                for (k = 0; k < inst.attribute(j).numValues(); k++) {
                    m_model[i][j].addValue(k, nominalCounts[i][j][k]);
                }
            } else {
                double minStdD = (m_minStdDevPerAtt != null) ? m_minStdDevPerAtt[j] : m_minStdDev;
                double mean = (center.isMissing(j)) ? inst.meanOrMode(j) : center.value(j);
                m_modelNormal[i][j][0] = mean;
                double stdv = (stdD.instance(i).isMissing(j))
                        ? ((m_maxValues[j] - m_minValues[j]) / (2 * m_num_clusters))
                        : stdD.instance(i).value(j);
                if (stdv < minStdD) {
                    stdv = inst.attributeStats(j).numericStats.stdDev;
                    if (Double.isInfinite(stdv)) {
                        stdv = minStdD;
                    }
                    if (stdv < minStdD) {
                        stdv = minStdD;
                    }
                }
                if (stdv <= 0) {
                    stdv = m_minStdDev;
                }

                m_modelNormal[i][j][1] = stdv;
                m_modelNormal[i][j][2] = 1.0;
            }
        }
    }

    for (j = 0; j < m_num_clusters; j++) {
        // m_priors[j] += 1.0;
        m_priors[j] = clusterSizes[j];
    }
    Utils.normalize(m_priors);
}

From source file:de.unidue.langtech.grading.tc.ClusterExemplarTask.java

License:Open Source License

@Override
public void execute(TaskContext aContext) throws Exception {
    if (learningMode.equals(Constants.LM_MULTI_LABEL)) {
        throw new IllegalArgumentException("Cannot use multi-label setup in clustering.");
    }/*from  w  w  w  .j a v  a2 s  . co  m*/
    boolean multiLabel = false;

    File arffFileTrain = new File(
            aContext.getStorageLocation(TEST_TASK_INPUT_KEY_TRAINING_DATA, AccessMode.READONLY).getPath() + "/"
                    + TRAINING_DATA_FILENAME);

    Instances trainData = TaskUtils.getInstances(arffFileTrain, multiLabel);

    Clusterer abstractClusterer = AbstractClusterer.forName(clusteringArguments.get(0),
            clusteringArguments.subList(1, clusteringArguments.size()).toArray(new String[0]));

    // we assume that only this method has been used - breaks modularity, but need results fast ... :/
    SimpleKMeans clusterer = (SimpleKMeans) abstractClusterer;

    trainData = WekaUtils.removeOutcomeId(trainData, multiLabel);
    Instances copyTrainData = new Instances(trainData);

    // generate data for clusterer (w/o class)
    Remove filter = new Remove();
    filter.setAttributeIndices("" + (trainData.classIndex() + 1));
    filter.setInputFormat(trainData);
    Instances clusterTrainData = Filter.useFilter(trainData, filter);

    clusterer.buildClusterer(clusterTrainData);
    Instances centroids = clusterer.getClusterCentroids();

    //        Add addFilter = new Add();
    //        addFilter.setAttributeIndex(new Integer(numTestLabels + i + 1).toString());
    //        addFilter.setNominalLabels("0,1");
    //        addFilter.setAttributeName(trainData.attribute(i).name() + COMPATIBLE_OUTCOME_CLASS);
    //        addFilter.setInputFormat(testData);

    trainData.clear();

    Enumeration<Instance> centroidInstances = centroids.enumerateInstances();
    while (centroidInstances.hasMoreElements()) {
        Instance centroidInstance = centroidInstances.nextElement();

        // centroidInstance is usually not a real instance, but a virtual centroid
        // we need to find the closest point in the training data
        double minDistance = Double.POSITIVE_INFINITY;
        int offset = 0;
        int minOffset = 0;
        Enumeration<Instance> trainInstances = clusterTrainData.enumerateInstances();
        while (trainInstances.hasMoreElements()) {
            Instance trainInstance = trainInstances.nextElement();

            double dist = distance(centroidInstance, trainInstance);
            if (dist < minDistance) {
                minDistance = dist;
                minOffset = offset;
            }
            offset++;
        }

        // add selected instance to instances
        trainData.add(copyTrainData.get(minOffset));
    }

    // write the new training data (that will be used by the test task instead of the original one)                
    DataSink.write(aContext.getStorageLocation(ADAPTED_TRAINING_DATA, AccessMode.READWRITE).getPath() + "/"
            + ARFF_FILENAME, trainData);
}

From source file:entities.ArffFile.java

/**
 * Dada una lista de parametros, se ejecuta el filtro de microagregacion.
 * Todos estos parametros son entrada del usuario.
 * @param df Puede ser Euclidian o Manhattan distance, se especifica en la entrada.
 * @param numCluster//from  w  ww  .  j  ava2s .  c  o m
 * @param seed
 * @param maxIterations
 * @param replaceMissingValues
 * @param preserveInstancesOrder
 * @param attributes lista de los atributos que se desean generalizar con cluster
 */
public void microAgregacion(DistanceFunction df, int numCluster, int seed, int maxIterations,
        boolean replaceMissingValues, boolean preserveInstancesOrder, List<Integer> attributes)
        throws Exception {
    //instancesFilter = new Instances(instances);
    SimpleKMeans kMeans;
    kMeans = new SimpleKMeans();
    Instances uniqueAttributes;
    uniqueAttributes = new Instances(instancesFilter);
    List<String> names = new ArrayList<>();
    int i = 0;
    for (Integer attribute : attributes) {
        String name = new String(instancesFilter.attribute(attribute).name());
        if (instancesFilter.attribute(attribute).isDate() || instancesFilter.attribute(attribute).isString())
            throw new Exception("No se puede hacer cluster con atributos de tipo DATE o STRING");
        names.add(name);
    }
    while (uniqueAttributes.numAttributes() != attributes.size()) {
        if (!names.contains(uniqueAttributes.attribute(i).name()))
            uniqueAttributes.deleteAttributeAt(i);
        else
            i++;
    }
    try {
        kMeans.setNumClusters(numCluster);
        kMeans.setMaxIterations(maxIterations);
        kMeans.setSeed(seed);
        kMeans.setDisplayStdDevs(false);
        kMeans.setDistanceFunction(df);
        kMeans.setDontReplaceMissingValues(replaceMissingValues);
        kMeans.setPreserveInstancesOrder(preserveInstancesOrder);
        kMeans.buildClusterer(uniqueAttributes);
        //System.out.println(kMeans);
        for (int j = 0; j < uniqueAttributes.numInstances(); j++) {
            int cluster = kMeans.clusterInstance(uniqueAttributes.instance(j));
            for (int k = 0; k < uniqueAttributes.numAttributes(); k++) {
                if (uniqueAttributes.attribute(k).isNumeric())
                    uniqueAttributes.instance(j).setValue(k,
                            Double.parseDouble(kMeans.getClusterCentroids().instance(cluster).toString(k)));
                else
                    uniqueAttributes.instance(j).setValue(k,
                            kMeans.getClusterCentroids().instance(cluster).toString(k));
            }
        }
        replaceValues(uniqueAttributes, attributes);
    } catch (Exception ex) {
        Logger.getLogger(ArffFile.class.getName()).log(Level.SEVERE, null, ex);
    }
    //saveToFile("4");
}

From source file:gr.auth.ee.lcs.AbstractLearningClassifierSystem.java

License:Open Source License

/**
 * Initialize the rule population by clustering the train set and producing rules based upon the clusters.
 * The train set is initially divided in as many partitions as are the distinct label combinations.
 * @throws Exception //from  ww  w .  j  av a  2  s .  co  m
 * 
 * @param file
 *          the .arff file
 * */
public ClassifierSet initializePopulation(final String file) throws Exception {

    final double gamma = SettingsLoader.getNumericSetting("CLUSTER_GAMMA", .2);

    int numberOfLabels = (int) SettingsLoader.getNumericSetting("numberOfLabels", 1);

    final Instances set = InstancesUtility.openInstance(file);

    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setSeed(10);
    kmeans.setPreserveInstancesOrder(true);

    /*
     * Table partitions will hold instances only with attributes.
     * On the contrary, table partitionsWithCLasses will hold only the labels
     */
    Instances[] partitions = InstancesUtility.partitionInstances(this, file);
    Instances[] partitionsWithCLasses = InstancesUtility.partitionInstances(this, file);

    /*
     * Instead of having multiple positions for the same label combination, use only one.
     * This is the one that will be used to "cover" the centroids.
     */
    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Instance temp = partitionsWithCLasses[i].instance(0);
        partitionsWithCLasses[i].delete();
        partitionsWithCLasses[i].add(temp);
    }

    /*
     * Delete the labels from the partitions.
     */
    String attributesIndicesForDeletion = "";

    for (int k = set.numAttributes() - numberOfLabels + 1; k <= set.numAttributes(); k++) {
        if (k != set.numAttributes())
            attributesIndicesForDeletion += k + ",";
        else
            attributesIndicesForDeletion += k;
    }

    /*    attributesIncicesForDeletion = 8,9,10,11,12,13,14 e.g. for 7 attributes and 7 labels. 
     * It does not start from 7 because it assumes that the user inputs the number. See the api.
     */
    for (int i = 0; i < partitions.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(attributesIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitions[i]);
        partitions[i] = Filter.useFilter(partitions[i], remove);
        //System.out.println(partitions[i]);
    }
    // partitions now contains only attributes

    /*
     * delete the attributes from partitionsWithCLasses
     */
    String labelsIndicesForDeletion = "";

    for (int k = 1; k <= set.numAttributes() - numberOfLabels; k++) {
        if (k != set.numAttributes() - numberOfLabels)
            labelsIndicesForDeletion += k + ",";
        else
            labelsIndicesForDeletion += k;
    }

    /*    attributesIncicesForDeletion = 8,9,10,11,12,13,14 e.g. for 7 attributes and 7 labels. 
     * It does not start from 7 because it assumes that the user inputs the number. See the api.
     */
    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(labelsIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitionsWithCLasses[i]);
        partitionsWithCLasses[i] = Filter.useFilter(partitionsWithCLasses[i], remove);
        //System.out.println(partitionsWithCLasses[i]);
    }
    // partitionsWithCLasses now contains only labels

    int populationSize = (int) SettingsLoader.getNumericSetting("populationSize", 1500);

    // the set used to store the rules from all the clusters
    ClassifierSet initialClassifiers = new ClassifierSet(new FixedSizeSetWorstFitnessDeletion(this,
            populationSize, new RouletteWheelSelector(AbstractUpdateStrategy.COMPARISON_MODE_DELETION, true)));

    for (int i = 0; i < partitions.length; i++) {

        try {

            kmeans.setNumClusters((int) Math.ceil(gamma * partitions[i].numInstances()));
            kmeans.buildClusterer(partitions[i]);
            int[] assignments = kmeans.getAssignments();

            /*            int k=0;
                        for (int j = 0; j < assignments.length; j++) {
                           System.out.printf("Instance %d => Cluster %d ", k, assignments[j]);
                           k++;
                           System.out.println();
                    
                        }
                        System.out.println();*/

            Instances centroids = kmeans.getClusterCentroids();
            int numOfCentroidAttributes = centroids.numAttributes();

            /*
             * The centroids in this stage hold only attributes. To continue, we need to provide them the labels.
             * These are the ones we removed earlier.
             * But first, open up positions for attributes.
             * */

            for (int j = 0; j < numberOfLabels; j++) {
                Attribute label = new Attribute("label" + j);
                centroids.insertAttributeAt(label, numOfCentroidAttributes + j);
            }

            for (int centroidInstances = 0; centroidInstances < centroids.numInstances(); centroidInstances++) {
                for (int labels = 0; labels < numberOfLabels; labels++) {
                    centroids.instance(centroidInstances).setValue(numOfCentroidAttributes + labels,
                            partitionsWithCLasses[i].instance(0).value(labels));
                }
            }

            double[][] centroidsArray = InstancesUtility.convertIntancesToDouble(centroids);

            for (int j = 0; j < centroidsArray.length; j++) {
                //System.out.printf("Instance %d => Cluster %d ", k, assignments[j]);
                final Classifier coveringClassifier = this.getClassifierTransformBridge()
                        .createRandomClusteringClassifier(centroidsArray[j]);

                coveringClassifier.setClassifierOrigin(Classifier.CLASSIFIER_ORIGIN_INIT);
                initialClassifiers.addClassifier(new Macroclassifier(coveringClassifier, 1), false);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    System.out.println(initialClassifiers);
    return initialClassifiers;
}

From source file:gr.auth.ee.lcs.AbstractLearningClassifierSystem.java

License:Open Source License

/**
 * Initialize the rule population by clustering the train set and producing rules based upon the clusters.
 * The train set is initially divided in as many partitions as are the distinct label combinations.
 * @throws Exception /*w  w  w  .j  a va 2 s.  c om*/
 * 
 * @param trainSet
 *             the type of Instances train set
 * */

public ClassifierSet initializePopulation(final Instances trainset) throws Exception {

    final double gamma = SettingsLoader.getNumericSetting("CLUSTER_GAMMA", .2);

    int numberOfLabels = (int) SettingsLoader.getNumericSetting("numberOfLabels", 1);

    final Instances set = trainset;

    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setSeed(10);
    kmeans.setPreserveInstancesOrder(true);

    /*
     * Table partitions will hold instances only with attributes.
     * On the contrary, table partitionsWithCLasses will hold only the labels
     */
    Instances[] partitions = InstancesUtility.partitionInstances(this, trainset);
    Instances[] partitionsWithCLasses = InstancesUtility.partitionInstances(this, trainset);

    /*
    * Instead of having multiple positions for the same label combination, use only one.
    * This is the one that will be used to "cover" the centroids.
    */

    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Instance temp = partitionsWithCLasses[i].instance(0);
        partitionsWithCLasses[i].delete();
        partitionsWithCLasses[i].add(temp);
    }

    /*
    * Delete the labels from the partitions.
    */
    String attributesIndicesForDeletion = "";

    for (int k = set.numAttributes() - numberOfLabels + 1; k <= set.numAttributes(); k++) {
        if (k != set.numAttributes())
            attributesIndicesForDeletion += k + ",";
        else
            attributesIndicesForDeletion += k;
    }
    /*    attributesIncicesForDeletion = 8,9,10,11,12,13,14 e.g. for 7 attributes and 7 labels. 
    * It does not start from 7 because it assumes that the user inputs the number. See the api.
    */
    for (int i = 0; i < partitions.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(attributesIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitions[i]);
        partitions[i] = Filter.useFilter(partitions[i], remove);
    }
    // partitions now contains only attributes

    /*
    * delete the attributes from partitionsWithCLasses
    */
    String labelsIndicesForDeletion = "";

    for (int k = 1; k <= set.numAttributes() - numberOfLabels; k++) {
        if (k != set.numAttributes() - numberOfLabels)
            labelsIndicesForDeletion += k + ",";
        else
            labelsIndicesForDeletion += k;
    }
    /*    attributesIncicesForDeletion = 8,9,10,11,12,13,14 e.g. for 7 attributes and 7 labels. 
    * It does not start from 7 because it assumes that the user inputs the number. See the api.
    */
    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(labelsIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitionsWithCLasses[i]);
        partitionsWithCLasses[i] = Filter.useFilter(partitionsWithCLasses[i], remove);
        //System.out.println(partitionsWithCLasses[i]);
    }
    // partitionsWithCLasses now contains only labels

    int populationSize = (int) SettingsLoader.getNumericSetting("populationSize", 1500);

    // the set used to store the rules from all the clusters
    ClassifierSet initialClassifiers = new ClassifierSet(new FixedSizeSetWorstFitnessDeletion(this,
            populationSize, new RouletteWheelSelector(AbstractUpdateStrategy.COMPARISON_MODE_DELETION, true)));

    for (int i = 0; i < partitions.length; i++) {

        try {

            kmeans.setNumClusters((int) Math.ceil(gamma * partitions[i].numInstances()));
            kmeans.buildClusterer(partitions[i]);
            int[] assignments = kmeans.getAssignments();

            /*            int k=0;
                        for (int j = 0; j < assignments.length; j++) {
                           System.out.printf("Instance %d => Cluster %d ", k, assignments[j]);
                           k++;
                           System.out.println();
                    
                        }
                        System.out.println();*/

            Instances centroids = kmeans.getClusterCentroids();

            int numOfCentroidAttributes = centroids.numAttributes();

            /*
             * The centroids in this stage hold only attributes. To continue, we need to provide them the labels.
             * These are the ones we removed earlier.
             * But first, open up positions for attributes.
             * */

            for (int j = 0; j < numberOfLabels; j++) {
                Attribute label = new Attribute("label" + j);
                centroids.insertAttributeAt(label, numOfCentroidAttributes + j);
            }

            for (int centroidInstances = 0; centroidInstances < centroids.numInstances(); centroidInstances++) {
                for (int labels = 0; labels < numberOfLabels; labels++) {
                    centroids.instance(centroidInstances).setValue(numOfCentroidAttributes + labels,
                            partitionsWithCLasses[i].instance(0).value(labels));
                }
            }

            //System.out.println(centroids);
            double[][] centroidsArray = InstancesUtility.convertIntancesToDouble(centroids);

            for (int j = 0; j < centroidsArray.length; j++) {
                //System.out.printf("Instance %d => Cluster %d ", k, assignments[j]);
                final Classifier coveringClassifier = this.getClassifierTransformBridge()
                        .createRandomCoveringClassifier(centroidsArray[j]);

                coveringClassifier.setClassifierOrigin(Classifier.CLASSIFIER_ORIGIN_INIT);
                initialClassifiers.addClassifier(new Macroclassifier(coveringClassifier, 1), false);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    //System.out.println(initialClassifiers);
    return initialClassifiers;
}

From source file:lineage.AAFClusterer.java

License:Open Source License

/**
 * K-Means Clustering//from   w w w.  ja va 2s.c  o  m
 * @param data - matrix of observations (numObs x numFeatures)
 * @param k - number of clusters
 */
public Cluster[] kmeans(double[][] data, int numObs, int numFeatures, int k) {
    Instances ds = convertMatrixToWeka(data, numObs, numFeatures);

    // uses Euclidean distance by default
    SimpleKMeans clusterer = new SimpleKMeans();
    try {
        clusterer.setPreserveInstancesOrder(true);
        clusterer.setNumClusters(k);
        clusterer.buildClusterer(ds);

        // cluster centers
        Instances centers = clusterer.getClusterCentroids();
        Cluster[] clusters = new Cluster[centers.numInstances()];
        for (int i = 0; i < centers.numInstances(); i++) {
            Instance inst = centers.instance(i);
            double[] mean = new double[inst.numAttributes()];
            for (int j = 0; j < mean.length; j++) {
                mean[j] = inst.value(j);
            }
            clusters[i] = new Cluster(mean, i);
        }

        // cluster members
        int[] assignments = clusterer.getAssignments();
        for (int i = 0; i < assignments.length; i++) {
            clusters[assignments[i]].addMember(i);
        }
        return clusters;
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
        return null;
    }

}

From source file:lu.lippmann.cdb.lab.kmeans.KmeansImproved.java

License:Open Source License

/**
 * /*  w  ww . j av  a  2 s  .  co  m*/
 * @param instances
 * @param k
 * @param clusters_sizes
 * @param clusters_centroids
 * @return
 */
private double R2(SimpleKMeans kMeans) {
    //int k, int[] clusters_sizes, Instances clusters_centroids){
    final int k = kMeans.getNumClusters();
    final int[] clusters_sizes = kMeans.getClusterSizes();
    final Instances clusters_centroids = kMeans.getClusterCentroids();
    double inter, total;
    double[] weights = new double[k];
    double[] centroid = new double[instances.numAttributes()];
    final int N = instances.numInstances();
    final double instance_weight = 1.0;
    inter = total = 0;

    //Computing the centroid of the entire set
    for (int i = 0; i < N; i++) {
        final Instance instance = instances.get(i);
        double[] temp = instance.toDoubleArray();
        for (int j = 0; j < temp.length; j++)
            centroid[j] += temp[j];
    }
    for (int j = 0; j < centroid.length; j++) {
        centroid[j] = centroid[j] / N;
    }

    for (int i = 0; i < k; i++) {
        weights[i] = (0.0 + clusters_sizes[i]) / N;
    }

    final Instance centroid_G = new DenseInstance(instance_weight, centroid);
    for (int i = 0; i < N; i++) {
        total += Math.pow(distance.distance(instances.instance(i), centroid_G), 2);
    }
    total = total / N;

    for (int i = 0; i < k; i++) {
        inter += weights[i] * Math.pow(distance.distance(clusters_centroids.get(i), centroid_G), 2);
    }

    return (inter / total);
}

From source file:lu.lippmann.cdb.lab.mds.ClassicMDS.java

License:Open Source License

/**
 * //from  www  . j a  v a 2 s  .  co  m
 */
private static KmeansResult getSimplifiedInstances(final Instances instances, final DistanceFunction df,
        final int maxInstances) throws Exception {
    Instances centroids = null;
    List<Instances> clusters = null;

    final int savedClassIndex = instances.classIndex();
    instances.setClassIndex(-1);
    final SimpleKMeans clusterer = WekaMachineLearningUtil.buildSimpleKMeansClustererWithK(maxInstances, df);
    clusterer.buildClusterer(instances);
    clusters = WekaMachineLearningUtil.computeClusters(clusterer, instances).getClustersList();
    instances.setClassIndex(savedClassIndex);
    final int numClusters = clusters.size();
    //Set class index for each cluster instances
    //System.out.println("Setting class index to each cluster : " + savedClassIndex);
    for (int i = 0; i < numClusters; i++) {
        clusters.get(i).setClassIndex(savedClassIndex);
    }
    //Save centroids
    centroids = clusterer.getClusterCentroids();

    return new KmeansResult(centroids, clusters);
}

From source file:net.sf.markov4jmeter.behaviormodelextractor.extraction.transformation.clustering.KMeansClusteringStrategy.java

License:Apache License

/**
 * {@inheritDoc}/*from   w w w .  ja v  a 2  s. com*/
 * 
 * <p>
 * This method is specialized for <b>kmeans</b> clustering.
 */
@Override
public BehaviorMix apply(final BehaviorModelAbsolute[] behaviorModelsAbsolute,
        final UseCaseRepository useCaseRepository) {

    final ABMToRBMTransformer abmToRbmTransformer = new ABMToRBMTransformer();

    // Behavior Mix to be returned;
    final BehaviorMix behaviorMix = this.createBehaviorMix();

    try {

        // Returns a valid instances set, generated based on the absolut
        // behavior models
        Instances instances = getInstances(behaviorModelsAbsolute);

        // KMeans --> Weka
        SimpleKMeans kmeans = new SimpleKMeans();

        // DistanceFunction manhattanDistance = new ManhattanDistance();
        // String[] options = new String[1];
        // options[0] = "-D";
        // manhattanDistance.setOptions(options);
        // manhattanDistance.setInstances(instances);
        // kmeans.setDistanceFunction(manhattanDistance);

        // distance function with option don*t normalize
        DistanceFunction euclideanDistance = new EuclideanDistance();
        // String[] options = new String[1];
        // options[0] = "-D";
        // euclideanDistance.setOptions(options);
        euclideanDistance.setInstances(instances);
        kmeans.setDistanceFunction(euclideanDistance);
        kmeans.setPreserveInstancesOrder(true);

        int[] clustersize = null;
        int[] assignments = null;

        // get number of clusters to be generated.
        int numberOfClusters = Integer.parseInt(CommandLineArgumentsHandler.getNumberOfClustersMin());

        // clustering
        for (int clusterSize = numberOfClusters; clusterSize <= numberOfClusters; clusterSize++) {
            // must be specified in a fix way
            kmeans.setNumClusters(clusterSize);

            // build cluster
            kmeans.buildClusterer(instances);

            clustersize = kmeans.getClusterSizes();
            assignments = kmeans.getAssignments();

            ClusteringMetrics clusteringMetrics = new ClusteringMetrics();
            clusteringMetrics.calculateInterClusteringSimilarity(kmeans.getClusterCentroids());
            clusteringMetrics.calculateIntraClusteringSimilarity(kmeans.getClusterCentroids(), instances,
                    assignments);
            clusteringMetrics.calculateBetas();

            clusteringMetrics.printErrorMetricsHeader();
            clusteringMetrics.printErrorMetrics(kmeans.getClusterCentroids().numInstances());
            clusteringMetrics.printClusteringMetrics(clustersize, assignments, instances);
            // clusteringMetrics.printClusterAssignmentsToSession(assignments,
            // clusterSize);

        }

        Instances resultingCentroids = kmeans.getClusterCentroids();

        // for each centroid instance, create new behaviorModelRelative
        for (int i = 0; i < resultingCentroids.numInstances(); i++) {

            Instance centroid = resultingCentroids.instance(i);

            // create a Behavior Model, which includes all vertices only;
            // the vertices are associated with the use cases, and a
            // dedicated
            // vertex that represents the final state will be added;
            final BehaviorModelAbsolute behaviorModelAbsoluteCentroid = this
                    .createBehaviorModelAbsoluteWithoutTransitions(useCaseRepository.getUseCases());

            // install the transitions in between vertices;
            this.installTransitions(behaviorModelsAbsolute, behaviorModelAbsoluteCentroid, centroid,
                    assignments, i);

            // convert absolute to relative behaviorModel
            final BehaviorModelRelative behaviorModelRelative = abmToRbmTransformer
                    .transform(behaviorModelAbsoluteCentroid);

            // relative Frequency of cluster i
            double relativeFrequency = (double) clustersize[i] / (double) instances.numInstances();

            // create the (unique) Behavior Mix entry to be returned;
            final BehaviorMixEntry behaviorMixEntry = this.createBehaviorMixEntry(
                    AbstractClusteringStrategy.GENERIC_BEHAVIOR_MODEL_NAME, relativeFrequency, // relative frequency;
                    behaviorModelRelative);

            // add to resulting behaviorMix
            behaviorMix.getEntries().add(behaviorMixEntry);

        }

        return behaviorMix;

    } catch (ExtractionException e) {
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    }

    // if any error occurs, an ExtractionExeption should be thrown,
    // indicating the error that occurred;

    // the classes "NoClusteringStrategy" and "SimpleClusteringStrategy"
    // should give an idea for handling the Behavior Models and how to
    // use the helping methods of the (abstract) parent class.

    return behaviorMix;
}