net.sf.jtmt.clustering.GeneticClusterer.java Source code

Java tutorial

Introduction

Here is the source code for net.sf.jtmt.clustering.GeneticClusterer.java

Source

/*
 * Copyright 2012 Nabeel Mukhtar 
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); 
 * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at 
 * 
 *  http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
 * See the License for the specific language governing permissions and
 * limitations under the License. 
 * 
 */
package net.sf.jtmt.clustering;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Partitions the documents of a {@link DocumentCollection} into roughly
 * {@code sqrt(n)} clusters and then repeatedly perturbs that partition with
 * crossover and mutation operators, keeping the perturbed partition only while
 * the fitness (sum of cluster radii) does not drop.
 */
public class GeneticClusterer {

    /** The log. */
    private final Log log = LogFactory.getLog(getClass());

    /** Whether the collection is shuffled before the initial partitioning. */
    private boolean randomizeData;

    /** Number of crossover operations applied before each mutation. */
    private int numCrossoversPerMutation;

    /** Upper bound on the operator count after which the search stops. */
    private int maxGenerations;

    /**
     * Sets the randomize data.
     *
     * @param randomizeData {@code true} to shuffle the collection before the
     *        initial partition is built
     */
    public void setRandomizeData(boolean randomizeData) {
        this.randomizeData = randomizeData;
    }

    /**
     * Sets the number of crossovers per mutation.
     *
     * @param ncpm the new number of crossovers per mutation
     */
    public void setNumberOfCrossoversPerMutation(int ncpm) {
        this.numCrossoversPerMutation = ncpm;
    }

    /**
     * Sets the max generations.
     *
     * @param maxGenerations the new max generations; note that every crossover
     *        and every mutation counts as one generation
     */
    public void setMaxGenerations(int maxGenerations) {
        this.maxGenerations = maxGenerations;
    }

    /**
     * Clusters the collection.
     *
     * <p>The initial partition assigns document {@code i} to cluster
     * {@code i % k}, with {@code k = floor(sqrt(collection.size()))}. Each round
     * then applies the configured number of crossovers followed by one mutation.
     * The search stops when the fitness of the new partition is lower than that
     * of the partition it was derived from (in which case the earlier partition
     * is returned), or when the generation counter exceeds the configured
     * maximum.
     *
     * <p>When fewer than two clusters can be formed there is nothing to exchange
     * between clusters, so the initial partition is returned unchanged.
     *
     * @param collection the collection
     * @return the list of clusters
     */
    public List<Cluster> cluster(DocumentCollection collection) {
        // get initial clusters
        int k = (int) Math.floor(Math.sqrt(collection.size()));
        List<Cluster> clusters = new ArrayList<Cluster>();
        for (int i = 0; i < k; i++) {
            clusters.add(newCluster(i));
        }
        if (randomizeData) {
            collection.shuffle();
        }
        // load it up using mod partitioning, this is P(0)
        int docId = 0;
        for (String documentName : collection.getDocumentNames()) {
            clusters.get(docId % k).addDocument(documentName, collection.getDocument(documentName));
            docId++;
        }
        if (log.isDebugEnabled()) {
            log.debug("Initial clusters = " + clusters.toString());
        }
        // Both operators need two clusters to pick from; with zero clusters
        // clusters.get(...) would throw, with one there is nothing to swap.
        if (k < 2) {
            return clusters;
        }
        // Document names per cluster for the partition the current one was
        // derived from. Names are stored rather than Cluster references because
        // the operators modify Cluster objects in place.
        List<List<String>> prevMembership = null;
        double prevFitness = 0.0D;
        int generations = 0;
        for (;;) {
            // compute fitness for P(t)
            double fitness = computeFitness(clusters);
            // if the last round made things worse, roll back to P(t-1) and stop
            if (prevMembership != null && prevFitness > fitness) {
                restore(clusters, prevMembership, collection);
                break;
            }
            // even if termination condition not met, terminate after the
            // maximum number of generations
            if (generations > maxGenerations) {
                break;
            }
            // remember P(t) before it is perturbed
            prevMembership = snapshotMembership(clusters);
            prevFitness = fitness;
            // do specified number of crossover operations for this generation
            for (int i = 0; i < numCrossoversPerMutation; i++) {
                crossover(clusters, collection, i);
                generations++;
            }
            // followed by a single mutation per generation
            mutate(clusters, collection);
            generations++;
            if (log.isDebugEnabled()) {
                log.debug("..Intermediate clusters (" + generations + "): " + clusters.toString());
            }
        }
        return clusters;
    }

    /**
     * Creates an empty cluster using this class's naming scheme.
     *
     * @param index position of the cluster in the cluster list
     * @return a new cluster named {@code "C" + index}
     */
    private Cluster newCluster(int index) {
        return new Cluster("C" + index);
    }

    /**
     * Records, for every cluster, the names of its documents in positional order.
     *
     * @param clusters the clusters to record
     * @return one list of document names per cluster, in cluster order
     */
    private List<List<String>> snapshotMembership(List<Cluster> clusters) {
        List<List<String>> membership = new ArrayList<List<String>>(clusters.size());
        for (Cluster cluster : clusters) {
            int size = cluster.size();
            List<String> names = new ArrayList<String>(size);
            for (int i = 0; i < size; i++) {
                names.add(cluster.getDocumentName(i));
            }
            membership.add(names);
        }
        return membership;
    }

    /**
     * Replaces the contents of {@code clusters} with freshly built clusters that
     * hold the documents recorded in {@code membership}.
     *
     * @param clusters the list to overwrite
     * @param membership document names per cluster, as produced by
     *        {@link #snapshotMembership(List)}
     * @param collection source of the document data for each name
     */
    private void restore(List<Cluster> clusters, List<List<String>> membership,
            DocumentCollection collection) {
        clusters.clear();
        for (int i = 0; i < membership.size(); i++) {
            Cluster cluster = newCluster(i);
            for (String documentName : membership.get(i)) {
                cluster.addDocument(documentName, collection.getDocument(documentName));
            }
            clusters.add(cluster);
        }
    }

    /**
     * Computes the fitness of a partition as the sum of its cluster radii.
     *
     * @param clusters the clusters
     * @return the summed radius
     */
    private double computeFitness(List<Cluster> clusters) {
        double radius = 0.0D;
        for (Cluster cluster : clusters) {
            // Result is discarded; presumably this refreshes the centroid that
            // getRadius() relies on -- confirm against Cluster.
            cluster.getCentroid();
            radius += cluster.getRadius();
        }
        return radius;
    }

    /**
     * Exchanges documents between two clusters chosen through an
     * {@code IdGenerator}. Two cut points are drawn within the size of the
     * smaller cluster; documents at positions before the first cut point and at
     * or after the second (up to the smaller cluster's size) are swapped
     * pairwise, while those between the cut points stay where they are.
     *
     * @param clusters the clusters; the chosen clusters are modified in place
     * @param collection the collection supplying document data by name
     * @param sequence the index of this crossover within its round (not used)
     */
    public void crossover(List<Cluster> clusters, DocumentCollection collection, int sequence) {
        IdGenerator clusterIdGenerator = new IdGenerator(clusters.size());
        Cluster cluster1 = clusters.get(clusterIdGenerator.getNextId());
        Cluster cluster2 = clusters.get(clusterIdGenerator.getNextId());
        int minSize = Math.min(cluster1.size(), cluster2.size());
        IdGenerator docIdGenerator = new IdGenerator(minSize);
        int[] cutPoints = new int[2];
        cutPoints[0] = docIdGenerator.getNextId();
        cutPoints[1] = docIdGenerator.getNextId();
        Arrays.sort(cutPoints);
        // Read every name to be swapped before touching either cluster, so the
        // selection does not depend on how Cluster renumbers positions when
        // documents are removed and added.
        List<String> names1 = new ArrayList<String>();
        List<String> names2 = new ArrayList<String>();
        for (int i = 0; i < minSize; i++) {
            // leave the documents between the cut points alone
            if (i < cutPoints[0] || i >= cutPoints[1]) {
                names1.add(cluster1.getDocumentName(i));
                names2.add(cluster2.getDocumentName(i));
            }
        }
        for (int i = 0; i < names1.size(); i++) {
            exchange(cluster1, names1.get(i), cluster2, names2.get(i), collection);
        }
        // cluster1 and cluster2 are the objects already held by the list, so the
        // list itself needs no rebuilding.
    }

    /**
     * Swaps one document between two clusters chosen through an
     * {@code IdGenerator}; the document positions are drawn within the size of
     * the smaller cluster.
     *
     * @param clusters the clusters; the chosen clusters are modified in place
     * @param collection the collection supplying document data by name
     */
    private void mutate(List<Cluster> clusters, DocumentCollection collection) {
        // choose two random clusters
        IdGenerator clusterIdGenerator = new IdGenerator(clusters.size());
        Cluster cluster1 = clusters.get(clusterIdGenerator.getNextId());
        Cluster cluster2 = clusters.get(clusterIdGenerator.getNextId());
        // choose two random documents in the clusters
        int minSize = Math.min(cluster1.size(), cluster2.size());
        IdGenerator docIdGenerator = new IdGenerator(minSize);
        String docName1 = cluster1.getDocumentName(docIdGenerator.getNextId());
        String docName2 = cluster2.getDocumentName(docIdGenerator.getNextId());
        // exchange the documents
        exchange(cluster1, docName1, cluster2, docName2, collection);
    }

    /**
     * Moves {@code docName1} from {@code cluster1} to {@code cluster2} and
     * {@code docName2} from {@code cluster2} to {@code cluster1}.
     *
     * @param cluster1 the cluster currently holding {@code docName1}
     * @param docName1 the document leaving {@code cluster1}
     * @param cluster2 the cluster currently holding {@code docName2}
     * @param docName2 the document leaving {@code cluster2}
     * @param collection the collection supplying document data by name
     */
    private void exchange(Cluster cluster1, String docName1, Cluster cluster2, String docName2,
            DocumentCollection collection) {
        cluster1.removeDocument(docName1);
        cluster2.addDocument(docName1, collection.getDocument(docName1));
        cluster2.removeDocument(docName2);
        cluster1.addDocument(docName2, collection.getDocument(docName2));
    }
}