// Java tutorial
/*
 * Copyright 2012 Nabeel Mukhtar
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package net.sf.jtmt.clustering;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Clusters the documents of a {@link DocumentCollection} by starting from a
 * round-robin partition and repeatedly applying crossover and mutation
 * operators to it.
 *
 * <p>Configure the instance through its setters before calling
 * {@link #cluster(DocumentCollection)}; settings that are never set keep the
 * Java field defaults ({@code false} / {@code 0}).
 */
public class GeneticClusterer {

  /** The log. */
  private final Log log = LogFactory.getLog(getClass());

  /** Whether the collection is shuffled before the initial partitioning. */
  private boolean randomizeData;

  /** Number of crossover operations performed before each mutation. */
  private int numCrossoversPerMutation;

  /** Upper bound on the generation counter used to stop the main loop. */
  private int maxGenerations;

  /**
   * Sets whether the collection is shuffled before the initial partitioning.
   *
   * @param randomizeData {@code true} to call {@code collection.shuffle()}
   *        before documents are assigned to the initial clusters
   */
  public void setRandomizeData(boolean randomizeData) {
    this.randomizeData = randomizeData;
  }

  /**
   * Sets the number of crossovers per mutation.
   *
   * @param ncpm the number of crossover operations run before each mutation
   */
  public void setNumberOfCrossoversPerMutation(int ncpm) {
    this.numCrossoversPerMutation = ncpm;
  }

  /**
   * Sets the max generations.
   *
   * @param maxGenerations the main loop stops once the generation counter
   *        exceeds this value; the counter grows by one per crossover and by
   *        one per mutation
   */
  public void setMaxGenerations(int maxGenerations) {
    this.maxGenerations = maxGenerations;
  }

  /**
   * Clusters the given collection.
   *
   * <p>Creates {@code k = floor(sqrt(collection.size()))} clusters named
   * {@code C0..C(k-1)}, deals the documents to them round-robin in the order
   * returned by {@code collection.getDocumentNames()}, and then alternates
   * {@code numCrossoversPerMutation} crossovers with one mutation until the
   * generation counter exceeds {@code maxGenerations} or the fitness check
   * triggers.
   *
   * <p>When fewer than two clusters result (collections of fewer than four
   * documents) the initial partition is returned unchanged, because both
   * genetic operators need two clusters to exchange documents between.
   *
   * @param collection the documents to cluster; shuffled in place when
   *        randomization is enabled
   * @return the list of clusters
   */
  public List<Cluster> cluster(DocumentCollection collection) {
    // get initial clusters
    int k = (int) Math.floor(Math.sqrt(collection.size()));
    List<Cluster> clusters = new ArrayList<Cluster>(k);
    for (int i = 0; i < k; i++) {
      clusters.add(new Cluster("C" + i));
    }
    if (randomizeData) {
      collection.shuffle();
    }
    // load it up using mod partitioning, this is P(0)
    int docId = 0;
    for (String documentName : collection.getDocumentNames()) {
      int clusterId = docId % k;
      clusters.get(clusterId).addDocument(documentName, collection.getDocument(documentName));
      docId++;
    }
    if (log.isDebugEnabled()) {
      log.debug("Initial clusters = " + clusters.toString());
    }
    // With zero or one cluster there is nothing to exchange: crossover() and
    // mutate() both index two clusters, which fails on an empty list and is
    // meaningless for a single cluster.
    if (k < 2) {
      return clusters;
    }
    // holds previous cluster in the compute loop
    List<Cluster> prevClusters = new ArrayList<Cluster>();
    double prevFitness = 0.0D;
    int generations = 0;
    while (true) {
      // compute fitness for P(t)
      double fitness = computeFitness(clusters);
      // if termination condition achieved, break and return clusters
      // NOTE(review): prevClusters holds the same Cluster instances as
      // clusters, and crossover()/mutate() modify those instances in place,
      // so this does not bring back an earlier document assignment. Also,
      // prevFitness is computed below from the very state that is evaluated
      // here on the next pass, so this branch only fires if computeFitness()
      // is not repeatable. Confirm the intended behavior before changing.
      if (prevFitness > fitness) {
        clusters.clear();
        clusters.addAll(prevClusters);
        break;
      }
      // even if termination condition not met, terminate after the
      // maximum number of generations
      if (generations > maxGenerations) {
        break;
      }
      // do specified number of crossover operations for this generation
      for (int i = 0; i < numCrossoversPerMutation; i++) {
        crossover(clusters, collection, i);
        generations++;
      }
      // followed by a single mutation per generation
      mutate(clusters, collection);
      generations++;
      if (log.isDebugEnabled()) {
        log.debug("..Intermediate clusters (" + generations + "): " + clusters.toString());
      }
      // hold on to previous solution
      prevClusters.clear();
      prevClusters.addAll(clusters);
      prevFitness = computeFitness(prevClusters);
    }
    return clusters;
  }

  /**
   * Computes the fitness of a set of clusters as the sum of their radii.
   *
   * @param clusters the clusters
   * @return the sum of {@code getRadius()} over all clusters
   */
  private double computeFitness(List<Cluster> clusters) {
    double radius = 0.0D;
    for (Cluster cluster : clusters) {
      // Return value is discarded; presumably this refreshes state that
      // getRadius() relies on -- TODO confirm against Cluster.
      cluster.getCentroid();
      radius += cluster.getRadius();
    }
    return radius;
  }

  /**
   * Crossover: picks two clusters and two cut points, then exchanges the
   * documents at the positions before the first cut point and from the
   * second cut point up to the size of the smaller cluster. Documents between
   * the cut points are left alone.
   *
   * <p>The selected {@link Cluster} objects are modified in place; the list
   * itself keeps the same elements in the same order.
   *
   * @param clusters the clusters; must contain the clusters to exchange between
   * @param collection the collection used to look up document contents
   * @param sequence the sequence number of this crossover (not used)
   */
  public void crossover(List<Cluster> clusters, DocumentCollection collection, int sequence) {
    // presumably IdGenerator(n) yields ids in [0, n) -- TODO confirm
    IdGenerator clusterIdGenerator = new IdGenerator(clusters.size());
    int[] clusterIds = new int[2];
    clusterIds[0] = clusterIdGenerator.getNextId();
    clusterIds[1] = clusterIdGenerator.getNextId();
    Cluster cluster1 = clusters.get(clusterIds[0]);
    Cluster cluster2 = clusters.get(clusterIds[1]);
    int minSize = Math.min(cluster1.size(), cluster2.size());
    IdGenerator docIdGenerator = new IdGenerator(minSize);
    int[] cutPoints = new int[2];
    cutPoints[0] = docIdGenerator.getNextId();
    cutPoints[1] = docIdGenerator.getNextId();
    Arrays.sort(cutPoints);
    exchangeDocumentsAt(cluster1, cluster2, collection, 0, cutPoints[0]);
    // leave the documents between the cut points alone
    exchangeDocumentsAt(cluster1, cluster2, collection, cutPoints[1], minSize);
    // No list rebuild is needed: cluster1 and cluster2 are the very objects
    // stored at clusterIds[0] and clusterIds[1], so the list already reflects
    // the exchange.
  }

  /**
   * Exchanges, position by position, the documents found at indexes
   * {@code [from, to)} of the two clusters.
   *
   * <p>NOTE(review): names are looked up by index while the clusters are being
   * modified; whether later indexes still refer to the originally intended
   * documents depends on how Cluster orders its documents -- confirm.
   *
   * @param cluster1 the first cluster
   * @param cluster2 the second cluster
   * @param collection the collection used to look up document contents
   * @param from first index to exchange (inclusive)
   * @param to last index to exchange (exclusive)
   */
  private void exchangeDocumentsAt(Cluster cluster1, Cluster cluster2,
      DocumentCollection collection, int from, int to) {
    for (int i = from; i < to; i++) {
      String docName1 = cluster1.getDocumentName(i);
      String docName2 = cluster2.getDocumentName(i);
      cluster1.removeDocument(docName1);
      cluster2.addDocument(docName1, collection.getDocument(docName1));
      cluster2.removeDocument(docName2);
      cluster1.addDocument(docName2, collection.getDocument(docName2));
    }
  }

  /**
   * Mutate: picks two clusters and one document from each, then exchanges
   * those two documents.
   *
   * <p>The selected {@link Cluster} objects are modified in place; the list
   * itself keeps the same elements in the same order.
   *
   * @param clusters the clusters
   * @param collection the collection used to look up document contents
   */
  private void mutate(List<Cluster> clusters, DocumentCollection collection) {
    // choose two random clusters
    IdGenerator clusterIdGenerator = new IdGenerator(clusters.size());
    int[] clusterIds = new int[2];
    clusterIds[0] = clusterIdGenerator.getNextId();
    clusterIds[1] = clusterIdGenerator.getNextId();
    Cluster cluster1 = clusters.get(clusterIds[0]);
    Cluster cluster2 = clusters.get(clusterIds[1]);
    // choose two random documents in the clusters; ids are bounded by the
    // smaller cluster so both lookups stay in range
    int minSize = Math.min(cluster1.size(), cluster2.size());
    IdGenerator docIdGenerator = new IdGenerator(minSize);
    String docName1 = cluster1.getDocumentName(docIdGenerator.getNextId());
    String docName2 = cluster2.getDocumentName(docIdGenerator.getNextId());
    // exchange the documents
    cluster1.removeDocument(docName1);
    cluster1.addDocument(docName2, collection.getDocument(docName2));
    cluster2.removeDocument(docName2);
    cluster2.addDocument(docName1, collection.getDocument(docName1));
    // No list rebuild is needed: cluster1 and cluster2 are the very objects
    // stored in the list, so it already reflects the exchange.
  }
}