Java tutorial
/* * Copyright (C) 2011 by Allamanis Miltiadis * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ /** * */ package gr.auth.ee.lcs; //comment import gr.auth.ee.lcs.classifiers.Classifier; import gr.auth.ee.lcs.classifiers.ClassifierSet; import gr.auth.ee.lcs.classifiers.Macroclassifier; import gr.auth.ee.lcs.classifiers.populationcontrol.FixedSizeSetWorstFitnessDeletion; import gr.auth.ee.lcs.classifiers.populationcontrol.SortPopulationControl; import gr.auth.ee.lcs.classifiers.statistics.MeanAttributeSpecificityStatistic; import gr.auth.ee.lcs.classifiers.statistics.MeanCoverageStatistic; import gr.auth.ee.lcs.classifiers.statistics.MeanFitnessStatistic; import gr.auth.ee.lcs.classifiers.statistics.MeanLabelSpecificity; import gr.auth.ee.lcs.classifiers.statistics.WeightedMeanAttributeSpecificityStatistic; import gr.auth.ee.lcs.classifiers.statistics.WeightedMeanCoverageStatistic; import gr.auth.ee.lcs.classifiers.statistics.WeightedMeanLabelSpecificity; import gr.auth.ee.lcs.data.AbstractUpdateStrategy; import gr.auth.ee.lcs.data.ClassifierTransformBridge; import gr.auth.ee.lcs.data.ILCSMetric; import gr.auth.ee.lcs.data.representations.complex.ComplexRepresentation; import gr.auth.ee.lcs.data.representations.complex.GenericMultiLabelRepresentation; import gr.auth.ee.lcs.data.representations.complex.GenericMultiLabelRepresentation.BestFitnessClassificationStrategy; import gr.auth.ee.lcs.data.representations.complex.GenericMultiLabelRepresentation.VotingClassificationStrategy; import gr.auth.ee.lcs.data.updateAlgorithms.MlASLCS3UpdateAlgorithm; import gr.auth.ee.lcs.data.updateAlgorithms.MlASLCS4UpdateAlgorithm; import gr.auth.ee.lcs.evaluators.AccuracyRecallEvaluator; import gr.auth.ee.lcs.evaluators.ExactMatchEvalutor; import gr.auth.ee.lcs.evaluators.FileLogger; import gr.auth.ee.lcs.evaluators.HammingLossEvaluator; import gr.auth.ee.lcs.evaluators.bamevaluators.IdentityBAMEvaluator; import gr.auth.ee.lcs.evaluators.bamevaluators.PositionBAMEvaluator; import gr.auth.ee.lcs.geneticalgorithm.selectors.RouletteWheelSelector; import gr.auth.ee.lcs.utilities.ExtendedBitSet; import gr.auth.ee.lcs.utilities.SettingsLoader; import gr.auth.ee.lcs.utilities.InstancesUtility; import java.io.BufferedWriter; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.FileWriter; import java.io.IOException; import java.util.Arrays; import java.util.Vector; import java.lang.String; import weka.clusterers.SimpleKMeans; import weka.core.Attribute; import weka.core.Instance; 
import weka.core.Instances; import weka.filters.Filter; import weka.filters.unsupervised.attribute.Remove; import edu.rit.pj.ParallelTeam; /** * An abstract LCS class to be implemented by all LCSs. * * @author Miltiadis Allamanis * */ public abstract class AbstractLearningClassifierSystem { public String hookedMetricsFileDirectory; public final int UPDATE_MODE = (int) SettingsLoader.getNumericSetting("UPDATE_MODE", 0); /** * Selection of the update mode, which adds offspring to the population * as soon as they are created. * */ public static final int UPDATE_MODE_IMMEDIATE = 0; /** * Selection of the update mode, which adds the total number * of produced offspring en masse to the population. * */ public static final int UPDATE_MODE_HOLD = 1; /** * The mean correct set numerosity (in microclassifiers) of the population. * */ public double meanCorrectSetNumerosity = 0; /** * The current cumulative learning step, i.e. totalRepetition * numberOfInstances + datasetInstanceIndex. * */ private int cummulativeCurrentInstanceIndex = 0; /** * The train set. * @uml.property name="instances" multiplicity="(0 -1)" dimension="2" */ public double[][] instances; public double[][] testInstances; public Instances trainSet; public Instances testSet; /** * The LCS instance transform bridge. * @uml.property name="transformBridge" * @uml.associationEnd */ public double labelCardinality = 1; public int numberOfCoversOccured = 0; private ClassifierTransformBridge transformBridge; /** * The Abstract Update Algorithm Strategy of the LCS. * @uml.property name="updateStrategy" * @uml.associationEnd */ protected AbstractUpdateStrategy updateStrategy; /** * The rule population. * @uml.property name="rulePopulation" * @uml.associationEnd */ protected ClassifierSet rulePopulation; /** * A vector of all evaluator hooks. * @uml.property name="hooks" * @uml.associationEnd multiplicity="(0 -1)" elementType="gr.auth.ee.lcs.data.ILCSMetric" */ private final Vector<ILCSMetric> hooks; /** * Frequency of the hook callback execution. * @uml.property name="hookCallbackRate" */ private int hookCallbackRate; public int repetition; private final boolean thoroughlyCheckWIthPopulation = SettingsLoader .getStringSetting("thoroughlyCheckWIthPopulation", "true").equals("true"); /** * Matrix used to store the time measurements for different phases of the train procedure. */ public double[][] timeMeasurements; public double[][] systemAccuracy; public Vector<Float> qualityIndexOfDeleted = new Vector<Float>(); public Vector<Float> qualityIndexOfClassifiersCoveredDeleted = new Vector<Float>(); public Vector<Float> qualityIndexOfClassifiersGaedDeleted = new Vector<Float>(); public Vector<Float> accuracyOfDeleted = new Vector<Float>(); public Vector<Float> accuracyOfCoveredDeletion = new Vector<Float>(); public Vector<Float> accuracyOfGaedDeletion = new Vector<Float>(); public Vector<Integer> iteration = new Vector<Integer>(); public Vector<Integer> originOfDeleted = new Vector<Integer>(); public Vector<Float> systemAccuracyInTraining = new Vector<Float>(); public Vector<Float> systemAccuracyInTestingWithPcut = new Vector<Float>(); public Vector<Float> systemCoverage = new Vector<Float>(); public int numberOfClassifiersDeletedInMatchSets; /** * Indicates whether the parallel implementation is employed or not. */ final private boolean smp; /** * The Parallel Team containing the threads that perform the parallel implementation * of the generateMatchSet function. */ final private ParallelTeam pt = new ParallelTeam(); public int totalRepetition = 0; private Instances inst; public final int iterations;
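// A minimal sketch of the configuration this class reads through SettingsLoader, assuming the
// settings are stored as plain key = value pairs (the file layout is an assumption; the keys and
// default values below are the ones actually referenced in this class):
//
//   trainIterations = 1000               // number of full training iterations
//   callbackRate = 100                   // how often (in repetitions) the registered hooks run
//   SMP_run = false                      // use the parallel (ParallelTeam) match-set generation
//   UPDATE_MODE = 0                      // 0 = UPDATE_MODE_IMMEDIATE, 1 = UPDATE_MODE_HOLD
//   thoroughlyCheckWIthPopulation = true // if false, duplicates are assimilated after every repetition
//   populationSize = 1500                // maximum rule population size (used by initializePopulation)
//   numberOfLabels = 1                   // number of labels in the multi-label dataset
//   CLUSTER_GAMMA = 0.2                  // ratio of k-means clusters to instances per partition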
/** * Constructor. */ protected AbstractLearningClassifierSystem() { try { SettingsLoader.loadSettings(); } catch (IOException e) { e.printStackTrace(); } hooks = new Vector<ILCSMetric>(); hookCallbackRate = (int) SettingsLoader.getNumericSetting("callbackRate", 100); smp = SettingsLoader.getStringSetting("SMP_run", "false").contains("true"); iterations = (int) SettingsLoader.getNumericSetting("trainIterations", 1000); if (smp) System.out.println("smp: true"); else System.out.println("smp: false"); }
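/*
 * Worked illustration of the assimilation performed below (hypothetical numbers, not from a real
 * run): suppose three macroclassifiers hold an identical rule, with (fitness, experience) pairs of
 * (0.4, 10), (0.7, 5) and (0.7, 9), where fitness here is the numerosity-weighted exploration-mode
 * comparison value computed in the scan. The third one survives: it has the maximum fitness and,
 * on the tie with the second, the larger experience. Its numerosity then absorbs the numerosities
 * of the other two, its numberOfSubsumptions is incremented once per assimilated duplicate, and
 * the duplicates are deleted from the population.
 */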
public void assimilateDuplicateClassifiers(ClassifierSet rulePopulation, final boolean evolve) { //if (evolve) { // if subsumption is only made by the parents and not the whole population, merge classifiers to avoid duplicates for (int j = 0; j < rulePopulation.getNumberOfMacroclassifiers(); j++) { //for (int j = rulePopulation.getNumberOfMacroclassifiers() -1; j >= 0 ; j--) { Vector<Integer> indicesOfDuplicates = new Vector<Integer>(); Vector<Float> fitnessOfDuplicates = new Vector<Float>(); Vector<Integer> experienceOfDuplicates = new Vector<Integer>(); final Classifier aClassifier = rulePopulation.getMacroclassifiersVector().get(j).myClassifier; for (int i = rulePopulation.getNumberOfMacroclassifiers() - 1; i >= 0; i--) { //for (int i = 0; i < rulePopulation.getNumberOfMacroclassifiers(); i++) { Classifier theClassifier = rulePopulation.getMacroclassifiersVector().get(i).myClassifier; if (theClassifier.equals(aClassifier)) { indicesOfDuplicates.add(i); float theClassifierFitness = (float) (rulePopulation.getMacroclassifiersVector() .get(i).numerosity * getUpdateStrategy().getComparisonValue(theClassifier, AbstractUpdateStrategy.COMPARISON_MODE_EXPLORATION)); fitnessOfDuplicates.add(theClassifierFitness); experienceOfDuplicates.add(theClassifier.experience); } } // the indices of the duplicate rules in the myMacroclassifiers vector have now been gathered /* If only one index is found, then aClassifier == theClassifier, so there is nothing to assimilate. * If two or more are found, one of them must be chosen to assimilate all the others: * the one with the greatest fitness assimilates the rest; if two share the same fitness, the one with the greater experience wins. * */ if (indicesOfDuplicates.size() >= 2) { int indexOfSurvivor = 0; float maxFitness = 0; for (int k = 0; k < indicesOfDuplicates.size(); k++) { if (fitnessOfDuplicates.elementAt(k) > maxFitness) { maxFitness = fitnessOfDuplicates.elementAt(k); indexOfSurvivor = k; } else if (fitnessOfDuplicates.elementAt(k) == maxFitness) { if (experienceOfDuplicates.elementAt(k) >= experienceOfDuplicates .elementAt(indexOfSurvivor)) { indexOfSurvivor = k; } } } // the surviving classifier has been determined; initiate assimilation //for (int k = indicesOfDuplicates.size() -1; k >= 0 ; k--) { for (int k = 0; k < indicesOfDuplicates.size(); k++) { if (k != indexOfSurvivor) { rulePopulation.getMacroclassifiersVector() .get(indicesOfDuplicates.elementAt(indexOfSurvivor)).numerosity += rulePopulation .getMacroclassifiersVector() .get(indicesOfDuplicates.elementAt(k)).numerosity; rulePopulation.getMacroclassifiersVector() .get(indicesOfDuplicates.elementAt(indexOfSurvivor)).numberOfSubsumptions++; rulePopulation.totalNumerosity += rulePopulation.getMacroclassifiersVector() .get(indicesOfDuplicates.elementAt(k)).numerosity; rulePopulation.deleteMacroclassifier(indicesOfDuplicates.elementAt(k)); } } } //if (indicesOfDuplicates.size() != 0) { indicesOfDuplicates.clear(); fitnessOfDuplicates.clear(); experienceOfDuplicates.clear(); //} } } /** * Classify a single instance. * * @param instance * the instance to classify * @return the labels the instance is classified in */ public abstract int[] classifyInstance(double[] instance); /** * Creates a new instance of the actual implementation of the LCS. * * @return a pointer to the new instance. */ public abstract AbstractLearningClassifierSystem createNew(); /** * Execute hooks. * * @param aSet * the set on which to run the callbacks */ private void executeCallbacks(final ClassifierSet aSet, final int repetition) { for (int i = 0; i < hooks.size(); i++) { hooks.elementAt(i).getMetric(this); } int numberOfClassifiersCovered = 0; int numberClassifiersGaed = 0; int numberOfSubsumptions = 0; double meanNs = 0; for (int i = 0; i < rulePopulation.getNumberOfMacroclassifiers(); i++) { if (this.getRulePopulation().getMacroclassifier(i).myClassifier .getClassifierOrigin() == Classifier.CLASSIFIER_ORIGIN_COVER) { numberOfClassifiersCovered++; } else if (this.getRulePopulation().getMacroclassifier(i).myClassifier .getClassifierOrigin() == Classifier.CLASSIFIER_ORIGIN_GA) { numberClassifiersGaed++; } numberOfSubsumptions += this.getRulePopulation().getMacroclassifier(i).numberOfSubsumptions; meanNs += this.getRulePopulation().getMacroclassifier(i).myClassifier.getNs(); } meanNs /= this.getRulePopulation().getNumberOfMacroclassifiers(); try { // record the rule population and its metrics in population.txt final FileWriter fstream = new FileWriter( this.hookedMetricsFileDirectory + "/population_" + repetition + ".txt", true); final BufferedWriter buffer = new BufferedWriter(fstream); buffer.write(String.valueOf(this.repetition) + "th repetition:" + System.getProperty("line.separator") + System.getProperty("line.separator") + "Population size: " + rulePopulation.getNumberOfMacroclassifiers() + System.getProperty("line.separator") + "Timestamp: " + rulePopulation.totalGAInvocations + System.getProperty("line.separator") + "Classifiers in population covered :" + numberOfClassifiersCovered + System.getProperty("line.separator") + "Classifiers in population ga-ed :" + numberClassifiersGaed + System.getProperty("line.separator") + "Covers occurred: " + numberOfCoversOccured + System.getProperty("line.separator") + "Subsumptions: " + numberOfSubsumptions + System.getProperty("line.separator") + "Mean ns: " + meanNs + System.getProperty("line.separator") + rulePopulation + System.getProperty("line.separator")); buffer.flush(); buffer.close(); } catch (Exception e) { e.printStackTrace(); } this.numberOfCoversOccured = 0; } /** * Return the LCS's classifier transform bridge.
* * @return the lcs's classifier transform bridge */ public final ClassifierTransformBridge getClassifierTransformBridge() { return transformBridge; } public int getCummulativeCurrentInstanceIndex() { return cummulativeCurrentInstanceIndex; } /** * Returns a string array of the names of the evaluation metrics. * * @return a string array containing the evaluation names. */ public abstract String[] getEvaluationNames(); /** * Returns the evaluation metrics for the given test set. * * @param testSet * the test set on which to calculate the metrics * @return a double array containing the metrics */ public abstract double[] getEvaluations(Instances testSet); /** * Create a new classifier for the specific LCS. * * @return the new classifier. */ public final Classifier getNewClassifier() { return Classifier.createNewClassifier(this); } /** * Return a new classifier object for the specific LCS given a chromosome. * * @param chromosome * the chromosome to be replicated * @return a new classifier containing information about the LCS */ public final Classifier getNewClassifier(final ExtendedBitSet chromosome) { return Classifier.createNewClassifier(this, chromosome); } /** * Getter for the rule population. * @return a ClassifierSet containing the LCSs population * @uml.property name="rulePopulation" */ public final ClassifierSet getRulePopulation() { return rulePopulation; } /** * Returns the LCS's update strategy. * @return the update strategy * @uml.property name="updateStrategy" */ public final AbstractUpdateStrategy getUpdateStrategy() { return updateStrategy; } /** * collect the system's multilabel accuracy per iteration, plus every classifier's accuracy per iteration(TODO) * */ public void harvestAccuracies(int iteration) { final AccuracyRecallEvaluator trainingAccuracy = new AccuracyRecallEvaluator(trainSet, false, this, AccuracyRecallEvaluator.TYPE_ACCURACY); final VotingClassificationStrategy str = ((GenericMultiLabelRepresentation) transformBridge).new VotingClassificationStrategy( (float) this.labelCardinality); ((GenericMultiLabelRepresentation) transformBridge).setClassificationStrategy(str); str.proportionalCutCalibration(this.instances, rulePopulation); final AccuracyRecallEvaluator testingAccuracyWithPcut = new AccuracyRecallEvaluator(testSet, false, this, AccuracyRecallEvaluator.TYPE_ACCURACY); final MeanCoverageStatistic coverage = new MeanCoverageStatistic(); double trainAcc = trainingAccuracy.getMetric(this); double testAccPcut = testingAccuracyWithPcut.getMetric(this); double cov = coverage.getMetric(this); systemAccuracyInTraining.add((float) trainAcc); systemAccuracyInTestingWithPcut.add((float) testAccPcut); systemCoverage.add((float) cov); } /** * Initialize the rule population by clustering the train set and producing rules based upon the clusters. * The train set is initially divided in as many partitions as are the distinct label combinations. * @throws Exception * * @param file * the .arff file * */ public ClassifierSet initializePopulation(final String file) throws Exception { final double gamma = SettingsLoader.getNumericSetting("CLUSTER_GAMMA", .2); int numberOfLabels = (int) SettingsLoader.getNumericSetting("numberOfLabels", 1); final Instances set = InstancesUtility.openInstance(file); SimpleKMeans kmeans = new SimpleKMeans(); kmeans.setSeed(10); kmeans.setPreserveInstancesOrder(true); /* * Table partitions will hold instances only with attributes. 
* On the contrary, table partitionsWithCLasses will hold only the labels */ Instances[] partitions = InstancesUtility.partitionInstances(this, file); Instances[] partitionsWithCLasses = InstancesUtility.partitionInstances(this, file); /* * Instead of having multiple positions for the same label combination, use only one. * This is the one that will be used to "cover" the centroids. */ for (int i = 0; i < partitionsWithCLasses.length; i++) { Instance temp = partitionsWithCLasses[i].instance(0); partitionsWithCLasses[i].delete(); partitionsWithCLasses[i].add(temp); } /* * Delete the labels from the partitions. */ String attributesIndicesForDeletion = ""; for (int k = set.numAttributes() - numberOfLabels + 1; k <= set.numAttributes(); k++) { if (k != set.numAttributes()) attributesIndicesForDeletion += k + ","; else attributesIndicesForDeletion += k; } /* attributesIncicesForDeletion = 8,9,10,11,12,13,14 e.g. for 7 attributes and 7 labels. * It does not start from 7 because it assumes that the user inputs the number. See the api. */ for (int i = 0; i < partitions.length; i++) { Remove remove = new Remove(); remove.setAttributeIndices(attributesIndicesForDeletion); remove.setInvertSelection(false); remove.setInputFormat(partitions[i]); partitions[i] = Filter.useFilter(partitions[i], remove); //System.out.println(partitions[i]); } // partitions now contains only attributes /* * delete the attributes from partitionsWithCLasses */ String labelsIndicesForDeletion = ""; for (int k = 1; k <= set.numAttributes() - numberOfLabels; k++) { if (k != set.numAttributes() - numberOfLabels) labelsIndicesForDeletion += k + ","; else labelsIndicesForDeletion += k; } /* attributesIncicesForDeletion = 8,9,10,11,12,13,14 e.g. for 7 attributes and 7 labels. * It does not start from 7 because it assumes that the user inputs the number. See the api. */ for (int i = 0; i < partitionsWithCLasses.length; i++) { Remove remove = new Remove(); remove.setAttributeIndices(labelsIndicesForDeletion); remove.setInvertSelection(false); remove.setInputFormat(partitionsWithCLasses[i]); partitionsWithCLasses[i] = Filter.useFilter(partitionsWithCLasses[i], remove); //System.out.println(partitionsWithCLasses[i]); } // partitionsWithCLasses now contains only labels int populationSize = (int) SettingsLoader.getNumericSetting("populationSize", 1500); // the set used to store the rules from all the clusters ClassifierSet initialClassifiers = new ClassifierSet(new FixedSizeSetWorstFitnessDeletion(this, populationSize, new RouletteWheelSelector(AbstractUpdateStrategy.COMPARISON_MODE_DELETION, true))); for (int i = 0; i < partitions.length; i++) { try { kmeans.setNumClusters((int) Math.ceil(gamma * partitions[i].numInstances())); kmeans.buildClusterer(partitions[i]); int[] assignments = kmeans.getAssignments(); /* int k=0; for (int j = 0; j < assignments.length; j++) { System.out.printf("Instance %d => Cluster %d ", k, assignments[j]); k++; System.out.println(); } System.out.println();*/ Instances centroids = kmeans.getClusterCentroids(); int numOfCentroidAttributes = centroids.numAttributes(); /* * The centroids in this stage hold only attributes. To continue, we need to provide them the labels. * These are the ones we removed earlier. * But first, open up positions for attributes. 
* */ for (int j = 0; j < numberOfLabels; j++) { Attribute label = new Attribute("label" + j); centroids.insertAttributeAt(label, numOfCentroidAttributes + j); } for (int centroidInstances = 0; centroidInstances < centroids.numInstances(); centroidInstances++) { for (int labels = 0; labels < numberOfLabels; labels++) { centroids.instance(centroidInstances).setValue(numOfCentroidAttributes + labels, partitionsWithCLasses[i].instance(0).value(labels)); } } double[][] centroidsArray = InstancesUtility.convertIntancesToDouble(centroids); for (int j = 0; j < centroidsArray.length; j++) { //System.out.printf("Instance %d => Cluster %d ", k, assignments[j]); final Classifier coveringClassifier = this.getClassifierTransformBridge() .createRandomClusteringClassifier(centroidsArray[j]); coveringClassifier.setClassifierOrigin(Classifier.CLASSIFIER_ORIGIN_INIT); initialClassifiers.addClassifier(new Macroclassifier(coveringClassifier, 1), false); } } catch (Exception e) { e.printStackTrace(); } } System.out.println(initialClassifiers); return initialClassifiers; } /** * Initialize the rule population by clustering the train set and producing rules based upon the clusters. * The train set is initially divided in as many partitions as are the distinct label combinations. * @throws Exception * * @param trainSet * the type of Instances train set * */ public ClassifierSet initializePopulation(final Instances trainset) throws Exception { final double gamma = SettingsLoader.getNumericSetting("CLUSTER_GAMMA", .2); int numberOfLabels = (int) SettingsLoader.getNumericSetting("numberOfLabels", 1); final Instances set = trainset; SimpleKMeans kmeans = new SimpleKMeans(); kmeans.setSeed(10); kmeans.setPreserveInstancesOrder(true); /* * Table partitions will hold instances only with attributes. * On the contrary, table partitionsWithCLasses will hold only the labels */ Instances[] partitions = InstancesUtility.partitionInstances(this, trainset); Instances[] partitionsWithCLasses = InstancesUtility.partitionInstances(this, trainset); /* * Instead of having multiple positions for the same label combination, use only one. * This is the one that will be used to "cover" the centroids. */ for (int i = 0; i < partitionsWithCLasses.length; i++) { Instance temp = partitionsWithCLasses[i].instance(0); partitionsWithCLasses[i].delete(); partitionsWithCLasses[i].add(temp); } /* * Delete the labels from the partitions. */ String attributesIndicesForDeletion = ""; for (int k = set.numAttributes() - numberOfLabels + 1; k <= set.numAttributes(); k++) { if (k != set.numAttributes()) attributesIndicesForDeletion += k + ","; else attributesIndicesForDeletion += k; } /* attributesIncicesForDeletion = 8,9,10,11,12,13,14 e.g. for 7 attributes and 7 labels. * It does not start from 7 because it assumes that the user inputs the number. See the api. */ for (int i = 0; i < partitions.length; i++) { Remove remove = new Remove(); remove.setAttributeIndices(attributesIndicesForDeletion); remove.setInvertSelection(false); remove.setInputFormat(partitions[i]); partitions[i] = Filter.useFilter(partitions[i], remove); } // partitions now contains only attributes /* * delete the attributes from partitionsWithCLasses */ String labelsIndicesForDeletion = ""; for (int k = 1; k <= set.numAttributes() - numberOfLabels; k++) { if (k != set.numAttributes() - numberOfLabels) labelsIndicesForDeletion += k + ","; else labelsIndicesForDeletion += k; } /* attributesIncicesForDeletion = 8,9,10,11,12,13,14 e.g. for 7 attributes and 7 labels. 
* It does not start from 7 because it assumes that the user inputs the number. See the api. */ for (int i = 0; i < partitionsWithCLasses.length; i++) { Remove remove = new Remove(); remove.setAttributeIndices(labelsIndicesForDeletion); remove.setInvertSelection(false); remove.setInputFormat(partitionsWithCLasses[i]); partitionsWithCLasses[i] = Filter.useFilter(partitionsWithCLasses[i], remove); //System.out.println(partitionsWithCLasses[i]); } // partitionsWithCLasses now contains only labels int populationSize = (int) SettingsLoader.getNumericSetting("populationSize", 1500); // the set used to store the rules from all the clusters ClassifierSet initialClassifiers = new ClassifierSet(new FixedSizeSetWorstFitnessDeletion(this, populationSize, new RouletteWheelSelector(AbstractUpdateStrategy.COMPARISON_MODE_DELETION, true))); for (int i = 0; i < partitions.length; i++) { try { kmeans.setNumClusters((int) Math.ceil(gamma * partitions[i].numInstances())); kmeans.buildClusterer(partitions[i]); int[] assignments = kmeans.getAssignments(); /* int k=0; for (int j = 0; j < assignments.length; j++) { System.out.printf("Instance %d => Cluster %d ", k, assignments[j]); k++; System.out.println(); } System.out.println();*/ Instances centroids = kmeans.getClusterCentroids(); int numOfCentroidAttributes = centroids.numAttributes(); /* * The centroids in this stage hold only attributes. To continue, we need to provide them the labels. * These are the ones we removed earlier. * But first, open up positions for attributes. * */ for (int j = 0; j < numberOfLabels; j++) { Attribute label = new Attribute("label" + j); centroids.insertAttributeAt(label, numOfCentroidAttributes + j); } for (int centroidInstances = 0; centroidInstances < centroids.numInstances(); centroidInstances++) { for (int labels = 0; labels < numberOfLabels; labels++) { centroids.instance(centroidInstances).setValue(numOfCentroidAttributes + labels, partitionsWithCLasses[i].instance(0).value(labels)); } } //System.out.println(centroids); double[][] centroidsArray = InstancesUtility.convertIntancesToDouble(centroids); for (int j = 0; j < centroidsArray.length; j++) { //System.out.printf("Instance %d => Cluster %d ", k, assignments[j]); final Classifier coveringClassifier = this.getClassifierTransformBridge() .createRandomCoveringClassifier(centroidsArray[j]); coveringClassifier.setClassifierOrigin(Classifier.CLASSIFIER_ORIGIN_INIT); initialClassifiers.addClassifier(new Macroclassifier(coveringClassifier, 1), false); } } catch (Exception e) { e.printStackTrace(); } } //System.out.println(initialClassifiers); return initialClassifiers; } /** * Prints the population classifiers of the LCS. */ public final void printSet() { rulePopulation.print(); } /** * Register an evaluator to be called during training. * * @param evaluator * the evaluator to register * @return true if the evaluator has been registered successfully */ public final boolean registerHook(final ILCSMetric evaluator) { return hooks.add(evaluator); } /** * Registration of hooks to perform periodical inspection using metrics. * * @param numberOfLabels * the dataset's number of labels. * *@param instances * the set of instances on which we will evaluate on. 
(train or test) * * @author alexandros filotheou * * * */ public void registerMultilabelHooks(double[][] instances, int numberOfLabels) { new FileLogger(this); this.registerHook(new FileLogger("accuracy", new AccuracyRecallEvaluator(instances, false, this, AccuracyRecallEvaluator.TYPE_ACCURACY))); this.registerHook(new FileLogger("recall", new AccuracyRecallEvaluator(instances, false, this, AccuracyRecallEvaluator.TYPE_RECALL))); this.registerHook(new FileLogger("exactMatch", new ExactMatchEvalutor(instances, false, this))); this.registerHook( new FileLogger("hamming", new HammingLossEvaluator(instances, false, numberOfLabels, this))); this.registerHook(new FileLogger("meanFitness", new MeanFitnessStatistic(AbstractUpdateStrategy.COMPARISON_MODE_EXPLOITATION))); this.registerHook(new FileLogger("meanCoverage", new MeanCoverageStatistic())); this.registerHook(new FileLogger("weightedMeanCoverage", new WeightedMeanCoverageStatistic(AbstractUpdateStrategy.COMPARISON_MODE_EXPLOITATION))); this.registerHook(new FileLogger("meanAttributeSpecificity", new MeanAttributeSpecificityStatistic())); this.registerHook( new FileLogger("weightedMeanAttributeSpecificity", new WeightedMeanAttributeSpecificityStatistic( AbstractUpdateStrategy.COMPARISON_MODE_EXPLOITATION))); this.registerHook(new FileLogger("meanLabelSpecificity", new MeanLabelSpecificity(numberOfLabels))); this.registerHook(new FileLogger("weightedMeanLabelSpecificity", new WeightedMeanLabelSpecificity( numberOfLabels, AbstractUpdateStrategy.COMPARISON_MODE_EXPLOITATION))); if (SettingsLoader.getStringSetting("filename", "").indexOf("position") != -1) { this.registerHook(new FileLogger("BAM", new PositionBAMEvaluator((int) SettingsLoader.getNumericSetting("numberOfLabels", 1), PositionBAMEvaluator.GENERIC_REPRESENTATION, this))); } if (SettingsLoader.getStringSetting("filename", "").indexOf("identity") != -1) { this.registerHook(new FileLogger("BAM", new IdentityBAMEvaluator((int) SettingsLoader.getNumericSetting("numberOfLabels", 1), IdentityBAMEvaluator.GENERIC_REPRESENTATION, this))); } } /** * Save the rules to the given filename. * * @param filename */ public final void saveRules(String filename) { ClassifierSet.saveClassifierSet(rulePopulation, filename); } /** * Constructor. * * @param bridge * the classifier transform bridge * @param update * the update strategy */ public final void setElements(final ClassifierTransformBridge bridge, final AbstractUpdateStrategy update) { transformBridge = bridge; updateStrategy = update; } /** * @param rate */ public void setHookCallbackRate(int rate) { hookCallbackRate = rate; } public void setHookedMetricsFileDirectory(String file) { hookedMetricsFileDirectory = file; } /** * Sets the LCS's population. * @param population the new LCS's population */ public final void setRulePopulation(ClassifierSet population) { rulePopulation = population; } /** * Run the LCS and train it. */ public abstract void train(); /** * Train population with all train instances and perform evolution. * * @param iterations * the number of full iterations (one iteration the LCS is * trained with all instances) to train the LCS * @param population * the population of the classifiers to train. */ protected final void trainSet(final int iterations, final ClassifierSet population) { trainSet(iterations, population, true); // evolve = true } /** * Train a classifier set with all train instances. 
* * @param iterations * the number of full iterations (one iteration the LCS is * trained with all instances) to train the LCS * @param population * the population of the classifiers to train. * @param evolve * set true to evolve population, false to only update it * * * Called for iterations repetitions with evolve = true * and for (int) (0.1 * iterations) repetitions with evolve = false. */ public final void trainSet(final int iterations, final ClassifierSet population, final boolean evolve) { final int numInstances = instances.length; repetition = 0; int trainsBeforeHook = 0; while (repetition < iterations) { System.out.print("["); while ((trainsBeforeHook < hookCallbackRate) && (repetition < iterations)) { System.out.print('/'); for (int i = 0; i < numInstances; i++) { cummulativeCurrentInstanceIndex = totalRepetition * instances.length + i; trainWithInstance(population, i, evolve); } repetition++; totalRepetition++; trainsBeforeHook++; // check for duplicates on every repetition if (!thoroughlyCheckWIthPopulation) { assimilateDuplicateClassifiers(rulePopulation, evolve); } } if (hookCallbackRate < iterations) { System.out.print("] "); System.out.print("(" + repetition + "/" + iterations + ")"); System.out.println(); } executeCallbacks(population, repetition); trainsBeforeHook = 0; } } /** * Train with instance main template. Trains the classifier set with a * single instance. * * @param population * the classifiers' population, i.e. the whole population [P] * @param dataInstanceIndex * the index of the training data instance * @param evolve * whether to evolve the set or just train by updating it */ public final void trainWithInstance(final ClassifierSet population, final int dataInstanceIndex, final boolean evolve) { long time1, time2; int index = totalRepetition * instances.length + dataInstanceIndex; if (smp) { final ClassifierSet matchSetSmp = population.generateMatchSetNewSmp(dataInstanceIndex, pt); if (UPDATE_MODE == UPDATE_MODE_IMMEDIATE) getUpdateStrategy().updateSetSmp(population, matchSetSmp, dataInstanceIndex, evolve); else if (UPDATE_MODE == UPDATE_MODE_HOLD) getUpdateStrategy().updateSetNewSmp(population, matchSetSmp, dataInstanceIndex, evolve); recordInTimeMeasurements(population, index); } else { final ClassifierSet matchSet = population.generateMatchSetNew(dataInstanceIndex); if (UPDATE_MODE == UPDATE_MODE_IMMEDIATE) getUpdateStrategy().updateSet(population, matchSet, dataInstanceIndex, evolve); else if (UPDATE_MODE == UPDATE_MODE_HOLD) getUpdateStrategy().updateSetNew(population, matchSet, dataInstanceIndex, evolve); recordInTimeMeasurements(population, index); } } private void recordInTimeMeasurements(ClassifierSet population, int index) { MeanCoverageStatistic meanCov = new MeanCoverageStatistic(); double meanPopulationCoverage = meanCov.getMetric(this); int numberOfMacroclassifiersCovered = 0; int numberOfClassifiersCovered = 0; int numberOfMacroclassifiersGaed = 0; int numberOfClassifiersGaed = 0; int numberOfMacroclassifiersInited = 0; int numberOfClassifiersInited = 0; int numberOfSubsumptions = 0; double meanNs = 0; double meanAcc = 0; double meanCoveredAcc = 0; double meanGaedAcc = 0; double meanExplorationFitness = 0; double meanCoveredExplorationFitness = 0; double meanGaedExplorationFitness = 0; double meanPureFitness = 0; double meanCoveredPureFitness = 0; double meanGaedPureFitness = 0; for (int i = 0; i < population.getNumberOfMacroclassifiers(); i++) { Macroclassifier macro = population.getMacroclassifiersVector().get(i); numberOfSubsumptions += macro.numberOfSubsumptions; if
(macro.myClassifier.getClassifierOrigin() == Classifier.CLASSIFIER_ORIGIN_COVER) { numberOfMacroclassifiersCovered++; numberOfClassifiersCovered += macro.numerosity; } else if (macro.myClassifier.getClassifierOrigin() == Classifier.CLASSIFIER_ORIGIN_GA) { numberOfMacroclassifiersGaed++; numberOfClassifiersGaed += macro.numerosity; } else if (macro.myClassifier.getClassifierOrigin() == Classifier.CLASSIFIER_ORIGIN_INIT) { numberOfMacroclassifiersInited++; numberOfClassifiersInited += macro.numerosity; } meanAcc += macro.numerosity * macro.myClassifier.getComparisonValue(AbstractUpdateStrategy.COMPARISON_MODE_PURE_ACCURACY); meanExplorationFitness += macro.numerosity * macro.myClassifier.getComparisonValue(AbstractUpdateStrategy.COMPARISON_MODE_EXPLORATION); meanPureFitness += macro.numerosity * macro.myClassifier.getComparisonValue(AbstractUpdateStrategy.COMPARISON_MODE_PURE_FITNESS); meanNs += population.getClassifier(i).getNs(); if (macro.myClassifier.getClassifierOrigin() == Classifier.CLASSIFIER_ORIGIN_COVER || macro.myClassifier.getClassifierOrigin() == Classifier.CLASSIFIER_ORIGIN_INIT) { meanCoveredAcc += macro.numerosity * macro.myClassifier .getComparisonValue(AbstractUpdateStrategy.COMPARISON_MODE_PURE_ACCURACY); meanCoveredExplorationFitness += macro.numerosity * macro.myClassifier.getComparisonValue(AbstractUpdateStrategy.COMPARISON_MODE_EXPLORATION); meanCoveredPureFitness += macro.numerosity * macro.myClassifier .getComparisonValue(AbstractUpdateStrategy.COMPARISON_MODE_PURE_FITNESS); } else if (macro.myClassifier.getClassifierOrigin() == Classifier.CLASSIFIER_ORIGIN_GA) { meanGaedAcc += macro.numerosity * macro.myClassifier .getComparisonValue(AbstractUpdateStrategy.COMPARISON_MODE_PURE_ACCURACY); meanGaedExplorationFitness += macro.numerosity * macro.myClassifier.getComparisonValue(AbstractUpdateStrategy.COMPARISON_MODE_EXPLORATION); meanGaedPureFitness += macro.numerosity * macro.myClassifier .getComparisonValue(AbstractUpdateStrategy.COMPARISON_MODE_PURE_FITNESS); } } meanAcc /= population.getTotalNumerosity(); meanNs /= population.getNumberOfMacroclassifiers(); meanCoveredAcc /= (numberOfClassifiersCovered + numberOfClassifiersInited); meanGaedAcc /= numberOfClassifiersGaed; meanExplorationFitness /= population.getTotalNumerosity(); meanCoveredExplorationFitness /= (numberOfClassifiersCovered + numberOfClassifiersInited); meanGaedExplorationFitness /= numberOfClassifiersGaed; meanPureFitness /= population.getTotalNumerosity(); meanCoveredPureFitness /= (numberOfClassifiersCovered + numberOfClassifiersInited); meanGaedPureFitness /= numberOfClassifiersGaed; timeMeasurements[index][2] = (int) population.firstDeletionFormula; timeMeasurements[index][3] = (int) population.secondDeletionFormula; } /** * Unregister an evaluator. * * @param evaluator * the evaluator to register * @return true if the evaluator has been unregisterd successfully */ public final boolean unregisterEvaluator(final ILCSMetric evaluator) { return hooks.remove(evaluator); } /** * Update population with all train instances but do not perform evolution. * * @param iterations * the number of full iterations (one iteration the LCS is * trained with all instances) to update the LCS * @param population * the population of the classifiers to update. */ public final void updatePopulation(final int iterations, final ClassifierSet population) { trainSet(iterations, population, false); // evolve = false } }
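To make the contract above concrete, here is a rough usage sketch. MinimalLcs and its trivial method bodies are hypothetical and not part of the library; only the abstract methods, the protected trainSet/rulePopulation members, and the public updatePopulation/iterations members are taken from the class shown above. The train() pattern (evolve for the configured iterations, then update-only for a tenth of them) follows the note in the trainSet javadoc.

import gr.auth.ee.lcs.AbstractLearningClassifierSystem;
import weka.core.Instances;

/** A hypothetical, minimal subclass illustrating the abstract contract. */
public class MinimalLcs extends AbstractLearningClassifierSystem {

    public MinimalLcs() {
        // A real LCS would build a ClassifierTransformBridge and an AbstractUpdateStrategy here,
        // wire them in with setElements(bridge, update), and then create and set the rule
        // population with setRulePopulation(...).
    }

    @Override
    public int[] classifyInstance(double[] instance) {
        // Placeholder: a real implementation delegates to the representation's classification strategy.
        return new int[0];
    }

    @Override
    public AbstractLearningClassifierSystem createNew() {
        return new MinimalLcs();
    }

    @Override
    public String[] getEvaluationNames() {
        return new String[] { "accuracy" };
    }

    @Override
    public double[] getEvaluations(Instances testSet) {
        return new double[] { 0.0 }; // placeholder metric
    }

    @Override
    public void train() {
        // Evolve the population for the configured number of iterations,
        // then perform update-only passes for a tenth of them.
        trainSet(iterations, rulePopulation);
        updatePopulation((int) (0.1 * iterations), rulePopulation);
    }
}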