// Java tutorial
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * LabeledItemSet.java
 * Copyright (C) 2004-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.associations;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Hashtable;

import weka.core.Instance;
import weka.core.Instances;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.WekaEnumeration;

/**
 * Class for storing a set of items together with a class label. Item sets are
 * stored in a lexicographic order, which is determined by the header
 * information of the set of instances used for generating the set of items. All
 * methods in this class assume that item sets are stored in lexicographic
 * order. The class provides the methods used for item sets in class association
 * rule mining. Because every item set knows its class label the training set
 * can be split up virtually.
 * <p>
 * Within this class an item array holds one slot per attribute; a slot value
 * of {@code -1} means the attribute is not part of the item set.
 *
 * @author Stefan Mutter (mutter@cs.waikato.ac.nz)
 * @version $Revision$
 */
public class LabeledItemSet extends ItemSet implements Serializable,
  RevisionHandler {

  /** for serialization */
  private static final long serialVersionUID = 4158771925518299903L;

  /** The class label. */
  protected int m_classLabel;

  /**
   * The support of the rule: incremented for every transaction that contains
   * the items and also carries this item set's class label.
   */
  protected int m_ruleSupCounter;

  /**
   * Constructor
   *
   * @param totalTrans the total number of transactions
   * @param classLabel the class label
   */
  public LabeledItemSet(int totalTrans, int classLabel) {

    super(totalTrans);
    m_classLabel = classLabel;
  }

  /**
   * Deletes all item sets whose rule support is below the minimum support or
   * above the maximum support. The given list is left untouched; the
   * surviving item sets are returned in a new list, in their original order.
   *
   * @return the reduced set of item sets
   * @param maxSupport the maximum support (inclusive)
   * @param itemSets the set of item sets to be pruned
   * @param minSupport the minimum number of transactions to be covered
   *          (inclusive)
   */
  public static ArrayList<Object> deleteItemSets(ArrayList<Object> itemSets,
    int minSupport, int maxSupport) {

    ArrayList<Object> kept = new ArrayList<Object>(itemSets.size());

    for (Object element : itemSets) {
      LabeledItemSet current = (LabeledItemSet) element;
      int support = current.m_ruleSupCounter;
      if (support >= minSupport && support <= maxSupport) {
        kept.add(current);
      }
    }
    return kept;
  }

  /**
   * Tests if two item sets are equal, i.e. they have the same runtime class,
   * the same items and the same class label.
   * <p>
   * NOTE(review): {@code hashCode()} is not overridden here; presumably the
   * one inherited from {@code ItemSet} depends only on the items and therefore
   * stays consistent with this method - confirm against {@code ItemSet}.
   *
   * @param itemSet another item set
   * @return true if this item set contains the same items and class label as
   *         the given one
   */
  @Override
  public final boolean equals(Object itemSet) {

    // equalCondset() already rejects null and foreign classes, so the cast
    // on the right-hand side is only reached for a LabeledItemSet.
    return equalCondset(itemSet)
      && m_classLabel == ((LabeledItemSet) itemSet).m_classLabel;
  }

  /**
   * Compares the items of two item sets, ignoring the class label.
   *
   * @param itemSet an item set
   * @return true if the given object is of exactly the same class as this one
   *         and holds the same items, false otherwise
   */
  public final boolean equalCondset(Object itemSet) {

    if ((itemSet == null) || !(itemSet.getClass().equals(this.getClass()))) {
      return false;
    }

    ItemSet other = (ItemSet) itemSet;
    if (m_items.length != other.items().length) {
      return false;
    }
    for (int i = 0; i < m_items.length; i++) {
      if (m_items[i] != other.itemAt(i)) {
        return false;
      }
    }
    return true;
  }

  /**
   * Return a hashtable filled with the given item sets. Each item set is used
   * as key and is mapped to its class label.
   *
   * @param itemSets the set of item sets to be used for filling the hash table
   * @param initialSize the initial size of the hashtable
   * @return the generated hashtable
   */
  public static Hashtable<ItemSet, Integer> getHashtable(
    ArrayList<Object> itemSets, int initialSize) {

    Hashtable<ItemSet, Integer> hashtable = new Hashtable<ItemSet, Integer>(
      initialSize);

    for (Object element : itemSets) {
      LabeledItemSet current = (LabeledItemSet) element;
      // Integer.valueOf replaces the deprecated Integer(int) constructor.
      hashtable.put(current, Integer.valueOf(current.m_classLabel));
    }
    return hashtable;
  }

  /**
   * Merges all item sets in the given set of item sets to create the item sets
   * of the next size. Two item sets are merged only if they carry the same
   * class label, agree on their first {@code size} items and do not both
   * define a value for any of the remaining attributes. The counters of every
   * generated item set are set to zero.
   *
   * @return the generated (k)-item sets
   * @param totalTrans the total number of transactions
   * @param itemSets the set of (k-1)-item sets
   * @param size the number of leading items (slots different from -1) that two
   *          item sets must have in common to be merged
   */
  public static ArrayList<Object> mergeAllItemSets(ArrayList<Object> itemSets,
    int size, int totalTrans) {

    ArrayList<Object> newVector = new ArrayList<Object>();

    for (int i = 0; i < itemSets.size(); i++) {
      LabeledItemSet first = (LabeledItemSet) itemSets.get(i);
      out: for (int j = i + 1; j < itemSets.size(); j++) {
        LabeledItemSet second = (LabeledItemSet) itemSets.get(j);

        // Skip forward to the next candidate carrying the same class label.
        while (first.m_classLabel != second.m_classLabel) {
          j++;
          if (j == itemSets.size()) {
            break out;
          }
          second = (LabeledItemSet) itemSets.get(j);
        }

        LabeledItemSet result = new LabeledItemSet(totalTrans,
          first.m_classLabel);
        result.m_items = new int[first.m_items.length];

        // Find and copy common prefix of size 'size'
        int numFound = 0;
        int k = 0;
        while (numFound < size) {
          if (first.m_items[k] == second.m_items[k]) {
            if (first.m_items[k] != -1) {
              numFound++;
            }
            result.m_items[k] = first.m_items[k];
          } else {
            // A prefix mismatch abandons all remaining partners of 'first';
            // presumably valid because of the lexicographic ordering the
            // class documentation requires.
            break out;
          }
          k++;
        }

        // Check difference: behind the prefix at most one of the two item
        // sets may define a value for any given attribute.
        while (k < first.m_items.length) {
          if ((first.m_items[k] != -1) && (second.m_items[k] != -1)) {
            break;
          } else {
            if (first.m_items[k] != -1) {
              result.m_items[k] = first.m_items[k];
            } else {
              result.m_items[k] = second.m_items[k];
            }
          }
          k++;
        }

        // Only a scan that reached the end without a clash yields a merge.
        if (k == first.m_items.length) {
          result.m_ruleSupCounter = 0;
          result.m_counter = 0;
          newVector.add(result);
        }
      }
    }

    return newVector;
  }

  /**
   * Splits the class attribute away. Depending on the invert flag, the
   * instances without class attribute or only the class attribute of all
   * instances is returned. The given instances are not modified; the result
   * is a copy.
   *
   * @param instances the instances
   * @param invert flag; if true only the class attribute remains, otherwise
   *          the class attribute is the only attribute that is deleted.
   * @throws Exception if no class attribute is set or the instances cannot be
   *           split
   * @return Instances without the class attribute or instances with only the
   *         class attribute
   */
  public static Instances divide(Instances instances, boolean invert)
    throws Exception {

    // Validate before copying so the failure path does not pay for a copy.
    if (instances.classIndex() < 0) {
      throw new Exception(
        "For class association rule mining a class attribute has to be specified.");
    }

    Instances newInstances = new Instances(instances);
    if (invert) {
      for (int i = 0; i < newInstances.numAttributes(); i++) {
        if (i != newInstances.classIndex()) {
          newInstances.deleteAttributeAt(i);
          // The following attributes moved one position to the left, so the
          // same index has to be inspected again.
          i--;
        }
      }
      return newInstances;
    } else {
      // The class index is unset first, then the former class attribute is
      // deleted from the copy.
      newInstances.setClassIndex(-1);
      newInstances.deleteAttributeAt(instances.classIndex());
      return newInstances;
    }
  }

  /**
   * Converts the header info of the given set of instances into a set of item
   * sets (singletons). The ordering of values in the header file determines the
   * lexicographic order. Each item set knows its class label: one singleton is
   * generated per (attribute, value, class value) combination.
   *
   * @return a set of item sets, each containing a single item
   * @param instancesNoClass instances without the class attribute
   * @param classes the values of the class attribute sorted according to
   *          instances
   * @exception Exception if singletons can't be generated successfully, e.g.
   *              because a numeric attribute is encountered
   */
  public static ArrayList<Object> singletons(Instances instancesNoClass,
    Instances classes) throws Exception {

    ArrayList<Object> setOfItemSets = new ArrayList<Object>();
    int numAttributes = instancesNoClass.numAttributes();

    // make singletons
    for (int i = 0; i < numAttributes; i++) {
      if (instancesNoClass.attribute(i).isNumeric()) {
        throw new Exception("Can't handle numeric attributes!");
      }
      for (int j = 0; j < instancesNoClass.attribute(i).numValues(); j++) {
        for (int k = 0; k < (classes.attribute(0)).numValues(); k++) {
          LabeledItemSet current = new LabeledItemSet(
            instancesNoClass.numInstances(), k);
          current.m_items = new int[numAttributes];
          // Mark every attribute as absent, then set the single item.
          for (int l = 0; l < numAttributes; l++) {
            current.m_items[l] = -1;
          }
          current.m_items[i] = j;
          setOfItemSets.add(current);
        }
      }
    }
    return setOfItemSets;
  }

  /**
   * Prunes a set of (k)-item sets using the given (k-1)-item sets. An item set
   * is kept only if every subset obtained by removing exactly one of its items
   * is contained in the hashtable with the same class label.
   *
   * @param toPrune the set of (k)-item sets to be pruned
   * @param kMinusOne the (k-1)-item sets to be used for pruning
   * @return the pruned set of item sets
   */
  public static ArrayList<Object> pruneItemSets(ArrayList<Object> toPrune,
    Hashtable<ItemSet, Integer> kMinusOne) {

    ArrayList<Object> newVector = new ArrayList<Object>(toPrune.size());

    for (Object element : toPrune) {
      LabeledItemSet current = (LabeledItemSet) element;
      boolean allSubsetsPresent = true;

      for (int j = 0; j < current.m_items.length; j++) {
        if (current.m_items[j] == -1) {
          continue;
        }
        // Temporarily blank out item j so that 'current' itself serves as
        // the lookup key for the subset, then restore it right away.
        int removed = current.m_items[j];
        current.m_items[j] = -1;
        Integer label = kMinusOne.get(current);
        current.m_items[j] = removed;

        if (label == null || current.m_classLabel != label.intValue()) {
          allSubsetsPresent = false;
          break;
        }
      }
      if (allSubsetsPresent) {
        newVector.add(current);
      }
    }
    return newVector;
  }

  /**
   * Outputs the support for an item set, which for a labeled item set is the
   * rule support counter.
   *
   * @return the support
   */
  @Override
  public final int support() {

    return m_ruleSupCounter;
  }

  /**
   * Updates counter of item set with respect to given transaction. The plain
   * counter is incremented if the transaction contains the items; the rule
   * support counter is additionally incremented if the class value matches the
   * class label.
   *
   * @param instanceNoClass the transaction without the class attribute
   * @param instanceClass the class value of the transaction (read from
   *          attribute 0)
   */
  public final void upDateCounter(Instance instanceNoClass,
    Instance instanceClass) {

    if (containedBy(instanceNoClass)) {
      m_counter++;
      if (this.m_classLabel == instanceClass.value(0)) {
        m_ruleSupCounter++;
      }
    }
  }

  /**
   * Updates counter of item set with respect to given transaction, using
   * {@code containedByTreatZeroAsMissing} for the containment test.
   *
   * @param instanceNoClass the transaction without the class attribute
   * @param instanceClass the class value of the transaction (read from
   *          attribute 0)
   */
  public final void upDateCounterTreatZeroAsMissing(Instance instanceNoClass,
    Instance instanceClass) {

    if (containedByTreatZeroAsMissing(instanceNoClass)) {
      m_counter++;
      if (this.m_classLabel == instanceClass.value(0)) {
        m_ruleSupCounter++;
      }
    }
  }

  /**
   * Updates the counters of all given item sets with respect to all given
   * transactions.
   *
   * @param itemSets the item sets
   * @param instancesNoClass instances without the class attribute
   * @param instancesClass the values of the class attribute sorted according to
   *          instances
   */
  public static void upDateCounters(ArrayList<Object> itemSets,
    Instances instancesNoClass, Instances instancesClass) {

    for (int i = 0; i < instancesNoClass.numInstances(); i++) {
      Enumeration<Object> enu = new WekaEnumeration<Object>(itemSets);
      while (enu.hasMoreElements()) {
        ((LabeledItemSet) enu.nextElement()).upDateCounter(
          instancesNoClass.instance(i), instancesClass.instance(i));
      }
    }
  }

  /**
   * Updates the counters of all given item sets with respect to all given
   * transactions, using the "treat zero as missing" containment test.
   *
   * @param itemSets the item sets
   * @param instancesNoClass instances without the class attribute
   * @param instancesClass the values of the class attribute sorted according to
   *          instances
   */
  public static void upDateCountersTreatZeroAsMissing(
    ArrayList<LabeledItemSet> itemSets, Instances instancesNoClass,
    Instances instancesClass) {

    for (int i = 0; i < instancesNoClass.numInstances(); i++) {
      Enumeration<LabeledItemSet> enu = new WekaEnumeration<LabeledItemSet>(
        itemSets);
      while (enu.hasMoreElements()) {
        enu.nextElement().upDateCounterTreatZeroAsMissing(
          instancesNoClass.instance(i), instancesClass.instance(i));
      }
    }
  }

  /**
   * Generates rules out of item sets. Exactly one rule is built: the items of
   * this item set form the premise and the class label forms the consequence.
   * The returned array holds the premises at index 0, the consequences at
   * index 1 and the confidences (rule support divided by the item set counter)
   * at index 2.
   *
   * @param minConfidence the minimum confidence
   * @param noPrune flag indicating whether the rules are pruned according to
   *          the minimum confidence value; pruning is skipped if true
   * @return a set of rules
   */
  public final ArrayList<Object>[] generateRules(double minConfidence,
    boolean noPrune) {

    ArrayList<Object> premises = new ArrayList<Object>();
    ArrayList<Object> consequences = new ArrayList<Object>();
    ArrayList<Object> conf = new ArrayList<Object>();
    @SuppressWarnings("unchecked")
    ArrayList<Object>[] rules = new ArrayList[3];

    // Generate all rules with class in the consequence.
    ItemSet premise = new ItemSet(m_totalTransactions);
    ItemSet consequence = new ItemSet(m_totalTransactions);
    int[] premiseItems = new int[m_items.length];
    int[] consequenceItems = new int[1];
    System.arraycopy(m_items, 0, premiseItems, 0, m_items.length);
    consequence.setItem(consequenceItems);
    premise.setItem(premiseItems);
    consequence.setItemAt(m_classLabel, 0);
    consequence.setCounter(this.m_ruleSupCounter);
    premise.setCounter(this.m_counter);
    premises.add(premise);
    consequences.add(consequence);
    // Floating-point division; Double.valueOf replaces the deprecated
    // Double(double) constructor.
    conf.add(Double.valueOf((double) this.m_ruleSupCounter
      / (double) this.m_counter));

    rules[0] = premises;
    rules[1] = consequences;
    rules[2] = conf;
    if (!noPrune) {
      pruneRules(rules, minConfidence);
    }

    return rules;
  }

  /**
   * Returns the revision string.
   *
   * @return the revision
   */
  @Override
  public String getRevision() {
    return RevisionUtils.extract("$Revision$");
  }
}