com.stewel.dataflow.assocrules.AlgoAgrawalFaster94.java Source code

Java tutorial

Introduction

Here is the source code for com.stewel.dataflow.assocrules.AlgoAgrawalFaster94.java

Source

package com.stewel.dataflow.assocrules;

/* This file is copyright (c) 2008-2012 Philippe Fournier-Viger
*
* This file is part of the SPMF DATA MINING SOFTWARE
* (http://www.philippe-fournier-viger.com/spmf).
*
* SPMF is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
* SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along with
* SPMF. If not, see <http://www.gnu.org/licenses/>.
*/

import com.stewel.dataflow.fpgrowth.Itemset;
import com.stewel.dataflow.fpgrowth.Itemsets;
import org.apache.commons.digester.Rules;

import javax.annotation.Nonnull;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;

/**
 * This is an implementation of the "faster algorithm" for generating association rules,
 * described in Agrawal et al. 1994, IBM Research Report RJ9839, June 1994.
 * <p>
 * This implementation does not take an output path: every rule that satisfies the
 * minimum confidence and minimum lift thresholds is passed to the
 * {@code AssociationRuleWriter} supplied to the constructor when the runAlgorithm()
 * method is called. Whether the result is saved to a file or kept in memory
 * therefore depends on the writer provided by the user.
 *
 * @author Philippe Fournier-Viger
 * @see Rules
 **/

public class AlgoAgrawalFaster94 {

    // Collaborators supplied by the caller; the constructor rejects null for each of them.
    private final ItemsetCandidateGenerator itemsetsCandidateGenerator;
    private final ItemsetSupportCalculator itemsetSupportCalculator;
    private final AssociationRuleWriter associationRuleWriter;

    // Itemsets from which rules are generated, read level by level through getLevels().
    private final Itemsets patterns;
    private final long databaseSize; // number of transactions in database
    private final double minimumConfidence;
    private final double minimumLift;
    // Fixed to true in this class, so the lift threshold is applied to every rule.
    private final boolean usingLift = true;

    /**
     * Creates a rule generator and initialises the support calculator with the given patterns
     * (by calling {@code itemsetSupportCalculator.init(patterns)}).
     *
     * @param itemsetsCandidateGenerator generator used to build larger consequent candidates
     * @param itemsetSupportCalculator   calculator queried for the support of antecedents and consequents
     * @param associationRuleWriter      receiver of every rule that passes the thresholds
     * @param patterns                   the itemsets to generate rules from
     * @param databaseSize               number of transactions in the database (divisor in the lift computation)
     * @param minimumConfidence          rules with a lower confidence are discarded
     * @param minimumLift                rules with a lower lift are discarded
     * @throws NullPointerException if any of the object arguments is {@code null}
     */
    public AlgoAgrawalFaster94(@Nonnull final ItemsetCandidateGenerator itemsetsCandidateGenerator,
            @Nonnull final ItemsetSupportCalculator itemsetSupportCalculator,
            @Nonnull final AssociationRuleWriter associationRuleWriter, @Nonnull final Itemsets patterns,
            final long databaseSize, final double minimumConfidence, final double minimumLift) {
        this.itemsetsCandidateGenerator = Objects.requireNonNull(itemsetsCandidateGenerator,
                "itemsetsCandidateGenerator");
        this.itemsetSupportCalculator = Objects.requireNonNull(itemsetSupportCalculator,
                "itemsetSupportCalculator");
        this.associationRuleWriter = Objects.requireNonNull(associationRuleWriter, "associationRuleWriter");
        this.patterns = Objects.requireNonNull(patterns, "patterns");
        this.databaseSize = databaseSize;
        this.minimumConfidence = minimumConfidence;
        this.minimumLift = minimumLift;
        itemsetSupportCalculator.init(patterns);
    }

    /**
     * Run the algorithm for generating association rules from a set of itemsets.
     * <p>
     * Every itemset found at level 2 and above of the patterns is processed: rules with a single-item
     * consequent are tried first, then {@code apGenrules} explores larger consequents. Each rule that
     * passes the thresholds is handed to the association rule writer; nothing is returned.
     *
     * @throws IOException exception if error while writing a rule
     */
    public void runAlgorithm() throws IOException {
        // Levels are walked from index 2 upwards (presumably level k holds the itemsets of size k,
        // so this covers every itemset "lk" of size >= 2 — confirm against Itemsets).
        for (int k = 2; k < patterns.getLevels().size(); k++) {
            for (Itemset lk : patterns.getLevels().get(k)) {

                // Single-item consequents that produced a rule; they seed the recursion (H1).
                final List<int[]> h1ForRecursion = new ArrayList<>();

                for (int item : lk.getItems()) {
                    final int[] consequent = new int[] { item };
                    // copy of lk without the consequent item
                    final int[] antecedent = Itemset.cloneItemSetMinusOneItem(lk.getItems(), item);

                    // rule: (lk minus item) ==> item
                    if (writeRuleIfQualified(lk, antecedent, consequent)) {
                        h1ForRecursion.add(consequent);
                    }
                }

                // continue with the rules that can be made from "lk" with larger consequents
                apGenrules(k, 1, lk, h1ForRecursion);
            }
        }
    }

    /**
     * The ApGenRules as described in p.14 of the paper by Agrawal.
     * (see the Agrawal paper for more details).
     *
     * @param k  the size of the first itemset used to generate rules
     * @param m  the recursive depth of the call to this method (first time 1, then 2...)
     * @param lk the itemset that is used to generate rules
     * @param Hm a set of itemsets that can be used with lk to generate rules
     * @throws IOException exception if error while writing a rule
     */
    private void apGenrules(int k, int m, Itemset lk, List<int[]> Hm) throws IOException {
        // Stop once the candidates generated from Hm would no longer be smaller than "lk".
        if (k <= m + 1) {
            return;
        }

        // Consequents that produced a rule at this depth; they feed the next depth.
        final List<int[]> nextConsequents = new ArrayList<>();

        // generate candidates using Hm
        final List<int[]> candidates = itemsetsCandidateGenerator.generateCandidateSizeK(Hm);

        for (int[] consequent : candidates) {
            // subtract the candidate from the itemset "lk"
            final int[] antecedent = Itemset.cloneItemSetMinusAnItemset(lk.getItems(), consequent);

            // rule: (lk minus candidate) ==> candidate
            if (writeRuleIfQualified(lk, antecedent, consequent)) {
                nextConsequents.add(consequent);
            }
        }

        // recursive call to find more rules using "lk"
        apGenrules(k, m + 1, lk, nextConsequents);
    }

    /**
     * Evaluates the rule {@code antecedent ==> consequent} derived from {@code lk} and writes it
     * if it satisfies the minimum confidence and minimum lift thresholds.
     *
     * @param lk         the itemset the rule was derived from; its absolute support is the rule's support
     * @param antecedent the left-hand side of the rule
     * @param consequent the right-hand side of the rule
     * @return {@code true} if the rule was written, {@code false} if it was discarded
     * @throws IOException exception if error while writing the rule
     */
    private boolean writeRuleIfQualified(Itemset lk, int[] antecedent, int[] consequent) throws IOException {
        // The antecedent support is needed for the confidence. THIS COULD BE OPTIMIZED ?
        final long antecedentSupport = itemsetSupportCalculator.calculateSupport(antecedent);
        final double antecedentSupportAsDouble = (double) antecedentSupport;

        final double conf = lk.getAbsoluteSupport() / antecedentSupportAsDouble;

        // A zero antecedent support yields an infinite (x / 0) or NaN (0 / 0) confidence. NaN must be
        // tested explicitly because every comparison with NaN is false and would let the rule through.
        if (Double.isNaN(conf) || Double.isInfinite(conf) || conf < minimumConfidence) {
            return false;
        }

        double lift = 0;
        if (usingLift) {
            // the lift additionally needs the support of the consequent
            final long consequentSupport = itemsetSupportCalculator.calculateSupport(consequent);

            final double term1 = ((double) lk.getAbsoluteSupport()) / databaseSize;
            final double term2 = antecedentSupportAsDouble / databaseSize;
            final double term3 = ((double) consequentSupport) / databaseSize;
            lift = term1 / (term2 * term3);

            // if the lift is not enough
            if (lift < minimumLift) {
                return false;
            }
        }

        // The rule respects the minimumConfidence and minimumLift parameters, so it is output.
        associationRuleWriter.write(ImmutableAssociationRule.builder()
                .antecedent(antecedent).consequent(consequent).coverage(antecedentSupport)
                .transactionCount(lk.getAbsoluteSupport()).confidence(conf).lift(lift).build());
        return true;
    }
}