se.uu.it.cs.recsys.ruleminer.impl.FPGrowthImpl.java Source code

Java tutorial

Introduction

Here is the source code for se.uu.it.cs.recsys.ruleminer.impl.FPGrowthImpl.java

Source

package se.uu.it.cs.recsys.ruleminer.impl;

/*
 * #%L
 * CourseRecommenderRuleMiner
 * %%
 * Copyright (C) 2015 Yong Huang  <yong.e.huang@gmail.com >
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */
import com.google.common.collect.Sets;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import se.uu.it.cs.recsys.ruleminer.datastructure.FPTree;
import se.uu.it.cs.recsys.ruleminer.datastructure.HeaderTableItem;
import se.uu.it.cs.recsys.ruleminer.datastructure.Item;
import se.uu.it.cs.recsys.ruleminer.datastructure.builder.FPTreeBuilder;
import se.uu.it.cs.recsys.ruleminer.util.FPTreeUtil;
import se.uu.it.cs.recsys.ruleminer.util.Util;

/**
 * Implement the FP-Growth Algorithm
 */
@Component
public class FPGrowthImpl {

    private static final Logger LOGGER = LoggerFactory.getLogger(FPGrowthImpl.class);

    public static final Integer DEFAULT_MIN_SUPPORT = 10;

    private Integer minSupport = DEFAULT_MIN_SUPPORT;

    /**
     *
     *
     * @param fpTree non-null FP Tree
     * @param argMinSupport
     * @return all the frequent patterns that meets the requirement of the min
     * support
     * @throws IllegalArgumentException, if input is null
     */
    public Map<Set<Integer>, Integer> getAllFrequentPattern(FPTree fpTree, Integer argMinSupport) {

        this.minSupport = argMinSupport == null ? DEFAULT_MIN_SUPPORT : argMinSupport;

        if (fpTree == null) {
            throw new IllegalArgumentException("Input FP Tree is null!");
        }

        return miningWithFPGrowth(fpTree, Collections.<Integer>emptySet());
    }

    private Map<Set<Integer>, Integer> miningWithFPGrowth(FPTree fpTree, Set<Integer> suffixPattern) {

        Map<Set<Integer>, Integer> frequentPatternFromSinglePrefixPath = new HashMap<>();
        FPTree branchingTree = fpTree;

        if (fpTree.hasSinglePrefixPath()) {
            List<Item> singlePrefixPath = fpTree.getSinglePrefixPathInTopDownOrder();
            //            LOGGER.debug("Single prefix path: {}", singlePrefixPath);

            Map<Set<Integer>, Integer> frequentPatternWithinSinglePrefixPath = getFrequentPatternFromSinglePrefixPath(
                    singlePrefixPath);

            frequentPatternFromSinglePrefixPath = frequentPatternWithinSinglePrefixPath.entrySet().stream()
                    .collect(Collectors.toMap(entry -> {
                        Set<Integer> existingPattern = new HashSet<>(entry.getKey());
                        existingPattern.addAll(suffixPattern);
                        return existingPattern;
                    }, entry -> entry.getValue()));

            branchingTree = fpTree.getBranchingTree();

            if (branchingTree == null) {
                return frequentPatternFromSinglePrefixPath;
            }
        }

        Map<Set<Integer>, Integer> frequentPatternFromBranchingTree = new HashMap<>();

        List<HeaderTableItem> headerList = branchingTree.getHeaderTable();
        ListIterator<HeaderTableItem> itr = headerList.listIterator(headerList.size());

        while (itr.hasPrevious()) {
            HeaderTableItem visitingItem = itr.previous();

            Set<Integer> newPattern = new HashSet<>(suffixPattern);
            newPattern.add(visitingItem.getItem().getId());

            frequentPatternFromBranchingTree.put(newPattern, visitingItem.getItem().getCount());
            //            LOGGER.debug("Adding new pattern: {}, count: {}", newPattern, visitingItem.getItem().getCount());

            Map<List<Integer>, Integer> patternBase = FPTreeUtil.getPatternBase(visitingItem);
            //            LOGGER.debug("Pattern base for item {} is: {}", visitingItem.getItem(), patternBase);

            FPTree conditionalTree = FPTreeBuilder.buildConditionalFPTree(patternBase, this.minSupport);

            if (conditionalTree != null && !conditionalTree.getRoot().getChildren().isEmpty()) {
                frequentPatternFromBranchingTree.putAll(miningWithFPGrowth(conditionalTree, newPattern));
            }
        }

        return consolidatePatterns(frequentPatternFromSinglePrefixPath, frequentPatternFromBranchingTree);
    }

    private Map<Set<Integer>, Integer> consolidatePatterns(Map<Set<Integer>, Integer> patternFromSinglePrefixPath,
            Map<Set<Integer>, Integer> patternFromBranchingTree) {

        if (patternFromSinglePrefixPath.isEmpty()) {
            return patternFromBranchingTree;
        }

        Map<Set<Integer>, Integer> consolidated = new HashMap<>();
        consolidated.putAll(patternFromSinglePrefixPath);
        consolidated.putAll(patternFromBranchingTree);
        consolidated.putAll(getCrossProduct(patternFromSinglePrefixPath, patternFromBranchingTree));

        return consolidated;
    }

    private Map<Set<Integer>, Integer> getCrossProduct(Map<Set<Integer>, Integer> patternFromSinglePrefixPath,
            Map<Set<Integer>, Integer> patternFromBranchingTree) {

        Map<Set<Integer>, Integer> totalCrossProduct = new HashMap<>();

        patternFromSinglePrefixPath.entrySet().stream().forEach(entryFromPrefixPath -> {
            patternFromBranchingTree.entrySet().stream().forEach(entryFromBranchingTree -> {

                Set<Integer> singleCrossProduct = new HashSet<>(entryFromBranchingTree.getKey());
                singleCrossProduct.addAll(entryFromPrefixPath.getKey());

                totalCrossProduct.put(singleCrossProduct, entryFromBranchingTree.getValue());
            });

        });

        return totalCrossProduct;
    }

    /**
     * Call this method to give a customized min support. If this method is not
     * called, then the instance will use
     * {@link FPGrowthImpl#DEFAULT_MIN_SUPPORT} by default.
     *
     * @param minSupport, has to be a positive value.
     * @throws IllegalArgumentException if null or non-positive
     */
    public void setMinSupport(Integer minSupport) {

        if (minSupport == null || minSupport <= 0) {
            throw new IllegalArgumentException("Min support needs to be a positive value!");
        }

        this.minSupport = minSupport;
    }

    /**
     *
     * @param singlePrefixPath, ordered single prefix path from a FP Tree
     * @return pairs of (frequent pattern, its support); returns empty map if
     * input is null or empty
     */
    public static Map<Set<Integer>, Integer> getFrequentPatternFromSinglePrefixPath(List<Item> singlePrefixPath) {
        if (singlePrefixPath == null || singlePrefixPath.isEmpty()) {
            LOGGER.warn("Nonsence to give null or empty input. Do you agree?");
            return Collections.EMPTY_MAP;
        }

        Set<Item> itemSetFromPath = new HashSet<>(singlePrefixPath);

        Set<Set<Item>> powerSet = Sets.powerSet(itemSetFromPath);

        Map<Set<Integer>, Integer> r = new HashMap<>();

        Util.removeEmptySet(powerSet).forEach(itemSet -> {
            int localMinSupport = FPTreeUtil.getMinSupport(itemSet);

            r.put(itemSet.stream().map(item -> item.getId()).collect(Collectors.toSet()), localMinSupport);
        });

        return r;
    }
}