Java tutorial
/*
 * Copyright 2015 Norbert
 *
 * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
 * agreements. See the NOTICE file distributed with this work for additional information regarding
 * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License. You may obtain a
 * copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */
package norbert.mynemo.core.selection;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.collect.Lists.newArrayList;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

import norbert.mynemo.core.evaluation.MetricType;
import norbert.mynemo.core.evaluation.PersonnalRecommenderEvaluator;
import norbert.mynemo.core.evaluation.PreferenceMaskerModelBuilder;
import norbert.mynemo.core.recommendation.RecommenderFamily;
import norbert.mynemo.core.recommendation.RecommenderType;
import norbert.mynemo.core.recommendation.configuration.BasicRecommenderConfiguration;
import norbert.mynemo.core.recommendation.configuration.ItemBasedRecommenderConfiguration;
import norbert.mynemo.core.recommendation.configuration.RecommenderConfiguration;
import norbert.mynemo.core.recommendation.recommender.BasicRecommender;
import norbert.mynemo.core.recommendation.recommender.ItemSimilarityRecommender;

import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
import org.apache.commons.math3.stat.inference.TTest;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.eval.DataModelBuilder;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.model.DataModel;

import com.google.common.base.Optional;

/**
 * This class selects the best recommender for a given user.
 *
 * <p>
 * The candidate recommenders are evaluated, the evaluations with an insufficient coverage are
 * dropped, the evaluations that are significantly worse than another one are dropped, and the
 * minimum of the remaining evaluations, according to {@link EvaluationComparator}, is returned.
 */
public class RecommenderSelector {
  /**
   * Configuration option regarding the speed and the precision of the selection.
   *
   * <p>
   * Expect that:
   * <ul>
   * <li><code>very_fast</code> may provide a misleading result</li>
   * <li><code>fast</code>: 2 times slower, provides an imprecise result</li>
   * <li><code>normal</code>: 5 times slower, provides an accurate result</li>
   * <li><code>slow</code>: 10 times slower, provides a very accurate result</li>
   * <li><code>very_slow</code>: 20 times slower, provides an extremely accurate result</li>
   * <li><code>extremely_slow</code>: much slower, provides an exact and deterministic (if
   * possible) result</li>
   * </ul>
   *
   * <p>
   * The class is optimized for <code>trainingPercentage=1</code> and
   * <code>speed=EXTREMELY_SLOW</code>.
   */
  public enum SpeedOption {
    EXTREMELY_SLOW(1, true),
    FAST(0.5, true),
    NORMAL(0.8, true),
    SLOW(0.9, true),
    VERY_FAST(0.7, false),
    VERY_SLOW(0.95, true);

    /** Flag handed to the evaluator when the selector is built. */
    private final boolean exhaustive;
    /** Training percentage handed to the evaluator for each evaluation. */
    private final double trainingPercentage;

    private SpeedOption(double trainingPercentage, boolean exhaustive) {
      this.trainingPercentage = trainingPercentage;
      this.exhaustive = exhaustive;
    }

    /**
     * Returns the training percentage associated with this speed.
     */
    public double getTrainingPercentage() {
      return trainingPercentage;
    }
  }

  public static final double DEFAULT_EVALUATION_PERCENTAGE = 1;
  public static final MetricType DEFAULT_METRIC = MetricType.ROOT_MEAN_SQUARED_ERROR;
  public static final double DEFAULT_MINIMUM_COVERAGE = 0.5;
  /** If false, prevent any optimization based on reusing data between evaluations. */
  private static final boolean DEFAULT_REUSE_STATE = true;
  public static final SpeedOption DEFAULT_SPEED = SpeedOption.EXTREMELY_SLOW;
  /**
   * The significance level is the maximum allowed for a p-value to consider a difference relevant.
   * If this level is lowered, more evaluations will be considered similar. On the contrary, if
   * this level is set higher, fewer evaluations will be considered similar.
   */
  private static final double SIGNIFICANCE_LEVEL = 0.05;

  /**
   * Removes from the given collection the evaluations with a coverage lower than the given minimum.
   *
   * @param evaluations evaluations to filter in place; its iterator must support
   *        <code>remove</code>
   * @param minimumCoverage evaluations with a coverage strictly below this value are removed
   */
  private static void removeUnallowedCoverage(Iterable<RecommenderEvaluation> evaluations,
      double minimumCoverage) {
    Iterator<RecommenderEvaluation> iterator = evaluations.iterator();
    while (iterator.hasNext()) {
      double coverage = iterator.next().getEvaluationReport().getCoverage();
      if (coverage < minimumCoverage) {
        iterator.remove();
      }
    }
  }

  private final PersonnalRecommenderEvaluator evaluator;
  private final MetricType metric;
  private final SelectorConfiguration selectorConfiguration;
  private final SvdRecommenderSelector svdRecommenderSelector;
  private final long targetUser;
  private final UserRecommenderSelector userRecommenderSelector;

  /**
   * Builds a selector for the given user, with the default metric, speed and evaluation
   * percentage.
   *
   * @param model data model the recommenders are evaluated on
   * @param user identifier of the target user
   */
  public RecommenderSelector(DataModel model, long user) throws TasteException {
    this(model, user, DEFAULT_METRIC, DEFAULT_SPEED, DEFAULT_EVALUATION_PERCENTAGE);
  }

  /**
   * Builds a selector for the given user. The selector will try to optimize the given metric.
   *
   * @param model data model the recommenders are evaluated on
   * @param user identifier of the target user
   * @param metric metric used to compare the evaluations
   * @param speed trade-off between the speed and the precision of the selection
   * @param evaluationPercentage evaluation percentage forwarded to the evaluator
   */
  public RecommenderSelector(DataModel model, long user, MetricType metric, SpeedOption speed,
      double evaluationPercentage) throws TasteException {
    targetUser = user;
    this.metric = metric;
    evaluator = new PersonnalRecommenderEvaluator(targetUser, metric, speed.exhaustive);

    DataModelBuilder dataModelBuilder = null;
    if (evaluationPercentage == 1) {
      // the evaluation percentage must be 1 because the model builder will use the whole data
      // model (except some missing preferences from the target user), not the given one by the
      // evaluator
      dataModelBuilder = new PreferenceMaskerModelBuilder(model, targetUser);
    }

    // reusing data between evaluations is only allowed when nothing is sampled
    boolean reuseIsAllowed =
        DEFAULT_REUSE_STATE && evaluationPercentage == 1 && speed.trainingPercentage == 1;

    selectorConfiguration = new SelectorConfiguration(model, user, evaluator,
        evaluationPercentage, reuseIsAllowed, speed, dataModelBuilder);
    userRecommenderSelector = new UserRecommenderSelector(selectorConfiguration);
    svdRecommenderSelector = new SvdRecommenderSelector(selectorConfiguration);
  }

  /**
   * Returns <code>true</code> if a t-test on the values of both evaluations finds a difference at
   * the {@link #SIGNIFICANCE_LEVEL}.
   *
   * <p>
   * The values are read for the metric this selector optimizes, so that the significance test and
   * the {@link EvaluationComparator} used to rank the evaluations rely on the same metric.
   */
  private boolean areSignificantlyDifferent(RecommenderEvaluation evalA,
      RecommenderEvaluation evalB) {
    DescriptiveStatistics valuesA = evalA.getEvaluationReport().getValues(metric);
    DescriptiveStatistics valuesB = evalB.getEvaluationReport().getValues(metric);

    return new TTest().tTest(valuesA, valuesB, SIGNIFICANCE_LEVEL);
  }

  /**
   * Runs the evaluator with the given builder, and generates an evaluation based on the given
   * configuration.
   *
   * @param recommenderConfiguration configuration stored in the returned evaluation
   * @param recommenderBuilder builder of the recommender to evaluate
   * @return the given configuration paired with the report produced by the evaluator
   */
  private RecommenderEvaluation evaluate(RecommenderConfiguration recommenderConfiguration,
      RecommenderBuilder recommenderBuilder) throws TasteException {
    DataModelBuilder modelBuilder = selectorConfiguration.getDataModelBuilder();

    evaluator.evaluate(recommenderBuilder, modelBuilder, selectorConfiguration.getDataModel(),
        selectorConfiguration.getSpeed().trainingPercentage,
        selectorConfiguration.getEvaluationPercentage());

    return new RecommenderEvaluation(recommenderConfiguration, evaluator.getEvaluationReport());
  }

  /**
   * Evaluates all given algorithms, and returns an unsorted list of evaluations. An algorithm may
   * be evaluated with different parameter values, thus the number of evaluations is usually greater
   * than the number of given algorithms.
   *
   * @param recommenderTypes algorithms to evaluate
   * @param minimumCoverage minimum coverage, forwarded to the SVD based and user-similarity based
   *        selectors only
   * @throws IllegalStateException if the family of a given algorithm is not handled
   */
  public List<RecommenderEvaluation> evaluateAll(Collection<RecommenderType> recommenderTypes,
      double minimumCoverage) throws TasteException {
    List<RecommenderEvaluation> result = new ArrayList<>();

    for (RecommenderType current : recommenderTypes) {
      switch (current.getFamily()) {
        case BASIC:
          result.add(evaluateBasic(current));
          break;
        case ITEM_SIMILARITY_BASED:
          result.add(evaluateItemBased(current));
          break;
        case SVD_BASED:
          result.addAll(svdRecommenderSelector.select(current, minimumCoverage));
          break;
        case USER_SIMILARITY_BASED:
          result.addAll(userRecommenderSelector.select(current, minimumCoverage));
          break;
        default:
          throw new IllegalStateException(
              "Unsupported recommender family: " + current.getFamily());
      }
    }

    return result;
  }

  /**
   * Evaluates the given recommender. The given recommender must be part of the basic family.
   */
  private RecommenderEvaluation evaluateBasic(RecommenderType type) throws TasteException {
    checkArgument(type.getFamily() == RecommenderFamily.BASIC);

    BasicRecommenderConfiguration configuration = new BasicRecommenderConfiguration(type);

    return evaluate(configuration, new BasicRecommender(configuration));
  }

  /**
   * Evaluates the given recommender. The recommender type must be part of the item-similarity based
   * family.
   */
  private RecommenderEvaluation evaluateItemBased(RecommenderType type) throws TasteException {
    checkArgument(type.getFamily() == RecommenderFamily.ITEM_SIMILARITY_BASED);

    ItemBasedRecommenderConfiguration configuration = new ItemBasedRecommenderConfiguration(type);

    return evaluate(configuration, new ItemSimilarityRecommender(configuration));
  }

  /**
   * Removes from the given collection all evaluations that are significantly worse than another
   * one. The remaining evaluations are not significantly different from each other.
   *
   * <p>
   * Each unordered pair of evaluations is tested once, and an evaluation is never tested against
   * itself. For a significantly different pair, the greater evaluation according to the
   * {@link EvaluationComparator} is rejected; if the comparator considers them equal, both are
   * rejected.
   */
  private void retainBestEvaluations(Collection<RecommenderEvaluation> evaluations) {
    checkNotNull(evaluations);

    EvaluationComparator comparator = new EvaluationComparator(metric);
    // indexed snapshot, so that each pair can be visited exactly once
    List<RecommenderEvaluation> candidates = newArrayList(evaluations);
    Collection<RecommenderEvaluation> rejectedEvaluations = new ArrayList<>();

    for (int i = 0; i < candidates.size(); i++) {
      RecommenderEvaluation evalA = candidates.get(i);
      for (int j = i + 1; j < candidates.size(); j++) {
        RecommenderEvaluation evalB = candidates.get(j);
        if (!areSignificantlyDifferent(evalA, evalB)) {
          continue;
        }
        int order = comparator.compare(evalA, evalB);
        if (order >= 0) {
          rejectedEvaluations.add(evalA);
        }
        if (order <= 0) {
          rejectedEvaluations.add(evalB);
        }
      }
    }

    evaluations.removeAll(rejectedEvaluations);
  }

  /**
   * Returns the best recommender for the target user among available recommenders, using the
   * default minimum coverage.
   */
  public Optional<RecommenderEvaluation> select() throws TasteException {
    return selectAmong(RecommenderType.getSpeedOrderedRecommenders(),
        RecommenderSelector.DEFAULT_MINIMUM_COVERAGE);
  }

  /**
   * Returns the best recommender for the target user among the given recommenders. The given
   * algorithms are evaluated, then the minimum evaluation computed with the
   * {@link EvaluationComparator} class is returned.
   *
   * <p>
   * The evaluations are compared with the metric given at construction. The evaluations with a
   * coverage lower than the given one are ignored.
   *
   * @param types algorithms to evaluate, must contain at least one element
   * @param minimumCoverage minimum accepted coverage, between 0 and 1 inclusive
   * @return the best evaluation, or an absent value if no evaluation remains after filtering
   * @throws IllegalArgumentException if the list is empty or the coverage is out of range
   */
  public Optional<RecommenderEvaluation> selectAmong(List<RecommenderType> types,
      double minimumCoverage) throws TasteException {
    checkNotNull(types);
    checkArgument(!types.isEmpty(), "The algorithm list must contain at least one algorithm.");
    checkArgument(0 <= minimumCoverage && minimumCoverage <= 1, "The minimum coverage must not be"
        + " lesser than 0 or greater than 1.");

    List<RecommenderEvaluation> evaluations = evaluateAll(types, minimumCoverage);
    removeUnallowedCoverage(evaluations, minimumCoverage);
    retainBestEvaluations(evaluations);

    if (evaluations.isEmpty()) {
      return Optional.absent();
    }
    return Optional.of(Collections.min(evaluations, new EvaluationComparator(metric)));
  }
}