Java tutorial
/*
 * Copyright (C) 2016 jcastro
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package delfos.rs.contentbased.vsm.booleanvsm.symeonidis2007;

import delfos.ERROR_CODES;
import delfos.common.Global;
import delfos.common.exceptions.CouldNotComputeSimilarity;
import delfos.common.exceptions.dataset.CannotLoadContentDataset;
import delfos.common.exceptions.dataset.CannotLoadRatingsDataset;
import delfos.common.exceptions.dataset.entity.EntityNotFound;
import delfos.common.exceptions.dataset.items.ItemNotFound;
import delfos.common.exceptions.dataset.users.UserNotFound;
import delfos.common.parameters.Parameter;
import delfos.common.parameters.restriction.IntegerParameter;
import delfos.dataset.basic.features.Feature;
import delfos.dataset.basic.item.ContentDataset;
import delfos.dataset.basic.item.Item;
import delfos.dataset.basic.loader.types.ContentDatasetLoader;
import delfos.dataset.basic.loader.types.DatasetLoader;
import delfos.dataset.basic.rating.Rating;
import delfos.dataset.basic.rating.RatingsDataset;
import delfos.dataset.basic.rating.RelevanceCriteria;
import delfos.rs.collaborativefiltering.knn.RecommendationEntity;
import delfos.rs.collaborativefiltering.profile.Neighbor;
import delfos.rs.contentbased.ContentBasedRecommender;
import delfos.rs.contentbased.vsm.booleanvsm.BooleanFeaturesTransformation;
import delfos.rs.contentbased.vsm.booleanvsm.SparseVector;
import delfos.rs.recommendation.Recommendation;
import delfos.similaritymeasures.CosineCoefficient;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.commons.math4.util.Pair;

/**
 * Recommender system that implements the approach proposed in the paper:
 * <p>
 * Panagiotis Symeonidis, Alexandros Nanopoulos and Yannis Manolopoulos. "Feature-weighted user model for recommender
 * systems." In User Modeling 2007, pp. 97-106. Springer Berlin Heidelberg, 2007.
 * <p>
 * Each user is described by a feature-frequency vector FF(u) (how many of the items the user rated as relevant carry
 * each feature value), weighted by the inverse user frequency IUF(f) = log10(|U| / UF(f)) of each feature value.
 * Neighbors are found by cosine similarity between these weighted profiles, and candidate items are scored by the
 * frequency of their feature values among the items rated by the neighborhood.
 *
 * @author jcastro-inf ( https://github.com/jcastro-inf )
 *
 * @version 1.0 (19 October 2011)
 * @version 2.0 (28 February 2013) Refactoring of the classes related to user profiles.
 * @version 2.1 (9 October 2013) Added the makeUserModel method.
 * @version (6 November 2013) Implementation corrected to follow the paper.
 */
public class Symeonidis2007FeatureWeighted
        extends ContentBasedRecommender<Symeonidis2007Model, Symeonidis2007UserProfile> {

    private static final long serialVersionUID = 1L;

    /**
     * Number of neighbors taken into account when computing recommendations. Allowed range is [1, 9999]; the default
     * value is 20.
     */
    public static final Parameter NEIGHBORHOOD_SIZE = new Parameter("Neighborhood_size",
            new IntegerParameter(1, 9999, 20));

    /**
     * Default constructor; registers the parameters of this recommender system.
     */
    public Symeonidis2007FeatureWeighted() {
        super();
        addParameter(NEIGHBORHOOD_SIZE);
    }

    /**
     * Builds the recommender with the given neighborhood size.
     *
     * @param neighborhoodSize Size of the neighborhood.
     */
    public Symeonidis2007FeatureWeighted(int neighborhoodSize) {
        this();
        setParameterValue(NEIGHBORHOOD_SIZE, neighborhoodSize);
    }

    /**
     * Builds the weighted profile of a user: its feature-frequency vector FF(u) multiplied by the IUF weights stored
     * in the model.
     *
     * @param idUser User whose profile is built.
     * @param datasetLoader Source of the ratings and of the item content.
     * @param model Previously built model; provides the feature transformation and the IUF vector.
     * @return The weighted profile of the user.
     * @throws CannotLoadRatingsDataset If the ratings dataset cannot be loaded.
     * @throws CannotLoadContentDataset If the loader does not provide a content dataset.
     * @throws UserNotFound If the user is not in the ratings dataset.
     */
    @Override
    protected Symeonidis2007UserProfile makeUserProfile(int idUser, DatasetLoader<? extends Rating> datasetLoader,
            Symeonidis2007Model model) throws CannotLoadRatingsDataset, CannotLoadContentDataset, UserNotFound {

        BooleanFeaturesTransformation booleanFeaturesTransformation = model.getBooleanFeaturesTransformation();

        SparseVector<Long> userFF = makeFFUserProfile(idUser, datasetLoader, booleanFeaturesTransformation);

        // Weight the raw frequencies with the inverse user frequency (IUF).
        userFF.multiply(model.getAllIUF());

        Map<Feature, Map<Object, Double>> userProfileValuesMap
                = booleanFeaturesTransformation.getFeatureValueMap(userFF);

        return new Symeonidis2007UserProfile(idUser, userProfileValuesMap);
    }

    /**
     * Returns the content dataset behind the given loader.
     *
     * @param datasetLoader Loader that must also be a {@link ContentDatasetLoader}.
     * @return The content dataset of the loader.
     * @throws CannotLoadContentDataset If the loader is not a {@link ContentDatasetLoader}.
     */
    private static ContentDataset requireContentDataset(DatasetLoader<? extends Rating> datasetLoader)
            throws CannotLoadContentDataset {
        if (!(datasetLoader instanceof ContentDatasetLoader)) {
            throw new CannotLoadContentDataset(
                    "The dataset loader is not a ContentDatasetLoader, cannot apply a content-based ");
        }
        return ((ContentDatasetLoader) datasetLoader).getContentDataset();
    }

    /**
     * Builds the boolean profile of an item: a 1 in the position of every (feature, value) pair the item has.
     *
     * @param idItem Item whose profile is built.
     * @param datasetLoader Source of the item content.
     * @param booleanFeaturesTransformation Mapping from (feature, value) pairs to vector indexes.
     * @return The item profile.
     * @throws ItemNotFound If the item is not in the content dataset.
     * @throws CannotLoadContentDataset If the loader does not provide a content dataset.
     */
    private SparseVector<Long> makeFFItemProfile(int idItem, DatasetLoader<? extends Rating> datasetLoader,
            BooleanFeaturesTransformation booleanFeaturesTransformation)
            throws ItemNotFound, CannotLoadContentDataset {

        final ContentDataset contentDataset = requireContentDataset(datasetLoader);

        try {
            SparseVector<Long> itemProfile = booleanFeaturesTransformation.newProfile();
            Item item = contentDataset.get(idItem);

            for (Feature feature : item.getFeatures()) {
                Object value = item.getFeatureValue(feature);
                long indexFeature = booleanFeaturesTransformation.getFeatureIndex(feature, value);
                itemProfile.set(indexFeature, 1);
            }
            return itemProfile;
        } catch (EntityNotFound ex) {
            throw new ItemNotFound(idItem, ex);
        }
    }

    /**
     * Builds the feature-frequency vector FF(u) of a user: for every (feature, value) pair, the number of items
     * rated as relevant by the user that have it. Items whose rating is not relevant according to the default
     * relevance criteria of the loader are ignored.
     *
     * @param idUser User whose profile is built.
     * @param datasetLoader Source of the ratings and of the item content.
     * @param booleanFeaturesTransformation Mapping from (feature, value) pairs to vector indexes.
     * @return The (unweighted) feature-frequency vector of the user.
     * @throws CannotLoadRatingsDataset If the ratings dataset cannot be loaded.
     * @throws CannotLoadContentDataset If the loader does not provide a content dataset.
     * @throws UserNotFound If the user is not in the ratings dataset.
     */
    protected SparseVector<Long> makeFFUserProfile(int idUser, DatasetLoader<? extends Rating> datasetLoader,
            BooleanFeaturesTransformation booleanFeaturesTransformation)
            throws CannotLoadRatingsDataset, CannotLoadContentDataset, UserNotFound {

        RelevanceCriteria relevanceCriteria = datasetLoader.getDefaultRelevanceCriteria();
        SparseVector<Long> userProfileValues = booleanFeaturesTransformation.newProfile();
        RatingsDataset<? extends Rating> ratingsDataset = datasetLoader.getRatingsDataset();

        for (Map.Entry<Integer, ? extends Rating> entry : ratingsDataset.getUserRatingsRated(idUser).entrySet()) {
            try {
                int idItem = entry.getKey();
                Rating rating = entry.getValue();

                if (!relevanceCriteria.isRelevant(rating.getRatingValue())) {
                    // Non-relevant ratings do not contribute to the profile.
                    continue;
                }

                SparseVector<Long> itemProfile
                        = makeFFItemProfile(idItem, datasetLoader, booleanFeaturesTransformation);

                for (Pair<Long, Double> itemProfileEntry : itemProfile.fast()) {
                    long idFeature = itemProfileEntry.getKey();
                    double featureValue = itemProfileEntry.getValue();

                    if (featureValue <= 0) {
                        continue;
                    }

                    // Accumulate on an existing entry, or create it the first time the feature is seen.
                    if (userProfileValues.containsKey(idFeature)) {
                        userProfileValues.add(idFeature, featureValue);
                    } else {
                        userProfileValues.set(idFeature, featureValue);
                    }
                }
            } catch (ItemNotFound ex) {
                ERROR_CODES.ITEM_NOT_FOUND.exit(ex);
            }
        }

        return userProfileValues;
    }

    /**
     * Builds the recommendation model: the boolean profile of every item, the IUF weight of every feature value and
     * the IUF-weighted profile of every user in the ratings dataset.
     *
     * @param datasetLoader Source of the ratings and of the item content.
     * @return The model used later by {@link #makeUserProfile} and {@link #recommendOnly}.
     * @throws CannotLoadRatingsDataset If the ratings dataset cannot be loaded.
     * @throws CannotLoadContentDataset If the loader does not provide a content dataset.
     */
    @Override
    public Symeonidis2007Model buildRecommendationModel(DatasetLoader<? extends Rating> datasetLoader)
            throws CannotLoadRatingsDataset, CannotLoadContentDataset {

        final RatingsDataset<? extends Rating> ratingsDataset = datasetLoader.getRatingsDataset();
        final ContentDataset contentDataset = requireContentDataset(datasetLoader);

        BooleanFeaturesTransformation booleanFeaturesTransformation
                = new BooleanFeaturesTransformation(contentDataset);
        Symeonidis2007Model model = new Symeonidis2007Model(booleanFeaturesTransformation);

        // Item profiles.
        fireBuildingProgressChangedEvent("Model creation", 0, -1);
        int itemsProcessed = 1;
        for (Item item : contentDataset) {
            try {
                SparseVector<Long> itemProfile
                        = makeFFItemProfile(item.getId(), datasetLoader, booleanFeaturesTransformation);
                model.putItemProfile(item.getId(), itemProfile);
                fireBuildingProgressChangedEvent("Profile creation",
                        (int) ((double) itemsProcessed++ * 100 / contentDataset.size()), -1);
            } catch (ItemNotFound ex) {
                ERROR_CODES.ITEM_NOT_FOUND.exit(ex);
            }
        }
        fireBuildingProgressChangedEvent("Profile creation", 100, -1);

        // Unweighted user profiles, the FF(u) part.
        Map<Integer, SparseVector<Long>> ffUserProfiles = new TreeMap<>();
        for (int idUser : ratingsDataset.allUsers()) {
            try {
                ffUserProfiles.put(idUser,
                        makeFFUserProfile(idUser, datasetLoader, booleanFeaturesTransformation));
            } catch (UserNotFound ex) {
                ERROR_CODES.USER_NOT_FOUND.exit(ex);
            }
        }

        // Inverse user frequency of every feature value.
        final double numUsers = ratingsDataset.allUsers().size();
        SparseVector<Long> iuf = computeInverseUserFrequency(
                contentDataset, booleanFeaturesTransformation, ffUserProfiles.values(), numUsers);
        model.setAllIuf(iuf);

        // Final user profiles (FF(u) weighted by IUF), used later to build neighborhoods.
        for (int idUser : ratingsDataset.allUsers()) {
            SparseVector<Long> userProfileFinalVector = ffUserProfiles.get(idUser).clone();
            userProfileFinalVector.multiply(iuf);

            Map<Feature, Map<Object, Double>> userProfileValues
                    = booleanFeaturesTransformation.getFeatureValueMap(userProfileFinalVector);

            model.putUserProfile(idUser, new Symeonidis2007UserProfile(idUser, userProfileValues));
        }

        return model;
    }

    /**
     * Computes the IUF weight of every (feature, value) pair known to the transformation and reports progress through
     * "IUF calculation" building events.
     *
     * @param contentDataset Content dataset whose features are weighted.
     * @param booleanFeaturesTransformation Mapping from (feature, value) pairs to vector indexes.
     * @param ffUserProfiles Unweighted FF(u) profiles of all the users.
     * @param numUsers Total number of users, |U|.
     * @return Vector with the IUF of every feature value.
     */
    private SparseVector<Long> computeInverseUserFrequency(ContentDataset contentDataset,
            BooleanFeaturesTransformation booleanFeaturesTransformation,
            Collection<SparseVector<Long>> ffUserProfiles, double numUsers) {

        SparseVector<Long> iuf = booleanFeaturesTransformation.newProfile();
        int featureValuesProcessed = 0;

        fireBuildingProgressChangedEvent("IUF calculation", 0, -1);
        for (Feature feature : contentDataset.getFeatures()) {
            for (Object featureValue : booleanFeaturesTransformation.getAllFeatureValues(feature)) {
                long idFeatureValue = booleanFeaturesTransformation.getFeatureIndex(feature, featureValue);

                int usersWithFeature = countUsersWithFeature(ffUserProfiles, idFeatureValue);
                double iufThisFeatureValue = inverseUserFrequency(numUsers, usersWithFeature);

                if (Global.isVerboseAnnoying()) {
                    Global.showInfoMessage("Feature " + feature + " and value " + featureValue
                            + " has an IUF of " + iufThisFeatureValue + "\n");
                }

                iuf.set(idFeatureValue, iufThisFeatureValue);
                fireBuildingProgressChangedEvent("IUF calculation",
                        (int) ((double) featureValuesProcessed++ * 100
                        / booleanFeaturesTransformation.sizeOfAllFeatureValues()), -1);
            }
        }

        return iuf;
    }

    /**
     * Counts the users whose FF(u) profile has a positive value for the given feature value. FF(u) is positive for a
     * feature value exactly when the user rated as relevant at least one item that has it, so the ratings do not
     * need to be scanned again for every feature value.
     *
     * @param ffUserProfiles Unweighted FF(u) profiles of all the users.
     * @param idFeatureValue Vector index of the (feature, value) pair.
     * @return Number of users that have the feature value in their profile, UF(f).
     */
    private static int countUsersWithFeature(Collection<SparseVector<Long>> ffUserProfiles, long idFeatureValue) {
        int usersWithFeature = 0;
        for (SparseVector<Long> ffUserProfile : ffUserProfiles) {
            if (ffUserProfile.containsKey(idFeatureValue) && ffUserProfile.get(idFeatureValue) > 0) {
                usersWithFeature++;
            }
        }
        return usersWithFeature;
    }

    /**
     * Computes IUF(f) = log10(|U| / UF(f)).
     * <p>
     * When no user has the feature value (or there are no users at all) the quotient is undefined and the plain
     * formula would yield Infinity or NaN; a weight of 0 is returned instead so that only finite values are stored in
     * the model.
     *
     * @param numUsers Total number of users, |U|.
     * @param usersWithFeature Number of users that have the feature value, UF(f).
     * @return The inverse user frequency, or 0 when it is undefined.
     */
    private static double inverseUserFrequency(double numUsers, double usersWithFeature) {
        if (numUsers <= 0 || usersWithFeature <= 0) {
            return 0;
        }
        return Math.log10(numUsers / usersWithFeature);
    }

    /**
     * Scores the candidate items for a user.
     * <p>
     * The nearest neighbors of the user are taken, the candidate items rated by them are collected, the frequency of
     * each feature value among those items is computed and every candidate item is scored with the sum of the
     * frequencies of its feature values.
     *
     * @param datasetLoader Source of the ratings and of the item content.
     * @param model Model built by {@link #buildRecommendationModel}.
     * @param userProfile Profile of the user that receives the recommendations.
     * @param candidateItems Items that may be recommended.
     * @return One recommendation per candidate item found in the content dataset.
     * @throws UserNotFound If a user is not in the ratings dataset.
     * @throws ItemNotFound If an item is not found.
     * @throws CannotLoadRatingsDataset If the ratings dataset cannot be loaded.
     * @throws CannotLoadContentDataset If the loader does not provide a content dataset.
     */
    @Override
    protected Collection<Recommendation> recommendOnly(DatasetLoader<? extends Rating> datasetLoader,
            Symeonidis2007Model model, Symeonidis2007UserProfile userProfile, Collection<Integer> candidateItems)
            throws UserNotFound, ItemNotFound, CannotLoadRatingsDataset, CannotLoadContentDataset {

        final RatingsDataset<? extends Rating> ratingsDataset = datasetLoader.getRatingsDataset();
        final ContentDataset contentDataset = requireContentDataset(datasetLoader);
        final BooleanFeaturesTransformation booleanFeaturesTransformation
                = model.getBooleanFeaturesTransformation();

        // Step 1: find the nearest neighbors.
        int neighborhoodSize = getNeighborhoodSize();
        List<Neighbor> neighbors = getUserNeighbors(model, userProfile);
        List<Neighbor> neighborhood = neighbors.subList(0, Math.min(neighbors.size(), neighborhoodSize));

        // Step 2: get the items rated in the neighborhood, intersected with the candidate items.
        Set<Integer> itemsNeighborhood = new TreeSet<>();
        for (Neighbor neighbor : neighborhood) {
            itemsNeighborhood.addAll(ratingsDataset.getUserRated(neighbor.getIdNeighbor()));
        }
        itemsNeighborhood.retainAll(candidateItems);

        // Step 3: get the features of each item: I1: {F2}, I3: {F2, F3}, I5: {F1, F2, F3}
        // Step 4: find their frequency in the neighborhood: fr(F1)=1, fr(F2)=3, fr(F3)=2
        SparseVector<Long> featureFrequency = booleanFeaturesTransformation.newProfile();
        featureFrequency.fill(0);

        for (int idItem : itemsNeighborhood) {
            try {
                Item item = contentDataset.get(idItem);
                for (Feature feature : item.getFeatures()) {
                    Object featureValue = item.getFeatureValue(feature);
                    long idFeature = booleanFeaturesTransformation.getFeatureIndex(feature, featureValue);
                    featureFrequency.add(idFeature, 1.0);
                }
            } catch (EntityNotFound ex) {
                ERROR_CODES.ITEM_NOT_FOUND.exit(ex);
            }
        }

        // Step 5: for each item, add up the frequency of its features to find its weight in the neighborhood:
        // w(I1) = 3, w(I3) = 5, w(I5) = 6.
        Collection<Recommendation> recommendations = new ArrayList<>();
        for (int idItem : candidateItems) {
            try {
                double itemScore = 0;
                Item item = contentDataset.get(idItem);
                for (Feature feature : item.getFeatures()) {
                    Object featureValue = item.getFeatureValue(feature);
                    long idFeature = booleanFeaturesTransformation.getFeatureIndex(feature, featureValue);
                    itemScore += featureFrequency.get(idFeature);
                }
                recommendations.add(new Recommendation(idItem, itemScore));
            } catch (EntityNotFound ex) {
                ERROR_CODES.ITEM_NOT_FOUND.exit(ex);
            }
        }

        return recommendations;
    }

    /**
     * Computes the similarity between the given user and every other user profile stored in the model and returns
     * them as a sorted list of neighbors.
     * <p>
     * The similarity is the cosine coefficient over the feature values of the user that the other profile also
     * contains. The list is sorted with the natural ordering of {@link Neighbor}; callers take the first elements as
     * the neighborhood, so that ordering is presumably most-similar-first (confirm in {@link Neighbor}).
     *
     * @param model Model that holds the profiles of the known users.
     * @param userProfile Profile of the user whose neighbors are searched.
     * @return Sorted list with the users for which a similarity could be computed.
     */
    protected List<Neighbor> getUserNeighbors(Symeonidis2007Model model, Symeonidis2007UserProfile userProfile) {

        CosineCoefficient cosineCoefficient = new CosineCoefficient();
        List<Neighbor> neighbors = new ArrayList<>();

        for (Symeonidis2007UserProfile neighborProfile : model.userProfiles()) {
            if (neighborProfile.getId() == userProfile.getId()) {
                // A user is never a neighbor of itself.
                continue;
            }

            // Parallel vectors holding the values of the feature values both profiles share.
            List<Double> userValues = new ArrayList<>();
            List<Double> neighborValues = new ArrayList<>();

            for (Feature feature : userProfile.getFeatures()) {
                for (Object value : userProfile.getValuedFeatureValues(feature)) {
                    if (neighborProfile.contains(feature, value)) {
                        double userValue = (double) userProfile.getFeatureValueValue(feature, value);
                        double neighborValue = (double) neighborProfile.getFeatureValueValue(feature, value);

                        userValues.add(userValue);
                        neighborValues.add(neighborValue);
                    }
                }
            }

            try {
                double sim = cosineCoefficient.similarity(userValues, neighborValues);
                neighbors.add(new Neighbor(RecommendationEntity.USER, neighborProfile.getId(), sim));
            } catch (CouldNotComputeSimilarity ignored) {
                // Deliberately best-effort: a user for which the similarity cannot be computed is simply not
                // considered as a neighbor.
            }
        }

        Collections.sort(neighbors);

        return neighbors;
    }

    /**
     * Returns the number of neighbor users considered when computing the recommendations.
     *
     * @return Number of neighbors.
     */
    private int getNeighborhoodSize() {
        return (Integer) getParameterValue(NEIGHBORHOOD_SIZE);
    }
}