List of usage examples for org.apache.mahout.cf.taste.model.DataModel#getNumItems()
int getNumItems() throws TasteException;
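A minimal sketch of a typical call, assuming a stock Mahout FileDataModel and a hypothetical preference file named ratings.csv (one userID,itemID[,preference] line per entry):

    import java.io.File;
    import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
    import org.apache.mahout.cf.taste.model.DataModel;

    public class NumItemsExample {
        public static void main(String[] args) throws Exception {
            // "ratings.csv" is a placeholder file name
            DataModel model = new FileDataModel(new File("ratings.csv"));
            // getNumItems() and getNumUsers() may throw TasteException
            System.out.println("users=" + model.getNumUsers() + ", items=" + model.getNumItems());
        }
    }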
From source file:norbert.mynemo.ui.SelectCommandParser.java
License:Apache License
/**
 * Parses and checks the "data-model" option.
 */
private static DataModel parseDataModel(String dataModelValue) throws TasteException {
    if (!new File(dataModelValue).exists()) {
        throw new IllegalArgumentException("Error: unable to find the data model file.");
    }
    DataModel result;
    try {
        result = new StringUserDataModel(new File(dataModelValue));
    } catch (IOException e) {
        throw new IllegalStateException("Error: unable to load the data model.", e);
    }
    // check that the model actually contains data
    if (result.getNumUsers() == 0 || result.getNumItems() == 0) {
        throw new IllegalArgumentException("Error: the data model doesn't contain any data.");
    }
    return result;
}
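The same emptiness check works with the stock Mahout FileDataModel (the StringUserDataModel above is specific to the Mynemo project). A minimal sketch, assuming a comma-separated preference file:

    private static DataModel loadAndCheck(File preferenceFile) throws TasteException {
        DataModel model;
        try {
            model = new FileDataModel(preferenceFile);
        } catch (IOException e) {
            throw new IllegalStateException("Unable to load the data model.", e);
        }
        // Reject empty models before they reach a recommender.
        if (model.getNumUsers() == 0 || model.getNumItems() == 0) {
            throw new IllegalArgumentException("The data model doesn't contain any data.");
        }
        return model;
    }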
From source file:org.easyrec.mahout.EasyrecInMemoryDataModelTest.java
License:Open Source License
@Test
public void testEasyrecDataModel_getNumItems() throws TasteException {
    DataModel easyrecDataModel = new EasyrecDataModel(TENANT_ID, RATE_ACTION_TYPE_ID, HAS_RATING_VALUES,
            mahoutDataModelMappingDAO);
    easyrecDataModel = new EasyrecInMemoryDataModel(easyrecDataModel);

    int numberOfItems = easyrecDataModel.getNumItems();

    assertEquals(3, numberOfItems);
}
From source file:org.easyrec.plugin.mahout.MahoutBooleanGenerator.java
License:Open Source License
@Override
protected void doExecute(ExecutionControl executionControl, MahoutBooleanGeneratorStats stats) throws Exception {
    // when doExecute() is called, the generator has been initialized with the configuration we should use
    Date execution = new Date();
    MahoutBooleanGeneratorConfig config = getConfiguration();

    TypeMappingService typeMappingService = (TypeMappingService) super.getTypeMappingService();
    ItemAssocService itemAssocService = getItemAssocService();

    executionControl.updateProgress("initialize DataModel");
    DataModel easyrecDataModel = new EasyrecDataModel(config.getTenantId(),
            typeMappingService.getIdOfActionType(config.getTenantId(), config.getActionType()), false,
            mahoutDataModelMappingDAO);

    if (config.getCacheDataInMemory() == 1) {
        executionControl.updateProgress("initialize EasyrecInMemoryDataModel");
        easyrecDataModel = new EasyrecInMemoryDataModel(easyrecDataModel);
    }

    /* TanimotoCoefficientSimilarity is intended for "binary" data sets where a user either expresses a
       generic "yes" preference for an item or has no preference. */
    UserSimilarity userSimilarity = null;

    switch (config.getUserSimilarityMethod()) {
    case 1:
        executionControl.updateProgress("using LogLikelihoodSimilarity as UserSimilarity");
        userSimilarity = new LogLikelihoodSimilarity(easyrecDataModel);
        break;
    case 2:
        executionControl.updateProgress("using TanimotoCoefficientSimilarity as UserSimilarity");
        userSimilarity = new TanimotoCoefficientSimilarity(easyrecDataModel);
        break;
    case 3:
        executionControl.updateProgress("using SpearmanCorrelationSimilarity as UserSimilarity");
        userSimilarity = new SpearmanCorrelationSimilarity(easyrecDataModel);
        break;
    case 4:
        executionControl.updateProgress("using CityBlockSimilarity as UserSimilarity");
        userSimilarity = new CityBlockSimilarity(easyrecDataModel);
        break;
    }

    /* ThresholdUserNeighborhood is preferred in situations where we go in for a similarity measure
       between neighbors and not any number */
    UserNeighborhood neighborhood = null;

    Double userNeighborhoodSamplingRate = config.getUserNeighborhoodSamplingRate();
    Double neighborhoodThreshold = config.getUserNeighborhoodThreshold();
    int neighborhoodSize = config.getUserNeighborhoodSize();
    double userNeighborhoodMinSimilarity = config.getUserNeighborhoodMinSimilarity();

    switch (config.getUserNeighborhoodMethod()) {
    case 1:
        executionControl.updateProgress("using ThresholdUserNeighborhood as UserNeighborhood");
        neighborhood = new ThresholdUserNeighborhood(neighborhoodThreshold, userSimilarity, easyrecDataModel,
                userNeighborhoodSamplingRate);
        break;
    case 2:
        executionControl.updateProgress("using NearestNUserNeighborhood as UserNeighborhood");
        neighborhood = new NearestNUserNeighborhood(neighborhoodSize, userNeighborhoodMinSimilarity,
                userSimilarity, easyrecDataModel, userNeighborhoodSamplingRate);
        break;
    }

    /* GenericBooleanPrefUserBasedRecommender is appropriate for use when no notion of preference value
       exists in the data. */
    executionControl.updateProgress("using GenericBooleanPrefUserBasedRecommender as Recommender");
    Recommender recommender = new GenericBooleanPrefUserBasedRecommender(easyrecDataModel, neighborhood,
            userSimilarity);

    itemTypeDAO.insertOrUpdate(config.getTenantId(), "USER", true);

    Integer assocType = typeMappingService.getIdOfAssocType(config.getTenantId(), config.getAssociationType());
    Integer userType = typeMappingService.getIdOfItemType(config.getTenantId(), "USER");
    Integer sourceType = typeMappingService.getIdOfSourceType(config.getTenantId(), getId().toString());
    Integer viewType = typeMappingService.getIdOfViewType(config.getTenantId(), config.getViewType());

    stats.setNumberOfItems(easyrecDataModel.getNumItems());

    int totalSteps = easyrecDataModel.getNumUsers();
    int currentStep = 1;

    for (LongPrimitiveIterator it = easyrecDataModel.getUserIDs();
            it.hasNext() && !executionControl.isAbortRequested();) {
        executionControl.updateProgress(currentStep++, totalSteps, "Saving Recommendations...");

        long userId = it.nextLong();
        List<RecommendedItem> recommendations = recommender.recommend(userId, config.getNumberOfRecs());

        if (recommendations.isEmpty()) {
            logger.debug("User " + userId + " : no recommendations");
        }

        // print the list of recommendations for each user
        for (RecommendedItem recommendedItem : recommendations) {
            logger.debug("User " + userId + " : " + recommendedItem);

            Integer itemToId = (int) recommendedItem.getItemID();
            Integer itemToType = itemDAO.getItemTypeIdOfItem(config.getTenantId(), itemToId);

            ItemVO<Integer, Integer> fromItem = new ItemVO<Integer, Integer>(config.getTenantId(), (int) userId,
                    userType);
            Double recommendationStrength = (double) recommendedItem.getValue();
            ItemVO<Integer, Integer> toItem = new ItemVO<Integer, Integer>(config.getTenantId(), itemToId,
                    itemToType);

            ItemAssocVO<Integer, Integer> itemAssoc = new ItemAssocVO<Integer, Integer>(config.getTenantId(),
                    fromItem, assocType, recommendationStrength, toItem, sourceType, "Mahout Boolean Generator",
                    viewType, null, execution);

            itemAssocService.insertOrUpdateItemAssoc(itemAssoc);
            stats.incNumberOfRulesCreated();
        }
    }
}
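Stripped of the easyrec plumbing, the Mahout part of this generator is a boolean-preference user-based recommender. A minimal fragment sketching the Tanimoto/threshold combination the generator can select, assuming a FileDataModel over a hypothetical boolean preference file (userID,itemID per line) and a placeholder threshold of 0.1:

    DataModel model = new FileDataModel(new File("actions.csv")); // hypothetical file, no preference values
    UserSimilarity similarity = new TanimotoCoefficientSimilarity(model);
    UserNeighborhood neighborhood = new ThresholdUserNeighborhood(0.1, similarity, model);
    Recommender recommender = new GenericBooleanPrefUserBasedRecommender(model, neighborhood, similarity);

    // getNumItems() is an upper bound on how many recommendations any single user can receive
    int numItems = model.getNumItems();
    for (LongPrimitiveIterator it = model.getUserIDs(); it.hasNext();) {
        long userId = it.nextLong();
        List<RecommendedItem> recs = recommender.recommend(userId, Math.min(10, numItems));
        System.out.println(userId + " -> " + recs);
    }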
From source file:org.plista.kornakapi.core.recommender.CachingAllUnknownItemsCandidateItemsStrategy.java
License:Apache License
private FastIDSet loadAllItemIDs(DataModel dataModel) throws TasteException {
    int numItems = dataModel.getNumItems();
    log.info("Loading {} itemIDs into memory", numItems);
    FastIDSet collectedItemIDs = new FastIDSet(dataModel.getNumItems());
    LongPrimitiveIterator allItemIDsIterator = dataModel.getItemIDs();
    while (allItemIDsIterator.hasNext()) {
        collectedItemIDs.add(allItemIDsIterator.next());
    }
    return collectedItemIDs;
}
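Here getNumItems() pre-sizes the FastIDSet so it does not need to grow while the item IDs are collected. A functionally identical line could simply reuse the count already read into numItems instead of calling the model a second time:

    FastIDSet collectedItemIDs = new FastIDSet(numItems); // pre-size with the count obtained above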
From source file:org.plista.kornakapi.core.training.FactorizationbasedInMemoryTrainer.java
License:Apache License
@Override
protected void doTrain(File targetFile, DataModel inmemoryData, int numProcessors) throws IOException {
    try {
        if (inmemoryData.getNumItems() >= 5 && inmemoryData.getNumUsers() >= 10) { // preventing matrix singularity
            ALSWRFactorizer factorizer = new ALSWRFactorizer(inmemoryData, conf.getNumberOfFeatures(),
                    conf.getLambda(), conf.getNumberOfIterations(), conf.isUsesImplicitFeedback(),
                    conf.getAlpha(), numProcessors);

            long start = System.currentTimeMillis();
            Factorization factorization = factorizer.factorize();
            long estimateDuration = System.currentTimeMillis() - start;

            if (log.isInfoEnabled()) {
                log.info("Model trained in {} ms", estimateDuration);
            }

            new FilePersistenceStrategy(targetFile).maybePersist(factorization);
        }
    } catch (Exception e) {
        throw new IOException(e);
    }
}
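In this trainer getNumItems()/getNumUsers() act as a guard: factorizing a nearly empty model is pointless and numerically fragile. A minimal sketch of the same guard around the simpler four-argument ALSWRFactorizer constructor, with placeholder hyperparameters and file name (not tuned settings); factorize() throws TasteException and maybePersist() throws IOException:

    if (model.getNumItems() >= 5 && model.getNumUsers() >= 10) {
        // 20 features, lambda 0.065, 10 iterations are illustrative values only
        ALSWRFactorizer factorizer = new ALSWRFactorizer(model, 20, 0.065, 10);
        Factorization factorization = factorizer.factorize();
        new FilePersistenceStrategy(new File("factorization.bin")).maybePersist(factorization);
    }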
From source file:org.plista.kornakapi.core.training.MultithreadedItembasedInMemoryTrainer.java
License:Apache License
@Override
protected void doTrain(File targetFile, DataModel inmemoryData, int numProcessors) throws IOException {
    BufferedWriter writer = null;
    ExecutorService executorService = Executors.newFixedThreadPool(numProcessors + 1);

    try {
        ItemSimilarity similarity = (ItemSimilarity) Class.forName(conf.getSimilarityClass())
                .getConstructor(DataModel.class).newInstance(inmemoryData);
        ItemBasedRecommender trainer = new GenericItemBasedRecommender(inmemoryData, similarity);

        writer = new BufferedWriter(new FileWriter(targetFile));

        int batchSize = 100;
        int numItems = inmemoryData.getNumItems();

        List<long[]> itemIDBatches = queueItemIDsInBatches(inmemoryData.getItemIDs(), numItems, batchSize);
        log.info("Queued {} items in {} batches", numItems, itemIDBatches.size());

        BlockingQueue<long[]> itemsIDsToProcess = new LinkedBlockingQueue<long[]>(itemIDBatches);
        BlockingQueue<String> output = new LinkedBlockingQueue<String>();

        AtomicInteger numActiveWorkers = new AtomicInteger(numProcessors);
        for (int n = 0; n < numProcessors; n++) {
            executorService.execute(new SimilarItemsWorker(n, itemsIDsToProcess, output, trainer,
                    conf.getSimilarItemsPerItem(), numActiveWorkers));
        }
        executorService.execute(new OutputWriter(output, writer, numActiveWorkers));

    } catch (Exception e) {
        throw new IOException(e);
    } finally {
        executorService.shutdown();
        try {
            executorService.awaitTermination(6, TimeUnit.HOURS);
        } catch (InterruptedException e) {
        }
        Closeables.closeQuietly(writer);
    }
}
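queueItemIDsInBatches is not shown in this listing. A plausible sketch of such a helper, using the getNumItems() count only to size the batch list up front (the slicing logic is an assumption, not the project's actual code; requires java.util.ArrayList and java.util.Arrays):

    private static List<long[]> queueItemIDsInBatches(LongPrimitiveIterator itemIDs, int numItems, int batchSize) {
        List<long[]> batches = new ArrayList<long[]>(numItems / batchSize + 1);
        long[] batch = new long[batchSize];
        int pos = 0;
        while (itemIDs.hasNext()) {
            batch[pos++] = itemIDs.nextLong();
            if (pos == batchSize) {
                batches.add(batch);
                batch = new long[batchSize];
                pos = 0;
            }
        }
        if (pos > 0) {
            batches.add(Arrays.copyOf(batch, pos)); // trim the last, partially filled batch
        }
        return batches;
    }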
From source file:recommender.GenericRecommenderIRStatsEvaluatorCustom.java
License:Apache License
@Override
public IRStatistics evaluate(RecommenderBuilder recommenderBuilder, DataModelBuilder dataModelBuilder,
        DataModel dataModel, IDRescorer rescorer, int at, double relevanceThreshold,
        double evaluationPercentage) throws TasteException {

    prop = new Properties();
    try {
        prop.load(GenericRecommenderIRStatsEvaluatorCustom.class.getResourceAsStream("settings.properties"));
    } catch (IOException ex) {
        java.util.logging.Logger.getLogger(GenericRecommenderIRStatsEvaluatorCustom.class.getName())
                .log(Level.SEVERE, null, ex);
    }

    // load settings from MySQL database
    loadSettings();

    Preconditions.checkArgument(recommenderBuilder != null, "recommenderBuilder is null");
    Preconditions.checkArgument(dataModel != null, "dataModel is null");
    Preconditions.checkArgument(at >= 1, "at must be at least 1");
    Preconditions.checkArgument(evaluationPercentage > 0.0 && evaluationPercentage <= 1.0,
            "Invalid evaluationPercentage: " + evaluationPercentage + ". Must be: 0.0 < evaluationPercentage <= 1.0");

    int numItems = dataModel.getNumItems();
    System.out.println("Data model numItems: " + numItems);

    RunningAverage precision = new FullRunningAverage();
    RunningAverage recall = new FullRunningAverage();
    RunningAverage fallOut = new FullRunningAverage();
    RunningAverage nDCG = new FullRunningAverage();
    int numUsersRecommendedFor = 0;
    int numUsersWithRecommendations = 0;

    // map to store diversity ranges => number of users
    HashMap<String, String> map = new HashMap<>();

    LongPrimitiveIterator it = dataModel.getUserIDs();
    while (it.hasNext()) {
        long userID = it.nextLong();
        if (userID == 0) {
            continue;
        }
        // get the top users
        if (!list.contains(userID + "")) {
            continue;
        }
        if (random.nextDouble() >= evaluationPercentage) {
            // Skipped
            continue;
        }

        long start = System.currentTimeMillis();

        PreferenceArray prefs = dataModel.getPreferencesFromUser(userID);
        System.out.println("User preferences: " + prefs);

        // List some most-preferred items that would count as (most) "relevant" results
        double theRelevanceThreshold = 0; // Double.isNaN(relevanceThreshold) ? computeThreshold(prefs) : relevanceThreshold;
        FastIDSet relevantItemIDs = dataSplitter.getRelevantItemsIDs(userID, at, theRelevanceThreshold, dataModel);
        System.out.println("Relevant items: " + relevantItemIDs);
        System.out.println("Relevance threshold: " + theRelevanceThreshold);

        int numRelevantItems = relevantItemIDs.size();
        if (numRelevantItems <= 0) {
            continue;
        }

        FastByIDMap<PreferenceArray> trainingUsers = new FastByIDMap<>(dataModel.getNumUsers());
        LongPrimitiveIterator it2 = dataModel.getUserIDs();
        while (it2.hasNext()) {
            dataSplitter.processOtherUser(userID, relevantItemIDs, trainingUsers, it2.nextLong(), dataModel);
        }

        DataModel trainingModel = dataModelBuilder == null ? new GenericDataModel(trainingUsers)
                : dataModelBuilder.buildDataModel(trainingUsers);
        try {
            trainingModel.getPreferencesFromUser(userID);
        } catch (NoSuchUserException nsee) {
            continue; // Oops we excluded all prefs for the user -- just move on
        }

        int size = numRelevantItems + trainingModel.getItemIDsFromUser(userID).size();
        if (size < 2 * at) {
            // Really not enough prefs to meaningfully evaluate this user
            System.out.println("Really not enough prefs (" + size + ") to meaningfully evaluate user: " + userID);
            continue;
        }

        Recommender recommender = recommenderBuilder.buildRecommender(trainingModel);

        int intersectionSize = 0;
        List<RecommendedItem> recommendedItems = recommender.recommend(userID, at, rescorer);
        HashMap<Long, Double> user_preferences = getUserPreferencesList(userID);
        for (RecommendedItem recommendedItem : recommendedItems) {
            double preference = isRelevant(user_preferences, recommendedItem);
            System.out.println("Preference: " + preference);
            if (relevantItemIDs.contains(recommendedItem.getItemID()) || preference != 0) {
                intersectionSize++;
            }
        }

        int numRecommendedItems = recommendedItems.size();

        // Precision
        if (numRecommendedItems > 0) {
            precision.addDatum((double) intersectionSize / (double) numRecommendedItems);
            System.out.println("intersectionSize: " + intersectionSize + " numRecommendedItems: " + numRecommendedItems);
        }

        // Recall
        recall.addDatum((double) intersectionSize / (double) numRelevantItems);

        // Fall-out
        if (numRelevantItems < size) {
            fallOut.addDatum((double) (numRecommendedItems - intersectionSize) / (double) (numItems - numRelevantItems));
        }

        // nDCG
        // In computing, assume relevant IDs have relevance 1 and others 0
        double cumulativeGain = 0.0;
        double idealizedGain = 0.0;
        for (int i = 0; i < numRecommendedItems; i++) {
            RecommendedItem item = recommendedItems.get(i);
            double discount = 1.0 / log2(i + 2.0); // Classical formulation says log(i+1), but i is 0-based here
            if (relevantItemIDs.contains(item.getItemID())) {
                cumulativeGain += discount;
            }
            // otherwise we're multiplying discount by relevance 0 so it doesn't do anything

            // Ideally results would be ordered with all relevant ones first, so this theoretical
            // ideal list starts with number of relevant items equal to the total number of relevant items
            if (i < numRelevantItems) {
                idealizedGain += discount;
            }
        }
        if (idealizedGain > 0.0) {
            nDCG.addDatum(cumulativeGain / idealizedGain);
        }

        // Reach
        numUsersRecommendedFor++;
        if (numRecommendedItems > 0) {
            numUsersWithRecommendations++;
        }

        long end = System.currentTimeMillis();
        log.info("Evaluated with user {} in {}ms", userID, end - start);
        log.info("Precision/recall/fall-out/nDCG/reach: {} / {} / {} / {} / {}", precision.getAverage(),
                recall.getAverage(), fallOut.getAverage(), nDCG.getAverage(),
                (double) numUsersWithRecommendations / (double) numUsersRecommendedFor);

        System.out.println("Relevant items: " + numRelevantItems);
        System.out.println("Precision: " + precision.getAverage());
        System.out.println("Recall: " + recall.getAverage());
        System.out.println("Fall-out: " + fallOut.getAverage());
        System.out.println("nDCG: " + nDCG.getAverage());
        System.out.println("Reach: " + (double) numUsersWithRecommendations / (double) numUsersRecommendedFor);

        double diversity = getDiversity(recommendedItems);
        System.out.println("Diversity: " + diversity);

        if (diversity >= 0 && diversity < 0.1) {
            int count = map.get("0-0.1") != null ? Integer.parseInt(map.get("0-0.1")) + 1 : 1;
            map.put("0-0.1", count + "");
        } else if (diversity >= 0.1 && diversity < 0.2) {
            int count = map.get("0.1-0.2") != null ? Integer.parseInt(map.get("0.1-0.2")) + 1 : 1;
            map.put("0.1-0.2", count + "");
        } else if (diversity >= 0.2 && diversity < 0.3) {
            int count = map.get("0.2-0.3") != null ? Integer.parseInt(map.get("0.2-0.3")) + 1 : 1;
            map.put("0.2-0.3", count + "");
        } else if (diversity >= 0.3 && diversity < 0.4) {
            int count = map.get("0.3-0.4") != null ? Integer.parseInt(map.get("0.3-0.4")) + 1 : 1;
            map.put("0.3-0.4", count + "");
        } else if (diversity >= 0.4 && diversity < 0.5) {
            int count = map.get("0.4-0.5") != null ? Integer.parseInt(map.get("0.4-0.5")) + 1 : 1;
            map.put("0.4-0.5", count + "");
        } else if (diversity >= 0.5 && diversity < 0.6) {
            int count = map.get("0.5-0.6") != null ? Integer.parseInt(map.get("0.5-0.6")) + 1 : 1;
            map.put("0.5-0.6", count + "");
        } else if (diversity >= 0.6 && diversity < 0.7) {
            int count = map.get("0.6-0.7") != null ? Integer.parseInt(map.get("0.6-0.7")) + 1 : 1;
            map.put("0.6-0.7", count + "");
        } else if (diversity >= 0.7 && diversity < 0.8) {
            int count = map.get("0.7-0.8") != null ? Integer.parseInt(map.get("0.7-0.8")) + 1 : 1;
            map.put("0.7-0.8", count + "");
        } else if (diversity >= 0.8 && diversity < 0.9) {
            int count = map.get("0.8-0.9") != null ? Integer.parseInt(map.get("0.8-0.9")) + 1 : 1;
            map.put("0.8-0.9", count + "");
        } else if (diversity >= 0.9) {
            int count = map.get("0.9-1") != null ? Integer.parseInt(map.get("0.9-1")) + 1 : 1;
            map.put("0.9-1", count + "");
        }
    }

    JSONObject json = new JSONObject(map);
    writeFile(prop.getProperty("metrics_file"), json.toJSONString());

    return new IRStatisticsImplCustom(precision.getAverage(), recall.getAverage(), fallOut.getAverage(),
            nDCG.getAverage(), (double) numUsersWithRecommendations / (double) numUsersRecommendedFor);
}
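In this evaluator, getNumItems() supplies the size of the whole item universe, which only enters the fall-out denominator: fall-out = (recommended but not relevant) / (all items that are not relevant). As a worked example with illustrative numbers, if numItems = 1000, numRelevantItems = 10, numRecommendedItems = 20 and intersectionSize = 5, then fall-out = (20 - 5) / (1000 - 10) = 15 / 990 ≈ 0.015 for that user.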
From source file:uit.tkorg.pr.method.cf.KNNCF.java
/**
 * @param inputFile
 * @param similarityScheme 1: CoPearson, 2: Cosine.
 * @param k
 * @param authorTestSet
 * @param paperIdsInTestSet
 * @param outputFile
 * @throws IOException
 * @throws TasteException
 */
public static void computeCFRatingAndPutIntoModelForAuthorList(String inputFile, int similarityScheme, int k,
        HashMap<String, Author> authorTestSet, HashSet<String> paperIdsInTestSet, String outputFile)
        throws IOException, TasteException {

    DataModel dataModel = new FileDataModel(new File(inputFile));

    UserSimilarity userSimilarity = null;
    if (similarityScheme == 1) {
        userSimilarity = new PearsonCorrelationSimilarity(dataModel);
    } else if (similarityScheme == 2) {
        userSimilarity = new UncenteredCosineSimilarity(dataModel);
    }

    UserNeighborhood userNeighborhood = new NearestNUserNeighborhood(k, userSimilarity, dataModel);
    // Create a generic user based recommender with the dataModel, the userNeighborhood and the userSimilarity
    Recommender genericRecommender = new GenericUserBasedRecommender(dataModel, userNeighborhood, userSimilarity);

    FileUtils.deleteQuietly(new File(outputFile));
    try (BufferedWriter bw = new BufferedWriter(new FileWriter(outputFile))) {
        int count = 0;
        System.out.println("Number of users:" + authorTestSet.size());
        for (LongPrimitiveIterator iterator = dataModel.getUserIDs(); iterator.hasNext();) {
            long userId = iterator.nextLong();
            // Generate a list of n recommendations for the user
            if (authorTestSet.containsKey(String.valueOf(userId).trim())) {
                System.out.println("Computing CF rating value for user no. " + count);
                List<RecommendedItem> recommendationList = genericRecommender.recommend(userId,
                        dataModel.getNumItems());
                if (!recommendationList.isEmpty()) {
                    // Display the list of recommendations
                    for (RecommendedItem recommendedItem : recommendationList) {
                        String authorId = String.valueOf(userId).trim();
                        String paperId = String.valueOf(recommendedItem.getItemID()).trim();
                        if (paperIdsInTestSet.contains(paperId)) {
                            authorTestSet.get(authorId).getCfRatingHM().put(paperId,
                                    Float.valueOf(recommendedItem.getValue()));
                            bw.write(userId + "," + recommendedItem.getItemID() + ","
                                    + recommendedItem.getValue() + "\r\n");
                        }
                    }
                }
                count++;
            }
        }
    }
}
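Passing dataModel.getNumItems() as the howMany argument asks the recommender for its longest possible result list, effectively a score for every recommendable item; the loop afterwards keeps only the papers in the test set. A one-line illustration of that call, assuming a recommender and user ID are already in scope:

    // Request up to one recommendation per known item - effectively a full ranking for this user.
    List<RecommendedItem> allRecs = recommender.recommend(userId, dataModel.getNumItems());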
From source file:uit.tkorg.pr.method.cf.SVDCF.java
public static void computeCFRatingAndPutIntoModelForAuthorList(String inputFile, int numFeatures, double lamda,
        int numIterations, HashMap<String, Author> authorTestSet, HashSet<String> paperIdsInTestSet,
        String outputFile) throws IOException, TasteException {

    DataModel dataModel = new FileDataModel(new File(inputFile));
    Factorizer factorizer = new ALSWRFactorizer(dataModel, numFeatures, lamda, numIterations);
    Recommender svdRecommender = new SVDRecommender(dataModel, factorizer);

    FileUtils.deleteQuietly(new File(outputFile));
    try (BufferedWriter bw = new BufferedWriter(new FileWriter(outputFile))) {
        int count = 0;
        System.out.println("Number of users:" + authorTestSet.size());
        for (LongPrimitiveIterator iterator = dataModel.getUserIDs(); iterator.hasNext();) {
            long userId = iterator.nextLong();
            // Generate a list of n recommendations for the user
            if (authorTestSet.containsKey(String.valueOf(userId).trim())) {
                System.out.println("Computing CF rating value for user no. " + count);
                List<RecommendedItem> recommendationList = svdRecommender.recommend(userId,
                        dataModel.getNumItems());
                if (!recommendationList.isEmpty()) {
                    // Display the list of recommendations
                    for (RecommendedItem recommendedItem : recommendationList) {
                        String authorId = String.valueOf(userId).trim();
                        String paperId = String.valueOf(recommendedItem.getItemID()).trim();
                        if (paperIdsInTestSet.contains(paperId)) {
                            authorTestSet.get(authorId).getCfRatingHM().put(paperId,
                                    Float.valueOf(recommendedItem.getValue()));
                            bw.write(userId + "," + recommendedItem.getItemID() + ","
                                    + recommendedItem.getValue() + "\r\n");
                        }
                    }
                }
                count++;
            }
        }
    }
}