de.tuberlin.dima.recsys.ssnmm.ratingprediction.AverageRating.java Source code

Java tutorial

Introduction

Here is the source code for de.tuberlin.dima.recsys.ssnmm.ratingprediction.AverageRating.java

Source

/*
 * Copyright (C) 2012 Sebastian Schelter <sebastian.schelter [at] tu-berlin.de>
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 */

package de.tuberlin.dima.recsys.ssnmm.ratingprediction;

import de.tuberlin.dima.recsys.ssnmm.Utils;
import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
import org.apache.mahout.cf.taste.impl.common.RunningAverage;

import java.io.File;
import java.io.FilenameFilter;
import java.util.regex.Pattern;

/**
 * Computes the average rating of the training set in a streaming fashion.
 *
 * <p>Every file in the data directory whose name starts with {@code train_} is read line by line. Each line is split
 * on tab characters and its third field is parsed as an integer rating that is added to a running average, so the
 * individual ratings are never collected by this class.
 *
 * <p>Usage: {@code AverageRating [dataDirectory]}. When no directory is given, a built-in default path is used.
 */
public class AverageRating {

    /** Directory that is scanned when no directory is passed on the command line. */
    private static final String DEFAULT_DATA_DIR = "/home/ssc/Entwicklung/datasets/yahoo-songs/";

    /** Only files whose name starts with this prefix are treated as training files. */
    private static final String TRAINING_FILE_PREFIX = "train_";

    /** Field separator of the training files; compiled once and reused for every line. */
    private static final Pattern SEPARATOR = Pattern.compile("\t");

    /** Zero-based index of the field that holds the rating. */
    private static final int RATING_FIELD = 2;

    /** A progress message is printed each time this many ratings have been processed. */
    private static final int PROGRESS_INTERVAL = 10000000;

    /**
     * Reads all training files and prints the average of the ratings they contain to standard output.
     *
     * @param args optional; {@code args[0]} is the directory containing the training files. If absent, the default
     *             data directory is used.
     * @throws IllegalStateException if the data directory cannot be listed
     * @throws NumberFormatException if a rating field is not a valid integer
     * @throws ArrayIndexOutOfBoundsException if a line has fewer than three tab-separated fields
     */
    public static void main(String[] args) {

        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_DATA_DIR;
        File[] trainingFiles = listTrainingFiles(new File(path));

        RunningAverage avg = new FullRunningAverage();

        // long rather than int so the counter cannot wrap around on very large training sets
        long ratingsProcessed = 0;
        for (File trainingFile : trainingFiles) {
            for (String line : Utils.readLines(trainingFile)) {
                int rating = Integer.parseInt(SEPARATOR.split(line)[RATING_FIELD]);
                avg.addDatum(rating);
                if (++ratingsProcessed % PROGRESS_INTERVAL == 0) {
                    System.out.println(ratingsProcessed + " ratings processed");
                }
            }
        }
        System.out.println("average rating " + avg.getAverage());

    }

    /**
     * Lists the training files contained in the given directory.
     *
     * @param dir the directory to scan
     * @return the files in {@code dir} whose name starts with the training file prefix; possibly empty, never null
     * @throws IllegalStateException if {@code dir} is not a directory or cannot be read
     */
    private static File[] listTrainingFiles(File dir) {
        File[] trainingFiles = dir.listFiles(new FilenameFilter() {
            @Override
            public boolean accept(File parent, String name) {
                return name.startsWith(TRAINING_FILE_PREFIX);
            }
        });
        // File.listFiles signals "not a directory" and I/O errors by returning null instead of throwing
        if (trainingFiles == null) {
            throw new IllegalStateException("Cannot list training files, not a readable directory: " + dir);
        }
        return trainingFiles;
    }
}