norbert.mynemo.dataimport.scraping.CkRating.java Source code

Java tutorial

Introduction

Here is the source code for norbert.mynemo.dataimport.scraping.CkRating.java

Source

/*
 * Copyright 2015 Norbert
 *
 * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
 * agreements. See the NOTICE file distributed with this work for additional information regarding
 * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License. You may obtain a
 * copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */
package norbert.mynemo.dataimport.scraping;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.CSVRecord;

/**
 * A rating from the CK web site encapsulates the user that gives the rating, the rated movie and
 * the value of the rating. The rated movie is defined by its CK id.
 *
 * <p>
 * A list of ratings can be persisted in a tab-separated value file, where each line represents a
 * rating. The first line of the file is a header line naming the columns. The columns are:
 * <ul>
 * <li>user id from CK
 * <li>movie id from CK
 * <li>value of the rating
 * </ul>
 *
 * <p>
 * Instances are immutable. The ids are shared through a static, unsynchronized cache, so creating
 * ratings concurrently from several threads requires external synchronization.
 */
public class CkRating {

    private static final String MOVIE_HEADER = "ck_movie";
    private static final String USER_HEADER = "ck_user";
    private static final String VALUE_HEADER = "rating_value";
    /**
     * Format of the CSV entries for the parser. The header names are not set in order to detect if a
     * wrong file is parsed: the names are read from the first line of the file, and when
     * {@link CSVRecord#get(String)} is called, an exception is thrown if the header name doesn't
     * exist.
     */
    private static final CSVFormat CSV_FORMAT_FOR_PARSER = CSVFormat.MYSQL.withHeader().withSkipHeaderRecord();
    /**
     * Format of the CSV entries for the printer. The header record must not be skipped here:
     * the parser format above reads the column names from the first line of the file, so the
     * printer has to write that line for the file to be readable again.
     */
    private static final CSVFormat CSV_FORMAT_FOR_PRINTER = CSVFormat.MYSQL
            .withHeader(USER_HEADER, MOVIE_HEADER, VALUE_HEADER);
    /**
     * Cache of all different movie, user and rating ids built by this class. Each id is mapped to
     * itself, so that equal ids can share one <code>String</code> instance. Entries are never
     * removed.
     */
    private static final Map<String, String> ID_CACHE = new HashMap<String, String>();

    /**
     * Returns a parser able to read a CK rating file.
     *
     * @param filepath file to read, must not be <code>null</code>
     * @return a new parser, the caller is responsible for closing it
     * @throws IOException if the file cannot be opened or its header line cannot be read
     * @throws NullPointerException if the filepath is <code>null</code>
     */
    public static CSVParser createParser(String filepath) throws IOException {
        checkNotNull(filepath, "The filepath must be not null.");

        BufferedReader reader = new BufferedReader(new FileReader(filepath));
        boolean created = false;
        try {
            // the constructor already reads the header line, thus it can fail
            CSVParser parser = new CSVParser(reader, CSV_FORMAT_FOR_PARSER);
            created = true;
            return parser;
        } finally {
            if (!created) {
                closeAfterFailure(reader);
            }
        }
    }

    /**
     * Creates a printer where a rating can be printed. The given file must not exist.
     *
     * @param filepath the filepath where the printer will write the data.
     * @return a new printer, the caller is responsible for closing it
     * @throws IOException if the file cannot be created or the header line cannot be written
     * @throws IllegalArgumentException if the filepath is <code>null</code> or the file exists
     */
    public static CSVPrinter createPrinter(String filepath) throws IOException {
        checkArgument(filepath != null, "The filepath must be not null.");
        checkArgument(!new File(filepath).exists(), "The file must not exist.");

        BufferedWriter writer = new BufferedWriter(new FileWriter(filepath));
        boolean created = false;
        try {
            // the constructor may already write the header line, thus it can fail
            CSVPrinter printer = new CSVPrinter(writer, CSV_FORMAT_FOR_PRINTER);
            created = true;
            return printer;
        } finally {
            if (!created) {
                closeAfterFailure(writer);
            }
        }
    }

    /**
     * Creates a rating from a record. The record was usually created from a parser created by the
     * {@link #createParser(String)} method. The {@link #isValid(CSVRecord)} method tells if the
     * record contains all the necessary columns.
     *
     * @param record record providing the user, movie and value columns
     * @return a new rating
     */
    public static CkRating createRating(CSVRecord record) {
        return new CkRating(record.get(USER_HEADER), record.get(MOVIE_HEADER), record.get(VALUE_HEADER));
    }

    /**
     * Closes the given resource while another failure is already propagating. A failure of the
     * close itself is ignored, because the original failure is the one worth reporting.
     */
    private static void closeAfterFailure(java.io.Closeable resource) {
        try {
            resource.close();
        } catch (IOException ignored) {
            // nothing more can be done, the original failure is propagated by the caller
        }
    }

    /**
     * Puts the given id in the cache, and returns it, or an equal cached id.
     */
    private static String getCacheId(String id) {
        String cachedId = ID_CACHE.get(id);
        if (cachedId == null) {
            // first occurrence of this id: it becomes the shared instance
            ID_CACHE.put(id, id);
            cachedId = id;
        }
        return cachedId;
    }

    /**
     * Returns <code>true</code> if the rating can be created from the given parameters. Returns
     * <code>false</code> otherwise. The record must be not <code>null</code>, consistent, and must
     * provide the user, movie and value columns.
     */
    public static boolean isValid(CSVRecord record) {
        return record != null && record.isConsistent() && record.isMapped(USER_HEADER)
                && record.isMapped(MOVIE_HEADER) && record.isMapped(VALUE_HEADER);
    }

    private final String movie;
    private final String user;
    private final String value;

    /**
     * Creates a rating. The given ids are replaced by equal cached instances when available.
     *
     * @param user CK id of the user that gives the rating
     * @param movie CK id of the rated movie
     * @param value value of the rating
     */
    public CkRating(String user, String movie, String value) {
        this.user = getCacheId(user);
        this.movie = getCacheId(movie);
        this.value = getCacheId(value);
    }

    /** Returns the CK id of the rated movie. */
    public String getMovie() {
        return movie;
    }

    /** Returns the CK id of the user that gives the rating. */
    public String getUser() {
        return user;
    }

    /** Returns the value of the rating. */
    public String getValue() {
        return value;
    }

    /**
     * Prints the internal data to the given printer. The printer is usually created by calling the
     * {@link #createPrinter(String)} method.
     *
     * @param printer printer where the rating is written, must not be <code>null</code>
     * @throws IOException if the printer fails to write the record
     */
    public void printOn(CSVPrinter printer) throws IOException {
        checkNotNull(printer);

        // the write order depends on the order of the headers in the csv format
        printer.print(user);
        printer.print(movie);
        printer.print(value);
        // end of the record
        printer.println();
    }
}