norbert.mynemo.ui.ImportCommandParser.java Source code

Java tutorial

Introduction

Here is the source code for norbert.mynemo.ui.ImportCommandParser.java

Source

/*
 * Copyright 2015 Norbert
 *
 * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
 * agreements. See the NOTICE file distributed with this work for additional information regarding
 * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License. You may obtain a
 * copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */
package norbert.mynemo.ui;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Arrays;
import java.util.Locale;

import norbert.mynemo.dataimport.FileImporter;
import norbert.mynemo.dataimport.fileformat.output.UserSimilarityType;

import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;

import com.google.common.base.Optional;

/**
 * This parser handles a command line to import some rating files. The input files are merged. The
 * imported ratings are written into an output file. The ratings can be filtered.
 */
public final class ImportCommandParser {

    // Usage line printed by printUsage(). Every optional flag is shown with its leading "--".
    private static final String COMMAND_SYNTAX = "import  --out <file>  --in <file> [<file>]"
            + "  [--movies <file> [<file>]]  [--user <id>]  [--max-users <number> [--similarity <type>]]"
            + "  [--min-ratings-by-movie <number>]  [--min-common-ratings <number>]";

    // maximum number of users
    private static final String MAX_USERS_ARG_NAME = "number";
    private static final String MAX_USERS_DESCRIPTION = "maximum number of users allowed in the"
            + " output file. Once the limit is reached, the ratings of other users are ignored. This"
            + " option can be further refined with the 'similarity' and the 'user' options, to retain"
            + " the nearest neighbors of the specified user.";
    private static final String MAX_USERS_LONG_OPTION = "max-users";

    // minimum number of common ratings
    private static final String MIN_COMMON_RATINGS_ARG_NAME = "number";
    private static final String MIN_COMMON_RATINGS_DESCRIPTION = "minimum number of ratings in"
            + " common with the given user. The output file will only contain ratings of users that have"
            + " at least this number of common ratings with the given user. Two ratings are common"
            + " between two users if their movies are equal.";
    private static final String MIN_COMMON_RATINGS_LONG_OPTION = "min-common-ratings";

    // minimum number of ratings for a movie
    private static final String MIN_RATINGS_BY_MOVIE_ARG_NAME = "number";
    private static final String MIN_RATINGS_BY_MOVIE_DESCRIPTION = "minimum ratings for an item."
            + " The output file will only contain items that have at least this number of ratings";
    private static final String MIN_RATINGS_BY_MOVIE_LONG_OPTION = "min-ratings-by-movie";

    // movie
    private static final String MOVIES_ARG_NAME = "files";
    private static final char MOVIES_CHAR_OPTION = 'm';
    private static final String MOVIES_DESCRIPTION = "one or two files containing the mappings"
            + " between MovieLens or CK ids and IMDb ids. These files are only required if the 10"
            + " million  rating file from the MovieLens data set is an input file, or if a CK rating"
            + " file is an input file. The first file can be generated from the MovieLens website:"
            + " download the result of the search for any genre. The second file is generated by the"
            + " scraping.";
    private static final String MOVIES_LONG_OPTION = "movies";

    // output file
    private static final String OUT_ARG_NAME = "file";
    private static final char OUT_CHAR_OPTION = 'o';
    private static final String OUT_DESCRIPTION = "output file where the converted ratings are"
            + " written. If more than one input file is provided, they are merged.";
    private static final String OUT_LONG_OPTION = "out";

    // rating file
    private static final String RATINGS_ARG_NAME = "file";
    private static final char RATINGS_CHAR_OPTION = 'i';
    private static final String RATINGS_DESCRIPTION = "file containing ratings from MovieLens.";
    private static final String RATINGS_LONG_OPTION = "in";

    // similarity type
    private static final String SIMILARITY_ARG_NAME = "type";
    private static final char SIMILARITY_CHAR_OPTION = 's';
    private static final String SIMILARITY_DESCRIPTION = "type of similarity used to find the"
            + " nearest users of the target user. This option works with the '" + MAX_USERS_LONG_OPTION
            + "' and the 'user' options: only the nearest users will be retained.";
    private static final String SIMILARITY_LONG_OPTION = "similarity";

    // user
    private static final String USER_ARG_NAME = "id";
    private static final char USER_CHAR_OPTION = 'u';
    private static final String USER_DESCRIPTION = "user identifier of the input ratings, and target"
            + " user of the similarity.";
    private static final String USER_LONG_OPTION = "user";

    /**
     * Performs cross-parameter checks that the command line library cannot express:
     * <ul>
     * <li>the output file must not exist yet,</li>
     * <li>every rating file and every mapping file must exist,</li>
     * <li>a minimum number of common ratings requires a user,</li>
     * <li>a similarity type requires both a user and a maximum number of users.</li>
     * </ul>
     *
     * @throws IllegalArgumentException if the output file already exists or if an option is given
     *         without the options it depends on
     * @throws FileNotFoundException if a rating file or a mapping file does not exist
     */
    private static void check(String outputFilepath, String[] ratingFilepaths, String[] mappingFilepaths,
            Optional<Integer> maxUsers, Optional<Integer> minCommonRatings, Optional<String> user,
            Optional<UserSimilarityType> similarityType) throws FileNotFoundException {

        // output filepath: refuse to overwrite an existing file
        if (new File(outputFilepath).exists()) {
            throw new IllegalArgumentException("Error: the output file " + outputFilepath + " already exists.");
        }

        // input filepaths
        for (String filepath : ratingFilepaths) {
            if (!new File(filepath).exists()) {
                throw new FileNotFoundException("Error: cannot find the input file " + filepath);
            }
        }

        // mapping filepaths
        for (String filepath : mappingFilepaths) {
            if (!new File(filepath).exists()) {
                throw new FileNotFoundException("Error: cannot find the movie file " + filepath);
            }
        }

        // minimum number of common ratings: meaningless without a reference user
        if (minCommonRatings.isPresent() && !user.isPresent()) {
            throw new IllegalArgumentException(
                    "Error: if a minimum common rating is given, then the user must be given.");
        }

        // similarity type: needs both the target user and the number of neighbors to keep
        if (similarityType.isPresent() && (!user.isPresent() || !maxUsers.isPresent())) {
            throw new IllegalArgumentException("Error: if a similarity type is given, then the user and"
                    + " the maximum number of users must be given.");
        }
    }

    /**
     * Builds the options understood by the import command.
     *
     * <p>
     * {@link OptionBuilder} keeps the option under construction in static state that is reset by
     * each call to {@code create}, which is why every option is configured through a sequence of
     * separate static calls ending with {@code create}.
     */
    private static Options getOptions() {
        OptionBuilder.isRequired();
        OptionBuilder.hasArg();
        OptionBuilder.withArgName(OUT_ARG_NAME);
        OptionBuilder.withDescription(OUT_DESCRIPTION);
        OptionBuilder.withLongOpt(OUT_LONG_OPTION);
        Option out = OptionBuilder.create(OUT_CHAR_OPTION);

        OptionBuilder.isRequired();
        OptionBuilder.hasArgs();
        OptionBuilder.withArgName(RATINGS_ARG_NAME);
        OptionBuilder.withLongOpt(RATINGS_LONG_OPTION);
        OptionBuilder.withDescription(RATINGS_DESCRIPTION);
        Option ratings = OptionBuilder.create(RATINGS_CHAR_OPTION);

        // Several mapping files may follow the option ("--movies <file> [<file>]"), so the option
        // must accept more than one value; with a single-value option the second file would be
        // left as a stray argument and silently ignored.
        OptionBuilder.hasArgs();
        OptionBuilder.withArgName(MOVIES_ARG_NAME);
        OptionBuilder.withLongOpt(MOVIES_LONG_OPTION);
        OptionBuilder.withDescription(MOVIES_DESCRIPTION);
        Option movies = OptionBuilder.create(MOVIES_CHAR_OPTION);

        OptionBuilder.hasArg();
        OptionBuilder.withArgName(USER_ARG_NAME);
        OptionBuilder.withLongOpt(USER_LONG_OPTION);
        OptionBuilder.withDescription(USER_DESCRIPTION);
        Option user = OptionBuilder.create(USER_CHAR_OPTION);

        OptionBuilder.hasArg();
        OptionBuilder.withArgName(MAX_USERS_ARG_NAME);
        OptionBuilder.withLongOpt(MAX_USERS_LONG_OPTION);
        OptionBuilder.withDescription(MAX_USERS_DESCRIPTION);
        Option maxUsers = OptionBuilder.create();

        OptionBuilder.hasArg();
        OptionBuilder.withArgName(MIN_RATINGS_BY_MOVIE_ARG_NAME);
        OptionBuilder.withLongOpt(MIN_RATINGS_BY_MOVIE_LONG_OPTION);
        OptionBuilder.withDescription(MIN_RATINGS_BY_MOVIE_DESCRIPTION);
        Option minRatingsByMovie = OptionBuilder.create();

        OptionBuilder.hasArg();
        OptionBuilder.withArgName(MIN_COMMON_RATINGS_ARG_NAME);
        OptionBuilder.withLongOpt(MIN_COMMON_RATINGS_LONG_OPTION);
        OptionBuilder.withDescription(MIN_COMMON_RATINGS_DESCRIPTION);
        Option minCommonRatings = OptionBuilder.create();

        OptionBuilder.hasArg();
        OptionBuilder.withArgName(SIMILARITY_ARG_NAME);
        OptionBuilder.withLongOpt(SIMILARITY_LONG_OPTION);
        OptionBuilder.withDescription(SIMILARITY_DESCRIPTION);
        Option similarity = OptionBuilder.create(SIMILARITY_CHAR_OPTION);

        return new Options().addOption(out).addOption(ratings).addOption(movies).addOption(user).addOption(maxUsers)
                .addOption(minRatingsByMovie).addOption(minCommonRatings).addOption(similarity);
    }

    /**
     * Command line entry point. Delegates to {@link #parse(String[])}. A malformed command line
     * prints the parser message followed by the usage; any other failure prints its message and
     * its stack trace. Everything is reported on the standard error stream except the usage, and
     * the method returns normally in every case.
     *
     * @param args the command line arguments
     */
    public static void main(String[] args) {
        try {
            ImportCommandParser.parse(args);
        } catch (ParseException e) {
            System.err.println(e.getMessage());
            ImportCommandParser.printUsage();
        } catch (Exception e) {
            System.err.println(e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * Parses and checks the given arguments, then calls the {@code convert} method of
     * {@link FileImporter} with the parsed values.
     *
     * @param args the command line arguments
     * @throws ParseException if the command line does not match the declared options
     * @throws IOException if an input file cannot be found, or if the conversion reports an I/O
     *         error
     * @throws IllegalArgumentException if an option value is invalid or if an option is given
     *         without the options it depends on
     */
    public static void parse(String[] args) throws ParseException, IOException {
        CommandLineParser parser = new BasicParser();
        CommandLine commandLine = parser.parse(getOptions(), args);

        String outputFilepath = commandLine.getOptionValue(OUT_LONG_OPTION);
        String[] ratingFilepaths = commandLine.getOptionValues(RATINGS_LONG_OPTION);
        // getOptionValues() returns null when the option is absent: use an empty array instead
        String[] mappingFilepaths = Optional.fromNullable(commandLine.getOptionValues(MOVIES_LONG_OPTION))
                .or(new String[0]);
        Optional<String> user = Optional.fromNullable(commandLine.getOptionValue(USER_LONG_OPTION));
        Optional<Integer> maxUsers = parseMaxUser(commandLine.getOptionValue(MAX_USERS_LONG_OPTION));
        Optional<Integer> minRatingsByMovie = parseMinRatingsByMovie(
                commandLine.getOptionValue(MIN_RATINGS_BY_MOVIE_LONG_OPTION));
        Optional<Integer> minCommonRatings = parseMinCommonRatings(
                commandLine.getOptionValue(MIN_COMMON_RATINGS_LONG_OPTION));
        Optional<UserSimilarityType> similarityType = parseSimilarityType(
                commandLine.getOptionValue(SIMILARITY_LONG_OPTION));

        check(outputFilepath, ratingFilepaths, mappingFilepaths, maxUsers, minCommonRatings, user, similarityType);

        FileImporter.convert(outputFilepath, Arrays.asList(ratingFilepaths), Arrays.asList(mappingFilepaths), user,
                maxUsers, minRatingsByMovie, minCommonRatings, similarityType);
    }

    /**
     * Parses and checks the "max-users" option.
     *
     * @param optionValue the raw option value, or {@code null} if the option is absent
     * @return the parsed value, or an absent value if the option is absent
     * @throws IllegalArgumentException if the value is not an integer greater than 0
     */
    private static Optional<Integer> parseMaxUser(String optionValue) {
        return parsePositiveInteger(optionValue,
                "Error: the maximum number of users is not a valid integer.",
                "Error: the maximum number of users must be greater than 0.");
    }

    /**
     * Parses and checks the "min-common-ratings" option.
     *
     * @param optionValue the raw option value, or {@code null} if the option is absent
     * @return the parsed value, or an absent value if the option is absent
     * @throws IllegalArgumentException if the value is not an integer greater than 0
     */
    private static Optional<Integer> parseMinCommonRatings(String optionValue) {
        return parsePositiveInteger(optionValue,
                "Error: the minimum ratings in common is not a valid integer.",
                "Error: the minimum number of common ratings must be greater than 0.");
    }

    /**
     * Parses and checks the "min-ratings-by-movie" option.
     *
     * @param optionValue the raw option value, or {@code null} if the option is absent
     * @return the parsed value, or an absent value if the option is absent
     * @throws IllegalArgumentException if the value is not an integer greater than 0
     */
    private static Optional<Integer> parseMinRatingsByMovie(String optionValue) {
        return parsePositiveInteger(optionValue,
                "Error: the minimum ratings for a movie is not a valid integer.",
                "Error: the minimum number of ratings for a movie must be greater than 0.");
    }

    /**
     * Parses an optional, strictly positive integer option.
     *
     * @param optionValue the raw option value, or {@code null} if the option is absent
     * @param notAnIntegerMessage message of the exception thrown if the value is not an integer
     * @param notPositiveMessage message of the exception thrown if the value is lower than 1
     * @return the parsed value, or an absent value if {@code optionValue} is {@code null}
     * @throws IllegalArgumentException if the value is not an integer greater than 0
     */
    private static Optional<Integer> parsePositiveInteger(String optionValue, String notAnIntegerMessage,
            String notPositiveMessage) {
        if (optionValue == null) {
            return Optional.absent();
        }

        int result;

        // parse
        try {
            result = Integer.parseInt(optionValue);
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException(notAnIntegerMessage, e);
        }

        // check
        if (result <= 0) {
            throw new IllegalArgumentException(notPositiveMessage);
        }

        return Optional.of(result);
    }

    /**
     * Parses the "similarity" option. The name of the similarity is matched case-insensitively
     * against the constants of {@link UserSimilarityType}.
     *
     * @param optionValue the raw option value, or {@code null} if the option is absent
     * @return the parsed similarity, or an absent value if the option is absent
     * @throws IllegalArgumentException if no similarity has the given name
     */
    private static Optional<UserSimilarityType> parseSimilarityType(String optionValue) {
        if (optionValue == null) {
            return Optional.absent();
        }

        try {
            // Locale.ROOT keeps the conversion independent of the default locale of the JVM (for
            // example, "i" is not upper-cased to "I" with a Turkish locale).
            return Optional.of(UserSimilarityType.valueOf(optionValue.toUpperCase(Locale.ROOT)));
        } catch (IllegalArgumentException e) {
            throw new IllegalArgumentException("Error: unable to find the given similarity (" + optionValue + ")",
                    e);
        }
    }

    /**
     * Prints the usage of the import command on the standard output, followed by the list of the
     * available user similarities in lower case.
     */
    public static void printUsage() {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(COMMAND_SYNTAX, getOptions());

        System.out.print("Available user similarities: ");
        for (UserSimilarityType current : UserSimilarityType.values()) {
            // same locale-independent case conversion as the one used to parse the option
            System.out.print(current.name().toLowerCase(Locale.ROOT) + "  ");
        }
        System.out.println();
    }

    /**
     * Instantiates a new object. Private to prevent instantiation.
     */
    private ImportCommandParser() {
        throw new AssertionError();
    }
}