edu.wpi.checksims.ChecksimsCommandLine.java Source code

Java tutorial

Introduction

Here is the source code for edu.wpi.checksims.ChecksimsCommandLine.java

Source

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * See LICENSE.txt included in this distribution for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at LICENSE.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * Copyright (c) 2014-2015 Matthew Heon and Dolan Murvihill
 */

package edu.wpi.checksims;

import edu.wpi.checksims.algorithm.AlgorithmRegistry;
import edu.wpi.checksims.algorithm.commoncode.CommonCodeHandler;
import edu.wpi.checksims.algorithm.commoncode.CommonCodeLineRemovalHandler;
import edu.wpi.checksims.algorithm.commoncode.CommonCodePassthroughHandler;
import edu.wpi.checksims.algorithm.preprocessor.PreprocessorRegistry;
import edu.wpi.checksims.algorithm.preprocessor.SubmissionPreprocessor;
import edu.wpi.checksims.algorithm.similaritymatrix.output.MatrixPrinter;
import edu.wpi.checksims.algorithm.similaritymatrix.output.MatrixPrinterRegistry;
import edu.wpi.checksims.submission.EmptySubmissionException;
import edu.wpi.checksims.submission.Submission;
import edu.wpi.checksims.token.TokenType;
import edu.wpi.checksims.token.tokenizer.Tokenizer;
import edu.wpi.checksims.util.output.OutputAsFilePrinter;
import edu.wpi.checksims.util.output.OutputPrinter;
import org.apache.commons.cli.*;
import org.apache.commons.collections4.list.SetUniqueList;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.impl.SimpleLogger;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.*;

import static com.google.common.base.Preconditions.checkNotNull;

/**
 * Parses Checksims' command-line options.
 *
 * All members are static. {@link #parseCLI(String[])} is the main entry point; the remaining package-private
 * methods are the individual parsing steps it composes.
 */
public final class ChecksimsCommandLine {
    // Not initialized eagerly: parseCLI() assigns it through startLogger(), which sets the SimpleLogger system
    // properties before requesting a logger. NOTE(review): presumably those properties must be in place before
    // the first logger is created - confirm against the SLF4J SimpleLogger documentation.
    private static Logger logs;

    /**
     * Obtain the class logger, creating it on first use.
     *
     * The individual parsing steps may be called without parseCLI() having configured a logger first, so every
     * step fetches the logger through this method instead of reading the field directly.
     *
     * @return Logger for this class, never null
     */
    private static Logger getLogs() {
        if (logs == null) {
            logs = LoggerFactory.getLogger(ChecksimsCommandLine.class);
        }

        return logs;
    }

    /**
     * Join the string forms of the given names with ", ", without a trailing separator.
     *
     * @param names Names to join
     * @return Comma-separated list of names, empty if there are no names
     */
    private static String joinNames(java.util.stream.Stream<?> names) {
        return names.map((name) -> String.valueOf(name)).collect(java.util.stream.Collectors.joining(", "));
    }

    /**
     * Configure the SimpleLogger system properties and create the logger for this class.
     *
     * @param level Logging level to use. Supported levels are 0 (nonverbose), 1 (verbose), 2 (very verbose)
     * @return Logger with appropriate logging level
     * @throws IllegalArgumentException Thrown if level is not 0, 1, or 2. No system property is modified in this case
     */
    static Logger startLogger(int level) {
        final String logLevel;

        switch (level) {
            case 0:
                logLevel = "INFO";
                break;
            case 1:
                // Verbose
                logLevel = "DEBUG";
                break;
            case 2:
                // Very verbose
                logLevel = "TRACE";
                break;
            default:
                throw new IllegalArgumentException("Unrecognized verbosity level passed to startLogger!");
        }

        System.setProperty(SimpleLogger.DEFAULT_LOG_LEVEL_KEY, logLevel);
        System.setProperty(SimpleLogger.SHOW_LOG_NAME_KEY, "false");
        System.setProperty(SimpleLogger.SHOW_THREAD_NAME_KEY, "false");
        System.setProperty(SimpleLogger.LEVEL_IN_BRACKETS_KEY, "true");

        return LoggerFactory.getLogger(ChecksimsCommandLine.class);
    }

    /**
     * Build the set of options Checksims accepts. A fresh Options instance is created on every call.
     *
     * @return CLI options used in Checksims
     */
    static Options getOpts() {
        Options opts = new Options();

        // Options taking an argument
        opts.addOption(new Option("a", "algorithm", true, "algorithm to use"));
        opts.addOption(new Option("t", "token", true, "tokenization type to use"));
        opts.addOption(new Option("o", "output", true, "output format"));
        opts.addOption(new Option("f", "file", true, "file to output to"));
        opts.addOption(new Option("p", "preprocess", true, "preprocessors to apply"));
        opts.addOption(new Option("j", "jobs", true, "number of threads to use"));

        // Plain flags
        opts.addOption(new Option("v", "verbose", false, "specify verbose output"));
        opts.addOption(new Option("vv", "veryverbose", false,
                "specify very verbose output. supercedes -v if both specified."));
        opts.addOption(new Option("h", "help", false, "show usage information"));

        opts.addOption(new Option("c", "common", true, "remove common code contained in given directory"));
        opts.addOption(new Option("r", "recursive", false,
                "recursively traverse subdirectories to generate submissions"));
        opts.addOption(new Option("version", false, "print version of Checksims"));

        return opts;
    }

    /**
     * Parse a given set of CLI arguments into a Commons CLI CommandLine.
     *
     * @param args Arguments to parse
     * @return CommandLine from parsed arguments
     * @throws ParseException Thrown on error parsing arguments
     */
    static CommandLine parseOpts(String[] args) throws ParseException {
        checkNotNull(args);

        Parser parser = new GnuParser();

        return parser.parse(getOpts(), args);
    }

    /**
     * Print help message to standard error, then terminate the JVM with exit status 0.
     *
     * This method does not return.
     */
    static void printHelp() {
        HelpFormatter f = new HelpFormatter();
        PrintWriter systemErr = new PrintWriter(System.err, true);

        f.printHelp(systemErr, 80, "checksims [args] glob directory [directory2 ...]",
                "checksims: check similarity of student submissions", getOpts(), 2, 4, "");

        // Each list of names is printed without a line terminator; the leading "\n" of the following println
        // ends the line.
        System.err.println("\nSupported Similarity Detection Algorithms:");
        System.err.print(joinNames(AlgorithmRegistry.getInstance().getSupportedImplementationNames().stream()));
        System.err.println(
                "\nDefault algorithm is " + AlgorithmRegistry.getInstance().getDefaultImplementationName());

        System.err.println("\nSupported Output Strategies:");
        System.err.print(joinNames(MatrixPrinterRegistry.getInstance().getSupportedImplementationNames().stream()));
        System.err.println(
                "\nDefault strategy is " + MatrixPrinterRegistry.getInstance().getDefaultImplementationName());

        System.err.println("\nAvailable Preprocessors:");
        System.err.print(joinNames(PreprocessorRegistry.getInstance().getSupportedImplementationNames().stream()));
        System.err.println();

        System.err.println("\nChecksims Version " + ChecksimsRunner.getChecksimsVersion() + "\n\n");

        System.exit(0);
    }

    /**
     * Parse basic CLI flags and produce a ChecksimsConfig.
     *
     * Handles the algorithm (-a), tokenization (-t), output file (-f), thread count (-j), preprocessor (-p) and
     * output strategy (-o) options. Options which are not present leave the corresponding setting of a newly
     * constructed ChecksimsConfig untouched.
     *
     * @param cli Parsed command line
     * @return Config derived from parsed CLI
     * @throws ChecksimsException Thrown on invalid user input or internal error
     */
    static ChecksimsConfig parseBaseFlags(CommandLine cli) throws ChecksimsException {
        checkNotNull(cli);

        // Create a base config to work from
        ChecksimsConfig config = new ChecksimsConfig();

        // Parse plagiarism detection algorithm
        // Selecting an algorithm also selects its default tokenization, which -t may override below
        if (cli.hasOption("a")) {
            config = config.setAlgorithm(
                    AlgorithmRegistry.getInstance().getImplementationInstance(cli.getOptionValue("a")));
            config = config.setTokenization(config.getAlgorithm().getDefaultTokenType());
        }

        // Parse tokenization
        if (cli.hasOption("t")) {
            config = config.setTokenization(TokenType.fromString(cli.getOptionValue("t")));
        }

        // Parse file output value
        if (cli.hasOption("f")) {
            File outputFile = new File(cli.getOptionValue("f"));
            OutputPrinter filePrinter = new OutputAsFilePrinter(outputFile);
            config = config.setOutputMethod(filePrinter);
            getLogs().info("Saving output to file {}", outputFile.getName());
        }

        // Parse number of threads to use
        if (cli.hasOption("j")) {
            String rawThreads = cli.getOptionValue("j");
            int numThreads;

            try {
                numThreads = Integer.parseInt(rawThreads);
            } catch (NumberFormatException e) {
                // A non-numeric thread count is invalid user input, so report it like every other input error
                // instead of leaking an unchecked exception to the caller
                ChecksimsException invalid = new ChecksimsException(
                        "Thread count must be an integer, but got \"" + rawThreads + "\"!");
                invalid.initCause(e);
                throw invalid;
            }

            if (numThreads < 1) {
                throw new ChecksimsException("Thread count must be positive!");
            }

            config = config.setNumThreads(numThreads);
        }

        // Parse preprocessors
        // Ensure no duplicates
        if (cli.hasOption("p")) {
            List<SubmissionPreprocessor> preprocessors = SetUniqueList.setUniqueList(new ArrayList<>());
            for (String name : cli.getOptionValue("p").split(",")) {
                preprocessors.add(PreprocessorRegistry.getInstance().getImplementationInstance(name));
            }
            config = config.setPreprocessors(preprocessors);
        }

        // Parse output strategies
        // Ensure no duplicates, while keeping the order in which the user listed them
        if (cli.hasOption("o")) {
            String[] desiredStrategies = cli.getOptionValue("o").split(",");
            Set<String> deduplicatedStrategies = new LinkedHashSet<>(Arrays.asList(desiredStrategies));

            // split() drops trailing empty strings, so a value consisting only of commas yields no names
            if (deduplicatedStrategies.isEmpty()) {
                throw new ChecksimsException("Error: did not obtain a valid output strategy!");
            }

            // Convert to MatrixPrinters
            List<MatrixPrinter> printers = new ArrayList<>();
            for (String name : deduplicatedStrategies) {
                printers.add(MatrixPrinterRegistry.getInstance().getImplementationInstance(name));
            }

            config = config.setOutputPrinters(printers);
        }

        return config;
    }

    /**
     * Parse common code removal settings.
     *
     * If the -c flag is not present, a CommonCodePassthroughHandler will be returned. The same happens, after
     * logging a warning, if the common code submission turns out to be empty.
     *
     * TODO add unit tests
     *
     * @param cli Parsed command line options
     * @param glob Glob matcher to use when building common code submission
     * @param tokenizer Tokenizer to use when building common code submission
     * @param recursive Whether to recursively traverse common code directory
     * @return Handler for common code
     * @throws ChecksimsException Thrown if no files matching the glob pattern are found in the common code directory
     * @throws IOException Thrown on error creating common code submission
     */
    static CommonCodeHandler parseCommonCodeSetting(CommandLine cli, String glob, Tokenizer tokenizer,
            boolean recursive) throws ChecksimsException, IOException {
        checkNotNull(cli);
        checkNotNull(glob);

        // Without -c there is nothing to remove
        if (!cli.hasOption("c")) {
            return CommonCodePassthroughHandler.getInstance();
        }

        File commonCodeDir = new File(cli.getOptionValue("c"));
        Submission commonCode = Submission.submissionFromDir(commonCodeDir, glob, tokenizer, recursive);

        try {
            return new CommonCodeLineRemovalHandler(commonCode);
        } catch (EmptySubmissionException e) {
            // The common code submission was empty
            // Inform the user we're not actually removing common code because of this
            getLogs().warn(e.getMessage());
            return CommonCodePassthroughHandler.getInstance();
        }
    }

    /**
     * Build the collection of submissions Checksims will be run on.
     *
     * The first non-option argument is taken to be the glob pattern and is skipped; every following argument is
     * treated as a directory containing submissions.
     *
     * TODO add unit tests
     *
     * @param cli Parsed command line options
     * @param glob Glob matcher to use when building submissions
     * @param tokenizer Tokenizer to use when building submissions
     * @param recursive Whether to recursively traverse when building submissions
     * @return Collection of submissions which will be used to run Checksims
     * @throws IOException Thrown on issue reading files or traversing directories to build submissions
     * @throws ChecksimsException Thrown if fewer than 2 non-option arguments were given, or no submissions were
     *                            found
     */
    static Set<Submission> getSubmissions(CommandLine cli, String glob, Tokenizer tokenizer, boolean recursive)
            throws IOException, ChecksimsException {
        checkNotNull(cli);
        checkNotNull(glob);

        String[] unusedArgs = cli.getArgs();

        if (unusedArgs.length < 2) {
            throw new ChecksimsException("Expected at least 2 arguments: glob pattern and a submission directory!");
        }

        // Generate submissions to work on
        // The first element in args should be the glob matcher, so start at index 1
        Set<Submission> submissions = new HashSet<>();
        for (int i = 1; i < unusedArgs.length; i++) {
            getLogs().debug("Adding directory {}", unusedArgs[i]);
            submissions.addAll(Submission.submissionListFromDir(new File(unusedArgs[i]), glob, tokenizer, recursive));
        }

        if (submissions.isEmpty()) {
            throw new ChecksimsException("Did not obtain any submissions to operate on!");
        }

        return submissions;
    }

    /**
     * Parse CLI arguments into a ChecksimsConfig.
     *
     * Also configures the logger. If the help or version option is present, the requested information is printed
     * and the JVM exits with status 0 before anything else is parsed.
     *
     * TODO add unit tests
     *
     * @param args CLI arguments to parse
     * @return Config created from CLI arguments
     * @throws ParseException Thrown on error parsing CLI arguments
     * @throws ChecksimsException Thrown on invalid user input, such as fewer than two non-option arguments
     * @throws IOException Thrown on error building a submission from files
     */
    static ChecksimsConfig parseCLI(String[] args) throws ParseException, ChecksimsException, IOException {
        checkNotNull(args);

        CommandLine cli = parseOpts(args);

        // Print CLI Help (terminates the JVM)
        if (cli.hasOption("h")) {
            printHelp();
        }

        // Print version
        if (cli.hasOption("version")) {
            System.err.println("Checksims version " + ChecksimsRunner.getChecksimsVersion());
            System.exit(0);
        }

        // Parse verbose setting; -vv takes precedence over -v
        if (cli.hasOption("vv")) {
            logs = startLogger(2);
        } else if (cli.hasOption("v")) {
            logs = startLogger(1);
        } else {
            logs = startLogger(0);
        }

        // Parse recursive flag
        boolean recursive = cli.hasOption("r");
        if (recursive) {
            logs.trace("Recursively traversing subdirectories of student directories");
        }

        // Get unconsumed arguments
        String[] unusedArgs = cli.getArgs();

        if (unusedArgs.length < 2) {
            throw new ChecksimsException(
                    "Expecting at least two arguments: File match glob, and folder(s) to check");
        }

        // First non-flag argument is the glob matcher
        // All the rest are directories containing student submissions
        String glob = unusedArgs[0];

        // First, parse basic flags
        ChecksimsConfig config = parseBaseFlags(cli);

        // Set up a tokenizer to use
        Tokenizer tokenizer = Tokenizer.getTokenizer(config.getTokenization());

        // Next, parse common code settings
        CommonCodeHandler handler = parseCommonCodeSetting(cli, glob, tokenizer, recursive);
        config = config.setCommonCodeHandler(handler);

        // Next, build submissions
        Set<Submission> submissions = getSubmissions(cli, glob, tokenizer, recursive);
        config = config.setSubmissions(submissions);

        logs.trace("CLI parsing complete!");

        return config;
    }
}