org.wikidata.wdtk.client.ClientConfiguration.java Source code

Java tutorial

Introduction

Here is the source code for org.wikidata.wdtk.client.ClientConfiguration.java

Source

package org.wikidata.wdtk.client;

/*
 * #%L
 * Wikidata Toolkit Examples
 * %%
 * Copyright (C) 2014 Wikidata Toolkit Developers
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import java.io.FileReader;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.ini4j.Ini;
import org.ini4j.Profile.Section;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.wikidata.wdtk.datamodel.helpers.Datamodel;
import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue;

/**
 * This class handles the program arguments from the conversion command line
 * tool.
 * 
 * @author Michael Günther
 * @author Markus Kroetzsch
 */
public class ClientConfiguration {

    static final Logger logger = LoggerFactory.getLogger(ClientConfiguration.class);

    /**
     * Short command-line alternative to {@link #OPTION_HELP}.
     */
    public static final String CMD_OPTION_HELP = "h";
    /**
     * Short command-line alternative to {@link #OPTION_CONFIG_FILE}.
     */
    public static final String CMD_OPTION_CONFIG_FILE = "c";
    /**
     * Short command-line alternative to {@link #OPTION_DUMP_LOCATION}.
     */
    public static final String CMD_OPTION_DUMP_LOCATION = "d";
    /**
     * Short command-line alternative to {@link #OPTION_QUIET}.
     */
    public static final String CMD_OPTION_QUIET = "q";
    /**
     * Short command-line alternative to {@link #OPTION_OFFLINE_MODE}.
     */
    public static final String CMD_OPTION_OFFLINE_MODE = "n";
    /**
     * Short command-line alternative to {@link #OPTION_ACTION}.
     */
    public static final String CMD_OPTION_ACTION = "a";
    /**
     * Short command-line alternative to {@link #OPTION_OUTPUT_DESTINATION}.
     */
    public static final String CMD_OPTION_OUTPUT_DESTINATION = "o";
    /**
     * Short command-line alternative to {@link #OPTION_OUTPUT_COMPRESSION}.
     */
    public static final String CMD_OPTION_OUTPUT_COMPRESSION = "z";
    /**
     * Short command-line alternative to {@link #OPTION_OUTPUT_STDOUT}.
     */
    public static final String CMD_OPTION_OUTPUT_STDOUT = "s";
    /**
     * Short command-line alternative to {@link #OPTION_CREATE_REPORT}.
     */
    public static final String CMD_OPTION_CREATE_REPORT = "r";

    /**
     * Name of the long command line option for printing the help text.
     */
    public static final String OPTION_HELP = "help";
    /**
     * Name of the long command line option for specifying the location of the
     * configuration file.
     */
    public static final String OPTION_CONFIG_FILE = "config";
    /**
     * Name of the long command line option and configuration file field for
     * specifying the base location of the dump files to use.
     */
    public static final String OPTION_DUMP_LOCATION = "dumps";
    /**
     * Name of the long command line option and configuration file field for
     * switching to offline mode.
     */
    public static final String OPTION_OFFLINE_MODE = "offline";
    /**
     * Name of the long command line option and configuration file field for
     * requesting that no messages are logged to stdout.
     */
    public static final String OPTION_QUIET = "quiet";
    /**
     * Name of the long command line option for selecting an action that should
     * be performed. Available actions are registered in the field
     * {@link #KNOWN_ACTIONS}.
     */
    public static final String OPTION_ACTION = "action";
    /**
     * Name of the long command line option and configuration file field for
     * specifying the language filters.
     */
    public static final String OPTION_FILTER_LANGUAGES = "fLang";
    /**
     * Name of the long command line option and configuration file field for
     * specifying the site link filters.
     */
    public static final String OPTION_FILTER_SITES = "fSite";
    /**
     * Name of the long command line option and configuration file field for
     * specifying the property filters.
     */
    public static final String OPTION_FILTER_PROPERTIES = "fProp";
    /**
     * Name of the long command line option to create a report file about the
     * files produced by DumpProcessingOutputActions.
     */
    public static final String OPTION_CREATE_REPORT = "report";
    /**
     * Name of the long command line option and configuration file field for
     * defining the destination (usually output file name) of actions that
     * produce output.
     */
    public static final String OPTION_OUTPUT_DESTINATION = DumpProcessingOutputAction.OPTION_DESTINATION;
    /**
     * Name of the long command line option and configuration file field for
     * specifying the compression to use for the output.
     */
    public static final String OPTION_OUTPUT_COMPRESSION = DumpProcessingOutputAction.OPTION_COMPRESSION;
    /**
     * Name of the long command line option and configuration file field for
     * directing output to stdout.
     */
    public static final String OPTION_OUTPUT_STDOUT = DumpProcessingOutputAction.OPTION_USE_STDOUT;
    /**
     * Name of the long command line option and configuration file field for
     * specifying the tasks for RDF serialization.
     */
    public static final String OPTION_OUTPUT_RDF_TYPE = "rdftasks";

    /**
     * Registry of the actions that can be selected by name. Keys are the
     * lower-case action names; values are the classes implementing the
     * action. {@link #makeDumpProcessingAction(String)} instantiates these
     * classes through their no-argument constructor.
     */
    static final Map<String, Class<? extends DumpProcessingOutputAction>> KNOWN_ACTIONS = new HashMap<>();
    static {
        // Register the actions that are available by default.
        KNOWN_ACTIONS.put("rdf", RdfSerializationAction.class);
        KNOWN_ACTIONS.put("json", JsonSerializationAction.class);
    }

    /**
     * Available command-line options.
     */
    static final Options options = new Options();
    static {
        initOptions();
    }

    /**
     * List of actions that were configured to be performed.
     */
    final List<DumpProcessingAction> actions;

    /**
     * Global configuration that defines if the operations should avoid all
     * internet access.
     */
    boolean offlineMode = false;

    /**
     * String representation of the directory where the dump files should be
     * sought.
     */
    String dumpLocation = null;

    /**
     * String representation of the path where the final report should be
     * stored.
     */
    String reportFilename = null;

    /**
     * True if no status messages should be written to stdout.
     */
    boolean quiet = false;

    /**
     * Set of language codes to use as a filter, or null if no filter should
     * be used.
     */
    Set<String> filterLanguages = null;

    /**
     * Set of site keys to use as a filter, or null if no filter should be used.
     */
    Set<String> filterSites = null;

    /**
     * Set of property ids to use as a filter, or null if no filter should be
     * used.
     */
    Set<PropertyIdValue> filterProperties = null;

    /**
     * Creates a configuration from the raw program arguments. The arguments
     * are evaluated immediately; the actions derived from them are available
     * through {@link #getActions()}.
     * 
     * @param args
     *            command-line arguments
     */
    public ClientConfiguration(String[] args) {
        actions = handleArguments(args);
    }

    /**
     * Returns the actions that were configured for this object, in the order
     * in which they were registered.
     * 
     * @return list of actions
     */
    public List<DumpProcessingAction> getActions() {
        return actions;
    }

    /**
     * Returns the string representation of the directory where the dump files
     * should be sought, or null if no location was configured.
     * 
     * @return dump location, possibly null
     */
    public String getDumpLocation() {
        return dumpLocation;
    }

    /**
     * Tells whether offline mode is enabled, i.e., whether all operations
     * should be performed without accessing the Internet.
     * 
     * @return true if in offline mode
     */
    public boolean getOfflineMode() {
        return offlineMode;
    }

    /**
     * Tells whether the application should refrain from writing anything to
     * stdout. The flag is either requested explicitly, or set indirectly
     * because one of the actions wants to write its output to stdout.
     * 
     * @return true if the application should not log messages to stdout
     */
    public boolean isQuiet() {
        return quiet;
    }

    /**
     * Returns the destination where a report file should be created, or null
     * if the client should not create such a file.
     * 
     * @return report filename, possibly null
     */
    public String getReportFilename() {
        return reportFilename;
    }

    /**
     * Returns the language codes to be used as a filter. The result is null
     * if no filter is set; an empty set means that all languages should be
     * filtered.
     * 
     * @return language filter, possibly null
     */
    public Set<String> getFilterLanguages() {
        return filterLanguages;
    }

    /**
     * Returns the site keys to be used as a filter for site links. The result
     * is null if no filter is set; an empty set means that all site links
     * should be filtered.
     * 
     * @return site key filter, possibly null
     */
    public Set<String> getFilterSiteKeys() {
        return filterSites;
    }

    /**
     * Returns the property ids to be used as a filter for statements. The
     * result is null if no filter is set; an empty set means that all
     * statements should be filtered.
     * 
     * @return property filter, possibly null
     */
    public Set<PropertyIdValue> getFilterProperties() {
        return filterProperties;
    }

    /**
     * Writes the usage information for all registered command-line options to
     * the console.
     */
    public void printHelp() {
        new HelpFormatter().printHelp("wdtk-client", options);
    }

    /**
     * Interprets the arguments of the main function. Global settings found in
     * the arguments are stored in the fields of this object; the actions that
     * were requested, either directly on the command line or in a
     * configuration file, are returned. The result is empty if the arguments
     * cannot be parsed, if no arguments were given, or if the help text was
     * requested.
     * 
     * @param args
     *            array of arguments from the main function.
     * @return list of {@link DumpProcessingAction}
     */
    private List<DumpProcessingAction> handleArguments(String[] args) {
        final CommandLine commandLine;
        try {
            commandLine = new GnuParser().parse(options, args);
        } catch (ParseException e) {
            logger.error("Failed to parse arguments: " + e.getMessage());
            return Collections.<DumpProcessingAction>emptyList();
        }

        // Nothing is configured if only the help text is to be printed:
        boolean helpRequested = commandLine.hasOption(CMD_OPTION_HELP);
        if (helpRequested || args.length == 0) {
            return Collections.<DumpProcessingAction>emptyList();
        }

        handleGlobalArguments(commandLine);

        List<DumpProcessingAction> result = new ArrayList<>();

        // An action that is given directly on the command line comes first.
        if (commandLine.hasOption(CMD_OPTION_ACTION)) {
            DumpProcessingAction commandLineAction = handleActionArguments(commandLine);
            if (commandLineAction != null) {
                result.add(commandLineAction);
            }
        }

        if (!commandLine.hasOption(CMD_OPTION_CONFIG_FILE)) {
            return result;
        }

        // Actions defined in the configuration file are appended afterwards.
        String configPath = commandLine.getOptionValue(CMD_OPTION_CONFIG_FILE);
        try {
            result.addAll(readConfigFile(configPath));
        } catch (IOException e) {
            logger.error("Failed to read configuration file \"" + configPath + "\": " + e.toString());
        }

        return result;
    }

    /**
     * Reads the properties defined in a configuration file. Every section
     * other than "general" is turned into a {@link DumpProcessingAction} that
     * is named after the section; sections for which no action can be created
     * are skipped. The "general" section (matched case-insensitively) is not
     * part of the result: it is passed to
     * {@link ClientConfiguration#handleGlobalArguments(Section)}, which
     * updates the global settings of this object.
     * 
     * @param path
     *            filename and path of the configuration file
     * @return the list with the actions configured in the file
     * @throws IOException
     *             if the file cannot be opened, read, or closed
     */
    private List<DumpProcessingAction> readConfigFile(String path) throws IOException {
        List<DumpProcessingAction> result = new ArrayList<>();

        // try-with-resources makes sure the file handle is released even if
        // reading or interpreting the configuration fails.
        try (FileReader reader = new FileReader(path)) {
            Ini ini = new Ini(reader);

            for (Section section : ini.values()) {
                if ("general".equalsIgnoreCase(section.getName())) {
                    handleGlobalArguments(section);
                } else {
                    DumpProcessingAction action = handleActionArguments(section);
                    if (action != null) {
                        action.setActionName(section.getName());
                        result.add(action);
                    }
                }
            }
        }

        return result;
    }

    /**
     * Evaluates the command-line arguments that affect the run as a whole
     * (dump location, offline mode, quiet mode, report file, and filters) and
     * stores them in the fields of this object. Settings whose option is not
     * present on the command line are left untouched.
     * 
     * @param cmd
     *            {@link CommandLine} objects; contains the command line
     *            arguments parsed by a {@link CommandLineParser}
     */
    private void handleGlobalArguments(CommandLine cmd) {
        if (cmd.hasOption(CMD_OPTION_DUMP_LOCATION)) {
            String location = cmd.getOptionValue(CMD_OPTION_DUMP_LOCATION);
            this.dumpLocation = location;
        }

        // Flags can only be switched on here, never off.
        this.offlineMode |= cmd.hasOption(CMD_OPTION_OFFLINE_MODE);
        this.quiet |= cmd.hasOption(CMD_OPTION_QUIET);

        if (cmd.hasOption(CMD_OPTION_CREATE_REPORT)) {
            String report = cmd.getOptionValue(CMD_OPTION_CREATE_REPORT);
            this.reportFilename = report;
        }

        // The filter options have no short form, so the long names are used.
        if (cmd.hasOption(OPTION_FILTER_LANGUAGES)) {
            String languages = cmd.getOptionValue(OPTION_FILTER_LANGUAGES);
            setLanguageFilters(languages);
        }

        if (cmd.hasOption(OPTION_FILTER_SITES)) {
            String sites = cmd.getOptionValue(OPTION_FILTER_SITES);
            setSiteFilters(sites);
        }

        if (cmd.hasOption(OPTION_FILTER_PROPERTIES)) {
            String properties = cmd.getOptionValue(OPTION_FILTER_PROPERTIES);
            setPropertyFilters(properties);
        }
    }

    /**
     * Analyses the content of the general section of an ini configuration file
     * and stores the settings found there in the fields of this object. Keys
     * are matched case-sensitively against the option names; unknown keys are
     * reported with a warning. The boolean settings (offline mode and quiet
     * mode) can only be switched on: any value other than "true" (ignoring
     * case), including a missing value, leaves the current setting unchanged.
     * 
     * @param section
     *            {@link Section} with name "general"
     */
    private void handleGlobalArguments(Section section) {
        for (String key : section.keySet()) {
            String value = section.get(key);
            switch (key) {
            case OPTION_OFFLINE_MODE:
                // parseBoolean is null-safe and independent of the locale
                if (Boolean.parseBoolean(value)) {
                    this.offlineMode = true;
                }
                break;
            case OPTION_QUIET:
                if (Boolean.parseBoolean(value)) {
                    this.quiet = true;
                }
                break;
            case OPTION_CREATE_REPORT:
                this.reportFilename = value;
                break;
            case OPTION_DUMP_LOCATION:
                this.dumpLocation = value;
                break;
            case OPTION_FILTER_LANGUAGES:
                setLanguageFilters(value);
                break;
            case OPTION_FILTER_SITES:
                setSiteFilters(value);
                break;
            case OPTION_FILTER_PROPERTIES:
                setPropertyFilters(value);
                break;
            default:
                logger.warn("Unrecognized option: " + key);
                break;
            }
        }
    }

    /**
     * Creates the {@link DumpProcessingAction} that was requested with the
     * action option on the command line and hands every option found on the
     * command line over to it. The action name is converted to lower case
     * before it is looked up.
     * 
     * @param cmd
     *            {@link CommandLine} objects; contains the command line
     *            arguments parsed by a {@link CommandLineParser}
     * @return {@link DumpProcessingAction} for the given arguments, or null
     *         if the action could not be created
     */
    private DumpProcessingAction handleActionArguments(CommandLine cmd) {
        String actionName = cmd.getOptionValue(CMD_OPTION_ACTION).toLowerCase();
        DumpProcessingAction action = makeDumpProcessingAction(actionName);

        if (action != null) {
            // The action receives all options, identified by their long name.
            for (Option given : cmd.getOptions()) {
                action.setOption(given.getLongOpt(), given.getValue());
            }
            checkDuplicateStdOutOutput(action);
        }

        return action;
    }

    /**
     * Analyses the content of a section of an ini configuration file (not the
     * "general" section) and configures a new {@link DumpProcessingAction}
     * with this data. The kind of action is taken from the value of the
     * "action" key of the section. All keys are passed to the action in lower
     * case; keys that the action does not accept are reported with a warning,
     * except for the "action" key itself.
     * 
     * @param section
     *            {@link Section} that describes one action
     * @return {@link DumpProcessingAction} containing the output parameters,
     *         or null if the section does not name an action or the action
     *         could not be created
     */
    private DumpProcessingAction handleActionArguments(Section section) {
        String actionName = section.get(OPTION_ACTION);
        if (actionName == null) {
            // Without this check a section lacking the key would cause a
            // NullPointerException and abort the whole configuration.
            logger.error("No action specified in configuration section \"" + section.getName() + "\".");
            return null;
        }

        DumpProcessingAction result = makeDumpProcessingAction(actionName.toLowerCase());
        if (result == null) {
            return null;
        }

        for (String key : section.keySet()) {
            String normalizedKey = key.toLowerCase();
            if (!result.setOption(normalizedKey, section.get(key)) && !OPTION_ACTION.equals(normalizedKey)) {
                logger.warn("Unrecognized option: " + key);
            }
        }

        checkDuplicateStdOutOutput(result);

        return result;
    }

    /**
     * True as soon as one configured action writes its output to stdout. This
     * is tracked separately from {@link #quiet}, since quiet mode can also be
     * requested explicitly without any action using stdout.
     */
    private boolean stdOutInUse = false;

    /**
     * Checks if a newly created action wants to write output to stdout, and
     * logs a warning if another action is already doing the same. An action
     * that writes to stdout also switches this configuration to quiet mode.
     * 
     * @param newAction
     *            the new action to be checked
     */
    private void checkDuplicateStdOutOutput(DumpProcessingAction newAction) {
        if (!newAction.useStdOut()) {
            return;
        }

        // Only warn if stdout was really claimed by an earlier action; an
        // explicitly requested quiet mode must not trigger the warning.
        if (this.stdOutInUse) {
            logger.warn("Multiple actions are using stdout as output destination.");
        }
        this.stdOutInUse = true;
        this.quiet = true;
    }

    /**
     * Creates a {@link DumpProcessingAction} object for the action of the given
     * name. The name is looked up in {@link #KNOWN_ACTIONS} and the registered
     * class is instantiated through its public no-argument constructor. The
     * operation may fail if the name is not associated with any action, or if
     * the associated action class cannot be instantiated; failures are logged
     * together with the exception that caused them.
     * 
     * @param name
     *            of the action
     * @return the {@link DumpProcessingAction} or null if creation failed
     */
    private DumpProcessingAction makeDumpProcessingAction(String name) {
        Class<? extends DumpProcessingOutputAction> actionClass = KNOWN_ACTIONS.get(name);
        if (actionClass == null) {
            logger.error("Unknown action \"" + name + "\".");
            return null;
        }

        try {
            Constructor<? extends DumpProcessingOutputAction> constructor = actionClass.getConstructor();
            return constructor.newInstance();
        } catch (NoSuchMethodException | SecurityException | IllegalArgumentException | IllegalAccessException e) {
            logger.error("Class \"" + actionClass + "\" that was registered to handle action \"" + name
                    + "\" does not have an appropriate constructor.", e);
            return null;
        } catch (InstantiationException | InvocationTargetException e) {
            // An InvocationTargetException only wraps the exception thrown by
            // the constructor and usually has no message of its own, so the
            // wrapped cause is reported when there is one.
            Throwable cause = (e.getCause() != null) ? e.getCause() : e;
            logger.error("Error when trying to instantiate handler for action \"" + name + "\":" + cause, e);
            return null;
        }
    }

    /**
     * Sets the set of language filters based on the given string. Whitespace
     * around the individual entries is ignored, so that "en, de" denotes the
     * same filter as "en,de".
     * 
     * @param filters
     *            comma-separated list of language codes, or "-" to filter all
     *            languages
     */
    private void setLanguageFilters(String filters) {
        this.filterLanguages = new HashSet<>();
        if (!"-".equals(filters.trim())) {
            for (String lang : filters.split(",")) {
                // untrimmed entries such as " de" would never match a language
                this.filterLanguages.add(lang.trim());
            }
        }
    }

    /**
     * Sets the set of site filters based on the given string. Whitespace
     * around the individual entries is ignored, so that "enwiki, dewiki"
     * denotes the same filter as "enwiki,dewiki".
     * 
     * @param filters
     *            comma-separated list of site keys, or "-" to filter all site
     *            links
     */
    private void setSiteFilters(String filters) {
        this.filterSites = new HashSet<>();
        if (!"-".equals(filters.trim())) {
            for (String siteKey : filters.split(",")) {
                // untrimmed entries such as " dewiki" would never match a site
                this.filterSites.add(siteKey.trim());
            }
        }
    }

    /**
     * Sets the set of property filters based on the given string. Whitespace
     * around the individual entries is removed before the property id values
     * are created, so that "P31, P279" denotes the same filter as "P31,P279".
     * 
     * @param filters
     *            comma-separated list of property ids, or "-" to filter all
     *            statements
     */
    private void setPropertyFilters(String filters) {
        this.filterProperties = new HashSet<>();
        if (!"-".equals(filters.trim())) {
            for (String pid : filters.split(",")) {
                this.filterProperties.add(Datamodel.makeWikidataPropertyIdValue(pid.trim()));
            }
        }
    }

    /**
     * Builds the list of legal command-line options and stores them in
     * {@link #options}. Options that take an argument are created with
     * {@link OptionBuilder}; simple flags are added directly. The names of
     * the actions listed in the help text of the action option are taken from
     * {@link #KNOWN_ACTIONS} and sorted alphabetically.
     */
    @SuppressWarnings("static-access")
    private static void initOptions() {

        List<String> actionNames = new ArrayList<>(KNOWN_ACTIONS.keySet());
        Collections.sort(actionNames);
        Option action = OptionBuilder.hasArg().withArgName("action")
                .withDescription("define the action that should be performed; available actions: " + actionNames)
                .withLongOpt(OPTION_ACTION).create(CMD_OPTION_ACTION);

        Option destination = OptionBuilder.withArgName("path").hasArg()
                .withDescription("place the output into the file at <path>").withLongOpt(OPTION_OUTPUT_DESTINATION)
                .create(CMD_OPTION_OUTPUT_DESTINATION);

        Option dumplocation = OptionBuilder.hasArg().withArgName("path")
                .withDescription("set the location of the dump files").withLongOpt(OPTION_DUMP_LOCATION)
                .create(CMD_OPTION_DUMP_LOCATION);

        Option config = OptionBuilder.hasArg().withArgName("file")
                .withDescription("set a config file; use this to define multiple actions for a single run")
                .withLongOpt(OPTION_CONFIG_FILE).create(CMD_OPTION_CONFIG_FILE);

        // The following options only have a long name (no short alternative).
        Option rdfdump = OptionBuilder.hasArgs().withArgName("task").withDescription(
                "specify which data to include in RDF dump (use with action \"rdf\"); run with options \"-a rdf -n\" for help")
                .withLongOpt(OPTION_OUTPUT_RDF_TYPE).create();

        Option filterLanguages = OptionBuilder.hasArgs().withArgName("languages").withDescription(
                "specifies a list of language codes; if given, only terms in languages in this list will be processed; the value \"-\" denotes the empty list (no terms are processed)")
                .withLongOpt(OPTION_FILTER_LANGUAGES).create();

        Option filterSites = OptionBuilder.hasArgs().withArgName("sites").withDescription(
                "specifies a list of site keys; if given, only site links to sites in this list will be processed; the value \"-\" denotes the empty list (no site links are processed)")
                .withLongOpt(OPTION_FILTER_SITES).create();

        Option filterProperties = OptionBuilder.hasArgs().withArgName("ids").withDescription(
                "specifies a list of property ids; if given, only statements for properties in this list will be processed; the value \"-\" denotes the empty list (no statements are processed)")
                .withLongOpt(OPTION_FILTER_PROPERTIES).create();

        Option compression = OptionBuilder.hasArg().withArgName("type")
                .withDescription("define a compression format to be used for the output; possible values: "
                        + DumpProcessingOutputAction.COMPRESS_GZIP + ", " + DumpProcessingOutputAction.COMPRESS_BZ2)
                .withLongOpt(OPTION_OUTPUT_COMPRESSION).create(CMD_OPTION_OUTPUT_COMPRESSION);

        Option report = OptionBuilder.hasArg().withArgName("path")
                .withDescription("specifies a path to print a final report after dump generations.")
                .withLongOpt(OPTION_CREATE_REPORT).create(CMD_OPTION_CREATE_REPORT);

        options.addOption(config);
        options.addOption(action);
        options.addOption(CMD_OPTION_QUIET, OPTION_QUIET, false,
                "perform all actions quietly, without printing status messages to the console; errors/warnings are still printed to stderr");
        options.addOption(destination);
        options.addOption(dumplocation);
        options.addOption(filterLanguages);
        options.addOption(filterSites);
        options.addOption(filterProperties);
        options.addOption(compression);
        options.addOption(report);
        options.addOption(rdfdump);
        options.addOption(CMD_OPTION_OFFLINE_MODE, OPTION_OFFLINE_MODE, false,
                "execute all operations in offline mode, using only previously downloaded dumps");
        options.addOption(CMD_OPTION_HELP, OPTION_HELP, false, "print this message");

        options.addOption(CMD_OPTION_OUTPUT_STDOUT, OPTION_OUTPUT_STDOUT, false, "write output to stdout");
    }

}