Example usage for the org.apache.commons.cli2.builder.DefaultOptionBuilder constructor DefaultOptionBuilder()

List of usage examples for the org.apache.commons.cli2.builder.DefaultOptionBuilder constructor DefaultOptionBuilder()

Introduction

On this page you can find example usages of the org.apache.commons.cli2.builder.DefaultOptionBuilder constructor DefaultOptionBuilder().

Prototype

public DefaultOptionBuilder() 

Source Link

Document

Creates a new DefaultOptionBuilder using defaults

Usage

From source file:com.digitalpebble.behemoth.tika.TikaDriver.java

/**
 * Assembles a command-line option from the supplied settings.
 *
 * @param name long name of the option; reused as the name of its argument
 * @param shortName short name of the option, ignored when {@code null}
 * @param description description text attached to the option
 * @param hasArg whether the option carries exactly one argument
 * @param required whether the option is flagged as required
 * @param defaultValue default argument value, ignored when {@code null} or when {@code hasArg} is false
 * @return the option created by the builder
 */
private Option buildOption(String name, String shortName, String description, boolean hasArg, boolean required,
        String defaultValue) {

    DefaultOptionBuilder option = new DefaultOptionBuilder()
            .withLongName(name)
            .withRequired(required)
            .withDescription(description);

    if (null != shortName) {
        option.withShortName(shortName);
    }

    // Options without an argument are complete at this point.
    if (!hasArg) {
        return option.create();
    }

    // The argument accepts exactly one value (minimum 1, maximum 1).
    ArgumentBuilder argument = new ArgumentBuilder().withName(name).withMaximum(1).withMinimum(1);
    if (null != defaultValue) {
        argument = argument.withDefault(defaultValue);
    }
    option.withArgument(argument.create());

    return option.create();
}

From source file:com.tamingtext.util.SplitInput.java

/**
 * Configure this instance based on the command-line arguments contained within provided array.
 * Calls {@link #validate()} to ensure consistency of configuration.
 *
 * Exactly one of {@code --testSplitSize} and {@code --testSplitPct} must be supplied; giving both or
 * neither is reported as an option error. The training and test output directories are created
 * before {@link #validate()} is called.
 *
 * @param args the command-line arguments to parse
 * @return true if the arguments were parsed successfully and execution should proceed; false if help
 *   was requested or an option could not be parsed (the help text is printed in both cases).
 * @throws Exception if there is a problem parsing the command-line arguments or the particular
 *   combination would violate class invariants.
 */
public boolean parseArgs(String[] args) throws Exception {

    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();
    Option helpOpt = DefaultOptionCreator.helpOption();

    Option inputDirOpt = obuilder.withLongName("inputDir").withRequired(true)
            .withArgument(abuilder.withName("inputDir").withMinimum(1).withMaximum(1).create())
            .withDescription("The input directory").withShortName("i").create();

    Option trainingOutputDirOpt = obuilder.withLongName("trainingOutputDir").withRequired(true)
            .withArgument(abuilder.withName("outputDir").withMinimum(1).withMaximum(1).create())
            .withDescription("The training data output directory").withShortName("tr").create();

    Option testOutputDirOpt = obuilder.withLongName("testOutputDir").withRequired(true)
            .withArgument(abuilder.withName("outputDir").withMinimum(1).withMaximum(1).create())
            .withDescription("The test data output directory").withShortName("te").create();

    Option testSplitSizeOpt = obuilder.withLongName("testSplitSize").withRequired(false)
            .withArgument(abuilder.withName("splitSize").withMinimum(1).withMaximum(1).create())
            .withDescription("The number of documents held back as test data for each category")
            .withShortName("ss").create();

    Option testSplitPctOpt = obuilder.withLongName("testSplitPct").withRequired(false)
            .withArgument(abuilder.withName("splitPct").withMinimum(1).withMaximum(1).create())
            .withDescription("The percentage of documents held back as test data for each category")
            .withShortName("sp").create();

    // Fixed help text: the closing parenthesis was missing.
    Option splitLocationOpt = obuilder.withLongName("splitLocation").withRequired(false)
            .withArgument(abuilder.withName("splitLoc").withMinimum(1).withMaximum(1).create())
            .withDescription(
                    "Location for start of test data expressed as a percentage of the input file size (0=start, 50=middle, 100=end)")
            .withShortName("sl").create();

    // Fixed help text: "itemr" -> "items".
    Option randomSelectionSizeOpt = obuilder.withLongName("randomSelectionSize").withRequired(false)
            .withArgument(abuilder.withName("randomSize").withMinimum(1).withMaximum(1).create())
            .withDescription("The number of items to be randomly selected as test data ").withShortName("rs")
            .create();

    Option randomSelectionPctOpt = obuilder.withLongName("randomSelectionPct").withRequired(false)
            .withArgument(abuilder.withName("randomPct").withMinimum(1).withMaximum(1).create())
            .withDescription("Percentage of items to be randomly selected as test data ").withShortName("rp")
            .create();

    Option charsetOpt = obuilder.withLongName("charset").withRequired(true)
            .withArgument(abuilder.withName("charset").withMinimum(1).withMaximum(1).create())
            .withDescription("The name of the character encoding of the input files").withShortName("c")
            .create();

    // The help option must be part of the group, otherwise the parser cannot recognise it and the
    // hasOption(helpOpt) check below can never succeed.
    Group group = gbuilder.withName("Options").withOption(inputDirOpt).withOption(trainingOutputDirOpt)
            .withOption(testOutputDirOpt).withOption(testSplitSizeOpt).withOption(testSplitPctOpt)
            .withOption(splitLocationOpt).withOption(randomSelectionSizeOpt).withOption(randomSelectionPctOpt)
            .withOption(charsetOpt).withOption(helpOpt).create();

    try {

        Parser parser = new Parser();
        parser.setGroup(group);
        // Registering the help option lets the parser return without enforcing the required options
        // when help is requested.
        parser.setHelpOption(helpOpt);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return false;
        }

        inputDirectory = new Path((String) cmdLine.getValue(inputDirOpt));
        trainingOutputDirectory = new Path((String) cmdLine.getValue(trainingOutputDirOpt));
        testOutputDirectory = new Path((String) cmdLine.getValue(testOutputDirOpt));

        charset = Charset.forName((String) cmdLine.getValue(charsetOpt));

        boolean hasSplitSize = cmdLine.hasOption(testSplitSizeOpt);
        boolean hasSplitPct = cmdLine.hasOption(testSplitPctOpt);

        // Split size and split percentage are mutually exclusive, and one of them is mandatory.
        if (hasSplitSize && hasSplitPct) {
            throw new OptionException(testSplitSizeOpt,
                    "must have either split size or split percentage option, not BOTH");
        } else if (!hasSplitSize && !hasSplitPct) {
            throw new OptionException(testSplitSizeOpt,
                    "must have either split size or split percentage option");
        }

        if (hasSplitSize) {
            setTestSplitSize(Integer.parseInt((String) cmdLine.getValue(testSplitSizeOpt)));
        }

        if (hasSplitPct) {
            setTestSplitPct(Integer.parseInt((String) cmdLine.getValue(testSplitPctOpt)));
        }

        if (cmdLine.hasOption(splitLocationOpt)) {
            setSplitLocation(Integer.parseInt((String) cmdLine.getValue(splitLocationOpt)));
        }

        if (cmdLine.hasOption(randomSelectionSizeOpt)) {
            setTestRandomSelectionSize(Integer.parseInt((String) cmdLine.getValue(randomSelectionSizeOpt)));
        }

        if (cmdLine.hasOption(randomSelectionPctOpt)) {
            setTestRandomSelectionPct(Integer.parseInt((String) cmdLine.getValue(randomSelectionPctOpt)));
        }

        fs.mkdirs(trainingOutputDirectory);
        fs.mkdirs(testOutputDirectory);

    } catch (OptionException e) {
        log.error("Command-line option Exception", e);
        CommandLineUtil.printHelp(group);
        return false;
    }

    validate();
    return true;
}

From source file:it.jnrpe.client.JNRPEClient.java

/**
 * Configures the command line parser./*w  w  w  .j a va 2 s. c  o  m*/
 * 
 * @return The command line parser configuration
 */
private static Group configureCommandLine() {
    DefaultOptionBuilder oBuilder = new DefaultOptionBuilder();
    ArgumentBuilder aBuilder = new ArgumentBuilder();
    GroupBuilder gBuilder = new GroupBuilder();

    DefaultOption nosslOption = oBuilder.withLongName("nossl").withShortName("n")
            .withDescription("Do no use SSL").create();

    DefaultOption weakSslOption = oBuilder.withLongName("weakCiherSuites").withShortName("w")
            .withDescription("Enable weak cipher suites").create();

    DefaultOption unknownOption = oBuilder.withLongName("unknown").withShortName("u")
            .withDescription("Make socket timeouts return an UNKNOWN state instead of CRITICAL").create();

    DefaultOption hostOption = oBuilder.withLongName("host").withShortName("H")
            .withDescription("The address of the host running the JNRPE/NRPE daemon")
            .withArgument(aBuilder.withName("host").withMinimum(1).withMaximum(1).create()).create();

    NumberValidator positiveInt = NumberValidator.getIntegerInstance();
    positiveInt.setMinimum(0);
    DefaultOption portOption = oBuilder.withLongName("port").withShortName("p")
            .withDescription("The port on which the daemon is running (default=5666)")
            .withArgument(aBuilder.withName("port").withMinimum(1).withMaximum(1)
                    .withDefault(Long.valueOf(DEFAULT_PORT)).withValidator(positiveInt).create())
            .create();

    DefaultOption timeoutOption = oBuilder.withLongName("timeout").withShortName("t")
            .withDescription("Number of seconds before connection times out (default=10)")
            .withArgument(aBuilder.withName("timeout").withMinimum(1).withMaximum(1)
                    .withDefault(Long.valueOf(DEFAULT_TIMEOUT)).withValidator(positiveInt).create())
            .create();

    DefaultOption commandOption = oBuilder.withLongName("command").withShortName("c")
            .withDescription("The name of the command that the remote daemon should run")
            .withArgument(aBuilder.withName("command").withMinimum(1).withMaximum(1).create()).create();

    DefaultOption argsOption = oBuilder.withLongName("arglist").withShortName("a").withDescription(
            "Optional arguments that should be passed to the command.  Multiple arguments should be separated by "
                    + "a space (' '). If provided, this must be the last option supplied on the command line.")
            .withArgument(aBuilder.withName("arglist").withMinimum(1).create()).create();

    DefaultOption helpOption = oBuilder.withLongName("help").withShortName("h")
            .withDescription("Shows this help").create();

    Group executionOption = gBuilder.withOption(nosslOption).withOption(weakSslOption).withOption(unknownOption)
            .withOption(hostOption).withOption(portOption).withOption(timeoutOption).withOption(commandOption)
            .withOption(argsOption).create();

    return gBuilder.withOption(executionOption).withOption(helpOption).withMinimum(1).withMaximum(1).create();
}

From source file:it.jnrpe.plugins.PluginOption.java

/**
 * Convert this {@link PluginOption} to the Option required by Apache.
 * Commons Cli./* www. jav  a2 s  .c o m*/
 *
        
 * @return The option object required by commons cli */
public Option toOption() {
    DefaultOptionBuilder oBuilder = new DefaultOptionBuilder();

    oBuilder.withShortName(option).withDescription(description).withRequired(required);

    if (longOptionName != null) {
        oBuilder.withLongName(longOptionName);
    }

    if (hasArgs) {
        ArgumentBuilder aBuilder = new ArgumentBuilder();

        if (argName != null) {
            aBuilder.withName(argName);
        }

        if (argsAreOptional) {
            aBuilder.withMinimum(0);
        }

        if (argsCount != null) {
            aBuilder.withMaximum(argsCount);
        } else {
            aBuilder.withMaximum(1);
        }

        if (argsValueSeparator != null && argsValueSeparator.length() != 0) {
            aBuilder.withInitialSeparator(argsValueSeparator.charAt(0));
            aBuilder.withSubsequentSeparator(argsValueSeparator.charAt(0));
        }
        oBuilder.withArgument(aBuilder.create());
    }

    return oBuilder.create();
}

From source file:com.gsinnovations.howdah.AbstractJob.java

/** Create an option from the given parameters. Name and description are
 *  required.
 *
 * @param name the long name of the option prefixed with '--' on the command-line
 * @param shortName the short name of the option, prefixed with '-' on the command-line;
 *                  skipped when null
 * @param description description of the option displayed in help method
 * @param hasArg true if the option has an argument (exactly one value is accepted).
 * @param required true if the option is required.
 * @param defaultValue default argument value, can be null.
 * @return the option.
 */
private static Option buildOption(String name, String shortName, String description, boolean hasArg,
        boolean required, String defaultValue) {

    final DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder()
            .withLongName(name)
            .withDescription(description)
            .withRequired(required);

    if (shortName != null) {
        optionBuilder.withShortName(shortName);
    }

    if (hasArg) {
        // The argument shares the option's long name and takes exactly one value.
        final ArgumentBuilder singleValue = new ArgumentBuilder()
                .withName(name)
                .withMinimum(1)
                .withMaximum(1);
        final ArgumentBuilder argumentBuilder =
                defaultValue == null ? singleValue : singleValue.withDefault(defaultValue);

        optionBuilder.withArgument(argumentBuilder.create());
    }

    return optionBuilder.create();
}

From source file:TrainLogistic.java

/**
 * Parses the command-line arguments and stores the results in the static fields of
 * {@code TrainLogistic} ({@code inputFile}, {@code outputFile}, {@code lmp}, {@code scores},
 * {@code passes}).
 *
 * @param args the raw command-line arguments
 * @return {@code true} if the arguments were parsed and the fields populated; {@code false} if
 *         {@code parseAndHelp} returned no command line
 */
private static boolean parseArgs(String[] args) {
    DefaultOptionBuilder builder = new DefaultOptionBuilder();

    Option help = builder.withLongName("help").withDescription("print this list").create();

    Option quiet = builder.withLongName("quiet").withDescription("be extra quiet").create();
    Option scores = builder.withLongName("scores").withDescription("output score diagnostics during training")
            .create();

    ArgumentBuilder argumentBuilder = new ArgumentBuilder();
    Option inputFile = builder.withLongName("input").withRequired(true)
            .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
            .withDescription("where to get training data").create();

    // Fixed help text: the description used to be a copy of the one for --input.
    Option outputFile = builder.withLongName("output").withRequired(true)
            .withArgument(argumentBuilder.withName("output").withMaximum(1).create())
            .withDescription("where to write the trained model").create();

    Option predictors = builder.withLongName("predictors").withRequired(true)
            .withArgument(argumentBuilder.withName("p").create())
            .withDescription("a list of predictor variables").create();

    Option types = builder.withLongName("types").withRequired(true)
            .withArgument(argumentBuilder.withName("t").create())
            .withDescription("a list of predictor variable types (numeric, word, or text)").create();

    Option target = builder.withLongName("target").withRequired(true)
            .withArgument(argumentBuilder.withName("target").withMaximum(1).create())
            .withDescription("the name of the target variable").create();

    Option features = builder.withLongName("features")
            .withArgument(argumentBuilder.withName("numFeatures").withDefault("1000").withMaximum(1).create())
            .withDescription("the number of internal hashed features to use").create();

    Option passes = builder.withLongName("passes")
            .withArgument(argumentBuilder.withName("passes").withDefault("2").withMaximum(1).create())
            .withDescription("the number of times to pass over the input data").create();

    Option lambda = builder.withLongName("lambda")
            .withArgument(argumentBuilder.withName("lambda").withDefault("1e-4").withMaximum(1).create())
            .withDescription("the amount of coefficient decay to use").create();

    Option rate = builder.withLongName("rate")
            .withArgument(argumentBuilder.withName("learningRate").withDefault("1e-3").withMaximum(1).create())
            .withDescription("the learning rate").create();

    Option noBias = builder.withLongName("noBias").withDescription("don't include a bias term").create();

    Option targetCategories = builder.withLongName("categories").withRequired(true)
            .withArgument(argumentBuilder.withName("number").withMaximum(1).create())
            .withDescription("the number of target categories to be considered").create();

    // The scores option is read further down, so it has to be registered with the group;
    // it was previously left out, which made --scores impossible to pass.
    Group normalArgs = new GroupBuilder().withOption(help).withOption(quiet).withOption(scores)
            .withOption(inputFile).withOption(outputFile).withOption(target).withOption(targetCategories)
            .withOption(predictors).withOption(types).withOption(passes).withOption(lambda).withOption(rate)
            .withOption(noBias).withOption(features).create();

    Parser parser = new Parser();
    parser.setHelpOption(help);
    parser.setHelpTrigger("--help");
    parser.setGroup(normalArgs);
    parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));
    CommandLine cmdLine = parser.parseAndHelp(args);

    if (cmdLine == null) {
        // Print the argument values; printing the array directly only shows its identity.
        System.out.println(java.util.Arrays.toString(args));
        return false;
    }

    TrainLogistic.inputFile = getStringArgument(cmdLine, inputFile);
    TrainLogistic.outputFile = getStringArgument(cmdLine, outputFile);

    List<String> typeList = Lists.newArrayList();
    for (Object x : cmdLine.getValues(types)) {
        typeList.add(x.toString());
    }

    List<String> predictorList = Lists.newArrayList();
    for (Object x : cmdLine.getValues(predictors)) {
        predictorList.add(x.toString());
    }

    lmp = new LogisticModelParameters();
    lmp.setTargetVariable(getStringArgument(cmdLine, target));
    lmp.setMaxTargetCategories(getIntegerArgument(cmdLine, targetCategories));
    lmp.setNumFeatures(getIntegerArgument(cmdLine, features));
    lmp.setUseBias(!getBooleanArgument(cmdLine, noBias));
    lmp.setTypeMap(predictorList, typeList);

    lmp.setLambda(getDoubleArgument(cmdLine, lambda));
    lmp.setLearningRate(getDoubleArgument(cmdLine, rate));

    TrainLogistic.scores = getBooleanArgument(cmdLine, scores);
    TrainLogistic.passes = getIntegerArgument(cmdLine, passes);

    return true;
}

From source file:my.mahout.AbstractJob.java

/**
 * Creates an option, optionally with an argument whose value count is bounded by {@code min}
 * and {@code max}.
 *
 * @param name long name of the option; also used as the argument name
 * @param shortName short name of the option, skipped when {@code null}
 * @param description description text attached to the option
 * @param hasArg whether the option takes an argument
 * @param min minimum passed to the argument builder (only used when {@code hasArg} is true)
 * @param max maximum passed to the argument builder (only used when {@code hasArg} is true)
 * @param required whether the option is flagged as required
 * @param defaultValue default argument value, skipped when {@code null}
 * @return the option created by the builder
 */
protected static Option buildOption(String name, String shortName, String description, boolean hasArg, int min,
        int max, boolean required, String defaultValue) {

    DefaultOptionBuilder result = new DefaultOptionBuilder()
            .withLongName(name)
            .withRequired(required)
            .withDescription(description);

    if (shortName != null) {
        result.withShortName(shortName);
    }

    // Nothing more to configure for options that take no argument.
    if (!hasArg) {
        return result.create();
    }

    ArgumentBuilder arg = new ArgumentBuilder()
            .withName(name)
            .withMaximum(max)
            .withMinimum(min);
    if (defaultValue != null) {
        arg = arg.withDefault(defaultValue);
    }
    result.withArgument(arg.create());

    return result.create();
}

From source file:com.elex.dmp.lda.InMemoryCollapsedVariationalBayes0.java

/**
 * Parses the command line, loads the dictionary and corpus, trains an
 * {@code InMemoryCollapsedVariationalBayes0} model and writes the topic and document outputs.
 *
 * @param args the command-line arguments
 * @param conf the configuration used for loading input and writing output; its
 *             {@code fs.default.name} entry is set from the {@code --dfs} option when it is not
 *             already present and the option was supplied
 * @return -1 if help was requested, otherwise 0 (also when the options could not be parsed, in
 *         which case the error is logged and the help text printed)
 * @throws Exception if loading, training or writing fails
 */
public static int main2(String[] args, Configuration conf) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option helpOpt = DefaultOptionCreator.helpOption();

    Option inputDirOpt = obuilder.withLongName("input").withRequired(true)
            .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
            .withDescription("The Directory on HDFS containing the collapsed, properly formatted files having "
                    + "one doc per line")
            .withShortName("i").create();

    Option dictOpt = obuilder.withLongName("dictionary").withRequired(false)
            .withArgument(abuilder.withName("dictionary").withMinimum(1).withMaximum(1).create())
            .withDescription("The path to the term-dictionary format is ... ").withShortName("d").create();

    Option dfsOpt = obuilder.withLongName("dfs").withRequired(false)
            .withArgument(abuilder.withName("dfs").withMinimum(1).withMaximum(1).create())
            .withDescription("HDFS namenode URI").withShortName("dfs").create();

    Option numTopicsOpt = obuilder.withLongName("numTopics").withRequired(true)
            .withArgument(abuilder.withName("numTopics").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of topics to learn").withShortName("top").create();

    Option outputTopicFileOpt = obuilder.withLongName("topicOutputFile").withRequired(true)
            .withArgument(abuilder.withName("topicOutputFile").withMinimum(1).withMaximum(1).create())
            .withDescription("File to write out p(term | topic)").withShortName("to").create();

    Option outputDocFileOpt = obuilder.withLongName("docOutputFile").withRequired(true)
            .withArgument(abuilder.withName("docOutputFile").withMinimum(1).withMaximum(1).create())
            .withDescription("File to write out p(topic | docid)").withShortName("do").create();

    Option alphaOpt = obuilder.withLongName("alpha").withRequired(false)
            .withArgument(abuilder.withName("alpha").withMinimum(1).withMaximum(1).withDefault("0.1").create())
            .withDescription("Smoothing parameter for p(topic | document) prior").withShortName("a").create();

    Option etaOpt = obuilder.withLongName("eta").withRequired(false)
            .withArgument(abuilder.withName("eta").withMinimum(1).withMaximum(1).withDefault("0.1").create())
            .withDescription("Smoothing parameter for p(term | topic)").withShortName("e").create();

    Option maxIterOpt = obuilder.withLongName("maxIterations").withRequired(false)
            .withArgument(
                    abuilder.withName("maxIterations").withMinimum(1).withMaximum(1).withDefault(10).create())
            .withDescription("Maximum number of training passes").withShortName("m").create();

    Option modelCorpusFractionOption = obuilder.withLongName("modelCorpusFraction").withRequired(false)
            .withArgument(abuilder.withName("modelCorpusFraction").withMinimum(1).withMaximum(1)
                    .withDefault(0.0).create())
            .withShortName("mcf").withDescription("For online updates, initial value of |model|/|corpus|")
            .create();

    Option burnInOpt = obuilder.withLongName("burnInIterations").withRequired(false)
            .withArgument(
                    abuilder.withName("burnInIterations").withMinimum(1).withMaximum(1).withDefault(5).create())
            .withDescription("Minimum number of iterations").withShortName("b").create();

    Option convergenceOpt = obuilder.withLongName("convergence").withRequired(false)
            .withArgument(
                    abuilder.withName("convergence").withMinimum(1).withMaximum(1).withDefault("0.0").create())
            .withDescription("Fractional rate of perplexity to consider convergence").withShortName("c")
            .create();

    Option reInferDocTopicsOpt = obuilder.withLongName("reInferDocTopics").withRequired(false)
            .withArgument(abuilder.withName("reInferDocTopics").withMinimum(1).withMaximum(1).withDefault("no")
                    .create())
            .withDescription("re-infer p(topic | doc) : [no | randstart | continue]").withShortName("rdt")
            .create();

    Option numTrainThreadsOpt = obuilder
            .withLongName("numTrainThreads").withRequired(false).withArgument(abuilder
                    .withName("numTrainThreads").withMinimum(1).withMaximum(1).withDefault("1").create())
            .withDescription("number of threads to train with").withShortName("ntt").create();

    Option numUpdateThreadsOpt = obuilder.withLongName("numUpdateThreads").withRequired(false)
            .withArgument(abuilder.withName("numUpdateThreads").withMinimum(1).withMaximum(1).withDefault("1")
                    .create())
            .withDescription("number of threads to update the model with").withShortName("nut").create();

    Option verboseOpt = obuilder.withLongName("verbose").withRequired(false)
            .withArgument(
                    abuilder.withName("verbose").withMinimum(1).withMaximum(1).withDefault("false").create())
            .withDescription("print verbose information, like top-terms in each topic, during iteration")
            .withShortName("v").create();

    // The help option is added to the group so the parser can recognise it; without this the
    // hasOption(helpOpt) branch below could never be taken.
    Group group = gbuilder.withName("Options").withOption(inputDirOpt).withOption(numTopicsOpt)
            .withOption(alphaOpt).withOption(etaOpt).withOption(maxIterOpt).withOption(burnInOpt)
            .withOption(convergenceOpt).withOption(dictOpt).withOption(reInferDocTopicsOpt)
            .withOption(outputDocFileOpt).withOption(outputTopicFileOpt).withOption(dfsOpt)
            .withOption(numTrainThreadsOpt).withOption(numUpdateThreadsOpt)
            .withOption(modelCorpusFractionOption).withOption(verboseOpt).withOption(helpOpt).create();

    try {
        Parser parser = new Parser();

        parser.setGroup(group);
        parser.setHelpOption(helpOpt);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return -1;
        }

        String inputDirString = (String) cmdLine.getValue(inputDirOpt);
        String dictDirString = cmdLine.hasOption(dictOpt) ? (String) cmdLine.getValue(dictOpt) : null;
        int numTopics = Integer.parseInt((String) cmdLine.getValue(numTopicsOpt));
        double alpha = Double.parseDouble((String) cmdLine.getValue(alphaOpt));
        double eta = Double.parseDouble((String) cmdLine.getValue(etaOpt));
        // maxIterations, burnInIterations and modelCorpusFraction have non-String defaults
        // (10, 5 and 0.0), whereas the other options in this method read their values as Strings.
        // Going through toString() accepts both forms; a direct cast to String or to
        // Integer/Double fails for one of them with a ClassCastException.
        int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt).toString());
        int burnInIterations = Integer.parseInt(cmdLine.getValue(burnInOpt).toString());
        double minFractionalErrorChange = Double.parseDouble((String) cmdLine.getValue(convergenceOpt));
        int numTrainThreads = Integer.parseInt((String) cmdLine.getValue(numTrainThreadsOpt));
        int numUpdateThreads = Integer.parseInt((String) cmdLine.getValue(numUpdateThreadsOpt));
        String topicOutFile = (String) cmdLine.getValue(outputTopicFileOpt);
        String docOutFile = (String) cmdLine.getValue(outputDocFileOpt);
        String reInferDocTopics = (String) cmdLine.getValue(reInferDocTopicsOpt);
        boolean verbose = Boolean.parseBoolean((String) cmdLine.getValue(verboseOpt));
        double modelCorpusFraction = Double.parseDouble(cmdLine.getValue(modelCorpusFractionOption).toString());

        long start = System.nanoTime();

        if (conf.get("fs.default.name") == null) {
            // --dfs is optional and has no default, so only override the setting when a value
            // was actually supplied instead of handing null to the configuration.
            String dfsNameNode = (String) cmdLine.getValue(dfsOpt);
            if (dfsNameNode != null) {
                conf.set("fs.default.name", dfsNameNode);
            }
        }
        String[] terms = loadDictionary(dictDirString, conf);
        logTime("dictionary loading", System.nanoTime() - start);
        start = System.nanoTime();
        Matrix corpus = loadVectors(inputDirString, conf);
        logTime("vector seqfile corpus loading", System.nanoTime() - start);
        start = System.nanoTime();
        InMemoryCollapsedVariationalBayes0 cvb0 = new InMemoryCollapsedVariationalBayes0(corpus, terms,
                numTopics, alpha, eta, numTrainThreads, numUpdateThreads, modelCorpusFraction, 1234);
        logTime("cvb0 init", System.nanoTime() - start);

        start = System.nanoTime();
        cvb0.setVerbose(verbose);
        cvb0.iterateUntilConvergence(minFractionalErrorChange, maxIterations, burnInIterations);
        logTime("total training time", System.nanoTime() - start);

        // Any value other than "randstart" or "continue" (e.g. the default "no") skips re-inference.
        if ("randstart".equalsIgnoreCase(reInferDocTopics)) {
            cvb0.inferDocuments(0.0, 100, true);
        } else if ("continue".equalsIgnoreCase(reInferDocTopics)) {
            cvb0.inferDocuments(0.0, 100, false);
        }

        start = System.nanoTime();
        cvb0.writeModel(new Path(topicOutFile));
        DistributedRowMatrixWriter.write(new Path(docOutFile), conf, cvb0.docTopicCounts);
        logTime("printTopics", System.nanoTime() - start);
    } catch (OptionException e) {
        log.error("Error while parsing options", e);
        CommandLineUtil.printHelp(group);
    }
    // NOTE(review): option-parsing failures also end up here and return 0; kept as-is because
    // callers may depend on the existing return values.
    return 0;
}

From source file:org.apache.mahout.avro.text.AvroDocumentsFromDirectory.java

/**
 * Command-line entry point: parses the options and delegates to
 * {@code createAvroDocuments} on a new {@code AvroDocumentsFromDirectory}.
 *
 * Only the arguments left over by {@code GenericOptionsParser} are parsed here.
 * {@code --parent}, {@code --outputDir} and {@code --charset} are required;
 * {@code --chunkSize} falls back to {@code DEFAULT_CHUNK_SIZE} and
 * {@code --keyPrefix} to the empty string.
 *
 * @param args the raw command-line arguments
 * @throws Exception if parsing the options or creating the documents fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] remainingArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();
    ArgumentBuilder argumentBuilder = new ArgumentBuilder();
    GroupBuilder groupBuilder = new GroupBuilder();

    Option parentOpt = optionBuilder
            .withLongName("parent")
            .withRequired(true)
            .withArgument(argumentBuilder.withName("parent").withMinimum(1).withMaximum(1).create())
            .withDescription("Parent dir containing the documents")
            .withShortName("p")
            .create();

    Option outputDirOpt = optionBuilder
            .withLongName("outputDir")
            .withRequired(true)
            .withArgument(argumentBuilder.withName("outputDir").withMinimum(1).withMaximum(1).create())
            .withDescription("The output directory")
            .withShortName("o")
            .create();

    Option chunkSizeOpt = optionBuilder
            .withLongName("chunkSize")
            .withArgument(argumentBuilder.withName("chunkSize").withMinimum(1).withMaximum(1).create())
            .withDescription("The chunkSize in documents. Defaults to " + DEFAULT_CHUNK_SIZE)
            .withShortName("chunk")
            .create();

    Option keyPrefixOpt = optionBuilder
            .withLongName("keyPrefix")
            .withArgument(argumentBuilder.withName("keyPrefix").withMinimum(1).withMaximum(1).create())
            .withDescription("The prefix to be prepended to the key")
            .withShortName("prefix")
            .create();

    Option charsetOpt = optionBuilder
            .withLongName("charset")
            .withRequired(true)
            .withArgument(argumentBuilder.withName("charset").withMinimum(1).withMaximum(1).create())
            .withDescription("The name of the character encoding of the input files")
            .withShortName("c")
            .create();

    Group options = groupBuilder
            .withName("Options")
            .withOption(keyPrefixOpt)
            .withOption(chunkSizeOpt)
            .withOption(charsetOpt)
            .withOption(outputDirOpt)
            .withOption(parentOpt)
            .create();

    Parser parser = new Parser();
    parser.setGroup(options);
    CommandLine cmdLine = parser.parse(remainingArgs);

    File parentDir = new File((String) cmdLine.getValue(parentOpt));
    String outputDir = (String) cmdLine.getValue(outputDirOpt);

    // Optional settings fall back to their defaults when the option is absent.
    int documentsPerChunk = cmdLine.hasOption(chunkSizeOpt)
            ? Integer.parseInt((String) cmdLine.getValue(chunkSizeOpt))
            : DEFAULT_CHUNK_SIZE;
    String prefix = cmdLine.hasOption(keyPrefixOpt) ? (String) cmdLine.getValue(keyPrefixOpt) : "";

    Charset charset = Charset.forName((String) cmdLine.getValue(charsetOpt));

    new AvroDocumentsFromDirectory()
            .createAvroDocuments(conf, parentDir, outputDir, prefix, documentsPerChunk, charset);
}

From source file:org.apache.mahout.avro.text.mapred.WikipediaToAvroDocuments.java

/**
 * Takes in two arguments:/*from w ww  . java 2 s.c  om*/
 * <ol>
 * <li>The input {@link org.apache.hadoop.fs.Path} where the input documents
 * live</li>
 * <li>The output {@link org.apache.hadoop.fs.Path} where to write the
 * classifier as a {@link org.apache.hadoop.io.SequenceFile}</li>
 * </ol>
 * 
 * @param args
 *          The args
 */
public int run(String[] args) throws IOException {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option dirInputPathOpt = obuilder.withLongName("input").withRequired(true)
            .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
            .withDescription("The input directory path").withShortName("i").create();

    Option dirOutputPathOpt = obuilder.withLongName("output").withRequired(true)
            .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
            .withDescription("The output directory Path").withShortName("o").create();

    Option categoriesOpt = obuilder.withLongName("categories")
            .withArgument(abuilder.withName("categories").withMinimum(1).withMaximum(1).create())
            .withDescription("Location of the categories file.  One entry per line. "
                    + "Will be used to make a string match in Wikipedia Category field")
            .withShortName("c").create();

    Option exactMatchOpt = obuilder.withLongName("exactMatch")
            .withDescription("If set, then the category name must exactly match the "
                    + "entry in the categories file. Default is false")
            .withShortName("e").create();

    Option allOpt = obuilder.withLongName("all").withDescription("If set, Select all files. Default is false")
            .withShortName("all").create();

    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(categoriesOpt).withOption(dirInputPathOpt)
            .withOption(dirOutputPathOpt).withOption(exactMatchOpt).withOption(allOpt).withOption(helpOpt)
            .create();

    Parser parser = new Parser();
    parser.setGroup(group);
    try {
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return 0;
        }

        String inputPath = (String) cmdLine.getValue(dirInputPathOpt);
        String outputPath = (String) cmdLine.getValue(dirOutputPathOpt);

        String catFile = "";
        if (cmdLine.hasOption(categoriesOpt)) {
            catFile = (String) cmdLine.getValue(categoriesOpt);
        }

        boolean all = false;
        if (cmdLine.hasOption(allOpt)) {
            all = true;
        }
        runJob(inputPath, outputPath, catFile, cmdLine.hasOption(exactMatchOpt), all);
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
        return 0;
    }

    return 1;
}