List of usage examples for org.apache.commons.cli2.builder.DefaultOptionBuilder
public DefaultOptionBuilder()
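All of the examples below follow the same basic pattern: arguments are built with an ArgumentBuilder, attached to options built with a DefaultOptionBuilder, the options are collected into a Group via a GroupBuilder, and the Group is handed to a Parser. As a quick orientation, here is a minimal, self-contained sketch of that pattern. The class name MinimalCli2Example and the --file/--verbose options are invented for illustration and do not come from any example below.

import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
import org.apache.commons.cli2.OptionException;
import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;

public class MinimalCli2Example {
    public static void main(String[] args) {
        DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
        ArgumentBuilder abuilder = new ArgumentBuilder();
        GroupBuilder gbuilder = new GroupBuilder();

        // A simple flag that takes no argument.
        Option verbose = obuilder.withLongName("verbose").withShortName("v")
                .withDescription("enable verbose output").create();

        // A required option taking exactly one value.
        Option file = obuilder.withLongName("file").withShortName("f").withRequired(true)
                .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
                .withDescription("input file to process").create();

        Group group = gbuilder.withName("Options").withOption(verbose).withOption(file).create();

        Parser parser = new Parser();
        parser.setGroup(group);
        try {
            CommandLine cmdLine = parser.parse(args);
            String path = (String) cmdLine.getValue(file);
            System.out.println("file = " + path + ", verbose = " + cmdLine.hasOption(verbose));
        } catch (OptionException e) {
            System.err.println(e.getMessage());
        }
    }
}

Running java MinimalCli2Example -f data.txt -v would print file = data.txt, verbose = true.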
From source file:com.ml.hadoop.nlp.SparseVectorsFromSequenceFiles.java
@Override
public int run(String[] args) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputDirOpt = DefaultOptionCreator.inputOption().create();
    Option outputDirOpt = DefaultOptionCreator.outputOption().create();
    Option minSupportOpt = obuilder.withLongName("minSupport")
        .withArgument(abuilder.withName("minSupport").withMinimum(1).withMaximum(1).create())
        .withDescription("(Optional) Minimum Support. Default Value: 2").withShortName("s").create();
    Option analyzerNameOpt = obuilder.withLongName("analyzerName")
        .withArgument(abuilder.withName("analyzerName").withMinimum(1).withMaximum(1).create())
        .withDescription("The class name of the analyzer").withShortName("a").create();
    Option dictionaryPathOpt = obuilder.withLongName("dictionaryPath")
        .withArgument(abuilder.withName("dictionaryPath").withMinimum(1).withMaximum(1).create())
        .withDescription("Dictionary path for update TFIDF").withShortName("dp").create();
    Option docFrequencyPathOpt = obuilder.withLongName("docFrequencyPath")
        .withArgument(abuilder.withName("docFrequencyPath").withMinimum(1).withMaximum(1).create())
        .withDescription("Doc frequency path for update TFIDF").withShortName("dfp").create();
    Option tfVectorsPathOpt = obuilder.withLongName("tfVectorsPath")
        .withArgument(abuilder.withName("tfVectorsPath").withMinimum(1).withMaximum(1).create())
        .withDescription("TF Vectors path").withShortName("tfvp").create();
    Option chunkSizeOpt = obuilder.withLongName("chunkSize")
        .withArgument(abuilder.withName("chunkSize").withMinimum(1).withMaximum(1).create())
        .withDescription("The chunkSize in MegaBytes. 100-10000 MB").withShortName("chunk").create();
    Option weightOpt = obuilder.withLongName("weight").withRequired(false)
        .withArgument(abuilder.withName("weight").withMinimum(1).withMaximum(1).create())
        .withDescription("The kind of weight to use. Currently TF, TFIDF or TFIDF_UPDATE")
        .withShortName("wt").create();
    Option minDFOpt = obuilder.withLongName("minDF").withRequired(false)
        .withArgument(abuilder.withName("minDF").withMinimum(1).withMaximum(1).create())
        .withDescription("The minimum document frequency. Default is 1").withShortName("md").create();
    Option maxDFPercentOpt = obuilder.withLongName("maxDFPercent").withRequired(false)
        .withArgument(abuilder.withName("maxDFPercent").withMinimum(1).withMaximum(1).create())
        .withDescription("The max percentage of docs for the DF. Can be used to remove really high frequency terms."
            + " Expressed as an integer between 0 and 100. Default is 99. If maxDFSigma is also set, "
            + "it will override this value.")
        .withShortName("x").create();
    Option maxDFSigmaOpt = obuilder.withLongName("maxDFSigma").withRequired(false)
        .withArgument(abuilder.withName("maxDFSigma").withMinimum(1).withMaximum(1).create())
        .withDescription("What portion of the tf (tf-idf) vectors to be used, expressed in times the standard deviation (sigma) "
            + "of the document frequencies of these vectors. Can be used to remove really high frequency terms."
            + " Expressed as a double value. Good value to be specified is 3.0. In case the value is less "
            + "than 0 no vectors will be filtered out. Default is -1.0. Overrides maxDFPercent")
        .withShortName("xs").create();
    Option minLLROpt = obuilder.withLongName("minLLR").withRequired(false)
        .withArgument(abuilder.withName("minLLR").withMinimum(1).withMaximum(1).create())
        .withDescription("(Optional) The minimum Log Likelihood Ratio (Float). Default is " + LLRReducer.DEFAULT_MIN_LLR)
        .withShortName("ml").create();
    Option numReduceTasksOpt = obuilder.withLongName("numReducers")
        .withArgument(abuilder.withName("numReducers").withMinimum(1).withMaximum(1).create())
        .withDescription("(Optional) Number of reduce tasks. Default Value: 1").withShortName("nr").create();
    Option powerOpt = obuilder.withLongName("norm").withRequired(false)
        .withArgument(abuilder.withName("norm").withMinimum(1).withMaximum(1).create())
        .withDescription("The norm to use, expressed as either a float or \"INF\" if you want to use the Infinite norm. "
            + "Must be greater or equal to 0. The default is not to normalize")
        .withShortName("n").create();
    Option logNormalizeOpt = obuilder.withLongName("logNormalize").withRequired(false)
        .withDescription("(Optional) Whether output vectors should be logNormalize. If set true else false")
        .withShortName("lnorm").create();
    Option maxNGramSizeOpt = obuilder.withLongName("maxNGramSize").withRequired(false)
        .withArgument(abuilder.withName("ngramSize").withMinimum(1).withMaximum(1).create())
        .withDescription("(Optional) The maximum size of ngrams to create"
            + " (2 = bigrams, 3 = trigrams, etc) Default Value:1")
        .withShortName("ng").create();
    Option sequentialAccessVectorOpt = obuilder.withLongName("sequentialAccessVector").withRequired(false)
        .withDescription("(Optional) Whether output vectors should be SequentialAccessVectors. If set true else false")
        .withShortName("seq").create();
    Option namedVectorOpt = obuilder.withLongName("namedVector").withRequired(false)
        .withDescription("(Optional) Whether output vectors should be NamedVectors. If set true else false")
        .withShortName("nv").create();
    Option overwriteOutput = obuilder.withLongName("overwrite").withRequired(false)
        .withDescription("If set, overwrite the output directory").withShortName("ow").create();
    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h").create();

    Group group = gbuilder.withName("Options").withOption(minSupportOpt).withOption(analyzerNameOpt)
        .withOption(dictionaryPathOpt).withOption(docFrequencyPathOpt).withOption(tfVectorsPathOpt)
        .withOption(chunkSizeOpt).withOption(outputDirOpt).withOption(inputDirOpt).withOption(minDFOpt)
        .withOption(maxDFSigmaOpt).withOption(maxDFPercentOpt).withOption(weightOpt).withOption(powerOpt)
        .withOption(minLLROpt).withOption(numReduceTasksOpt).withOption(maxNGramSizeOpt)
        .withOption(overwriteOutput).withOption(helpOpt).withOption(sequentialAccessVectorOpt)
        .withOption(namedVectorOpt).withOption(logNormalizeOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        parser.setHelpOption(helpOpt);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return -1;
        }

        Path inputDir = new Path((String) cmdLine.getValue(inputDirOpt));
        Path outputDir = new Path((String) cmdLine.getValue(outputDirOpt));

        int chunkSize = 100;
        if (cmdLine.hasOption(chunkSizeOpt)) {
            chunkSize = Integer.parseInt((String) cmdLine.getValue(chunkSizeOpt));
        }
        int minSupport = 2;
        if (cmdLine.hasOption(minSupportOpt)) {
            String minSupportString = (String) cmdLine.getValue(minSupportOpt);
            minSupport = Integer.parseInt(minSupportString);
        }
        int maxNGramSize = 1;
        if (cmdLine.hasOption(maxNGramSizeOpt)) {
            try {
                maxNGramSize = Integer.parseInt(cmdLine.getValue(maxNGramSizeOpt).toString());
            } catch (NumberFormatException ex) {
                log.warn("Could not parse ngram size option");
            }
        }
        log.info("Maximum n-gram size is: {}", maxNGramSize);

        if (cmdLine.hasOption(overwriteOutput)) {
            HadoopUtil.delete(getConf(), outputDir);
        }
        float minLLRValue = LLRReducer.DEFAULT_MIN_LLR;
        if (cmdLine.hasOption(minLLROpt)) {
            minLLRValue = Float.parseFloat(cmdLine.getValue(minLLROpt).toString());
        }
        log.info("Minimum LLR value: {}", minLLRValue);

        int reduceTasks = 1;
        if (cmdLine.hasOption(numReduceTasksOpt)) {
            reduceTasks = Integer.parseInt(cmdLine.getValue(numReduceTasksOpt).toString());
        }
        log.info("Changed... Number of reduce tasks: {}", reduceTasks);

        Class<? extends Analyzer> analyzerClass = StandardAnalyzer.class;
        if (cmdLine.hasOption(analyzerNameOpt)) {
            String className = cmdLine.getValue(analyzerNameOpt).toString();
            analyzerClass = Class.forName(className).asSubclass(Analyzer.class);
            // try instantiating it, b/c there isn't any point in setting it if
            // you can't instantiate it
            AnalyzerUtils.createAnalyzer(analyzerClass);
        }

        // weighting scheme: 1 = TFIDF (default), 2 = TF, 3 = TFIDF update
        int processIdf;
        if (cmdLine.hasOption(weightOpt)) {
            String wString = cmdLine.getValue(weightOpt).toString();
            if ("tf".equalsIgnoreCase(wString)) {
                processIdf = 2;
            } else if ("tfidf".equalsIgnoreCase(wString)) {
                processIdf = 1;
            } else if ("tfidf_update".equalsIgnoreCase(wString)) {
                processIdf = 3;
            } else {
                throw new OptionException(weightOpt);
            }
        } else {
            processIdf = 1;
        }

        int minDf = 1;
        if (cmdLine.hasOption(minDFOpt)) {
            minDf = Integer.parseInt(cmdLine.getValue(minDFOpt).toString());
        }
        int maxDFPercent = 99;
        if (cmdLine.hasOption(maxDFPercentOpt)) {
            maxDFPercent = Integer.parseInt(cmdLine.getValue(maxDFPercentOpt).toString());
        }
        double maxDFSigma = -1.0;
        if (cmdLine.hasOption(maxDFSigmaOpt)) {
            maxDFSigma = Double.parseDouble(cmdLine.getValue(maxDFSigmaOpt).toString());
        }

        float norm = PartialVectorMerger.NO_NORMALIZING;
        if (cmdLine.hasOption(powerOpt)) {
            String power = cmdLine.getValue(powerOpt).toString();
            if ("INF".equals(power)) {
                norm = Float.POSITIVE_INFINITY;
            } else {
                norm = Float.parseFloat(power);
            }
        }
        boolean logNormalize = false;
        if (cmdLine.hasOption(logNormalizeOpt)) {
            logNormalize = true;
        }

        log.info("Tokenizing documents in {}", inputDir);
        Configuration conf = getConf();
        Path tokenizedPath = new Path(outputDir, DocumentProcessor.TOKENIZED_DOCUMENT_OUTPUT_FOLDER);
        DocumentProcessor.tokenizeDocuments(inputDir, analyzerClass, tokenizedPath, conf);

        boolean sequentialAccessOutput = false;
        if (cmdLine.hasOption(sequentialAccessVectorOpt)) {
            sequentialAccessOutput = true;
        }
        boolean namedVectors = false;
        if (cmdLine.hasOption(namedVectorOpt)) {
            namedVectors = true;
        }

        boolean shouldPrune = maxDFSigma >= 0.0 || maxDFPercent > 0.00;
        String tfDirName = shouldPrune ? DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER + "-toprune"
                : DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER;
        log.info("Creating Term Frequency Vectors, prune {}", shouldPrune);

        String dictionaryPath = null;
        if (cmdLine.hasOption(dictionaryPathOpt)) {
            dictionaryPath = (String) cmdLine.getValue(dictionaryPathOpt);
            log.info("begin dic path {}", dictionaryPath);
        }

        if (processIdf == 1) {
            DictionaryVectorizer.createTermFrequencyVectors(tokenizedPath, outputDir, tfDirName, conf,
                    minSupport, maxNGramSize, minLLRValue, -1.0f, false, reduceTasks, chunkSize,
                    sequentialAccessOutput, namedVectors);
        } else if (processIdf == 3) {
            log.info("begin update term----------------");
            DictionaryVectorizer.createUpdateTermFrequencyVectors(tokenizedPath, outputDir, tfDirName, conf,
                    maxNGramSize, dictionaryPath, norm, logNormalize, reduceTasks, sequentialAccessOutput,
                    namedVectors);
        } else {
            DictionaryVectorizer.createTermFrequencyVectors(tokenizedPath, outputDir, tfDirName, conf,
                    minSupport, maxNGramSize, minLLRValue, norm, logNormalize, reduceTasks, chunkSize,
                    sequentialAccessOutput, namedVectors);
        }

        String docFrequencyPaths = null;
        if (cmdLine.hasOption(docFrequencyPathOpt)) {
            docFrequencyPaths = (String) cmdLine.getValue(docFrequencyPathOpt);
            log.info("doc frequency path {}", docFrequencyPaths);
        }
        String tfVectorsPaths = null;
        if (cmdLine.hasOption(tfVectorsPathOpt)) {
            tfVectorsPaths = (String) cmdLine.getValue(tfVectorsPathOpt);
            log.info("tf vectors path {}", tfVectorsPaths);
        }

        Pair<Long[], List<Path>> docFrequenciesFeatures = null;
        // Should document frequency features be processed
        if (processIdf == 1) {
            log.info("Calculating IDF");
            docFrequenciesFeatures = TFIDFConverter.calculateDF(new Path(outputDir, tfDirName), outputDir,
                    conf, chunkSize);
            log.info("...docFrequencyPathBase {}, docFrequencyFile {}",
                    docFrequenciesFeatures.getFirst()[0], docFrequenciesFeatures.getFirst()[1]);
        } else if (processIdf == 3) {
            // load docFrequency path
            List<Path> docFrequencyChunks = Lists.newArrayList();
            String[] paths = docFrequencyPaths.split(",");
            long featureCount = 0;
            for (String path : paths) {
                int splitPos = path.lastIndexOf("/");
                String docFrequencyPathBase = path.substring(0, splitPos);
                String docFrequencyFile = path.substring(splitPos + 1, path.length());
                log.info("docFrequencyPathBase {}, docFrequencyFile {}", docFrequencyPathBase,
                        docFrequencyFile);
                Path docFrequencyPath = new Path(docFrequencyPathBase, docFrequencyFile);
                docFrequencyChunks.add(docFrequencyPath);
                /*for (Pair<IntWritable, LongWritable> record
                        : new SequenceFileIterable<IntWritable, LongWritable>(docFrequencyPath, true, conf)) {
                    featureCount = Math.max(record.getFirst().get(), featureCount);
                }*/
            }
            featureCount = 107623;
            featureCount++;
            long vectorCount = Long.MAX_VALUE;
            /*Path tfDirPath = new Path(tfVectorsPaths + "/part-r-00000");
            int i = 0;
            for (Pair<Text, VectorWritable> record
                    : new SequenceFileIterable<Text, VectorWritable>(tfDirPath, true, conf)) {
                i++;
            }
            if (i > 0) {
                vectorCount = i;
            }*/
            vectorCount = 80000;
            // read docFrequencyFile to get featureCount and vectorCount
            Long[] counts = { featureCount, vectorCount };
            log.info("featureCount {}, vectorCount------------------ {}", featureCount, vectorCount);
            docFrequenciesFeatures = new Pair<Long[], List<Path>>(counts, docFrequencyChunks);
        }

        long maxDF = maxDFPercent; // if we are pruning by std dev, then this will get changed
        if (shouldPrune) {
            long vectorCount = docFrequenciesFeatures.getFirst()[1];
            if (maxDFSigma >= 0.0) {
                Path dfDir = new Path(outputDir, TFIDFConverter.WORDCOUNT_OUTPUT_FOLDER);
                Path stdCalcDir = new Path(outputDir, HighDFWordsPruner.STD_CALC_DIR);
                // Calculate the standard deviation
                double stdDev = BasicStats.stdDevForGivenMean(dfDir, stdCalcDir, 0.0, conf);
                maxDF = (int) (100.0 * maxDFSigma * stdDev / vectorCount);
            }
            long maxDFThreshold = (long) (vectorCount * (maxDF / 100.0f));

            // Prune the term frequency vectors
            Path tfDir = new Path(outputDir, tfDirName);
            Path prunedTFDir = new Path(outputDir, DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER);
            Path prunedPartialTFDir = new Path(outputDir,
                    DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER + "-partial");
            log.info("Pruning");
            if (processIdf == 1 || processIdf == 3) {
                HighDFWordsPruner.pruneVectors(tfDir, prunedTFDir, prunedPartialTFDir, maxDFThreshold,
                        minDf, conf, docFrequenciesFeatures, -1.0f, false, reduceTasks);
            } else {
                HighDFWordsPruner.pruneVectors(tfDir, prunedTFDir, prunedPartialTFDir, maxDFThreshold,
                        minDf, conf, docFrequenciesFeatures, norm, logNormalize, reduceTasks);
            }
            HadoopUtil.delete(new Configuration(conf), tfDir);
        }

        if (processIdf == 1 || processIdf == 3) {
            TFIDFConverter.processTfIdf(
                    new Path(outputDir, DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER), outputDir,
                    conf, docFrequenciesFeatures, minDf, maxDF, norm, logNormalize,
                    sequentialAccessOutput, namedVectors, reduceTasks);
        }
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }
    return 0;
}
From source file:imageClassify.TestForest.java
@Override
public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputOpt = DefaultOptionCreator.inputOption().create();
    Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true)
        .withArgument(abuilder.withName("dataset").withMinimum(1).withMaximum(1).create())
        .withDescription("Dataset path").create();
    Option modelOpt = obuilder.withLongName("model").withShortName("m").withRequired(true)
        .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
        .withDescription("Path to the Decision Forest").create();
    Option outputOpt = DefaultOptionCreator.outputOption().create();
    Option analyzeOpt = obuilder.withLongName("analyze").withShortName("a").withRequired(false).create();
    Option mrOpt = obuilder.withLongName("mapreduce").withShortName("mr").withRequired(false).create();
    Option helpOpt = DefaultOptionCreator.helpOption();

    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(datasetOpt)
        .withOption(modelOpt).withOption(outputOpt).withOption(analyzeOpt).withOption(mrOpt)
        .withOption(helpOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption("help")) {
            CommandLineUtil.printHelp(group);
            return -1;
        }

        String dataName = cmdLine.getValue(inputOpt).toString();
        String datasetName = cmdLine.getValue(datasetOpt).toString();
        String modelName = cmdLine.getValue(modelOpt).toString();
        String outputName = cmdLine.hasOption(outputOpt) ? cmdLine.getValue(outputOpt).toString() : null;
        analyze = cmdLine.hasOption(analyzeOpt);
        useMapreduce = cmdLine.hasOption(mrOpt);

        if (log.isDebugEnabled()) {
            log.debug("input : {}", dataName);
            log.debug("dataset : {}", datasetName);
            log.debug("model : {}", modelName);
            log.debug("output : {}", outputName);
            log.debug("analyze : {}", analyze);
            log.debug("mapreduce : {}", useMapreduce);
        }

        dataPath = new Path(dataName);
        datasetPath = new Path(datasetName);
        modelPath = new Path(modelName);
        if (outputName != null) {
            outputPath = new Path(outputName);
        }
    } catch (OptionException e) {
        log.warn(e.toString(), e);
        CommandLineUtil.printHelp(group);
        return -1;
    }

    testForest();
    return 0;
}
From source file:it.jnrpe.server.console.PluginCommand.java
private Option toOption(PluginOption po) {
    DefaultOptionBuilder oBuilder = new DefaultOptionBuilder();
    oBuilder.withShortName(po.getOption()).withDescription(po.getDescription())
        .withRequired("true".equalsIgnoreCase(po.getRequired()));
    if (po.getLongOpt() != null) {
        oBuilder.withLongName(po.getLongOpt());
    }
    if (po.hasArgs()) {
        ArgumentBuilder aBuilder = new ArgumentBuilder();
        if (po.getArgName() != null) {
            aBuilder.withName(po.getArgName());
        }
        if (po.getArgsOptional()) {
            aBuilder.withMinimum(0);
        }
        if (po.getArgsCount() != null) {
            aBuilder.withMaximum(po.getArgsCount());
        } else {
            aBuilder.withMaximum(1);
        }
        if (po.getValueSeparator() != null && po.getValueSeparator().length() != 0) {
            aBuilder.withInitialSeparator(po.getValueSeparator().charAt(0));
            aBuilder.withSubsequentSeparator(po.getValueSeparator().charAt(0));
        }
        oBuilder.withArgument(aBuilder.create());
    }
    return oBuilder.create();
}
From source file:de.tu_chemnitz.mi.barcd.app.CommandLineArgumentsParser.java
private DefaultOption createXmlSchemaOption() {
    ArgumentBuilder ab = new ArgumentBuilder();
    DefaultOptionBuilder ob = new DefaultOptionBuilder();
    UrlValidator urlValidator = new UrlValidator();
    Argument xmlSchemaArgument = ab.withName("URL").withMinimum(1).withMaximum(1)
        .withValidator(urlValidator).create();
    DefaultOption xmlSchemaOption = ob.withLongName("xml-schema").withShortName("xs")
        .withDescription("Specify the location of the XML schema used for validation.")
        .withArgument(xmlSchemaArgument).withRequired(false).create();
    return xmlSchemaOption;
}
From source file:com.ibm.jaql.util.shell.JaqlShellArguments.java
@SuppressWarnings("unchecked") static JaqlShellArguments parseArgs(String... args) { // option builders final DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); final ArgumentBuilder abuilder = new ArgumentBuilder(); final GroupBuilder gbuilder = new GroupBuilder(); // create standard options Option optHelp = obuilder.withShortName("h").withShortName("?").withLongName("help") .withDescription("print this message").create(); Option optJars = obuilder.withShortName("j").withLongName("jars") .withDescription(/*from w ww. j av a 2s .c om*/ "comma-separated list of jar files to include user defined expressions or data stores") .withArgument(abuilder.withName("args").withMinimum(1).withMaximum(1).create()).create(); Option optSearchPath = obuilder.withShortName("jp").withLongName("jaql-path") .withDescription("colon seperated list of all search path entries") .withArgument(abuilder.withName("args").withMinimum(1).withMaximum(1).create()).create(); Option optBatch = obuilder.withShortName("b").withLongName("batch") .withDescription("run in batch mode (i.e., do not read from stdin)").create(); Option optOutOptions = obuilder.withShortName("o").withLongName("outoptions") .withDescription("output options: json, del and xml or an output IO descriptor. " + "This option is ignored when not running in batch mode.") .withArgument(abuilder.withName("outoptions").withMinimum(1).withMaximum(1).create()).create(); Option optEval = obuilder.withShortName("e").withLongName("eval") .withDescription("evaluate Jaql expression") .withArgument(abuilder.withName("expr").withMinimum(1).withMaximum(1).create()).create(); // create mini-cluster options Option optCluster = obuilder.withShortName("c").withLongName("cluster") .withDescription("use existing cluster (i.e., do not launch a mini-cluster)").create(); Option optNumNodes = obuilder.withShortName("n").withLongName("no-nodes") .withDescription("mini-cluster option: number of nodes to spawn") .withArgument(abuilder.withName("arg").withMinimum(1).withMaximum(1) .withValidator(NumberValidator.getIntegerInstance()).create()) .create(); Option optDir = obuilder.withShortName("d").withLongName("hdfs-dir") .withDescription("mini-cluster option: root HDFs directory") .withArgument(abuilder.withName("arg").withMinimum(1).withMaximum(1).create()).create(); Group clusterOptions = gbuilder.withName("Cluster options").withOption(optCluster).withOption(optDir) .withOption(optNumNodes).create(); // create input files option Option optInputFiles = abuilder.withName("file").withDescription("list of input files").withMinimum(0) .create(); Option optLog = obuilder.withShortName("l").withLongName("log") .withDescription("log options: json, del and xml or an output IO descriptor. 
") .withArgument(abuilder.withName("arg").withMinimum(1).withMaximum(1).create()).create(); // combine all options Group options = gbuilder.withName("options").withOption(optHelp).withOption(optJars) .withOption(optSearchPath).withOption(optBatch).withOption(optLog).withOption(optOutOptions) .withOption(optEval).withOption(optInputFiles).withOption(clusterOptions).create(); // parse and print help if necessary CommandLine cl; try { Parser parser = new Parser(); parser.setGroup(options); cl = parser.parse(args); } catch (OptionException e) { printHelpAndExit(e, null, options); return null; } if (cl.hasOption(optHelp)) { printHelpAndExit(null, options); } // validate arguments JaqlShellArguments result = new JaqlShellArguments(); // mini-cluster options if (cl.hasOption(optCluster)) { result.useExistingCluster = true; } if (cl.hasOption(optDir)) { if (result.useExistingCluster) { printHelpAndExit("Options " + optCluster.getPreferredName() + " and " + optDir.getPreferredName() + " are mutually exclusive", options); } result.hdfsDir = (String) cl.getValue(optDir); } if (cl.hasOption(optNumNodes)) { if (result.useExistingCluster) { printHelpAndExit("Options " + optCluster.getPreferredName() + " and " + optNumNodes.getPreferredName() + " are mutually exclusive", options); } result.numNodes = ((Number) cl.getValue(optNumNodes)).intValue(); } // jar files if (cl.hasOption(optJars)) { result.jars = ((String) cl.getValue(optJars)).split(","); for (String jar : result.jars) { if (!new File(jar).exists()) { printHelpAndExit("Jar file " + jar + " not found", options); } } } // search path directories if (cl.hasOption(optSearchPath)) { result.searchPath = ((String) cl.getValue(optSearchPath)).split(":"); for (String dir : result.searchPath) { if (!new File(dir).exists() || !new File(dir).isDirectory()) { printHelpAndExit("Search-path entry " + dir + " not found or is no directory", options); } } } if (cl.hasOption(optBatch)) { result.batchMode = true; if (cl.hasOption(optOutOptions)) { String format = (String) cl.getValue(optOutOptions); try { result.outputAdapter = getOutputAdapter(format); } catch (Exception e) { printHelpAndExit(e, "\"" + format + "\" is neither a valid output format nor a valid IO descriptor", options); } } } // input if (cl.hasOption(optEval)) { String eval = (String) cl.getValue(optEval); if (!eval.endsWith(";")) eval += ";"; result.addInputStream(new ByteArrayInputStream(eval.getBytes())); } if (cl.hasOption(optInputFiles)) { List<String> files = (List<String>) cl.getValues(optInputFiles); for (String file : files) { try { result.addInputStream(new FileInputStream(file)); } catch (FileNotFoundException e) { printHelpAndExit(e, "Input file " + file + " not found", options); } } } // error log if (cl.hasOption(optLog)) { String path = (String) cl.getValue(optLog); try { BufferedJsonRecord logFD = new BufferedJsonRecord(); logFD.add(Adapter.TYPE_NAME, new JsonString("local")); logFD.add(Adapter.LOCATION_NAME, new JsonString(path)); OutputAdapter oa = (OutputAdapter) JaqlUtil.getAdapterStore().output.getAdapter(logFD); result.logAdapter = oa; } catch (Exception e) { printHelpAndExit(e, "\"" + path + "\" invalid", options); } } if (!result.batchMode) { result.addStdin(); } return result; }
From source file:haflow.component.mahout.logistic.RunLogistic.java
private static boolean parseArgs(String[] args) {
    DefaultOptionBuilder builder = new DefaultOptionBuilder();

    Option help = builder.withLongName("help").withDescription("print this list").create();
    Option quiet = builder.withLongName("quiet").withDescription("be extra quiet").create();
    Option auc = builder.withLongName("auc").withDescription("print AUC").create();
    Option confusion = builder.withLongName("confusion").withDescription("print confusion matrix").create();
    Option scores = builder.withLongName("scores").withDescription("print scores").create();

    ArgumentBuilder argumentBuilder = new ArgumentBuilder();
    Option inputFileOption = builder.withLongName("input").withRequired(true)
        .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
        .withDescription("where to get training data").create();
    Option modelFileOption = builder.withLongName("model").withRequired(true)
        .withArgument(argumentBuilder.withName("model").withMaximum(1).create())
        .withDescription("where to get a model").create();
    Option outputFileOption = builder.withLongName("output").withRequired(true)
        .withArgument(argumentBuilder.withName("output").withMaximum(1).create())
        .withDescription("where to store prediction output").create();
    Option accurateFileOption = builder.withLongName("accurate").withRequired(true)
        .withArgument(argumentBuilder.withName("accurate").withMaximum(1).create())
        .withDescription("where to store accuracy information").create();

    Group normalArgs = new GroupBuilder().withOption(help).withOption(quiet).withOption(auc)
        .withOption(scores).withOption(confusion).withOption(inputFileOption)
        .withOption(modelFileOption).withOption(outputFileOption).withOption(accurateFileOption).create();

    Parser parser = new Parser();
    parser.setHelpOption(help);
    parser.setHelpTrigger("--help");
    parser.setGroup(normalArgs);
    parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));
    CommandLine cmdLine = parser.parseAndHelp(args);

    if (cmdLine == null) {
        return false;
    }

    inputFile = getStringArgument(cmdLine, inputFileOption);
    modelFile = getStringArgument(cmdLine, modelFileOption);
    outputFile = getStringArgument(cmdLine, outputFileOption);
    accurateFile = getStringArgument(cmdLine, accurateFileOption);
    showAuc = getBooleanArgument(cmdLine, auc);
    showScores = getBooleanArgument(cmdLine, scores);
    showConfusion = getBooleanArgument(cmdLine, confusion);
    return true;
}
From source file:com.ml.ira.algos.RunLogistic.java
private static boolean parseArgs(String[] args) {
    DefaultOptionBuilder builder = new DefaultOptionBuilder();

    Option help = builder.withLongName("help").withDescription("print this list").create();
    Option quiet = builder.withLongName("quiet").withDescription("be extra quiet").create();
    Option auc = builder.withLongName("auc").withDescription("print AUC").create();
    Option confusion = builder.withLongName("confusion").withDescription("print confusion matrix").create();
    Option scores = builder.withLongName("scores").withDescription("print scores").create();

    ArgumentBuilder argumentBuilder = new ArgumentBuilder();
    Option inputFileOption = builder.withLongName("input").withRequired(true)
        .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
        .withDescription("where to get training data").create();
    Option modelFileOption = builder.withLongName("model").withRequired(true)
        .withArgument(argumentBuilder.withName("model").withMaximum(1).create())
        .withDescription("where to get a model").create();
    Option fieldNames = builder.withLongName("fdnames").withRequired(true)
        .withArgument(argumentBuilder.withName("fns").create())
        .withDescription("the field names of training data set").create();

    Group normalArgs = new GroupBuilder().withOption(help).withOption(quiet).withOption(auc)
        .withOption(scores).withOption(confusion).withOption(inputFileOption)
        .withOption(modelFileOption).withOption(fieldNames).create();

    Parser parser = new Parser();
    parser.setHelpOption(help);
    parser.setHelpTrigger("--help");
    parser.setGroup(normalArgs);
    parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));
    CommandLine cmdLine = parser.parseAndHelp(args);

    if (cmdLine == null) {
        return false;
    }

    inputFile = getStringArgument(cmdLine, inputFileOption);
    modelFile = getStringArgument(cmdLine, modelFileOption);
    showAuc = getBooleanArgument(cmdLine, auc);
    showScores = getBooleanArgument(cmdLine, scores);
    showConfusion = getBooleanArgument(cmdLine, confusion);
    RunLogistic.fieldNames = getStringArgument(cmdLine, fieldNames);

    System.out.println("inputFile: " + inputFile);
    System.out.println("modelFile: " + modelFile);
    System.out.println("fieldNames: " + RunLogistic.fieldNames);
    return true;
}
From source file:com.memonews.mahout.sentiment.SentimentModelTester.java
boolean parseArgs(final String[] args) {
    final DefaultOptionBuilder builder = new DefaultOptionBuilder();
    final Option help = builder.withLongName("help").withDescription("print this list").create();

    final ArgumentBuilder argumentBuilder = new ArgumentBuilder();
    final Option inputFileOption = builder.withLongName("input").withRequired(true)
        .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
        .withDescription("where to get training data").create();
    final Option modelFileOption = builder.withLongName("model").withRequired(true)
        .withArgument(argumentBuilder.withName("model").withMaximum(1).create())
        .withDescription("where to get a model").create();

    final Group normalArgs = new GroupBuilder().withOption(help).withOption(inputFileOption)
        .withOption(modelFileOption).create();

    final Parser parser = new Parser();
    parser.setHelpOption(help);
    parser.setHelpTrigger("--help");
    parser.setGroup(normalArgs);
    parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));
    final CommandLine cmdLine = parser.parseAndHelp(args);

    if (cmdLine == null) {
        return false;
    }

    inputFile = (String) cmdLine.getValue(inputFileOption);
    modelFile = (String) cmdLine.getValue(modelFileOption);
    return true;
}
From source file:haflow.component.mahout.logistic.TrainLogistic.java
private static boolean parseArgs(String[] args) {
    DefaultOptionBuilder builder = new DefaultOptionBuilder();

    Option help = builder.withLongName("help").withDescription("print this list").create();
    Option quiet = builder.withLongName("quiet").withDescription("be extra quiet").create();
    Option scores = builder.withLongName("scores")
        .withDescription("output score diagnostics during training").create();

    ArgumentBuilder argumentBuilder = new ArgumentBuilder();
    Option inputFile = builder.withLongName("input").withRequired(true)
        .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
        .withDescription("where to get training data").create();
    Option outputFile = builder.withLongName("output").withRequired(true)
        .withArgument(argumentBuilder.withName("output").withMaximum(1).create())
        .withDescription("where to store the model").create();
    Option inforFile = builder.withLongName("infor").withRequired(true)
        .withArgument(argumentBuilder.withName("infor").withMaximum(1).create())
        .withDescription("where to store information about the training").create();
    Option predictors = builder.withLongName("predictors").withRequired(true)
        .withArgument(argumentBuilder.withName("p").create())
        .withDescription("a list of predictor variables").create();
    Option types = builder.withLongName("types").withRequired(true)
        .withArgument(argumentBuilder.withName("t").create())
        .withDescription("a list of predictor variable types (numeric, word, or text)").create();
    Option target = builder.withLongName("target").withRequired(true)
        .withArgument(argumentBuilder.withName("target").withMaximum(1).create())
        .withDescription("the name of the target variable").create();
    Option features = builder.withLongName("features")
        .withArgument(argumentBuilder.withName("numFeatures").withDefault("1000").withMaximum(1).create())
        .withDescription("the number of internal hashed features to use").create();
    Option passes = builder.withLongName("passes")
        .withArgument(argumentBuilder.withName("passes").withDefault("2").withMaximum(1).create())
        .withDescription("the number of times to pass over the input data").create();
    Option lambda = builder.withLongName("lambda")
        .withArgument(argumentBuilder.withName("lambda").withDefault("1e-4").withMaximum(1).create())
        .withDescription("the amount of coefficient decay to use").create();
    Option rate = builder.withLongName("rate")
        .withArgument(argumentBuilder.withName("learningRate").withDefault("1e-3").withMaximum(1).create())
        .withDescription("the learning rate").create();
    Option noBias = builder.withLongName("noBias").withDescription("don't include a bias term").create();
    Option targetCategories = builder.withLongName("categories").withRequired(true)
        .withArgument(argumentBuilder.withName("number").withMaximum(1).create())
        .withDescription("the number of target categories to be considered").create();

    Group normalArgs = new GroupBuilder().withOption(help).withOption(quiet).withOption(inputFile)
        .withOption(outputFile).withOption(inforFile).withOption(target).withOption(targetCategories)
        .withOption(predictors).withOption(types).withOption(passes).withOption(lambda).withOption(rate)
        .withOption(noBias).withOption(features).create();

    Parser parser = new Parser();
    parser.setHelpOption(help);
    parser.setHelpTrigger("--help");
    parser.setGroup(normalArgs);
    parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));
    CommandLine cmdLine = parser.parseAndHelp(args);

    if (cmdLine == null) {
        return false;
    }

    TrainLogistic.inputFile = getStringArgument(cmdLine, inputFile);
    TrainLogistic.outputFile = getStringArgument(cmdLine, outputFile);
    TrainLogistic.inforFile = getStringArgument(cmdLine, inforFile);

    List<String> typeList = Lists.newArrayList();
    for (Object x : cmdLine.getValues(types)) {
        typeList.add(x.toString());
    }
    List<String> predictorList = Lists.newArrayList();
    for (Object x : cmdLine.getValues(predictors)) {
        predictorList.add(x.toString());
    }

    lmp = new LogisticModelParameters();
    lmp.setTargetVariable(getStringArgument(cmdLine, target));
    lmp.setMaxTargetCategories(getIntegerArgument(cmdLine, targetCategories));
    lmp.setNumFeatures(getIntegerArgument(cmdLine, features));
    lmp.setUseBias(!getBooleanArgument(cmdLine, noBias));
    lmp.setTypeMap(predictorList, typeList);
    lmp.setLambda(getDoubleArgument(cmdLine, lambda));
    lmp.setLearningRate(getDoubleArgument(cmdLine, rate));
    TrainLogistic.scores = getBooleanArgument(cmdLine, scores);
    TrainLogistic.passes = getIntegerArgument(cmdLine, passes);

    System.out.println("infor:" + TrainLogistic.inforFile);
    System.out.println("target:" + getStringArgument(cmdLine, target));
    System.out.println("targetCategories:" + String.valueOf(getStringArgument(cmdLine, targetCategories)));
    System.out.println("features:" + String.valueOf(getStringArgument(cmdLine, features)));
    System.out.println("lambda:" + String.valueOf(getStringArgument(cmdLine, lambda)));
    System.out.println("rate:" + String.valueOf(getStringArgument(cmdLine, rate)));
    return true;
}
From source file:com.ml.ira.algos.TrainLogistic.java
private static boolean parseArgs(String[] args) {
    DefaultOptionBuilder builder = new DefaultOptionBuilder();

    Option help = builder.withLongName("help").withDescription("print this list").create();
    Option quiet = builder.withLongName("quiet").withDescription("be extra quiet").create();
    Option scores = builder.withLongName("scores")
        .withDescription("output score diagnostics during training").create();

    ArgumentBuilder argumentBuilder = new ArgumentBuilder();
    Option inputFile = builder.withLongName("input").withRequired(true)
        .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
        .withDescription("where to get training data").create();
    Option outputFile = builder.withLongName("output").withRequired(true)
        .withArgument(argumentBuilder.withName("output").withMaximum(1).create())
        .withDescription("where to store the model").create();
    Option predictors = builder.withLongName("predictors").withRequired(true)
        .withArgument(argumentBuilder.withName("p").create())
        .withDescription("a list of predictor variables").create();
    Option types = builder.withLongName("types").withRequired(true)
        .withArgument(argumentBuilder.withName("t").create())
        .withDescription("a list of predictor variable types (numeric, word, or text)").create();
    Option target = builder.withLongName("target").withRequired(true)
        .withArgument(argumentBuilder.withName("target").withMaximum(1).create())
        .withDescription("the name of the target variable").create();
    Option features = builder.withLongName("features")
        .withArgument(argumentBuilder.withName("numFeatures").withDefault("1000").withMaximum(1).create())
        .withDescription("the number of internal hashed features to use").create();
    Option passes = builder.withLongName("passes")
        .withArgument(argumentBuilder.withName("passes").withDefault("2").withMaximum(1).create())
        .withDescription("the number of times to pass over the input data").create();
    Option lambda = builder.withLongName("lambda")
        .withArgument(argumentBuilder.withName("lambda").withDefault("1e-4").withMaximum(1).create())
        .withDescription("the amount of coefficient decay to use").create();
    Option rate = builder.withLongName("rate")
        .withArgument(argumentBuilder.withName("learningRate").withDefault("1e-3").withMaximum(1).create())
        .withDescription("the learning rate").create();
    Option noBias = builder.withLongName("noBias").withDescription("don't include a bias term").create();
    Option targetCategories = builder.withLongName("categories").withRequired(true)
        .withArgument(argumentBuilder.withName("number").withMaximum(1).create())
        .withDescription("the number of target categories to be considered").create();
    Option fieldNames = builder.withLongName("fdnames").withRequired(true)
        .withArgument(argumentBuilder.withName("fns").create())
        .withDescription("the field names of training data set").create();

    Group normalArgs = new GroupBuilder().withOption(help).withOption(quiet).withOption(inputFile)
        .withOption(outputFile).withOption(target).withOption(targetCategories).withOption(predictors)
        .withOption(types).withOption(passes).withOption(lambda).withOption(rate).withOption(noBias)
        .withOption(features).withOption(fieldNames).create();

    Parser parser = new Parser();
    parser.setHelpOption(help);
    parser.setHelpTrigger("--help");
    parser.setGroup(normalArgs);
    parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));
    CommandLine cmdLine = parser.parseAndHelp(args);

    if (cmdLine == null) {
        return false;
    }

    TrainLogistic.inputFile = getStringArgument(cmdLine, inputFile);
    TrainLogistic.outputFile = getStringArgument(cmdLine, outputFile);
    TrainLogistic.fieldNames = getStringArgument(cmdLine, fieldNames);

    List<String> typeList = Lists.newArrayList();
    String tmp = getStringArgument(cmdLine, types);
    if (tmp != null) {
        typeList.addAll(Arrays.asList(tmp.split(",")));
    }
    tmp = getStringArgument(cmdLine, predictors);
    List<String> predictorList = Lists.newArrayList();
    if (tmp != null) {
        predictorList.addAll(Arrays.asList(tmp.split(",")));
    }

    lmp = new LogisticModelParameters();
    lmp.setTargetVariable(getStringArgument(cmdLine, target));
    lmp.setMaxTargetCategories(getIntegerArgument(cmdLine, targetCategories));
    lmp.setNumFeatures(getIntegerArgument(cmdLine, features));
    lmp.setUseBias(!getBooleanArgument(cmdLine, noBias));
    lmp.setTypeMap(predictorList, typeList);
    lmp.setFieldNames(TrainLogistic.fieldNames);
    lmp.setLambda(getDoubleArgument(cmdLine, lambda));
    lmp.setLearningRate(getDoubleArgument(cmdLine, rate));
    TrainLogistic.scores = getBooleanArgument(cmdLine, scores);
    TrainLogistic.passes = getIntegerArgument(cmdLine, passes);

    System.out.println("@Train inputFile: " + TrainLogistic.inputFile);
    System.out.println("@Train fieldNames: " + TrainLogistic.fieldNames);
    System.out.println("@Train typeList: " + typeList);
    System.out.println("@Train predictorList: " + predictorList);
    return true;
}