Example usage for org.apache.commons.cli2.builder DefaultOptionBuilder DefaultOptionBuilder

Introduction

This page collects example usages of the DefaultOptionBuilder() constructor from org.apache.commons.cli2.builder, gathered from open source projects.

Prototype

public DefaultOptionBuilder() 

Document

Creates a new DefaultOptionBuilder using defaults.

Usage
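Before the project-specific examples, here is a minimal, self-contained sketch of the build-and-parse cycle that all of the snippets below follow: a DefaultOptionBuilder creates the options, an ArgumentBuilder supplies their arguments, a GroupBuilder collects them into a Group, and a Parser evaluates the command line. The option names are illustrative, and the import paths are assumptions, since the snippets below omit their import lists.

import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
import org.apache.commons.cli2.OptionException;
import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;

public class MinimalCli2Example {
    public static void main(String[] args) {
        DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
        ArgumentBuilder abuilder = new ArgumentBuilder();
        GroupBuilder gbuilder = new GroupBuilder();

        // A flag option with no argument: --verbose / -v
        Option verbose = obuilder.withLongName("verbose").withShortName("v")
                .withDescription("enable verbose output").create();

        // An option taking exactly one argument: --input / -i <path>
        // (the builder resets after each create(), so it can be reused)
        Option input = obuilder.withLongName("input").withShortName("i").withRequired(true)
                .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
                .withDescription("input path").create();

        Group group = gbuilder.withName("options").withOption(verbose).withOption(input).create();

        try {
            Parser parser = new Parser();
            parser.setGroup(group);
            CommandLine cmdLine = parser.parse(args);
            String inputPath = (String) cmdLine.getValue(input);
            System.out.println("input = " + inputPath + ", verbose = " + cmdLine.hasOption(verbose));
        } catch (OptionException e) {
            // On a parse error, CLI2 reports which option failed
            System.err.println(e.getMessage());
        }
    }
}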

From source file:com.ml.hadoop.nlp.SparseVectorsFromSequenceFiles.java

@Override
public int run(String[] args) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputDirOpt = DefaultOptionCreator.inputOption().create();

    Option outputDirOpt = DefaultOptionCreator.outputOption().create();

    Option minSupportOpt = obuilder.withLongName("minSupport")
            .withArgument(abuilder.withName("minSupport").withMinimum(1).withMaximum(1).create())
            .withDescription("(Optional) Minimum Support. Default Value: 2").withShortName("s").create();

    Option analyzerNameOpt = obuilder.withLongName("analyzerName")
            .withArgument(abuilder.withName("analyzerName").withMinimum(1).withMaximum(1).create())
            .withDescription("The class name of the analyzer").withShortName("a").create();

    Option dictionaryPathOpt = obuilder.withLongName("dictionaryPath")
            .withArgument(abuilder.withName("dictionaryPath").withMinimum(1).withMaximum(1).create())
            .withDescription("Dictionary path for update TFIDF").withShortName("dp").create();

    Option docFrequencyPathOpt = obuilder.withLongName("docFrequencyPath")
            .withArgument(abuilder.withName("docFrequencyPath").withMinimum(1).withMaximum(1).create())
            .withDescription("Doc frequency path for update TFIDF").withShortName("dfp").create();

    Option tfVectorsPathOpt = obuilder.withLongName("tfVectorsPath")
            .withArgument(abuilder.withName("tfVectorsPath").withMinimum(1).withMaximum(1).create())
            .withDescription("TF Vectors path").withShortName("tfvp").create();

    Option chunkSizeOpt = obuilder.withLongName("chunkSize")
            .withArgument(abuilder.withName("chunkSize").withMinimum(1).withMaximum(1).create())
            .withDescription("The chunkSize in MegaBytes. 100-10000 MB").withShortName("chunk").create();

    Option weightOpt = obuilder.withLongName("weight").withRequired(false)
            .withArgument(abuilder.withName("weight").withMinimum(1).withMaximum(1).create())
            .withDescription("The kind of weight to use. Currently TF , TFIDF or TFIDF_UPDATE")
            .withShortName("wt").create();

    Option minDFOpt = obuilder.withLongName("minDF").withRequired(false)
            .withArgument(abuilder.withName("minDF").withMinimum(1).withMaximum(1).create())
            .withDescription("The minimum document frequency.  Default is 1").withShortName("md").create();

    Option maxDFPercentOpt = obuilder.withLongName("maxDFPercent").withRequired(false)
            .withArgument(abuilder.withName("maxDFPercent").withMinimum(1).withMaximum(1).create())
            .withDescription(
                    "The max percentage of docs for the DF.  Can be used to remove really high frequency terms."
                            + " Expressed as an integer between 0 and 100. Default is 99.  If maxDFSigma is also set, "
                            + "it will override this value.")
            .withShortName("x").create();

    Option maxDFSigmaOpt = obuilder.withLongName("maxDFSigma").withRequired(false)
            .withArgument(abuilder.withName("maxDFSigma").withMinimum(1).withMaximum(1).create())
            .withDescription(
                    "What portion of the tf (tf-idf) vectors to be used, expressed in times the standard deviation (sigma) "
                            + "of the document frequencies of these vectors. Can be used to remove really high frequency terms."
                            + " Expressed as a double value. Good value to be specified is 3.0. In case the value is less "
                            + "than 0 no vectors will be filtered out. Default is -1.0.  Overrides maxDFPercent")
            .withShortName("xs").create();

    Option minLLROpt = obuilder.withLongName("minLLR").withRequired(false)
            .withArgument(abuilder.withName("minLLR").withMinimum(1).withMaximum(1).create())
            .withDescription("(Optional)The minimum Log Likelihood Ratio(Float)  Default is "
                    + LLRReducer.DEFAULT_MIN_LLR)
            .withShortName("ml").create();

    Option numReduceTasksOpt = obuilder.withLongName("numReducers")
            .withArgument(abuilder.withName("numReducers").withMinimum(1).withMaximum(1).create())
            .withDescription("(Optional) Number of reduce tasks. Default Value: 1").withShortName("nr")
            .create();

    Option powerOpt = obuilder.withLongName("norm").withRequired(false)
            .withArgument(abuilder.withName("norm").withMinimum(1).withMaximum(1).create())
            .withDescription(
                    "The norm to use, expressed as either a float or \"INF\" if you want to use the Infinite norm.  "
                            + "Must be greater or equal to 0.  The default is not to normalize")
            .withShortName("n").create();

    Option logNormalizeOpt = obuilder.withLongName("logNormalize").withRequired(false)
            .withDescription("(Optional) Whether output vectors should be logNormalize. If set true else false")
            .withShortName("lnorm").create();

    Option maxNGramSizeOpt = obuilder.withLongName("maxNGramSize").withRequired(false)
            .withArgument(abuilder.withName("ngramSize").withMinimum(1).withMaximum(1).create())
            .withDescription("(Optional) The maximum size of ngrams to create"
                    + " (2 = bigrams, 3 = trigrams, etc) Default Value:1")
            .withShortName("ng").create();

    Option sequentialAccessVectorOpt = obuilder.withLongName("sequentialAccessVector").withRequired(false)
            .withDescription(
                    "(Optional) Whether output vectors should be SequentialAccessVectors. If set true else false")
            .withShortName("seq").create();

    Option namedVectorOpt = obuilder.withLongName("namedVector").withRequired(false)
            .withDescription("(Optional) Whether output vectors should be NamedVectors. If set true else false")
            .withShortName("nv").create();

    Option overwriteOutput = obuilder.withLongName("overwrite").withRequired(false)
            .withDescription("If set, overwrite the output directory").withShortName("ow").create();
    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h")
            .create();

    Group group = gbuilder.withName("Options").withOption(minSupportOpt).withOption(analyzerNameOpt)
            .withOption(dictionaryPathOpt).withOption(docFrequencyPathOpt).withOption(tfVectorsPathOpt)
            .withOption(chunkSizeOpt).withOption(outputDirOpt).withOption(inputDirOpt).withOption(minDFOpt)
            .withOption(maxDFSigmaOpt).withOption(maxDFPercentOpt).withOption(weightOpt).withOption(powerOpt)
            .withOption(minLLROpt).withOption(numReduceTasksOpt).withOption(maxNGramSizeOpt)
            .withOption(overwriteOutput).withOption(helpOpt).withOption(sequentialAccessVectorOpt)
            .withOption(namedVectorOpt).withOption(logNormalizeOpt).create();
    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        parser.setHelpOption(helpOpt);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return -1;
        }

        Path inputDir = new Path((String) cmdLine.getValue(inputDirOpt));
        Path outputDir = new Path((String) cmdLine.getValue(outputDirOpt));

        int chunkSize = 100;
        if (cmdLine.hasOption(chunkSizeOpt)) {
            chunkSize = Integer.parseInt((String) cmdLine.getValue(chunkSizeOpt));
        }
        int minSupport = 2;
        if (cmdLine.hasOption(minSupportOpt)) {
            String minSupportString = (String) cmdLine.getValue(minSupportOpt);
            minSupport = Integer.parseInt(minSupportString);
        }

        int maxNGramSize = 1;

        if (cmdLine.hasOption(maxNGramSizeOpt)) {
            try {
                maxNGramSize = Integer.parseInt(cmdLine.getValue(maxNGramSizeOpt).toString());
            } catch (NumberFormatException ex) {
                log.warn("Could not parse ngram size option");
            }
        }
        log.info("Maximum n-gram size is: {}", maxNGramSize);

        if (cmdLine.hasOption(overwriteOutput)) {
            HadoopUtil.delete(getConf(), outputDir);
        }

        float minLLRValue = LLRReducer.DEFAULT_MIN_LLR;
        if (cmdLine.hasOption(minLLROpt)) {
            minLLRValue = Float.parseFloat(cmdLine.getValue(minLLROpt).toString());
        }
        log.info("Minimum LLR value: {}", minLLRValue);

        int reduceTasks = 1;
        if (cmdLine.hasOption(numReduceTasksOpt)) {
            reduceTasks = Integer.parseInt(cmdLine.getValue(numReduceTasksOpt).toString());
        }
        log.info("Changed... Number of reduce tasks: {}", reduceTasks);

        Class<? extends Analyzer> analyzerClass = StandardAnalyzer.class;
        if (cmdLine.hasOption(analyzerNameOpt)) {
            String className = cmdLine.getValue(analyzerNameOpt).toString();
            analyzerClass = Class.forName(className).asSubclass(Analyzer.class);
            // try instantiating it, b/c there isn't any point in setting it if
            // you can't instantiate it
            AnalyzerUtils.createAnalyzer(analyzerClass);
        }

        // weighting mode: 1 = TFIDF (default), 2 = TF, 3 = TFIDF update
        int processIdf;

        if (cmdLine.hasOption(weightOpt)) {
            String wString = cmdLine.getValue(weightOpt).toString();
            if ("tf".equalsIgnoreCase(wString)) {
                processIdf = 2;
            } else if ("tfidf".equalsIgnoreCase(wString)) {
                processIdf = 1;
            } else if ("tfidf_update".equalsIgnoreCase(wString)) {
                processIdf = 3;
            } else {
                throw new OptionException(weightOpt);
            }
        } else {
            processIdf = 1;
        }

        int minDf = 1;
        if (cmdLine.hasOption(minDFOpt)) {
            minDf = Integer.parseInt(cmdLine.getValue(minDFOpt).toString());
        }
        int maxDFPercent = 99;
        if (cmdLine.hasOption(maxDFPercentOpt)) {
            maxDFPercent = Integer.parseInt(cmdLine.getValue(maxDFPercentOpt).toString());
        }
        double maxDFSigma = -1.0;
        if (cmdLine.hasOption(maxDFSigmaOpt)) {
            maxDFSigma = Double.parseDouble(cmdLine.getValue(maxDFSigmaOpt).toString());
        }

        float norm = PartialVectorMerger.NO_NORMALIZING;
        if (cmdLine.hasOption(powerOpt)) {
            String power = cmdLine.getValue(powerOpt).toString();
            if ("INF".equals(power)) {
                norm = Float.POSITIVE_INFINITY;
            } else {
                norm = Float.parseFloat(power);
            }
        }

        boolean logNormalize = false;
        if (cmdLine.hasOption(logNormalizeOpt)) {
            logNormalize = true;
        }
        log.info("Tokenizing documents in {}", inputDir);
        Configuration conf = getConf();
        Path tokenizedPath = new Path(outputDir, DocumentProcessor.TOKENIZED_DOCUMENT_OUTPUT_FOLDER);
        DocumentProcessor.tokenizeDocuments(inputDir, analyzerClass, tokenizedPath, conf);

        boolean sequentialAccessOutput = false;
        if (cmdLine.hasOption(sequentialAccessVectorOpt)) {
            sequentialAccessOutput = true;
        }

        boolean namedVectors = false;
        if (cmdLine.hasOption(namedVectorOpt)) {
            namedVectors = true;
        }
        boolean shouldPrune = maxDFSigma >= 0.0 || maxDFPercent > 0.00;
        String tfDirName = shouldPrune ? DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER + "-toprune"
                : DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER;
        log.info("Creating Term Frequency Vectors, prune {}", shouldPrune);

        String dictionaryPath = null;
        if (cmdLine.hasOption(dictionaryPathOpt)) {
            dictionaryPath = (String) cmdLine.getValue(dictionaryPathOpt);
            log.info("begin dic path {}", dictionaryPath);
        }

        if (processIdf == 1) {
            DictionaryVectorizer.createTermFrequencyVectors(tokenizedPath, outputDir, tfDirName, conf,
                    minSupport, maxNGramSize, minLLRValue, -1.0f, false, reduceTasks, chunkSize,
                    sequentialAccessOutput, namedVectors);
        } else if (processIdf == 3) {
            log.info("begin update term----------------");
            DictionaryVectorizer.createUpdateTermFrequencyVectors(tokenizedPath, outputDir, tfDirName, conf,
                    maxNGramSize, dictionaryPath, norm, logNormalize, reduceTasks, sequentialAccessOutput,
                    namedVectors);
        } else {
            DictionaryVectorizer.createTermFrequencyVectors(tokenizedPath, outputDir, tfDirName, conf,
                    minSupport, maxNGramSize, minLLRValue, norm, logNormalize, reduceTasks, chunkSize,
                    sequentialAccessOutput, namedVectors);
        }

        String docFrequencyPaths = null;
        if (cmdLine.hasOption(docFrequencyPathOpt)) {
            docFrequencyPaths = (String) cmdLine.getValue(docFrequencyPathOpt);
            log.info("doc frequency path {}", docFrequencyPaths);
        }
        String tfVectorsPaths = null;
        if (cmdLine.hasOption(tfVectorsPathOpt)) {
            tfVectorsPaths = (String) cmdLine.getValue(tfVectorsPathOpt);
            log.info("tf vectors path {}", tfVectorsPaths);
        }

        Pair<Long[], List<Path>> docFrequenciesFeatures = null;
        // Should document frequency features be processed
        if (processIdf == 1) {
            log.info("Calculating IDF");
            docFrequenciesFeatures = TFIDFConverter.calculateDF(new Path(outputDir, tfDirName), outputDir, conf,
                    chunkSize);
            log.info("...docFrequencyPathBase {}, docFrequencyFile {}", docFrequenciesFeatures.getFirst()[0],
                    docFrequenciesFeatures.getFirst()[1]);
        } else if (processIdf == 3) {
            // load docFrequency path
            List<Path> docFrequencyChunks = Lists.newArrayList();
            String[] paths = docFrequencyPaths.split(",");

            long featureCount = 0;
            for (String path : paths) {
                int splitPos = path.lastIndexOf("/");
                String docFrequencyPathBase = path.substring(0, splitPos);
                String docFrequencyFile = path.substring(splitPos + 1, path.length());
                log.info("docFrequencyPathBase {}, docFrequencyFile {}", docFrequencyPathBase,
                        docFrequencyFile);
                Path docFrequencyPath = new Path(docFrequencyPathBase, docFrequencyFile);
                docFrequencyChunks.add(docFrequencyPath);

                /*for (Pair<IntWritable, LongWritable> record
                         : new SequenceFileIterable<IntWritable, LongWritable>(docFrequencyPath, true, conf)) {
                     featureCount = Math.max(record.getFirst().get(), featureCount);
                 }*/
            }
            // NOTE: hard-coded feature count stands in for the commented-out SequenceFile scan above
            featureCount = 107623;
            featureCount++;

            long vectorCount = Long.MAX_VALUE;
            /*Path tfDirPath = new Path(tfVectorsPaths + "/part-r-00000");
            int i = 0;
            for (Pair<Text, VectorWritable> record
                     : new SequenceFileIterable<Text, VectorWritable>(tfDirPath, true, conf)) {
               i++;
             }
            if (i > 0) {
               vectorCount = i;
            }*/
            // NOTE: hard-coded stand-in for the commented-out scan that would read
            // the TF vectors to get vectorCount
            vectorCount = 80000;
            Long[] counts = { featureCount, vectorCount };
            log.info("featureCount {}, vectorCount------------------ {}", featureCount, vectorCount);
            docFrequenciesFeatures = new Pair<Long[], List<Path>>(counts, docFrequencyChunks);
        }

        long maxDF = maxDFPercent; //if we are pruning by std dev, then this will get changed
        if (shouldPrune) {
            long vectorCount = docFrequenciesFeatures.getFirst()[1];
            if (maxDFSigma >= 0.0) {
                Path dfDir = new Path(outputDir, TFIDFConverter.WORDCOUNT_OUTPUT_FOLDER);
                Path stdCalcDir = new Path(outputDir, HighDFWordsPruner.STD_CALC_DIR);

                // Calculate the standard deviation
                double stdDev = BasicStats.stdDevForGivenMean(dfDir, stdCalcDir, 0.0, conf);
                maxDF = (int) (100.0 * maxDFSigma * stdDev / vectorCount);
            }

            long maxDFThreshold = (long) (vectorCount * (maxDF / 100.0f));

            // Prune the term frequency vectors
            Path tfDir = new Path(outputDir, tfDirName);
            Path prunedTFDir = new Path(outputDir, DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER);
            Path prunedPartialTFDir = new Path(outputDir,
                    DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER + "-partial");
            log.info("Pruning");
            if (processIdf == 1 || processIdf == 3) {
                HighDFWordsPruner.pruneVectors(tfDir, prunedTFDir, prunedPartialTFDir, maxDFThreshold, minDf,
                        conf, docFrequenciesFeatures, -1.0f, false, reduceTasks);
            } else {
                HighDFWordsPruner.pruneVectors(tfDir, prunedTFDir, prunedPartialTFDir, maxDFThreshold, minDf,
                        conf, docFrequenciesFeatures, norm, logNormalize, reduceTasks);
            }
            HadoopUtil.delete(new Configuration(conf), tfDir);
        }
        if (processIdf == 1 || processIdf == 3) {
            TFIDFConverter.processTfIdf(new Path(outputDir, DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER),
                    outputDir, conf, docFrequenciesFeatures, minDf, maxDF, norm, logNormalize,
                    sequentialAccessOutput, namedVectors, reduceTasks);
        }
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }
    return 0;
}

From source file:imageClassify.TestForest.java

@Override
public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option inputOpt = DefaultOptionCreator.inputOption().create();

    Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true)
            .withArgument(abuilder.withName("dataset").withMinimum(1).withMaximum(1).create())
            .withDescription("Dataset path").create();

    Option modelOpt = obuilder.withLongName("model").withShortName("m").withRequired(true)
            .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
            .withDescription("Path to the Decision Forest").create();

    Option outputOpt = DefaultOptionCreator.outputOption().create();

    Option analyzeOpt = obuilder.withLongName("analyze").withShortName("a").withRequired(false).create();

    Option mrOpt = obuilder.withLongName("mapreduce").withShortName("mr").withRequired(false).create();

    Option helpOpt = DefaultOptionCreator.helpOption();

    Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(datasetOpt).withOption(modelOpt)
            .withOption(outputOpt).withOption(analyzeOpt).withOption(mrOpt).withOption(helpOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption("help")) {
            CommandLineUtil.printHelp(group);
            return -1;
        }

        String dataName = cmdLine.getValue(inputOpt).toString();
        String datasetName = cmdLine.getValue(datasetOpt).toString();
        String modelName = cmdLine.getValue(modelOpt).toString();
        String outputName = cmdLine.hasOption(outputOpt) ? cmdLine.getValue(outputOpt).toString() : null;
        analyze = cmdLine.hasOption(analyzeOpt);
        useMapreduce = cmdLine.hasOption(mrOpt);

        if (log.isDebugEnabled()) {
            log.debug("inout     : {}", dataName);
            log.debug("dataset   : {}", datasetName);
            log.debug("model     : {}", modelName);
            log.debug("output    : {}", outputName);
            log.debug("analyze   : {}", analyze);
            log.debug("mapreduce : {}", useMapreduce);
        }

        dataPath = new Path(dataName);
        datasetPath = new Path(datasetName);
        modelPath = new Path(modelName);
        if (outputName != null) {
            outputPath = new Path(outputName);
        }
    } catch (OptionException e) {
        log.warn(e.toString(), e);
        CommandLineUtil.printHelp(group);
        return -1;
    }

    testForest();

    return 0;
}

From source file:it.jnrpe.server.console.PluginCommand.java

private Option toOption(PluginOption po) {
    DefaultOptionBuilder oBuilder = new DefaultOptionBuilder();

    oBuilder.withShortName(po.getOption()).withDescription(po.getDescription())
            .withRequired("true".equalsIgnoreCase(po.getRequired()));

    if (po.getLongOpt() != null) {
        oBuilder.withLongName(po.getLongOpt());
    }

    if (po.hasArgs()) {
        ArgumentBuilder aBuilder = new ArgumentBuilder();

        if (po.getArgName() != null) {
            aBuilder.withName(po.getArgName());
        }

        if (po.getArgsOptional()) {
            aBuilder.withMinimum(0);
        }

        if (po.getArgsCount() != null) {
            aBuilder.withMaximum(po.getArgsCount());
        } else {
            aBuilder.withMaximum(1);
        }

        if (po.getValueSeparator() != null && po.getValueSeparator().length() != 0) {
            aBuilder.withInitialSeparator(po.getValueSeparator().charAt(0));
            aBuilder.withSubsequentSeparator(po.getValueSeparator().charAt(0));
        }
        oBuilder.withArgument(aBuilder.create());
    }

    return oBuilder.create();
}

From source file:de.tu_chemnitz.mi.barcd.app.CommandLineArgumentsParser.java

private DefaultOption createXmlSchemaOption() {
    ArgumentBuilder ab = new ArgumentBuilder();
    DefaultOptionBuilder ob = new DefaultOptionBuilder();

    UrlValidator urlValidator = new UrlValidator();

    Argument xmlSchemaArgument = ab.withName("URL").withMinimum(1).withMaximum(1).withValidator(urlValidator)
            .create();

    DefaultOption xmlSchemaOption = ob.withLongName("xml-schema").withShortName("xs")
            .withDescription("Specify the location of the XML schema used for validation.")
            .withArgument(xmlSchemaArgument).withRequired(false).create();

    return xmlSchemaOption;
}

From source file:com.ibm.jaql.util.shell.JaqlShellArguments.java

@SuppressWarnings("unchecked")
static JaqlShellArguments parseArgs(String... args) {
    // option builders
    final DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    final ArgumentBuilder abuilder = new ArgumentBuilder();
    final GroupBuilder gbuilder = new GroupBuilder();

    // create standard options
    Option optHelp = obuilder.withShortName("h").withShortName("?").withLongName("help")
            .withDescription("print this message").create();

    Option optJars = obuilder.withShortName("j").withLongName("jars")
            .withDescription(
                    "comma-separated list of jar files to include user defined expressions or data stores")
            .withArgument(abuilder.withName("args").withMinimum(1).withMaximum(1).create()).create();

    Option optSearchPath = obuilder.withShortName("jp").withLongName("jaql-path")
            .withDescription("colon seperated list of all search path entries")
            .withArgument(abuilder.withName("args").withMinimum(1).withMaximum(1).create()).create();

    Option optBatch = obuilder.withShortName("b").withLongName("batch")
            .withDescription("run in batch mode (i.e., do not read from stdin)").create();

    Option optOutOptions = obuilder.withShortName("o").withLongName("outoptions")
            .withDescription("output options: json, del and xml or an output IO descriptor. "
                    + "This option is ignored when not running in batch mode.")
            .withArgument(abuilder.withName("outoptions").withMinimum(1).withMaximum(1).create()).create();

    Option optEval = obuilder.withShortName("e").withLongName("eval")
            .withDescription("evaluate Jaql expression")
            .withArgument(abuilder.withName("expr").withMinimum(1).withMaximum(1).create()).create();

    // create mini-cluster options
    Option optCluster = obuilder.withShortName("c").withLongName("cluster")
            .withDescription("use existing cluster (i.e., do not launch a mini-cluster)").create();

    Option optNumNodes = obuilder.withShortName("n").withLongName("no-nodes")
            .withDescription("mini-cluster option: number of nodes to spawn")
            .withArgument(abuilder.withName("arg").withMinimum(1).withMaximum(1)
                    .withValidator(NumberValidator.getIntegerInstance()).create())
            .create();

    Option optDir = obuilder.withShortName("d").withLongName("hdfs-dir")
            .withDescription("mini-cluster option: root HDFs directory")
            .withArgument(abuilder.withName("arg").withMinimum(1).withMaximum(1).create()).create();

    Group clusterOptions = gbuilder.withName("Cluster options").withOption(optCluster).withOption(optDir)
            .withOption(optNumNodes).create();

    // create input files option
    Option optInputFiles = abuilder.withName("file").withDescription("list of input files").withMinimum(0)
            .create();

    Option optLog = obuilder.withShortName("l").withLongName("log")
            .withDescription("log options: json, del and xml or an output IO descriptor. ")
            .withArgument(abuilder.withName("arg").withMinimum(1).withMaximum(1).create()).create();

    // combine all options
    Group options = gbuilder.withName("options").withOption(optHelp).withOption(optJars)
            .withOption(optSearchPath).withOption(optBatch).withOption(optLog).withOption(optOutOptions)
            .withOption(optEval).withOption(optInputFiles).withOption(clusterOptions).create();

    // parse and print help if necessary
    CommandLine cl;
    try {
        Parser parser = new Parser();
        parser.setGroup(options);
        cl = parser.parse(args);
    } catch (OptionException e) {
        printHelpAndExit(e, null, options);
        return null;
    }
    if (cl.hasOption(optHelp)) {
        printHelpAndExit(null, options);
    }

    // validate arguments
    JaqlShellArguments result = new JaqlShellArguments();

    // mini-cluster options         
    if (cl.hasOption(optCluster)) {
        result.useExistingCluster = true;
    }
    if (cl.hasOption(optDir)) {
        if (result.useExistingCluster) {
            printHelpAndExit("Options " + optCluster.getPreferredName() + " and " + optDir.getPreferredName()
                    + " are mutually exclusive", options);
        }
        result.hdfsDir = (String) cl.getValue(optDir);
    }
    if (cl.hasOption(optNumNodes)) {
        if (result.useExistingCluster) {
            printHelpAndExit("Options " + optCluster.getPreferredName() + " and "
                    + optNumNodes.getPreferredName() + " are mutually exclusive", options);
        }
        result.numNodes = ((Number) cl.getValue(optNumNodes)).intValue();
    }

    // jar files
    if (cl.hasOption(optJars)) {
        result.jars = ((String) cl.getValue(optJars)).split(",");
        for (String jar : result.jars) {
            if (!new File(jar).exists()) {
                printHelpAndExit("Jar file " + jar + " not found", options);
            }
        }
    }

    // search path directories
    if (cl.hasOption(optSearchPath)) {
        result.searchPath = ((String) cl.getValue(optSearchPath)).split(":");
        for (String dir : result.searchPath) {
            if (!new File(dir).exists() || !new File(dir).isDirectory()) {
                printHelpAndExit("Search-path entry " + dir + " not found or is no directory", options);
            }
        }
    }

    if (cl.hasOption(optBatch)) {
        result.batchMode = true;
        if (cl.hasOption(optOutOptions)) {
            String format = (String) cl.getValue(optOutOptions);
            try {
                result.outputAdapter = getOutputAdapter(format);
            } catch (Exception e) {
                printHelpAndExit(e,
                        "\"" + format + "\" is neither a valid output format nor a valid IO descriptor",
                        options);
            }
        }
    }

    // input
    if (cl.hasOption(optEval)) {
        String eval = (String) cl.getValue(optEval);
        if (!eval.endsWith(";"))
            eval += ";";
        result.addInputStream(new ByteArrayInputStream(eval.getBytes()));
    }
    if (cl.hasOption(optInputFiles)) {
        List<String> files = (List<String>) cl.getValues(optInputFiles);
        for (String file : files) {
            try {
                result.addInputStream(new FileInputStream(file));
            } catch (FileNotFoundException e) {
                printHelpAndExit(e, "Input file " + file + " not found", options);
            }
        }
    }

    // error log
    if (cl.hasOption(optLog)) {
        String path = (String) cl.getValue(optLog);
        try {
            BufferedJsonRecord logFD = new BufferedJsonRecord();
            logFD.add(Adapter.TYPE_NAME, new JsonString("local"));
            logFD.add(Adapter.LOCATION_NAME, new JsonString(path));
            OutputAdapter oa = (OutputAdapter) JaqlUtil.getAdapterStore().output.getAdapter(logFD);
            result.logAdapter = oa;
        } catch (Exception e) {
            printHelpAndExit(e, "\"" + path + "\" invalid", options);
        }
    }

    if (!result.batchMode) {
        result.addStdin();
    }

    return result;
}

From source file:haflow.component.mahout.logistic.RunLogistic.java

private static boolean parseArgs(String[] args) {
    DefaultOptionBuilder builder = new DefaultOptionBuilder();

    Option help = builder.withLongName("help").withDescription("print this list").create();

    Option quiet = builder.withLongName("quiet").withDescription("be extra quiet").create();

    Option auc = builder.withLongName("auc").withDescription("print AUC").create();
    Option confusion = builder.withLongName("confusion").withDescription("print confusion matrix").create();

    Option scores = builder.withLongName("scores").withDescription("print scores").create();

    ArgumentBuilder argumentBuilder = new ArgumentBuilder();
    Option inputFileOption = builder.withLongName("input").withRequired(true)
            .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
            .withDescription("where to get training data").create();

    Option modelFileOption = builder.withLongName("model").withRequired(true)
            .withArgument(argumentBuilder.withName("model").withMaximum(1).create())
            .withDescription("where to get a model").create();
    Option outputFileOption = builder.withLongName("output").withRequired(true)
            .withArgument(argumentBuilder.withName("output").withMaximum(1).create())
            .withDescription("where to store predicting data").create();
    Option accurateFileOption = builder.withLongName("accurate").withRequired(true)
            .withArgument(argumentBuilder.withName("accurate").withMaximum(1).create())
            .withDescription("where to store accurate information").create();
    Group normalArgs = new GroupBuilder().withOption(help).withOption(quiet).withOption(auc).withOption(scores)
            .withOption(confusion).withOption(inputFileOption).withOption(modelFileOption)
            .withOption(outputFileOption).withOption(accurateFileOption).create();

    Parser parser = new Parser();
    parser.setHelpOption(help);
    parser.setHelpTrigger("--help");
    parser.setGroup(normalArgs);
    parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));
    CommandLine cmdLine = parser.parseAndHelp(args);

    if (cmdLine == null) {
        return false;
    }

    inputFile = getStringArgument(cmdLine, inputFileOption);
    modelFile = getStringArgument(cmdLine, modelFileOption);
    outputFile = getStringArgument(cmdLine, outputFileOption);
    accurateFile = getStringArgument(cmdLine, accurateFileOption);
    showAuc = getBooleanArgument(cmdLine, auc);
    showScores = getBooleanArgument(cmdLine, scores);
    showConfusion = getBooleanArgument(cmdLine, confusion);

    return true;
}

From source file:com.ml.ira.algos.RunLogistic.java

private static boolean parseArgs(String[] args) {
    DefaultOptionBuilder builder = new DefaultOptionBuilder();

    Option help = builder.withLongName("help").withDescription("print this list").create();

    Option quiet = builder.withLongName("quiet").withDescription("be extra quiet").create();

    Option auc = builder.withLongName("auc").withDescription("print AUC").create();
    Option confusion = builder.withLongName("confusion").withDescription("print confusion matrix").create();

    Option scores = builder.withLongName("scores").withDescription("print scores").create();

    ArgumentBuilder argumentBuilder = new ArgumentBuilder();
    Option inputFileOption = builder.withLongName("input").withRequired(true)
            .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
            .withDescription("where to get training data").create();

    Option modelFileOption = builder.withLongName("model").withRequired(true)
            .withArgument(argumentBuilder.withName("model").withMaximum(1).create())
            .withDescription("where to get a model").create();

    Option fieldNames = builder.withLongName("fdnames").withRequired(true)
            .withArgument(argumentBuilder.withName("fns").create())
            .withDescription("the field names of training data set").create();

    Group normalArgs = new GroupBuilder().withOption(help).withOption(quiet).withOption(auc).withOption(scores)
            .withOption(confusion).withOption(inputFileOption).withOption(modelFileOption)
            .withOption(fieldNames).create();

    Parser parser = new Parser();
    parser.setHelpOption(help);
    parser.setHelpTrigger("--help");
    parser.setGroup(normalArgs);
    parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));
    CommandLine cmdLine = parser.parseAndHelp(args);

    if (cmdLine == null) {
        return false;
    }

    inputFile = getStringArgument(cmdLine, inputFileOption);
    modelFile = getStringArgument(cmdLine, modelFileOption);
    showAuc = getBooleanArgument(cmdLine, auc);
    showScores = getBooleanArgument(cmdLine, scores);
    showConfusion = getBooleanArgument(cmdLine, confusion);
    RunLogistic.fieldNames = getStringArgument(cmdLine, fieldNames);

    System.out.println("inputFile: " + inputFile);
    System.out.println("modelFile: " + modelFile);
    System.out.println("fieldNames: " + RunLogistic.fieldNames);

    return true;
}

From source file:com.memonews.mahout.sentiment.SentimentModelTester.java

boolean parseArgs(final String[] args) {
    final DefaultOptionBuilder builder = new DefaultOptionBuilder();

    final Option help = builder.withLongName("help").withDescription("print this list").create();

    final ArgumentBuilder argumentBuilder = new ArgumentBuilder();
    final Option inputFileOption = builder.withLongName("input").withRequired(true)
            .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
            .withDescription("where to get training data").create();

    final Option modelFileOption = builder.withLongName("model").withRequired(true)
            .withArgument(argumentBuilder.withName("model").withMaximum(1).create())
            .withDescription("where to get a model").create();

    final Group normalArgs = new GroupBuilder().withOption(help).withOption(inputFileOption)
            .withOption(modelFileOption).create();

    final Parser parser = new Parser();
    parser.setHelpOption(help);
    parser.setHelpTrigger("--help");
    parser.setGroup(normalArgs);
    parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));
    final CommandLine cmdLine = parser.parseAndHelp(args);

    if (cmdLine == null) {
        return false;
    }

    inputFile = (String) cmdLine.getValue(inputFileOption);
    modelFile = (String) cmdLine.getValue(modelFileOption);
    return true;
}

From source file:haflow.component.mahout.logistic.TrainLogistic.java

private static boolean parseArgs(String[] args) {
    DefaultOptionBuilder builder = new DefaultOptionBuilder();

    Option help = builder.withLongName("help").withDescription("print this list").create();

    Option quiet = builder.withLongName("quiet").withDescription("be extra quiet").create();
    Option scores = builder.withLongName("scores").withDescription("output score diagnostics during training")
            .create();

    ArgumentBuilder argumentBuilder = new ArgumentBuilder();
    Option inputFile = builder.withLongName("input").withRequired(true)
            .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
            .withDescription("where to get training data").create();

    Option outputFile = builder.withLongName("output").withRequired(true)
            .withArgument(argumentBuilder.withName("output").withMaximum(1).create())
            .withDescription("where to get training data").create();
    Option inforFile = builder.withLongName("infor").withRequired(true)
            .withArgument(argumentBuilder.withName("infor").withMaximum(1).create())
            .withDescription("where to store information about the training").create();

    Option predictors = builder.withLongName("predictors").withRequired(true)
            .withArgument(argumentBuilder.withName("p").create())
            .withDescription("a list of predictor variables").create();

    Option types = builder.withLongName("types").withRequired(true)
            .withArgument(argumentBuilder.withName("t").create())
            .withDescription("a list of predictor variable types (numeric, word, or text)").create();

    Option target = builder.withLongName("target").withRequired(true)
            .withArgument(argumentBuilder.withName("target").withMaximum(1).create())
            .withDescription("the name of the target variable").create();

    Option features = builder.withLongName("features")
            .withArgument(argumentBuilder.withName("numFeatures").withDefault("1000").withMaximum(1).create())
            .withDescription("the number of internal hashed features to use").create();

    Option passes = builder.withLongName("passes")
            .withArgument(argumentBuilder.withName("passes").withDefault("2").withMaximum(1).create())
            .withDescription("the number of times to pass over the input data").create();

    Option lambda = builder.withLongName("lambda")
            .withArgument(argumentBuilder.withName("lambda").withDefault("1e-4").withMaximum(1).create())
            .withDescription("the amount of coefficient decay to use").create();

    Option rate = builder.withLongName("rate")
            .withArgument(argumentBuilder.withName("learningRate").withDefault("1e-3").withMaximum(1).create())
            .withDescription("the learning rate").create();

    Option noBias = builder.withLongName("noBias").withDescription("don't include a bias term").create();

    Option targetCategories = builder.withLongName("categories").withRequired(true)
            .withArgument(argumentBuilder.withName("number").withMaximum(1).create())
            .withDescription("the number of target categories to be considered").create();

    Group normalArgs = new GroupBuilder().withOption(help).withOption(quiet).withOption(inputFile)
            .withOption(outputFile).withOption(inforFile).withOption(target).withOption(targetCategories)
            .withOption(predictors).withOption(types).withOption(passes).withOption(lambda).withOption(rate)
            .withOption(noBias).withOption(features).create();

    Parser parser = new Parser();
    parser.setHelpOption(help);
    parser.setHelpTrigger("--help");
    parser.setGroup(normalArgs);
    parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));
    CommandLine cmdLine = parser.parseAndHelp(args);

    if (cmdLine == null) {
        return false;
    }

    TrainLogistic.inputFile = getStringArgument(cmdLine, inputFile);
    TrainLogistic.outputFile = getStringArgument(cmdLine, outputFile);
    TrainLogistic.inforFile = getStringArgument(cmdLine, inforFile);

    List<String> typeList = Lists.newArrayList();
    for (Object x : cmdLine.getValues(types)) {
        typeList.add(x.toString());
    }

    List<String> predictorList = Lists.newArrayList();
    for (Object x : cmdLine.getValues(predictors)) {
        predictorList.add(x.toString());
    }

    lmp = new LogisticModelParameters();
    lmp.setTargetVariable(getStringArgument(cmdLine, target));
    lmp.setMaxTargetCategories(getIntegerArgument(cmdLine, targetCategories));
    lmp.setNumFeatures(getIntegerArgument(cmdLine, features));
    lmp.setUseBias(!getBooleanArgument(cmdLine, noBias));
    lmp.setTypeMap(predictorList, typeList);

    lmp.setLambda(getDoubleArgument(cmdLine, lambda));
    lmp.setLearningRate(getDoubleArgument(cmdLine, rate));

    TrainLogistic.scores = getBooleanArgument(cmdLine, scores);
    TrainLogistic.passes = getIntegerArgument(cmdLine, passes);
    System.out.print("infor:" + TrainLogistic.inforFile);

    System.out.println("target:" + getStringArgument(cmdLine, target));
    System.out.println("targetCategories:" + String.valueOf(getStringArgument(cmdLine, targetCategories)));
    System.out.println("features:" + String.valueOf(getStringArgument(cmdLine, features)));
    System.out.println("lambda:" + String.valueOf(getStringArgument(cmdLine, lambda)));
    System.out.println("rate:" + String.valueOf(getStringArgument(cmdLine, rate)));
    return true;
}

From source file:com.ml.ira.algos.TrainLogistic.java

private static boolean parseArgs(String[] args) {
    DefaultOptionBuilder builder = new DefaultOptionBuilder();

    Option help = builder.withLongName("help").withDescription("print this list").create();

    Option quiet = builder.withLongName("quiet").withDescription("be extra quiet").create();
    Option scores = builder.withLongName("scores").withDescription("output score diagnostics during training")
            .create();

    ArgumentBuilder argumentBuilder = new ArgumentBuilder();
    Option inputFile = builder.withLongName("input").withRequired(true)
            .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
            .withDescription("where to get training data").create();

    Option outputFile = builder.withLongName("output").withRequired(true)
            .withArgument(argumentBuilder.withName("output").withMaximum(1).create())
            .withDescription("where to get training data").create();

    Option predictors = builder.withLongName("predictors").withRequired(true)
            .withArgument(argumentBuilder.withName("p").create())
            .withDescription("a list of predictor variables").create();

    Option types = builder.withLongName("types").withRequired(true)
            .withArgument(argumentBuilder.withName("t").create())
            .withDescription("a list of predictor variable types (numeric, word, or text)").create();

    Option target = builder.withLongName("target").withRequired(true)
            .withArgument(argumentBuilder.withName("target").withMaximum(1).create())
            .withDescription("the name of the target variable").create();

    Option features = builder.withLongName("features")
            .withArgument(argumentBuilder.withName("numFeatures").withDefault("1000").withMaximum(1).create())
            .withDescription("the number of internal hashed features to use").create();

    Option passes = builder.withLongName("passes")
            .withArgument(argumentBuilder.withName("passes").withDefault("2").withMaximum(1).create())
            .withDescription("the number of times to pass over the input data").create();

    Option lambda = builder.withLongName("lambda")
            .withArgument(argumentBuilder.withName("lambda").withDefault("1e-4").withMaximum(1).create())
            .withDescription("the amount of coefficient decay to use").create();

    Option rate = builder.withLongName("rate")
            .withArgument(argumentBuilder.withName("learningRate").withDefault("1e-3").withMaximum(1).create())
            .withDescription("the learning rate").create();

    Option noBias = builder.withLongName("noBias").withDescription("don't include a bias term").create();

    Option targetCategories = builder.withLongName("categories").withRequired(true)
            .withArgument(argumentBuilder.withName("number").withMaximum(1).create())
            .withDescription("the number of target categories to be considered").create();

    Option fieldNames = builder.withLongName("fdnames").withRequired(true)
            .withArgument(argumentBuilder.withName("fns").create())
            .withDescription("the field names of training data set").create();

    Group normalArgs = new GroupBuilder().withOption(help).withOption(quiet).withOption(inputFile)
            .withOption(outputFile).withOption(target).withOption(targetCategories).withOption(predictors)
            .withOption(types).withOption(passes).withOption(lambda).withOption(rate).withOption(noBias)
            .withOption(features).withOption(fieldNames).create();

    Parser parser = new Parser();
    parser.setHelpOption(help);
    parser.setHelpTrigger("--help");
    parser.setGroup(normalArgs);
    parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));
    CommandLine cmdLine = parser.parseAndHelp(args);

    if (cmdLine == null) {
        return false;
    }

    TrainLogistic.inputFile = getStringArgument(cmdLine, inputFile);
    TrainLogistic.outputFile = getStringArgument(cmdLine, outputFile);
    TrainLogistic.fieldNames = getStringArgument(cmdLine, fieldNames);

    List<String> typeList = Lists.newArrayList();
    String tmp = getStringArgument(cmdLine, types);
    if (tmp != null) {
        typeList.addAll(Arrays.asList(tmp.split(",")));
    }

    tmp = getStringArgument(cmdLine, predictors);
    List<String> predictorList = Lists.newArrayList();
    if (tmp != null) {
        predictorList.addAll(Arrays.asList(tmp.split(",")));
    }

    lmp = new LogisticModelParameters();
    lmp.setTargetVariable(getStringArgument(cmdLine, target));
    lmp.setMaxTargetCategories(getIntegerArgument(cmdLine, targetCategories));
    lmp.setNumFeatures(getIntegerArgument(cmdLine, features));
    lmp.setUseBias(!getBooleanArgument(cmdLine, noBias));
    lmp.setTypeMap(predictorList, typeList);
    lmp.setFieldNames(TrainLogistic.fieldNames);
    lmp.setLambda(getDoubleArgument(cmdLine, lambda));
    lmp.setLearningRate(getDoubleArgument(cmdLine, rate));

    TrainLogistic.scores = getBooleanArgument(cmdLine, scores);
    TrainLogistic.passes = getIntegerArgument(cmdLine, passes);

    System.out.println("@Train inputFile: " + TrainLogistic.inputFile);
    System.out.println("@Train fieldNames: " + TrainLogistic.fieldNames);
    System.out.println("@Train typeList: " + typeList);
    System.out.println("@Train predictorList: " + predictorList);

    return true;
}