List of usage examples for the org.apache.commons.cli2.builder.DefaultOptionBuilder constructor DefaultOptionBuilder()
public DefaultOptionBuilder()
From source file:com.digitalpebble.behemoth.tika.TikaDriver.java
/**
 * Assembles a commons-cli2 option from the supplied settings.
 *
 * @param name         long name of the option; also used as the name of its argument
 * @param shortName    short name of the option; skipped when {@code null}
 * @param description  description attached to the option
 * @param hasArg       when true, the option gets an argument with minimum 1 and maximum 1
 * @param required     value passed to the builder's required flag
 * @param defaultValue default for the argument; skipped when {@code null}
 * @return the option produced by the builder
 */
private Option buildOption(String name, String shortName, String description, boolean hasArg,
        boolean required, String defaultValue) {
    DefaultOptionBuilder option = new DefaultOptionBuilder();
    // The long name is registered before the short name, exactly as before.
    option.withLongName(name);
    option.withDescription(description);
    option.withRequired(required);
    if (shortName != null) {
        option.withShortName(shortName);
    }

    // Flag-style option: nothing more to configure.
    if (!hasArg) {
        return option.create();
    }

    ArgumentBuilder argument = new ArgumentBuilder().withName(name).withMinimum(1).withMaximum(1);
    if (defaultValue != null) {
        argument = argument.withDefault(defaultValue);
    }
    option.withArgument(argument.create());
    return option.create();
}
From source file:com.tamingtext.util.SplitInput.java
/**
 * Configure this instance based on the command-line arguments contained within provided array.
 * Calls {@link #validate()} to ensure consistency of configuration.
 *
 * @param args the raw command-line arguments
 * @return true if the arguments were parsed successfully and execution should proceed;
 *         false if help was requested or an {@link OptionException} was raised while parsing.
 * @throws Exception if there is a problem parsing the command-line arguments or the particular
 *                   combination would violate class invariants.
 */
public boolean parseArgs(String[] args) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option helpOpt = DefaultOptionCreator.helpOption();

    Option inputDirOpt = obuilder.withLongName("inputDir").withRequired(true)
            .withArgument(abuilder.withName("inputDir").withMinimum(1).withMaximum(1).create())
            .withDescription("The input directory").withShortName("i").create();

    Option trainingOutputDirOpt = obuilder.withLongName("trainingOutputDir").withRequired(true)
            .withArgument(abuilder.withName("outputDir").withMinimum(1).withMaximum(1).create())
            .withDescription("The training data output directory").withShortName("tr").create();

    Option testOutputDirOpt = obuilder.withLongName("testOutputDir").withRequired(true)
            .withArgument(abuilder.withName("outputDir").withMinimum(1).withMaximum(1).create())
            .withDescription("The test data output directory").withShortName("te").create();

    Option testSplitSizeOpt = obuilder.withLongName("testSplitSize").withRequired(false)
            .withArgument(abuilder.withName("splitSize").withMinimum(1).withMaximum(1).create())
            .withDescription("The number of documents held back as test data for each category")
            .withShortName("ss").create();

    Option testSplitPctOpt = obuilder.withLongName("testSplitPct").withRequired(false)
            .withArgument(abuilder.withName("splitPct").withMinimum(1).withMaximum(1).create())
            .withDescription("The percentage of documents held back as test data for each category")
            .withShortName("sp").create();

    // Help text fixed: the closing parenthesis was missing.
    Option splitLocationOpt = obuilder.withLongName("splitLocation").withRequired(false)
            .withArgument(abuilder.withName("splitLoc").withMinimum(1).withMaximum(1).create())
            .withDescription(
                    "Location for start of test data expressed as a percentage of the input file size (0=start, 50=middle, 100=end)")
            .withShortName("sl").create();

    // Help text fixed: "itemr" -> "items".
    Option randomSelectionSizeOpt = obuilder.withLongName("randomSelectionSize").withRequired(false)
            .withArgument(abuilder.withName("randomSize").withMinimum(1).withMaximum(1).create())
            .withDescription("The number of items to be randomly selected as test data ").withShortName("rs")
            .create();

    Option randomSelectionPctOpt = obuilder.withLongName("randomSelectionPct").withRequired(false)
            .withArgument(abuilder.withName("randomPct").withMinimum(1).withMaximum(1).create())
            .withDescription("Percentage of items to be randomly selected as test data ").withShortName("rp")
            .create();

    Option charsetOpt = obuilder.withLongName("charset").withRequired(true)
            .withArgument(abuilder.withName("charset").withMinimum(1).withMaximum(1).create())
            .withDescription("The name of the character encoding of the input files").withShortName("c")
            .create();

    // The help option must be part of the group, otherwise the parser never recognises it
    // and the hasOption(helpOpt) branch below cannot be reached.
    Group group = gbuilder.withName("Options").withOption(inputDirOpt).withOption(trainingOutputDirOpt)
            .withOption(testOutputDirOpt).withOption(testSplitSizeOpt).withOption(testSplitPctOpt)
            .withOption(splitLocationOpt).withOption(randomSelectionSizeOpt).withOption(randomSelectionPctOpt)
            .withOption(charsetOpt).withOption(helpOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        // Registering the help option lets a bare help request through without the
        // required options being present (see the cli2 Parser documentation).
        parser.setHelpOption(helpOpt);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return false;
        }

        inputDirectory = new Path((String) cmdLine.getValue(inputDirOpt));
        trainingOutputDirectory = new Path((String) cmdLine.getValue(trainingOutputDirOpt));
        testOutputDirectory = new Path((String) cmdLine.getValue(testOutputDirOpt));
        charset = Charset.forName((String) cmdLine.getValue(charsetOpt));

        // Exactly one of split size / split percentage must be supplied.
        if (cmdLine.hasOption(testSplitSizeOpt) && cmdLine.hasOption(testSplitPctOpt)) {
            throw new OptionException(testSplitSizeOpt,
                    "must have either split size or split percentage option, not BOTH");
        } else if (!cmdLine.hasOption(testSplitSizeOpt) && !cmdLine.hasOption(testSplitPctOpt)) {
            throw new OptionException(testSplitSizeOpt,
                    "must have either split size or split percentage option");
        }

        if (cmdLine.hasOption(testSplitSizeOpt)) {
            setTestSplitSize(Integer.parseInt((String) cmdLine.getValue(testSplitSizeOpt)));
        }
        if (cmdLine.hasOption(testSplitPctOpt)) {
            setTestSplitPct(Integer.parseInt((String) cmdLine.getValue(testSplitPctOpt)));
        }
        if (cmdLine.hasOption(splitLocationOpt)) {
            setSplitLocation(Integer.parseInt((String) cmdLine.getValue(splitLocationOpt)));
        }
        if (cmdLine.hasOption(randomSelectionSizeOpt)) {
            setTestRandomSelectionSize(Integer.parseInt((String) cmdLine.getValue(randomSelectionSizeOpt)));
        }
        if (cmdLine.hasOption(randomSelectionPctOpt)) {
            setTestRandomSelectionPct(Integer.parseInt((String) cmdLine.getValue(randomSelectionPctOpt)));
        }

        fs.mkdirs(trainingOutputDirectory);
        fs.mkdirs(testOutputDirectory);
    } catch (OptionException e) {
        log.error("Command-line option Exception", e);
        CommandLineUtil.printHelp(group);
        return false;
    }

    validate();
    return true;
}
From source file:it.jnrpe.client.JNRPEClient.java
/** * Configures the command line parser./*w w w .j a va 2 s. c o m*/ * * @return The command line parser configuration */ private static Group configureCommandLine() { DefaultOptionBuilder oBuilder = new DefaultOptionBuilder(); ArgumentBuilder aBuilder = new ArgumentBuilder(); GroupBuilder gBuilder = new GroupBuilder(); DefaultOption nosslOption = oBuilder.withLongName("nossl").withShortName("n") .withDescription("Do no use SSL").create(); DefaultOption weakSslOption = oBuilder.withLongName("weakCiherSuites").withShortName("w") .withDescription("Enable weak cipher suites").create(); DefaultOption unknownOption = oBuilder.withLongName("unknown").withShortName("u") .withDescription("Make socket timeouts return an UNKNOWN state instead of CRITICAL").create(); DefaultOption hostOption = oBuilder.withLongName("host").withShortName("H") .withDescription("The address of the host running the JNRPE/NRPE daemon") .withArgument(aBuilder.withName("host").withMinimum(1).withMaximum(1).create()).create(); NumberValidator positiveInt = NumberValidator.getIntegerInstance(); positiveInt.setMinimum(0); DefaultOption portOption = oBuilder.withLongName("port").withShortName("p") .withDescription("The port on which the daemon is running (default=5666)") .withArgument(aBuilder.withName("port").withMinimum(1).withMaximum(1) .withDefault(Long.valueOf(DEFAULT_PORT)).withValidator(positiveInt).create()) .create(); DefaultOption timeoutOption = oBuilder.withLongName("timeout").withShortName("t") .withDescription("Number of seconds before connection times out (default=10)") .withArgument(aBuilder.withName("timeout").withMinimum(1).withMaximum(1) .withDefault(Long.valueOf(DEFAULT_TIMEOUT)).withValidator(positiveInt).create()) .create(); DefaultOption commandOption = oBuilder.withLongName("command").withShortName("c") .withDescription("The name of the command that the remote daemon should run") .withArgument(aBuilder.withName("command").withMinimum(1).withMaximum(1).create()).create(); 
DefaultOption argsOption = oBuilder.withLongName("arglist").withShortName("a").withDescription( "Optional arguments that should be passed to the command. Multiple arguments should be separated by " + "a space (' '). If provided, this must be the last option supplied on the command line.") .withArgument(aBuilder.withName("arglist").withMinimum(1).create()).create(); DefaultOption helpOption = oBuilder.withLongName("help").withShortName("h") .withDescription("Shows this help").create(); Group executionOption = gBuilder.withOption(nosslOption).withOption(weakSslOption).withOption(unknownOption) .withOption(hostOption).withOption(portOption).withOption(timeoutOption).withOption(commandOption) .withOption(argsOption).create(); return gBuilder.withOption(executionOption).withOption(helpOption).withMinimum(1).withMaximum(1).create(); }
From source file:it.jnrpe.plugins.PluginOption.java
/** * Convert this {@link PluginOption} to the Option required by Apache. * Commons Cli./* www. jav a2 s .c o m*/ * * @return The option object required by commons cli */ public Option toOption() { DefaultOptionBuilder oBuilder = new DefaultOptionBuilder(); oBuilder.withShortName(option).withDescription(description).withRequired(required); if (longOptionName != null) { oBuilder.withLongName(longOptionName); } if (hasArgs) { ArgumentBuilder aBuilder = new ArgumentBuilder(); if (argName != null) { aBuilder.withName(argName); } if (argsAreOptional) { aBuilder.withMinimum(0); } if (argsCount != null) { aBuilder.withMaximum(argsCount); } else { aBuilder.withMaximum(1); } if (argsValueSeparator != null && argsValueSeparator.length() != 0) { aBuilder.withInitialSeparator(argsValueSeparator.charAt(0)); aBuilder.withSubsequentSeparator(argsValueSeparator.charAt(0)); } oBuilder.withArgument(aBuilder.create()); } return oBuilder.create(); }
From source file:com.gsinnovations.howdah.AbstractJob.java
/**
 * Creates an option from the given settings. A name and a description must be supplied.
 *
 * @param name the long name of the option prefixed with '--' on the command-line
 * @param shortName the short name of the option, prefixed with '-' on the command-line;
 *        not registered when null
 * @param description description of the option displayed in help method
 * @param hasArg true if the option has an argument; the argument is created with
 *        minimum 1 and maximum 1 and is named after the option
 * @param required true if the option is required.
 * @param defaultValue default argument value, can be null.
 * @return the option.
 */
private static Option buildOption(String name, String shortName, String description, boolean hasArg,
        boolean required, String defaultValue) {
    final DefaultOptionBuilder options = new DefaultOptionBuilder()
            .withLongName(name)
            .withDescription(description)
            .withRequired(required);

    if (null != shortName) {
        options.withShortName(shortName);
    }

    if (hasArg) {
        final ArgumentBuilder single = new ArgumentBuilder()
                .withName(name)
                .withMinimum(1)
                .withMaximum(1);
        // Attach the default only when one was supplied.
        options.withArgument((defaultValue == null ? single : single.withDefault(defaultValue)).create());
    }

    return options.create();
}
From source file:TrainLogistic.java
/**
 * Parses the command line and stores the result in the static state of {@code TrainLogistic}
 * ({@code inputFile}, {@code outputFile}, {@code lmp}, {@code scores}, {@code passes}).
 *
 * @param args the raw command-line arguments
 * @return true when parsing succeeded; false when {@code parseAndHelp} returned no command line
 */
private static boolean parseArgs(String[] args) {
    DefaultOptionBuilder builder = new DefaultOptionBuilder();

    Option help = builder.withLongName("help").withDescription("print this list").create();

    Option quiet = builder.withLongName("quiet").withDescription("be extra quiet").create();

    Option scores = builder.withLongName("scores").withDescription("output score diagnostics during training")
            .create();

    ArgumentBuilder argumentBuilder = new ArgumentBuilder();
    Option inputFile = builder.withLongName("input").withRequired(true)
            .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
            .withDescription("where to get training data").create();

    // Description fixed: it previously repeated the text of --input.
    Option outputFile = builder.withLongName("output").withRequired(true)
            .withArgument(argumentBuilder.withName("output").withMaximum(1).create())
            .withDescription("where to write the output").create();

    Option predictors = builder.withLongName("predictors").withRequired(true)
            .withArgument(argumentBuilder.withName("p").create())
            .withDescription("a list of predictor variables").create();

    Option types = builder.withLongName("types").withRequired(true)
            .withArgument(argumentBuilder.withName("t").create())
            .withDescription("a list of predictor variable types (numeric, word, or text)").create();

    Option target = builder.withLongName("target").withRequired(true)
            .withArgument(argumentBuilder.withName("target").withMaximum(1).create())
            .withDescription("the name of the target variable").create();

    Option features = builder.withLongName("features")
            .withArgument(argumentBuilder.withName("numFeatures").withDefault("1000").withMaximum(1).create())
            .withDescription("the number of internal hashed features to use").create();

    Option passes = builder.withLongName("passes")
            .withArgument(argumentBuilder.withName("passes").withDefault("2").withMaximum(1).create())
            .withDescription("the number of times to pass over the input data").create();

    Option lambda = builder.withLongName("lambda")
            .withArgument(argumentBuilder.withName("lambda").withDefault("1e-4").withMaximum(1).create())
            .withDescription("the amount of coefficient decay to use").create();

    Option rate = builder.withLongName("rate")
            .withArgument(argumentBuilder.withName("learningRate").withDefault("1e-3").withMaximum(1).create())
            .withDescription("the learning rate").create();

    Option noBias = builder.withLongName("noBias").withDescription("don't include a bias term").create();

    Option targetCategories = builder.withLongName("categories").withRequired(true)
            .withArgument(argumentBuilder.withName("number").withMaximum(1).create())
            .withDescription("the number of target categories to be considered").create();

    // "scores" was previously left out of the group, so --scores was rejected by the parser
    // and the flag read at the end of this method could never be set.
    Group normalArgs = new GroupBuilder().withOption(help).withOption(quiet).withOption(scores)
            .withOption(inputFile).withOption(outputFile).withOption(target).withOption(targetCategories)
            .withOption(predictors).withOption(types).withOption(passes).withOption(lambda).withOption(rate)
            .withOption(noBias).withOption(features).create();

    Parser parser = new Parser();
    parser.setHelpOption(help);
    parser.setHelpTrigger("--help");
    parser.setGroup(normalArgs);
    parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));
    CommandLine cmdLine = parser.parseAndHelp(args);

    if (cmdLine == null) {
        // Print the argument values; println(args) only printed the array's identity string.
        System.out.println(java.util.Arrays.toString(args));
        return false;
    }

    // The local option variables shadow the static fields, hence the explicit qualification.
    TrainLogistic.inputFile = getStringArgument(cmdLine, inputFile);
    TrainLogistic.outputFile = getStringArgument(cmdLine, outputFile);

    List<String> typeList = Lists.newArrayList();
    for (Object x : cmdLine.getValues(types)) {
        typeList.add(x.toString());
    }

    List<String> predictorList = Lists.newArrayList();
    for (Object x : cmdLine.getValues(predictors)) {
        predictorList.add(x.toString());
    }

    lmp = new LogisticModelParameters();
    lmp.setTargetVariable(getStringArgument(cmdLine, target));
    lmp.setMaxTargetCategories(getIntegerArgument(cmdLine, targetCategories));
    lmp.setNumFeatures(getIntegerArgument(cmdLine, features));
    lmp.setUseBias(!getBooleanArgument(cmdLine, noBias));
    lmp.setTypeMap(predictorList, typeList);

    lmp.setLambda(getDoubleArgument(cmdLine, lambda));
    lmp.setLearningRate(getDoubleArgument(cmdLine, rate));

    TrainLogistic.scores = getBooleanArgument(cmdLine, scores);
    TrainLogistic.passes = getIntegerArgument(cmdLine, passes);

    return true;
}
From source file:my.mahout.AbstractJob.java
protected static Option buildOption(String name, String shortName, String description, boolean hasArg, int min, int max, boolean required, String defaultValue) { DefaultOptionBuilder optBuilder = new DefaultOptionBuilder().withLongName(name).withDescription(description) .withRequired(required);// w w w . jav a 2 s .com if (shortName != null) { optBuilder.withShortName(shortName); } if (hasArg) { ArgumentBuilder argBuilder = new ArgumentBuilder().withName(name).withMinimum(min).withMaximum(max); if (defaultValue != null) { argBuilder = argBuilder.withDefault(defaultValue); } optBuilder.withArgument(argBuilder.create()); } return optBuilder.create(); }
From source file:com.elex.dmp.lda.InMemoryCollapsedVariationalBayes0.java
/**
 * Parses the command line, loads the dictionary and corpus, trains the in-memory CVB0 model
 * and writes the topic and document outputs.
 *
 * @param args the raw command-line arguments
 * @param conf configuration used for loading and writing; {@code fs.default.name} is set from
 *             the {@code --dfs} option when it is not already present and the option was given
 * @return -1 when help was requested, otherwise 0
 *         (NOTE(review): 0 is also returned after an option-parsing error — confirm intended)
 * @throws Exception if loading, training or writing fails
 */
public static int main2(String[] args, Configuration conf) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option helpOpt = DefaultOptionCreator.helpOption();

    Option inputDirOpt = obuilder.withLongName("input").withRequired(true)
            .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create())
            .withDescription("The Directory on HDFS containing the collapsed, properly formatted files having "
                    + "one doc per line")
            .withShortName("i").create();

    Option dictOpt = obuilder.withLongName("dictionary").withRequired(false)
            .withArgument(abuilder.withName("dictionary").withMinimum(1).withMaximum(1).create())
            .withDescription("The path to the term-dictionary format is ... ").withShortName("d").create();

    Option dfsOpt = obuilder.withLongName("dfs").withRequired(false)
            .withArgument(abuilder.withName("dfs").withMinimum(1).withMaximum(1).create())
            .withDescription("HDFS namenode URI").withShortName("dfs").create();

    Option numTopicsOpt = obuilder.withLongName("numTopics").withRequired(true)
            .withArgument(abuilder.withName("numTopics").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of topics to learn").withShortName("top").create();

    Option outputTopicFileOpt = obuilder.withLongName("topicOutputFile").withRequired(true)
            .withArgument(abuilder.withName("topicOutputFile").withMinimum(1).withMaximum(1).create())
            .withDescription("File to write out p(term | topic)").withShortName("to").create();

    Option outputDocFileOpt = obuilder.withLongName("docOutputFile").withRequired(true)
            .withArgument(abuilder.withName("docOutputFile").withMinimum(1).withMaximum(1).create())
            .withDescription("File to write out p(topic | docid)").withShortName("do").create();

    Option alphaOpt = obuilder.withLongName("alpha").withRequired(false)
            .withArgument(abuilder.withName("alpha").withMinimum(1).withMaximum(1).withDefault("0.1").create())
            .withDescription("Smoothing parameter for p(topic | document) prior").withShortName("a").create();

    Option etaOpt = obuilder.withLongName("eta").withRequired(false)
            .withArgument(abuilder.withName("eta").withMinimum(1).withMaximum(1).withDefault("0.1").create())
            .withDescription("Smoothing parameter for p(term | topic)").withShortName("e").create();

    Option maxIterOpt = obuilder.withLongName("maxIterations").withRequired(false)
            .withArgument(
                    abuilder.withName("maxIterations").withMinimum(1).withMaximum(1).withDefault(10).create())
            .withDescription("Maximum number of training passes").withShortName("m").create();

    Option modelCorpusFractionOption = obuilder.withLongName("modelCorpusFraction").withRequired(false)
            .withArgument(abuilder.withName("modelCorpusFraction").withMinimum(1).withMaximum(1)
                    .withDefault(0.0).create())
            .withShortName("mcf").withDescription("For online updates, initial value of |model|/|corpus|")
            .create();

    Option burnInOpt = obuilder.withLongName("burnInIterations").withRequired(false)
            .withArgument(
                    abuilder.withName("burnInIterations").withMinimum(1).withMaximum(1).withDefault(5).create())
            .withDescription("Minimum number of iterations").withShortName("b").create();

    Option convergenceOpt = obuilder.withLongName("convergence").withRequired(false)
            .withArgument(
                    abuilder.withName("convergence").withMinimum(1).withMaximum(1).withDefault("0.0").create())
            .withDescription("Fractional rate of perplexity to consider convergence").withShortName("c")
            .create();

    Option reInferDocTopicsOpt = obuilder.withLongName("reInferDocTopics").withRequired(false)
            .withArgument(abuilder.withName("reInferDocTopics").withMinimum(1).withMaximum(1).withDefault("no")
                    .create())
            .withDescription("re-infer p(topic | doc) : [no | randstart | continue]").withShortName("rdt")
            .create();

    Option numTrainThreadsOpt = obuilder.withLongName("numTrainThreads").withRequired(false)
            .withArgument(abuilder.withName("numTrainThreads").withMinimum(1).withMaximum(1).withDefault("1")
                    .create())
            .withDescription("number of threads to train with").withShortName("ntt").create();

    Option numUpdateThreadsOpt = obuilder.withLongName("numUpdateThreads").withRequired(false)
            .withArgument(abuilder.withName("numUpdateThreads").withMinimum(1).withMaximum(1).withDefault("1")
                    .create())
            .withDescription("number of threads to update the model with").withShortName("nut").create();

    Option verboseOpt = obuilder.withLongName("verbose").withRequired(false)
            .withArgument(
                    abuilder.withName("verbose").withMinimum(1).withMaximum(1).withDefault("false").create())
            .withDescription("print verbose information, like top-terms in each topic, during iteration")
            .withShortName("v").create();

    // The help option is added to the group so the parser can recognise it; without this
    // the hasOption(helpOpt) branch below was unreachable.
    Group group = gbuilder.withName("Options").withOption(inputDirOpt).withOption(numTopicsOpt)
            .withOption(alphaOpt).withOption(etaOpt).withOption(maxIterOpt).withOption(burnInOpt)
            .withOption(convergenceOpt).withOption(dictOpt).withOption(reInferDocTopicsOpt)
            .withOption(outputDocFileOpt).withOption(outputTopicFileOpt).withOption(dfsOpt)
            .withOption(numTrainThreadsOpt).withOption(numUpdateThreadsOpt)
            .withOption(modelCorpusFractionOption).withOption(verboseOpt).withOption(helpOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        parser.setHelpOption(helpOpt);
        CommandLine cmdLine = parser.parse(args);
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return -1;
        }

        String inputDirString = (String) cmdLine.getValue(inputDirOpt);
        String dictDirString = cmdLine.hasOption(dictOpt) ? (String) cmdLine.getValue(dictOpt) : null;
        int numTopics = Integer.parseInt((String) cmdLine.getValue(numTopicsOpt));
        double alpha = Double.parseDouble((String) cmdLine.getValue(alphaOpt));
        double eta = Double.parseDouble((String) cmdLine.getValue(etaOpt));

        // These three options declare numeric defaults (10, 5, 0.0) while values typed on the
        // command line arrive as strings. Going through String.valueOf handles both cases;
        // the previous direct casts threw ClassCastException for one of them.
        int maxIterations = Integer.parseInt(String.valueOf(cmdLine.getValue(maxIterOpt)));
        int burnInIterations = Integer.parseInt(String.valueOf(cmdLine.getValue(burnInOpt)));
        double modelCorpusFraction =
                Double.parseDouble(String.valueOf(cmdLine.getValue(modelCorpusFractionOption)));

        double minFractionalErrorChange = Double.parseDouble((String) cmdLine.getValue(convergenceOpt));
        int numTrainThreads = Integer.parseInt((String) cmdLine.getValue(numTrainThreadsOpt));
        int numUpdateThreads = Integer.parseInt((String) cmdLine.getValue(numUpdateThreadsOpt));
        String topicOutFile = (String) cmdLine.getValue(outputTopicFileOpt);
        String docOutFile = (String) cmdLine.getValue(outputDocFileOpt);
        String reInferDocTopics = (String) cmdLine.getValue(reInferDocTopicsOpt);
        boolean verbose = Boolean.parseBoolean((String) cmdLine.getValue(verboseOpt));

        long start = System.nanoTime();

        if (conf.get("fs.default.name") == null) {
            // --dfs is optional and has no default; never hand a null value to the configuration.
            String dfsNameNode = (String) cmdLine.getValue(dfsOpt);
            if (dfsNameNode != null) {
                conf.set("fs.default.name", dfsNameNode);
            }
        }

        String[] terms = loadDictionary(dictDirString, conf);
        logTime("dictionary loading", System.nanoTime() - start);

        start = System.nanoTime();
        Matrix corpus = loadVectors(inputDirString, conf);
        logTime("vector seqfile corpus loading", System.nanoTime() - start);

        start = System.nanoTime();
        InMemoryCollapsedVariationalBayes0 cvb0 = new InMemoryCollapsedVariationalBayes0(corpus, terms,
                numTopics, alpha, eta, numTrainThreads, numUpdateThreads, modelCorpusFraction, 1234);
        logTime("cvb0 init", System.nanoTime() - start);

        start = System.nanoTime();
        cvb0.setVerbose(verbose);
        cvb0.iterateUntilConvergence(minFractionalErrorChange, maxIterations, burnInIterations);
        logTime("total training time", System.nanoTime() - start);

        // Any value other than "randstart" or "continue" (including the default "no")
        // skips re-inference.
        if ("randstart".equalsIgnoreCase(reInferDocTopics)) {
            cvb0.inferDocuments(0.0, 100, true);
        } else if ("continue".equalsIgnoreCase(reInferDocTopics)) {
            cvb0.inferDocuments(0.0, 100, false);
        }

        start = System.nanoTime();
        cvb0.writeModel(new Path(topicOutFile));
        DistributedRowMatrixWriter.write(new Path(docOutFile), conf, cvb0.docTopicCounts);
        logTime("printTopics", System.nanoTime() - start);
    } catch (OptionException e) {
        log.error("Error while parsing options", e);
        CommandLineUtil.printHelp(group);
    }
    return 0;
}
From source file:org.apache.mahout.avro.text.AvroDocumentsFromDirectory.java
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); GenericOptionsParser p = new GenericOptionsParser(conf, args); args = p.getRemainingArgs();/*w ww. jav a 2 s. co m*/ DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option parentOpt = obuilder.withLongName("parent").withRequired(true) .withArgument(abuilder.withName("parent").withMinimum(1).withMaximum(1).create()) .withDescription("Parent dir containing the documents").withShortName("p").create(); Option outputDirOpt = obuilder.withLongName("outputDir").withRequired(true) .withArgument(abuilder.withName("outputDir").withMinimum(1).withMaximum(1).create()) .withDescription("The output directory").withShortName("o").create(); Option chunkSizeOpt = obuilder.withLongName("chunkSize") .withArgument(abuilder.withName("chunkSize").withMinimum(1).withMaximum(1).create()) .withDescription("The chunkSize in documents. 
Defaults to " + DEFAULT_CHUNK_SIZE) .withShortName("chunk").create(); Option keyPrefixOpt = obuilder.withLongName("keyPrefix") .withArgument(abuilder.withName("keyPrefix").withMinimum(1).withMaximum(1).create()) .withDescription("The prefix to be prepended to the key").withShortName("prefix").create(); Option charsetOpt = obuilder.withLongName("charset").withRequired(true) .withArgument(abuilder.withName("charset").withMinimum(1).withMaximum(1).create()) .withDescription("The name of the character encoding of the input files").withShortName("c") .create(); Group group = gbuilder.withName("Options").withOption(keyPrefixOpt).withOption(chunkSizeOpt) .withOption(charsetOpt).withOption(outputDirOpt).withOption(parentOpt).create(); Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); File parentDir = new File((String) cmdLine.getValue(parentOpt)); String outputDir = (String) cmdLine.getValue(outputDirOpt); int documentsPerChunk = DEFAULT_CHUNK_SIZE; if (cmdLine.hasOption(chunkSizeOpt)) { documentsPerChunk = Integer.parseInt((String) cmdLine.getValue(chunkSizeOpt)); } String prefix = ""; if (cmdLine.hasOption(keyPrefixOpt)) { prefix = (String) cmdLine.getValue(keyPrefixOpt); } Charset charset = Charset.forName((String) cmdLine.getValue(charsetOpt)); AvroDocumentsFromDirectory dir = new AvroDocumentsFromDirectory(); dir.createAvroDocuments(conf, parentDir, outputDir, prefix, documentsPerChunk, charset); }
From source file:org.apache.mahout.avro.text.mapred.WikipediaToAvroDocuments.java
/** * Takes in two arguments:/*from w ww . java 2 s.c om*/ * <ol> * <li>The input {@link org.apache.hadoop.fs.Path} where the input documents * live</li> * <li>The output {@link org.apache.hadoop.fs.Path} where to write the * classifier as a {@link org.apache.hadoop.io.SequenceFile}</li> * </ol> * * @param args * The args */ public int run(String[] args) throws IOException { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option dirInputPathOpt = obuilder.withLongName("input").withRequired(true) .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create()) .withDescription("The input directory path").withShortName("i").create(); Option dirOutputPathOpt = obuilder.withLongName("output").withRequired(true) .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create()) .withDescription("The output directory Path").withShortName("o").create(); Option categoriesOpt = obuilder.withLongName("categories") .withArgument(abuilder.withName("categories").withMinimum(1).withMaximum(1).create()) .withDescription("Location of the categories file. One entry per line. " + "Will be used to make a string match in Wikipedia Category field") .withShortName("c").create(); Option exactMatchOpt = obuilder.withLongName("exactMatch") .withDescription("If set, then the category name must exactly match the " + "entry in the categories file. Default is false") .withShortName("e").create(); Option allOpt = obuilder.withLongName("all").withDescription("If set, Select all files. 
Default is false") .withShortName("all").create(); Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h") .create(); Group group = gbuilder.withName("Options").withOption(categoriesOpt).withOption(dirInputPathOpt) .withOption(dirOutputPathOpt).withOption(exactMatchOpt).withOption(allOpt).withOption(helpOpt) .create(); Parser parser = new Parser(); parser.setGroup(group); try { CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return 0; } String inputPath = (String) cmdLine.getValue(dirInputPathOpt); String outputPath = (String) cmdLine.getValue(dirOutputPathOpt); String catFile = ""; if (cmdLine.hasOption(categoriesOpt)) { catFile = (String) cmdLine.getValue(categoriesOpt); } boolean all = false; if (cmdLine.hasOption(allOpt)) { all = true; } runJob(inputPath, outputPath, catFile, cmdLine.hasOption(exactMatchOpt), all); } catch (OptionException e) { log.error("Exception", e); CommandLineUtil.printHelp(group); return 0; } return 1; }