List of usage examples for org.apache.commons.cli2.builder DefaultOptionBuilder DefaultOptionBuilder
public DefaultOptionBuilder()
From source file:org.apache.mahout.clustering.lda.LDAPrintTopics.java
public static void main(String[] args) throws Exception { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option inputOpt = DefaultOptionCreator.inputOption().create(); Option dictOpt = obuilder.withLongName("dict").withRequired(true) .withArgument(abuilder.withName("dict").withMinimum(1).withMaximum(1).create()) .withDescription("Dictionary to read in, in the same format as one created by " + "org.apache.mahout.utils.vectors.lucene.Driver") .withShortName("d").create(); Option outOpt = DefaultOptionCreator.outputOption().create(); Option wordOpt = obuilder.withLongName("words").withRequired(false) .withArgument(abuilder.withName("words").withMinimum(0).withMaximum(1).withDefault("20").create()) .withDescription("Number of words to print").withShortName("w").create(); Option dictTypeOpt = obuilder.withLongName("dictionaryType").withRequired(false) .withArgument(abuilder.withName("dictionaryType").withMinimum(1).withMaximum(1).create()) .withDescription("The dictionary file type (text|sequencefile)").withShortName("dt").create(); Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h") .create();//from www. j av a 2 s . 
co m Group group = gbuilder.withName("Options").withOption(dictOpt).withOption(outOpt).withOption(wordOpt) .withOption(inputOpt).withOption(dictTypeOpt).create(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } String input = cmdLine.getValue(inputOpt).toString(); String dictFile = cmdLine.getValue(dictOpt).toString(); int numWords = 20; if (cmdLine.hasOption(wordOpt)) { numWords = Integer.parseInt(cmdLine.getValue(wordOpt).toString()); } Configuration config = new Configuration(); String dictionaryType = "text"; if (cmdLine.hasOption(dictTypeOpt)) { dictionaryType = cmdLine.getValue(dictTypeOpt).toString(); } List<String> wordList; if ("text".equals(dictionaryType)) { wordList = Arrays.asList(VectorHelper.loadTermDictionary(new File(dictFile))); } else if ("sequencefile".equals(dictionaryType)) { wordList = Arrays.asList(VectorHelper.loadTermDictionary(config, dictFile)); } else { throw new IllegalArgumentException("Invalid dictionary format"); } List<Queue<Pair<String, Double>>> topWords = topWordsForTopics(input, config, wordList, numWords); File output = null; if (cmdLine.hasOption(outOpt)) { output = new File(cmdLine.getValue(outOpt).toString()); if (!output.exists() && !output.mkdirs()) { throw new IOException("Could not create directory: " + output); } } printTopWords(topWords, output); } catch (OptionException e) { CommandLineUtil.printHelp(group); throw e; } }
From source file:org.apache.mahout.clustering.minhash.MinhashOptionCreator.java
/**
 * Creates the builder for the debug-output flag.
 *
 * <p>The option uses {@code DEBUG_OUTPUT} as its long name and {@code "debug"} as its short
 * name; no argument is attached.
 *
 * @return the configured option builder (the option itself is not created here)
 */
public static DefaultOptionBuilder debugOutputOption() {
  DefaultOptionBuilder debugOutput = new DefaultOptionBuilder();
  return debugOutput
      .withLongName(DEBUG_OUTPUT)
      .withShortName("debug")
      .withDescription("Output the whole vectors for debugging");
}
From source file:org.apache.mahout.clustering.minhash.MinhashOptionCreator.java
/**
 * Creates the builder for the optional number-of-reducers option.
 *
 * <p>The option takes exactly one value and falls back to {@code "2"} when none is given.
 *
 * @return the configured option builder (the option itself is not created here)
 */
public static DefaultOptionBuilder numReducersOption() {
  DefaultOptionBuilder reducers = new DefaultOptionBuilder()
      .withLongName(NUM_REDUCERS)
      .withRequired(false)
      .withShortName("r");

  // Exactly one value, defaulting to "2".
  ArgumentBuilder reducerCount = new ArgumentBuilder()
      .withName(NUM_REDUCERS)
      .withDefault("2")
      .withMinimum(1)
      .withMaximum(1);

  return reducers
      .withArgument(reducerCount.create())
      .withDescription("The number of reduce tasks. Defaults to 2");
}
From source file:org.apache.mahout.clustering.minhash.MinhashOptionCreator.java
/**
 * Creates the builder for the optional minimum-cluster-size option used in MinHash
 * clustering.
 *
 * <p>The option takes exactly one value and falls back to {@code "10"} when none is given.
 *
 * @return the configured option builder (the option itself is not created here)
 */
public static DefaultOptionBuilder minClusterSizeOption() {
  DefaultOptionBuilder minClusterSize = new DefaultOptionBuilder()
      .withLongName(MIN_CLUSTER_SIZE)
      .withRequired(false);

  // Exactly one value, defaulting to "10".
  ArgumentBuilder sizeArgument = new ArgumentBuilder()
      .withName(MIN_CLUSTER_SIZE)
      .withDefault("10")
      .withMinimum(1)
      .withMaximum(1);

  return minClusterSize
      .withArgument(sizeArgument.create())
      .withDescription("Minimum points inside a cluster")
      .withShortName("mcs");
}
From source file:org.apache.mahout.clustering.minhash.MinhashOptionCreator.java
/** * Returns a default command line option for specifying the type of hash to * use in MinHash clustering: Should one out of * ("linear","polynomial","murmur")/*from w ww.j a v a 2 s . c o m*/ */ public static DefaultOptionBuilder hashTypeOption() { return new DefaultOptionBuilder().withLongName(HASH_TYPE).withRequired(false) .withArgument(new ArgumentBuilder().withName(HASH_TYPE).withDefault("murmur").withMinimum(1) .withMaximum(1).create()) .withDescription("Type of hash function to use. Available types: (linear, polynomial, murmur) ") .withShortName("ht"); }
From source file:org.apache.mahout.clustering.minhash.MinhashOptionCreator.java
/**
 * Creates the builder for the optional option that sets the minimum size a vector must have
 * to be hashed.
 *
 * <p>The option takes exactly one value and falls back to {@code "5"} when none is given.
 *
 * @return the configured option builder (the option itself is not created here)
 */
public static DefaultOptionBuilder minVectorSizeOption() {
  DefaultOptionBuilder minVectorSize = new DefaultOptionBuilder()
      .withLongName(MIN_VECTOR_SIZE)
      .withRequired(false);

  // Exactly one value, defaulting to "5".
  ArgumentBuilder sizeArgument = new ArgumentBuilder()
      .withName(MIN_VECTOR_SIZE)
      .withDefault("5")
      .withMinimum(1)
      .withMaximum(1);

  return minVectorSize
      .withArgument(sizeArgument.create())
      .withDescription("Minimum size of vector to be hashed")
      .withShortName("mvs");
}
From source file:org.apache.mahout.clustering.minhash.MinhashOptionCreator.java
/**
 * Creates the builder for the optional option that sets how many hash functions MinHash
 * clustering uses.
 *
 * <p>The option takes exactly one value and falls back to {@code "10"} when none is given.
 *
 * @return the configured option builder (the option itself is not created here)
 */
public static DefaultOptionBuilder numHashFunctionsOption() {
  DefaultOptionBuilder numHashFunctions = new DefaultOptionBuilder()
      .withLongName(NUM_HASH_FUNCTIONS)
      .withRequired(false);

  // Exactly one value, defaulting to "10".
  ArgumentBuilder countArgument = new ArgumentBuilder()
      .withName(NUM_HASH_FUNCTIONS)
      .withDefault("10")
      .withMinimum(1)
      .withMaximum(1);

  return numHashFunctions
      .withArgument(countArgument.create())
      .withDescription("Number of hash functions to be used")
      .withShortName("nh");
}
From source file:org.apache.mahout.clustering.minhash.MinhashOptionCreator.java
/**
 * Creates the builder for the optional option that sets how many key groups MinHash
 * clustering uses.
 *
 * <p>The option takes exactly one value and falls back to {@code "2"} when none is given.
 *
 * @return the configured option builder (the option itself is not created here)
 */
public static DefaultOptionBuilder keyGroupsOption() {
  DefaultOptionBuilder keyGroups = new DefaultOptionBuilder()
      .withLongName(KEY_GROUPS)
      .withRequired(false);

  // Exactly one value, defaulting to "2".
  ArgumentBuilder groupCountArgument = new ArgumentBuilder()
      .withName(KEY_GROUPS)
      .withDefault("2")
      .withMinimum(1)
      .withMaximum(1);

  return keyGroups
      .withArgument(groupCountArgument.create())
      .withDescription("Number of key groups to be used")
      .withShortName("kg");
}
From source file:org.apache.mahout.clustering.streaming.tools.ClusterQualitySummarizer.java
/**
 * Parses the command line and stores the recognised values in this object's fields.
 *
 * <p>Sets {@code trainFile}, {@code centroidFile} and {@code outputFile} from their required
 * options; {@code testFile}, {@code centroidCompareFile}, {@code mahoutKMeansFormat} and
 * {@code mahoutKMeansFormatCompare} are only touched when the matching option is present.
 *
 * @param args the raw command-line arguments
 * @return {@code false} when {@code parseAndHelp} yields no command line, {@code true} once
 *     all values have been stored
 */
private boolean parseArgs(String[] args) {
  DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();

  Option helpOption = optionBuilder
      .withLongName("help")
      .withDescription("print this list")
      .create();

  ArgumentBuilder argBuilder = new ArgumentBuilder();

  Option trainInput = optionBuilder
      .withLongName("input")
      .withShortName("i")
      .withRequired(true)
      .withArgument(argBuilder.withName("input").withMaximum(1).create())
      .withDescription("where to get seq files with the vectors (training set)")
      .create();

  Option testInput = optionBuilder
      .withLongName("testInput")
      .withShortName("itest")
      .withArgument(argBuilder.withName("testInput").withMaximum(1).create())
      .withDescription("where to get seq files with the vectors (test set)")
      .create();

  Option centroids = optionBuilder
      .withLongName("centroids")
      .withShortName("c")
      .withRequired(true)
      .withArgument(argBuilder.withName("centroids").withMaximum(1).create())
      .withDescription(
          "where to get seq files with the centroids (from Mahout KMeans or StreamingKMeansDriver)")
      .create();

  Option centroidsCompare = optionBuilder
      .withLongName("centroidsCompare")
      .withShortName("cc")
      .withRequired(false)
      .withArgument(argBuilder.withName("centroidsCompare").withMaximum(1).create())
      .withDescription("where to get seq files with the second set of centroids (from Mahout KMeans or "
          + "StreamingKMeansDriver)")
      .create();

  Option output = optionBuilder
      .withLongName("output")
      .withShortName("o")
      .withRequired(true)
      .withArgument(argBuilder.withName("output").withMaximum(1).create())
      .withDescription("where to dump the CSV file with the results")
      .create();

  Option kMeansFormat = optionBuilder
      .withLongName("mahoutkmeansformat")
      .withShortName("mkm")
      .withDescription("if set, read files as (IntWritable, ClusterWritable) pairs")
      .withArgument(argBuilder.withName("numpoints").withMaximum(1).create())
      .create();

  Option kMeansCompareFormat = optionBuilder
      .withLongName("mahoutkmeansformatCompare")
      .withShortName("mkmc")
      .withDescription("if set, read files as (IntWritable, ClusterWritable) pairs")
      .withArgument(argBuilder.withName("numpoints").withMaximum(1).create())
      .create();

  Group allOptions = new GroupBuilder()
      .withOption(helpOption)
      .withOption(trainInput)
      .withOption(testInput)
      .withOption(output)
      .withOption(centroids)
      .withOption(centroidsCompare)
      .withOption(kMeansFormat)
      .withOption(kMeansCompareFormat)
      .create();

  Parser cliParser = new Parser();
  cliParser.setHelpOption(helpOption);
  cliParser.setHelpTrigger("--help");
  cliParser.setGroup(allOptions);
  cliParser.setHelpFormatter(new HelpFormatter(" ", "", " ", 150));

  CommandLine parsed = cliParser.parseAndHelp(args);
  if (parsed == null) {
    // No command line was produced, so there is nothing to read.
    return false;
  }

  trainFile = (String) parsed.getValue(trainInput);
  if (parsed.hasOption(testInput)) {
    testFile = (String) parsed.getValue(testInput);
  }

  centroidFile = (String) parsed.getValue(centroids);
  if (parsed.hasOption(centroidsCompare)) {
    centroidCompareFile = (String) parsed.getValue(centroidsCompare);
  }

  outputFile = (String) parsed.getValue(output);

  // The format flags are only ever switched on here, never reset.
  if (parsed.hasOption(kMeansFormat)) {
    mahoutKMeansFormat = true;
  }
  if (parsed.hasOption(kMeansCompareFormat)) {
    mahoutKMeansFormatCompare = true;
  }
  return true;
}
From source file:org.apache.mahout.clustering.streaming.tools.ResplitSequenceFiles.java
/**
 * Parses the command line and stores the recognised values in this object's fields.
 *
 * <p>Sets {@code inputFile}, {@code outputFileBase} and {@code numSplits} (parsed with
 * {@link Integer#parseInt(String)}) from the three required options.
 *
 * @param args the raw command-line arguments
 * @return {@code false} when {@code parseAndHelp} yields no command line, {@code true} once
 *     all values have been stored
 */
private boolean parseArgs(String[] args) {
  DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();

  Option helpOption = optionBuilder
      .withLongName("help")
      .withDescription("print this list")
      .create();

  ArgumentBuilder argBuilder = new ArgumentBuilder();

  Option input = optionBuilder
      .withLongName("input")
      .withShortName("i")
      .withRequired(true)
      .withArgument(argBuilder.withName("input").withMaximum(1).create())
      .withDescription(
          "what the base folder for sequence files is (they all must have the same key/value type")
      .create();

  Option output = optionBuilder
      .withLongName("output")
      .withShortName("o")
      .withRequired(true)
      .withArgument(argBuilder.withName("output").withMaximum(1).create())
      .withDescription(
          "the base name of the file split that the files will be split it; the i'th split has the "
              + "suffix -i")
      .create();

  Option splitCount = optionBuilder
      .withLongName("numSplits")
      .withShortName("ns")
      .withRequired(true)
      .withArgument(argBuilder.withName("numSplits").withMaximum(1).create())
      .withDescription("how many splits to use for the given files")
      .create();

  Group allOptions = new GroupBuilder()
      .withOption(helpOption)
      .withOption(input)
      .withOption(output)
      .withOption(splitCount)
      .create();

  Parser cliParser = new Parser();
  cliParser.setHelpOption(helpOption);
  cliParser.setHelpTrigger("--help");
  cliParser.setGroup(allOptions);
  cliParser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));

  CommandLine parsed = cliParser.parseAndHelp(args);
  if (parsed == null) {
    // No command line was produced, so there is nothing to read.
    return false;
  }

  inputFile = (String) parsed.getValue(input);
  outputFileBase = (String) parsed.getValue(output);
  numSplits = Integer.parseInt((String) parsed.getValue(splitCount));
  return true;
}