List of usage examples for the org.apache.commons.cli2.builder.DefaultOptionBuilder constructor DefaultOptionBuilder()
public DefaultOptionBuilder()
From source file:org.apache.mahout.regression.penalizedlinear.LinearRegularizePath.java
/**
 * Parses the command line of the regularization-path job and stores the result in the
 * {@code parameter}, {@code input} and {@code output} fields.
 *
 * <p>Recognized options: {@code --input}, {@code --output}, {@code --dependent},
 * {@code --independent} and {@code --interaction} (all required), plus the optional
 * {@code --bias}, {@code --lambda}, {@code --alpha} and {@code --help}.
 *
 * @param args the raw command-line arguments
 * @return {@code true} when all options were parsed and validated; {@code false} when
 *         {@code Parser.parseAndHelp} yields no command line, when {@code --alpha} is not a
 *         number, or when the lambda/alpha/numOfCV checks fail
 */
private boolean parseArgs(String[] args) {
  DefaultOptionBuilder builder = new DefaultOptionBuilder();
  ArgumentBuilder argumentBuilder = new ArgumentBuilder();

  Option help = builder.withLongName("help").withDescription("print this list").create();

  Option inputFile = builder.withLongName("input").withRequired(true)
      .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
      .withDescription("where to get training data (CSV or white-spaced TEXT file)").create();

  Option outputFile = builder.withLongName("output").withRequired(true)
      .withArgument(argumentBuilder.withName("output").withMaximum(1).create())
      .withDescription("where to get results").create();

  Option dependent = builder.withLongName("dependent").withRequired(true)
      .withArgument(argumentBuilder.withName("dependent").withMinimum(1).withMaximum(1).create())
      .withDescription("the dependent features").create();

  Option independent = builder.withLongName("independent").withRequired(true)
      .withArgument(argumentBuilder.withName("independent").create())
      .withDescription("the independent features").create();

  Option interaction = builder.withLongName("interaction").withRequired(true)
      .withArgument(argumentBuilder.withName("interaction").withMinimum(0).create())
      .withDescription(
          "the interactions of features, the format is: feature1:feature2 (identical features are OK)")
      .create();

  Option bias = builder.withLongName("bias").withDescription("include a bias term").create();

  Option lambda = builder.withLongName("lambda")
      .withArgument(argumentBuilder.withName("lambda").withDefault("0").withMinimum(1).create())
      .withDescription("an increasing positive sequence of penalty coefficient, "
          + "with length n >= 0; if lambda is not specified, the sequence is chosen by algorithm.")
      .create();

  Option alpha = builder.withLongName("alpha")
      .withArgument(
          argumentBuilder.withName("alpha").withDefault("1").withMinimum(1).withMaximum(1).create())
      .withDescription("the elastic-net coefficient with default value 1 (LASSO)").create();

  Group normalArgs = new GroupBuilder().withOption(help).withOption(inputFile).withOption(outputFile)
      .withOption(dependent).withOption(independent).withOption(interaction).withOption(bias)
      .withOption(lambda).withOption(alpha).create();

  Parser parser = new Parser();
  parser.setHelpOption(help);
  parser.setHelpTrigger("--help");
  parser.setGroup(normalArgs);
  parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));
  CommandLine cmdLine = parser.parseAndHelp(args);
  if (cmdLine == null) {
    return false;
  }

  parameter = new LinearRegularizePathParameter();
  // This driver always uses a single fold; the range check below still covers it.
  parameter.numOfCV = 1;
  try {
    parameter.alpha = Float.parseFloat((String) cmdLine.getValue(alpha));
  } catch (NumberFormatException e) {
    // A non-numeric --alpha is a user error: report it like the other validation failures
    // instead of letting the exception escape from a method that signals failure via false.
    log.error("alpha must be a number with 0.0 <= alphaValue <= 1.0", e);
    return false;
  }
  parameter.intercept = cmdLine.hasOption(bias);
  parameter.dependent = (String) cmdLine.getValue(dependent);
  parameter.independent = joinWithCommas(cmdLine.getValues(independent));
  parameter.interaction = joinWithCommas(cmdLine.getValues(interaction));

  if (!processLambda(parameter, cmdLine, lambda) || parameter.alpha < 0.0 || parameter.alpha > 1.0
      || parameter.numOfCV < 1 || parameter.numOfCV > 20) {
    log.error(
        "please make sure the lambda sequence is positive and increasing, and 0.0 <= alphaValue <= 1.0 and 1 <= numofCV <= 20");
    return false;
  }

  input = (String) cmdLine.getValue(inputFile);
  output = (String) cmdLine.getValue(outputFile);
  return true;
}

/**
 * Concatenates the string form of every element, separated by commas, without a trailing comma.
 * An empty sequence yields the empty string.
 */
private static String joinWithCommas(Iterable<?> values) {
  StringBuilder joined = new StringBuilder();
  for (Object value : values) {
    joined.append(value.toString()).append(',');
  }
  if (joined.length() > 0) {
    // Drop the separator written after the last element.
    joined.setLength(joined.length() - 1);
  }
  return joined.toString();
}
From source file:org.apache.mahout.regression.penalizedlinear.PenalizedLinearDriver.java
/**
 * Parses the command line of the penalized-linear driver and stores the result in the
 * {@code parameter}, {@code input} and {@code output} fields.
 *
 * <p>Recognized options: {@code --input} and {@code --output} (required), plus the optional
 * {@code --lambda}, {@code --alpha}, {@code --bias}, {@code --numOfCV} and {@code --help}.
 *
 * @param args the raw command-line arguments
 * @return {@code true} when all options were parsed and validated; {@code false} when
 *         {@code Parser.parseAndHelp} yields no command line, when {@code --numOfCV} or
 *         {@code --alpha} is not a number, or when the lambda/alpha/numOfCV checks fail
 */
private boolean parseArgs(String[] args) {
  DefaultOptionBuilder builder = new DefaultOptionBuilder();
  ArgumentBuilder argumentBuilder = new ArgumentBuilder();

  Option help = builder.withLongName("help").withDescription("print this list").create();

  Option inputFile = builder.withLongName("input").withRequired(true)
      .withArgument(argumentBuilder.withName("input").withMaximum(1).create())
      .withDescription(
          "where to get training data (Mahout sequence file of VectorWritable); in each line, the first element is response; rest are predictors.")
      .create();

  Option outputFile = builder.withLongName("output").withRequired(true)
      .withArgument(argumentBuilder.withName("output").withMaximum(1).create())
      .withDescription("where to get results").create();

  Option lambda = builder.withLongName("lambda")
      .withArgument(argumentBuilder.withName("lambda").withDefault("0").withMinimum(1).create())
      .withDescription("an increasing positive sequence of penalty coefficient, "
          + "with length n >= 0; if lambda is not specified, the sequence is chosen by algorithm.")
      .create();

  Option alpha = builder.withLongName("alpha")
      .withArgument(
          argumentBuilder.withName("alpha").withDefault("1").withMinimum(1).withMaximum(1).create())
      .withDescription("the elastic-net coefficient with default value 1 (LASSO)").create();

  Option bias = builder.withLongName("bias").withDescription("include a bias term").create();

  Option numOfCV = builder.withLongName("numOfCV")
      .withArgument(
          argumentBuilder.withName("numOfCV").withDefault("5").withMinimum(0).withMaximum(1).create())
      .withDescription("number of cross validation, the rule of thumb is 5 or 10").create();

  Group normalArgs = new GroupBuilder().withOption(help).withOption(inputFile).withOption(outputFile)
      .withOption(lambda).withOption(alpha).withOption(bias).withOption(numOfCV).create();

  Parser parser = new Parser();
  parser.setHelpOption(help);
  parser.setHelpTrigger("--help");
  parser.setGroup(normalArgs);
  parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));
  CommandLine cmdLine = parser.parseAndHelp(args);
  if (cmdLine == null) {
    return false;
  }

  parameter = new PenalizedLinearParameter();
  try {
    parameter.setNumOfCV(Integer.parseInt((String) cmdLine.getValue(numOfCV)));
    parameter.setAlpha(Float.parseFloat((String) cmdLine.getValue(alpha)));
  } catch (NumberFormatException e) {
    // Non-numeric values are user errors: report them like the other validation failures
    // instead of letting the exception escape from a method that signals failure via false.
    log.error("numOfCV must be an integer and alpha must be a number", e);
    return false;
  }
  parameter.setIntercept(cmdLine.hasOption(bias));

  if (!processLambda(parameter, cmdLine, lambda) || parameter.alpha < 0.0 || parameter.alpha > 1.0
      || parameter.numOfCV < 1 || parameter.numOfCV > 20) {
    log.error(
        "please make sure the lambda sequence is positive and increasing, and 0.0 <= alphaValue <= 1.0 and 1 <= numOfCV <= 20");
    return false;
  }

  input = (String) cmdLine.getValue(inputFile);
  output = (String) cmdLine.getValue(outputFile);
  return true;
}
From source file:org.apache.mahout.text.SparseVectorsFromSequenceFiles.java
public static void main(String[] args) throws Exception { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option inputDirOpt = obuilder.withLongName("input").withRequired(true) .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create()) .withDescription("input dir containing the documents in sequence file format").withShortName("i") .create();//from w w w . ja v a2s . c o m Option outputDirOpt = obuilder.withLongName("output").withRequired(true) .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create()) .withDescription("The output directory").withShortName("o").create(); Option minSupportOpt = obuilder.withLongName("minSupport") .withArgument(abuilder.withName("minSupport").withMinimum(1).withMaximum(1).create()) .withDescription("(Optional) Minimum Support. Default Value: 2").withShortName("s").create(); Option analyzerNameOpt = obuilder.withLongName("analyzerName") .withArgument(abuilder.withName("analyzerName").withMinimum(1).withMaximum(1).create()) .withDescription("The class name of the analyzer").withShortName("a").create(); Option chunkSizeOpt = obuilder.withLongName("chunkSize") .withArgument(abuilder.withName("chunkSize").withMinimum(1).withMaximum(1).create()) .withDescription("The chunkSize in MegaBytes. 100-10000 MB").withShortName("chunk").create(); Option weightOpt = obuilder.withLongName("weight").withRequired(false) .withArgument(abuilder.withName("weight").withMinimum(1).withMaximum(1).create()) .withDescription("The kind of weight to use. Currently TF or TFIDF").withShortName("wt").create(); Option minDFOpt = obuilder.withLongName("minDF").withRequired(false) .withArgument(abuilder.withName("minDF").withMinimum(1).withMaximum(1).create()) .withDescription("The minimum document frequency. 
Default is 1").withShortName("md").create(); Option maxDFPercentOpt = obuilder.withLongName("maxDFPercent").withRequired(false) .withArgument(abuilder.withName("maxDFPercent").withMinimum(1).withMaximum(1).create()) .withDescription( "The max percentage of docs for the DF. Can be used to remove really high frequency terms." + " Expressed as an integer between 0 and 100. Default is 99.") .withShortName("x").create(); Option minLLROpt = obuilder.withLongName("minLLR").withRequired(false) .withArgument(abuilder.withName("minLLR").withMinimum(1).withMaximum(1).create()) .withDescription("(Optional)The minimum Log Likelihood Ratio(Float) Default is " + LLRReducer.DEFAULT_MIN_LLR) .withShortName("ml").create(); Option numReduceTasksOpt = obuilder.withLongName("numReducers") .withArgument(abuilder.withName("numReducers").withMinimum(1).withMaximum(1).create()) .withDescription("(Optional) Number of reduce tasks. Default Value: 1").withShortName("nr") .create(); Option powerOpt = obuilder.withLongName("norm").withRequired(false) .withArgument(abuilder.withName("norm").withMinimum(1).withMaximum(1).create()) .withDescription( "The norm to use, expressed as either a float or \"INF\" if you want to use the Infinite norm. " + "Must be greater or equal to 0. The default is not to normalize") .withShortName("n").create(); Option maxNGramSizeOpt = obuilder.withLongName("maxNGramSize").withRequired(false) .withArgument(abuilder.withName("ngramSize").withMinimum(1).withMaximum(1).create()) .withDescription("(Optional) The maximum size of ngrams to create" + " (2 = bigrams, 3 = trigrams, etc) Default Value:1") .withShortName("ng").create(); Option sequentialAccessVectorOpt = obuilder.withLongName("sequentialAccessVector").withRequired(false) .withDescription( "(Optional) Whether output vectors should be SequentialAccessVectors. 
If set true else false") .withShortName("seq").create(); Option overwriteOutput = obuilder.withLongName("overwrite").withRequired(false) .withDescription("If set, overwrite the output directory").withShortName("ow").create(); Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h") .create(); Group group = gbuilder.withName("Options").withOption(minSupportOpt).withOption(analyzerNameOpt) .withOption(chunkSizeOpt).withOption(outputDirOpt).withOption(inputDirOpt).withOption(minDFOpt) .withOption(maxDFPercentOpt).withOption(weightOpt).withOption(powerOpt).withOption(minLLROpt) .withOption(numReduceTasksOpt).withOption(maxNGramSizeOpt).withOption(overwriteOutput) .withOption(helpOpt).withOption(sequentialAccessVectorOpt).create(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } Path inputDir = new Path((String) cmdLine.getValue(inputDirOpt)); Path outputDir = new Path((String) cmdLine.getValue(outputDirOpt)); int chunkSize = 100; if (cmdLine.hasOption(chunkSizeOpt)) { chunkSize = Integer.parseInt((String) cmdLine.getValue(chunkSizeOpt)); } int minSupport = 2; if (cmdLine.hasOption(minSupportOpt)) { String minSupportString = (String) cmdLine.getValue(minSupportOpt); minSupport = Integer.parseInt(minSupportString); } int maxNGramSize = 1; if (cmdLine.hasOption(maxNGramSizeOpt)) { try { maxNGramSize = Integer.parseInt(cmdLine.getValue(maxNGramSizeOpt).toString()); } catch (NumberFormatException ex) { log.warn("Could not parse ngram size option"); } } log.info("Maximum n-gram size is: {}", maxNGramSize); if (cmdLine.hasOption(overwriteOutput)) { HadoopUtil.overwriteOutput(outputDir); } float minLLRValue = LLRReducer.DEFAULT_MIN_LLR; if (cmdLine.hasOption(minLLROpt)) { minLLRValue = Float.parseFloat(cmdLine.getValue(minLLROpt).toString()); } log.info("Minimum LLR value: {}", minLLRValue); int 
reduceTasks = 1; if (cmdLine.hasOption(numReduceTasksOpt)) { reduceTasks = Integer.parseInt(cmdLine.getValue(numReduceTasksOpt).toString()); } log.info("Number of reduce tasks: {}", reduceTasks); Class<? extends Analyzer> analyzerClass = DefaultAnalyzer.class; if (cmdLine.hasOption(analyzerNameOpt)) { String className = cmdLine.getValue(analyzerNameOpt).toString(); analyzerClass = (Class<? extends Analyzer>) Class.forName(className); // try instantiating it, b/c there isn't any point in setting it if // you can't instantiate it analyzerClass.newInstance(); } boolean processIdf; if (cmdLine.hasOption(weightOpt)) { String wString = cmdLine.getValue(weightOpt).toString(); if (wString.equalsIgnoreCase("tf")) { processIdf = false; } else if (wString.equalsIgnoreCase("tfidf")) { processIdf = true; } else { throw new OptionException(weightOpt); } } else { processIdf = true; } int minDf = 1; if (cmdLine.hasOption(minDFOpt)) { minDf = Integer.parseInt(cmdLine.getValue(minDFOpt).toString()); } int maxDFPercent = 99; if (cmdLine.hasOption(maxDFPercentOpt)) { maxDFPercent = Integer.parseInt(cmdLine.getValue(maxDFPercentOpt).toString()); } float norm = PartialVectorMerger.NO_NORMALIZING; if (cmdLine.hasOption(powerOpt)) { String power = cmdLine.getValue(powerOpt).toString(); if (power.equals("INF")) { norm = Float.POSITIVE_INFINITY; } else { norm = Float.parseFloat(power); } } HadoopUtil.overwriteOutput(outputDir); Path tokenizedPath = new Path(outputDir, DocumentProcessor.TOKENIZED_DOCUMENT_OUTPUT_FOLDER); DocumentProcessor.tokenizeDocuments(inputDir, analyzerClass, tokenizedPath); boolean sequentialAccessOutput = false; if (cmdLine.hasOption(sequentialAccessVectorOpt)) { sequentialAccessOutput = true; } DictionaryVectorizer.createTermFrequencyVectors(tokenizedPath, outputDir, minSupport, maxNGramSize, minLLRValue, reduceTasks, chunkSize, sequentialAccessOutput); if (processIdf) { TFIDFConverter.processTfIdf(new Path(outputDir, 
DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER), new Path(outputDir, TFIDFConverter.TFIDF_OUTPUT_FOLDER), chunkSize, minDf, maxDFPercent, norm, sequentialAccessOutput, reduceTasks); } } catch (OptionException e) { log.error("Exception", e); CommandLineUtil.printHelp(group); } }
From source file:org.apache.mahout.text.wikipedia.WikipediaXmlSplitter.java
public static void main(String[] args) throws IOException { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option dumpFileOpt = obuilder.withLongName("dumpFile").withRequired(true) .withArgument(abuilder.withName("dumpFile").withMinimum(1).withMaximum(1).create()) .withDescription("The path to the wikipedia dump file (.bz2 or uncompressed)").withShortName("d") .create();/*w ww . j a v a2 s . co m*/ Option outputDirOpt = obuilder.withLongName("outputDir").withRequired(true) .withArgument(abuilder.withName("outputDir").withMinimum(1).withMaximum(1).create()) .withDescription("The output directory to place the splits in:\n" + "local files:\n\t/var/data/wikipedia-xml-chunks or\n\tfile:///var/data/wikipedia-xml-chunks\n" + "Hadoop DFS:\n\thdfs://wikipedia-xml-chunks\n" + "AWS S3 (blocks):\n\ts3://bucket-name/wikipedia-xml-chunks\n" + "AWS S3 (native files):\n\ts3n://bucket-name/wikipedia-xml-chunks\n") .withShortName("o").create(); Option s3IdOpt = obuilder.withLongName("s3ID").withRequired(false) .withArgument(abuilder.withName("s3Id").withMinimum(1).withMaximum(1).create()) .withDescription("Amazon S3 ID key").withShortName("i").create(); Option s3SecretOpt = obuilder.withLongName("s3Secret").withRequired(false) .withArgument(abuilder.withName("s3Secret").withMinimum(1).withMaximum(1).create()) .withDescription("Amazon S3 secret key").withShortName("s").create(); Option chunkSizeOpt = obuilder.withLongName("chunkSize").withRequired(true) .withArgument(abuilder.withName("chunkSize").withMinimum(1).withMaximum(1).create()) .withDescription("The Size of the chunk, in megabytes").withShortName("c").create(); Option numChunksOpt = obuilder.withLongName("numChunks").withRequired(false) .withArgument(abuilder.withName("numChunks").withMinimum(1).withMaximum(1).create()) .withDescription( "The maximum number of chunks to create. 
If specified, program will only create a subset of the chunks") .withShortName("n").create(); Group group = gbuilder.withName("Options").withOption(dumpFileOpt).withOption(outputDirOpt) .withOption(chunkSizeOpt).withOption(numChunksOpt).withOption(s3IdOpt).withOption(s3SecretOpt) .create(); Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine; try { cmdLine = parser.parse(args); } catch (OptionException e) { log.error("Error while parsing options", e); CommandLineUtil.printHelp(group); return; } Configuration conf = new Configuration(); String dumpFilePath = (String) cmdLine.getValue(dumpFileOpt); String outputDirPath = (String) cmdLine.getValue(outputDirOpt); if (cmdLine.hasOption(s3IdOpt)) { String id = (String) cmdLine.getValue(s3IdOpt); conf.set("fs.s3n.awsAccessKeyId", id); conf.set("fs.s3.awsAccessKeyId", id); } if (cmdLine.hasOption(s3SecretOpt)) { String secret = (String) cmdLine.getValue(s3SecretOpt); conf.set("fs.s3n.awsSecretAccessKey", secret); conf.set("fs.s3.awsSecretAccessKey", secret); } // do not compute crc file when using local FS conf.set("fs.file.impl", "org.apache.hadoop.fs.RawLocalFileSystem"); FileSystem fs = FileSystem.get(URI.create(outputDirPath), conf); int chunkSize = 1024 * 1024 * Integer.parseInt((String) cmdLine.getValue(chunkSizeOpt)); int numChunks = Integer.MAX_VALUE; if (cmdLine.hasOption(numChunksOpt)) { numChunks = Integer.parseInt((String) cmdLine.getValue(numChunksOpt)); } String header = "<mediawiki xmlns=\"http://www.mediawiki.org/xml/export-0.3/\" " + "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" " + "xsi:schemaLocation=\"http://www.mediawiki.org/xml/export-0.3/ " + "http://www.mediawiki.org/xml/export-0.3.xsd\" " + "version=\"0.3\" " + "xml:lang=\"en\">\n" + " <siteinfo>\n" + "<sitename>Wikipedia</sitename>\n" + " <base>http://en.wikipedia.org/wiki/Main_Page</base>\n" + " <generator>MediaWiki 1.13alpha</generator>\n" + " <case>first-letter</case>\n" + " <namespaces>\n" + " <namespace 
key=\"-2\">Media</namespace>\n" + " <namespace key=\"-1\">Special</namespace>\n" + " <namespace key=\"0\" />\n" + " <namespace key=\"1\">Talk</namespace>\n" + " <namespace key=\"2\">User</namespace>\n" + " <namespace key=\"3\">User talk</namespace>\n" + " <namespace key=\"4\">Wikipedia</namespace>\n" + " <namespace key=\"5\">Wikipedia talk</namespace>\n" + " <namespace key=\"6\">Image</namespace>\n" + " <namespace key=\"7\">Image talk</namespace>\n" + " <namespace key=\"8\">MediaWiki</namespace>\n" + " <namespace key=\"9\">MediaWiki talk</namespace>\n" + " <namespace key=\"10\">Template</namespace>\n" + " <namespace key=\"11\">Template talk</namespace>\n" + " <namespace key=\"12\">Help</namespace>\n" + " <namespace key=\"13\">Help talk</namespace>\n" + " <namespace key=\"14\">Category</namespace>\n" + " <namespace key=\"15\">Category talk</namespace>\n" + " <namespace key=\"100\">Portal</namespace>\n" + " <namespace key=\"101\">Portal talk</namespace>\n" + " </namespaces>\n" + " </siteinfo>\n"; StringBuilder content = new StringBuilder(); content.append(header); NumberFormat decimalFormatter = new DecimalFormat("0000"); File dumpFile = new File(dumpFilePath); // If the specified path for the input file is incorrect, return immediately if (!dumpFile.exists()) { log.error("Input file path {} doesn't exist", dumpFilePath); return; } FileLineIterator it; if (dumpFilePath.endsWith(".bz2")) { // default compression format from http://download.wikimedia.org CompressionCodec codec = new BZip2Codec(); it = new FileLineIterator(codec.createInputStream(new FileInputStream(dumpFile))); } else { // assume the user has previously de-compressed the dump file it = new FileLineIterator(dumpFile); } int fileNumber = 0; while (it.hasNext()) { String thisLine = it.next(); if (thisLine.trim().startsWith("<page>")) { boolean end = false; while (!thisLine.trim().startsWith("</page>")) { content.append(thisLine).append('\n'); if (it.hasNext()) { thisLine = it.next(); } else { end = true; 
break; } } content.append(thisLine).append('\n'); if (content.length() > chunkSize || end) { content.append("</mediawiki>"); fileNumber++; String filename = outputDirPath + "/chunk-" + decimalFormatter.format(fileNumber) + ".xml"; BufferedWriter chunkWriter = new BufferedWriter( new OutputStreamWriter(fs.create(new Path(filename)), "UTF-8")); try { chunkWriter.write(content.toString(), 0, content.length()); } finally { Closeables.close(chunkWriter, false); } if (fileNumber >= numChunks) { break; } content = new StringBuilder(); content.append(header); } } } }
From source file:org.apache.mahout.text.WikipediaToSequenceFile.java
/**
 * Command-line entry point: parses the options and hands them to {@code runJob}.
 *
 * <p>Options:
 * <ul>
 * <li>the input and output options from {@code DefaultOptionCreator}</li>
 * <li>{@code --categories}: location of the categories file, one entry per line</li>
 * <li>{@code --exactMatch}: require an exact category match</li>
 * <li>{@code --all}: select all files</li>
 * <li>{@code --removeLabels}: remove category labels from the document text</li>
 * <li>the help option from {@code DefaultOptionCreator}</li>
 * </ul>
 *
 * @param args the raw command-line arguments
 * @throws IOException declared by the original signature; presumably raised by {@code runJob}
 */
public static void main(String[] args) throws IOException {
  DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
  ArgumentBuilder abuilder = new ArgumentBuilder();
  GroupBuilder gbuilder = new GroupBuilder();

  Option dirInputPathOpt = DefaultOptionCreator.inputOption().create();
  Option dirOutputPathOpt = DefaultOptionCreator.outputOption().create();

  Option categoriesOpt = obuilder.withLongName("categories")
      .withArgument(abuilder.withName("categories").withMinimum(1).withMaximum(1).create())
      .withDescription("Location of the categories file. One entry per line. "
          + "Will be used to make a string match in Wikipedia Category field")
      .withShortName("c").create();

  Option exactMatchOpt = obuilder.withLongName("exactMatch")
      .withDescription("If set, then the category name must exactly match the "
          + "entry in the categories file. Default is false")
      .withShortName("e").create();

  Option allOpt = obuilder.withLongName("all").withDescription("If set, Select all files. Default is false")
      .withShortName("all").create();

  Option removeLabelOpt = obuilder.withLongName("removeLabels")
      .withDescription("If set, remove [[Category:labels]] from document text after extracting label."
          + "Default is false")
      .withShortName("rl").create();

  Option helpOpt = DefaultOptionCreator.helpOption();

  Group group = gbuilder.withName("Options").withOption(categoriesOpt).withOption(dirInputPathOpt)
      .withOption(dirOutputPathOpt).withOption(exactMatchOpt).withOption(allOpt).withOption(helpOpt)
      .withOption(removeLabelOpt).create();

  Parser parser = new Parser();
  parser.setGroup(group);
  parser.setHelpOption(helpOpt);
  try {
    CommandLine cmdLine = parser.parse(args);
    if (cmdLine.hasOption(helpOpt)) {
      CommandLineUtil.printHelp(group);
      return;
    }

    String inputPath = (String) cmdLine.getValue(dirInputPathOpt);
    String outputPath = (String) cmdLine.getValue(dirOutputPathOpt);

    // An empty string is passed to runJob when no categories file is given.
    String catFile = "";
    if (cmdLine.hasOption(categoriesOpt)) {
      catFile = (String) cmdLine.getValue(categoriesOpt);
    }

    boolean all = cmdLine.hasOption(allOpt);
    boolean removeLabels = cmdLine.hasOption(removeLabelOpt);

    runJob(inputPath, outputPath, catFile, cmdLine.hasOption(exactMatchOpt), all, removeLabels);
  } catch (OptionException e) {
    log.error("Exception", e);
    CommandLineUtil.printHelp(group);
  } catch (InterruptedException e) {
    // Restore the interrupt status so code further up the stack can still observe it.
    Thread.currentThread().interrupt();
    log.error("Exception", e);
    CommandLineUtil.printHelp(group);
  } catch (ClassNotFoundException e) {
    log.error("Exception", e);
    CommandLineUtil.printHelp(group);
  }
}
From source file:org.apache.mahout.utils.nlp.collocations.llr.CollocDriver.java
@Override public int run(String[] args) throws Exception { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option inputOpt = obuilder.withLongName("input").withRequired(true) .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create()) .withDescription("The Path for input files.").withShortName("i").create(); Option outputOpt = obuilder.withLongName("output").withRequired(true) .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create()) .withDescription("The Path write output to").withShortName("o").create(); Option maxNGramSizeOpt = obuilder.withLongName("maxNGramSize").withRequired(false) .withArgument(abuilder.withName("ngramSize").withMinimum(1).withMaximum(1).create()) .withDescription("(Optional) The maximum size of ngrams to create" + " (2 = bigrams, 3 = trigrams, etc) Default Value:2") .withShortName("ng").create(); Option minSupportOpt = obuilder.withLongName("minSupport").withRequired(false) .withArgument(abuilder.withName("minSupport").withMinimum(1).withMaximum(1).create()) .withDescription("(Optional) Minimum Support. Default Value: " + CollocReducer.DEFAULT_MIN_SUPPORT) .withShortName("s").create(); Option minLLROpt = obuilder.withLongName("minLLR").withRequired(false) .withArgument(abuilder.withName("minLLR").withMinimum(1).withMaximum(1).create()) .withDescription("(Optional)The minimum Log Likelihood Ratio(Float) Default is " + LLRReducer.DEFAULT_MIN_LLR) .withShortName("ml").create(); Option numReduceTasksOpt = obuilder.withLongName("numReducers").withRequired(false) .withArgument(abuilder.withName("numReducers").withMinimum(1).withMaximum(1).create()) .withDescription(//from ww w . j a v a 2 s .com "(Optional) Number of reduce tasks. 
Default Value: " + DEFAULT_PASS1_NUM_REDUCE_TASKS) .withShortName("nr").create(); Option preprocessOpt = obuilder.withLongName("preprocess").withRequired(false) .withDescription("If set, input is SequenceFile<Text,Text> where the value is the document, " + " which will be tokenized using the specified analyzer.") .withShortName("p").create(); Option unigramOpt = obuilder.withLongName("unigram").withRequired(false) .withDescription("If set, unigrams will be emitted in the final output alongside collocations") .withShortName("u").create(); Option overwriteOutput = obuilder.withLongName("overwrite").withRequired(false) .withDescription("If set, overwrite the output directory").withShortName("w").create(); Option analyzerNameOpt = obuilder.withLongName("analyzerName") .withArgument(abuilder.withName("analyzerName").withMinimum(1).withMaximum(1).create()) .withDescription("The class name of the analyzer").withShortName("a").create(); Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h") .create(); Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt) .withOption(maxNGramSizeOpt).withOption(overwriteOutput).withOption(minSupportOpt) .withOption(minLLROpt).withOption(numReduceTasksOpt).withOption(analyzerNameOpt) .withOption(preprocessOpt).withOption(unigramOpt).withOption(helpOpt).create(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return 1; } Path input = new Path(cmdLine.getValue(inputOpt).toString()); Path output = new Path(cmdLine.getValue(outputOpt).toString()); int maxNGramSize = DEFAULT_MAX_NGRAM_SIZE; if (cmdLine.hasOption(maxNGramSizeOpt)) { try { maxNGramSize = Integer.parseInt(cmdLine.getValue(maxNGramSizeOpt).toString()); } catch (NumberFormatException ex) { log.warn("Could not parse ngram size option"); } } log.info("Maximum n-gram size is: {}", 
maxNGramSize); if (cmdLine.hasOption(overwriteOutput)) { HadoopUtil.overwriteOutput(output); } int minSupport = CollocReducer.DEFAULT_MIN_SUPPORT; if (cmdLine.hasOption(minSupportOpt)) { minSupport = Integer.parseInt(cmdLine.getValue(minSupportOpt).toString()); } log.info("Minimum Support value: {}", minSupport); float minLLRValue = LLRReducer.DEFAULT_MIN_LLR; if (cmdLine.hasOption(minLLROpt)) { minLLRValue = Float.parseFloat(cmdLine.getValue(minLLROpt).toString()); } log.info("Minimum LLR value: {}", minLLRValue); int reduceTasks = DEFAULT_PASS1_NUM_REDUCE_TASKS; if (cmdLine.hasOption(numReduceTasksOpt)) { reduceTasks = Integer.parseInt(cmdLine.getValue(numReduceTasksOpt).toString()); } log.info("Number of pass1 reduce tasks: {}", reduceTasks); boolean emitUnigrams = cmdLine.hasOption(unigramOpt); if (cmdLine.hasOption(preprocessOpt)) { log.info("Input will be preprocessed"); Class<? extends Analyzer> analyzerClass = DefaultAnalyzer.class; if (cmdLine.hasOption(analyzerNameOpt)) { String className = cmdLine.getValue(analyzerNameOpt).toString(); analyzerClass = Class.forName(className).asSubclass(Analyzer.class); // try instantiating it, b/c there isn't any point in setting it if // you can't instantiate it analyzerClass.newInstance(); } Path tokenizedPath = new Path(output, DocumentProcessor.TOKENIZED_DOCUMENT_OUTPUT_FOLDER); DocumentProcessor.tokenizeDocuments(input, analyzerClass, tokenizedPath); input = tokenizedPath; } else { log.info("Input will NOT be preprocessed"); } // parse input and extract collocations long ngramCount = generateCollocations(input, output, emitUnigrams, maxNGramSize, reduceTasks, minSupport); // tally collocations and perform LLR calculation computeNGramsPruneByLLR(ngramCount, output, emitUnigrams, minLLRValue, reduceTasks); } catch (OptionException e) { log.error("Exception", e); CommandLineUtil.printHelp(group); return 1; } return 0; }
From source file:org.apache.mahout.utils.vectors.arff.Driver.java
public static void main(String[] args) throws IOException { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option inputOpt = obuilder.withLongName("input").withRequired(true) .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create()) .withDescription(//w ww .j a v a2 s.com "The file or directory containing the ARFF files. If it is a directory, all .arff files will be converted") .withShortName("d").create(); Option outputOpt = obuilder.withLongName("output").withRequired(true) .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create()) .withDescription( "The output directory. Files will have the same name as the input, but with the extension .mvc") .withShortName("o").create(); Option maxOpt = obuilder.withLongName("max").withRequired(false) .withArgument(abuilder.withName("max").withMinimum(1).withMaximum(1).create()) .withDescription( "The maximum number of vectors to output. 
If not specified, then it will loop over all docs") .withShortName("m").create(); Option dictOutOpt = obuilder.withLongName("dictOut").withRequired(true) .withArgument(abuilder.withName("dictOut").withMinimum(1).withMaximum(1).create()) .withDescription("The file to output the label bindings").withShortName("t").create(); Option jsonDictonaryOpt = obuilder.withLongName("json-dictonary").withRequired(false) .withDescription("Write dictonary in JSON format").withShortName("j").create(); Option delimiterOpt = obuilder.withLongName("delimiter").withRequired(false) .withArgument(abuilder.withName("delimiter").withMinimum(1).withMaximum(1).create()) .withDescription("The delimiter for outputing the dictionary").withShortName("l").create(); Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h") .create(); Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(maxOpt) .withOption(helpOpt).withOption(dictOutOpt).withOption(jsonDictonaryOpt).withOption(delimiterOpt) .create(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } if (cmdLine.hasOption(inputOpt)) { // Lucene case File input = new File(cmdLine.getValue(inputOpt).toString()); long maxDocs = Long.MAX_VALUE; if (cmdLine.hasOption(maxOpt)) { maxDocs = Long.parseLong(cmdLine.getValue(maxOpt).toString()); } if (maxDocs < 0) { throw new IllegalArgumentException("maxDocs must be >= 0"); } String outDir = cmdLine.getValue(outputOpt).toString(); log.info("Output Dir: {}", outDir); String delimiter = cmdLine.hasOption(delimiterOpt) ? 
cmdLine.getValue(delimiterOpt).toString() : "\t"; File dictOut = new File(cmdLine.getValue(dictOutOpt).toString()); boolean jsonDictonary = cmdLine.hasOption(jsonDictonaryOpt); ARFFModel model = new MapBackedARFFModel(); if (input.exists() && input.isDirectory()) { File[] files = input.listFiles(new FilenameFilter() { @Override public boolean accept(File file, String name) { return name.endsWith(".arff"); } }); for (File file : files) { writeFile(outDir, file, maxDocs, model, dictOut, delimiter, jsonDictonary); } } else { writeFile(outDir, input, maxDocs, model, dictOut, delimiter, jsonDictonary); } } } catch (OptionException e) { log.error("Exception", e); CommandLineUtil.printHelp(group); } }
From source file:org.apache.mahout.utils.vectors.libsvm.Driver.java
/** * The main method./*from w w w.ja va2s .c om*/ * * @param args the arguments * @throws IOException Signals that an I/O exception has occurred. */ public static void main(String[] args) throws IOException { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option inputOpt = obuilder.withLongName("input").withRequired(true) .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create()) .withDescription( "The file or directory containing the ARFF files. If it is a directory, all .arff files will be converted") .withShortName("d").create(); Option outputOpt = obuilder.withLongName("output").withRequired(true) .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create()) .withDescription( "The output directory. Files will have the same name as the input, but with the extension .mvc") .withShortName("o").create(); Option maxOpt = obuilder.withLongName("max").withRequired(false) .withArgument(abuilder.withName("max").withMinimum(1).withMaximum(1).create()) .withDescription( "The maximum number of vectors to output. 
If not specified, then it will loop over all docs") .withShortName("m").create(); Option dictOutOpt = obuilder.withLongName("dictOut").withRequired(true) .withArgument(abuilder.withName("dictOut").withMinimum(1).withMaximum(1).create()) .withDescription("The file to output the label bindings").withShortName("t").create(); Option outWriterOpt = obuilder.withLongName("outputWriter").withRequired(false) .withArgument(abuilder.withName("outputWriter").withMinimum(1).withMaximum(1).create()) .withDescription( "The VectorWriter to use, either seq (SequenceFileVectorWriter - default) or file (Writes to a File using JSON format)") .withShortName("e").create(); Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h") .create(); Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(maxOpt) .withOption(helpOpt).withOption(dictOutOpt).withOption(outWriterOpt).create(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } if (cmdLine.hasOption(inputOpt)) {// Lucene case File input = new File(cmdLine.getValue(inputOpt).toString()); long maxDocs = Long.MAX_VALUE; if (cmdLine.hasOption(maxOpt)) { maxDocs = Long.parseLong(cmdLine.getValue(maxOpt).toString()); } if (maxDocs < 0) { throw new IllegalArgumentException("maxDocs must be >= 0"); } String outDir = cmdLine.getValue(outputOpt).toString(); Driver.log.info("Output Dir: {}", outDir); String outWriter = null; if (cmdLine.hasOption(outWriterOpt)) { outWriter = cmdLine.getValue(outWriterOpt).toString(); } File dictOut = new File(cmdLine.getValue(dictOutOpt).toString()); List<Double> labels = new ArrayList<Double>(); if (input.exists() && input.isDirectory()) { File[] files = input.listFiles(); for (File file : files) { // Driver.writeFile(outWriter, outDir, file, maxDocs, labels); } } else { // Driver.writeFile(outWriter, 
outDir, input, maxDocs, labels); } Driver.log.info("Dictionary Output file: {}", dictOut); BufferedWriter writer = new BufferedWriter( new OutputStreamWriter(new FileOutputStream(dictOut), Charset.forName("UTF8"))); for (Double label : labels) { writer.append(label.toString()).append('\n'); } writer.close(); } } catch (OptionException e) { Driver.log.error("Exception", e); CommandLineUtil.printHelp(group); } }
From source file:org.apache.mahout.utils.vectors.lucene.ClusterLabels.java
public static void main(String[] args) { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option indexOpt = obuilder.withLongName("dir").withRequired(true) .withArgument(abuilder.withName("dir").withMinimum(1).withMaximum(1).create()) .withDescription("The Lucene index directory").withShortName("d").create(); Option outputOpt = obuilder.withLongName("output").withRequired(false) .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create()) .withDescription("The output file. If not specified, the result is printed on console.") .withShortName("o").create(); Option fieldOpt = obuilder.withLongName("field").withRequired(true) .withArgument(abuilder.withName("field").withMinimum(1).withMaximum(1).create()) .withDescription("The content field in the index").withShortName("f").create(); Option idFieldOpt = obuilder.withLongName("idField").withRequired(false) .withArgument(abuilder.withName("idField").withMinimum(1).withMaximum(1).create()) .withDescription(//from www . ja va2 s . c om "The field for the document ID in the index. If null, then the Lucene internal doc " + "id is used which is prone to error if the underlying index changes") .withShortName("i").create(); Option seqOpt = obuilder.withLongName("seqFileDir").withRequired(true) .withArgument(abuilder.withName("seqFileDir").withMinimum(1).withMaximum(1).create()) .withDescription("The directory containing Sequence Files for the Clusters").withShortName("s") .create(); Option pointsOpt = obuilder.withLongName("pointsDir").withRequired(true) .withArgument(abuilder.withName("pointsDir").withMinimum(1).withMaximum(1).create()) .withDescription( "The directory containing points sequence files mapping input vectors to their cluster. 
") .withShortName("p").create(); Option minClusterSizeOpt = obuilder.withLongName("minClusterSize").withRequired(false) .withArgument(abuilder.withName("minClusterSize").withMinimum(1).withMaximum(1).create()) .withDescription("The minimum number of points required in a cluster to print the labels for") .withShortName("m").create(); Option maxLabelsOpt = obuilder.withLongName("maxLabels").withRequired(false) .withArgument(abuilder.withName("maxLabels").withMinimum(1).withMaximum(1).create()) .withDescription("The maximum number of labels to print per cluster").withShortName("x").create(); Option helpOpt = DefaultOptionCreator.helpOption(); Group group = gbuilder.withName("Options").withOption(indexOpt).withOption(idFieldOpt).withOption(outputOpt) .withOption(fieldOpt).withOption(seqOpt).withOption(pointsOpt).withOption(helpOpt) .withOption(maxLabelsOpt).withOption(minClusterSizeOpt).create(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } Path seqFileDir = new Path(cmdLine.getValue(seqOpt).toString()); Path pointsDir = new Path(cmdLine.getValue(pointsOpt).toString()); String indexDir = cmdLine.getValue(indexOpt).toString(); String contentField = cmdLine.getValue(fieldOpt).toString(); String idField = null; if (cmdLine.hasOption(idFieldOpt)) { idField = cmdLine.getValue(idFieldOpt).toString(); } String output = null; if (cmdLine.hasOption(outputOpt)) { output = cmdLine.getValue(outputOpt).toString(); } int maxLabels = DEFAULT_MAX_LABELS; if (cmdLine.hasOption(maxLabelsOpt)) { maxLabels = Integer.parseInt(cmdLine.getValue(maxLabelsOpt).toString()); } int minSize = DEFAULT_MIN_IDS; if (cmdLine.hasOption(minClusterSizeOpt)) { minSize = Integer.parseInt(cmdLine.getValue(minClusterSizeOpt).toString()); } ClusterLabels clusterLabel = new ClusterLabels(seqFileDir, pointsDir, indexDir, contentField, minSize, maxLabels); if (idField != null) { 
clusterLabel.setIdField(idField); } if (output != null) { clusterLabel.setOutput(output); } clusterLabel.getLabels(); } catch (OptionException e) { log.error("Exception", e); CommandLineUtil.printHelp(group); } catch (IOException e) { log.error("Exception", e); } }
From source file:org.apache.mahout.utils.vectors.lucene.Driver.java
public static void main(String[] args) throws IOException { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option inputOpt = obuilder.withLongName("dir").withRequired(true) .withArgument(abuilder.withName("dir").withMinimum(1).withMaximum(1).create()) .withDescription("The Lucene directory").withShortName("d").create(); Option outputOpt = obuilder.withLongName("output").withRequired(true) .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create()) .withDescription("The output file").withShortName("o").create(); Option fieldOpt = obuilder.withLongName("field").withRequired(true) .withArgument(abuilder.withName("field").withMinimum(1).withMaximum(1).create()) .withDescription("The field in the index").withShortName("f").create(); Option idFieldOpt = obuilder.withLongName("idField").withRequired(false) .withArgument(abuilder.withName("idField").withMinimum(1).withMaximum(1).create()) .withDescription(/*from w w w. ja v a 2s . c o m*/ "The field in the index containing the index. If null, then the Lucene internal doc " + "id is used which is prone to error if the underlying index changes") .create(); Option dictOutOpt = obuilder.withLongName("dictOut").withRequired(true) .withArgument(abuilder.withName("dictOut").withMinimum(1).withMaximum(1).create()) .withDescription("The output of the dictionary").withShortName("t").create(); Option seqDictOutOpt = obuilder.withLongName("seqDictOut").withRequired(false) .withArgument(abuilder.withName("seqDictOut").withMinimum(1).withMaximum(1).create()) .withDescription("The output of the dictionary as sequence file").withShortName("st").create(); Option weightOpt = obuilder.withLongName("weight").withRequired(false) .withArgument(abuilder.withName("weight").withMinimum(1).withMaximum(1).create()) .withDescription("The kind of weight to use. 
Currently TF or TFIDF").withShortName("w").create(); Option delimiterOpt = obuilder.withLongName("delimiter").withRequired(false) .withArgument(abuilder.withName("delimiter").withMinimum(1).withMaximum(1).create()) .withDescription("The delimiter for outputting the dictionary").withShortName("l").create(); Option powerOpt = obuilder.withLongName("norm").withRequired(false) .withArgument(abuilder.withName("norm").withMinimum(1).withMaximum(1).create()) .withDescription( "The norm to use, expressed as either a double or \"INF\" if you want to use the Infinite norm. " + "Must be greater or equal to 0. The default is not to normalize") .withShortName("n").create(); Option maxOpt = obuilder.withLongName("max").withRequired(false) .withArgument(abuilder.withName("max").withMinimum(1).withMaximum(1).create()) .withDescription( "The maximum number of vectors to output. If not specified, then it will loop over all docs") .withShortName("m").create(); Option minDFOpt = obuilder.withLongName("minDF").withRequired(false) .withArgument(abuilder.withName("minDF").withMinimum(1).withMaximum(1).create()) .withDescription("The minimum document frequency. Default is 1").withShortName("md").create(); Option maxDFPercentOpt = obuilder.withLongName("maxDFPercent").withRequired(false) .withArgument(abuilder.withName("maxDFPercent").withMinimum(1).withMaximum(1).create()) .withDescription( "The max percentage of docs for the DF. Can be used to remove really high frequency terms." + " Expressed as an integer between 0 and 100. Default is 99.") .withShortName("x").create(); Option maxPercentErrorDocsOpt = obuilder.withLongName("maxPercentErrorDocs").withRequired(false) .withArgument(abuilder.withName("maxPercentErrorDocs").withMinimum(1).withMaximum(1).create()) .withDescription( "The max percentage of docs that can have a null term vector. These are noise document and can occur if the " + "analyzer used strips out all terms in the target field. 
This percentage is expressed as a value " + "between 0 and 1. The default is 0.") .withShortName("err").create(); Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h") .create(); Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(idFieldOpt).withOption(outputOpt) .withOption(delimiterOpt).withOption(helpOpt).withOption(fieldOpt).withOption(maxOpt) .withOption(dictOutOpt).withOption(seqDictOutOpt).withOption(powerOpt).withOption(maxDFPercentOpt) .withOption(weightOpt).withOption(minDFOpt).withOption(maxPercentErrorDocsOpt).create(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } if (cmdLine.hasOption(inputOpt)) { // Lucene case Driver luceneDriver = new Driver(); luceneDriver.setLuceneDir(cmdLine.getValue(inputOpt).toString()); if (cmdLine.hasOption(maxOpt)) { luceneDriver.setMaxDocs(Long.parseLong(cmdLine.getValue(maxOpt).toString())); } if (cmdLine.hasOption(weightOpt)) { luceneDriver.setWeightType(cmdLine.getValue(weightOpt).toString()); } luceneDriver.setField(cmdLine.getValue(fieldOpt).toString()); if (cmdLine.hasOption(minDFOpt)) { luceneDriver.setMinDf(Integer.parseInt(cmdLine.getValue(minDFOpt).toString())); } if (cmdLine.hasOption(maxDFPercentOpt)) { luceneDriver.setMaxDFPercent(Integer.parseInt(cmdLine.getValue(maxDFPercentOpt).toString())); } if (cmdLine.hasOption(powerOpt)) { String power = cmdLine.getValue(powerOpt).toString(); if ("INF".equals(power)) { luceneDriver.setNorm(Double.POSITIVE_INFINITY); } else { luceneDriver.setNorm(Double.parseDouble(power)); } } if (cmdLine.hasOption(idFieldOpt)) { luceneDriver.setIdField(cmdLine.getValue(idFieldOpt).toString()); } if (cmdLine.hasOption(maxPercentErrorDocsOpt)) { luceneDriver.setMaxPercentErrorDocs( Double.parseDouble(cmdLine.getValue(maxPercentErrorDocsOpt).toString())); } 
luceneDriver.setOutFile(cmdLine.getValue(outputOpt).toString()); luceneDriver.setDelimiter( cmdLine.hasOption(delimiterOpt) ? cmdLine.getValue(delimiterOpt).toString() : "\t"); luceneDriver.setDictOut(cmdLine.getValue(dictOutOpt).toString()); if (cmdLine.hasOption(seqDictOutOpt)) { luceneDriver.setSeqDictOut(cmdLine.getValue(seqDictOutOpt).toString()); } luceneDriver.dumpVectors(); } } catch (OptionException e) { log.error("Exception", e); CommandLineUtil.printHelp(group); } }