Usage examples for the org.apache.commons.cli2.builder.DefaultOptionBuilder no-argument constructor:
public DefaultOptionBuilder()
From source file:org.apache.mahout.benchmark.VectorBenchmarks.java
public static void main(String[] args) throws IOException { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option vectorSizeOpt = obuilder.withLongName("vectorSize").withRequired(false) .withArgument(abuilder.withName("vs").withDefault(1000000).create()) .withDescription("Cardinality of the vector. Default: 1000000").withShortName("vs").create(); Option numNonZeroOpt = obuilder.withLongName("numNonZero").withRequired(false) .withArgument(abuilder.withName("nz").withDefault(1000).create()) .withDescription("Size of the vector. Default: 1000").withShortName("nz").create(); Option numVectorsOpt = obuilder.withLongName("numVectors").withRequired(false) .withArgument(abuilder.withName("nv").withDefault(25).create()) .withDescription("Number of Vectors to create. Default: 25").withShortName("nv").create(); Option numClustersOpt = obuilder.withLongName("numClusters").withRequired(false) .withArgument(abuilder.withName("nc").withDefault(0).create()) .withDescription(//from w ww . java 2 s . c om "Number of clusters to create. Set to non zero to run cluster benchmark. Default: 0") .withShortName("nc").create(); Option numOpsOpt = obuilder.withLongName("numOps").withRequired(false) .withArgument(abuilder.withName("numOps").withDefault(10).create()) .withDescription("Number of operations to do per timer. " + "E.g In distance measure, the distance is calculated numOps times" + " and the total time is measured. 
Default: 10") .withShortName("no").create(); Option helpOpt = DefaultOptionCreator.helpOption(); Group group = gbuilder.withName("Options").withOption(vectorSizeOpt).withOption(numNonZeroOpt) .withOption(numVectorsOpt).withOption(numOpsOpt).withOption(numClustersOpt).withOption(helpOpt) .create(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelpWithGenericOptions(group); return; } int cardinality = 1000000; if (cmdLine.hasOption(vectorSizeOpt)) { cardinality = Integer.parseInt((String) cmdLine.getValue(vectorSizeOpt)); } int numClusters = 0; if (cmdLine.hasOption(numClustersOpt)) { numClusters = Integer.parseInt((String) cmdLine.getValue(numClustersOpt)); } int numNonZero = 1000; if (cmdLine.hasOption(numNonZeroOpt)) { numNonZero = Integer.parseInt((String) cmdLine.getValue(numNonZeroOpt)); } int numVectors = 25; if (cmdLine.hasOption(numVectorsOpt)) { numVectors = Integer.parseInt((String) cmdLine.getValue(numVectorsOpt)); } int numOps = 10; if (cmdLine.hasOption(numOpsOpt)) { numOps = Integer.parseInt((String) cmdLine.getValue(numOpsOpt)); } VectorBenchmarks mark = new VectorBenchmarks(cardinality, numNonZero, numVectors, numClusters, numOps); runBenchmark(mark); // log.info("\n{}", mark); log.info("\n{}", mark.asCsvString()); } catch (OptionException e) { CommandLineUtil.printHelp(group); } }
From source file:org.apache.mahout.cf.taste.example.TasteOptionParser.java
/** * Parse the given command line arguments. * @param args the arguments as given to the application. * @return the input file if a file was given on the command line, null otherwise. *//* w ww. jav a 2 s . co m*/ public static File getRatings(String[] args) throws OptionException { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option inputOpt = obuilder.withLongName("input").withRequired(false).withShortName("i") .withArgument(abuilder.withName("input").withMinimum(1).withMaximum(1).create()) .withDescription("The Path for input data directory.").create(); Option helpOpt = DefaultOptionCreator.helpOption(); Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(helpOpt).create(); Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return null; } return cmdLine.hasOption(inputOpt) ? new File(cmdLine.getValue(inputOpt).toString()) : null; }
From source file:org.apache.mahout.classifier.bayes.PrepareTwentyNewsgroups.java
public static void main(String[] args) throws Exception { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option helpOpt = DefaultOptionCreator.helpOption(); Option parentOpt = obuilder.withLongName("parent").withRequired(true) .withArgument(abuilder.withName("parent").withMinimum(1).withMaximum(1).create()) .withDescription("Parent dir containing the newsgroups").withShortName("p").create(); Option outputDirOpt = obuilder.withLongName("outputDir").withRequired(true) .withArgument(abuilder.withName("outputDir").withMinimum(1).withMaximum(1).create()) .withDescription("The output directory").withShortName("o").create(); Option analyzerNameOpt = obuilder.withLongName("analyzerName").withRequired(true) .withArgument(abuilder.withName("analyzerName").withMinimum(1).withMaximum(1).create()) .withDescription("The class name of the analyzer").withShortName("a").create(); Option charsetOpt = obuilder.withLongName("charset").withRequired(true) .withArgument(abuilder.withName("charset").withMinimum(1).withMaximum(1).create()) .withDescription("The name of the character encoding of the input files").withShortName("c") .create();/* w w w . j a v a 2s . 
c o m*/ Group group = gbuilder.withName("Options").withOption(analyzerNameOpt).withOption(charsetOpt) .withOption(outputDirOpt).withOption(parentOpt).withOption(helpOpt).create(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } File parentDir = new File((String) cmdLine.getValue(parentOpt)); File outputDir = new File((String) cmdLine.getValue(outputDirOpt)); String analyzerName = (String) cmdLine.getValue(analyzerNameOpt); Charset charset = Charset.forName((String) cmdLine.getValue(charsetOpt)); Analyzer analyzer = ClassUtils.instantiateAs(analyzerName, Analyzer.class); // parent dir contains dir by category if (!parentDir.exists()) { throw new FileNotFoundException("Can't find input directory " + parentDir); } File[] categoryDirs = parentDir.listFiles(); for (File dir : categoryDirs) { if (dir.isDirectory()) { if (!outputDir.exists() && !outputDir.mkdirs()) { throw new IllegalStateException("Can't create output directory"); } File outputFile = new File(outputDir, dir.getName() + ".txt"); BayesFileFormatter.collapse(dir.getName(), analyzer, dir, charset, outputFile); } } } catch (OptionException e) { CommandLineUtil.printHelp(group); } }
From source file:org.apache.mahout.classifier.bayes.TestClassifier.java
public static void main(String[] args) throws IOException, InvalidDatastoreException { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option pathOpt = obuilder.withLongName("model").withRequired(true) .withArgument(abuilder.withName("model").withMinimum(1).withMaximum(1).create()) .withDescription("The path on HDFS as defined by the -source parameter").withShortName("m") .create();//from w w w . j a v a2 s .co m Option dirOpt = obuilder.withLongName("testDir").withRequired(true) .withArgument(abuilder.withName("testDir").withMinimum(1).withMaximum(1).create()) .withDescription("The directory where test documents resides in").withShortName("d").create(); Option helpOpt = DefaultOptionCreator.helpOption(); Option encodingOpt = obuilder.withLongName("encoding") .withArgument(abuilder.withName("encoding").withMinimum(1).withMaximum(1).create()) .withDescription("The file encoding. Defaults to UTF-8").withShortName("e").create(); Option defaultCatOpt = obuilder.withLongName("defaultCat") .withArgument(abuilder.withName("defaultCat").withMinimum(1).withMaximum(1).create()) .withDescription("The default category Default Value: unknown").withShortName("default").create(); Option gramSizeOpt = obuilder.withLongName("gramSize").withRequired(false) .withArgument(abuilder.withName("gramSize").withMinimum(1).withMaximum(1).create()) .withDescription("Size of the n-gram. 
Default Value: 1").withShortName("ng").create(); Option alphaOpt = obuilder.withLongName("alpha").withRequired(false) .withArgument(abuilder.withName("a").withMinimum(1).withMaximum(1).create()) .withDescription("Smoothing parameter Default Value: 1.0").withShortName("a").create(); Option verboseOutputOpt = obuilder.withLongName("verbose").withRequired(false) .withDescription("Output which values were correctly and incorrectly classified").withShortName("v") .create(); Option typeOpt = obuilder.withLongName("classifierType").withRequired(false) .withArgument(abuilder.withName("classifierType").withMinimum(1).withMaximum(1).create()) .withDescription("Type of classifier: bayes|cbayes. Default Value: bayes").withShortName("type") .create(); Option dataSourceOpt = obuilder.withLongName("dataSource").withRequired(false) .withArgument(abuilder.withName("dataSource").withMinimum(1).withMaximum(1).create()) .withDescription("Location of model: hdfs").withShortName("source").create(); Option methodOpt = obuilder.withLongName("method").withRequired(false) .withArgument(abuilder.withName("method").withMinimum(1).withMaximum(1).create()) .withDescription("Method of Classification: sequential|mapreduce. 
Default Value: mapreduce") .withShortName("method").create(); Option confusionMatrixOpt = obuilder.withLongName("confusionMatrix").withRequired(false) .withArgument(abuilder.withName("confusionMatrix").withMinimum(1).withMaximum(1).create()) .withDescription("Export ConfusionMatrix as SequenceFile").withShortName("cm").create(); Group group = gbuilder.withName("Options").withOption(defaultCatOpt).withOption(dirOpt) .withOption(encodingOpt).withOption(gramSizeOpt).withOption(pathOpt).withOption(typeOpt) .withOption(dataSourceOpt).withOption(helpOpt).withOption(methodOpt).withOption(verboseOutputOpt) .withOption(alphaOpt).withOption(confusionMatrixOpt).create(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } BayesParameters params = new BayesParameters(); // Setting all default values int gramSize = 1; String modelBasePath = (String) cmdLine.getValue(pathOpt); if (cmdLine.hasOption(gramSizeOpt)) { gramSize = Integer.parseInt((String) cmdLine.getValue(gramSizeOpt)); } String classifierType = "bayes"; if (cmdLine.hasOption(typeOpt)) { classifierType = (String) cmdLine.getValue(typeOpt); } String dataSource = "hdfs"; if (cmdLine.hasOption(dataSourceOpt)) { dataSource = (String) cmdLine.getValue(dataSourceOpt); } String defaultCat = "unknown"; if (cmdLine.hasOption(defaultCatOpt)) { defaultCat = (String) cmdLine.getValue(defaultCatOpt); } String encoding = "UTF-8"; if (cmdLine.hasOption(encodingOpt)) { encoding = (String) cmdLine.getValue(encodingOpt); } String alphaI = "1.0"; if (cmdLine.hasOption(alphaOpt)) { alphaI = (String) cmdLine.getValue(alphaOpt); } boolean verbose = cmdLine.hasOption(verboseOutputOpt); String testDirPath = (String) cmdLine.getValue(dirOpt); String classificationMethod = "mapreduce"; if (cmdLine.hasOption(methodOpt)) { classificationMethod = (String) cmdLine.getValue(methodOpt); } String confusionMatrixFile = null; 
if (cmdLine.hasOption(confusionMatrixOpt)) { confusionMatrixFile = (String) cmdLine.getValue(confusionMatrixOpt); } params.setGramSize(gramSize); params.set("verbose", Boolean.toString(verbose)); params.setBasePath(modelBasePath); params.set("classifierType", classifierType); params.set("dataSource", dataSource); params.set("defaultCat", defaultCat); params.set("encoding", encoding); params.set("alpha_i", alphaI); params.set("testDirPath", testDirPath); params.set("confusionMatrix", confusionMatrixFile); if ("sequential".equalsIgnoreCase(classificationMethod)) { classifySequential(params); } else if ("mapreduce".equalsIgnoreCase(classificationMethod)) { classifyParallel(params); } } catch (OptionException e) { CommandLineUtil.printHelp(group); } }
From source file:org.apache.mahout.classifier.bayes.TrainClassifier.java
public static void main(String[] args) throws Exception { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option helpOpt = DefaultOptionCreator.helpOption(); Option inputDirOpt = DefaultOptionCreator.inputOption().create(); Option outputOpt = DefaultOptionCreator.outputOption().create(); Option gramSizeOpt = obuilder.withLongName("gramSize").withRequired(false) .withArgument(abuilder.withName("gramSize").withMinimum(1).withMaximum(1).create()) .withDescription("Size of the n-gram. Default Value: 1 ").withShortName("ng").create(); Option minDfOpt = obuilder.withLongName("minDf").withRequired(false) .withArgument(abuilder.withName("minDf").withMinimum(1).withMaximum(1).create()) .withDescription("Minimum Term Document Frequency: 1 ").withShortName("mf").create(); Option minSupportOpt = obuilder.withLongName("minSupport").withRequired(false) .withArgument(abuilder.withName("minSupport").withMinimum(1).withMaximum(1).create()) .withDescription("Minimum Support (Term Frequency): 1 ").withShortName("ms").create(); Option alphaOpt = obuilder.withLongName("alpha").withRequired(false) .withArgument(abuilder.withName("a").withMinimum(1).withMaximum(1).create()) .withDescription("Smoothing parameter Default Value: 1.0").withShortName("a").create(); Option typeOpt = obuilder.withLongName("classifierType").withRequired(false) .withArgument(abuilder.withName("classifierType").withMinimum(1).withMaximum(1).create()) .withDescription("Type of classifier: bayes|cbayes. Default: bayes").withShortName("type").create(); Option dataSourceOpt = obuilder.withLongName("dataSource").withRequired(false) .withArgument(abuilder.withName("dataSource").withMinimum(1).withMaximum(1).create()) .withDescription("Location of model: hdfs. 
Default Value: hdfs").withShortName("source").create(); Option skipCleanupOpt = obuilder.withLongName("skipCleanup").withRequired(false) .withDescription("Skip cleanup of feature extraction output").withShortName("sc").create(); Option compressOpt = obuilder.withLongName("compress").withRequired(false) .withArgument(abuilder.withName("compress").withDefault("0").withMinimum(0).withMaximum(1).create()) .withDescription("True if the output should be compressed. Default is false").withShortName("comp") .create();/*from ww w . ja v a2 s . co m*/ Option compressCodecOpt = obuilder.withLongName("codec").withRequired(false) .withArgument(abuilder.withName("codec").withDefault("org.apache.hadoop.io.compress.DefaultCodec") .withMinimum(0).withMaximum(1).create()) .withDescription("Compress codec Default Value: org.apache.hadoop.io.compress.DefaultCodec") .withShortName("co").create(); Group group = gbuilder.withName("Options").withOption(gramSizeOpt).withOption(helpOpt) .withOption(inputDirOpt).withOption(outputOpt).withOption(typeOpt).withOption(dataSourceOpt) .withOption(alphaOpt).withOption(minDfOpt).withOption(minSupportOpt).withOption(skipCleanupOpt) .withOption(compressOpt).withOption(compressCodecOpt).create(); try { Parser parser = new Parser(); parser.setGroup(group); parser.setHelpOption(helpOpt); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } String classifierType = (String) cmdLine.getValue(typeOpt); String dataSourceType = (String) cmdLine.getValue(dataSourceOpt); BayesParameters params = new BayesParameters(); // Setting all the default parameter values params.setGramSize(1); params.setMinDF(1); params.set("alpha_i", "1.0"); params.set("dataSource", "hdfs"); if (cmdLine.hasOption(gramSizeOpt)) { params.setGramSize(Integer.parseInt((String) cmdLine.getValue(gramSizeOpt))); } if (cmdLine.hasOption(minDfOpt)) { params.setMinDF(Integer.parseInt((String) cmdLine.getValue(minDfOpt))); } if 
(cmdLine.hasOption(minSupportOpt)) { params.setMinSupport(Integer.parseInt((String) cmdLine.getValue(minSupportOpt))); } if (cmdLine.hasOption(skipCleanupOpt)) { params.setSkipCleanup(true); } if (cmdLine.hasOption(alphaOpt)) { params.set("alpha_i", (String) cmdLine.getValue(alphaOpt)); } if (cmdLine.hasOption(dataSourceOpt)) { params.set("dataSource", dataSourceType); } if (cmdLine.hasOption(compressOpt) && cmdLine.getValue(compressOpt).toString().equals("1")) { params.set("compress", "true"); } else { params.set("compress", "false"); } if (cmdLine.hasOption(compressCodecOpt)) { params.set("codec", (String) cmdLine.getValue(compressCodecOpt)); } Path inputPath = new Path((String) cmdLine.getValue(inputDirOpt)); Path outputPath = new Path((String) cmdLine.getValue(outputOpt)); if ("cbayes".equalsIgnoreCase(classifierType)) { log.info("Training Complementary Bayes Classifier"); trainCNaiveBayes(inputPath, outputPath, params); } else { log.info("Training Bayes Classifier"); // setup the HDFS and copy the files there, then run the trainer trainNaiveBayes(inputPath, outputPath, params); } } catch (OptionException e) { log.error("Error while parsing options", e); CommandLineUtil.printHelp(group); } }
From source file:org.apache.mahout.classifier.bayes.WikipediaDatasetCreatorDriver.java
/** * Takes in two arguments:/*from www .j av a 2s.c om*/ * <ol> * <li>The input {@link org.apache.hadoop.fs.Path} where the input documents live</li> * <li>The output {@link org.apache.hadoop.fs.Path} where to write the classifier as a * {@link org.apache.hadoop.io.SequenceFile}</li> * </ol> */ public static void main(String[] args) throws IOException, InterruptedException { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option dirInputPathOpt = DefaultOptionCreator.inputOption().create(); Option dirOutputPathOpt = DefaultOptionCreator.outputOption().create(); Option categoriesOpt = obuilder.withLongName("categories").withRequired(true) .withArgument(abuilder.withName("categories").withMinimum(1).withMaximum(1).create()) .withDescription("Location of the categories file. One entry per line. " + "Will be used to make a string match in Wikipedia Category field") .withShortName("c").create(); Option exactMatchOpt = obuilder.withLongName("exactMatch") .withDescription("If set, then the category name must exactly match the " + "entry in the categories file. 
Default is false") .withShortName("e").create(); Option analyzerOpt = obuilder.withLongName("analyzer").withRequired(false) .withArgument(abuilder.withName("analyzer").withMinimum(1).withMaximum(1).create()) .withDescription("The analyzer to use, must have a no argument constructor").withShortName("a") .create(); Option helpOpt = DefaultOptionCreator.helpOption(); Group group = gbuilder.withName("Options").withOption(categoriesOpt).withOption(dirInputPathOpt) .withOption(dirOutputPathOpt).withOption(exactMatchOpt).withOption(analyzerOpt).withOption(helpOpt) .create(); Parser parser = new Parser(); parser.setGroup(group); try { CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } String inputPath = (String) cmdLine.getValue(dirInputPathOpt); String outputPath = (String) cmdLine.getValue(dirOutputPathOpt); String catFile = (String) cmdLine.getValue(categoriesOpt); Class<? extends Analyzer> analyzerClass = WikipediaAnalyzer.class; if (cmdLine.hasOption(analyzerOpt)) { String className = cmdLine.getValue(analyzerOpt).toString(); analyzerClass = Class.forName(className).asSubclass(Analyzer.class); // try instantiating it, b/c there isn't any point in setting it if // you can't instantiate it ClassUtils.instantiateAs(analyzerClass, Analyzer.class); } runJob(inputPath, outputPath, catFile, cmdLine.hasOption(exactMatchOpt), analyzerClass); } catch (OptionException e) { log.error("Exception", e); CommandLineUtil.printHelp(group); } catch (ClassNotFoundException e) { log.error("Exception", e); CommandLineUtil.printHelp(group); } }
From source file:org.apache.mahout.classifier.bayes.WikipediaXmlSplitter.java
public static void main(String[] args) throws IOException { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option dumpFileOpt = obuilder.withLongName("dumpFile").withRequired(true) .withArgument(abuilder.withName("dumpFile").withMinimum(1).withMaximum(1).create()) .withDescription("The path to the wikipedia dump file (.bz2 or uncompressed)").withShortName("d") .create();//from w w w .j a va 2s .c o m Option outputDirOpt = obuilder.withLongName("outputDir").withRequired(true) .withArgument(abuilder.withName("outputDir").withMinimum(1).withMaximum(1).create()) .withDescription("The output directory to place the splits in:\n" + "local files:\n\t/var/data/wikipedia-xml-chunks or\n\tfile:///var/data/wikipedia-xml-chunks\n" + "Hadoop DFS:\n\thdfs://wikipedia-xml-chunks\n" + "AWS S3 (blocks):\n\ts3://bucket-name/wikipedia-xml-chunks\n" + "AWS S3 (native files):\n\ts3n://bucket-name/wikipedia-xml-chunks\n") .withShortName("o").create(); Option s3IdOpt = obuilder.withLongName("s3ID").withRequired(false) .withArgument(abuilder.withName("s3Id").withMinimum(1).withMaximum(1).create()) .withDescription("Amazon S3 ID key").withShortName("i").create(); Option s3SecretOpt = obuilder.withLongName("s3Secret").withRequired(false) .withArgument(abuilder.withName("s3Secret").withMinimum(1).withMaximum(1).create()) .withDescription("Amazon S3 secret key").withShortName("s").create(); Option chunkSizeOpt = obuilder.withLongName("chunkSize").withRequired(true) .withArgument(abuilder.withName("chunkSize").withMinimum(1).withMaximum(1).create()) .withDescription("The Size of the chunk, in megabytes").withShortName("c").create(); Option numChunksOpt = obuilder.withLongName("numChunks").withRequired(false) .withArgument(abuilder.withName("numChunks").withMinimum(1).withMaximum(1).create()) .withDescription( "The maximum number of chunks to create. 
If specified, program will only create a subset of the chunks") .withShortName("n").create(); Group group = gbuilder.withName("Options").withOption(dumpFileOpt).withOption(outputDirOpt) .withOption(chunkSizeOpt).withOption(numChunksOpt).withOption(s3IdOpt).withOption(s3SecretOpt) .create(); Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine; try { cmdLine = parser.parse(args); } catch (OptionException e) { log.error("Error while parsing options", e); CommandLineUtil.printHelp(group); return; } Configuration conf = new Configuration(); String dumpFilePath = (String) cmdLine.getValue(dumpFileOpt); String outputDirPath = (String) cmdLine.getValue(outputDirOpt); if (cmdLine.hasOption(s3IdOpt)) { String id = (String) cmdLine.getValue(s3IdOpt); conf.set("fs.s3n.awsAccessKeyId", id); conf.set("fs.s3.awsAccessKeyId", id); } if (cmdLine.hasOption(s3SecretOpt)) { String secret = (String) cmdLine.getValue(s3SecretOpt); conf.set("fs.s3n.awsSecretAccessKey", secret); conf.set("fs.s3.awsSecretAccessKey", secret); } // do not compute crc file when using local FS conf.set("fs.file.impl", "org.apache.hadoop.fs.RawLocalFileSystem"); FileSystem fs = FileSystem.get(URI.create(outputDirPath), conf); int chunkSize = 1024 * 1024 * Integer.parseInt((String) cmdLine.getValue(chunkSizeOpt)); int numChunks = Integer.MAX_VALUE; if (cmdLine.hasOption(numChunksOpt)) { numChunks = Integer.parseInt((String) cmdLine.getValue(numChunksOpt)); } String header = "<mediawiki xmlns=\"http://www.mediawiki.org/xml/export-0.3/\" " + "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" " + "xsi:schemaLocation=\"http://www.mediawiki.org/xml/export-0.3/ " + "http://www.mediawiki.org/xml/export-0.3.xsd\" " + "version=\"0.3\" " + "xml:lang=\"en\">\n" + " <siteinfo>\n" + "<sitename>Wikipedia</sitename>\n" + " <base>http://en.wikipedia.org/wiki/Main_Page</base>\n" + " <generator>MediaWiki 1.13alpha</generator>\n" + " <case>first-letter</case>\n" + " <namespaces>\n" + " <namespace 
key=\"-2\">Media</namespace>\n" + " <namespace key=\"-1\">Special</namespace>\n" + " <namespace key=\"0\" />\n" + " <namespace key=\"1\">Talk</namespace>\n" + " <namespace key=\"2\">User</namespace>\n" + " <namespace key=\"3\">User talk</namespace>\n" + " <namespace key=\"4\">Wikipedia</namespace>\n" + " <namespace key=\"5\">Wikipedia talk</namespace>\n" + " <namespace key=\"6\">Image</namespace>\n" + " <namespace key=\"7\">Image talk</namespace>\n" + " <namespace key=\"8\">MediaWiki</namespace>\n" + " <namespace key=\"9\">MediaWiki talk</namespace>\n" + " <namespace key=\"10\">Template</namespace>\n" + " <namespace key=\"11\">Template talk</namespace>\n" + " <namespace key=\"12\">Help</namespace>\n" + " <namespace key=\"13\">Help talk</namespace>\n" + " <namespace key=\"14\">Category</namespace>\n" + " <namespace key=\"15\">Category talk</namespace>\n" + " <namespace key=\"100\">Portal</namespace>\n" + " <namespace key=\"101\">Portal talk</namespace>\n" + " </namespaces>\n" + " </siteinfo>\n"; StringBuilder content = new StringBuilder(); content.append(header); NumberFormat decimalFormatter = new DecimalFormat("0000"); File dumpFile = new File(dumpFilePath); FileLineIterator it; if (dumpFilePath.endsWith(".bz2")) { // default compression format from http://download.wikimedia.org CompressionCodec codec = new BZip2Codec(); it = new FileLineIterator(codec.createInputStream(new FileInputStream(dumpFile))); } else { // assume the user has previously de-compressed the dump file it = new FileLineIterator(dumpFile); } int filenumber = 0; while (it.hasNext()) { String thisLine = it.next(); if (thisLine.trim().startsWith("<page>")) { boolean end = false; while (!thisLine.trim().startsWith("</page>")) { content.append(thisLine).append('\n'); if (it.hasNext()) { thisLine = it.next(); } else { end = true; break; } } content.append(thisLine).append('\n'); if (content.length() > chunkSize || end) { content.append("</mediawiki>"); filenumber++; String filename = outputDirPath + 
"/chunk-" + decimalFormatter.format(filenumber) + ".xml"; BufferedWriter chunkWriter = new BufferedWriter( new OutputStreamWriter(fs.create(new Path(filename)), "UTF-8")); try { chunkWriter.write(content.toString(), 0, content.length()); } finally { Closeables.closeQuietly(chunkWriter); } if (filenumber >= numChunks) { break; } content = new StringBuilder(); content.append(header); } } } }
From source file:org.apache.mahout.classifier.BayesFileFormatter.java
/** * Run the FileFormatter//from w ww . java2s .c o m * * @param args * The input args. Run with -h to see the help * @throws ClassNotFoundException * if the Analyzer can't be found * @throws IllegalAccessException * if the Analyzer can't be constructed * @throws InstantiationException * if the Analyzer can't be constructed * @throws IOException * if the files can't be dealt with properly */ public static void main(String[] args) throws Exception { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option inputOpt = DefaultOptionCreator.inputOption().create(); Option outputOpt = DefaultOptionCreator.outputOption().create(); Option labelOpt = obuilder.withLongName("label").withRequired(true) .withArgument(abuilder.withName("label").withMinimum(1).withMaximum(1).create()) .withDescription("The label of the file").withShortName("l").create(); Option analyzerOpt = obuilder.withLongName("analyzer") .withArgument(abuilder.withName("analyzer").withMinimum(1).withMaximum(1).create()) .withDescription("The fully qualified class name of the analyzer to use. " + "Must have a no-arg constructor. 
Default is the StandardAnalyzer") .withShortName("a").create(); Option charsetOpt = obuilder.withLongName("charset") .withArgument(abuilder.withName("charset").withMinimum(1).withMaximum(1).create()) .withDescription("The character encoding of the input file").withShortName("c").create(); Option collapseOpt = obuilder.withLongName("collapse").withRequired(true) .withArgument(abuilder.withName("collapse").withMinimum(1).withMaximum(1).create()) .withDescription("Collapse a whole directory to a single file, one doc per line").withShortName("p") .create(); Option helpOpt = DefaultOptionCreator.helpOption(); Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(labelOpt) .withOption(analyzerOpt).withOption(charsetOpt).withOption(collapseOpt).withOption(helpOpt) .create(); try { Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { return; } File input = new File((String) cmdLine.getValue(inputOpt)); File output = new File((String) cmdLine.getValue(outputOpt)); String label = (String) cmdLine.getValue(labelOpt); Analyzer analyzer; if (cmdLine.hasOption(analyzerOpt)) { analyzer = ClassUtils.instantiateAs((String) cmdLine.getValue(analyzerOpt), Analyzer.class); } else { analyzer = new StandardAnalyzer(Version.LUCENE_31); } Charset charset = Charsets.UTF_8; if (cmdLine.hasOption(charsetOpt)) { charset = Charset.forName((String) cmdLine.getValue(charsetOpt)); } boolean collapse = cmdLine.hasOption(collapseOpt); if (collapse) { collapse(label, analyzer, input, charset, output); } else { format(label, analyzer, input, charset, output); } } catch (OptionException e) { log.error("Exception", e); } }
From source file:org.apache.mahout.classifier.chi_rwcs.mapreduce.BuildModel.java
/**
 * Parses the command line for the Chi-RWCS model builder, resolves the fuzzy-rule
 * parameters (t-norm, rule weight, reasoning method) into their internal constants,
 * and then delegates to {@code buildModel()}.
 *
 * @param args command-line arguments
 * @return 0 on success, -1 when help was requested or parsing failed
 */
@Override
public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option dataOpt = obuilder.withLongName("data").withShortName("d").withRequired(true)
            .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
            .withDescription("Data path").create();
    Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true)
            .withArgument(abuilder.withName("dataset").withMinimum(1).withMaximum(1).create())
            .withDescription("The path of the file descriptor of the dataset").create();
    Option timeOpt = obuilder.withLongName("time").withShortName("tm").withRequired(false)
            .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
            .withDescription("Time path").create();
    Option outputOpt = obuilder.withLongName("output").withShortName("o").withRequired(true)
            .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
            .withDescription("Output path, will contain the Decision Forest").create();
    Option labelsOpt = obuilder.withLongName("labels").withShortName("l").withRequired(true)
            .withArgument(abuilder.withName("labels").withMinimum(1).withMaximum(1).create())
            .withDescription("Number of Labels").create();
    Option combinationTypeOpt = obuilder.withLongName("combinationType").withShortName("t").withRequired(true)
            .withArgument(abuilder.withName("combinationType").withMinimum(1).withMaximum(1).create())
            .withDescription("T-norm for the computation of the compatibility degree").create();
    Option ruleWeightOpt = obuilder.withLongName("rule_weight").withShortName("r").withRequired(true)
            .withArgument(abuilder.withName("rule_weight").withMinimum(1).withMaximum(1).create())
            .withDescription("Rule Weight").create();
    Option fuzzyRmOpt = obuilder.withLongName("fuzzy_r_m").withShortName("f").withRequired(true)
            .withArgument(abuilder.withName("fuzzy_r_m").withMinimum(1).withMaximum(1).create())
            .withDescription("Fuzzy Reasoning Method").create();
    Option helpOpt = obuilder.withLongName("help").withShortName("h").withDescription("Print out help")
            .create();

    Group group = gbuilder.withName("Options").withOption(dataOpt).withOption(datasetOpt).withOption(timeOpt)
            .withOption(outputOpt).withOption(labelsOpt).withOption(combinationTypeOpt)
            .withOption(ruleWeightOpt).withOption(fuzzyRmOpt).withOption(helpOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        // Fix: look up the typed option instead of the bare "help" string,
        // consistent with every other option lookup in this method.
        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return -1;
        }

        dataName = cmdLine.getValue(dataOpt).toString();
        String datasetName = cmdLine.getValue(datasetOpt).toString();
        String outputName = cmdLine.getValue(outputOpt).toString();
        nLabels = Integer.parseInt(cmdLine.getValue(labelsOpt).toString());
        String combinationTypeRaw = cmdLine.getValue(combinationTypeOpt).toString();
        String ruleWeightRaw = cmdLine.getValue(ruleWeightOpt).toString();
        String inferenceTypeRaw = cmdLine.getValue(fuzzyRmOpt).toString();

        // Build time is only recorded when an explicit --time path is given.
        if (cmdLine.hasOption(timeOpt)) {
            buildTimeIsStored = true;
            timeName = cmdLine.getValue(timeOpt).toString();
        }

        if (log.isDebugEnabled()) {
            log.debug("data : {}", dataName);
            log.debug("dataset : {}", datasetName);
            log.debug("output : {}", outputName);
            log.debug("labels : {}", nLabels);
            log.debug("t_norm : {}", combinationTypeRaw);
            log.debug("rule_weight : {}", ruleWeightRaw);
            log.debug("fuzzy_r_m : {}", inferenceTypeRaw);
            log.debug("time : {}", timeName);
        }

        dataPath = new Path(dataName);
        datasetPath = new Path(datasetName);
        outputPath = new Path(outputName);
        if (buildTimeIsStored) {
            timePath = new Path(timeName);
        }

        // Map the textual t-norm to its internal constant; anything other
        // than "minimum" falls back to the PRODUCT default.
        combinationType = PRODUCT;
        if ("minimum".equalsIgnoreCase(combinationTypeRaw)) {
            combinationType = MINIMUM;
        }

        // Rule-weight heuristic: default PCF_IV unless one of the named
        // alternatives is requested.
        ruleWeight = PCF_IV;
        if ("Certainty_Factor".equalsIgnoreCase(ruleWeightRaw)) {
            ruleWeight = CF;
        } else if ("Average_Penalized_Certainty_Factor".equalsIgnoreCase(ruleWeightRaw)) {
            ruleWeight = PCF_II;
        } else if ("No_Weights".equalsIgnoreCase(ruleWeightRaw)) {
            ruleWeight = NO_RW;
        }

        // Fuzzy reasoning method: winning rule unless additive combination is asked for.
        inferenceType = WINNING_RULE;
        if ("Additive_Combination".equalsIgnoreCase(inferenceTypeRaw)) {
            inferenceType = ADDITIVE_COMBINATION;
        }
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
        return -1;
    }

    buildModel();

    return 0;
}
From source file:org.apache.mahout.classifier.chi_rwcs.mapreduce.TestModel.java
@Override public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException { // TODO Auto-generated method stub DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option inputOpt = DefaultOptionCreator.inputOption().create(); Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true) .withArgument(abuilder.withName("dataset").withMinimum(1).withMaximum(1).create()) .withDescription("Dataset path").create(); Option modelOpt = obuilder.withLongName("model").withShortName("m").withRequired(true) .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create()) .withDescription("Path to the Model").create(); Option outputOpt = DefaultOptionCreator.outputOption().create(); Option helpOpt = DefaultOptionCreator.helpOption(); Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(datasetOpt).withOption(modelOpt) .withOption(outputOpt).withOption(helpOpt).create(); try {/*from w w w . j a va2 s .c o m*/ Parser parser = new Parser(); parser.setGroup(group); CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption("help")) { CommandLineUtil.printHelp(group); return -1; } dataName = cmdLine.getValue(inputOpt).toString(); String datasetName = cmdLine.getValue(datasetOpt).toString(); String modelName = cmdLine.getValue(modelOpt).toString(); String outputName = cmdLine.hasOption(outputOpt) ? 
cmdLine.getValue(outputOpt).toString() : null; if (log.isDebugEnabled()) { log.debug("inout : {}", dataName); log.debug("dataset : {}", datasetName); log.debug("model : {}", modelName); log.debug("output : {}", outputName); } dataPath = new Path(dataName); datasetPath = new Path(datasetName); modelPath = new Path(modelName); if (outputName != null) { outputPath = new Path(outputName); } } catch (OptionException e) { log.warn(e.toString(), e); CommandLineUtil.printHelp(group); return -1; } time = System.currentTimeMillis(); testModel(); time = System.currentTimeMillis() - time; writeToFileClassifyTime(Chi_RWCSUtils.elapsedTime(time)); return 0; }