Usage examples for org.apache.commons.cli2.builder.DefaultOptionBuilder
public DefaultOptionBuilder()
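Before the full examples, here is a minimal, self-contained sketch of the pattern they all share: build options with DefaultOptionBuilder and ArgumentBuilder, collect them into a Group, and hand the group to a Parser. The class name DefaultOptionBuilderDemo and the --input option are illustrative, not taken from the sources below.

import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;

public class DefaultOptionBuilderDemo {
    public static void main(String[] args) throws Exception {
        DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
        ArgumentBuilder abuilder = new ArgumentBuilder();
        GroupBuilder gbuilder = new GroupBuilder();

        // A required --input/-i option taking exactly one argument.
        Option inputOpt = obuilder.withLongName("input").withShortName("i").withRequired(true)
            .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
            .withDescription("Input path").create();

        Group group = gbuilder.withName("Options").withOption(inputOpt).create();

        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args); // throws OptionException on invalid input
        System.out.println("input = " + cmdLine.getValue(inputOpt));
    }
}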
From source file:org.apache.mahout.classifier.Classify.java
public static void main(String[] args) throws Exception {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option pathOpt = obuilder.withLongName("path").withRequired(true)
        .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
        .withDescription("The local file system path").withShortName("m").create();
    Option classifyOpt = obuilder.withLongName("classify").withRequired(true)
        .withArgument(abuilder.withName("classify").withMinimum(1).withMaximum(1).create())
        .withDescription("The doc to classify").withShortName("").create();
    Option encodingOpt = obuilder.withLongName("encoding").withRequired(true)
        .withArgument(abuilder.withName("encoding").withMinimum(1).withMaximum(1).create())
        .withDescription("The file encoding. Default: UTF-8").withShortName("e").create();
    Option analyzerOpt = obuilder.withLongName("analyzer").withRequired(true)
        .withArgument(abuilder.withName("analyzer").withMinimum(1).withMaximum(1).create())
        .withDescription("The Analyzer to use").withShortName("a").create();
    Option defaultCatOpt = obuilder.withLongName("defaultCat").withRequired(true)
        .withArgument(abuilder.withName("defaultCat").withMinimum(1).withMaximum(1).create())
        .withDescription("The default category").withShortName("d").create();
    Option gramSizeOpt = obuilder.withLongName("gramSize").withRequired(true)
        .withArgument(abuilder.withName("gramSize").withMinimum(1).withMaximum(1).create())
        .withDescription("Size of the n-gram").withShortName("ng").create();
    Option typeOpt = obuilder.withLongName("classifierType").withRequired(true)
        .withArgument(abuilder.withName("classifierType").withMinimum(1).withMaximum(1).create())
        .withDescription("Type of classifier").withShortName("type").create();
    Option dataSourceOpt = obuilder.withLongName("dataSource").withRequired(true)
        .withArgument(abuilder.withName("dataSource").withMinimum(1).withMaximum(1).create())
        .withDescription("Location of model: hdfs").withShortName("source").create();

    Group options = gbuilder.withName("Options").withOption(pathOpt).withOption(classifyOpt)
        .withOption(encodingOpt).withOption(analyzerOpt).withOption(defaultCatOpt).withOption(gramSizeOpt)
        .withOption(typeOpt).withOption(dataSourceOpt).create();

    Parser parser = new Parser();
    parser.setGroup(options);
    CommandLine cmdLine = parser.parse(args);

    int gramSize = 1;
    if (cmdLine.hasOption(gramSizeOpt)) {
        gramSize = Integer.parseInt((String) cmdLine.getValue(gramSizeOpt));
    }

    BayesParameters params = new BayesParameters();
    params.setGramSize(gramSize);
    String modelBasePath = (String) cmdLine.getValue(pathOpt);
    params.setBasePath(modelBasePath);
    log.info("Loading model from: {}", params.print());

    Algorithm algorithm;
    Datastore datastore;
    String classifierType = (String) cmdLine.getValue(typeOpt);
    String dataSource = (String) cmdLine.getValue(dataSourceOpt);
    if ("hdfs".equals(dataSource)) {
        if ("bayes".equalsIgnoreCase(classifierType)) {
            log.info("Using Bayes Classifier");
            algorithm = new BayesAlgorithm();
            datastore = new InMemoryBayesDatastore(params);
        } else if ("cbayes".equalsIgnoreCase(classifierType)) {
            log.info("Using Complementary Bayes Classifier");
            algorithm = new CBayesAlgorithm();
            datastore = new InMemoryBayesDatastore(params);
        } else {
            throw new IllegalArgumentException("Unrecognized classifier type: " + classifierType);
        }
    } else {
        throw new IllegalArgumentException("Unrecognized dataSource type: " + dataSource);
    }

    ClassifierContext classifier = new ClassifierContext(algorithm, datastore);
    classifier.initialize();

    String defaultCat = "unknown";
    if (cmdLine.hasOption(defaultCatOpt)) {
        defaultCat = (String) cmdLine.getValue(defaultCatOpt);
    }
    File docPath = new File((String) cmdLine.getValue(classifyOpt));
    String encoding = "UTF-8";
    if (cmdLine.hasOption(encodingOpt)) {
        encoding = (String) cmdLine.getValue(encodingOpt);
    }
    Analyzer analyzer = null;
    if (cmdLine.hasOption(analyzerOpt)) {
        analyzer = ClassUtils.instantiateAs((String) cmdLine.getValue(analyzerOpt), Analyzer.class);
    }
    if (analyzer == null) {
        analyzer = new StandardAnalyzer(Version.LUCENE_31);
    }

    log.info("Converting input document to proper format");
    String[] document = BayesFileFormatter.readerToDocument(analyzer,
        Files.newReader(docPath, Charset.forName(encoding)));
    StringBuilder line = new StringBuilder();
    for (String token : document) {
        line.append(token).append(' ');
    }

    List<String> doc = new NGrams(line.toString(), gramSize).generateNGramsWithoutLabel();
    log.info("Done converting");
    log.info("Classifying document: {}", docPath);
    ClassifierResult category = classifier.classifyDocument(doc.toArray(new String[doc.size()]), defaultCat);
    log.info("Category for {} is {}", docPath, category);
}
From source file:org.apache.mahout.classifier.df.BreimanExample.java
@Override
public int run(String[] args) throws IOException {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option dataOpt = obuilder.withLongName("data").withShortName("d").withRequired(true)
        .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
        .withDescription("Data path").create();
    Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true)
        .withArgument(abuilder.withName("dataset").withMinimum(1).withMaximum(1).create())
        .withDescription("Dataset path").create();
    Option nbtreesOpt = obuilder.withLongName("nbtrees").withShortName("t").withRequired(true)
        .withArgument(abuilder.withName("nbtrees").withMinimum(1).withMaximum(1).create())
        .withDescription("Number of trees to grow, each iteration").create();
    Option nbItersOpt = obuilder.withLongName("iterations").withShortName("i").withRequired(true)
        .withArgument(abuilder.withName("numIterations").withMinimum(1).withMaximum(1).create())
        .withDescription("Number of times to repeat the test").create();
    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h").create();

    Group group = gbuilder.withName("Options").withOption(dataOpt).withOption(datasetOpt)
        .withOption(nbItersOpt).withOption(nbtreesOpt).withOption(helpOpt).create();

    Path dataPath;
    Path datasetPath;
    int nbTrees;
    int nbIterations;
    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption("help")) {
            CommandLineUtil.printHelp(group);
            return -1;
        }

        String dataName = cmdLine.getValue(dataOpt).toString();
        String datasetName = cmdLine.getValue(datasetOpt).toString();
        nbTrees = Integer.parseInt(cmdLine.getValue(nbtreesOpt).toString());
        nbIterations = Integer.parseInt(cmdLine.getValue(nbItersOpt).toString());

        dataPath = new Path(dataName);
        datasetPath = new Path(datasetName);
    } catch (OptionException e) {
        log.error("Error while parsing options", e);
        CommandLineUtil.printHelp(group);
        return -1;
    }

    // load the data
    FileSystem fs = dataPath.getFileSystem(new Configuration());
    Dataset dataset = Dataset.load(getConf(), datasetPath);
    Data data = DataLoader.loadData(dataset, fs, dataPath);

    // take m to be the first integer less than log2(M) + 1, where M is the number of inputs
    int m = (int) Math.floor(FastMath.log(2.0, data.getDataset().nbAttributes()) + 1);

    Random rng = RandomUtils.getRandom();
    for (int iteration = 0; iteration < nbIterations; iteration++) {
        log.info("Iteration {}", iteration);
        runIteration(rng, data, m, nbTrees);
    }

    log.info("********************************************");
    log.info("Random Input Test Error : {}", sumTestErrM / nbIterations);
    log.info("Single Input Test Error : {}", sumTestErrOne / nbIterations);
    log.info("Mean Random Input Time : {}", DFUtils.elapsedTime(sumTimeM / nbIterations));
    log.info("Mean Single Input Time : {}", DFUtils.elapsedTime(sumTimeOne / nbIterations));
    log.info("Mean Random Input Num Nodes : {}", numNodesM / nbIterations);
    log.info("Mean Single Input Num Nodes : {}", numNodesOne / nbIterations);

    return 0;
}
From source file:org.apache.mahout.classifier.df.mapreduce.Resampling.java
public int run(String[] args) throws Exception, ClassNotFoundException, InterruptedException {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option dataOpt = obuilder.withLongName("data").withShortName("d").withRequired(true)
        .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
        .withDescription("Data path").create();
    Option dataPreprocessingOpt = obuilder.withLongName("dataPreprocessing").withShortName("dp").withRequired(true)
        .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
        .withDescription("Data Preprocessing path").create();
    Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true)
        .withArgument(abuilder.withName("dataset").withMinimum(1).withMaximum(1).create())
        .withDescription("Dataset path").create();
    Option timeOpt = obuilder.withLongName("time").withShortName("tm").withRequired(false)
        .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
        .withDescription("Time path").create();
    Option helpOpt = obuilder.withLongName("help").withShortName("h").withDescription("Print out help").create();
    Option resamplingOpt = obuilder.withLongName("resampling").withShortName("rs").withRequired(true)
        .withArgument(abuilder.withName("resampling").withMinimum(1).withMaximum(1).create())
        .withDescription("The resampling technique (oversampling (overs), undersampling (unders) or SMOTE (smote))")
        .create();
    Option nbpartitionsOpt = obuilder.withLongName("nbpartitions").withShortName("p").withRequired(true)
        .withArgument(abuilder.withName("nbpartitions").withMinimum(1).withMaximum(1).create())
        .withDescription("Number of partitions").create();
    Option nposOpt = obuilder.withLongName("npos").withShortName("npos").withRequired(true)
        .withArgument(abuilder.withName("npos").withMinimum(1).withMaximum(1).create())
        .withDescription("Number of instances of the positive class").create();
    Option nnegOpt = obuilder.withLongName("nneg").withShortName("nneg").withRequired(true)
        .withArgument(abuilder.withName("nneg").withMinimum(1).withMaximum(1).create())
        .withDescription("Number of instances of the negative class").create();
    Option negclassOpt = obuilder.withLongName("negclass").withShortName("negclass").withRequired(true)
        .withArgument(abuilder.withName("negclass").withMinimum(1).withMaximum(1).create())
        .withDescription("Name of the negative class").create();
    Option posclassOpt = obuilder.withLongName("posclass").withShortName("posclass").withRequired(true)
        .withArgument(abuilder.withName("posclass").withMinimum(1).withMaximum(1).create())
        .withDescription("Name of the positive class").create();

    Group group = gbuilder.withName("Options").withOption(dataOpt).withOption(datasetOpt).withOption(timeOpt)
        .withOption(helpOpt).withOption(resamplingOpt).withOption(dataPreprocessingOpt)
        .withOption(nbpartitionsOpt).withOption(nposOpt).withOption(nnegOpt).withOption(negclassOpt)
        .withOption(posclassOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption("help")) {
            CommandLineUtil.printHelp(group);
            return -1;
        }

        dataName = cmdLine.getValue(dataOpt).toString();
        String datasetName = cmdLine.getValue(datasetOpt).toString();
        dataPreprocessing = cmdLine.getValue(dataPreprocessingOpt).toString();
        String resampling = cmdLine.getValue(resamplingOpt).toString();
        partitions = Integer.parseInt(cmdLine.getValue(nbpartitionsOpt).toString());
        npos = Integer.parseInt(cmdLine.getValue(nposOpt).toString());
        nneg = Integer.parseInt(cmdLine.getValue(nnegOpt).toString());
        negclass = cmdLine.getValue(negclassOpt).toString();
        posclass = cmdLine.getValue(posclassOpt).toString();

        if (resampling.equalsIgnoreCase("overs")) {
            withOversampling = true;
        } else if (resampling.equalsIgnoreCase("unders")) {
            withUndersampling = true;
        } else if (resampling.equalsIgnoreCase("smote")) {
            withSmote = true;
        }

        if (cmdLine.hasOption(timeOpt)) {
            preprocessingTimeIsStored = true;
            timeName = cmdLine.getValue(timeOpt).toString();
        }

        if (log.isDebugEnabled()) {
            log.debug("data : {}", dataName);
            log.debug("dataset : {}", datasetName);
            log.debug("time : {}", timeName);
            log.debug("Oversampling : {}", withOversampling);
            log.debug("Undersampling : {}", withUndersampling);
            log.debug("SMOTE : {}", withSmote);
        }

        dataPath = new Path(dataName);
        datasetPath = new Path(datasetName);
        dataPreprocessingPath = new Path(dataPreprocessing);
        if (preprocessingTimeIsStored) {
            timePath = new Path(timeName);
        }
    } catch (OptionException e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
        return -1;
    }

    if (withOversampling) {
        overSampling();
    } else if (withUndersampling) {
        underSampling();
    } else if (withSmote) {
        smote();
    }

    return 0;
}
From source file:org.apache.mahout.classifier.df.tools.ForestVisualizer.java
public static void main(String[] args) {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true)
        .withArgument(abuilder.withName("dataset").withMinimum(1).withMaximum(1).create())
        .withDescription("Dataset path").create();
    Option modelOpt = obuilder.withLongName("model").withShortName("m").withRequired(true)
        .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
        .withDescription("Path to the Decision Forest").create();
    Option attrNamesOpt = obuilder.withLongName("names").withShortName("n").withRequired(false)
        .withArgument(abuilder.withName("names").withMinimum(1).create())
        .withDescription("Optional, Attribute names").create();
    Option helpOpt = obuilder.withLongName("help").withShortName("h").withDescription("Print out help").create();

    Group group = gbuilder.withName("Options").withOption(datasetOpt).withOption(modelOpt)
        .withOption(attrNamesOpt).withOption(helpOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption("help")) {
            CommandLineUtil.printHelp(group);
            return;
        }

        String datasetName = cmdLine.getValue(datasetOpt).toString();
        String modelName = cmdLine.getValue(modelOpt).toString();
        String[] attrNames = null;
        if (cmdLine.hasOption(attrNamesOpt)) {
            Collection<String> names = (Collection<String>) cmdLine.getValues(attrNamesOpt);
            if (!names.isEmpty()) {
                attrNames = new String[names.size()];
                names.toArray(attrNames);
            }
        }

        print(modelName, datasetName, attrNames);
    } catch (Exception e) {
        log.error("Exception", e);
        CommandLineUtil.printHelp(group);
    }
}
From source file:org.apache.mahout.classifier.df.tools.Frequencies.java
@Override
public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option dataOpt = obuilder.withLongName("data").withShortName("d").withRequired(true)
        .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create())
        .withDescription("Data path").create();
    Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true)
        .withArgument(abuilder.withName("path").withMinimum(1).create())
        .withDescription("dataset path").create();
    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h").create();

    Group group = gbuilder.withName("Options").withOption(dataOpt).withOption(datasetOpt)
        .withOption(helpOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return 0;
        }

        String dataPath = cmdLine.getValue(dataOpt).toString();
        String datasetPath = cmdLine.getValue(datasetOpt).toString();

        log.debug("Data path : {}", dataPath);
        log.debug("Dataset path : {}", datasetPath);

        runTool(dataPath, datasetPath);
    } catch (OptionException e) {
        log.warn(e.toString(), e);
        CommandLineUtil.printHelp(group);
    }
    return 0;
}
From source file:org.apache.mahout.classifier.df.tools.UDistrib.java
/**
 * Launch the uniform distribution tool. Requires the following command line arguments:<br>
 *
 * data : data path
 * dataset : dataset path
 * numpartitions : num partitions
 * output : output path
 *
 * @throws java.io.IOException
 */
public static void main(String[] args) throws IOException {
    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
    ArgumentBuilder abuilder = new ArgumentBuilder();
    GroupBuilder gbuilder = new GroupBuilder();

    Option dataOpt = obuilder.withLongName("data").withShortName("d").withRequired(true)
        .withArgument(abuilder.withName("data").withMinimum(1).withMaximum(1).create())
        .withDescription("Data path").create();
    Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true)
        .withArgument(abuilder.withName("dataset").withMinimum(1).create())
        .withDescription("Dataset path").create();
    Option outputOpt = obuilder.withLongName("output").withShortName("o").withRequired(true)
        .withArgument(abuilder.withName("output").withMinimum(1).withMaximum(1).create())
        .withDescription("Path to generated files").create();
    Option partitionsOpt = obuilder.withLongName("numpartitions").withShortName("p").withRequired(true)
        .withArgument(abuilder.withName("numparts").withMinimum(1).withMaximum(1).create())
        .withDescription("Number of partitions to create").create();
    Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h").create();

    Group group = gbuilder.withName("Options").withOption(dataOpt).withOption(outputOpt)
        .withOption(datasetOpt).withOption(partitionsOpt).withOption(helpOpt).create();

    try {
        Parser parser = new Parser();
        parser.setGroup(group);
        CommandLine cmdLine = parser.parse(args);

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelp(group);
            return;
        }

        String data = cmdLine.getValue(dataOpt).toString();
        String dataset = cmdLine.getValue(datasetOpt).toString();
        int numPartitions = Integer.parseInt(cmdLine.getValue(partitionsOpt).toString());
        String output = cmdLine.getValue(outputOpt).toString();

        runTool(data, dataset, output, numPartitions);
    } catch (OptionException e) {
        log.warn(e.toString(), e);
        CommandLineUtil.printHelp(group);
    }
}
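As a quick usage sketch, the arguments described in the Javadoc map onto an invocation like the following. The paths and the partition count are hypothetical, not taken from the Mahout sources.

// Hypothetical invocation of the tool above; paths and partition count are made up.
String[] args = {
    "--data", "/mahout/in/data.csv",
    "--dataset", "/mahout/in/dataset.info",
    "--numpartitions", "4",
    "--output", "/mahout/out/partitions"
};
UDistrib.main(args);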
From source file:org.apache.mahout.classifier.mlp.RunMultilayerPerceptron.java
/**
 * Parse the arguments.
 *
 * @param args The input arguments.
 * @param parameters The parameters to be filled.
 * @return true if the arguments were parsed successfully, false otherwise.
 * @throws Exception
 */
private static boolean parseArgs(String[] args, Parameters parameters) throws Exception {
    // build the options
    log.info("Validate and parse arguments...");
    DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();
    GroupBuilder groupBuilder = new GroupBuilder();
    ArgumentBuilder argumentBuilder = new ArgumentBuilder();

    Option inputFileFormatOption = optionBuilder.withLongName("format").withShortName("f")
        .withArgument(argumentBuilder.withName("file type").withDefault("csv")
            .withMinimum(1).withMaximum(1).create())
        .withDescription("type of input file, currently support 'csv'").create();

    List<Integer> columnRangeDefault = Lists.newArrayList();
    columnRangeDefault.add(0);
    columnRangeDefault.add(Integer.MAX_VALUE);

    Option skipHeaderOption = optionBuilder.withLongName("skipHeader").withShortName("sh").withRequired(false)
        .withDescription("whether to skip the first row of the input file").create();
    Option inputColumnRangeOption = optionBuilder.withLongName("columnRange").withShortName("cr")
        .withDescription("the column range of the input file, start from 0")
        .withArgument(argumentBuilder.withName("range").withMinimum(2).withMaximum(2)
            .withDefaults(columnRangeDefault).create())
        .create();

    Group inputFileTypeGroup = groupBuilder.withOption(skipHeaderOption).withOption(inputColumnRangeOption)
        .withOption(inputFileFormatOption).create();

    Option inputOption = optionBuilder.withLongName("input").withShortName("i").withRequired(true)
        .withArgument(argumentBuilder.withName("file path").withMinimum(1).withMaximum(1).create())
        .withDescription("the file path of unlabelled dataset").withChildren(inputFileTypeGroup).create();

    Option modelOption = optionBuilder.withLongName("model").withShortName("mo").withRequired(true)
        .withArgument(argumentBuilder.withName("model file").withMinimum(1).withMaximum(1).create())
        .withDescription("the file path of the model").create();

    Option labelsOption = optionBuilder.withLongName("labels").withShortName("labels")
        .withArgument(argumentBuilder.withName("label-name").withMinimum(2).create())
        .withDescription("an ordered list of label names").create();

    Group labelsGroup = groupBuilder.withOption(labelsOption).create();

    Option outputOption = optionBuilder.withLongName("output").withShortName("o").withRequired(true)
        .withArgument(argumentBuilder.withConsumeRemaining("file path").withMinimum(1).withMaximum(1).create())
        .withDescription("the file path of labelled results").withChildren(labelsGroup).create();

    // parse the input
    Parser parser = new Parser();
    Group normalOption = groupBuilder.withOption(inputOption).withOption(modelOption)
        .withOption(outputOption).create();
    parser.setGroup(normalOption);
    CommandLine commandLine = parser.parseAndHelp(args);
    if (commandLine == null) {
        return false;
    }

    // obtain the arguments
    parameters.inputFilePathStr = TrainMultilayerPerceptron.getString(commandLine, inputOption);
    parameters.inputFileFormat = TrainMultilayerPerceptron.getString(commandLine, inputFileFormatOption);
    parameters.skipHeader = commandLine.hasOption(skipHeaderOption);
    parameters.modelFilePathStr = TrainMultilayerPerceptron.getString(commandLine, modelOption);
    parameters.outputFilePathStr = TrainMultilayerPerceptron.getString(commandLine, outputOption);

    List<?> columnRange = commandLine.getValues(inputColumnRangeOption);
    parameters.columnStart = Integer.parseInt(columnRange.get(0).toString());
    parameters.columnEnd = Integer.parseInt(columnRange.get(1).toString());

    return true;
}
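Note the withChildren calls in the example above: in CLI2, a child group attaches sub-options that are only meaningful in the context of the parent option (here, --format, --skipHeader and --columnRange belong to --input). A minimal sketch of the same pattern, with illustrative option names not taken from the source:

// --verbose is a child of --log: it is only accepted alongside --log (illustrative names).
DefaultOptionBuilder ob = new DefaultOptionBuilder();
GroupBuilder gb = new GroupBuilder();

Option verboseOpt = ob.withLongName("verbose").withShortName("v").create();
Group logChildren = gb.withOption(verboseOpt).create();

Option logOpt = ob.withLongName("log").withChildren(logChildren).create();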
From source file:org.apache.mahout.classifier.mlp.TrainMultilayerPerceptron.java
/**
 * Parse the input arguments.
 *
 * @param args The input arguments.
 * @param parameters The parameters parsed.
 * @return Whether the input arguments are valid.
 * @throws Exception
 */
private static boolean parseArgs(String[] args, Parameters parameters) throws Exception {
    // build the options
    log.info("Validate and parse arguments...");
    DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();
    GroupBuilder groupBuilder = new GroupBuilder();
    ArgumentBuilder argumentBuilder = new ArgumentBuilder();

    // whether to skip the first row of the input file
    Option skipHeaderOption = optionBuilder.withLongName("skipHeader").withShortName("sh").create();
    Group skipHeaderGroup = groupBuilder.withOption(skipHeaderOption).create();

    Option inputOption = optionBuilder.withLongName("input").withShortName("i").withRequired(true)
        .withChildren(skipHeaderGroup)
        .withArgument(argumentBuilder.withName("path").withMinimum(1).withMaximum(1).create())
        .withDescription("the file path of training dataset").create();

    Option labelsOption = optionBuilder.withLongName("labels").withShortName("labels").withRequired(true)
        .withArgument(argumentBuilder.withName("label-name").withMinimum(2).create())
        .withDescription("label names").create();

    Option updateOption = optionBuilder.withLongName("update").withShortName("u")
        .withDescription("whether to incrementally update model if the model exists").create();
    Group modelUpdateGroup = groupBuilder.withOption(updateOption).create();

    Option modelOption = optionBuilder.withLongName("model").withShortName("mo").withRequired(true)
        .withArgument(argumentBuilder.withName("model-path").withMinimum(1).withMaximum(1).create())
        .withDescription("the path to store the trained model").withChildren(modelUpdateGroup).create();

    Option layerSizeOption = optionBuilder.withLongName("layerSize").withShortName("ls").withRequired(true)
        .withArgument(argumentBuilder.withName("size of layer").withMinimum(2).withMaximum(5).create())
        .withDescription("the size of each layer").create();

    Option squashingFunctionOption = optionBuilder.withLongName("squashingFunction").withShortName("sf")
        .withArgument(argumentBuilder.withName("squashing function").withMinimum(1).withMaximum(1)
            .withDefault("Sigmoid").create())
        .withDescription("the name of squashing function (currently only supports Sigmoid)").create();

    Option learningRateOption = optionBuilder.withLongName("learningRate").withShortName("l")
        .withArgument(argumentBuilder.withName("learning rate").withMaximum(1).withMinimum(1)
            .withDefault(NeuralNetwork.DEFAULT_LEARNING_RATE).create())
        .withDescription("learning rate").create();

    Option momemtumOption = optionBuilder.withLongName("momemtumWeight").withShortName("m")
        .withArgument(argumentBuilder.withName("momemtum weight").withMaximum(1).withMinimum(1)
            .withDefault(NeuralNetwork.DEFAULT_MOMENTUM_WEIGHT).create())
        .withDescription("momemtum weight").create();

    Option regularizationOption = optionBuilder.withLongName("regularizationWeight").withShortName("r")
        .withArgument(argumentBuilder.withName("regularization weight").withMaximum(1).withMinimum(1)
            .withDefault(NeuralNetwork.DEFAULT_REGULARIZATION_WEIGHT).create())
        .withDescription("regularization weight").create();

    // parse the input
    Parser parser = new Parser();
    Group normalOptions = groupBuilder.withOption(inputOption).withOption(skipHeaderOption)
        .withOption(updateOption).withOption(labelsOption).withOption(modelOption)
        .withOption(layerSizeOption).withOption(squashingFunctionOption).withOption(learningRateOption)
        .withOption(momemtumOption).withOption(regularizationOption).create();
    parser.setGroup(normalOptions);
    CommandLine commandLine = parser.parseAndHelp(args);
    if (commandLine == null) {
        return false;
    }

    parameters.learningRate = getDouble(commandLine, learningRateOption);
    parameters.momemtumWeight = getDouble(commandLine, momemtumOption);
    parameters.regularizationWeight = getDouble(commandLine, regularizationOption);
    parameters.inputFilePath = getString(commandLine, inputOption);
    parameters.skipHeader = commandLine.hasOption(skipHeaderOption);

    List<String> labelsList = getStringList(commandLine, labelsOption);
    int currentIndex = 0;
    for (String label : labelsList) {
        parameters.labelsIndex.put(label, currentIndex++);
    }

    parameters.modelFilePath = getString(commandLine, modelOption);
    parameters.updateModel = commandLine.hasOption(updateOption);
    parameters.layerSizeList = getIntegerList(commandLine, layerSizeOption);
    parameters.squashingFunctionName = getString(commandLine, squashingFunctionOption);

    System.out.printf(
        "Input: %s, Model: %s, Update: %s, Layer size: %s, Squashing function: %s, Learning rate: %f,"
            + " Momemtum weight: %f, Regularization Weight: %f\n",
        parameters.inputFilePath, parameters.modelFilePath, parameters.updateModel,
        Arrays.toString(parameters.layerSizeList.toArray()), parameters.squashingFunctionName,
        parameters.learningRate, parameters.momemtumWeight, parameters.regularizationWeight);

    return true;
}
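Several options above declare defaults through ArgumentBuilder.withDefault, and the code then reads them unconditionally with getDouble/getString rather than checking hasOption first. A small sketch of that behavior, assuming CLI2 applies argument defaults during parse as the Mahout code above relies on (the --rate option and its value are illustrative):

// Illustrative: --rate falls back to "0.01" when absent from the command line.
DefaultOptionBuilder ob = new DefaultOptionBuilder();
ArgumentBuilder ab = new ArgumentBuilder();
GroupBuilder gb = new GroupBuilder();

Option rateOpt = ob.withLongName("rate")
    .withArgument(ab.withName("rate").withMinimum(1).withMaximum(1).withDefault("0.01").create())
    .create();
Group group = gb.withOption(rateOpt).create();

Parser parser = new Parser();
parser.setGroup(group);
CommandLine cmdLine = parser.parse(new String[0]);
System.out.println(cmdLine.getValue(rateOpt)); // expected to print the default, 0.01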
From source file:org.apache.mahout.classifier.rbm.test.TestRBMClassifierJob.java
@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOption("model", "m", "The path to the model built during training", true);
    addOption("labelcount", "lc", "total count of labels existent in the training set", true);
    addOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION, "max",
        "least number of stable iterations in classification layer when classifying", "10");
    addOption(new DefaultOptionBuilder().withLongName(DefaultOptionCreator.MAPREDUCE_METHOD)
        .withRequired(false).withDescription("Run tests with map/reduce").withShortName("mr").create());

    Map<String, String> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }

    int labelcount = Integer.parseInt(getOption("labelcount"));
    iterations = Integer.parseInt(getOption("maxIter"));

    // check the model's existence
    Path model = new Path(parsedArgs.get("--model"));
    if (!model.getFileSystem(getConf()).exists(model)) {
        log.error("Model file does not exist!");
        return -1;
    }

    // create the list of all labels
    List<String> lables = new ArrayList<String>();
    for (int i = 0; i < labelcount; i++) {
        lables.add(String.valueOf(i));
    }

    FileSystem fs = getInputPath().getFileSystem(getConf());
    ResultAnalyzer analyzer = new ResultAnalyzer(lables, "-1");

    // initialize the paths to the test batches
    Path[] batches;
    if (fs.isFile(getInputPath())) {
        batches = new Path[] { getInputPath() };
    } else {
        FileStatus[] stati = fs.listStatus(getInputPath());
        batches = new Path[stati.length];
        for (int i = 0; i < stati.length; i++) {
            batches[i] = stati[i].getPath();
        }
    }

    if (hasOption("mapreduce")) {
        HadoopUtil.delete(getConf(), getTempPath("testresults"));
    }

    for (Path input : batches) {
        if (hasOption("mapreduce")) {
            HadoopUtil.cacheFiles(model, getConf());
            // the output key is the expected value, the output value are the scores for all the labels
            Job testJob = prepareJob(input, getTempPath("testresults"), SequenceFileInputFormat.class,
                TestRBMClassifierMapper.class, IntWritable.class, VectorWritable.class,
                SequenceFileOutputFormat.class);
            testJob.getConfiguration().set("maxIter", String.valueOf(iterations));
            testJob.waitForCompletion(true);

            // loop over the results and create the confusion matrix
            SequenceFileDirIterable<IntWritable, VectorWritable> dirIterable =
                new SequenceFileDirIterable<IntWritable, VectorWritable>(getTempPath("testresults"),
                    PathType.LIST, PathFilters.partFilter(), getConf());
            analyzeResults(dirIterable, analyzer);
        } else {
            // run the test job locally
            runTestsLocally(model, analyzer, input);
        }
    }

    // output the result of the tests
    log.info("RBMClassifier Results: {}", analyzer);

    // stop all running threads
    if (executor != null) {
        executor.shutdownNow();
    }
    return 0;
}
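This example and the next one show a different style from the earlier tools: inside Mahout's AbstractJob, a hand-built DefaultOptionBuilder option (here an argument-less flag) is mixed with the simpler addOption(String, ...) helpers, and its presence is later queried by long name via hasOption. A minimal sketch of just the flag construction, with an illustrative name not taken from the source:

// A flag-style option: no argument, not required; presence is checked with hasOption("dryRun").
Option dryRunOpt = new DefaultOptionBuilder().withLongName("dryRun").withRequired(false)
    .withDescription("Parse and validate only, do not run the job").withShortName("dr").create();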
From source file:org.apache.mahout.classifier.rbm.training.RBMClassifierTrainingJob.java
@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();
    addOption("epochs", "e", "number of training epochs through the trainingset", true);
    addOption("structure", "s", "comma-separated list of layer sizes", false);
    addOption("labelcount", "lc", "total count of labels existent in the training set", true);
    addOption("learningrate", "lr", "learning rate at the beginning of training", "0.005");
    addOption("momentum", "m", "momentum of learning at the beginning", "0.5");
    addOption("rbmnr", "nr", "rbm to train, < 0 means train all", "-1");
    addOption("nrgibbs", "gn", "number of gibbs sampling used in contrastive divergence", "5");
    addOption(new DefaultOptionBuilder().withLongName(DefaultOptionCreator.MAPREDUCE_METHOD)
        .withRequired(false).withDescription("Run training with map/reduce").withShortName("mr").create());
    addOption(new DefaultOptionBuilder().withLongName("nogreedy").withRequired(false)
        .withDescription("Don't run greedy pre training").withShortName("ng").create());
    addOption(new DefaultOptionBuilder().withLongName("nofinetuning").withRequired(false)
        .withDescription("Don't run fine tuning at the end").withShortName("nf").create());
    addOption(new DefaultOptionBuilder().withLongName("nobiases").withRequired(false)
        .withDescription("Don't initialize biases").withShortName("nb").create());
    addOption(new DefaultOptionBuilder().withLongName("monitor").withRequired(false)
        .withDescription("If present, errors can be monitored in console").withShortName("mon").create());
    addOption(DefaultOptionCreator.overwriteOption().create());

    Map<String, String> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }

    Path input = getInputPath();
    Path output = getOutputPath();
    FileSystem fs = FileSystem.get(output.toUri(), getConf());
    labelcount = Integer.parseInt(getOption("labelcount"));

    boolean local = !hasOption("mapreduce");
    monitor = hasOption("monitor");
    initbiases = !hasOption("nobiases");
    finetuning = !hasOption("nofinetuning");
    greedy = !hasOption("nogreedy");

    if (fs.isFile(input)) {
        batches = new Path[] { input };
    } else {
        FileStatus[] stati = fs.listStatus(input);
        batches = new Path[stati.length];
        for (int i = 0; i < stati.length; i++) {
            batches[i] = stati[i].getPath();
        }
    }

    epochs = Integer.valueOf(getOption("epochs"));
    learningrate = Double.parseDouble(getOption("learningrate"));
    momentum = Double.parseDouble(getOption("momentum"));
    rbmNrtoTrain = Integer.parseInt(getOption("rbmnr"));
    nrGibbsSampling = Integer.parseInt(getOption("nrgibbs"));

    boolean initialize = hasOption(DefaultOptionCreator.OVERWRITE_OPTION) || !fs.exists(output)
        || fs.listStatus(output).length <= 0;

    if (initialize) {
        String structure = getOption("structure");
        if (structure == null || structure.isEmpty()) {
            return -1;
        }
        String[] layers = structure.split(",");
        if (layers.length < 2) {
            return -1;
        }
        int[] actualLayerSizes = new int[layers.length];
        for (int i = 0; i < layers.length; i++) {
            actualLayerSizes[i] = Integer.parseInt(layers[i]);
        }
        rbmCl = new RBMClassifier(labelcount, actualLayerSizes);
        logger.info("New model initialized!");
    } else {
        rbmCl = RBMClassifier.materialize(output, getConf());
        logger.info("Model found and materialized!");
    }

    HadoopUtil.setSerializations(getConf());
    lastUpdate = new Matrix[rbmCl.getDbm().getRbmCount()];

    if (initbiases) {
        // init biases!
        Vector biases = null;
        int counter = 0;
        for (Path batch : batches) {
            for (Pair<IntWritable, VectorWritable> record
                    : new SequenceFileIterable<IntWritable, VectorWritable>(batch, getConf())) {
                if (biases == null) {
                    biases = record.getSecond().get().clone();
                } else {
                    // Vector.plus returns a new vector, so the result must be reassigned
                    biases = biases.plus(record.getSecond().get());
                }
                counter++;
            }
        }
        if (biases == null) {
            logger.info("No training data found!");
            return -1;
        }
        rbmCl.getDbm().getLayer(0).setBiases(biases.divide(counter));
        logger.info("Biases initialized");
    }

    // greedy pre-training with gradually decreasing learning rates
    if (greedy) {
        if (!local) {
            rbmCl.serialize(output, getConf());
        }
        double tempLearningrate = learningrate;
        if (rbmNrtoTrain < 0) {
            // train all rbms
            for (int rbmNr = 0; rbmNr < rbmCl.getDbm().getRbmCount(); rbmNr++) {
                tempLearningrate = learningrate;

                // double weights if dbm was materialized, because it was halved after greedy pretraining
                if (!initialize && rbmNrtoTrain > 0 && rbmNrtoTrain < rbmCl.getDbm().getRbmCount() - 1) {
                    ((SimpleRBM) rbmCl.getDbm().getRBM(rbmNr)).setWeightMatrix(
                        ((SimpleRBM) rbmCl.getDbm().getRBM(rbmNr)).getWeightMatrix().times(2));
                }

                for (int j = 0; j < epochs; j++) {
                    logger.info("Greedy training, epoch " + (j + 1)
                        + "\nCurrent learningrate: " + tempLearningrate);
                    for (int b = 0; b < batches.length; b++) {
                        tempLearningrate -= learningrate / (epochs * batches.length + epochs);
                        if (local) {
                            if (!trainGreedySeq(rbmNr, batches[b], j, tempLearningrate)) {
                                return -1;
                            }
                        } else if (!trainGreedyMR(rbmNr, batches[b], j, tempLearningrate)) {
                            return -1;
                        }
                        if (monitor && (batches.length > 19) && (b + 1) % (batches.length / 20) == 0) {
                            logger.info(rbmNr + "-RBM: "
                                + Math.round(((double) b + 1) / batches.length * 100.0) + "% in epoch done!");
                        }
                    }
                    logger.info(Math.round(((double) j + 1) / epochs * 100)
                        + "% of training on rbm number " + rbmNr + " is done!");

                    if (monitor) {
                        double error = rbmError(batches[0], rbmNr);
                        logger.info("Average reconstruction error on batch " + batches[0].getName() + ": " + error);
                    }

                    rbmCl.serialize(output, getConf());
                }

                // weight normalization to avoid double counting
                if (rbmNr > 0 && rbmNr < rbmCl.getDbm().getRbmCount() - 1) {
                    ((SimpleRBM) rbmCl.getDbm().getRBM(rbmNrtoTrain)).setWeightMatrix(
                        ((SimpleRBM) rbmCl.getDbm().getRBM(rbmNrtoTrain)).getWeightMatrix().times(0.5));
                }
            }
        } else {
            // double weights if dbm was materialized, because it was halved after greedy pretraining
            if (!initialize && rbmNrtoTrain > 0 && rbmNrtoTrain < rbmCl.getDbm().getRbmCount() - 1) {
                ((SimpleRBM) rbmCl.getDbm().getRBM(rbmNrtoTrain)).setWeightMatrix(
                    ((SimpleRBM) rbmCl.getDbm().getRBM(rbmNrtoTrain)).getWeightMatrix().times(2));
            }
            // train just the wanted rbm
            for (int j = 0; j < epochs; j++) {
                logger.info("Greedy training, epoch " + (j + 1)
                    + "\nCurrent learningrate: " + tempLearningrate);
                for (int b = 0; b < batches.length; b++) {
                    tempLearningrate -= learningrate / (epochs * batches.length + epochs);
                    if (local) {
                        if (!trainGreedySeq(rbmNrtoTrain, batches[b], j, tempLearningrate)) {
                            return -1;
                        }
                    } else if (!trainGreedyMR(rbmNrtoTrain, batches[b], j, tempLearningrate)) {
                        return -1;
                    }
                    if (monitor && (batches.length > 19) && (b + 1) % (batches.length / 20) == 0) {
                        logger.info(rbmNrtoTrain + "-RBM: "
                            + Math.round(((double) b + 1) / batches.length * 100.0) + "% in epoch done!");
                    }
                }
                logger.info(Math.round(((double) j + 1) / epochs * 100) + "% of training is done!");

                if (monitor) {
                    double error = rbmError(batches[0], rbmNrtoTrain);
                    logger.info("Average reconstruction error on batch " + batches[0].getName() + ": " + error);
                }
            }

            // weight normalization to avoid double counting
            if (rbmNrtoTrain > 0 && rbmNrtoTrain < rbmCl.getDbm().getRbmCount() - 1) {
                ((SimpleRBM) rbmCl.getDbm().getRBM(rbmNrtoTrain)).setWeightMatrix(
                    ((SimpleRBM) rbmCl.getDbm().getRBM(rbmNrtoTrain)).getWeightMatrix().times(0.5));
            }
        }

        rbmCl.serialize(output, getConf());
        logger.info("Pretraining done and model written to output");
    }

    if (finetuning) {
        DeepBoltzmannMachine multiLayerDbm = null;
        double tempLearningrate = learningrate;
        // finetuning job
        for (int j = 0; j < epochs; j++) {
            for (int b = 0; b < batches.length; b++) {
                multiLayerDbm = rbmCl.initializeMultiLayerNN();
                logger.info("Finetuning on batch " + batches[b].getName()
                    + "\nCurrent learningrate: " + tempLearningrate);
                tempLearningrate -= learningrate / (epochs * batches.length + epochs);
                if (local) {
                    if (!finetuneSeq(batches[b], j, multiLayerDbm, tempLearningrate)) {
                        return -1;
                    }
                } else if (!fintuneMR(batches[b], j, tempLearningrate)) {
                    return -1;
                }
                logger.info("Finetuning: " + Math.round(((double) b + 1) / batches.length * 100.0)
                    + "% in epoch done!");
            }
            logger.info(Math.round(((double) j + 1) / epochs * 100) + "% of training is done!");

            if (monitor) {
                double error = feedForwardError(multiLayerDbm, batches[0]);
                logger.info("Average discriminative error on batch " + batches[0].getName() + ": " + error);
            }
        }

        // final serialization
        rbmCl.serialize(output, getConf());
        logger.info("RBM finetuning done and model written to output");
    }

    if (executor != null) {
        executor.shutdownNow();
    }
    return 0;
}
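The greedy and finetuning loops above decay the learning rate linearly: every batch subtracts learningrate / (epochs * batches.length + epochs). A quick sketch of that schedule with illustrative numbers (algebraically, after epochs * nBatches steps the rate ends at learningrate / (nBatches + 1)):

public class DecayDemo {
    public static void main(String[] args) {
        double learningrate = 0.005; // the job's default
        int epochs = 10, nBatches = 5; // illustrative values
        double step = learningrate / (epochs * nBatches + epochs); // per-batch decrement, 0.005 / 60
        double finalRate = learningrate - (double) epochs * nBatches * step; // == learningrate / (nBatches + 1)
        System.out.printf("step = %.6f, final rate = %.6f%n", step, finalRate);
    }
}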